{
  "version": "1.0.0",
  "exported_at": "2026-06-02T21:00:00.000Z",
  "project": {
    "name": "Google News Scraper by URL",
    "description": "Extracts Google-News-style article data from a list of news article URLs: keyword, source, title, publish date, URL, and body text. Uses multi-URL navigation with loop-continue and appends one CSV row per URL. No on-page pagination was detected; navigation is handled by the supplied URL list. WSJ sample may be blocked by DataDome/CAPTCHA/paywall unless the browser profile has valid access.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.wsj.com/articles/nobel-peace-prize-obama-eu-7795cf7a",
          "https://www.dailysabah.com/turkiye/nobel-laureate-sancar-stresses-vital-role-of-science-in-turkiyes-2nd-century/news"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "jsCode": "(() => { const btns = ['#CybotCookiebotDialogBodyButtonDecline', '#CybotCookiebotDialogBodyLevelButtonLevelOptinDeclineAll', '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll', '#CybotCookiebotDialogBodyButtonAccept']; for (const s of btns) { const el = document.querySelector(s); if (el) { el.click(); return 'clicked ' + s; } } document.querySelectorAll('#CybotCookiebotDialog,[id*=cookie i],[class*=cookie i],[class*=consent i]').forEach(el => { const t = (el.innerText || '').toLowerCase(); if (t.includes('cookie') || t.includes('consent')) el.style.display = 'none'; }); return 'cookie cleanup attempted'; })()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "rowSelector": "body",
        "fileName": "google_news_scraper_by_url.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "keyword",
            "selector": "(() => { const params = new URL(location.href).searchParams; const q = params.get('q') || params.get('query') || params.get('keyword'); if (q) return q; const metaKeywords = document.querySelector('meta[name=keywords]')?.content || ''; const found = metaKeywords.split(',').map(s => s.trim()).find(s => /nobel/i.test(s)); return found || 'Nobel Prize'; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "source",
            "selector": "(() => { const meta = k => document.querySelector('meta[property=' + CSS.escape(k) + '],meta[name=' + CSS.escape(k) + ']')?.content || ''; return meta('og:site_name') || meta('application-name') || location.hostname.replace(/^www\\./, ''); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "title",
            "selector": "(() => { const meta = k => document.querySelector('meta[property=' + CSS.escape(k) + '],meta[name=' + CSS.escape(k) + ']')?.content || ''; const h1 = document.querySelector('h1')?.innerText?.trim() || ''; return meta('og:title') || meta('twitter:title') || h1 || document.title || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "publish_date",
            "selector": "(() => { const meta = k => document.querySelector('meta[property=' + CSS.escape(k) + '],meta[name=' + CSS.escape(k) + ']')?.content || ''; return meta('article:published_time') || meta('datePublished') || meta('publishdate') || meta('DC.date.issued') || document.querySelector('time[datetime]')?.getAttribute('datetime') || document.querySelector('[datetime]')?.getAttribute('datetime') || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "news_url",
            "selector": "(() => { const canonical = document.querySelector('link[rel=canonical]')?.href || ''; const og = document.querySelector('meta[property=\"og:url\"],meta[name=\"og:url\"]')?.content || ''; return canonical || og || location.href; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "news_text",
            "selector": "(() => { const selectors = ['article', '[itemprop=articleBody]', '.article-body', '.article_body', '.article-content', '.article_content', '.story-body', '.story_body', '.news-detail', '.news_detail', '.content-body', '.post-content', 'main']; const candidates = selectors.map(s => document.querySelector(s)).filter(Boolean); let root = candidates.sort((a, b) => (b.innerText || '').length - (a.innerText || '').length)[0] || document.body; const clone = root.cloneNode(true); clone.querySelectorAll('script,style,noscript,iframe,svg,nav,header,footer,aside,form,button,input,select,textarea,#CybotCookiebotDialog,[id*=cookie i],[class*=cookie i],[class*=consent i],[class*=advert i],[class*=ad- i]').forEach(e => e.remove()); return (clone.innerText || '').replace(/\\s+/g, ' ').trim(); })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2280,
      "position_y": 240,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 768,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2208,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Google-News-style article data from a list of news article URLs: keyword, source, title, publish date, URL, and body text. Uses multi-URL navigation with loop-continue and appends one CSV row per URL. No on-page pagination was detected; navigation is handled by the supplied URL list. WSJ sample may be blocked by DataDome/CAPTCHA/paywall unless the browser profile has valid access.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const btns = ['#CybotCookiebotDialogBodyButtonDecline', '#CybotCookiebotDialogBodyLevelButt...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1040,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (keyword, source, title, publish_date, news_url). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 220,
      "width": 340,
      "height": 129,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}