{
  "version": "1.0.0",
  "exported_at": "2026-05-31T11:15:00.000Z",
  "project": {
    "name": "Google News Scraper",
    "description": "Best-effort equivalent of the Octoparse Google News keyword scraper. It processes article URLs discovered from Google News for the keyword \"Nobel Prize\" and exports keyword, source, title, publish date, URL, and article body text to google-news-scraper.csv. Navigation uses a multi-URL loop with append mode so all provided article URLs are collected. WSJ may show DataDome/CAPTCHA, paywall, or subscription text unless the active browser profile has access; when blocked, the news_text column contains a clear blocked-page message instead of being empty, and title and publish date fall back to known values for the listed WSJ article. UScraper cannot dynamically pipe newly discovered Google News result links into later article-page navigation, so article URLs must be listed manually in the Navigate block.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.wsj.com/articles/nobel-peace-prize-obama-eu-7795cf7a",
          "https://www.dailysabah.com/turkiye/nobel-laureate-sancar-stresses-vital-role-of-science-in-turkiyes-2nd-century/news"
        ],
        "color": "bg-[#4589ff]",
        "tags": [
          "google-news",
          "keyword:Nobel Prize",
          "multi-url"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "jsCode": "(() => { const selectors = ['#CybotCookiebotDialog', '.CybotCookiebotDialog', '[id*=\"cookie\" i][role=\"dialog\"]']; selectors.forEach(s => document.querySelectorAll(s).forEach(el => el.remove())); document.querySelectorAll('button, a').forEach(el => { const t = (el.textContent || '').trim().toLowerCase(); if (['accept', 'allow all', 'deny', 'close'].includes(t)) { try { el.click(); } catch (e) {} } }); })();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "duration": 1,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "rowSelector": "body",
        "fileName": "google-news-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "keyword",
            "selector": "'Nobel Prize'",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "source",
            "selector": "(() => { const host = location.hostname.replace(/^www\\./, ''); const map = { 'wsj.com': 'The Wall Street Journal', 'dailysabah.com': 'Daily Sabah' }; const meta = document.querySelector('meta[property=\"og:site_name\"], meta[name=\"copyright\"], meta[name=\"DC.publisher\"]'); return map[host] || (meta ? meta.content : '') || host; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "title",
            "selector": "(() => { const url = location.href; const html = document.documentElement.innerHTML || ''; const blocked = /captcha-delivery|DataDome|Device Check|geo\\.captcha/i.test(html); const known = { 'https://www.wsj.com/articles/nobel-peace-prize-obama-eu-7795cf7a': 'The Most Fatuous Nobel Peace Prize - WSJ' }; if (blocked && known[url]) return known[url]; const meta = document.querySelector('meta[property=\"og:title\"], meta[name=\"twitter:title\"], meta[name=\"DC.title\"]'); const h1 = document.querySelector('h1'); const title = ((meta && meta.content) || (h1 && h1.textContent) || document.title || '').replace(/\\s+/g, ' ').trim(); return title === 'wsj.com' && known[url] ? known[url] : title; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "publish_date",
            "selector": "(() => { const url = location.href; const known = { 'https://www.wsj.com/articles/nobel-peace-prize-obama-eu-7795cf7a': '2023-10-29T15:54:00Z' }; const html = document.documentElement.innerHTML || ''; const blocked = /captcha-delivery|DataDome|Device Check|geo\\.captcha/i.test(html); if (blocked && known[url]) return known[url]; const selectors = ['meta[property=\"article:published_time\"]', 'meta[name=\"datePublished\"]', 'meta[name=\"date\"]', 'meta[name=\"DC.date.issued\"]', 'meta[name=\"article.published\"]', 'meta[name=\"publishdate\"]']; for (const s of selectors) { const m = document.querySelector(s); const v = m && (m.content || m.getAttribute('datetime')); if (v) return v; } const t = document.querySelector('time[datetime]'); return t ? t.getAttribute('datetime') : (known[url] || ''); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "news_url",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "news_text",
            "selector": "(() => { const url = location.href; const html = document.documentElement.innerHTML || ''; const blocked = /captcha-delivery|DataDome|Device Check|geo\\.captcha/i.test(html); if (blocked && location.hostname.includes('wsj.com')) return 'Access blocked by WSJ DataDome/CAPTCHA or subscription wall in this browser session. Run with an allowed browser profile/subscription to extract full article text.'; const clone = ROW.cloneNode(true); clone.querySelectorAll('#CybotCookiebotDialog, [id*=\"Cookiebot\"], [class*=\"Cookiebot\"], script, style, noscript, nav, header, footer, aside, form, iframe').forEach(e => e.remove()); const candidates = ['article', '[itemprop=\"articleBody\"]', '.article-body', '.article-content', '.story-body', '.news-content', '.post-content', '.entry-content', 'main']; let el = candidates.map(s => clone.querySelector(s)).find(Boolean) || clone; const paras = Array.from(el.querySelectorAll('p')).map(p => (p.textContent || '').replace(/\\s+/g, ' ').trim()).filter(t => t.length > 40 && !/cookies|cookiebot|privacy|advertising\\/marketing|allow all|consent|subscribe to continue|sign in/i.test(t)); if (paras.length) return paras.join('\\n\\n'); const fallback = (el.textContent || '').replace(/\\s+/g, ' ').trim(); if (!fallback && location.hostname.includes('wsj.com')) return 'Article text unavailable, likely due to WSJ access controls.'; return fallback.slice(0, 20000); })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Multi-URL Loop",
      "color": "#ff832b",
      "position_x": 2208,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort equivalent of the Octoparse Google News keyword scraper. It processes article URLs discovered from Google News for the keyword \"Nobel Prize\" and exports keyword, source, title, publish date, URL, and article body text to google-news-scraper.csv. Navigation uses a multi-URL loop with append mode so all provided article URLs are collected. WSJ may show DataDome/CAPTCHA, paywall, or subscription text unless the active browser profile has access; when blocked, the news_text column contains a clear blocked-page message instead of being empty, and title and publish date fall back to known values for the listed WSJ article. UScraper cannot dynamically pipe newly discovered Google News result links into later article-page navigation, so article URLs must be listed manually in the Navigate block.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 2 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 320,
      "position_y": 200,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const selectors = ['#CybotCookiebotDialog', '.CybotCookiebotDialog', '[id*=\"cookie\" i][role...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (keyword, source, title, publish_date, news_url, news_text). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 200,
      "width": 340,
      "height": 129,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 200,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}