{
  "version": "1.0.0",
  "exported_at": "2026-06-03T16:45:00.000Z",
  "project": {
    "name": "Google News Scraper Cloud",
    "description": "Best-effort equivalent of the Octoparse Google News Scraper Cloud template. It accepts a list of Google News result/article URLs, visits each URL, and extracts source, title, canonical URL, publish date, author, keywords, abstract, article body, videos, and images. Pagination/navigation strategy: URL-list loop using navigate.urls plus loop-continue; structured export appends one row per URL. Replace or expand the sample URLs with your Google News article URLs. Direct image URLs from the Octoparse preview are treated as output assets, not article input pages.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.nationalnursesunited.org/press/long-beach-medical-center-nurses-vote-no-confidence-in-management",
          "https://www.jacksonlewis.com/insights/us-senate-vote-next-week-set-restore-nlrb-quorum"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "duration": 2,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "rowSelector": "body",
        "fileName": "google-news-scraper-cloud.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "Search_word",
            "selector": "(() => { const p=new URLSearchParams(location.search); return (p.get('q') || p.get('query') || p.get('search') || '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Source",
            "selector": "(() => { const q=s=>document.querySelector(s); const v=q(`meta[property='og:site_name']`)?.content || q('meta[name=application-name]')?.content || location.hostname.replace(/^www\\./,''); return v.trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Title",
            "selector": "(() => { const q=s=>document.querySelector(s); const v=q(`meta[property='og:title']`)?.content || q(`meta[name='twitter:title']`)?.content || q('h1')?.textContent || document.title || ''; return v.trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Title_URL",
            "selector": "(() => { const c=document.querySelector('link[rel=canonical]'); return (c?.href || location.href).trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "PublishDate",
            "selector": "(() => { const q=s=>document.querySelector(s); const meta=q(`meta[property='article:published_time']`)?.content || q('meta[name=date]')?.content || q('meta[name=pubdate]')?.content || q('time[datetime]')?.getAttribute('datetime'); if (meta) return meta.trim(); for (const s of document.querySelectorAll(`script[type='application/ld+json']`)) { try { const data=JSON.parse(s.textContent); const nodes=(Array.isArray(data)?data:[data]).flatMap(n=>Array.isArray(n?.['@graph'])?n['@graph']:[n]); for (const n of nodes) { if (n?.datePublished) return String(n.datePublished).trim(); } } catch(e) {} } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Author",
            "selector": "(() => { const q=s=>document.querySelector(s); const meta=q('meta[name=author]')?.content || q(`meta[property='article:author']`)?.content; if (meta) return meta.trim(); for (const s of document.querySelectorAll(`script[type='application/ld+json']`)) { try { const data=JSON.parse(s.textContent); const nodes=(Array.isArray(data)?data:[data]).flatMap(n=>Array.isArray(n?.['@graph'])?n['@graph']:[n]); for (const n of nodes) { if (!n?.author) continue; const a=Array.isArray(n.author)?n.author[0]:n.author; const name=String(typeof a==='string'?a:(a?.name || '')).trim(); if (name) return name; } } catch(e) {} } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Start_URL",
            "selector": "(() => location.href)()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Keywords",
            "selector": "(() => { const q=s=>document.querySelector(s); const kw=q('meta[name=keywords]')?.content || ''; const tags=Array.from(document.querySelectorAll(`meta[property='article:tag']`)).map(m=>m.content).filter(Boolean); return Array.from(new Set((kw?kw.split(','):[]).concat(tags).map(x=>x.trim()).filter(Boolean))).join(';'); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Abstract",
            "selector": "(() => { const q=s=>document.querySelector(s); const meta=q(`meta[property='og:description']`)?.content || q('meta[name=description]')?.content || q(`meta[name='twitter:description']`)?.content; if (meta) return meta.trim(); const p=document.querySelector('article p, main p, p'); return (p?.innerText || '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Text",
            "selector": "(() => { const root=document.querySelector('article') || document.querySelector('main') || document.body; const parts=Array.from(root.querySelectorAll('p, li')).map(e=>e.innerText.trim()).filter(t=>t.length>20); return parts.join('\\n\\n'); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Video",
            "selector": "(() => { const q=s=>document.querySelector(s); return q(`meta[property='og:video']`)?.content || q(`meta[property='og:video:url']`)?.content || q('video source')?.src || q('video')?.currentSrc || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "First_image",
            "selector": "(() => { const q=s=>document.querySelector(s); return q(`meta[property='og:image']`)?.content || q(`meta[name='twitter:image']`)?.content || document.querySelector('article img, main img, img')?.src || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Images",
            "selector": "(() => { const meta=Array.from(document.querySelectorAll(`meta[property='og:image'], meta[name='twitter:image']`)).map(m=>m.content); const imgs=Array.from(document.images).map(i=>i.currentSrc || i.src); return Array.from(new Set(meta.concat(imgs).filter(Boolean))).join(';'); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Text_by_XPath",
            "selector": "(() => { const xp='string((//article | //main)[1])'; const t=document.evaluate(xp, document, null, XPathResult.STRING_TYPE, null).stringValue || document.body.innerText || ''; return t.replace(/\\s+/g,' ').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "ErrorMessage",
            "selector": "(() => { if (/\\.(png|jpe?g|gif|webp|svg)([?#].*)?$/i.test(location.pathname)) return 'Direct image URL; not an article page'; if ((document.body?.innerText || '').trim().length < 100) return 'Article text not detected or page may be blocked'; return ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "color": "bg-[#ff832b]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 1400,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1488,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1848,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort equivalent of the Octoparse Google News Scraper Cloud template. It accepts a list of Google News result/article URLs, visits each URL, and extracts source, title, canonical URL, publish date, author, keywords, abstract, article body, videos, and images. Pagination/navigation strategy: URL-list loop using navigate.urls plus loop-continue; structured export appends one row per URL. Replace or expand the sample URLs with your Google News article URLs. Direct image URLs from the Octoparse preview are treated as output assets, not article input pages.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with 15 JS columns (Search_word, Source, Title, Title_URL, PublishDate, and 10 more). These selectors are fragile — update them if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 200,
      "width": 340,
      "height": 130,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 200,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}