{
  "version": "1.0.0",
  "exported_at": "2026-06-01T00:00:00.000Z",
  "project": {
    "name": "CNN news Scraper",
    "description": "Extracts CNN article data matching the Octoparse CNN news template fields: keyword, title, News_link, Publish_date, Description, Author, Updated, and Text. The provided target is a single CNN article detail page, so no pagination is used. A JavaScript cleanup step attempts to dismiss common cookie/newsletter overlays before extraction.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "url": "https://www.cnn.com/2024/05/23/sport/ademola-lookman-hat-trick-atalanta-europa-league-bayer-leverkusen-spt-intl/index.html",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "jsCode": "(() => { const clickText = /^(accept|accept all|close|got it|continue)$/i; Array.from(document.querySelectorAll('button, [role=\"button\"], a')).forEach(el => { const txt = (el.innerText || el.textContent || '').trim(); if (clickText.test(txt)) { try { el.click(); } catch (e) {} } }); Array.from(document.querySelectorAll('[aria-modal=\"true\"], .bx-campaign, .bx-campaign-2534682, .onetrust-pc-dark-filter, #onetrust-banner-sdk')).forEach(el => { try { el.style.display = 'none'; } catch (e) {} }); document.documentElement.style.overflow = 'auto'; document.body.style.overflow = 'auto'; return true; })();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "duration": 1,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "selector": "h1",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "rowSelector": "body",
        "fileName": "CNN-news-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "create",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "keyword",
            "selector": "\"UEFA EURO 2024\"",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "title",
            "selector": "(() => { const el = document.querySelector('h1[data-editable=\"headlineText\"], h1.headline__text, h1'); return (el?.innerText || document.title.replace(/\\s*\\|\\s*CNN.*$/i, '') || '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "News_link",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Publish_date",
            "selector": "(() => { const raw = document.querySelector('meta[property=\"article:published_time\"], meta[name=\"pubdate\"], meta[name=\"publish-date\"]')?.getAttribute('content') || ''; if (raw) { const d = new Date(raw); if (!isNaN(d)) return d.toLocaleDateString('en-US', { month: 'long', day: 'numeric', year: 'numeric' }); } const txt = (document.querySelector('[data-editable=\"timestamp\"], .timestamp, time')?.innerText || '').trim(); const m = txt.match(/(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun),?\\s*([A-Z][a-z]+\\s+\\d{1,2},\\s+\\d{4})|([A-Z][a-z]+\\s+\\d{1,2},\\s+\\d{4})/); return (m?.[1] || m?.[2] || txt).trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Description",
            "selector": "(() => { const meta = document.querySelector('meta[name=\"description\"], meta[property=\"og:description\"]')?.getAttribute('content'); const dek = document.querySelector('[data-editable=\"description\"], .headline__sub-text, .article__subtitle, .subheadline')?.innerText; return (meta || dek || '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Author",
            "selector": "(() => { const meta = document.querySelector('meta[name=\"author\"], meta[property=\"article:author\"]')?.getAttribute('content'); const byline = document.querySelector('.byline__names, .byline__name, [data-editable=\"byline\"], .byline')?.innerText; return (meta || byline || '').replace(/^By\\s+/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Updated",
            "selector": "(() => { const visible = (document.querySelector('[data-editable=\"timestamp\"], .timestamp, time')?.innerText || '').trim(); if (visible) return visible; const modified = document.querySelector('meta[property=\"article:modified_time\"], meta[name=\"lastmod\"]')?.getAttribute('content') || ''; if (modified) return modified; const published = document.querySelector('meta[property=\"article:published_time\"], meta[name=\"pubdate\"]')?.getAttribute('content') || ''; return published.trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Text",
            "selector": "(() => { const selectors = ['article p.paragraph', 'article .paragraph', '[data-component-name=\"paragraph\"]', '.article__content p', 'main article p', 'main p']; const seen = new Set(); const parts = []; selectors.forEach(sel => { document.querySelectorAll(sel).forEach(el => { const txt = (el.innerText || el.textContent || '').replace(/\\s+/g, ' ').trim(); if (!txt) return; if (/^(Ad Feedback|Advertisement|Enter your email|Sign me up|By subscribing|Success!|Get a daily roundup)/i.test(txt)) return; if (/privacy policy/i.test(txt) && /subscribe/i.test(txt)) return; if (!seen.has(txt)) { seen.add(txt); parts.push(txt); } }); }); return parts.join('\\n\\n'); })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 2280,
      "position_y": 240,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 768,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 2208,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts CNN article data matching the Octoparse CNN news template fields: keyword, title, News_link, Publish_date, Description, Author, Updated, and Text. The provided target is a single CNN article detail page, so no pagination is used. A JavaScript cleanup step attempts to dismiss common cookie/newsletter overlays before extraction.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
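      "content": "Runs custom JavaScript in the page: `(() => { const clickText = /^(accept|accept all|close|got it|continue)$/i; Array.from(document.query...` It clicks buttons and links whose text is exactly accept, accept all, close, got it, or continue (case-insensitive), hides known modal and consent-banner elements, and sets page overflow back to auto. If the exported data comes back empty, verify the script in the browser.",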
      "color": "#ee5396",
      "position_x": 1040,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (keyword, title, News_link, Publish_date, Description, Author, Updated, Text). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 220,
      "width": 340,
      "height": 131,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    }
  ]
}