{
  "version": "1.0.0",
  "exported_at": "2026-05-31T05:20:00.000Z",
  "project": {
    "name": "HTML Scraper",
    "description": "Generic HTML/source scraper equivalent to the Octoparse HTML Scraper. Navigates through the supplied URL list, captures page title, meta keywords, meta description, and full document HTML/source into html-scraper.csv. Navigation strategy: known multi-URL list using navigate.urls[] plus loop-continue; no on-page pagination is required. Some sample URLs may redirect or return 403/400 error pages, which are captured best-effort.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated-html-scraper"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://apify.com/apidojo/tweet-scraper",
          "https://api-iam.intercom.io",
          "https://cookie-cdn.cookiepro.com/"
        ],
        "color": "bg-[#4589ff]",
        "tags": [
          "multi-url",
          "input-pages"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 45,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "selector": "html",
        "timeout": 30,
        "visible": false,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "rowSelector": "html",
        "fileName": "html-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "tags": [
          "metadata",
          "html-source"
        ],
        "columns": [
          {
            "name": "original_url",
            "selector": "(() => { const configured = ['https://apify.com/apidojo/tweet-scraper','https://api-iam.intercom.io','https://cookie-cdn.cookiepro.com/']; const href = window.location.href; const host = window.location.hostname.toLowerCase(); const normalize = (u) => { try { return new URL(u).hostname.toLowerCase(); } catch (e) { return ''; } }; const exact = configured.find(u => href === u || href.startsWith(u)); if (exact) return exact; const hostMatch = configured.find(u => { const h = normalize(u); return host === h || host.endsWith('.' + h) || (h.includes('intercom') && host.includes('intercom')) || (h.includes('cookiepro') && host.includes('cookiepro')) || (h.includes('apify') && host.includes('apify')); }); return hostMatch || href; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "title",
            "selector": "document.title || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "keywords",
            "selector": "document.querySelector('meta[name=\"keywords\" i]')?.getAttribute('content') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "description",
            "selector": "document.querySelector('meta[name=\"description\" i]')?.getAttribute('content') || document.querySelector('meta[property=\"og:description\" i]')?.getAttribute('content') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "source_code",
            "selector": "document.documentElement ? document.documentElement.outerHTML : ''",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "color": "bg-[#ff832b]",
        "tags": [
          "multi-url-loop"
        ]
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Generic HTML/source scraper equivalent to the Octoparse HTML Scraper. Navigates through the supplied URL list, captures page title, meta keywords, meta description, and full document HTML/source into html-scraper.csv. Navigation strategy: known multi-URL list using navigate.urls[] plus loop-continue; no on-page pagination is required. Some sample URLs may redirect or return 403/400 error pages, which are captured best-effort.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (original_url, title, keywords, description, source_code). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 200,
      "width": 340,
      "height": 132,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 200,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    },
    {
      "id": "note-limitations",
      "element_type": "note",
      "title": "Blocked/error pages",
      "content": "If a URL redirects, blocks scraping, or returns an HTTP error page, this template still captures the final rendered document source and available metadata best-effort.",
      "color": "#fa4d56",
      "position_x": 840,
      "position_y": 440,
      "width": 320,
      "height": 120,
      "z_index": 22,
      "block_id": "wait-for-element-1",
      "connection_id": null,
      "data": {
        "minimized": false
      }
    }
  ]
}