{
  "version": "1.0.0",
  "exported_at": "2026-05-31T00:00:00.000Z",
  "project": {
    "name": "Indeed Scraper",
    "description": "Best-effort Indeed job scraper equivalent to the Octoparse template. Live testing showed Indeed's normal HTML results and RSS endpoints can return no scrapeable job-card/feed DOM in automated browsers, likely due to anti-bot/interstitial blocking or endpoint changes. This template uses Indeed RSS search URLs with start-offset pagination, normalizes any available feed items into regular HTML rows, and exports job title, URL, company, location, salary, job type, posted date, valid/scraped date, experience level, description, guid, source, feed page URL, and scrape status. If Indeed blocks or returns no items, the CSV receives a diagnostic row instead of failing. Edit Navigate URLs to change keyword, location, or page depth.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.indeed.com/rss?q=software+engineer&l=United+States&start=0",
          "https://www.indeed.com/rss?q=software+engineer&l=United+States&start=10",
          "https://www.indeed.com/rss?q=software+engineer&l=United+States&start=20",
          "https://www.indeed.com/rss?q=software+engineer&l=United+States&start=30",
          "https://www.indeed.com/rss?q=software+engineer&l=United+States&start=40"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "waitForCompletion": true,
        "timeout": 20,
        "jsCode": "(() => {\n  const fields = ['job_title','job_url','company','location','salary','job_type','posted_date','valid_date','experience_level','description','guid','source','feed_page_url','scrape_status'];\n  // Normalizes Indeed RSS items found on the current page into .uscraper-rss-item rows for the Structured Export block.\n  // Returns the number of rows written; one diagnostic row is written when no items are found.\n  const esc = (s) => String(s || '').replace(/[&<>\"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','\"':'&quot;',\"'\":'&#39;'}[c]));\n  // Decode HTML entities with a textarea owned by a detached HTML document. When the feed is rendered as an\n  // XML document, document.createElement yields a generic Element whose value is undefined, which made strip() throw.\n  let decodeDoc = null;\n  const decode = (s) => {\n    const str = String(s || '');\n    try {\n      if (!decodeDoc) decodeDoc = document.implementation.createHTMLDocument('');\n      const t = decodeDoc.createElement('textarea');\n      t.innerHTML = str;\n      return typeof t.value === 'string' ? t.value : str;\n    } catch (e) {\n      return str;\n    }\n  };\n  // Remove a CDATA wrapper and markup, collapse whitespace, and decode entities (twice, for double-escaped feeds).\n  const strip = (s) => {\n    let v = String(s || '');\n    v = v.replace(/^\\s*<!\\[CDATA\\[/, '').replace(/\\]\\]>\\s*$/, '');\n    v = decode(v);\n    v = v.replace(/<[^>]*>/g, ' ').replace(/\\s+/g, ' ').trim();\n    return decode(v);\n  };\n  const getXmlTag = (item, tag) => {\n    try {\n      const el = item.getElementsByTagName(tag)[0];\n      return el ? (el.textContent || '').trim() : '';\n    } catch (e) {\n      return '';\n    }\n  };\n  const getStrTag = (txt, tag) => {\n    try {\n      const re = new RegExp('<' + tag + '\\\\b[^>]*>([\\\\s\\\\S]*?)<\\\\/' + tag + '>', 'i');\n      const m = String(txt || '').match(re);\n      return m ? m[1] : '';\n    } catch (e) {\n      return '';\n    }\n  };\n  const parseXmlItems = (txt) => {\n    try {\n      if (!txt || !String(txt).match(/<rss|<feed|<item/i)) return [];\n      const xml = new DOMParser().parseFromString(String(txt), 'application/xml');\n      if (xml.getElementsByTagName('parsererror')[0]) return [];\n      return Array.from(xml.getElementsByTagName('item')).map(item => ({\n        title: getXmlTag(item, 'title'),\n        link: getXmlTag(item, 'link'),\n        description: getXmlTag(item, 'description'),\n        pubDate: getXmlTag(item, 'pubDate'),\n        guid: getXmlTag(item, 'guid'),\n        source: getXmlTag(item, 'source')\n      }));\n    } catch (e) {\n      return [];\n    }\n  };\n  const parseRegexItems = (txt) => {\n    try {\n      const blocks = String(txt || '').match(/<item\\b[\\s\\S]*?<\\/item>/gi) || [];\n      return blocks.map(b => ({\n        title: getStrTag(b, 'title'),\n        link: getStrTag(b, 'link'),\n        description: getStrTag(b, 'description'),\n        pubDate: getStrTag(b, 'pubDate'),\n        guid: getStrTag(b, 'guid'),\n        source: getStrTag(b, 'source')\n      }));\n    } catch (e) {\n      return [];\n    }\n  };\n  // Try XML parsing first, then a regex scan, on the raw text and on its entity-decoded variants.\n  const parseRaw = (raw) => {\n    for (const v of [raw, decode(raw), decode(decode(raw))]) {\n      let found = parseXmlItems(v);\n      if (!found.length) found = parseRegexItems(v);\n      if (found.length) return found;\n    }\n    return [];\n  };\n  const rawCandidates = [];\n  try { rawCandidates.push(document.documentElement ? document.documentElement.outerHTML : ''); } catch (e) {}\n  try { rawCandidates.push(document.documentElement ? document.documentElement.textContent : ''); } catch (e) {}\n  try { rawCandidates.push(document.body ? document.body.innerText : ''); } catch (e) {}\n  let items = [];\n  try {\n    const domItems = Array.from(document.getElementsByTagName('item'));\n    if (domItems.length) {\n      items = domItems.map(item => ({\n        title: getXmlTag(item, 'title'),\n        link: getXmlTag(item, 'link'),\n        description: getXmlTag(item, 'description'),\n        pubDate: getXmlTag(item, 'pubDate'),\n        guid: getXmlTag(item, 'guid'),\n        source: getXmlTag(item, 'source')\n      }));\n    }\n  } catch (e) {}\n  for (const raw of rawCandidates) {\n    if (items.length) break;\n    items = parseRaw(raw);\n  }\n  if (!items.length) {\n    // Last resort: re-fetch the feed text. Deferred until here so pages that already yielded items\n    // do not trigger a second, blocking request for every URL in the loop.\n    try {\n      const xhr = new XMLHttpRequest();\n      xhr.open('GET', window.location.href, false);\n      xhr.send(null);\n      if (xhr.status >= 200 && xhr.status < 400) {\n        const body = xhr.responseText || '';\n        rawCandidates.push(body);\n        items = parseRaw(body);\n      }\n    } catch (e) {}\n  }\n  const rows = [];\n  for (const item of items) {\n    const fullTitle = strip(item.title);\n    if (!fullTitle && !item.link && !item.description) continue;\n    const parts = fullTitle.split(/\\s+-\\s+/);\n    const desc = strip(item.description);\n    const salaryMatch = desc.match(/(?:\\$|USD\\s*)[0-9][0-9,]*(?:\\.?[0-9]{0,2})?(?:\\s*(?:-|–|to)\\s*(?:\\$|USD\\s*)?[0-9][0-9,]*(?:\\.?[0-9]{0,2})?)?(?:\\s*(?:an hour|per hour|hourly|a year|per year|yearly|annually|a month|per month|weekly))?/i);\n    const typeMatch = desc.match(/\\b(full[- ]?time|part[- ]?time|contract|temporary|temp|internship|permanent|remote|hybrid)\\b/i);\n    const expMatch = desc.match(/\\b(entry[- ]?level|junior|associate|mid[- ]?level|senior|lead|principal|manager|director|[0-9]+\\+?\\s*years?\\s*(?:of\\s*)?(?:experience)?)\\b/i);\n    rows.push({\n      job_title: parts[0] || fullTitle,\n      job_url: strip(item.link),\n      company: parts.length >= 2 ? parts[1] : '',\n      location: parts.length >= 3 ? parts.slice(2).join(' - ') : '',\n      salary: salaryMatch ? salaryMatch[0] : '',\n      job_type: typeMatch ? typeMatch[0] : '',\n      posted_date: strip(item.pubDate),\n      valid_date: new Date().toISOString().slice(0, 10),\n      experience_level: expMatch ? expMatch[0] : '',\n      description: desc,\n      guid: strip(item.guid),\n      source: strip(item.source),\n      feed_page_url: window.location.href,\n      scrape_status: 'ok'\n    });\n  }\n  if (!rows.length) {\n    const visibleText = strip(rawCandidates.join(' ').slice(0, 3000));\n    rows.push({\n      job_title: '',\n      job_url: '',\n      company: '',\n      location: '',\n      salary: '',\n      job_type: '',\n      posted_date: '',\n      valid_date: new Date().toISOString().slice(0, 10),\n      experience_level: '',\n      description: 'No Indeed RSS items found. Indeed may have blocked the automated browser, returned an interstitial/CAPTCHA, or disabled this RSS endpoint. Page text preview: ' + visibleText.slice(0, 500),\n      guid: '',\n      source: 'Indeed',\n      feed_page_url: window.location.href,\n      scrape_status: 'blocked_or_no_items'\n    });\n  }\n  const rowsHtml = rows.map(r => '<div class=\"uscraper-rss-item\">' + fields.map(f => {\n    if (f === 'job_url') return '<a class=\"job_url\" href=\"' + esc(r[f]) + '\">' + esc(r[f]) + '</a>';\n    return '<span class=\"' + f + '\">' + esc(r[f]) + '</span>';\n  }).join('') + '</div>').join('');\n  const html = '<!doctype html><html><head><meta charset=\"utf-8\"><title>UScraper Indeed RSS Rows</title></head><body><div id=\"uscraper-rss-items\">' + rowsHtml + '</div></body></html>';\n  // Replace the page with the normalized rows; if the document cannot be rewritten, append the rows to it instead.\n  try {\n    document.open();\n    document.write(html);\n    document.close();\n  } catch (e) {\n    try {\n      const root = document.documentElement || document;\n      const container = document.createElement('div');\n      container.setAttribute('id', 'uscraper-rss-items');\n      container.innerHTML = rowsHtml;\n      root.appendChild(container);\n    } catch (e2) {}\n  }\n  return rows.length;\n})()"
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "selector": ".uscraper-rss-item",
        "timeout": 10,
        "visible": false
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 240,
      "config": {
        "rowSelector": ".uscraper-rss-item",
        "fileName": "indeed-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "job_title",
            "selector": ".job_title",
            "attribute": "text"
          },
          {
            "name": "job_url",
            "selector": "a.job_url",
            "attribute": "href"
          },
          {
            "name": "company",
            "selector": ".company",
            "attribute": "text"
          },
          {
            "name": "location",
            "selector": ".location",
            "attribute": "text"
          },
          {
            "name": "salary",
            "selector": ".salary",
            "attribute": "text"
          },
          {
            "name": "job_type",
            "selector": ".job_type",
            "attribute": "text"
          },
          {
            "name": "posted_date",
            "selector": ".posted_date",
            "attribute": "text"
          },
          {
            "name": "valid_date",
            "selector": ".valid_date",
            "attribute": "text"
          },
          {
            "name": "experience_level",
            "selector": ".experience_level",
            "attribute": "text"
          },
          {
            "name": "description",
            "selector": ".description",
            "attribute": "text"
          },
          {
            "name": "guid",
            "selector": ".guid",
            "attribute": "text"
          },
          {
            "name": "source",
            "selector": ".source",
            "attribute": "text"
          },
          {
            "name": "feed_page_url",
            "selector": ".feed_page_url",
            "attribute": "text"
          },
          {
            "name": "scrape_status",
            "selector": ".scrape_status",
            "attribute": "text"
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2640,
      "position_y": 240,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort Indeed job scraper equivalent to the Octoparse template. Live testing showed Indeed's normal HTML results and RSS endpoints can return no scrapeable job-card/feed DOM in automated browsers, likely due to anti-bot/interstitial blocking or endpoint changes. This template uses Indeed RSS search URLs with start-offset pagination, normalizes any available feed items into regular HTML rows, and exports job title, URL, company, location, salary, job type, posted date, valid/scraped date, experience level, description, guid, source, feed page URL, and scrape status. If Indeed blocks or returns no items, the CSV receives a diagnostic row instead of failing. Edit Navigate URLs to change keyword, location, or page depth.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 5 RSS result pages (start offsets 0 to 40 in steps of 10). Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 320,
      "position_y": 220,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page that converts any Indeed RSS <item> entries into `.uscraper-rss-item` rows, or writes a single diagnostic row (scrape_status `blocked_or_no_items`) when none are found. Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Extracts rows matching `.uscraper-rss-item`. Confirm row count > 0 before running at scale.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 220,
      "width": 340,
      "height": 110,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}