{
  "version": "1.0.0",
  "exported_at": "2026-06-03T13:10:00.000Z",
  "project": {
    "name": "Craigslist Job Details Scraper",
    "description": "Scrapes Craigslist job detail pages by a list of job URLs, exporting page URL, title, business/company name, image URL, posting body, location, latitude, longitude, map accuracy, notices, post ID, and listing date. Navigation uses navigate.urls[] plus loop-continue so multiple job detail URLs are appended into one CSV. The provided Octoparse sample URLs currently return Craigslist 404 pages, so this best-effort template still writes one row per input URL and flags expired/removed pages in the notice field; replace sample URLs with active Craigslist job posting URLs for full data.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "urls": [
          "https://newyork.craigslist.org/brk/spa/d/brooklyn-per-diem-assisted-stretch/7880377285.html",
          "https://newyork.craigslist.org/mnh/rej/d/metropolitan-property-group-is-looking/7880376857.html",
          "https://newyork.craigslist.org/brk/fbh/d/brooklyn-dishwahser/7880373261.html"
        ],
        "color": "bg-[#4589ff]",
        "tags": [
          "craigslist",
          "job-urls",
          "detail-pages"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "selector": "body",
        "timeout": 20,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "rowSelector": "body",
        "fileName": "craigslist-job-details-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "page_url",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "title",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; return document.querySelector('#titletextonly')?.textContent.trim() || document.querySelector('h1.postingtitle')?.textContent.replace(/\\s+/g, ' ').trim() || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "business_name",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const org = json.map(j => j.hiringOrganization && (j.hiringOrganization.name || (Array.isArray(j.hiringOrganization) && j.hiringOrganization[0]?.name))).find(Boolean); if (org) return org; const spans = Array.from(document.querySelectorAll('.attrgroup span, p.attrgroup span')).map(e => e.textContent.trim()); const hit = spans.find(t => /^(company|company name|organization|business name|employer):/i.test(t)); return hit ? hit.split(':').slice(1).join(':').trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "image_url",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; return document.querySelector('meta[property=\"og:image\"]')?.getAttribute('content') || document.querySelector('.gallery img, .slide img, img[src*=\"images.craigslist.org\"]')?.src || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "posting_body",
            "selector": "(() => { const el = document.querySelector('#postingbody'); if (!el) return ''; const clone = el.cloneNode(true); clone.querySelectorAll('.print-qrcode-container, script, style').forEach(n => n.remove()); return clone.textContent.replace('QR Code Link to This Post', '').replace(/\\s+/g, ' ').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; const map = document.querySelector('.mapaddress'); if (map && map.textContent.trim()) return map.textContent.trim(); const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const loc = json.map(j => Array.isArray(j.jobLocation) ? j.jobLocation[0] : j.jobLocation).find(Boolean); const addr = loc && loc.address; if (!addr) return ''; if (typeof addr === 'string') return addr; return [addr.streetAddress, addr.addressLocality, addr.addressRegion, addr.postalCode].filter(Boolean).join(', '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "latitude",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; const map = document.querySelector('#map[data-latitude]'); if (map) return map.getAttribute('data-latitude') || ''; const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const loc = json.map(j => Array.isArray(j.jobLocation) ? j.jobLocation[0] : j.jobLocation).find(Boolean); return loc?.geo?.latitude || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "longitude",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; const map = document.querySelector('#map[data-longitude]'); if (map) return map.getAttribute('data-longitude') || ''; const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const loc = json.map(j => Array.isArray(j.jobLocation) ? j.jobLocation[0] : j.jobLocation).find(Boolean); return loc?.geo?.longitude || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "data_accuracy",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; return document.querySelector('#map[data-accuracy]')?.getAttribute('data-accuracy') || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "notice",
            "selector": "(() => { if (!document.querySelector('#postingbody')) { const is404 = /Page Not Found|404 Error|There is nothing here/i.test(document.body.textContent || ''); return is404 ? 'expired_or_removed: Craigslist Page Not Found' : 'posting_body_not_found'; } return Array.from(document.querySelectorAll('.notices li')).map(e => e.textContent.replace(/\\s+/g, ' ').trim()).filter(Boolean).join(' | '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "post_id",
            "selector": "(() => { const text = document.body.textContent || ''; const fromPage = text.match(/post id:\\s*\\d+/i); if (fromPage) return fromPage[0]; const fromUrl = window.location.pathname.match(/\\/(\\d+)\\.html$/); return fromUrl ? 'post id: ' + fromUrl[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "listing_date",
            "selector": "(() => { if (!document.querySelector('#postingbody')) return ''; const t = document.querySelector('.postinginfos time[datetime], time.date[datetime], time[datetime]'); if (t) return t.getAttribute('datetime') || ''; const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); return json.map(j => j.datePosted).find(Boolean) || ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1560,
      "position_y": 260,
      "config": {
        "color": "bg-[#ff832b]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 156,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Craigslist job detail pages by a list of job URLs, exporting page URL, title, business/company name, image URL, posting body, location, latitude, longitude, map accuracy, notices, post ID, and listing date. Navigation uses navigate.urls[] plus loop-continue so multiple job detail URLs are appended into one CSV. The provided Octoparse sample URLs currently return Craigslist 404 pages, so this best-effort template still writes one row per input URL and flags expired/removed pages in the notice field; replace sample URLs with active Craigslist job posting URLs for full data.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (page_url, title, business_name, image_url, posting_body). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 240,
      "width": 340,
      "height": 132,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}