{
  "version": "1.0.0",
  "exported_at": "2026-06-02T00:00:00.000Z",
  "project": {
    "name": "BiobioChile Scraper",
    "description": "Scrapes BioBioChile article detail pages for the keyword 'salario minimo', exporting keyword, title, author, published date, plain-text introduction, plain article text, and article URL. Navigation uses a multi-URL loop over known article URLs from the Octoparse preview and appends all article rows into one CSV. The workflow avoids full page-load waits because BioBioChile may keep background resources loading; it waits for the article heading instead. Article content is extracted from JSON-LD/meta/article-body fallbacks and cleaned to remove HTML and related-article blocks.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated-biobiochile-scraper"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.biobiochile.cl/noticias/economia/actualidad-economica/2024/04/08/la-inflacion-en-venezuela-se-acelero-hasta-un-39-en-marzo-segun-analisis-independiente.shtml",
          "https://www.biobiochile.cl/noticias/servicios/explicado/2024/04/01/cuanto-ganan-los-presidentes-de-america-latina-y-en-que-lugar-esta-chile.shtml",
          "https://www.biobiochile.cl/noticias/servicios/beneficios/2024/03/27/ingreso-minimo-garantizado-img-como-recibir-el-subsidio-para-quienes-ganen-menos-de-500-mil.shtml"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "selector": "h1",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "rowSelector": "body",
        "fileName": "biobiochile_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "PalabraClave",
            "selector": "(() => 'salario minimo')()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Titulo",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const sels = ['h1', 'article h1', 'main h1', '.article-title', '.titulo']; for (const s of sels) { const el = ROW.querySelector(s) || document.querySelector(s); const txt = clean(el && (el.innerText || el.textContent)); if (txt) return txt; } const meta = document.querySelector('meta[property=\"og:title\"], meta[name=\"twitter:title\"]'); const mt = clean(meta && meta.getAttribute('content')); if (mt) return mt.replace(/\\s*\\|\\s*BioBioChile.*$/i, ''); return clean((document.title || '').split('|')[0]); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Autor",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const sels = ['a[rel=author]', '.author a', '.autor a', '.article-author a', '[class*=author] a', '[class*=autor] a', 'span[itemprop=author]', '[itemprop=author]']; for (const s of sels) { const el = ROW.querySelector(s) || document.querySelector(s); const txt = clean(el && (el.innerText || el.textContent)); if (txt) return txt; } const findAuthor = o => { if (!o || typeof o !== 'object') return ''; if (Array.isArray(o)) { for (const x of o) { const r = findAuthor(x); if (r) return r; } return ''; } if (o.author) { if (typeof o.author === 'string') return clean(o.author); if (Array.isArray(o.author)) return clean(o.author.map(a => a && (a.name || '')).filter(Boolean).join(', ')); if (typeof o.author === 'object') return clean(o.author.name || ''); } if (o['@graph']) return findAuthor(o['@graph']); return ''; }; for (const sc of document.querySelectorAll('script[type=\"application/ld+json\"]')) { try { const r = findAuthor(JSON.parse(sc.textContent || 'null')); if (r) return r; } catch(e) {} } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "FechaPublicado",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const sels = ['time', '.fecha', '.date', '.article-date', '[class*=fecha]', '[class*=date]']; for (const s of sels) { const el = ROW.querySelector(s) || document.querySelector(s); if (!el) continue; const txt = clean(el.innerText || el.textContent); if (txt) return txt; const dt = clean(el.getAttribute && el.getAttribute('datetime')); if (dt) return dt; } const meta = document.querySelector('meta[property=\"article:published_time\"], meta[name=\"date\"], meta[itemprop=\"datePublished\"]'); const mt = clean(meta && meta.getAttribute('content')); if (mt) return mt; const findDate = o => { if (!o || typeof o !== 'object') return ''; if (Array.isArray(o)) { for (const x of o) { const r = findDate(x); if (r) return r; } return ''; } if (o.datePublished) return clean(o.datePublished); if (o['@graph']) return findDate(o['@graph']); return ''; }; for (const sc of document.querySelectorAll('script[type=\"application/ld+json\"]')) { try { const r = findDate(JSON.parse(sc.textContent || 'null')); if (r) return r; } catch(e) {} } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Introduccion",
            "selector": "(() => { const textFromHtml = v => { let s = v || ''; if (/<[a-z][\\s\\S]*>/i.test(s)) { const d = document.createElement('div'); d.innerHTML = s; d.querySelectorAll('script,style,aside,figure,.lee-tambien-bbcl,.related,.sidebar').forEach(e => e.remove()); s = d.textContent || d.innerText || ''; } return s.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' ').trim(); }; const findDescription = o => { if (!o || typeof o !== 'object') return ''; if (Array.isArray(o)) { for (const x of o) { const r = findDescription(x); if (r) return r; } return ''; } if (o.description || o.abstract) return textFromHtml(o.description || o.abstract); if (o['@graph']) return findDescription(o['@graph']); return ''; }; for (const sc of document.querySelectorAll('script[type=\"application/ld+json\"]')) { try { const r = findDescription(JSON.parse(sc.textContent || 'null')); if (r && r.length > 30 && !/PDI despliega operativo migratorio/i.test(r)) return r; } catch(e) {} } const meta = document.querySelector('meta[property=\"og:description\"], meta[name=\"description\"], meta[name=\"twitter:description\"]'); const mt = textFromHtml(meta && meta.getAttribute('content')); if (mt && mt.length > 30 && !/PDI despliega operativo migratorio/i.test(mt)) return mt; const sels = ['.post-excerpt', '.article-excerpt', '.article-lead', '.excerpt', '.bajada', '.lead', '.sumario', '.article-summary', '[class*=\"bajada\"]', '[class*=\"lead\"]']; for (const s of sels) { const el = ROW.querySelector(s) || document.querySelector(s); const txt = textFromHtml(el && (el.innerHTML || el.innerText || el.textContent)); if (txt && txt.length > 40 && !/PDI despliega operativo migratorio/i.test(txt)) return txt; } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Texto",
            "selector": "(() => { const bad = /publicidad|suscríbete|síguenos|cookie|newsletter|lo último|últimas noticias|también puedes leer|lee también|comparte esta noticia|bio bio tv|PDI despliega operativo migratorio/i; const clean = s => (s || '').replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' ').trim(); const unique = arr => Array.from(new Set(arr.map(clean))).filter(t => t.length > 30 && !bad.test(t)); const htmlToParagraphText = html => { if (!html) return ''; const d = document.createElement('div'); d.innerHTML = html; d.querySelectorAll('script,style,aside,figure,iframe,.lee-tambien-bbcl,.related,.sidebar,.ad,.ads,.advertising').forEach(e => e.remove()); let parts = Array.from(d.querySelectorAll('p')).map(p => clean(p.textContent || p.innerText)); if (!parts.length) parts = [clean(d.textContent || d.innerText)]; return unique(parts).join('\\n\\n'); }; const findBody = o => { if (!o || typeof o !== 'object') return ''; if (Array.isArray(o)) { for (const x of o) { const r = findBody(x); if (r) return r; } return ''; } if (o.articleBody) return /<[a-z][\\s\\S]*>/i.test(o.articleBody) ? htmlToParagraphText(o.articleBody) : clean(o.articleBody); if (o['@graph']) return findBody(o['@graph']); return ''; }; for (const sc of document.querySelectorAll('script[type=\"application/ld+json\"]')) { try { const r = findBody(JSON.parse(sc.textContent || 'null')); if (r && r.length > 100) return r; } catch(e) {} } const containerSels = ['[itemprop=\"articleBody\"]', '.entry-content', '.post-content', '.article-content', '.article-body', '.article__body', '.post-body', '.nota-texto', '.nota-body', '.contenido-nota', '.body-nota', '.texto-nota', '.texto', 'article']; for (const s of containerSels) { const el = document.querySelector(s); if (!el) continue; const out = htmlToParagraphText(el.innerHTML || ''); if (out.length > 120) return out; } const h1 = document.querySelector('h1'); const hRect = h1 ? h1.getBoundingClientRect() : null; const paras = Array.from(document.querySelectorAll('p')).filter(p => { const t = clean(p.innerText || p.textContent); if (t.length < 30 || bad.test(t)) return false; if (h1 && !(h1.compareDocumentPosition(p) & Node.DOCUMENT_POSITION_FOLLOWING)) return false; const r = p.getBoundingClientRect(); if (hRect && hRect.width > 0 && r.width > 0) { if (r.top < hRect.top) return false; if (!(r.left >= hRect.left - 180 && r.left <= hRect.right + 320)) return false; } return true; }).map(p => clean(p.innerText || p.textContent)); return unique(paras).join('\\n\\n'); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "TextoUrl",
            "selector": "(() => window.location.href)()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1560,
      "position_y": 240,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "URL Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes BioBioChile article detail pages for the keyword 'salario minimo', exporting keyword, title, author, published date, plain-text introduction, plain article text, and article URL. Navigation uses a multi-URL loop over known article URLs from the Octoparse preview and appends all article rows into one CSV. The workflow avoids full page-load waits because BioBioChile may keep background resources loading; it waits for the article heading instead. Article content is extracted from JSON-LD/meta/article-body fallbacks and cleaned to remove HTML and related-article blocks.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 3 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 320,
      "position_y": 220,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (PalabraClave, Titulo, Autor, FechaPublicado, Introduccion, Texto, TextoUrl). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 220,
      "width": 340,
      "height": 133,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}