{
  "version": "1.0.0",
  "exported_at": "2026-06-01T16:30:00.000Z",
  "project": {
    "name": "Infobae News Keywords Scraper",
    "description": "Best-effort Infobae keyword article scraper equivalent to the Octoparse template preview. It extracts Source_URL, PalabraClave, Título, Subtítulo, Autor, FechaPublicado, Texto, and TextoUrl from Infobae article detail pages for the keyword 'educación'. Navigation uses a multi-URL loop over the keyword-matching article URLs shown in the Octoparse sample and appends all rows to one CSV. Infobae pages may keep ads/background resources active, so this template avoids wait-for-page-load and instead waits for the article h1. Author and publication date are extracted primarily from JSON-LD article metadata; date falls back to article meta tags or the URL date path. To scrape more keyword results, extend navigate.urls[] and update the PalabraClave JS constant.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.infobae.com/politica/2024/04/22/docentes-de-la-universidad-de-san-andres-y-torcuato-di-tella-apoyaron-la-marcha-en-defensa-de-la-educacion-publica/",
          "https://www.infobae.com/mexico/2024/04/22/sheinbaum-xochitl-y-maynez-firman-pacto-por-la-primera-infancia-destacan-importancia-de-salud-y-educacion/"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "selector": "h1",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "rowSelector": "body",
        "fileName": "infobae_noticias_keywords_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "source_url",
            "selector": "\"https://www.infobae.com/america/\"",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "palabra_clave",
            "selector": "\"educación\"",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titulo",
            "selector": "(() => { const clean = v => (v || '').toString().trim().replace(/\\s+/g, ' '); const el = document.querySelector('h1.article-headline, h1.headline, h1'); if (el && clean(el.textContent)) return clean(el.textContent); const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const walk = x => { if (!x) return null; if (Array.isArray(x)) { for (const i of x) { const r = walk(i); if (r) return r; } } else if (typeof x === 'object') { const t = x['@type']; if ((Array.isArray(t) ? t : [t]).some(v => /NewsArticle|Article/i.test(String(v || ''))) && x.headline) return x; for (const k of Object.keys(x)) { const r = walk(x[k]); if (r) return r; } } return null; }; const article = walk(json); return article ? clean(article.headline) : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "subtitulo",
            "selector": "(() => { const clean = v => (v || '').toString().trim().replace(/\\s+/g, ' '); const selectors = ['h2.article-subheadline', 'h2.article-deck', '.article-subheadline', '.article-deck', '.deck', 'h2']; for (const sel of selectors) { const el = document.querySelector(sel); if (el && clean(el.textContent)) return clean(el.textContent); } const meta = document.querySelector('meta[name=\"description\"], meta[property=\"og:description\"]'); if (meta && clean(meta.getAttribute('content'))) return clean(meta.getAttribute('content')); const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const walk = x => { if (!x) return null; if (Array.isArray(x)) { for (const i of x) { const r = walk(i); if (r) return r; } } else if (typeof x === 'object') { const t = x['@type']; if ((Array.isArray(t) ? t : [t]).some(v => /NewsArticle|Article/i.test(String(v || ''))) && x.description) return x; for (const k of Object.keys(x)) { const r = walk(x[k]); if (r) return r; } } return null; }; const article = walk(json); return article ? clean(article.description) : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "autor",
            "selector": "(() => { const clean = v => (v || '').toString().trim().replace(/^Por\\s+/i, '').replace(/\\s+/g, ' '); const bad = v => !v || /Agregar Infobae|Infobae en|PUBLICIDAD|Newsletter|Compartir/i.test(v); const getName = a => Array.isArray(a) ? a.map(getName).filter(v => !bad(v)).join('; ') : (a && typeof a === 'object' ? clean(a.name || a.author || '') : clean(a)); const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const walk = x => { if (!x) return ''; if (Array.isArray(x)) { for (const i of x) { const r = walk(i); if (r && !bad(r)) return r; } } else if (typeof x === 'object') { const t = x['@type']; if ((Array.isArray(t) ? t : [t]).some(v => /NewsArticle|Article/i.test(String(v || '')))) { const n = getName(x.author || x.creator); if (n && !bad(n)) return n; } for (const k of Object.keys(x)) { const r = walk(x[k]); if (r && !bad(r)) return r; } } return ''; }; const fromJson = walk(json); if (fromJson && !bad(fromJson)) return fromJson; const selectors = ['a[rel=\"author\"]', '.article-author-name', '.author-name', '.byline-name', '[data-testid=\"author-name\"]']; for (const sel of selectors) { const el = document.querySelector(sel); const txt = clean(el && el.textContent); if (txt && !bad(txt)) return txt; } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "fecha_publicado",
            "selector": "(() => { const clean = v => (v || '').toString().trim().replace(/\\s+/g, ' '); const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const walk = x => { if (!x) return ''; if (Array.isArray(x)) { for (const i of x) { const r = walk(i); if (r) return r; } } else if (typeof x === 'object') { const t = x['@type']; if ((Array.isArray(t) ? t : [t]).some(v => /NewsArticle|Article/i.test(String(v || '')))) { const d = clean(x.datePublished || x.dateCreated || ''); if (d) return d; } for (const k of Object.keys(x)) { const r = walk(x[k]); if (r) return r; } } return ''; }; const fromJson = walk(json); if (fromJson) return fromJson; const meta = document.querySelector('meta[property=\"article:published_time\"], meta[name=\"article:published_time\"], meta[name=\"datePublished\"], meta[itemprop=\"datePublished\"]'); if (meta && clean(meta.getAttribute('content'))) return clean(meta.getAttribute('content')); const time = document.querySelector('article time[datetime], main time[datetime]'); if (time) return clean(time.getAttribute('datetime') || time.textContent); const m = window.location.pathname.match(/\\/(20\\d{2})\\/(\\d{2})\\/(\\d{2})\\//); if (m) return `${m[1]}-${m[2]}-${m[3]}`; return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "texto",
            "selector": "(() => { const candidates = ['article p', '.article-body p', '.body-article p', '.story-body p', '.article-content p', '[class*=\"article\"] p']; let paragraphs = []; for (const sel of candidates) { paragraphs = Array.from(document.querySelectorAll(sel)).map(p => p.textContent.trim().replace(/\\s+/g, ' ')).filter(t => t.length > 40 && !/PUBLICIDAD|SEGUIR LEYENDO|NEWSLETTER|Recibí.*noticias/i.test(t)); if (paragraphs.length >= 2) break; } const seen = new Set(); const text = paragraphs.filter(t => { if (seen.has(t)) return false; seen.add(t); return true; }).join('\\n'); if (text) return text; const json = Array.from(document.querySelectorAll('script[type=\"application/ld+json\"]')).map(s => { try { return JSON.parse(s.textContent); } catch(e) { return null; } }).filter(Boolean); const walk = x => { if (!x) return ''; if (Array.isArray(x)) { for (const i of x) { const r = walk(i); if (r) return r; } } else if (typeof x === 'object') { const t = x['@type']; if ((Array.isArray(t) ? t : [t]).some(v => /NewsArticle|Article/i.test(String(v || ''))) && x.articleBody) return x.articleBody.toString().trim(); for (const k of Object.keys(x)) { const r = walk(x[k]); if (r) return r; } } return ''; }; return walk(json); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "texto_url",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1560,
      "position_y": 260,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 156,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort Infobae keyword article scraper equivalent to the Octoparse template preview. It extracts Source_URL, PalabraClave, Título, Subtítulo, Autor, FechaPublicado, Texto, and TextoUrl from Infobae article detail pages for the keyword 'educación'. Navigation uses a multi-URL loop over the keyword-matching article URLs shown in the Octoparse sample and appends all rows to one CSV. Infobae pages may keep ads/background resources active, so this template avoids wait-for-page-load and instead waits for the article h1. Author and publication date are extracted primarily from JSON-LD article metadata; date falls back to article meta tags or the URL date path. To scrape more keyword results, extend navigate.urls[] and update the PalabraClave JS constant.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (source_url, palabra_clave, titulo, subtitulo, autor). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 240,
      "width": 340,
      "height": 131,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}