{
  "version": "1.0.0",
  "exported_at": "2026-06-03T00:00:00.000Z",
  "project": {
    "name": "Informador Scraper",
    "description": "Extracts Informador.mx news article data equivalent to the Octoparse Informador template: section, secondary section, title, subtitle, author, publication date, cleaned article text, topics, and related Lee También links. Navigation strategy: multi-URL article batch using navigate.urls[] plus loop-continue with append output. Add more keyword-discovered Informador article URLs to navigate.urls[] to scrape additional articles; no search-results pagination page was provided in the analysis. Uses a clean snake_case output filename to avoid previously appended test rows.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.informador.mx/jalisco/Guadalajara-es-una-de-las-mejores-ciudades-en-el-mundo-para-salir-de-noche-de-acuerdo-con-Time-Out-20240820-0103.html",
          "https://www.informador.mx/jalisco/Habra-sancion-economica-en-Jalisco-para-usuarios-de-patines-electricos-que-excedan-velocidad-20240820-0094.html"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "selector": "article.news-full",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "rowSelector": "article.news-full",
        "fileName": "informador_mx_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "seccion",
            "selector": ".news-section",
            "attribute": "text"
          },
          {
            "name": "seccion_secundaria",
            "selector": "(() => { const label = ROW.querySelector('.news-label'); if (!label) return ''; return label.textContent.replace('|', '').replace(/\\s+/g, ' ').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titulo",
            "selector": "header.news-header h1.news-title",
            "attribute": "text"
          },
          {
            "name": "subtitulo",
            "selector": ".news-excerpt",
            "attribute": "text"
          },
          {
            "name": "autor",
            "selector": ".news-author a",
            "attribute": "text"
          },
          {
            "name": "fecha_publicado",
            "selector": "time.news-date",
            "attribute": "text"
          },
          {
            "name": "texto",
            "selector": "(() => { const skipSelector = 'header.news-header,.news-photogallery,.news-share,.news-tags,.tags,.newsletter,.mod,.banner,.trc_related_container,.OUTBRAIN,.taboola,.videoCube,.ad,.st-placement,.news-related,script,style'; const paragraphs = Array.from(ROW.querySelectorAll('p')).filter(p => !p.closest(skipSelector) && !p.closest('.news-excerpt') && !p.closest('.news-author')); const values = paragraphs.map(p => p.textContent.replace(/\\s+/g, ' ').trim()).filter(t => t && !/^Por:/i.test(t) && !/^(OB|FS|MV)$/i.test(t) && !/Mantente al día con las noticias/i.test(t) && !/únete a nuestro canal de WhatsApp/i.test(t) && !/canal de WhatsApp/i.test(t) && !/Registrarse implica aceptar/i.test(t) && !/Todo lo que necesitas saber/i.test(t) && !/Recibe las últimas noticias/i.test(t)); let text = Array.from(new Set(values)).join(' '); text = text.replace(/\\*\\s*\\*\\s*\\*\\s*Mantente al día con las noticias[\\s\\S]*$/i, '').replace(/\\s+(OB|FS|MV)\\s*$/i, '').replace(/\\s+/g, ' ').trim(); return text; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "temas",
            "selector": "(() => { const meta = document.querySelector('meta[name=\"keywords\"]')?.content || document.querySelector('meta[name=\"news_keywords\"]')?.content || ''; const attr = ROW.getAttribute('data-tags') || ROW.getAttribute('data-stags') || ''; return (meta || attr).split(',').map(t => t.trim()).filter(Boolean).join(', '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "lee_tambien",
            "selector": "(() => { const current = location.href.replace(/#.*$/, ''); const sameSiteArticle = href => { try { const u = new URL(href, location.origin); return u.origin === location.origin && /\\.html($|[?#])/.test(u.href) && u.href.replace(/#.*$/, '') !== current && !/terminos-y-condiciones|aviso|privacy|privacidad/i.test(u.href); } catch (e) { return false; } }; const clean = href => new URL(href, location.origin).href; const badContainers = 'header.news-header,.news-share,.newsletter,.taboola,.OUTBRAIN,.trc_related_container,.banner,.ad,.st-placement'; const headings = Array.from(ROW.querySelectorAll('h2,h3')).filter(h => /Lee\\s+También/i.test(h.textContent || '')); let links = []; for (const h of headings) { let n = h.nextElementSibling; let guard = 0; while (n && guard < 10 && !/^H2$|^H3$/i.test(n.tagName)) { links.push(...Array.from(n.querySelectorAll ? n.querySelectorAll('a[href]') : []).filter(a => !a.closest(badContainers)).map(a => a.getAttribute('href')).filter(sameSiteArticle).map(clean)); n = n.nextElementSibling; guard++; } const container = h.closest('section,div'); if (container && container !== ROW) { links.push(...Array.from(container.querySelectorAll('a[href]')).filter(a => !a.closest(badContainers)).map(a => a.getAttribute('href')).filter(sameSiteArticle).map(clean)); } } if (!links.length) { links = Array.from(ROW.querySelectorAll('a[href]')).filter(a => !a.closest(badContainers)).map(a => a.getAttribute('href')).filter(sameSiteArticle).map(clean); } return Array.from(new Set(links)).join(' | '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Informador.mx news article data equivalent to the Octoparse Informador template: section, secondary section, title, subtitle, author, publication date, cleaned article text, topics, and related Lee También links. Navigation strategy: multi-URL article batch using navigate.urls[] plus loop-continue with append output. Add more keyword-discovered Informador article URLs to navigate.urls[] to scrape additional articles; no search-results pagination page was provided in the analysis. Uses a clean snake_case output filename to avoid previously appended test rows.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 2 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 320,
      "position_y": 220,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (seccion_secundaria, texto, temas, lee_tambien, url). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 220,
      "width": 340,
      "height": 130,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}