{
  "version": "1.0.0",
  "exported_at": "2026-05-31T20:35:00.000Z",
  "project": {
    "name": "Yellow Pages Spain ScraperListing",
    "description": "Scrapes Páginas Amarillas España search/listing result pages for business title, profile URL, opening status, category, description, phone, website, street, postal code, city, and scrape timestamp. Uses predictable numbered listing URLs for pages 1-10 with navigate.urls[] and loop-continue, avoiding unreliable next-button loops. Extend the urls[] list if the search has more than 10 pages. A JavaScript preprocessing step marks one unique listing card per business to avoid duplicate opinion/map/contact links. Cookie banners are handled best-effort; CAPTCHA must be solved manually if shown.",
    "color": "bg-[#f1c21b]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/1?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/2?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/3?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/4?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/5?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/6?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/7?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/8?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/9?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true",
          "https://www.paginasamarillas.es/search/all-ac/all-ma/all-pr/all-is/all-ci/all-ba/all-pu/all-nc/10?what=abierto+dia+y+noche&where=ss&ub=false&aprob=0.0&nprob=1.0&qc=true"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Run custom JavaScript on the page",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "jsCode": "(()=>{try{if(window.Didomi&&Didomi.setUserAgreeToAll){Didomi.setUserAgreeToAll();}}catch(e){}try{document.querySelectorAll('#didomi-host,.didomi-popup-backdrop,.didomi-notice,.didomi-consent-popup,[class*=\"didomi\" i],[class*=\"consent\" i]').forEach(el=>{if(el&&el.parentNode)el.parentNode.removeChild(el);});document.body.classList.remove('didomi-popup-open','didomi-popup-open-ios');document.documentElement.style.overflow='';document.body.style.overflow='';}catch(e){}const clean=s=>(s||'').replace(/\\s+/g,' ').trim();const validTitle=t=>t&&!/^(Danos tu opinión|Contactar|Llamar|Cómo ir|Ver|Ver teléfono|Sitio Web|Mapa|Opiniones)$/i.test(t)&&!/^[+\\d\\s]{7,}$/.test(t);document.querySelectorAll('.uscraper-pa-card').forEach(el=>{el.classList.remove('uscraper-pa-card');});const links=Array.from(document.querySelectorAll('a[href*=\"/f/\"][href*=\".html\"]')).filter(a=>{const raw=a.getAttribute('href')||'';const t=clean(a.textContent).replace(/\\+info/ig,'').trim();return !raw.includes('#')&&!raw.includes('?gm')&&!raw.includes('consultar')&&!raw.includes('mapa')&&validTitle(t);});const seen=new Set();let count=0;for(const link of links){const url=(link.href||link.getAttribute('href')||'').split('#')[0].replace(/\\?gm.*/,'');if(!url||seen.has(url))continue;seen.add(url);let title=clean(link.textContent).replace(/\\+info/ig,'').replace(/Danos tu opinión/ig,'').trim();if(!validTitle(title))continue;let best=link;let e=link;for(let i=0;i<10&&e;i++,e=e.parentElement){const txt=clean(e.innerText||e.textContent);if(txt.length>clean(best.innerText||best.textContent).length&&txt.includes(title)){best=e;}if(txt.length>80&&(/Contactar|Ver teléfono|Sitio Web|\\b\\d{5}\\b|tel:/i.test(txt))){best=e;break;}}const card=best;const text=clean(card.innerText||card.textContent);const telEl=card.querySelector('a[href^=\"tel:\"]');const phone=telEl?clean((telEl.getAttribute('href')||'').replace(/^tel:/i,'')):(text.match(/(?:\\+34\\s*)?[6789]\\d{2}\\s?\\d{2}\\s?\\d{2}\\s?\\d{2}|(?:\\+34\\s*)?[89]\\d{8}/)||[''])[0].replace(/\\s+/g,'');const allLinks=Array.from(card.querySelectorAll('a[href]'));const webLink=allLinks.find(a=>/sitio\\s*web|web/i.test(clean(a.textContent)+' '+(a.className||'')+' '+(a.title||'')))||allLinks.find(a=>{try{const raw=a.getAttribute('href')||'';const u=new URL(a.href,location.href);return !/^tel:|^mailto:|^javascript:/i.test(raw)&&!/paginasamarillas\\.es|beedigital\\.es/i.test(u.hostname);}catch(e){return false;}});const website=webLink?(webLink.href||''):'';const explicitCat=Array.from(card.querySelectorAll('[class*=\"categoria\" i],[class*=\"category\" i],[class*=\"actividad\" i],[itemprop=\"description\"]')).map(x=>clean(x.textContent)).find(x=>x&&x.length>2);let beforeContact=text.replace(title,'').replace(/\\+info|Danos tu opinión/ig,' ').trim().split(/\\b(Contactar|Ver teléfono|Ver|Llamar|Sitio Web|Cómo ir|\\d{5})\\b/i)[0].trim();const category=explicitCat||beforeContact.slice(0,240);let desc=Array.from(card.querySelectorAll('[class*=\"descripcion\" i],[class*=\"description\" i],[class*=\"desc\" i],[class*=\"extract\" i],p')).map(x=>clean(x.textContent)).find(x=>x.length>20&&!/cookies|opinión/i.test(x))||category;let status='';const statusEl=Array.from(card.querySelectorAll('.badge,[class*=\"horario\" i],[class*=\"time\" i],[class*=\"open\" i],[class*=\"estado\" i],span,strong')).map(x=>clean(x.textContent)).find(x=>/\\b(Abierto|Cerrado)\\b/i.test(x)&&clean(x).toLowerCase()!==title.toLowerCase());if(statusEl)status=statusEl;let street='',postal='',city='';const cpMatch=text.match(/\\b(\\d{5})\\b/);if(cpMatch){postal=cpMatch[1];const idx=text.indexOf(postal);let before=text.slice(Math.max(0,idx-130),idx).replace(/.*?(Danos tu opinión|Contactar|Ver teléfono|Ver|Llamar|Sitio Web|Electrodomésticos: reparación|Restaurantes?|Averías?|Cerrajeros?)/i,'').replace(/^[A-Z]\\s+/,'').trim();street=before.replace(/[,;\\s]+$/,'');let after=text.slice(idx+5,idx+80).replace(/^[,;\\s]+/,'').trim();after=after.split(/\\s+(Contactar|Ver|Llamar|Sitio Web|Cómo ir|NAVARRA|GIPUZKOA|LA RIOJA|BARCELONA|MADRID|VALENCIA|SEVILLA|MÁLAGA|ZARAGOZA)\\b/i)[0].trim();city=after;}card.classList.add('uscraper-pa-card');card.setAttribute('data-pa-title',title);card.setAttribute('data-pa-url',url);card.setAttribute('data-pa-status',status);card.setAttribute('data-pa-category',category);card.setAttribute('data-pa-description',desc);card.setAttribute('data-pa-phone',phone);card.setAttribute('data-pa-website',website);card.setAttribute('data-pa-street',street);card.setAttribute('data-pa-postal',postal);card.setAttribute('data-pa-city',city);count++;}return count;})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1560,
      "position_y": 260,
      "config": {
        "selector": ".uscraper-pa-card",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 260,
      "config": {
        "rowSelector": ".uscraper-pa-card",
        "fileName": "paginas-amarillas-listados-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "titulo",
            "selector": "ROW.getAttribute('data-pa-title')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url",
            "selector": "ROW.getAttribute('data-pa-url')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "horario",
            "selector": "ROW.getAttribute('data-pa-status')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "categoria",
            "selector": "ROW.getAttribute('data-pa-category')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "descripcion",
            "selector": "ROW.getAttribute('data-pa-description')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "telefono",
            "selector": "ROW.getAttribute('data-pa-phone')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "sitio_web",
            "selector": "ROW.getAttribute('data-pa-website')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "calle",
            "selector": "ROW.getAttribute('data-pa-street')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "codigo_postal",
            "selector": "ROW.getAttribute('data-pa-postal')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "ciudad",
            "selector": "ROW.getAttribute('data-pa-city')||''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "hora_actual",
            "selector": "new Date().toISOString()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2280,
      "position_y": 260,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 156,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 768,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2208,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Páginas Amarillas España search/listing result pages for business title, profile URL, opening status, category, description, phone, website, street, postal code, city, and scrape timestamp. Uses predictable numbered listing URLs for pages 1-10 with navigate.urls[] and loop-continue, avoiding unreliable next-button loops. Extend the urls[] list if the search has more than 10 pages. A JavaScript preprocessing step marks one unique listing card per business to avoid duplicate opinion/map/contact links. Cookie banners are handled best-effort; CAPTCHA must be solved manually if shown.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(()=>{try{if(window.Didomi&&Didomi.setUserAgreeToAll){Didomi.setUserAgreeToAll();}}catch(e){}try{doc...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1040,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (titulo, url, horario, categoria, descripcion). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 240,
      "width": 340,
      "height": 128,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}