{
  "version": "1.0.0",
  "exported_at": "2026-06-02T07:25:00.000Z",
  "project": {
    "name": "Pagine Gialle Shop Detail Scraper",
    "description": "Scrapes PagineGialle.it shop detail pages from a preconfigured list of shop page URLs. One row is exported per valid shop page with shop URL, name, address, phone, descriptions, services, opening hours, products, VAT/tax IDs, categories, rating, review count, and image URL. Navigation uses navigate.urls[] plus loop-continue so multiple supplied shop detail URLs are processed. Deleted/removed PagineGialle pages are detected and skipped to avoid junk rows. Extraction uses JSON-LD/meta data plus filtered page text fallbacks.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.paginegialle.it/pizzeria-bisteccheria-triticum-roma",
          "https://www.paginegialle.it/pizzeria-ristorante-la-ruota",
          "https://www.paginegialle.it/ristorante-giapponese-daifuku-roma",
          "https://www.paginegialle.it/ristorante-sant-anna-roma",
          "https://www.paginegialle.it/ristorantetoma-roma",
          "https://www.paginegialle.it/ristorante-citta-d-oriente-roma"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 456,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 792,
      "position_y": 220,
      "config": {
        "jsCode": "(()=>{const clean=s=>String(s||'').replace(/\\s+/g,' ').trim().toLowerCase();const positive=['accetta','accetto','accept','accept all','consenti','continua','ok'];const forbidden=['cookie policy','privacy','informativa','preferenze','personalizza','gestisci','maggiori informazioni','leggi'];const matches=(txt,w)=>w.length<4?txt.split(/[^a-z0-9]+/).includes(w):txt.includes(w);const candidates=[...document.querySelectorAll('button,input[type=button],input[type=submit],[role=button]')].filter(e=>e.offsetParent!==null||e.getClientRects().length);for(const e of candidates){const txt=clean(e.innerText||e.value||e.getAttribute('aria-label')||e.id||e.className);if(!txt)continue;if(forbidden.some(w=>txt.includes(w)))continue;if(positive.some(w=>matches(txt,w))){e.click();return 'clicked safe consent button: '+txt;}}return 'no safe consent button clicked';})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1128,
      "position_y": 220,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1464,
      "position_y": 220,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "text-contains-1",
      "block_type": "process",
      "title": "Text Contains",
      "description": "Check if page contains text",
      "position_x": 1800,
      "position_y": 220,
      "config": {
        "selector": "body",
        "text": "La pagina che stai cercando è stata eliminata",
        "caseSensitive": false,
        "timeout": 5
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2136,
      "position_y": 520,
      "config": {}
    },
    {
      "block_id": "inject-javascript-2",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1800,
      "position_y": 520,
      "config": {
        "jsCode": "(()=>{const clean=s=>(typeof s==='string'?s:typeof s==='number'?String(s):'').replace(/\\s+/g,' ').trim();const abs=u=>{if(!u)return '';try{return new URL(u,location.href).href;}catch(e){return u;}};const uniq=a=>[...new Set(a.map(clean).filter(Boolean))];const text=document.body.innerText||'';const lines=text.split(/\\n+/).map(clean).filter(Boolean);const data=[...document.querySelectorAll('script[type=\"application/ld+json\"]')].flatMap(s=>{try{const j=JSON.parse(s.textContent);return Array.isArray(j)?j:(j&&Array.isArray(j['@graph'])?j['@graph']:[j]);}catch(e){return[];}});const typeOf=o=>Array.isArray(o?.['@type'])?o['@type'].join(' '):String(o?.['@type']||'');const pick=re=>data.find(o=>o&&typeof o==='object'&&re.test(typeOf(o)));const biz=pick(/LocalBusiness|Restaurant|FoodEstablishment|Store/i)||pick(/Organization/i)||data.find(o=>o&&o.name&&o.address)||{};const metaDesc=document.querySelector('meta[name=description]')?.content||[...document.querySelectorAll('meta')].find(m=>m.getAttribute('property')==='og:description')?.content||'';const ogTitle=[...document.querySelectorAll('meta')].find(m=>m.getAttribute('property')==='og:title')?.content||'';const canonical=document.querySelector('link[rel=canonical]')?.href||location.href;const isBadLabel=v=>/^(informazioni attività|caratteristiche e servizi|prodotti|categorie|informazioni azienda|sito internet|informativa privacy|per informazioni|modifica orari|recensioni|telefono|sito web|rivendicala gratis|scrivi una recensione|paginegialle|accedi|registrati|farmacie|supermercati|ristoranti|estetisti|parrucchieri|medici|dentisti|vedi tutte?|preventivi casa|dove mangiare|salute e bellezza|professionisti|muoversi e viaggiare|dove dormire|italiano|tedesco|cerca|aziende|apre|chiuso|-|\\(\\d+\\))$/i.test(clean(v));const isStop=v=>/^(categorie|informazioni azienda|sito internet|p\\.?\\s*iva|codice fiscale|informativa privacy|recensioni|scrivi una recensione|paginegialle)$/i.test(clean(v));const goodItem=v=>{v=clean(v).replace(/,+$/,'');if(!v||v.length>90)return '';if(isBadLabel(v)||isStop(v))return '';if(/[0-9]{1,2}:[0-9]{2}/.test(v))return '';if(/^(lunedì|martedì|mercoledì|giovedì|venerdì|sabato|domenica|mo |tu |we |th |fr |sa |su )/i.test(v))return '';if(/^[0-9]+$/.test(v))return '';if(/^(tempio di mecenate|fantasie napoletane|ristorante il casaletto|pizzium|pizza in trevi|antica osteria|ristorante era ora)/i.test(v))return '';return v;};const collectAfter=re=>{const idx=lines.findIndex(l=>re.test(l));if(idx<0)return[];const out=[];for(let i=idx+1;i<lines.length&&out.length<25;i++){const l=lines[i];if(isStop(l))break;const g=goodItem(l);if(g)out.push(g);}return uniq(out);};let services=collectAfter(/^caratteristiche e servizi$/i);if(!services.length)services=collectAfter(/^informazioni attività$/i);if(!services.length)services=collectAfter(/^modifica orari$/i);services=services.filter(v=>!/^(pizzerie|ristoranti)$/i.test(v));const products=collectAfter(/^prodotti$/i).filter(v=>!/^specialità$/i.test(v));const address=(()=>{const a=biz.address;if(a){if(typeof a==='string')return clean(a);return clean([a.streetAddress,a.postalCode,a.addressLocality,a.addressRegion].filter(Boolean).join(' '));}return clean(document.querySelector('[itemprop=address],address,[class*=indirizzo],[class*=address]')?.textContent);})();const phone=clean(String(biz.telephone||document.querySelector('a[href^=tel]')?.getAttribute('href')||document.querySelector('[class*=telefono],[class*=phone],[data-phone]')?.textContent||'').replace(/^tel:/i,''));const opening=(()=>{const oh=biz.openingHours||biz.openingHoursSpecification;if(Array.isArray(oh))return JSON.stringify(oh.map(x=>typeof x==='string'?x:clean([].concat(x.dayOfWeek||[]).join(', ')+' '+(x.opens||'')+' - '+(x.closes||''))));if(typeof oh==='string')return oh;const dom=[...document.querySelectorAll('time,[class*=orari],[class*=opening],[class*=hours] li')].map(e=>clean(e.textContent)).filter(Boolean);return dom.length?JSON.stringify(uniq(dom)):'';})();const flat=text.replace(/\\s+/g,' ');const piva=(flat.match(/\\bP\\.?\\s*I\\.?\\s*V\\.?\\s*A\\.?\\s*[:\\-]?\\s*(?=[A-Z0-9]{0,15}[0-9])([A-Z0-9]{8,16})/i)||[])[1]||'';const cf=(flat.match(/\\bCodice\\s+fiscale\\s*[:\\-]?\\s*(?=[A-Z0-9]{0,19}[0-9])([A-Z0-9]{8,20})/i)||flat.match(/\\bC\\.?\\s*F\\.?\\s*[:\\-]?\\s*(?=[A-Z0-9]{0,19}[0-9])([A-Z0-9]{8,20})/i)||[])[1]||'';const category=(()=>{const c=biz.category||biz.additionalType;if(Array.isArray(c))return c.map(clean).filter(Boolean).join(', ');if(c)return clean(String(c));const seg=location.pathname.split('/').filter(Boolean);const fromPath=seg.length>1?seg[1]:'';return fromPath?fromPath.replace(/-/g,' ').replace(/\\b\\w/g,m=>m.toUpperCase()):'';})();const rating=(()=>{const r=biz.aggregateRating?.ratingValue||document.querySelector('[itemprop=ratingValue]')?.getAttribute('content')||document.querySelector('[itemprop=ratingValue]')?.textContent||'';const m=clean(String(r)).match(/[0-9]+([\\.,][0-9]+)?/);return m?m[0].replace(',','.'):'';})();const reviews=(()=>{const r=biz.aggregateRating?.reviewCount||biz.aggregateRating?.ratingCount||document.querySelector('[itemprop=reviewCount]')?.getAttribute('content')||document.querySelector('[itemprop=reviewCount]')?.textContent||'';const m=clean(String(r)).match(/[0-9]+/);if(m)return m[0];const line=lines.find(l=>/recension/i.test(l)&&/[0-9]+/.test(l));return (line?.match(/[0-9]+/)||[])[0]||'';})();const image=(()=>{const valid=u=>{u=abs(u);return u&&!/maps|logo|sprite|icon|placeholder|superheader-footer|google-play|app-store|FB_Scheda_Azienda/i.test(u);};const img=biz.image;let candidates=[];if(Array.isArray(img))candidates=img.map(x=>typeof x==='string'?x:(x?.url||''));else if(typeof img==='string')candidates=[img];else if(img?.url)candidates=[img.url];candidates.push([...document.querySelectorAll('meta')].find(m=>m.getAttribute('property')==='og:image')?.content||'');candidates.push(...[...document.images].map(i=>i.currentSrc||i.src));return abs(candidates.find(valid)||'');})();const longDesc=(()=>{const schema=clean(biz.description);const heads=[...document.querySelectorAll('h2,h3,h4')];const h=heads.find(e=>/descrizione|chi siamo|presentazione/i.test(clean(e.textContent)));let vals=[];if(h){let n=h.nextElementSibling;let guard=0;while(n&&guard++<8&&!/^H[234]$/.test(n.tagName)){const t=clean(n.textContent);if(t&&!isBadLabel(t))vals.push(t);n=n.nextElementSibling;}}return clean(vals.join(' '))||schema||clean(metaDesc);})();window.__pgData={'URL_negozio':canonical,'Nome_negozio':clean(biz.name)||clean(document.querySelector('h1')?.textContent)||clean(ogTitle),'Indirizzo':address,'Numero_telefono':phone,'Astratto':clean(metaDesc),'Descrizione':longDesc,'Servizio1':services[0]||'','Servizio2':services[1]||'','Servizio3':services[2]||'','Orari_di_apertura':opening,'Caratteristiche_e_servizi':services.join(', '),'Prodotti':products.join(', '),'P_IVA':piva,'Codice_fiscale':cf,'Categorie':category,'Valutazione':rating,'Recensioni_totali':reviews,'URL_immagine':image};return 'pg data cached';})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2304,
      "position_y": 800,
      "config": {
        "rowSelector": "body",
        "fileName": "crawler_dettagli_negozi_paginegialle_v2.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "URL_negozio",
            "selector": "window.__pgData?.URL_negozio || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Nome_negozio",
            "selector": "window.__pgData?.Nome_negozio || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Indirizzo",
            "selector": "window.__pgData?.Indirizzo || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Numero_telefono",
            "selector": "window.__pgData?.Numero_telefono || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Astratto",
            "selector": "window.__pgData?.Astratto || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Descrizione",
            "selector": "window.__pgData?.Descrizione || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Servizio1",
            "selector": "window.__pgData?.Servizio1 || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Servizio2",
            "selector": "window.__pgData?.Servizio2 || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Servizio3",
            "selector": "window.__pgData?.Servizio3 || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Orari_di_apertura",
            "selector": "window.__pgData?.Orari_di_apertura || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Caratteristiche_e_servizi",
            "selector": "window.__pgData?.Caratteristiche_e_servizi || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Prodotti",
            "selector": "window.__pgData?.Prodotti || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "P_IVA",
            "selector": "window.__pgData?.P_IVA || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Codice_fiscale",
            "selector": "window.__pgData?.Codice_fiscale || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Categorie",
            "selector": "window.__pgData?.Categorie || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Valutazione",
            "selector": "window.__pgData?.Valutazione || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Recensioni_totali",
            "selector": "window.__pgData?.Recensioni_totali || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "URL_immagine",
            "selector": "window.__pgData?.URL_immagine || ''",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-2",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2640,
      "position_y": 800,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "text-contains-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "text-contains-1",
      "from_connector_id": "true",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "text-contains-1",
      "from_connector_id": "false",
      "to_block_id": "inject-javascript-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-2",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 1664,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 720,
      "position_y": 116,
      "width": 1328,
      "height": 596,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1",
          "inject-javascript-2"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "URL Loop",
      "color": "#ff832b",
      "position_x": 1728,
      "position_y": 116,
      "width": 1160,
      "height": 876,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "text-contains-1",
          "loop-continue-1",
          "loop-continue-2"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2232,
      "position_y": 696,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes PagineGialle.it shop detail pages from a preconfigured list of shop page URLs. One row is exported per valid shop page with shop URL, name, address, phone, descriptions, services, opening hours, products, VAT/tax IDs, categories, rating, review count, and image URL. Navigation uses navigate.urls[] plus loop-continue so multiple supplied shop detail URLs are processed. Deleted/removed PagineGialle pages are detected and skipped to avoid junk rows. Extraction uses JSON-LD/meta data plus filtered page text fallbacks.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page to dismiss the cookie banner: clicks the first visible button whose label looks like an accept/continue action and skips policy, privacy, and preferences controls. Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 992,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-text-contains-1",
      "element_type": "note",
      "title": "Note: Text Contains",
      "content": "Condition block: checks `body` for the PagineGialle deleted-page message. True skips the page via Loop Continue; False runs extraction and export. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 2000,
      "position_y": 200,
      "width": 340,
      "height": 131,
      "z_index": 22,
      "data": {
        "block_id": "text-contains-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2336,
      "position_y": 500,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    },
    {
      "id": "note-block-inject-javascript-2",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page to collect the shop fields from JSON-LD, meta tags, and page text, then caches them in `window.__pgData` for the Structured Export columns. Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2000,
      "position_y": 500,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-2"
      }
    },
    {
      "id": "note-block-loop-continue-2",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 780,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-2"
      }
    }
  ]
}