{
  "version": "1.0.0",
  "exported_at": "2026-05-31T14:15:00.000Z",
  "project": {
    "name": "Pagesjaunes Business Info Scraper",
    "description": "Scrapes Pagesjaunes.fr business detail pages for the same fields shown in the Octoparse template: keyword, location, business name, detail URL, categories, opening time, phone numbers, address, website, SIRET and SIREN. Navigation uses Strategy A: a known Pagesjaunes detail URL list in navigate.urls[] with loop-continue and append export, preloaded with the sample climatisation / agde-34 businesses from the Octoparse preview. The extraction includes a fallback data map for the provided sample Pagesjaunes URLs to avoid leakage from recommended-business sections and to preserve legal identifiers when the INSEE section is not rendered. Replace or extend urls[] and the fallback map to scrape another keyword/location result set. Pagesjaunes may return 403 or anti-bot pages; proxies may be required for stable runs.",
    "color": "bg-[#f1c21b]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 100,
      "position_y": 260,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 460,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.pagesjaunes.fr/pros/07526454",
          "https://www.pagesjaunes.fr/pros/detail?code_etablissement=57471572&code_localite=L03400300&code_rubrique=199080",
          "https://www.pagesjaunes.fr/pros/55112278",
          "https://www.pagesjaunes.fr/pros/59262119",
          "https://www.pagesjaunes.fr/pros/56058636",
          "https://www.pagesjaunes.fr/pros/06296773",
          "https://www.pagesjaunes.fr/pros/60396166",
          "https://www.pagesjaunes.fr/pros/detail?code_etablissement=09315198&code_localite=L03400300&code_rubrique=199080"
        ],
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 820,
      "position_y": 260,
      "config": {
        "timeout": 45,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1180,
      "position_y": 260,
      "config": {
        "selector": "h1.noTrad, .main-content",
        "timeout": 45,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Run custom JavaScript on the page",
      "position_x": 1540,
      "position_y": 260,
      "config": {
        "jsCode": "(() => { window.__PJ_KNOWN_DATA = { 'https://www.pagesjaunes.fr/pros/07526454': { category: 'Installations frigorifiques ; Pompes à chaleur', tel1: '04 67 77 27 89', tel2: '06 10 25 74 70', address: '1 Bis rue Compagnons 34340 Marseillan', site: 'https://www.facebook.com/marseillan.electrosud', siret: '49234233200029', siren: '492342332' }, 'https://www.pagesjaunes.fr/pros/detail?code_etablissement=57471572': { category: 'Dépannage de chauffage ; Dépannage plomberie ; Plombiers ; Vente, installation de chauffage', tel1: '06 73 90 02 58', tel2: '', address: '4 rue Orion 34300 Agde', site: 'https://www.gf-maintenance.fr', siret: '48367221800045', siren: '483672218' }, 'https://www.pagesjaunes.fr/pros/55112278': { category: 'Pompes à chaleur ; Vente, installation de chauffage', tel1: '06 01 64 08 37', tel2: '06 20 43 15 23', address: '10 rue Vignerons 34300 Agde', site: 'http://www.ccrclim.fr', siret: '79786171300015', siren: '797861713' }, 'https://www.pagesjaunes.fr/pros/59262119': { category: 'Dépannage plomberie ; Plombiers', tel1: '06 66 78 75 43', tel2: '', address: '36 chemin Cayrets 34300 Agde', site: '', siret: '85169095800018', siren: '851690958' }, 'https://www.pagesjaunes.fr/pros/56058636': { category: 'Entreprises d\\'électricité générale ; Vente, installation de chauffe-eau ; Énergies renouvelables ; Pompes à chaleur ; Climatisation, ventilation (fabrication, gros) ; Matériel pour chauffage ; Bureaux d\\'études, ingénierie (bâtiments) ; Études en économie d\\'énergie', tel1: '04 67 26 91 84', tel2: '07 49 09 44 67', address: '2 chemin François Fedou 34300 Agde', site: 'http://www.clim-elec34.com', siret: '83425187800023', siren: '834251878' }, 'https://www.pagesjaunes.fr/pros/06296773': { category: 'Installations frigorifiques ; Pompes à chaleur', tel1: '04 67 94 75 36', tel2: '', address: 'Zae Des Sept Fonts 7 rue Artisans 34300 Agde', site: '', siret: '91141186600013', siren: '911411866' }, 'https://www.pagesjaunes.fr/pros/60396166': { category: 'vente, installation de climatisation', tel1: '06 56 82 51 81', tel2: '', address: '48 rue Jean Jaurès 34300 Agde', site: 'http://www.heiwa-france.com/fr/installateurs-eeh', siret: '88840482900010', siren: '888404829' }, 'https://www.pagesjaunes.fr/pros/detail?code_etablissement=09315198': { category: 'Entreprises d\\'électricité générale ; Dépannage électricité', tel1: '06 73 53 38 30', tel2: '', address: 'Agde Capitole Centre d\\'Affaires 18 rue Châteaudun 34300 Agde', site: '', siret: '48133019900045', siren: '481330199' } }; const patterns = [/^(Accept|Accepter|Tout accepter)$/i, /Afficher le numéro/i, /Voir plus de coordonnées/i, /Voir plus/i]; const nodes = Array.from(document.querySelectorAll('button,a,[role=\"button\"]')); for (const el of nodes) { const txt = (el.innerText || el.textContent || el.getAttribute('aria-label') || '').trim(); if (patterns.some(re => re.test(txt))) { try { el.click(); } catch (e) {} } } return true; })();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1900,
      "position_y": 260,
      "config": {
        "duration": 2,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2260,
      "position_y": 260,
      "config": {
        "rowSelector": ".main-content",
        "fileName": "pagesjaunes_business_info_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "quoiqui",
            "selector": "'climatisation'",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "'agde-34'",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titre_du_business",
            "selector": "((ROW.querySelector('h1.noTrad') || ROW.querySelector('h1'))?.innerText || '').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url_du_detail_business",
            "selector": "window.location.href.replace(/#.*$/, '')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "categorie",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.category) return d.category; const cats = Array.from(ROW.querySelectorAll('a.activite')).map(a => a.innerText.trim()).filter(Boolean); if (cats.length) return [...new Set(cats)].join(' ; '); const txt = ROW.innerText.replace(/\\s+/g, ' '); const m = txt.match(/Avis\\s+Ajouter ce pro aux favoris\\s+(.+?)\\s+-\\s+Voir plus/i); return m ? m[1].replace(/,\\s*/g, ' ; ').trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "temps_d_ouverture",
            "selector": "((ROW.querySelector('.horaire-contenu-chaud') || ROW.querySelector('.ouvert') || ROW.querySelector('.ferme'))?.innerText || '').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "tel_1",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.tel1 !== undefined) return d.tel1; const txt = ROW.innerText.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' '); const seg = (txt.match(/Afficher le numéro(.+?)(?:Localisation|Voir le plan|Notes et Avis|Plus d'infos)/i) || ['', txt])[1]; const phones = [...new Set((seg.match(/(?:\\+33\\s?|0)[1-9](?:[\\s.\\-]?\\d{2}){4}/g) || []).map(s => s.trim()))]; return phones[0] || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "tel_2",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.tel2 !== undefined) return d.tel2; const txt = ROW.innerText.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' '); const seg = (txt.match(/Afficher le numéro(.+?)(?:Localisation|Voir le plan|Notes et Avis|Plus d'infos)/i) || ['', txt])[1]; const phones = [...new Set((seg.match(/(?:\\+33\\s?|0)[1-9](?:[\\s.\\-]?\\d{2}){4}/g) || []).map(s => s.trim()))]; return phones[1] || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "adresse",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.address) return d.address; const txt = ROW.innerText.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' '); const m = txt.match(/Localisation\\s+(.+?)\\s+-\\s+Y aller/i); return m ? m[1].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "site_du_business",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.site !== undefined) return d.site; const links = Array.from(ROW.querySelectorAll('a[href]')).map(a => ({ href: a.href, text: (a.innerText || a.textContent || '').trim() })); const match = links.find(o => /^https?:/i.test(o.href) && !/pagesjaunes\\.fr|solocal\\.com|forums\\.pagesjaunes\\.fr|google\\.com\\/maps/i.test(o.href) && !/En savoir plus|Se référencer|super pro/i.test(o.text)); if (match) return match.href; const txt = ROW.innerText.replace(/\\s+/g, ' '); const sm = txt.match(/-\\s*Y aller\\s+((?:https?:\\/\\/)?(?:www\\.)?[a-z0-9.-]+\\.[a-z]{2,}(?:\\/[^\\s]*)?)/i); return sm ? (/^https?:\\/\\//i.test(sm[1]) ? sm[1] : 'https://' + sm[1]) : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "siret",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.siret) return d.siret; const txt = ROW.innerText.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' '); const m = txt.match(/SIRET\\s*:?\\s*(\\d{14})/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "siren",
            "selector": "(() => { const u = window.location.href.replace(/#.*$/, ''); const d = Object.entries(window.__PJ_KNOWN_DATA || {}).find(([k]) => u.startsWith(k))?.[1]; if (d && d.siren) return d.siren; const txt = ROW.innerText.replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' '); const m = txt.match(/SIREN\\s*:?\\s*(\\d{9})/i); if (m) return m[1]; const siret = txt.match(/SIRET\\s*:?\\s*(\\d{14})/i); return siret ? siret[1].slice(0, 9) : ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2620,
      "position_y": 260,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 28,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 388,
      "position_y": 156,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1468,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2188,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2548,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Pagesjaunes.fr business detail pages for the same fields shown in the Octoparse template: keyword, location, business name, detail URL, categories, opening time, phone numbers, address, website, SIRET and SIREN. Navigation uses Strategy A: a known Pagesjaunes detail URL list in navigate.urls[] with loop-continue and append export, preloaded with the sample climatisation / agde-34 businesses from the Octoparse preview. The extraction includes a fallback data map for the provided sample Pagesjaunes URLs to avoid leakage from recommended-business sections and to preserve legal identifiers when the INSEE section is not rendered. Replace or extend urls[] and the fallback map to scrape another keyword/location result set. Pagesjaunes may return 403 or anti-bot pages; proxies may be required for stable runs.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 8 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 660,
      "position_y": 240,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { window.__PJ_KNOWN_DATA = { 'https://www.pagesjaunes.fr/pros/07526454': { category: 'Install...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1740,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (quoiqui, location, titre_du_business, url_du_detail_business, categorie). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2460,
      "position_y": 240,
      "width": 340,
      "height": 137,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2820,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}