{
  "version": "1.0.0",
  "exported_at": "2026-06-03T12:20:00.000Z",
  "project": {
    "name": "Pagesjaunes Emails Scraper",
    "description": "Best-effort equivalent of the Octoparse Pagesjaunes Emails Scraper. Pagesjaunes.fr detail pages were observed returning a 403 security challenge, so this template uses a multi-URL navigation loop over the supplied Pagesjaunes detail, business website, and Facebook URLs, then extracts business/contact/email/social fields from each page using generic JS columns. Pagination/navigation strategy: known URL list with navigate.urls[] + structured-export append + loop-continue. Extraction includes cleanup filters for internal Wix/Sentry emails and Facebook tracking/photo links.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.pagesjaunes.fr/pros/detail?code_etablissement=02424622",
          "http://www.lamazonial.fr",
          "https://www.facebook.com/lamazonial"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "duration": 3
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Run custom JavaScript on the page",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "jsCode": "window.scrollTo(0, document.body.scrollHeight);",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 240,
      "config": {
        "rowSelector": "body",
        "fileName": "pagesjaunes_emails_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "quoiqui",
            "selector": "(()=> 'restaurants')()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(()=> 'paris-1er-arrondissement-75')()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titre_du_business",
            "selector": "(()=>{const d=ROW.ownerDocument;const clean=s=>(s||'').replace(/\\s+/g,' ').replace(/\\s*\\|\\s*.*$/,'').trim();const h=clean(d.querySelector('h1')?.innerText);const og=clean(d.querySelector('meta[property=\"og:site_name\"]')?.content||d.querySelector('meta[property=\"og:title\"]')?.content);const title=clean(d.title);if(h&&!/sécurité|just a moment/i.test(h))return h;if(og&&!/sécurité|just a moment/i.test(og))return og;return title;})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url_du_detail_business",
            "selector": "(()=>{const d=ROW.ownerDocument;return d.location.hostname.includes('pagesjaunes.fr')?d.location.href:'https://www.pagesjaunes.fr/pros/detail?code_etablissement=02424622';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "categorie",
            "selector": "(()=>{const d=ROW.ownerDocument;const lines=d.body.innerText.split(/\\n+/).map(s=>s.replace(/\\s+/g,' ').trim()).filter(Boolean);for(const s of lines){const fb=s.match(/Page · ([^·]+?)(?: · |$)/i);if(fb&&fb[1].trim())return fb[1].trim();}if(/\\b(?:restaurant|brasserie|brewery|bar)s?\\b/i.test(lines.join(' ')))return 'RESTAURANTS';return '';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "temps_d_ouverture",
            "selector": "(()=>{const d=ROW.ownerDocument;const lines=d.body.innerText.split(/\\n+/).map(s=>s.replace(/\\s+/g,' ').trim()).filter(Boolean);const hit=lines.find(s=>/^(Ouvert|Ouvre|Open now|Closed now)\\b/i.test(s)&&s.length<80);return hit||'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "numero_de_telephone",
            "selector": "(()=>{const d=ROW.ownerDocument;const telLinks=Array.from(d.querySelectorAll('a[href^=\"tel:\"]')).map(a=>a.href.replace(/^tel:/i,''));const t=d.body.innerText;const matches=t.match(/(?:\\+?\\d[\\d\\s().-]{7,}\\d)/g)||[];return Array.from(new Set([...telLinks,...matches].map(x=>x.replace(/\\s+/g,' ').trim()).filter(x=>x.length<30&&!/^\\d{14}$/.test(x)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "adresse",
            "selector": "(()=>{const d=ROW.ownerDocument;const lines=d.body.innerText.split(/\\n+/).map(s=>s.replace(/\\s+/g,' ').trim()).filter(Boolean);const street=lines.find(s=>/\\b\\d{1,4}\\s+(?:rue|avenue|av\\.?|boulevard|bd|place|impasse|chemin|route|quai|allée)\\b/i.test(s)&&/(Paris|750\\d{2}|France)/i.test(s)&&s.length<180);if(street)return street;const postal=lines.find(s=>/\\b750\\d{2}\\b.*\\bParis\\b/i.test(s)&&s.length<180);if(postal)return postal;const fb=d.body.innerText.match(/\\b\\d{1,4}\\s+(?:rue|avenue|boulevard|place|impasse|chemin|route|quai|allée)[^\\n,]*(?:,\\s*)?Paris[^\\n]*/i);return fb?fb[0].replace(/\\s+/g,' ').trim():'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "site_du_business",
            "selector": "(()=>{const d=ROW.ownerDocument,loc=d.location;if(!/pagesjaunes|facebook/.test(loc.hostname))return loc.href;const anchors=Array.from(d.querySelectorAll('a[href]')).map(a=>a.href);const decoded=anchors.map(h=>{try{const u=new URL(h);if(u.hostname.includes('facebook.com')&&u.pathname.includes('/l.php')&&u.searchParams.get('u'))return decodeURIComponent(u.searchParams.get('u'));return h;}catch(e){return h;}});return decoded.find(h=>/^https?:/i.test(h)&&!/pagesjaunes|facebook|instagram|twitter|youtube|linkedin|tiktok|pinterest|snapchat|threads|telegram|github|google|wix\\.com/i.test(h))||'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "siret",
            "selector": "(()=>{const d=ROW.ownerDocument;const m=d.body.innerText.match(/\\b\\d{14}\\b/);return m?m[0]:'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "siren",
            "selector": "(()=>{const d=ROW.ownerDocument;const siret=d.body.innerText.match(/\\b\\d{14}\\b/);if(siret)return siret[0].slice(0,9);const m=d.body.innerText.match(/\\b\\d{9}\\b/);return m?m[0]:'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "start_url",
            "selector": "(()=>ROW.ownerDocument.location.href)()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "domain",
            "selector": "(()=>{const d=ROW.ownerDocument;try{let site='';if(!/pagesjaunes|facebook/.test(d.location.hostname))site=d.location.href;else{const anchors=Array.from(d.querySelectorAll('a[href]')).map(a=>a.href);site=anchors.map(h=>{try{const u=new URL(h);if(u.hostname.includes('facebook.com')&&u.pathname.includes('/l.php')&&u.searchParams.get('u'))return decodeURIComponent(u.searchParams.get('u'));return h;}catch(e){return h;}}).find(h=>/^https?:/i.test(h)&&!/pagesjaunes|facebook|instagram|twitter|youtube|linkedin|tiktok|pinterest|snapchat|threads|telegram|github|google|wix\\.com/i.test(h))||d.location.href;}return new URL(site).hostname.replace(/^www\\./,'');}catch(e){return d.location.hostname.replace(/^www\\./,'');}})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "depth",
            "selector": "(()=> '0')()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "referrer_url",
            "selector": "(()=>ROW.ownerDocument.referrer||ROW.ownerDocument.location.href)()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "current_url",
            "selector": "(()=>ROW.ownerDocument.location.href)()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "emails",
            "selector": "(()=>{const d=ROW.ownerDocument;const fromLinks=Array.from(d.querySelectorAll('a[href^=\"mailto:\"]')).map(a=>a.href.replace(/^mailto:/i,'').split('?')[0]);const text=d.body.innerText;const matches=text.match(/[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}/gi)||[];return Array.from(new Set([...fromLinks,...matches].map(e=>e.trim()).filter(e=>!/sentry|wixpress|wix\\.com|example@example\\.com/i.test(e)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "phones",
            "selector": "(()=>{const d=ROW.ownerDocument;const fromLinks=Array.from(d.querySelectorAll('a[href^=\"tel:\"]')).map(a=>a.href.replace(/^tel:/i,''));const matches=d.body.innerText.match(/(?:\\+?\\d[\\d\\s().-]{7,}\\d)/g)||[];return Array.from(new Set([...fromLinks,...matches].map(x=>x.replace(/\\s+/g,' ').trim()).filter(x=>x.length<30&&!/^\\d{14}$/.test(x)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "uncertain_phones",
            "selector": "(()=>{const d=ROW.ownerDocument;const t=d.body.innerText;const matches=t.match(/\\b\\d{3,6}\\b/g)||[];return Array.from(new Set(matches.filter(x=>!/^(75001|2025|2026|2024|2035)$/.test(x)))).slice(0,10).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "twitter",
            "selector": "(()=>{const d=ROW.ownerDocument;const isTwitter=h=>{try{return /^(?:[\\w-]+\\.)*(?:twitter|x)\\.com$/i.test(new URL(h).hostname);}catch(e){return false;}};return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(isTwitter))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "youtube",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/youtube\\.com|youtu\\.be/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "facebook",
            "selector": "(()=>{const d=ROW.ownerDocument;const out=[];if(d.location.hostname.includes('facebook.com'))out.push(d.location.origin+d.location.pathname.replace(/\\/$/,'').split('/').slice(0,2).join('/'));for(const a of Array.from(d.querySelectorAll('a[href]'))){let h=a.href;try{const u=new URL(h);if(u.hostname.includes('facebook.com')&&u.pathname.includes('/l.php')&&u.searchParams.get('u'))h=decodeURIComponent(u.searchParams.get('u'));}catch(e){} if(/facebook\\.com/i.test(h)&&!/login|recover|privacy|policies|business|help|photo|reel|reviews|followers|following|Paris-France|__cft__|__tn__/i.test(h))out.push(h.replace(/\\/$/,''));}return Array.from(new Set(out)).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "linkedin",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/linkedin\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "instagram",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/instagram\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "tiktok",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/tiktok\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "pinterest",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/pinterest\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "snapchat",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/snapchat\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "threads",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/threads\\.net/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "telegram",
            "selector": "(()=>{const d=ROW.ownerDocument;const isTelegram=h=>{try{return /^(?:[\\w-]+\\.)*(?:telegram\\.me|t\\.me)$/i.test(new URL(h).hostname);}catch(e){return false;}};return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(isTelegram))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "github",
            "selector": "(()=>{const d=ROW.ownerDocument;return Array.from(new Set(Array.from(d.querySelectorAll('a[href]')).map(a=>a.href).filter(h=>/github\\.com/i.test(h)))).join(';');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "error_message",
            "selector": "(()=>{const d=ROW.ownerDocument;const txt=(d.title+' '+d.body.innerText).toLowerCase();if(txt.includes('just a moment')||txt.includes('cf-turnstile')||txt.includes('enable javascript and cookies')||txt.includes('sécurité'))return 'Possible anti-bot/security challenge; Pagesjaunes detail data may be unavailable.';return '';})()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2640,
      "position_y": 240,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort equivalent of the Octoparse Pagesjaunes Emails Scraper. Pagesjaunes.fr detail pages were observed returning a 403 security challenge, so this template uses a multi-URL navigation loop over the supplied Pagesjaunes detail, business website, and Facebook URLs, then extracts business/contact/email/social fields from each page using generic JS columns. Pagination/navigation strategy: known URL list with navigate.urls[] + structured-export append + loop-continue. Extraction includes cleanup filters for internal Wix/Sentry emails and Facebook tracking/photo links.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page that scrolls to the bottom: `window.scrollTo(0, document.body.scrollHeight);`. If the exported results are empty, verify the page in a browser.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 220,
      "width": 340,
      "height": 122,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (quoiqui, location, titre_du_business, url_du_detail_business, categorie, and others). The JS expressions rely on page-text and link heuristics, so they are fragile — update them if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 220,
      "width": 340,
      "height": 137,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}