{
  "version": "1.0.0",
  "exported_at": "2026-06-01T07:55:00.000Z",
  "project": {
    "name": "Pagesjaunesca Business Détails Scraper",
    "description": "Extracts PagesJaunes/YellowPages business detail data and visible review rows: business name, address, phone, website, restaurant/category details, languages, rating, reviewer, review date, comment, and helpful count. Navigation uses a multi-URL loop equivalent to Octoparse input URLs; add all desired business detail page URLs to navigate.urls[] and results append to one CSV. Cookie overlays are safely cleaned with JavaScript, phone buttons are clicked/revealed before extraction, and review text is cleaned to avoid duplicated author/date/helpful labels. Review pagination/load-more was not detected in the supplied detail-page analysis, so only reviews visible after page load are exported.",
    "color": "bg-[#ffcd00]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "urls": [
          "https://www.pagesjaunes.ca/bus/Quebec/Lachine/Borenstein-Plumbing-Heating-Inc/8152771.html"
        ],
        "color": "bg-[#4589ff]",
        "tags": [
          "input-urls",
          "pagesjaunes",
          "yellowpages"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Click the cookie-consent accept button, then remove consent overlays and restore page scrolling",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "jsCode": "(function(){\n  const visible = el => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRects().length));\n  const candidates = Array.from(document.querySelectorAll('#onetrust-accept-btn-handler,#accept-recommended-btn-handler,button[id*=\"accept\" i],button[class*=\"accept\" i],button[aria-label*=\"Accept\" i],button[aria-label*=\"Accepter\" i]'));\n  const btn = candidates.find(visible);\n  if (btn) { try { btn.click(); } catch(e) {} }\n  setTimeout(function(){\n    document.querySelectorAll('#onetrust-banner-sdk,#onetrust-consent-sdk,.onetrust-pc-dark-filter,.ot-sdk-container,.ot-sdk-row,.cky-consent-container,.cky-overlay,.cky-modal,[class*=\"cookie\" i][role=\"dialog\"],[id*=\"cookie\" i][role=\"dialog\"]').forEach(function(el){\n      try { el.remove(); } catch(e) { el.style.display = 'none'; }\n    });\n    document.documentElement.style.overflow = 'auto';\n    document.body.style.overflow = 'auto';\n  }, 300);\n})();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]",
        "tags": [
          "cookie-cleanup",
          "safe-js"
        ]
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "duration": 1,
        "color": "bg-[#8d8d8d]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-2",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Click up to 8 visible phone-reveal triggers so the number is shown before extraction",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "jsCode": "(function(){\n  const visible = el => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRects().length));\n  const phoneTriggers = Array.from(document.querySelectorAll('a,button,span,div')).filter(function(el){\n    const text = (el.textContent || el.getAttribute('aria-label') || el.getAttribute('title') || '').trim();\n    const cls = el.className ? String(el.className) : '';\n    return visible(el) && (/^(Téléphone|Phone Number|Phone|Afficher le numéro|Show phone)/i.test(text) || /jsMlrMenu|phone/i.test(cls));\n  });\n  phoneTriggers.slice(0, 8).forEach(function(el){ try { el.click(); } catch(e) {} });\n})();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]",
        "tags": [
          "reveal-phone",
          "safe-js"
        ]
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2280,
      "position_y": 240,
      "config": {
        "duration": 1,
        "color": "bg-[#8d8d8d]"
      }
    },
    {
      "block_id": "inject-javascript-3",
      "block_type": "process",
      "title": "Inject JavaScript",
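      "description": "Build hidden #uscraper-yp-review-rows container with one data-* row per visible review (or one business-only row when no reviews are found)",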
      "position_x": 2640,
      "position_y": 240,
      "config": {
        "jsCode": "(function(){\n  const clean = v => (v == null ? '' : String(v)).replace(/\\s+/g, ' ').replace(/\\s+»/g, ' »').trim();\n  const escReg = s => String(s || '').replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n  const phoneRe = /(?:\\+?1[\\s.\\-]?)?\\(?\\d{3}\\)?[\\s.\\-]?\\d{3}[\\s.\\-]?\\d{4}/;\n  const isBadPhone = v => !v || /^(Téléphone|Phone Number|Phone)$/i.test(clean(v));\n  const absolutize = href => { try { return href ? new URL(href, location.href).href : ''; } catch(e) { return href || ''; } };\n  const firstText = (selectors, root=document, reject) => {\n    for (const s of selectors) {\n      const el = root.querySelector(s);\n      if (el) {\n        const txt = clean(el.getAttribute('content') || el.getAttribute('aria-label') || el.textContent || el.innerText || '');\n        if (txt && !(reject && reject(txt))) return txt;\n      }\n    }\n    return '';\n  };\n  const firstAttr = (selectors, attr, root=document) => {\n    for (const s of selectors) {\n      const el = root.querySelector(s);\n      if (el) {\n        let val = clean(el.getAttribute(attr) || (attr === 'href' ? el.href : '') || '');\n        if (val) return attr === 'href' ? 
absolutize(val) : val;\n      }\n    }\n    return '';\n  };\n  const valueNearStrictLabel = labels => {\n    const nodes = Array.from(document.querySelectorAll('dt, h2, h3, strong'));\n    for (const node of nodes) {\n      const t = clean(node.textContent).toLowerCase();\n      if (!t || t.length > 90) continue;\n      if (labels.some(l => t.includes(l))) {\n        const dd = node.nextElementSibling;\n        if (dd) {\n          const val = clean(dd.textContent);\n          if (val && val.toLowerCase() !== t && val.length < 500) return val;\n        }\n        const parent = node.parentElement;\n        if (parent) {\n          const raw = clean(parent.textContent);\n          const val = raw.replace(clean(node.textContent), '').trim();\n          if (val && val.length < 500 && !/que recherchez-vous|où recherchez-vous|search/i.test(val)) return val;\n        }\n      }\n    }\n    return '';\n  };\n  const findPhone = () => {\n    let p = firstAttr(['a[href^=\"tel:\"]'], 'href').replace(/^tel:/i, '');\n    if (!isBadPhone(p)) return p;\n    p = firstText(['[itemprop=\"telephone\"]','.merchant__phone','.merchant-phone','.phoneNumber','.phone-number','.phone','.mlr__submenu','.mlr__submenu__item'], document, isBadPhone);\n    if (!isBadPhone(p) && phoneRe.test(p)) return (p.match(phoneRe) || [''])[0];\n    for (const el of Array.from(document.querySelectorAll('*'))) {\n      for (const a of Array.from(el.attributes || [])) {\n        if (/phone|tel/i.test(a.name + ' ' + a.value)) {\n          const m = clean(a.value).match(phoneRe);\n          if (m) return m[0];\n        }\n      }\n    }\n    const all = clean((document.body.innerText || '') + ' ' + (document.documentElement.innerHTML || ''));\n    const m = all.match(phoneRe);\n    return m ? 
m[0] : '';\n  };\n  const businessName = firstText(['h1[itemprop=\"name\"]','[itemprop=\"name\"] h1','h1.merchant__title','h1.merchant-title','h1.merchant-name','.merchant__title h1','.listing-name h1','h1']) || document.title.replace(/\\s*[|\\-].*$/,'');\n  const locationText = firstText(['[itemprop=\"address\"]','.merchant__address','.merchant-address','.listing__address','.address','.adr','a[href*=\"/merchant/directions/\"]']).replace(/Itinéraire\\s*»?/i,'').replace(/Get directions/i,'');\n  const telephone = findPhone();\n  const siteWeb = firstAttr(['a[href*=\"/gourl/\"]','a[data-qa*=\"website\" i]','a[class*=\"website\" i]','a[title*=\"Website\" i]','a[title*=\"site\" i]'], 'href');\n  const typeRestaurant = valueNearStrictLabel(['restaurant type','type de restaurant']) || firstText(['[itemprop=\"servesCuisine\"]','.merchant__categories a','.categories a','a[href*=\"Restaurants\"]']);\n  const specialite = valueNearStrictLabel(['specialties','spécialité','spécialités','specialty','cuisine']) || firstText(['.specialties','.merchant__specialties','[class*=\"special\" i]']);\n  const language = valueNearStrictLabel(['languages spoken','langues parlées']);\n  const businessRating = firstAttr(['meta[itemprop=\"ratingValue\"]'], 'content') || firstText(['[itemprop=\"ratingValue\"]','.rating','.ypStars','.listing__ratings']);\n  let reviewEls = Array.from(document.querySelectorAll('[itemprop=\"review\"], #ypgReviews [class*=\"review\" i], #ypgReviewsHeader ~ * [class*=\"review\" i], .review__item, .review-item, .ypreview, li[id*=\"review\" i], div[id*=\"review\" i]'));\n  reviewEls = Array.from(new Set(reviewEls)).filter(el => {\n    const t = clean(el.innerText || el.textContent || '');\n    return t.length > 20 && !/privacy|cookie|write a review|écrire un avis|your data|essential cookies/i.test(t);\n  });\n  if (!reviewEls.length) reviewEls = [null];\n  let container = document.querySelector('#uscraper-yp-review-rows');\n  if (container) container.remove();\n  
container = document.createElement('div');\n  container.id = 'uscraper-yp-review-rows';\n  container.style.display = 'none';\n  const cleanComment = (raw, author, date) => {\n    let out = clean(raw);\n    if (author) out = out.replace(new RegExp('^Par\\\\s+' + escReg(author) + '\\\\s*', 'i'), '').replace(new RegExp('^' + escReg(author) + '\\\\s*', 'i'), '');\n    if (date) out = out.replace(new RegExp('^' + escReg(date) + '\\\\s*', 'i'), '');\n    out = out.replace(/^Par\\s+/i, '').replace(/Utile\\s*\\d+.*$/i, '').replace(/>\\s*Signaler.*$/i, '').replace(/Report.*$/i, '');\n    return clean(out);\n  };\n  const makeRow = data => {\n    const row = document.createElement('div');\n    row.className = 'uscraper-yp-review-row';\n    Object.entries(data).forEach(([k,v]) => row.setAttribute('data-' + k, clean(v)));\n    container.appendChild(row);\n  };\n  for (const r of reviewEls) {\n    const root = r || document;\n    const author = r ? firstText(['[itemprop=\"author\"] [itemprop=\"name\"]','[itemprop=\"author\"]','.review__author','.reviewer','.author','.name'], root) : '';\n    const date = r ? (firstAttr(['time'], 'datetime', root) || firstText(['time','[itemprop=\"datePublished\"]','.review__date','.date'], root)) : '';\n    let comment = r ? firstText(['[itemprop=\"reviewBody\"]','.review__text','.review-text','.reviewContent','.comment','.description','p'], root) : '';\n    if (r && (!comment || comment.length < 10)) comment = clean(r.innerText || r.textContent || '');\n    comment = cleanComment(comment, author, date);\n    const usefulText = r ? firstText(['.useful','.helpful','[class*=\"useful\" i]','[class*=\"helpful\" i]'], root) : '';\n    const usefulMatch = usefulText.match(/\\d+/);\n    const rowRating = r ? 
(firstAttr(['meta[itemprop=\"ratingValue\"]'], 'content', root) || firstText(['[itemprop=\"ratingValue\"]','.rating','.ypStars'], root)) : '';\n    makeRow({\n      'nom-du-magasin': businessName,\n      'location': locationText,\n      'telephone': telephone,\n      'site-web': siteWeb,\n      'type-de-restaurant': typeRestaurant,\n      'specialite': specialite,\n      'language': language,\n      'star-rating': rowRating || businessRating,\n      'nom-du-client': author,\n      'date': date,\n      'commentaires': comment,\n      'utile': usefulMatch ? usefulMatch[0] : ''\n    });\n  }\n  document.body.appendChild(container);\n})();",
        "waitForCompletion": true,
        "timeout": 15,
        "color": "bg-[#a56eff]",
        "tags": [
          "normalize-detail-page",
          "reviews"
        ]
      }
    },
    {
      "block_id": "wait-for-element-2",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 3000,
      "position_y": 240,
      "config": {
        "selector": "#uscraper-yp-review-rows .uscraper-yp-review-row",
        "timeout": 10,
        "visible": false,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 3360,
      "position_y": 240,
      "config": {
        "rowSelector": "#uscraper-yp-review-rows .uscraper-yp-review-row",
        "fileName": "pagesjaunes-ca-detail-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "tags": [
          "csv",
          "business-details",
          "reviews"
        ],
        "columns": [
          {
            "name": "nom_du_magasin",
            "selector": "",
            "attribute": "data-nom-du-magasin"
          },
          {
            "name": "location",
            "selector": "",
            "attribute": "data-location"
          },
          {
            "name": "téléphone",
            "selector": "",
            "attribute": "data-telephone"
          },
          {
            "name": "site_web",
            "selector": "",
            "attribute": "data-site-web"
          },
          {
            "name": "type_de_restaurant",
            "selector": "",
            "attribute": "data-type-de-restaurant"
          },
          {
            "name": "spécialité",
            "selector": "",
            "attribute": "data-specialite"
          },
          {
            "name": "Language",
            "selector": "",
            "attribute": "data-language"
          },
          {
            "name": "star_rating",
            "selector": "",
            "attribute": "data-star-rating"
          },
          {
            "name": "nom_du_client",
            "selector": "",
            "attribute": "data-nom-du-client"
          },
          {
            "name": "date",
            "selector": "",
            "attribute": "data-date"
          },
          {
            "name": "commentaires",
            "selector": "",
            "attribute": "data-commentaires"
          },
          {
            "name": "utile",
            "selector": "",
            "attribute": "data-utile"
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 3720,
      "position_y": 240,
      "config": {
        "color": "bg-[#8d8d8d]",
        "tags": [
          "multi-url-loop"
        ]
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-3",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 136,
      "width": 3200,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1",
          "sleep-2",
          "wait-for-element-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 768,
      "position_y": 136,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1",
          "inject-javascript-2",
          "inject-javascript-3"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 3288,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Multi-URL Loop",
      "color": "#ff832b",
      "position_x": 3648,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts PagesJaunes/YellowPages business detail data and visible review rows: business name, address, phone, website, restaurant/category details, languages, rating, reviewer, review date, comment, and helpful count. Navigation uses a multi-URL loop equivalent to Octoparse input URLs; add all desired business detail page URLs to navigate.urls[] and results append to one CSV. Cookie overlays are safely cleaned with JavaScript, phone buttons are clicked/revealed before extraction, and review text is cleaned to avoid duplicated author/date/helpful labels. Review pagination/load-more was not detected in the supplied detail-page analysis, so only reviews visible after page load are exported.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(function(){\n  const visible = el => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRect...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1040,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-inject-javascript-2",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(function(){\n  const visible = el => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRect...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-2"
      }
    },
    {
      "id": "note-block-inject-javascript-3",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(function(){\n  const clean = v => (v == null ? '' : String(v)).replace(/\\s+/g, ' ').replace(/\\s+»/g,...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-3"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 3920,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}