{
  "version": "1.0.0",
  "exported_at": "2026-06-02T09:20:00.000Z",
  "project": {
    "name": "Carrefour Data Scraper",
    "description": "Extracts Carrefour France product listing data from https://www.carrefour.fr/s?q=vin+rouge: Prix, Titre, Description, Rating, Nombre_de_commentaire, Image, and URL_de_la_page_détaillée. After live testing, click-pagination was removed because Carrefour exposed a large repeated product set in the loaded search DOM and the next-page loop duplicated the same products. This version builds a synthetic one-row-per-canonical-product table and exports only unique product URLs once. Best-effort: Carrefour may show Cloudflare/Turnstile anti-bot verification; run with a verified persistent browser session if challenged.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 100,
      "position_y": 220,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 460,
      "position_y": 220,
      "config": {
        "url": "https://www.carrefour.fr/s?q=vin+rouge",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 820,
      "position_y": 220,
      "config": {
        "timeout": 45,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1180,
      "position_y": 220,
      "config": {
        "duration": 4,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1540,
      "position_y": 220,
      "config": {
        "jsCode": "(() => {\n  const old = document.getElementById('uscraper-carrefour-products');\n  if (old) old.remove();\n\n  const root = document.createElement('div');\n  root.id = 'uscraper-carrefour-products';\n  root.style.display = 'none';\n  document.body.appendChild(root);\n\n  const clean = (v) => String(v || '').replace(/\\s+/g, ' ').trim();\n\n  const isDisplayedBox = (el) => {\n    if (!el || el.nodeType !== 1) return false;\n    const style = window.getComputedStyle(el);\n    const rect = el.getBoundingClientRect();\n    return style.display !== 'none' && style.visibility !== 'hidden' && Number(style.opacity || 1) !== 0 && rect.width > 20 && rect.height > 20;\n  };\n\n  const canonicalProductUrl = (href) => {\n    try {\n      const u = new URL(href, location.href);\n      if (!u.pathname.startsWith('/p/')) return '';\n      if (u.search || u.hash) return '';\n      return u.origin + u.pathname;\n    } catch (_) {\n      return '';\n    }\n  };\n\n  const getProductUrlsInside = (el) => {\n    return new Set(Array.from(el.querySelectorAll('a[href*=\"/p/\"]'))\n      .map(a => canonicalProductUrl(a.getAttribute('href') || ''))\n      .filter(Boolean));\n  };\n\n  const findCard = (anchor, canonicalUrl) => {\n    const preferred = anchor.closest('[data-testid*=\"product\" i], article, li[class*=\"product\" i], div[class*=\"product-card\" i], div[class*=\"ProductCard\"], li, article');\n    if (preferred && preferred !== document.body && isDisplayedBox(preferred)) {\n      return preferred;\n    }\n\n    let best = anchor;\n    for (let p = anchor.parentElement; p && p !== document.body && p !== document.documentElement; p = p.parentElement) {\n      if (!isDisplayedBox(p)) continue;\n      const text = clean(p.innerText || p.textContent);\n      if (text.length > 3500) break;\n      const urls = getProductUrlsInside(p);\n      const hasThisProduct = urls.has(canonicalUrl);\n      const hasPrice = /\\d+\\s*(?:[,.]\\s*\\d{2}|€\\s*\\d{2})\\s*€?/.test(text) || /\\d+[,.]\\d{2}\\s*€/.test(text);\n      const hasImg = !!p.querySelector('img');\n      const productish = p.matches('[data-testid*=\"product\" i], article, li, div[class*=\"product\" i], div[class*=\"Product\"], div[class*=\"card\" i]');\n      if (hasThisProduct && (hasPrice || hasImg || productish)) {\n        best = p;\n      }\n      if (urls.size > 1 && best !== anchor) break;\n    }\n    return best;\n  };\n\n  const readImage = (card) => {\n    const imgs = Array.from(card.querySelectorAll('img'));\n    const img = imgs.find(i => clean(i.currentSrc || i.src || i.getAttribute('src') || i.getAttribute('data-src') || '').includes('media.carrefour.fr')) || imgs[0];\n    if (!img) return '';\n    const srcset = img.getAttribute('srcset') || img.getAttribute('data-srcset') || '';\n    const srcsetFirst = srcset ? clean(srcset.split(',')[0].trim().split(/\\s+/)[0]) : '';\n    return clean(img.currentSrc || img.src || img.getAttribute('src') || img.getAttribute('data-src') || img.getAttribute('data-lazy-src') || srcsetFirst);\n  };\n\n  const readPrice = (card) => {\n    const pieces = Array.from(card.querySelectorAll('[data-testid*=\"price\" i], [class*=\"price\" i], [aria-label*=\"prix\" i], [itemprop=\"price\"], span, div'))\n      .map(el => clean(el.getAttribute('aria-label') || el.getAttribute('content') || el.innerText || el.textContent))\n      .filter(Boolean);\n    pieces.push(clean(card.innerText || card.textContent));\n\n    for (const t of pieces) {\n      let m = t.match(/(\\d+)\\s*[,.]\\s*(\\d{2})\\s*€/);\n      if (m) return `${m[1]},${m[2]} €`;\n      m = t.match(/(\\d+)\\s*€\\s*(\\d{2})/);\n      if (m) return `${m[1]},${m[2]} €`;\n      m = t.match(/€\\s*(\\d+)\\s*[,.]\\s*(\\d{2})/);\n      if (m) return `${m[1]},${m[2]} €`;\n    }\n    return '';\n  };\n\n  const readTitle = (card, anchor) => {\n    const candidates = [\n      anchor.getAttribute('aria-label'),\n      anchor.getAttribute('title'),\n      anchor.innerText,\n      anchor.textContent,\n      card.querySelector('h1,h2,h3,[data-testid*=\"title\" i],[data-testid*=\"name\" i],[class*=\"title\" i],[class*=\"name\" i]')?.innerText,\n      card.querySelector('h1,h2,h3,[data-testid*=\"title\" i],[data-testid*=\"name\" i],[class*=\"title\" i],[class*=\"name\" i]')?.textContent\n    ].map(clean).filter(Boolean);\n    const title = candidates.find(t => t.length >= 5 && !/avis|commentaire|note moyenne|customers-reviews/i.test(t)) || '';\n    return title.replace(/^Voir le produit\\s*/i, '').trim();\n  };\n\n  const readDescription = (card) => {\n    const pieces = Array.from(card.querySelectorAll('[data-testid*=\"description\" i], [data-testid*=\"packaging\" i], [class*=\"description\" i], [class*=\"packaging\" i], [class*=\"details\" i], span, p, div'))\n      .map(el => clean(el.innerText || el.textContent))\n      .filter(Boolean);\n    pieces.push(clean(card.innerText || card.textContent));\n    for (const t of pieces) {\n      const m = t.match(/\\b\\d+\\s?(?:cL|cl|mL|ml|L|g|kg)\\b[^€\\n]{0,120}(?:\\d+[,.]\\d{1,2}\\s*€\\s*\\/\\s*(?:L|kg|g|mL|cl))?/i);\n      if (m) return clean(m[0]);\n    }\n    return '';\n  };\n\n  const readRating = (card) => {\n    const pieces = Array.from(card.querySelectorAll('[aria-label*=\"sur 5\" i], [title*=\"sur 5\" i], [aria-label*=\"out of 5\" i], [class*=\"rating\" i], [data-testid*=\"rating\" i], a[href*=\"customers-reviews\"], a[href*=\"anchor=pdp-customers-reviews\"]'))\n      .map(el => clean(el.getAttribute('aria-label') || el.getAttribute('title') || el.innerText || el.textContent))\n      .filter(Boolean);\n    pieces.push(clean(card.innerText || card.textContent));\n    for (const t of pieces) {\n      const m = t.match(/\\d+(?:[,.]\\d+)?\\s*(?:sur|\\/|out of)\\s*5/i);\n      if (m) return clean(m[0]).replace('.', ',');\n    }\n    return '';\n  };\n\n  const readReviewCount = (card) => {\n    const pieces = Array.from(card.querySelectorAll('[aria-label*=\"avis\" i], [class*=\"review\" i], [class*=\"comment\" i], [data-testid*=\"review\" i], a[href*=\"customers-reviews\"], a[href*=\"anchor=pdp-customers-reviews\"]'))\n      .map(el => clean(el.getAttribute('aria-label') || el.getAttribute('title') || el.innerText || el.textContent))\n      .filter(Boolean);\n    pieces.push(clean(card.innerText || card.textContent));\n    for (const t of pieces) {\n      let m = t.match(/(\\d+)\\s*(?:avis|commentaires?|reviews?)/i);\n      if (m) return m[1];\n      m = t.match(/\\((\\d+)\\)/);\n      if (m && /avis|note moyenne|sur 5/i.test(t)) return m[1];\n    }\n    return '';\n  };\n\n  const scope = document.querySelector('main') || document.body;\n  const anchors = Array.from(scope.querySelectorAll('a[href*=\"/p/\"]'));\n  const seen = new Set();\n  let created = 0;\n\n  for (const a of anchors) {\n    const url = canonicalProductUrl(a.getAttribute('href') || '');\n    if (!url || seen.has(url)) continue;\n\n    const anchorText = clean(a.innerText || a.textContent || a.getAttribute('aria-label') || a.getAttribute('title'));\n    if (/avis|commentaire|note moyenne|customers-reviews/i.test(anchorText)) continue;\n\n    const card = findCard(a, url);\n    if (!card || !isDisplayedBox(card)) continue;\n\n    const title = readTitle(card, a);\n    if (!title || /avis|commentaire|note moyenne/i.test(title)) continue;\n\n    const row = document.createElement('div');\n    row.className = 'uscraper-product-row';\n    row.setAttribute('data-price', readPrice(card));\n    row.setAttribute('data-title', title);\n    row.setAttribute('data-description', readDescription(card));\n    row.setAttribute('data-rating', readRating(card));\n    row.setAttribute('data-reviews', readReviewCount(card));\n    row.setAttribute('data-image', readImage(card));\n    row.setAttribute('data-url', url);\n    root.appendChild(row);\n\n    seen.add(url);\n    created++;\n  }\n\n  return created;\n})()",
        "waitForCompletion": true,
        "timeout": 20,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1900,
      "position_y": 220,
      "config": {
        "selector": "#uscraper-carrefour-products .uscraper-product-row",
        "timeout": 30,
        "visible": false,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2260,
      "position_y": 220,
      "config": {
        "rowSelector": "#uscraper-carrefour-products .uscraper-product-row",
        "fileName": "carrefour-data-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "create",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "Prix",
            "selector": "ROW.getAttribute('data-price') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Titre",
            "selector": "ROW.getAttribute('data-title') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Description",
            "selector": "ROW.getAttribute('data-description') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Rating",
            "selector": "ROW.getAttribute('data-rating') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Nombre_de_commentaire",
            "selector": "ROW.getAttribute('data-reviews') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Image",
            "selector": "ROW.getAttribute('data-image') || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "URL_de_la_page_détaillée",
            "selector": "ROW.getAttribute('data-url') || ''",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 2620,
      "position_y": 220,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 28,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 388,
      "position_y": 116,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1468,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2188,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 2548,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Carrefour France product listing data from https://www.carrefour.fr/s?q=vin+rouge: Prix, Titre, Description, Rating, Nombre_de_commentaire, Image, and URL_de_la_page_détaillée. After live testing, click-pagination was removed because Carrefour exposed a large repeated product set in the loaded search DOM and the next-page loop duplicated the same products. This version builds a synthetic one-row-per-canonical-product table and exports only unique product URLs once. Best-effort: Carrefour may show Cloudflare/Turnstile anti-bot verification; run with a verified persistent browser session if challenged.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => {\n  const old = document.getElementById('uscraper-carrefour-products');\n  if (old) old.remove...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1740,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (Prix, Titre, Description, Rating, Nombre_de_commentaire). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2460,
      "position_y": 200,
      "width": 340,
      "height": 132,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    }
  ]
}