{
  "version": "1.0.0",
  "exported_at": "2026-06-02T20:20:00.000Z",
  "project": {
    "name": "Decathlonfr Data Scraper",
    "description": "Decathlon.fr listing scraper equivalent to the Octoparse template. Extracts current price, brand, product description, review/comment count, delivery text, image URL, and product detail URL from Decathlon France search/listing URLs such as https://www.decathlon.fr/search?Ntt=chaussure. Uses append mode and a next-page pagination loop so all reachable result pages are collected. Includes Didomi cookie-popup dismissal and JavaScript-based next-page navigation to avoid overlay click interception. Decathlon.fr may still show Cloudflare verification; a verified persistent browser session may be required.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to Decathlon.fr listing/search URL input",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.decathlon.fr/search?Ntt=chaussure"
        ],
        "color": "bg-[#4589ff]",
        "tags": [
          "decathlon",
          "input-url",
          "listing"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for the listing page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Allow Decathlon client-side content or verification redirect to settle",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "duration": 5
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Dismiss or remove Didomi cookie popup",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "jsCode": "(() => {\n  const host = document.querySelector('#didomi-host');\n  if (!host) return 'no didomi popup';\n  const buttons = Array.from(host.querySelectorAll('button, a[role=\"button\"], [role=\"button\"]'));\n  const preferred = buttons.find(el => /^(tout accepter|accepter|accept all|accept|j'accepte|continuer)$/i.test((el.textContent || '').trim())) || buttons.find(el => /accepter|accept/i.test((el.textContent || '').trim()));\n  if (preferred) {\n    preferred.click();\n    return 'clicked didomi consent';\n  }\n  document.querySelectorAll('#didomi-host, .didomi-popup-backdrop, .didomi-notice-popup, .didomi-popup__backdrop').forEach(el => el.remove());\n  document.documentElement.style.overflow = '';\n  document.body.style.overflow = '';\n  return 'removed didomi overlay';\n})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-3",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait briefly after cookie popup handling",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait for product cards to appear",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "selector": "[data-testid=\"product-card\"], [data-testid=\"product-tile\"], article:has(a[href*=\"/p/\"][href*=\"/c\"]), li:has(a[href*=\"/p/\"][href*=\"/c\"])",
        "timeout": 45,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export Decathlon product data with custom columns",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "rowSelector": "[data-testid=\"product-card\"], [data-testid=\"product-tile\"], article:has(a[href*=\"/p/\"][href*=\"/c\"])",
        "fileName": "decathlon-fr-data-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "prix_actuel",
            "selector": "(() => { const row = ROW; const raw = row.innerText || ''; const m = raw.match(/\\b\\d{1,3}(?:[\\u00a0\\u202f]\\d{3})+(?:[,.]\\d{1,2})?\\s*€|\\b\\d+(?:[,.]\\d{1,2})?\\s*€/); return m ? m[0].replace(/\\s+/g, '') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "marque",
            "selector": "(() => { const row = ROW; const direct = row.querySelector('[data-testid*=\"brand\" i], [class*=\"brand\" i], [aria-label*=\"marque\" i]'); if (direct && direct.textContent.trim() && !/[€()]|avis/i.test(direct.textContent)) return direct.textContent.trim(); const a = row.querySelector('a[href*=\"/p/\"][href*=\"/c\"]') || row.querySelector('a[href*=\"/p/\"]'); const img = row.querySelector('img'); let raw = [row.querySelector('[data-testid*=\"title\" i], [data-testid*=\"name\" i], [class*=\"title\" i], [class*=\"name\" i], h2, h3')?.textContent, a?.getAttribute('aria-label'), a?.getAttribute('title'), a?.textContent, img?.getAttribute('alt')].map(v => (v || '').replace(/\\s+/g, ' ').trim()).find(v => v && !/^\\d+(?:[,.]\\d{1,2})?\\s?€$/.test(v) && !/^https?:\\/\\//i.test(v)) || ''; raw = raw.replace(/^Voir le produit\\s*/i, '').trim(); const words = raw.split(/\\s+/); const brand = []; for (const w of words) { if (brand.length >= 3) break; if (/^[A-Z0-9À-ÖØ-Þ&'.-]+$/.test(w) && !/^\\(?[\\d\\s\\u202f\\u00a0]+\\)?$/.test(w) && !/€/.test(w)) brand.push(w); else break; } return brand.join(' '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "description",
            "selector": "(() => { const row = ROW; const a = row.querySelector('a[href*=\"/p/\"][href*=\"/c\"]') || row.querySelector('a[href*=\"/p/\"]'); const img = row.querySelector('img'); let raw = [row.querySelector('[data-testid*=\"title\" i], [data-testid*=\"name\" i], [class*=\"title\" i], [class*=\"name\" i], h2, h3')?.textContent, a?.getAttribute('aria-label'), a?.getAttribute('title'), a?.textContent, img?.getAttribute('alt')].map(v => (v || '').replace(/\\s+/g, ' ').trim()).find(v => v && !/^\\d+(?:[,.]\\d{1,2})?\\s?€$/.test(v) && !/^https?:\\/\\//i.test(v)) || ''; raw = raw.replace(/^Voir le produit\\s*/i, '').trim(); const words = raw.split(/\\s+/); let i = 0; while (i < words.length && i < 3 && /^[A-Z0-9À-ÖØ-Þ&'.-]+$/.test(words[i]) && !/^\\(?[\\d\\s\\u202f\\u00a0]+\\)?$/.test(words[i]) && !/€/.test(words[i])) i++; const stripped = words.slice(i).join(' ').trim(); return stripped || raw; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "commentaire",
            "selector": "(() => { const txt = (ROW.innerText || '').replace(/\\s+/g, ' ').trim(); const paren = txt.match(/\\(\\s*[\\d\\s\\u202f\\u00a0]+\\s*\\)/); if (paren) return paren[0].replace(/\\s+/g, ' '); const review = txt.match(/\\b[\\d\\s\\u202f\\u00a0]+\\s*(?:avis|commentaires?)\\b/i); return review ? review[0].replace(/\\s+/g, ' ') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "livraison",
            "selector": "(() => { const lines = (ROW.innerText || '').split(/\\n+/).map(s => s.replace(/\\s+/g, ' ').trim()).filter(Boolean); const line = lines.find(s => /livraison|retrait|disponible|chez vous|24h|48h/i.test(s)); return line || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "image_url",
            "selector": "(() => { const img = ROW.querySelector('img[src*=\"contents.mediadecathlon.com\"], img[data-src*=\"contents.mediadecathlon.com\"]') || ROW.querySelector('img'); if (!img) return ''; const lazy = img.getAttribute('data-src') || ''; const src = img.currentSrc || img.src || img.getAttribute('src') || ''; return (/^data:/i.test(src) && lazy) ? lazy : (src || lazy); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url_de_la_page_detaillee",
            "selector": "(() => { const a = ROW.querySelector('a[href*=\"/p/\"][href*=\"/c\"]') || ROW.querySelector('a[href*=\"/p/\"]'); return a ? a.href : ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check whether an enabled next-page pagination control exists",
      "position_x": 2640,
      "position_y": 220,
      "config": {
        "selector": "//a[(contains(@rel,'next') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'page suivante') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant') or contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant')) and not(@aria-disabled='true') and not(contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'disabled'))] | //button[(contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'page suivante') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant') or contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant')) and not(@disabled) and not(@aria-disabled='true') and not(contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'disabled'))]"
      }
    },
    {
      "block_id": "inject-javascript-2",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Navigate to next page by href instead of physical click",
      "position_x": 2640,
      "position_y": 540,
      "config": {
        "jsCode": "(() => {\n  document.querySelectorAll('#didomi-host, .didomi-popup-backdrop, .didomi-notice-popup, .didomi-popup__backdrop').forEach(el => el.remove());\n  const xpath = \"//a[(contains(@rel,'next') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'page suivante') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant') or contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant')) and not(@aria-disabled='true') and not(contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'disabled'))] | //button[(contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'page suivante') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant') or contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'suivant')) and not(@disabled) and not(@aria-disabled='true') and not(contains(translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'disabled'))]\";\n  const node = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;\n  if (!node) return 'no next page node';\n  const href = node.href || node.getAttribute('href');\n  if (href) {\n    window.location.href = new URL(href, window.location.href).href;\n    return 'navigating to ' + href;\n  }\n  node.click();\n  return 'clicked next button via js';\n})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "wait-for-page-load-2",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait after pagination navigation",
      "position_x": 3000,
      "position_y": 540,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Allow Decathlon product results to refresh after pagination",
      "position_x": 3360,
      "position_y": 540,
      "config": {
        "duration": 3
      }
    },
    {
      "block_id": "wait-for-element-2",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait for product cards on the next page before extracting again",
      "position_x": 3720,
      "position_y": 540,
      "config": {
        "selector": "[data-testid=\"product-card\"], [data-testid=\"product-tile\"], article:has(a[href*=\"/p/\"][href*=\"/c\"]), li:has(a[href*=\"/p/\"][href*=\"/c\"])",
        "timeout": 45,
        "visible": true
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue to the next configured Decathlon listing URL",
      "position_x": 2640,
      "position_y": 860,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-3",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "inject-javascript-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 3920,
      "height": 616,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-3",
          "wait-for-element-1",
          "wait-for-page-load-2",
          "sleep-2",
          "wait-for-element-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 116,
      "width": 1760,
      "height": 616,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1",
          "inject-javascript-2"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 116,
      "width": 380,
      "height": 936,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Decathlon.fr listing scraper equivalent to the Octoparse template. Extracts current price, brand, product description, review/comment count, delivery text, image URL, and product detail URL from Decathlon France search/listing URLs such as https://www.decathlon.fr/search?Ntt=chaussure. Uses append mode and a next-page pagination loop so all reachable result pages are collected. Includes Didomi cookie-popup dismissal and JavaScript-based next-page navigation to avoid overlay click interception. Decathlon.fr may still show Cloudflare verification; a verified persistent browser session may be required.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => {\n  const host = document.querySelector('#didomi-host');\n  if (!host) return 'no didomi popup...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (prix_actuel, marque, description, commentaire, livraison, image_url, url_de_la_page_detaillee). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 200,
      "width": 340,
      "height": 132,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `//a[(contains(@rel,'next') or contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 200,
      "width": 340,
      "height": 170,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-inject-javascript-2",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => {\n  document.querySelectorAll('#didomi-host, .didomi-popup-backdrop, .didomi-notice-popup, .d...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 520,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-2"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 840,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}