{
  "version": "1.0.0",
  "exported_at": "2026-06-03T07:15:00.000Z",
  "project": {
    "name": "Coppel Listing Scraper",
    "description": "Extracts Coppel listing fields equivalent to the Octoparse template: titulo, precio, precio_original, cuota, imagen_url, and producto_url. Direct browser navigation to www.coppel.com repeatedly failed with ERR_HTTP2_PROTOCOL_ERROR during testing, so this best-effort template uses Jina Reader-wrapped Coppel listing URLs. Pagination uses the four known beginIndex URLs from the Octoparse template and appends parsed product rows into one clean CSV. The parser filters out category facets such as Filtrar por, Precio, Color, and Marca. If direct Coppel scraping is required, use a browser/proxy/network profile that can load coppel.com without HTTP/2 blocking.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 120,
      "position_y": 280,
      "config": {
        "width": 1920,
        "height": 1080
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 480,
      "position_y": 280,
      "config": {
        "urls": [
          "https://r.jina.ai/http://r.jina.ai/http://https://www.coppel.com/ct/electronica/hogar-inteligente/cat000071?beginIndex=0&pmNodeId=11404&prNodeId=11419&regionTelcel=9",
          "https://r.jina.ai/http://r.jina.ai/http://https://www.coppel.com/ct/electronica/hogar-inteligente/cat000071?beginIndex=24&pmNodeId=11404&prNodeId=11419&regionTelcel=9",
          "https://r.jina.ai/http://r.jina.ai/http://https://www.coppel.com/ct/electronica/hogar-inteligente/cat000071?beginIndex=48&pmNodeId=11404&prNodeId=11419&regionTelcel=9",
          "https://r.jina.ai/http://r.jina.ai/http://https://www.coppel.com/ct/electronica/hogar-inteligente/cat000071?beginIndex=72&pmNodeId=11404&prNodeId=11419&regionTelcel=9"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 840,
      "position_y": 280,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1200,
      "position_y": 280,
      "config": {
        "selector": "body",
        "timeout": 45,
        "visible": true
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 280,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1920,
      "position_y": 280,
      "config": {
        "jsCode": "(() => { const raw = document.body.innerText || ''; const lineObjs = []; let pos = 0; raw.split(/\\n/).forEach(line => { lineObjs.push({ text: line.trim(), raw: line, start: pos, end: pos + line.length }); pos += line.length + 1; }); const lines = lineObjs.map(x => x.text); const clean = s => (s || '').replace(/^#+\\s*/, '').replace(/!\\[[^\\]]*\\]\\([^)]*\\)/g, ' ').replace(/\\[[^\\]]*\\]\\(([^)]*)\\)/g, ' ').replace(/\\s+/g, ' ').trim(); const normalizeUrl = u => { u = (u || '').replace(/[)\\],.;]+$/g, '').trim(); if (u.startsWith('/')) return 'https://www.coppel.com' + u; return u; }; const lineIndexAt = p => { for (let i = 0; i < lineObjs.length; i++) { if (p >= lineObjs[i].start && p <= lineObjs[i].end + 1) return i; } return Math.max(0, lineObjs.length - 1); }; const isProductUrl = u => /https?:\\/\\/(?:www\\.)?coppel\\.com\\/.*(?:\\/pdp\\/|-pm-|mkp-)/i.test(u || ''); const badTitle = t => { t = clean(t); if (!t || t.length < 6 || t.length > 220) return true; if (/^(filtrar por|filtrar|precio|color|marca|categor[ií]a|departamento|ordenar|relevancia|calificaci[oó]n|vendido por|env[ií]o|talla|ver m[aá]s|mostrar|inicio|coppel)$/i.test(t)) return true; if (/^(https?:|www\\.|\\$|desde\\s*\\$)/i.test(t)) return true; if (/\\.(jpg|jpeg|png|webp|avif)(\\?|$)/i.test(t)) return true; if (/quincenal|agregar|favorito|wishlist|comprar|carrito|p[aá]gina|siguiente|anterior/i.test(t)) return true; return false; }; const titleFromSlug = url => { try { let part = new URL(url).pathname.split('/').filter(Boolean).pop() || ''; part = part.replace(/-pm-\\d+.*$/i, '').replace(/-mkp-\\d+.*$/i, '').replace(/-p-\\d+.*$/i, '').replace(/-/g, ' '); return part.replace(/\\b\\w/g, c => c.toUpperCase()).trim(); } catch(e) { return ''; } }; const productId = url => { const s = url || ''; const m = s.match(/(?:-|\\/)(?:mkp|pm)-?(\\d+)/i) || s.match(/(?:mkp|pm)\\/(\\d+)/i) || s.match(/(?:pm|mkp)(\\d+)/i); return m ? m[1] : ''; }; const imageUrls = text => (text.match(/https?:\\/\\/[^\\s)\\]]+\\.(?:jpg|jpeg|png|webp|avif)(?:\\?[^\\s)\\]]*)?/gi) || []).map(normalizeUrl).filter(Boolean); const candidatesMap = new Map(); const addCandidate = (title, url, idx, source) => { url = normalizeUrl(url); if (!isProductUrl(url)) return; const key = url.split('?')[0]; const prev = candidatesMap.get(key); const ctitle = clean(title); if (!prev || (badTitle(prev.title) && !badTitle(ctitle))) candidatesMap.set(key, { title: ctitle, url, idx, source }); }; let m; const mdRe = /\\[([^\\]]{2,260})\\]\\((https?:\\/\\/[^)\\s]*coppel\\.com\\/[^)\\s]+)\\)/gi; while ((m = mdRe.exec(raw)) !== null) addCandidate(m[1], m[2], lineIndexAt(m.index), 'markdown'); const urlRe = /https?:\\/\\/(?:www\\.)?coppel\\.com\\/[^\\s)\\]]*(?:\\/pdp\\/|-pm-|mkp-)[^\\s)\\]]*/gi; while ((m = urlRe.exec(raw)) !== null) addCandidate('', m[0], lineIndexAt(m.index), 'plain-url'); let candidates = Array.from(candidatesMap.values()).sort((a, b) => a.idx - b.idx); const products = []; const seen = new Set(); for (let j = 0; j < candidates.length; j++) { const c = candidates[j]; const nextIdx = candidates[j + 1] ? candidates[j + 1].idx : lines.length; const localStart = Math.max(0, c.idx - 6); const localEnd = Math.min(lines.length, Math.max(c.idx + 8, Math.min(nextIdx, c.idx + 28))); const localLines = lines.slice(localStart, localEnd); const localText = localLines.join(' ').replace(/\\s+/g, ' ').trim(); const cuotaMatch = localText.match(/Desde\\s*\\$?\\s*[\\d.,]+\\s*quincenal/i); const cuota = cuotaMatch ? cuotaMatch[0].replace(/\\s+/g, ' ').trim() : ''; const amounts = localText.match(/\\$\\s?[\\d.,]+/g) || []; const cuotaAmount = ((cuota.match(/\\$\\s?[\\d.,]+/) || [])[0] || '').replace(/\\s+/g, ' '); const nonCuota = amounts.filter(a => a.replace(/\\s+/g, ' ') !== cuotaAmount); const precio = (nonCuota[0] || '').trim(); const precioOriginal = (nonCuota[1] || '').trim(); if (!precio && !cuota) continue; let titulo = clean(c.title); if (badTitle(titulo)) { const around = lines.slice(Math.max(0, c.idx - 5), Math.min(lines.length, c.idx + 8)); const foundTitle = around.map(clean).find(t => !badTitle(t) && !/coppel\\.com|http|\\$|quincenal/i.test(t)); titulo = foundTitle || titleFromSlug(c.url); } if (badTitle(titulo)) titulo = titleFromSlug(c.url); if (badTitle(titulo)) continue; const id = productId(c.url); const imgText = lines.slice(Math.max(0, c.idx - 10), Math.min(lines.length, localEnd + 8)).join(' '); let imgs = imageUrls(imgText).filter(src => !/logo|logohs|icon|sprite|placeholder|blank|loading/i.test(src)); let imagen = ''; if (id) imagen = imgs.find(src => src.includes('/' + id + '-') || src.includes('/' + id + '.') || src.includes('/' + id + '?') || src.includes('/' + id + '/')) || ''; if (!imagen) imagen = imgs.find(src => /cdn\\d*\\.coppel\\.com|catalog|\\/pm\\/|\\/mkp\\//i.test(src)) || imgs[0] || ''; const key = c.url.split('?')[0]; if (seen.has(key)) continue; seen.add(key); products.push({ titulo, precio, precio_original: precioOriginal, cuota, imagen_url: imagen, producto_url: c.url }); } const root = document.createElement('div'); root.id = 'uscraper-products-root'; root.innerHTML = ''; products.forEach(p => { const row = document.createElement('div'); row.className = 'uscraper-product'; row.innerHTML = '<span class=\"titulo\"></span><span class=\"precio\"></span><span class=\"precio-original\"></span><span class=\"cuota\"></span><a class=\"producto-url\"></a><img class=\"imagen-url\">'; row.querySelector('.titulo').textContent = p.titulo || ''; row.querySelector('.precio').textContent = p.precio || ''; row.querySelector('.precio-original').textContent = p.precio_original || ''; row.querySelector('.cuota').textContent = p.cuota || ''; row.querySelector('.producto-url').href = p.producto_url || ''; row.querySelector('.producto-url').textContent = p.producto_url || ''; row.querySelector('.imagen-url').src = p.imagen_url || ''; row.querySelector('.imagen-url').setAttribute('data-src', p.imagen_url || ''); root.appendChild(row); }); if (!products.length) { const empty = document.createElement('div'); empty.className = 'uscraper-no-products'; empty.textContent = 'No products parsed on this page'; root.appendChild(empty); } document.body.prepend(root); })();",
        "waitForCompletion": true,
        "timeout": 20
      }
    },
    {
      "block_id": "wait-for-element-2",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 2280,
      "position_y": 280,
      "config": {
        "selector": ".uscraper-product, .uscraper-no-products",
        "timeout": 20,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2640,
      "position_y": 280,
      "config": {
        "rowSelector": ".uscraper-product",
        "fileName": "coppel_listados_scraper_clean.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "titulo",
            "selector": ".titulo",
            "attribute": "text"
          },
          {
            "name": "precio",
            "selector": ".precio",
            "attribute": "text"
          },
          {
            "name": "precio_original",
            "selector": ".precio-original",
            "attribute": "text"
          },
          {
            "name": "cuota",
            "selector": ".cuota",
            "attribute": "text"
          },
          {
            "name": "imagen_url",
            "selector": ".imagen-url",
            "attribute": "data-src"
          },
          {
            "name": "producto_url",
            "selector": ".producto-url",
            "attribute": "href"
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 3000,
      "position_y": 280,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 176,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 408,
      "position_y": 176,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1",
          "wait-for-element-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1848,
      "position_y": 176,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2568,
      "position_y": 176,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2928,
      "position_y": 176,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Coppel listing fields equivalent to the Octoparse template: titulo, precio, precio_original, cuota, imagen_url, and producto_url. Direct browser navigation to www.coppel.com repeatedly failed with ERR_HTTP2_PROTOCOL_ERROR during testing, so this best-effort template uses Jina Reader-wrapped Coppel listing URLs. Pagination uses the four known beginIndex URLs from the Octoparse template and appends parsed product rows into one clean CSV. The parser filters out category facets such as Filtrar por, Precio, Color, and Marca. If direct Coppel scraping is required, use a browser/proxy/network profile that can load coppel.com without HTTP/2 blocking.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 4 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 680,
      "position_y": 260,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const raw = document.body.innerText || ''; const lineObjs = []; let pos = 0; raw.split(/\\n/...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 260,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Extracts rows matching `.uscraper-product`. Confirm row count > 0 before running at scale.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 260,
      "width": 340,
      "height": 110,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 3200,
      "position_y": 260,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}