{
  "version": "1.0.0",
  "exported_at": "2026-06-02T02:25:00.000Z",
  "project": {
    "name": "Papfr Immobilier Data Scraper",
    "description": "Best-effort PAP.fr real-estate detail-page scraper equivalent to the Octoparse template. It extracts one row per supplied PAP.fr annonce URL: search/context, title, detail URL, reference/date, price, location, rooms, bedrooms, surface, price per m2, owner phone, plus an access_status diagnostic. Navigation uses a multi-URL loop with loop-continue and appends all supplied detail URLs into pap-scraper.csv. PAP.fr returned Cloudflare/security verification HTTP 403 in analysis/testing, so real listing fields require a browser profile/session that can pass PAP.fr verification; when blocked, the CSV still records the URL and blocked status instead of timing out.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.pap.fr/annonces/appartement-paris-20e-75020-r452100741",
          "https://www.pap.fr/annonces/appartement-paris-19e-75019-r452200020",
          "https://www.pap.fr/annonces/appartement-paris-20e-75020-r451800971"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "duration": 10
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "jsCode": "(() => { const bodyText = document.body?.innerText || ''; if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(bodyText)) return 'blocked/security verification page detected; no click attempted'; const leavesPage = el => { if (el.tagName.toLowerCase() !== 'a') return false; const href = (el.getAttribute('href') || '').trim(); return href !== '' && !/^(?:#|javascript:)/i.test(href); }; const labelOf = el => ((el.innerText || '') + ' ' + (el.getAttribute('aria-label') || '') + ' ' + (el.getAttribute('class') || '') + ' ' + (el.id || '')).trim(); const els = Array.from(document.querySelectorAll('button,a,[role=button]')).filter(el => !leavesPage(el)); const phoneRe = /t[ée]l[ée]phone|num[ée]ro|appeler/i; const genericRe = /afficher|contacter|voir/i; const target = els.find(el => phoneRe.test(labelOf(el))) || els.find(el => genericRe.test(labelOf(el))); if (target) { target.scrollIntoView({ block: 'center' }); target.click(); return 'clicked possible phone/contact reveal button'; } return 'no phone reveal button found'; })()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 260,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 260,
      "config": {
        "rowSelector": "body",
        "fileName": "pap-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "votre_recherche",
            "selector": "(() => { const blocked = /security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(document.body?.innerText || ''); if (blocked) return ''; const el = document.querySelector('nav[aria-label], .breadcrumb, [class*=breadcrumb], [class*=ariane], [class*=fil-ariane]'); const txt = (el?.innerText || '').replace(/\\s+/g, ' ').trim(); if (txt) return txt; const title = document.title || ''; return title.replace(/\\s*-\\s*PAP.*$/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titre",
            "selector": "(() => { const body = document.body?.innerText || ''; if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(body)) return ''; const h1 = Array.from(document.querySelectorAll('h1')).map(e => e.innerText.trim()).find(s => s && !/^www\\.pap\\.fr$/i.test(s)); if (h1) return h1; const og = document.querySelector('meta[property=\"og:title\"]')?.content; return (og || document.title || '').replace(/\\s*-\\s*PAP.*$/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url_de_page_detail",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "reference",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/\\b[A-Z]?\\d{2,3}\\/[A-Z]?\\d{3,4}\\b\\s*\\/?\\s*(?:\\d{1,2}\\s+[A-Za-zÀ-ÿ]+\\s+\\d{4})?/); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "prix",
            "selector": "(() => { const body = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(body)) return ''; const candidates = Array.from(document.querySelectorAll('[class*=price], [class*=prix], strong, h1, h2')).map(e => e.innerText.replace(/\\s+/g, ' ').trim()).filter(Boolean); const found = candidates.find(s => /€/.test(s) && !/m[²2]|le\\s*m/i.test(s)); if (found) { const m = found.match(/\\d[\\d.\\s]*\\s*€/); if (m) return m[0].trim(); } const all = Array.from(body.matchAll(/\\d[\\d.\\s]*\\s*€/g)).map(x => x[0].trim()); return all.find(x => !new RegExp(x.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&') + '\\\\s*(?:le\\\\s*)?m[²2]', 'i').test(body)) || all[0] || ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/(?:Paris|[A-ZÉÈÀÇ][A-Za-zÀ-ÿ' -]+)\\s*(?:\\d{1,2}(?:e|er))?\\s*\\(\\d{5}\\)/); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "pieces",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/\\b\\d+\\s*pi[eè]ces?\\b/i); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "chambres",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/\\b\\d+\\s*chambres?\\b/i); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "surface",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/\\b\\d+(?:[,.]\\d+)?\\s*m(?:²|2\\b)/i); return m ? m[0].trim().replace(/m2$/i, 'm²') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "prix_per_m2",
            "selector": "(() => { const t = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return ''; const m = t.match(/\\b\\d[\\d.\\s]*\\s*€\\s*(?:\\/\\s*|le\\s*)?m(?:²|2\\b)/i); return m ? m[0].trim().replace(/m2$/i, 'm²') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "telephone_du_proprietaire",
            "selector": "(() => { const body = ROW.innerText.replace(/\\s+/g, ' '); if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(body)) return ''; const tel = Array.from(document.querySelectorAll('a[href^=tel]')).map(a => (a.getAttribute('href') || '').replace(/^tel:/i, '').trim()).find(Boolean); if (tel) return tel; const m = body.match(/(?:\\+33|0033|0)\\s*[1-9](?:[\\s.\\-]?\\d{2}){4}|\\(\\s*00\\s*\\d{1,4}\\s*\\)\\s*\\d[\\d\\s.\\-]{6,}\\d/); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "access_status",
            "selector": "(() => { const t = document.body?.innerText || ''; if (/security verification|Cloudflare|not a bot|Enable JavaScript and cookies/i.test(t)) return 'blocked_by_pap_security_verification'; if (/€|pi[eè]ces?|chambres?|m[²2]/i.test(t)) return 'listing_loaded'; return 'unknown_page_state'; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2280,
      "position_y": 260,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 156,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Multi-URL Loop",
      "color": "#ff832b",
      "position_x": 2208,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort PAP.fr real-estate detail-page scraper equivalent to the Octoparse template. It extracts one row per supplied PAP.fr annonce URL: search/context, title, detail URL, reference/date, price, location, rooms, bedrooms, surface, price per m2, owner phone, plus an access_status diagnostic. Navigation uses a multi-URL loop with loop-continue and appends all supplied detail URLs into pap-scraper.csv. PAP.fr returned Cloudflare/security verification HTTP 403 in analysis/testing, so real listing fields require a browser profile/session that can pass PAP.fr verification; when blocked, the CSV still records the URL and blocked status instead of timing out.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const bodyText = document.body?.innerText || ''; if (/security verification|Cloudflare|not ...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (votre_recherche, titre, url_de_page_detail, reference, prix, location, pieces, chambres, surface, prix_per_m2, telephone_du_proprietaire, access_status). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 240,
      "width": 340,
      "height": 133,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}