{
  "version": "1.0.0",
  "exported_at": "2026-05-31T21:25:00.000Z",
  "project": {
    "name": "Tripadvisor Hotel Listing Scraper（for Spain）",
    "description": "Best-effort Tripadvisor Spain hotel scraper equivalent to the Octoparse template. Tripadvisor returned DataDome CAPTCHA/403 during analysis, so this version uses the provided Tripadvisor hotel URLs as multi-URL input and appends one row per URL. If the hotel page loads, it extracts hotel name, detail URL, price, rating, review count, OTA/provider, info/ranking text, amenities, and timestamp from the page. If CAPTCHA blocks the page, it still exports URL-derived hotel name and URL, leaving unavailable fields blank. Uses navigate.urls plus loop-continue for multiple input URLs.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 100,
      "position_y": 260,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 460,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.tripadvisor.es/Hotel_Review-g187529-d206949-Reviews-Hotel_Malcom_and_Barret-Valencia_Province_of_Valencia_Valencian_Community.html",
          "https://www.tripadvisor.es/Hotel_Review-g187529-d18956865-Reviews-Helen_Berger_Boutique_Hotel-Valencia_Province_of_Valencia_Valencian_Community.html",
          "https://www.tripadvisor.es/Hotel_Review-g187529-d17240464-Reviews-Only_YOU_Hotel_Valencia-Valencia_Province_of_Valencia_Valencian_Community.html"
        ],
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 820,
      "position_y": 260,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1180,
      "position_y": 260,
      "config": {
        "duration": 5
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1540,
      "position_y": 260,
      "config": {
        "jsCode": "(() => {\n  const labels = ['Aceptar', 'Accept', 'Estoy de acuerdo', 'I agree', 'Agree', 'OK'];\n  const buttons = Array.from(document.querySelectorAll('button, [role=\"button\"], input[type=\"button\"], input[type=\"submit\"]'));\n  const btn = buttons.find(el => labels.some(label => ((el.innerText || el.value || el.getAttribute('aria-label') || '').trim().toLowerCase()).includes(label.toLowerCase())));\n  if (btn) {\n    btn.click();\n    return 'cookie_or_consent_clicked';\n  }\n  return 'no_cookie_banner_found';\n})()",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1900,
      "position_y": 260,
      "config": {
        "rowSelector": "body",
        "fileName": "tripadvisor_hotel_listados_scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "numero_de_alojamientos",
            "selector": "(() => {\n  const t = (document.body.innerText || '').replace(/\\s+/g, ' ');\n  const m = t.match(/([\\d.,]+)\\s+(?:alojamientos|hoteles|properties|hotels)/i);\n  return m ? `${m[1]} alojamientos` : '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "titulo",
            "selector": "(() => {\n  const clean = s => (s || '').replace(/\\s+/g, ' ').trim();\n  const fromDom = clean(document.querySelector('h1')?.innerText || document.querySelector('meta[property=\"og:title\"]')?.content || document.title || '');\n  if (fromDom && !/^tripadvisor\\.es$/i.test(fromDom) && !/captcha|access denied|forbidden/i.test(fromDom)) return fromDom.replace(/\\s*-\\s*Tripadvisor.*$/i, '');\n  const url = decodeURIComponent(location.href);\n  const m = url.match(/Reviews-([^?#]+?)(?:-Valencia|-Province|-Spain|-Reviews|\\.html)/i) || url.match(/Reviews-([^?#]+?)\\.html/i);\n  if (!m) return '';\n  return m[1].replace(/_/g, ' ').replace(/-/g, ' ').replace(/\\s+/g, ' ').trim();\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "hotel_name",
            "selector": "(() => {\n  const clean = s => (s || '').replace(/\\s+/g, ' ').trim();\n  const h1 = clean(document.querySelector('h1')?.innerText || '');\n  if (h1 && !/captcha|tripadvisor\\.es|access denied|forbidden/i.test(h1)) return h1.replace(/^\\d+[.)]\\s*/, '').replace(/\\s*-\\s*Tripadvisor.*$/i, '');\n  const og = clean(document.querySelector('meta[property=\"og:title\"]')?.content || '');\n  if (og && !/captcha|tripadvisor\\.es|access denied|forbidden/i.test(og)) return og.replace(/^\\d+[.)]\\s*/, '').replace(/\\s*-\\s*Tripadvisor.*$/i, '');\n  const url = decodeURIComponent(location.href);\n  const m = url.match(/Reviews-([^?#]+?)(?:-Valencia|-Province|-Spain|-Reviews|\\.html)/i) || url.match(/Reviews-([^?#]+?)\\.html/i);\n  return m ? m[1].replace(/_/g, ' ').replace(/-/g, ' ').replace(/\\s+/g, ' ').trim() : '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "url_de_pagina_de_detalles",
            "selector": "(() => location.href.split('#')[0])()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "precio",
            "selector": "(() => {\n  const t = (document.body.innerText || '').replace(/\\s+/g, ' ');\n  if (/captcha-delivery|DataDome|captcha/i.test(t) && !/(€|EUR|US\\$|\\$)\\s*\\d/.test(t)) return '';\n  let m = t.match(/(?:€|EUR)\\s*([\\d.,]{2,6})|([\\d.,]{2,6})\\s*(?:€|EUR)/i);\n  if (!m) m = t.match(/(?:US\\$|\\$)\\s*([\\d.,]{2,6})/i);\n  return m ? (m[1] || m[2] || '').replace(/[, ].*$/, '') : '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "rating",
            "selector": "(() => {\n  const aria = Array.from(document.querySelectorAll('[aria-label]')).map(e => e.getAttribute('aria-label') || '').find(v => /(de\\s*5\\s*burbujas|of\\s*5\\s*bubbles|bubble rating|rating)/i.test(v));\n  const t = (aria || document.body.innerText || '').replace(/\\s+/g, ' ');\n  const m = t.match(/[0-5](?:[,.]\\d)?\\s*(?:de\\s*5\\s*burbujas|of\\s*5\\s*bubbles|bubble rating)/i);\n  return m ? m[0] : (aria || '');\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "numero_de_opiniones",
            "selector": "(() => {\n  const t = (document.body.innerText || '').replace(/\\s+/g, ' ');\n  const m = t.match(/([\\d.,]+)\\s*(?:opiniones|reviews)/i);\n  return m ? `${m[1]} opiniones` : '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "ota_recomendada",
            "selector": "(() => {\n  const text = (document.body.innerText || '').toLowerCase();\n  const altTitle = Array.from(document.querySelectorAll('[alt], [title], [aria-label]')).map(e => `${e.getAttribute('alt') || ''} ${e.getAttribute('title') || ''} ${e.getAttribute('aria-label') || ''}`.toLowerCase()).join(' ');\n  const combined = `${text} ${altTitle}`;\n  const providers = ['booking', 'agoda', 'expedia', 'hotels.com', 'trip.com', 'ebookers', 'lastminute', 'destinia', 'travelocity', 'priceline', 'zenhotels', 'vio.com'];\n  return providers.find(p => combined.includes(p)) || '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "info",
            "selector": "(() => {\n  const lines = (document.body.innerText || '').split(/\\n+/).map(s => s.replace(/\\s+/g, ' ').trim()).filter(Boolean);\n  const rankLine = lines.find(l => /^(N\\.º|#)\\s*\\d+|mejor relación|calidad-precio|best value|places to stay/i.test(l));\n  if (rankLine) return rankLine;\n  const t = lines.join(' ');\n  const m = t.match(/(?:N\\.º|#)\\s*\\d+[^.\\n]{0,180}/i);\n  return m ? m[0].trim() : '';\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "amenities",
            "selector": "(() => {\n  const lines = (document.body.innerText || '').split(/\\n+/).map(s => s.replace(/\\s+/g, ' ').trim()).filter(Boolean);\n  const re = /(wifi|wi-fi|internet|piscina|pool|parking|aparcamiento|desayuno|breakfast|restaurante|restaurant|bar|spa|gimnasio|fitness|aire acondicionado|air conditioning|mascotas|pets|playa|beach|habitaciones familiares|family rooms)/i;\n  return Array.from(new Set(lines.filter(l => re.test(l)))).slice(0, 12).join(' | ');\n})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "hora_actual",
            "selector": "(() => new Date().toISOString().replace('T', ' ').replace('Z', ''))()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2260,
      "position_y": 260,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2620,
      "position_y": 260,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 28,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 388,
      "position_y": 156,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1468,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1828,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Multi-URL Loop",
      "color": "#ff832b",
      "position_x": 2548,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort Tripadvisor Spain hotel scraper equivalent to the Octoparse template. Tripadvisor returned DataDome CAPTCHA/403 during analysis, so this version uses the provided Tripadvisor hotel URLs as multi-URL input and appends one row per URL. If the hotel page loads, it extracts hotel name, detail URL, price, rating, review count, OTA/provider, info/ranking text, amenities, and timestamp from the page. If CAPTCHA blocks the page, it still exports URL-derived hotel name and URL, leaving unavailable fields blank. Uses navigate.urls plus loop-continue for multiple input URLs.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 3 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 660,
      "position_y": 240,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
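    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page that clicks the first button whose text, value, or aria-label contains one of the consent labels ('Aceptar', 'Accept', 'Estoy de acuerdo', 'I agree', 'Agree', 'OK'), matched case-insensitively as a substring. Returns 'cookie_or_consent_clicked' or 'no_cookie_banner_found'. Because matching is by substring, the short label 'OK' can also match unrelated buttons (e.g. text containing 'Book'). Verify in browser if results are empty.",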
      "color": "#ee5396",
      "position_x": 1740,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with 11 JS columns (numero_de_alojamientos, titulo, hotel_name, url_de_pagina_de_detalles, precio, rating, numero_de_opiniones, ota_recomendada, info, amenities, hora_actual). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2100,
      "position_y": 240,
      "width": 340,
      "height": 139,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2820,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}