{
  "version": "1.0.0",
  "exported_at": "2026-05-31T19:20:00.000Z",
  "project": {
    "name": "TripAdvisor HÃtel Info Scraper",
    "description": "Best-effort TripAdvisor France hotel info scraper equivalent to the Octoparse template. Extracts hotel ranking, name, detail URL, image, price, rating, review count, hotel website/commerce link, review author, review snippet, source page URL, and page number from TripAdvisor hotel listing pages. Uses predictable TripAdvisor London pagination URLs with -oa30 offsets and appends all pages into one CSV. Live analysis and autonomous tests show TripAdvisor/DataDome returns HTTP 403 CAPTCHA/interstitial in this environment; this template checks for hotel rows, extracts normally when available, and writes a diagnostic blocked row per URL when blocked so the pagination loop still completes. Successful hotel scraping may require a trusted browser profile, manual CAPTCHA resolution, or compliant access/proxy.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 100,
      "position_y": 220,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 460,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.tripadvisor.fr/Hotels-g186338-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa30-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa60-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa90-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa120-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa150-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa180-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa210-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa240-London_England-Hotels.html",
          "https://www.tripadvisor.fr/Hotels-g186338-oa270-London_England-Hotels.html"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 820,
      "position_y": 220,
      "config": {
        "timeout": 45,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1180,
      "position_y": 220,
      "config": {
        "duration": 4,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 1540,
      "position_y": 220,
      "config": {
        "selector": "a[href*='Hotel_Review-'][href*='Reviews-'], div[data-automation='hotel-card'], div[data-testid='property-card'], div[data-test-target='hotel-listing'], div[data-locationid][data-prwidget-name*='meta_hsx'], div.listing",
        "color": "bg-[#ff832b]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1900,
      "position_y": 120,
      "config": {
        "rowSelector": "a[href*='Hotel_Review-'][href*='Reviews-'], div[data-automation='hotel-card'], div[data-testid='property-card'], div[data-test-target='hotel-listing'], div[data-locationid][data-prwidget-name*='meta_hsx'], div.listing",
        "fileName": "tripadvisor-hotel-info-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "Page_URL",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "classement",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const t=card.innerText||ROW.innerText||'';const m=t.match(/^\\s*(\\d{1,4})\\s*[\\.)]/m)||t.match(/(?:^|\\s)#\\s*(\\d{1,4})/);if(m)return m[1]+'.';const sel='a[href*=\"Hotel_Review-\"][href*=\"Reviews-\"]';const rows=Array.from(document.querySelectorAll(sel)).filter((a,i,arr)=>arr.findIndex(x=>x.href===a.href)===i);const key=ROW.matches(sel)?ROW.href:(ROW.querySelector(sel)||{}).href;const idx=rows.findIndex(a=>a.href===key);const om=location.pathname.match(/-oa(\\d+)-/);const offset=om?parseInt(om[1],10):0;return idx>=0?String(offset+idx+1)+'.':'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "nom",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const qs=['[data-automation=\"hotel-card-title\"]','[data-testid=\"title\"]','a[data-clicksource=\"HotelName\"]','.property_title','a[href*=\"Hotel_Review-\"]','h3 a','h2 a'];for(const s of qs){const e=card.querySelector?card.querySelector(s):null;const v=e&&e.textContent&&e.textContent.trim();if(v)return v.replace(/^\\d+\\.?\\s*/,'');}const self=(ROW.textContent||ROW.getAttribute('aria-label')||ROW.getAttribute('title')||'').trim();return self.replace(/^\\d+\\.?\\s*/,'');})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "détail_url",
            "selector": "(()=>{const a=ROW.matches&&ROW.matches('a[href*=\"Hotel_Review-\"]')?ROW:(ROW.querySelector?ROW.querySelector('a[href*=\"Hotel_Review-\"]'):null);return a?new URL(a.getAttribute('href'),location.origin).href:'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "image_url",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const img=card.querySelector?card.querySelector('img'):null;return img?(img.currentSrc||img.src||img.getAttribute('data-src')||img.getAttribute('data-lazyurl')||''):'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "prix",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const qs=['[data-automation=\"price\"]','[data-testid=\"price\"]','.price-wrap','.price'];for(const s of qs){const e=card.querySelector?card.querySelector(s):null;const v=e&&e.textContent&&e.textContent.trim();if(v&&/[€$£]/.test(v))return v;}const m=(card.innerText||'').match(/\\b\\d[\\d\\s .,]*\\s*€/);return m?m[0].trim():'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "note",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const el=card.querySelector?card.querySelector('[aria-label*=\"bulle\"],[aria-label*=\"rating\"],[title*=\"bulle\"],[title*=\"rating\"]'):null;const txt=((el&&(el.getAttribute('aria-label')||el.getAttribute('title')||el.textContent))||card.innerText||'');const m=txt.match(/([0-5](?:[\\.,]\\d)?)\\s*(?:sur|of)?\\s*5/)||txt.match(/([0-5][\\.,]\\d)/);return m?m[1].replace('.',','):'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "nombre_avis",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const t=card.innerText||'';const m=t.match(/([\\d\\s .,]+)\\s*avis/i)||t.match(/([\\d\\s .,]+)\\s*reviews/i);return m?m[1].trim().replace(/\\s+/g,' ')+' avis':'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "site_hôtel",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const a=card.querySelector?card.querySelector('a[href*=\"/Commerce\"], a[href*=\"Commerce\"]'):null;return a?new URL(a.getAttribute('href'),location.origin).href:'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "auteur_avis",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const qs=['[data-test-target=\"reviewer-name\"]','[data-testid=\"reviewer-name\"]','.reviewerName','.member_info a','a[href*=\"Profile\"]'];for(const s of qs){const e=card.querySelector?card.querySelector(s):null;const v=e&&e.textContent&&e.textContent.trim();if(v)return v;}return '';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "commentaire",
            "selector": "(()=>{const card=ROW.closest('div[data-automation=\"hotel-card\"],div[data-testid=\"property-card\"],div[data-test-target=\"hotel-listing\"],div[data-locationid],div.listing')||ROW;const qs=['[data-test-target=\"review-snippet\"]','[data-testid=\"review-snippet\"]','.reviewText','.review_snippet','.prw_reviews_text_summary_hsx','q','blockquote'];for(const s of qs){const e=card.querySelector?card.querySelector(s):null;const v=e&&e.textContent&&e.textContent.trim();if(v&&v.length>20)return v;}const texts=Array.from(card.querySelectorAll?card.querySelectorAll('span,div,p'):[]).map(e=>(e.textContent||'').trim()).filter(v=>v.length>80&&!/Tripadvisor|Réserver|€|avis/i.test(v));return texts[0]||'';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Numéro_de_la_page",
            "selector": "(()=>{const m=location.pathname.match(/-oa(\\d+)-/);return m?String(Math.floor(parseInt(m[1],10)/30)+1):'1';})()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "structured-export-2",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1900,
      "position_y": 520,
      "config": {
        "rowSelector": "body",
        "fileName": "tripadvisor-hotel-info-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#ff832b]",
        "columns": [
          {
            "name": "Page_URL",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "classement",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "nom",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "détail_url",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "image_url",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "prix",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "note",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "nombre_avis",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "site_hôtel",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "auteur_avis",
            "selector": "''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "commentaire",
            "selector": "(()=>{const txt=document.body&&document.body.innerText?document.body.innerText:'';const hasDataDome=document.documentElement.innerHTML.includes('captcha-delivery.com')||/DataDome|CAPTCHA|Device Check/i.test(txt);return hasDataDome?'BLOCKED_BY_DATADOME_CAPTCHA: hotel listing DOM was not available in this browser session':'NO_HOTEL_ROWS_FOUND: page loaded but no hotel listing rows matched the configured selectors';})()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Numéro_de_la_page",
            "selector": "(()=>{const m=location.pathname.match(/-oa(\\d+)-/);return m?String(Math.floor(parseInt(m[1],10)/30)+1):'1';})()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2260,
      "position_y": 320,
      "config": {
        "duration": 2,
        "color": "bg-[#ff832b]"
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2620,
      "position_y": 320,
      "config": {
        "color": "bg-[#ff832b]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "structured-export-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 28,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 388,
      "position_y": 116,
      "width": 2120,
      "height": 396,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1468,
      "position_y": 116,
      "width": 1400,
      "height": 396,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1828,
      "position_y": 16,
      "width": 380,
      "height": 696,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1",
          "structured-export-2"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort TripAdvisor France hotel info scraper equivalent to the Octoparse template. Extracts hotel ranking, name, detail URL, image, price, rating, review count, hotel website/commerce link, review author, review snippet, source page URL, and page number from TripAdvisor hotel listing pages. Uses predictable TripAdvisor London pagination URLs with -oa30 offsets and appends all pages into one CSV. Live analysis and autonomous tests show TripAdvisor/DataDome returns HTTP 403 CAPTCHA/interstitial in this environment; this template checks for hotel rows, extracts normally when available, and writes a diagnostic blocked row per URL when blocked so the pagination loop still completes. Successful hotel scraping may require a trusted browser profile, manual CAPTCHA resolution, or compliant access/proxy.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `a[href*='Hotel_Review-'][href*='Reviews-'], div[data-automation='hotel-card'], div[data-testid='property-card'], div[dat`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 1740,
      "position_y": 200,
      "width": 340,
      "height": 170,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (Page_URL, classement, nom, détail_url, image_url). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2100,
      "position_y": 100,
      "width": 340,
      "height": 130,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-structured-export-2",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (Page_URL, classement, nom, détail_url, image_url). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2100,
      "position_y": 500,
      "width": 340,
      "height": 130,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-2"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2820,
      "position_y": 300,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}