{
  "version": "1.0.0",
  "exported_at": "2026-06-02T07:45:00.000Z",
  "project": {
    "name": "Tripadvisor Hotel List Scraper for Italy",
    "description": "Best-effort Tripadvisor.it hotel listing scraper equivalent to the Octoparse template. It targets Italian Tripadvisor hotel listing pages such as https://www.tripadvisor.it/Hotels-g187849-Milan_Lombardy-Hotels.html and extracts Url_inserito, Parola_chiave, Nome_dell_hotel, Prezzo, Valutazione, Recensioni_totali, and Pagina_dei_dettagli. When listing content is accessible, it exports the current results and follows click-next pagination to scrape all available pages. The attached analysis and autonomous tests show Tripadvisor serving a DataDome CAPTCHA/403 page; this template detects CAPTCHA, waits once for manual solving, then ends gracefully if hotel rows are still unavailable. A trusted browser profile or manual CAPTCHA resolution is required for successful extraction.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 456,
      "position_y": 220,
      "config": {
        "url": "https://www.tripadvisor.it/Hotels-g187849-Milan_Lombardy-Hotels.html",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 792,
      "position_y": 220,
      "config": {
        "timeout": 60
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for initial client-side rendering",
      "position_x": 1128,
      "position_y": 220,
      "config": {
        "duration": 5
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if DataDome CAPTCHA is present",
      "position_x": 1464,
      "position_y": 220,
      "config": {
        "selector": "iframe[title*=\"DataDome\"], iframe[src*=\"captcha-delivery.com\"], script[src*=\"captcha-delivery.com\"]"
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Manual CAPTCHA solve window",
      "position_x": 1800,
      "position_y": 520,
      "config": {
        "duration": 90
      }
    },
    {
      "block_id": "wait-for-page-load-2",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait after CAPTCHA/manual interaction",
      "position_x": 2136,
      "position_y": 520,
      "config": {
        "timeout": 60
      }
    },
    {
      "block_id": "element-exists-2",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if hotel listing rows are available",
      "position_x": 2472,
      "position_y": 520,
      "config": {
        "selector": "div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id], a[href*=\"/Hotel_Review-\"]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export hotel listing data",
      "position_x": 2808,
      "position_y": 520,
      "config": {
        "rowSelector": "div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id], a[href*=\"/Hotel_Review-\"]:not(div[data-automation=\"hotel-card\"] a):not(div[data-test-target=\"hotel-card\"] a):not(div[data-locationid] a):not(div[data-location-id] a)",
        "fileName": "crawler-lista-hotel-tripsdvisor.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "Url_inserito",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Parola_chiave",
            "selector": "(() => { const h=(document.querySelector('h1')?.innerText || document.title || '').trim(); const m=h.match(/hotel\\s+(?:a\\s+)?([^,|:-]+)/i); if (m) return 'Hotel ' + m[1].trim(); const p=(decodeURIComponent(location.pathname).match(/Hotels-g\\d+(?:-oa\\d+)?-([^.-]+)-Hotels/i)||[])[1]; return p ? 'Hotel ' + p.split('_')[0].replace(/[-_]/g,' ') : 'Hotel Milano'; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Nome_dell_hotel",
            "selector": "(() => { const card = ROW.matches('a') ? (ROW.closest('div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id]') || ROW) : ROW; const el = ROW.matches('a') ? ROW : card.querySelector('[data-automation=\"hotel-card-title\"] a, [data-automation=\"hotel-card-title\"], a[href*=\"/Hotel_Review-\"]'); return (el?.innerText || el?.textContent || '').trim().replace(/^\\d+\\.\\s*/, ''); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Prezzo",
            "selector": "(() => { const card = ROW.matches('a') ? (ROW.closest('div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id]') || ROW.closest('div') || ROW) : ROW; const el=card.querySelector('[data-automation=\"hotel-card-price\"], [data-test-target=\"price\"], [data-automation*=\"price\" i], [class*=\"price\" i]'); const t=(el?.innerText || card.innerText || '').replace(/\\s+/g,' '); const m=t.match(/(?:€\\s?\\d+[\\d.,]*|\\d+[\\d.,]*\\s?€)/); return m ? m[0].trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Valutazione",
            "selector": "(() => { const card = ROW.matches('a') ? (ROW.closest('div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id]') || ROW.closest('div') || ROW) : ROW; const nodes=Array.from(card.querySelectorAll('[aria-label], [title], svg')); const el=nodes.find(n => /bubble|pall|valut|rating/i.test((n.getAttribute('aria-label') || '') + ' ' + (n.getAttribute('title') || ''))); const t=(el?.getAttribute('aria-label') || el?.getAttribute('title') || card.innerText || ''); const m=t.match(/\\b\\d+[,.]\\d+\\b/); return m ? m[0].replace('.', ',') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Recensioni_totali",
            "selector": "(() => { const card = ROW.matches('a') ? (ROW.closest('div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id]') || ROW.closest('div') || ROW) : ROW; const t=(card.innerText || '').replace(/\\s+/g,' '); const m=t.match(/(\\d{1,3}(?:[.,]\\d{3})+|\\d+)\\s*(?:recension[ei]|reviews?)/i); return m ? m[1].replace(/[.,]/g,'') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Pagina_dei_dettagli",
            "selector": "(() => { const a = ROW.matches('a') ? ROW : ROW.querySelector('a[href*=\"/Hotel_Review-\"]'); return a ? new URL(a.getAttribute('href'), location.origin).href : ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "element-exists-3",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check for next page",
      "position_x": 3144,
      "position_y": 520,
      "config": {
        "selector": "a[aria-label*=\"successiva\" i]:not([aria-disabled=\"true\"]):not(.disabled), a[aria-label*=\"Next\" i]:not([aria-disabled=\"true\"]):not(.disabled), a.nav.next:not(.disabled), a[data-smoke-attr=\"pagination-next-arrow\"]:not(.disabled)"
      }
    },
    {
      "block_id": "click-1",
      "block_type": "process",
      "title": "Click",
      "description": "Click next page",
      "position_x": 3480,
      "position_y": 520,
      "config": {
        "selector": "a[aria-label*=\"successiva\" i]:not([aria-disabled=\"true\"]):not(.disabled), a[aria-label*=\"Next\" i]:not([aria-disabled=\"true\"]):not(.disabled), a.nav.next:not(.disabled), a[data-smoke-attr=\"pagination-next-arrow\"]:not(.disabled)",
        "timeout": 20
      }
    },
    {
      "block_id": "wait-for-page-load-3",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait after pagination click",
      "position_x": 3816,
      "position_y": 520,
      "config": {
        "timeout": 60
      }
    },
    {
      "block_id": "sleep-3",
      "block_type": "process",
      "title": "Sleep",
      "description": "Allow next page results to render",
      "position_x": 4152,
      "position_y": 758,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 3144,
      "position_y": 520,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "element-exists-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-2",
      "from_connector_id": "right",
      "to_block_id": "element-exists-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-2",
      "from_connector_id": "true",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-2",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-3",
      "from_connector_id": "true",
      "to_block_id": "click-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-3",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-3",
      "from_connector_id": "right",
      "to_block_id": "sleep-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-3",
      "from_connector_id": "right",
      "to_block_id": "element-exists-2",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 384,
      "position_y": 116,
      "width": 4016,
      "height": 834,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "sleep-2",
          "wait-for-page-load-2",
          "wait-for-page-load-3",
          "sleep-3"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1392,
      "position_y": 116,
      "width": 2336,
      "height": 596,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "element-exists-2",
          "element-exists-3",
          "click-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2736,
      "position_y": 416,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 3072,
      "position_y": 416,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort Tripadvisor.it hotel listing scraper equivalent to the Octoparse template. It targets Italian Tripadvisor hotel listing pages such as https://www.tripadvisor.it/Hotels-g187849-Milan_Lombardy-Hotels.html and extracts Url_inserito, Parola_chiave, Nome_dell_hotel, Prezzo, Valutazione, Recensioni_totali, and Pagina_dei_dettagli. When listing content is accessible, it exports the current results and follows click-next pagination to scrape all available pages. The attached analysis and autonomous tests show Tripadvisor serving a DataDome CAPTCHA/403 page; this template detects CAPTCHA, waits once for manual solving, then ends gracefully if hotel rows are still unavailable. A trusted browser profile or manual CAPTCHA resolution is required for successful extraction.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `iframe[title*=\"DataDome\"], iframe[src*=\"captcha-delivery.com\"], script[src*=\"captcha-delivery.com\"]`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 1664,
      "position_y": 200,
      "width": 340,
      "height": 163,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-element-exists-2",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `div[data-automation=\"hotel-card\"], div[data-test-target=\"hotel-card\"], div[data-locationid], div[data-location-id], a[hr`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 2672,
      "position_y": 500,
      "width": 340,
      "height": 170,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-2"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (Url_inserito, Parola_chiave, Nome_dell_hotel, Prezzo, Valutazione). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 3008,
      "position_y": 500,
      "width": 340,
      "height": 135,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-element-exists-3",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `a[aria-label*=\"successiva\" i]:not([aria-disabled=\"true\"]):not(.disabled), a[aria-label*=\"Next\" i]:not([aria-disabled=\"tr`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 3344,
      "position_y": 500,
      "width": 340,
      "height": 170,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-3"
      }
    },
    {
      "id": "note-block-click-1",
      "element_type": "note",
      "title": "Note: Click",
      "content": "Pagination click — add waits after this block; the page reloads asynchronously.",
      "color": "#ee5396",
      "position_x": 3680,
      "position_y": 500,
      "width": 316,
      "height": 106,
      "z_index": 22,
      "data": {
        "block_id": "click-1"
      }
    }
  ]
}