{
  "version": "1.0.0",
  "exported_at": "2026-06-03T18:00:00.000Z",
  "project": {
    "name": "Tripadvisor Restaurant Scraper Listing",
    "description": "Best-effort Tripadvisor restaurant listing scraper for https://www.tripadvisor.com/FindRestaurants?geo=187323&cuisines=5086&establishmentTypes=10591&broadened=false. Extracts listing data equivalent to the Octoparse template: restaurant title/name, ranking, image, rating, review count, cuisine/type, price level, open/closed state, menu indicator, review snippets, and detail URL. Pagination is handled with a click-next loop: after each page is exported in append mode, the template checks for an enabled Next button/link, clicks it, waits for the next page/AJAX update, and repeats until no Next control exists. Tripadvisor may present CAPTCHA or anti-bot challenges; if encountered, pause and solve manually in the browser.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "width": 1920,
        "height": 1080
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "url": "https://www.tripadvisor.com/FindRestaurants?geo=187323&cuisines=5086&establishmentTypes=10591&broadened=false",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "duration": 3
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "jsCode": "(() => {\n  const old = document.querySelector('#uscraper-tripadvisor-results');\n  if (old) old.remove();\n\n  const container = document.createElement('div');\n  container.id = 'uscraper-tripadvisor-results';\n  container.setAttribute('data-uscraper-generated', 'true');\n  container.style.position = 'absolute';\n  container.style.left = '-99999px';\n  container.style.top = '0';\n  container.style.width = '1px';\n  container.style.height = '1px';\n  container.style.overflow = 'hidden';\n\n  const normalize = value => (value || '').replace(/\\s+/g, ' ').trim();\n  const unique = arr => Array.from(new Set(arr.map(normalize).filter(Boolean)));\n  const absUrl = href => {\n    try { return new URL(href, location.origin).href; } catch (_) { return href || ''; }\n  };\n\n  const isRestaurantDetailLink = a => {\n    const href = a.getAttribute('href') || '';\n    const text = normalize(a.innerText || a.textContent || '');\n    if (!href.includes('Restaurant_Review-') || !href.includes('-Reviews-')) return false;\n    if (href.includes('ShowUserReviews')) return false;\n    if (!text || text.length < 2) return false;\n    if (/^(reviews?|write a review|read more|view all)$/i.test(text)) return false;\n    return true;\n  };\n\n  const findCard = a => {\n    let best = a;\n    let node = a;\n    for (let i = 0; i < 9 && node && node !== document.body; i += 1) {\n      const text = normalize(node.innerText || '');\n      const restaurantLinks = node.querySelectorAll('a[href*=\"Restaurant_Review-\"][href*=\"-Reviews-\"]').length;\n      const hasImage = !!node.querySelector('img');\n      if (text.length > 80 && text.length < 3500 && restaurantLinks <= 8) {\n        best = node;\n        if (hasImage) break;\n      }\n      node = node.parentElement;\n    }\n    return best;\n  };\n\n  const imageFromCard = card => {\n    const imgs = Array.from(card.querySelectorAll('img'));\n    const preferred = imgs.find(img => {\n      const src = img.currentSrc || img.src || img.getAttribute('data-src') || '';\n      return /tripadvisor|media-cdn|dynamic-media-cdn/i.test(src) && !src.startsWith('data:image/svg');\n    }) || imgs.find(img => !String(img.src || '').startsWith('data:image/svg'));\n    if (!preferred) return '';\n    const srcset = preferred.getAttribute('srcset') || '';\n    if (srcset) {\n      const first = srcset.split(',').map(s => s.trim().split(' ')[0]).filter(Boolean)[0];\n      if (first) return absUrl(first);\n    }\n    return absUrl(preferred.currentSrc || preferred.src || preferred.getAttribute('data-src') || '');\n  };\n\n  const ratingFromCard = card => {\n    const ratingNode = card.querySelector('[data-automation=\"bubbleRatingValue\"], [data-automation*=\"RatingValue\"], [aria-label*=\"bubbles\" i], [title*=\"bubbles\" i], [aria-label*=\"of 5\" i], [title*=\"of 5\" i]');\n    const ratingText = normalize((ratingNode && (ratingNode.getAttribute('aria-label') || ratingNode.getAttribute('title') || ratingNode.textContent)) || '') || normalize(card.innerText || '');\n    const match = ratingText.match(/([0-5](?:\\.\\d)?)\\s*(?:of\\s*5|bubbles?)?/i);\n    return match ? match[1] : '';\n  };\n\n  const reviewsFromCard = card => {\n    const text = normalize(card.innerText || '');\n    const paren = text.match(/\\([\\d,\\.]+\\s+reviews?\\)/i);\n    if (paren) return paren[0];\n    const plain = text.match(/[\\d,\\.]+\\s+reviews?/i);\n    return plain ? plain[0] : '';\n  };\n\n  const priceFromCard = card => {\n    const text = normalize(card.innerText || '');\n    const match = text.match(/\\${1,4}(?:\\s*-\\s*\\${1,4})?/);\n    return match ? match[0] : '';\n  };\n\n  const stateFromCard = card => {\n    const text = normalize(card.innerText || '');\n    const match = text.match(/\\b(Closed now|Open now|Open until [^•|,]+|Opens in [^•|,]+|Closed until [^•|,]+)\\b/i);\n    return match ? normalize(match[0]) : '';\n  };\n\n  const menuFromCard = card => {\n    const menu = Array.from(card.querySelectorAll('a, button')).find(el => /^menu$/i.test(normalize(el.innerText || el.textContent || '')));\n    return menu ? 'Menu' : '';\n  };\n\n  const cuisineFromCard = card => {\n    const linkTexts = unique(Array.from(card.querySelectorAll('a[href*=\"-c\"], a[href*=\"cuisines\"], a[href*=\"Restaurants-\"]')).map(a => a.innerText || a.textContent || ''));\n    const filtered = linkTexts.filter(t => {\n      if (/restaurant_review|restaurants in|reviews?|menu|website|write/i.test(t)) return false;\n      if (/^\\$/.test(t)) return false;\n      if (/^#/.test(t)) return false;\n      if (t.length > 40) return false;\n      return true;\n    });\n    return unique(filtered).join(', ');\n  };\n\n  const reviewSnippetsFromCard = card => {\n    const direct = unique(Array.from(card.querySelectorAll('a[href*=\"ShowUserReviews\"], q, blockquote, [data-test-target*=\"review\" i], [class*=\"review\" i]')).map(el => el.innerText || el.textContent || ''));\n    let candidates = direct.filter(t => t.length >= 8 && t.length <= 260 && !/transparency report|write a review|all reviews/i.test(t));\n    if (candidates.length < 2) {\n      const lines = unique((card.innerText || '').split('\\n'));\n      const fallback = lines.filter(t => {\n        if (t.length < 25 || t.length > 260) return false;\n        if (/reviews?|restaurants?|menu|website|open now|closed now|\\${1,4}|tripadvisor/i.test(t)) return false;\n        return true;\n      });\n      candidates = unique(candidates.concat(fallback));\n    }\n    return candidates.slice(0, 2);\n  };\n\n  const anchors = Array.from(document.querySelectorAll('a[href*=\"Restaurant_Review-\"][href*=\"-Reviews-\"]')).filter(isRestaurantDetailLink);\n  const seen = new Set();\n  let count = 0;\n\n  anchors.forEach(a => {\n    const url = absUrl(a.getAttribute('href') || '');\n    const cleanUrl = url.split('#')[0];\n    if (seen.has(cleanUrl)) return;\n    seen.add(cleanUrl);\n\n    const card = findCard(a);\n    const cardText = normalize(card.innerText || '');\n    const rawTitle = normalize(a.innerText || a.textContent || '');\n    const rankingFromTitle = (rawTitle.match(/^\\s*(\\d+)\\./) || [])[1] || '';\n    const rankingFromText = (cardText.match(/#\\s?[\\d,]+(?:\\s+of\\s+[\\d,]+[^,\\n]*)?/i) || [])[0] || '';\n    const ranking = rankingFromTitle || rankingFromText;\n    const name = normalize(rawTitle.replace(/^\\d+\\.\\s*/, ''));\n    const snippets = reviewSnippetsFromCard(card);\n\n    const row = document.createElement('div');\n    row.className = 'uscraper-row';\n    row.setAttribute('data-titre', rawTitle);\n    row.setAttribute('data-name', name);\n    row.setAttribute('data-ranking', ranking);\n    row.setAttribute('data-image', imageFromCard(card));\n    row.setAttribute('data-note', ratingFromCard(card));\n    row.setAttribute('data-nombre-avis', reviewsFromCard(card));\n    row.setAttribute('data-type', cuisineFromCard(card));\n    row.setAttribute('data-niveau', priceFromCard(card));\n    row.setAttribute('data-etat', stateFromCard(card));\n    row.setAttribute('data-menu', menuFromCard(card));\n    row.setAttribute('data-avis1', snippets[0] || '');\n    row.setAttribute('data-avis2', snippets[1] || '');\n    row.setAttribute('data-url-detail', cleanUrl);\n    row.textContent = rawTitle || cleanUrl;\n    container.appendChild(row);\n    count += 1;\n  });\n\n  document.body.appendChild(container);\n  return count;\n})();",
        "waitForCompletion": true,
        "timeout": 15
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "selector": "#uscraper-tripadvisor-results .uscraper-row",
        "timeout": 20,
        "visible": false
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "rowSelector": "#uscraper-tripadvisor-results .uscraper-row",
        "fileName": "tripadvisor-restaurant-scraper-listing.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "titre",
            "selector": "",
            "attribute": "data-titre"
          },
          {
            "name": "restaurant_name",
            "selector": "",
            "attribute": "data-name"
          },
          {
            "name": "ranking",
            "selector": "",
            "attribute": "data-ranking"
          },
          {
            "name": "image",
            "selector": "",
            "attribute": "data-image"
          },
          {
            "name": "note",
            "selector": "",
            "attribute": "data-note"
          },
          {
            "name": "nombre_avis",
            "selector": "",
            "attribute": "data-nombre-avis"
          },
          {
            "name": "type",
            "selector": "",
            "attribute": "data-type"
          },
          {
            "name": "niveau",
            "selector": "",
            "attribute": "data-niveau"
          },
          {
            "name": "etat",
            "selector": "",
            "attribute": "data-etat"
          },
          {
            "name": "menu",
            "selector": "",
            "attribute": "data-menu"
          },
          {
            "name": "avis1",
            "selector": "",
            "attribute": "data-avis1"
          },
          {
            "name": "avis2",
            "selector": "",
            "attribute": "data-avis2"
          },
          {
            "name": "url_detail",
            "selector": "",
            "attribute": "data-url-detail"
          }
        ]
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 2640,
      "position_y": 220,
      "config": {
        "selector": "(//a[contains(translate(@aria-label,'NEXT','next'),'next') and not(@aria-disabled='true') and not(contains(@class,'disabled'))] | //button[contains(translate(@aria-label,'NEXT','next'),'next') and not(@disabled) and not(@aria-disabled='true') and not(contains(@class,'disabled'))])[last()]"
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 2640,
      "position_y": 560,
      "config": {}
    },
    {
      "block_id": "click-1",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 3000,
      "position_y": 560,
      "config": {
        "selector": "(//a[contains(translate(@aria-label,'NEXT','next'),'next') and not(@aria-disabled='true') and not(contains(@class,'disabled'))] | //button[contains(translate(@aria-label,'NEXT','next'),'next') and not(@disabled) and not(@aria-disabled='true') and not(contains(@class,'disabled'))])[last()]",
        "timeout": 15
      }
    },
    {
      "block_id": "wait-for-page-load-2",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 3360,
      "position_y": 560,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 3720,
      "position_y": 560,
      "config": {
        "duration": 3
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "click-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 408,
      "position_y": 116,
      "width": 3560,
      "height": 636,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1",
          "wait-for-page-load-2",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1488,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 116,
      "width": 680,
      "height": 636,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "click-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 2568,
      "position_y": 456,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort Tripadvisor restaurant listing scraper for https://www.tripadvisor.com/FindRestaurants?geo=187323&cuisines=5086&establishmentTypes=10591&broadened=false. Extracts listing data equivalent to the Octoparse template: restaurant title/name, ranking, image, rating, review count, cuisine/type, price level, open/closed state, menu indicator, review snippets, and detail URL. Pagination is handled with a click-next loop: after each page is exported in append mode, the template checks for an enabled Next button/link, clicks it, waits for the next page/AJAX update, and repeats until no Next control exists. Tripadvisor may present CAPTCHA or anti-bot challenges; if encountered, pause and solve manually in the browser.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => {\n  const old = document.querySelector('#uscraper-tripadvisor-results');\n  if (old) old.remov...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `(//a[contains(translate(@aria-label,'NEXT','next'),'next') and not(@aria-disabled='true') and not(contains(@class,'disab`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 200,
      "width": 340,
      "height": 170,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-click-1",
      "element_type": "note",
      "title": "Note: Click",
      "content": "Uses XPath `(//a[contains(translate(@aria-label,'NEXT','next'),'next') and not(@aria-disabled='true') and not(co`. XPath breaks easily if DOM structure changes.",
      "color": "#ee5396",
      "position_x": 3200,
      "position_y": 540,
      "width": 340,
      "height": 133,
      "z_index": 22,
      "data": {
        "block_id": "click-1"
      }
    }
  ]
}