{
  "version": "1.0.0",
  "exported_at": "2026-06-01T00:00:00.000Z",
  "project": {
    "name": "Bookingcom Reviews Scraper",
    "description": "Scrapes Booking.com hotel reviews for Shinjuku Prince Hotel, matching the Octoparse review template fields: hotel name, review URL, category scores, reviewer name/country, review date, review text, and review score. The workflow opens the Booking.com reviews area, extracts visible review cards, and follows review pagination with a guarded click-next loop. A safety cap of 190 review pages is included to prevent Booking.com's persistent Next controls from causing an infinite loop under UScraper's execution limit. CAPTCHA or Booking.com markup changes may require manual intervention.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "url": "https://www.booking.com/hotel/jp/shinjuku-prince.ja.html?aid=304142&label=gen173nr-1FCAEoggI46AdIFVgEaIkCiAEBmAEVuAEZyAEM2AEB6AEB-AECiAIBqAIDuALjj4GZBsACAdICJDQ5MTkxMWEzLTY0ZmQtNDI5My05MmQzLTdkMjJmNjcxMmQzZdgCBeACAQ&sid=de48acf0e7b0af3752df77ec2c30f964&dist=0;group_adults=2;group_children=0;hapos=1;hpos=1;no_rooms=1;req_adults=2;req_children=0;room1=A%2CA;sb_price_type=total;sr_order=popularity;srepoch=1663059951;srpvid=dbf43ff7e5560114;type=total;ucfs=1&#tab-reviews",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "jsCode": "(() => { const textOf = el => ((el && (el.textContent || el.getAttribute('aria-label'))) || '').trim(); const clickMatch = re => { const el = Array.from(document.querySelectorAll('button,a')).find(e => re.test(textOf(e))); if (el) { el.scrollIntoView({block:'center'}); el.click(); return true; } return false; }; window.__uscraper_review_pages = 0; window.scrollTo(0, Math.max(0, document.body.scrollHeight * 0.45)); clickMatch(/Guest reviews|Reviews|クチコミ/i); setTimeout(() => { clickMatch(/read all.{0,30}reviews|see all.{0,30}reviews|show all.{0,30}reviews|all reviews|すべてのクチコミ|クチコミをすべて表示|すべて表示/i); }, 900); setTimeout(() => { window.scrollBy(0, 900); }, 1800); })();",
        "waitForCompletion": false,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "selector": "[data-testid=\"review-card\"], .review_item, li.review_list_new_item_block",
        "timeout": 45,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "rowSelector": "[data-testid=\"review-card\"], .review_item, li.review_list_new_item_block",
        "fileName": "booking-jp-reviews-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "hotel_name",
            "selector": "(() => { const sels = ['[data-testid=\"title\"]', 'h2.pp-header__title', 'h2', 'h1']; for (const s of sels) { const el = document.querySelector(s); const txt = el && el.textContent.trim(); if (txt) return txt.replace(/\\s*\\(.*$/, '').replace(/\\s+Deals$/i, ''); } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "review_url",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "staff",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Staff|スタッフ)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "facilities",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Facilities|施設・設備)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "cleanliness",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Cleanliness|清潔さ)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "comfort",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Comfort|快適さ)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "value_for_money",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Value for money|Value|お得感)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Location|ロケーション)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "free_wifi",
            "selector": "(() => { const t = document.body.innerText; const m = t.match(/(?:Free WiFi|Free Wi-Fi|WiFi|Wi-Fi|WiFi\\s*無料)\\s*([0-9]+(?:\\.[0-9]+)?)/i); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "reviewer_id",
            "selector": "(() => { const q = s => ROW.querySelector(s); const getCountry = () => { const sels = ['[data-testid=\"reviewer-country\"]', '.bui-avatar-block__subtitle', '.reviewer_country', '.review_item_user_location']; for (const s of sels) { const el = q(s); const txt = el && el.textContent.trim(); if (txt) return txt; } return ''; }; const country = getCountry(); const sels = ['[data-testid=\"reviewer-name\"]', '.bui-avatar-block__title', '.reviewer_name', '.review_item_reviewer']; for (const s of sels) { const el = q(s); let txt = el && el.textContent.trim(); if (txt) { txt = txt.replace(/^Reviewed by\\s*/i, '').trim(); if (country && txt.endsWith(country)) txt = txt.slice(0, -country.length).trim(); return txt; } } const lines = ROW.innerText.split('\\n').map(x => x.trim()).filter(Boolean); let name = lines[0] || ''; if (country && name.endsWith(country)) name = name.slice(0, -country.length).trim(); return name; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "country",
            "selector": "(() => { const q = s => ROW.querySelector(s); const sels = ['[data-testid=\"reviewer-country\"]', '.bui-avatar-block__subtitle', '.reviewer_country', '.review_item_user_location']; for (const s of sels) { const el = q(s); const txt = el && el.textContent.trim(); if (txt) return txt; } const img = q('img[alt]'); if (img && img.alt.trim()) return img.alt.trim(); const lines = ROW.innerText.split('\\n').map(x => x.trim()).filter(Boolean); return lines[1] && !/[0-9]/.test(lines[1]) ? lines[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "review_date",
            "selector": "(() => { const q = s => ROW.querySelector(s); const sels = ['[data-testid=\"review-date\"]', '.review_item_date', '.c-review-block__date']; for (const s of sels) { const el = q(s); const txt = el && el.textContent.trim(); if (txt) return txt.replace(/^Reviewed:\\s*/i, '').replace(/^(?:クチコミ)?投稿日[:：]?\\s*/, '').trim(); } const txt = ROW.innerText; const m = txt.match(/(?:Reviewed:|投稿日[:：]?)\\s*([^\\n]+)/i) || txt.match(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},?\\s+\\d{4}\\b/i) || txt.match(/\\d{4}年\\d{1,2}月(?:\\d{1,2}日)?/); return m ? (m[1] || m[0]).trim() : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "review_text",
            "selector": "(() => { const sels = ['[data-testid=\"review-title\"]', '[data-testid=\"review-positive-text\"]', '[data-testid=\"review-negative-text\"]', '.review_item_header_content', '.review_pos .review_item_review_content', '.review_neg .review_item_review_content', '.c-review__body']; const parts = []; for (const s of sels) ROW.querySelectorAll(s).forEach(el => { const txt = el.textContent.trim(); if (txt && !parts.includes(txt)) parts.push(txt); }); if (parts.length) return parts.join(' '); const lines = ROW.innerText.split('\\n').map(x => x.trim()).filter(Boolean).filter(x => !/^(Scored|Rated|Reviewed:|投稿日|Helpful|Was this helpful|See translation)/i.test(x)); return lines.slice(2).join(' '); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "review_score",
            "selector": "(() => { const q = s => ROW.querySelector(s); const sels = ['[data-testid=\"review-score\"]', '.review-score-badge', '.bui-review-score__badge', '.c-score-bar__score']; for (const s of sels) { const el = q(s); const txt = el && el.textContent.trim(); if (txt) { const m = txt.match(/[0-9]+(?:\\.[0-9]+)?/); return m ? m[0] : txt; } } const m = ROW.innerText.match(/(?:Scored|Rated|Score)\\s*([0-9]+(?:\\.[0-9]+)?)/i) || ROW.innerText.match(/\\b([0-9]+(?:\\.[0-9]+)?)\\s*\\/\\s*10\\b/); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "inject-javascript-2",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "jsCode": "(() => { const MAX_PAGES = 190; window.__uscraper_review_pages = (window.__uscraper_review_pages || 0) + 1; document.querySelectorAll('[data-uscraper-review-next=\"true\"]').forEach(el => el.removeAttribute('data-uscraper-review-next')); const old = document.getElementById('uscraper-next-review-page'); if (old) old.remove(); const isUsable = el => { if (!el) return false; const disabled = el.disabled || el.getAttribute('aria-disabled') === 'true' || /disabled/i.test(el.className || ''); const r = el.getBoundingClientRect(); return !disabled && r.width > 0 && r.height > 0; }; const nextAttrs = ['[aria-label*=\"Next\" i]', '[aria-label*=\"次のページ\"]', '[aria-label*=\"次へ\"]']; const scoped = (prefix, suffix = '') => nextAttrs.map(a => prefix + a + suffix).join(', '); const selectorGroups = ['div[role=\"dialog\"] button[aria-label*=\"Next page\" i]', scoped('div[role=\"dialog\"] button'), scoped('[data-testid*=\"pagination\"] button'), scoped('nav button'), scoped('button'), scoped('', '[role=\"button\"]')]; let btn = null; for (const sel of selectorGroups) { btn = Array.from(document.querySelectorAll(sel)).find(isUsable); if (btn) break; } if (btn && window.__uscraper_review_pages < MAX_PAGES) { btn.setAttribute('data-uscraper-review-next', 'true'); btn.scrollIntoView({block:'center'}); const marker = document.createElement('div'); marker.id = 'uscraper-next-review-page'; marker.style.display = 'none'; marker.setAttribute('data-page-count', String(window.__uscraper_review_pages)); document.body.appendChild(marker); } })();",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 2640,
      "position_y": 220,
      "config": {
        "selector": "#uscraper-next-review-page",
        "color": "bg-[#ff832b]"
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 2640,
      "position_y": 560,
      "config": {}
    },
    {
      "block_id": "click-1",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 3000,
      "position_y": 560,
      "config": {
        "selector": "[data-uscraper-review-next=\"true\"]",
        "timeout": 15
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 3360,
      "position_y": 560,
      "config": {
        "duration": 3
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-2",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "click-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 3560,
      "height": 636,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 768,
      "position_y": 116,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1",
          "inject-javascript-2"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 116,
      "width": 680,
      "height": 636,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "click-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 2568,
      "position_y": 456,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Booking.com hotel reviews for Shinjuku Prince Hotel, matching the Octoparse review template fields: hotel name, review URL, category scores, reviewer name/country, review date, review text, and review score. The workflow opens the Booking.com reviews area, extracts visible review cards, and follows review pagination with a guarded click-next loop. A safety cap of 190 review pages is included to prevent Booking.com's persistent Next controls from causing an infinite loop under UScraper's execution limit. CAPTCHA or Booking.com markup changes may require manual intervention.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const textOf = el => ((el && (el.textContent || el.getAttribute('aria-label'))) || '').trim...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1040,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (hotel_name, review_url, staff, facilities, cleanliness). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 200,
      "width": 340,
      "height": 132,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-inject-javascript-2",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const MAX_PAGES = 190; window.__uscraper_review_pages = (window.__uscraper_review_pages || ...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-2"
      }
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `#uscraper-next-review-page`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 200,
      "width": 340,
      "height": 139,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-click-1",
      "element_type": "note",
      "title": "Note: Click",
      "content": "Pagination click — add waits after this block; the page reloads asynchronously.",
      "color": "#ee5396",
      "position_x": 3200,
      "position_y": 540,
      "width": 316,
      "height": 106,
      "z_index": 22,
      "data": {
        "block_id": "click-1"
      }
    }
  ]
}