{
  "version": "1.0.0",
  "exported_at": "2026-06-03T00:00:00.000Z",
  "project": {
    "name": "Airbnb EN Review Details Scraper",
    "description": "Best-effort UScraper equivalent of the Octoparse Airbnb EN Review Details Scraper. Targets Airbnb room/review URLs and exports review details: house URL, review URL, average category ratings, reviewer name/location, posted month, review text, trip purpose, and star rating. Navigation strategy: uses navigate.urls[] so users can add multiple Airbnb room or /reviews URLs, then runs a JS review-loader that opens the full reviews modal/page, scrolls lazy-loaded reviews, deduplicates review cards, creates normalized data rows, and appends results across input URLs. Airbnb may show CAPTCHA, require manual interaction, or change dynamic markup.",
    "color": "bg-[#ff5a5f]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.airbnb.com/rooms/788465505169839820/reviews?source_impression_id=p3_1757302399_P3N4b8IbdRs9Ush6"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "timeout": 45,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "selector": "h1",
        "timeout": 45,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1560,
      "position_y": 260,
      "config": {
        "jsCode": "(async () => {\n  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));\n  const text = (el) => (el && (el.innerText || el.textContent || '')).trim();\n  const clean = (s) => String(s || '').replace(/\\s+/g, ' ').trim();\n  const monthYear = /\\b(January|February|March|April|May|June|July|August|September|October|November|December)\\s+\\d{4}\\b/i;\n  const ratingRe = /Rating,\\s*\\d+\\s*stars?/i;\n\n  const houseMatch = location.href.match(/\\/rooms\\/(\\d+)/);\n  const houseUrl = houseMatch ? `https://www.airbnb.com/rooms/${houseMatch[1]}` : location.href.split('?')[0].replace(/\\/reviews\\/?$/, '');\n  const reviewUrl = location.href.includes('/reviews') ? location.href : (houseMatch ? `https://www.airbnb.com/rooms/${houseMatch[1]}/reviews` : location.href);\n\n  const controls = Array.from(document.querySelectorAll('button, a'));\n  const showAllReviews = controls.find((el) => /show\\s+all\\s+\\d+\\s+reviews/i.test(text(el)) || /show\\s+all.*reviews/i.test(text(el)) || (/reviews/i.test(text(el)) && /\\d+/.test(text(el))));\n  if (showAllReviews) {\n    try { showAllReviews.click(); } catch (_) {}\n    await sleep(2500);\n  }\n\n  for (const btn of Array.from(document.querySelectorAll('button'))) {\n    if (/^show more$/i.test(text(btn))) {\n      try { btn.click(); } catch (_) {}\n      await sleep(100);\n    }\n  }\n\n  const getScrollable = () => {\n    const candidates = Array.from(document.querySelectorAll('[role=\"dialog\"], div, main, body, html')).filter((el) => {\n      try { return el.scrollHeight > el.clientHeight + 150; } catch (_) { return false; }\n    });\n    candidates.sort((a, b) => (b.scrollHeight - b.clientHeight) - (a.scrollHeight - a.clientHeight));\n    return candidates[0] || document.scrollingElement || document.documentElement;\n  };\n\n  let stableRounds = 0;\n  let lastProfileCount = 0;\n  let lastHeight = 0;\n  for (let i = 0; i < 45; i++) {\n    const scroller = getScrollable();\n    const beforeCount = document.querySelectorAll('a[href*=\"/users/profile/\"]:not([href*=\"previous_page_name\"])').length;\n    const beforeHeight = scroller.scrollHeight || document.documentElement.scrollHeight;\n    try { scroller.scrollTop = scroller.scrollHeight; } catch (_) {}\n    try { window.scrollTo(0, document.body.scrollHeight); } catch (_) {}\n    await sleep(850);\n    for (const btn of Array.from(document.querySelectorAll('button'))) {\n      if (/^show more$/i.test(text(btn))) {\n        try { btn.click(); } catch (_) {}\n        await sleep(70);\n      }\n    }\n    const afterCount = document.querySelectorAll('a[href*=\"/users/profile/\"]:not([href*=\"previous_page_name\"])').length;\n    const afterHeight = scroller.scrollHeight || document.documentElement.scrollHeight;\n    if (afterCount === lastProfileCount && afterHeight === lastHeight && afterCount === beforeCount && afterHeight === beforeHeight) stableRounds += 1;\n    else stableRounds = 0;\n    lastProfileCount = afterCount;\n    lastHeight = afterHeight;\n    if (stableRounds >= 5) break;\n  }\n\n  const allDocText = () => {\n    const headingAndAria = Array.from(document.querySelectorAll('[aria-label], h1, h2, h3')).map((el) => `${el.getAttribute('aria-label') || ''} ${text(el)}`).join(' ');\n    return `${document.body.innerText || ''} ${document.body.textContent || ''} ${headingAndAria}`;\n  };\n\n  const avg = (label) => {\n    const t = allDocText();\n    const variants = label === 'check-in' ? ['check-in', 'check in'] : [label];\n    for (const v of variants) {\n      const re1 = new RegExp('Rated\\\\s+([0-9.]+)\\\\s+out of 5 stars for ' + v, 'i');\n      const m1 = t.match(re1);\n      if (m1) return m1[1];\n    }\n    const display = label === 'check-in' ? 'Check[- ]?in' : label.charAt(0).toUpperCase() + label.slice(1);\n    const re2 = new RegExp(display + '\\\\s+([0-9.]+)', 'i');\n    const m2 = t.match(re2);\n    return m2 ? m2[1] : '';\n  };\n\n  const averageCleanliness = avg('cleanliness');\n  const averageAccuracy = avg('accuracy');\n  const averageCommunication = avg('communication');\n  const averageLocation = avg('location');\n  const averageCheckIn = avg('check-in');\n  const averageValue = avg('value');\n\n  const parseRecord = (el) => {\n    const raw = text(el);\n    if (!raw || !ratingRe.test(raw) || !monthYear.test(raw)) return null;\n    const ratingMatches = raw.match(/Rating,\\s*\\d+\\s*stars?/gi) || [];\n    if (ratingMatches.length !== 1) return null;\n    const postedMatch = raw.match(monthYear);\n    const ratingMatch = raw.match(ratingRe);\n    const lines = raw.split('\\n').map((s) => s.trim()).filter(Boolean);\n    const ratingLineIdx = lines.findIndex((l) => ratingRe.test(l));\n    const badName = /^(Rating,|,|·|Show more|Show less|Helpful|Report|Response from|Rated\\s)/i;\n    const userName = clean(lines.find((l, idx) => idx <= Math.max(2, ratingLineIdx) && !badName.test(l) && !monthYear.test(l) && !/(trip|stayed|kids|family|business|couple|few nights|night)/i.test(l) && !/^[0-9.]+$/.test(l)) || '');\n    let reviewerLocation = '';\n    if (ratingLineIdx > 0) {\n      reviewerLocation = clean(lines.slice(0, ratingLineIdx).find((l, idx) => idx > 0 && /,/.test(l) && /[A-Za-z]/.test(l) && !badName.test(l)) || '');\n    }\n    if (!reviewerLocation) {\n      reviewerLocation = clean(lines.find((l, idx) => idx > 0 && /,/.test(l) && /[A-Za-z]/.test(l) && !ratingRe.test(l) && !monthYear.test(l) && !/(trip|stayed|kids|family|business|couple|few nights|night)/i.test(l)) || '');\n    }\n    let purpose = '';\n    const postedIdx = lines.findIndex((l) => monthYear.test(l));\n    if (postedIdx >= 0) {\n      const nearby = lines.slice(postedIdx, postedIdx + 4).join(' ');\n      const pm = nearby.match(/(Stayed\\s+with\\s+kids|Stayed\\s+a\\s+few\\s+nights|Group\\s+trip|Family\\s+trip|Business\\s+trip|Couple\\s+trip|Solo\\s+trip)/i);\n      purpose = pm ? clean(pm[1]) : '';\n    }\n    let review = '';\n    if (postedMatch) {\n      let s = raw.slice(postedMatch.index + postedMatch[0].length);\n      s = s.replace(/^\\s*,?\\s*·?\\s*/, '');\n      s = s.replace(/^(Stayed\\s+with\\s+kids|Stayed\\s+a\\s+few\\s+nights|Group\\s+trip|Family\\s+trip|Business\\s+trip|Couple\\s+trip|Solo\\s+trip)\\b\\s*,?\\s*·?\\s*/i, '');\n      s = s.replace(/\\b(Show more|Show less)\\b/gi, ' ');\n      s = s.replace(/\\bResponse from\\b[\\s\\S]*$/i, '');\n      s = s.replace(/\\b(Helpful|Report)\\b[\\s\\S]*$/i, '');\n      review = clean(s);\n    }\n    if (!userName || !postedMatch || !ratingMatch) return null;\n    return {\n      house_url: houseUrl,\n      review_url: reviewUrl,\n      average_cleanliness: averageCleanliness,\n      average_accuracy: averageAccuracy,\n      average_communication: averageCommunication,\n      average_location: averageLocation,\n      average_check_in: averageCheckIn,\n      average_value: averageValue,\n      username: userName,\n      posted: postedMatch[0],\n      review,\n      purpose_of_trip: purpose,\n      customer_star_rating: clean(ratingMatch[0]),\n      reviewer_location: reviewerLocation\n    };\n  };\n\n  const recordsByKey = new Map();\n  const candidates = Array.from(document.querySelectorAll('div, li, section')).filter((el) => {\n    const t = text(el);\n    return t && t.length < 3500 && ratingRe.test(t) && monthYear.test(t) && el.querySelector('a[href*=\"/users/profile/\"]');\n  });\n\n  for (const el of candidates) {\n    const rec = parseRecord(el);\n    if (!rec) continue;\n    const key = [rec.username, rec.posted, rec.customer_star_rating, rec.reviewer_location].join('|').toLowerCase();\n    const prev = recordsByKey.get(key);\n    if (!prev || (rec.review || '').length > (prev.review || '').length) recordsByKey.set(key, rec);\n  }\n\n  document.querySelectorAll('#uscraper-normalized-airbnb-reviews').forEach((el) => el.remove());\n  const container = document.createElement('div');\n  container.id = 'uscraper-normalized-airbnb-reviews';\n  container.style.cssText = 'display:block; position:absolute; top:0; left:0; width:20px; min-height:20px; opacity:0.01; overflow:hidden; pointer-events:none; z-index:1;';\n  for (const rec of recordsByKey.values()) {\n    const row = document.createElement('div');\n    row.setAttribute('data-uscraper-review-row', 'true');\n    row.style.cssText = 'display:block; height:1px; width:1px;';\n    for (const [k, v] of Object.entries(rec)) row.setAttribute('data-' + k.replaceAll('_', '-'), v || '');\n    row.textContent = `${rec.username} ${rec.posted} ${rec.customer_star_rating} ${rec.review}`;\n    container.appendChild(row);\n  }\n  document.body.prepend(container);\n})();",
        "waitForCompletion": true,
        "timeout": 120,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1920,
      "position_y": 260,
      "config": {
        "duration": 2,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "wait-for-element-2",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 2280,
      "position_y": 260,
      "config": {
        "selector": "[data-uscraper-review-row=\"true\"]",
        "timeout": 45,
        "visible": true,
        "color": "bg-[#42be65]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2640,
      "position_y": 260,
      "config": {
        "rowSelector": "[data-uscraper-review-row=\"true\"]",
        "columns": [
          {
            "name": "House_url",
            "selector": "",
            "attribute": "data-house-url"
          },
          {
            "name": "Review_url",
            "selector": "",
            "attribute": "data-review-url"
          },
          {
            "name": "Average_Cleanliness",
            "selector": "",
            "attribute": "data-average-cleanliness"
          },
          {
            "name": "Average_Accuracy",
            "selector": "",
            "attribute": "data-average-accuracy"
          },
          {
            "name": "Average_Communication",
            "selector": "",
            "attribute": "data-average-communication"
          },
          {
            "name": "Average_Location",
            "selector": "",
            "attribute": "data-average-location"
          },
          {
            "name": "Average_Check_in",
            "selector": "",
            "attribute": "data-average-check-in"
          },
          {
            "name": "Average_Value",
            "selector": "",
            "attribute": "data-average-value"
          },
          {
            "name": "UserName",
            "selector": "",
            "attribute": "data-username"
          },
          {
            "name": "Posted",
            "selector": "",
            "attribute": "data-posted"
          },
          {
            "name": "Review",
            "selector": "",
            "attribute": "data-review"
          },
          {
            "name": "Purpose_Of_Trip",
            "selector": "",
            "attribute": "data-purpose-of-trip"
          },
          {
            "name": "Customer_Star_Rating",
            "selector": "",
            "attribute": "data-customer-star-rating"
          },
          {
            "name": "Reviewer_Location",
            "selector": "",
            "attribute": "data-reviewer-location"
          }
        ],
        "fileName": "airbnb_en_review_details_scraper_final.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]"
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 3000,
      "position_y": 260,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 408,
      "position_y": 156,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1",
          "wait-for-element-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1488,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2568,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2928,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Best-effort UScraper equivalent of the Octoparse Airbnb EN Review Details Scraper. Targets Airbnb room/review URLs and exports review details: house URL, review URL, average category ratings, reviewer name/location, posted month, review text, trip purpose, and star rating. Navigation strategy: uses navigate.urls[] so users can add multiple Airbnb room or /reviews URLs, then runs a JS review-loader that opens the full reviews modal/page, scrolls lazy-loaded reviews, deduplicates review cards, creates normalized data rows, and appends results across input URLs. Airbnb may show CAPTCHA, require manual interaction, or change dynamic markup.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 1 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 680,
      "position_y": 240,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(async () => {\n  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));\n  const te...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Extracts rows matching `[data-uscraper-review-row=\"true\"]`. Confirm row count > 0 before running at scale.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 240,
      "width": 340,
      "height": 115,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 3200,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}