{
  "version": "1.0.0",
  "exported_at": "2026-05-31T18:15:00.000Z",
  "project": {
    "name": "SUUMO Detached House Listing Scraper",
    "description": "Extracts detached used-house listing data from SUUMO Sapporo listing pages, matching the Octoparse preview fields: property name/title, property URL, price, location, train/access, land area, layout, building area, build date, agent/company, review/comment count when visible, phone number, and source page URL. Pagination is implemented with a click-next loop using the Japanese “次へ” pagination link, so all result pages are scraped until no next page remains. Note: some sample detail URLs are currently expired/404 on SUUMO, so this template scrapes the active listing page.",
    "color": "bg-[#42be65]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "url": "https://suumo.jp/chukoikkodate/hokkaido_/sa_sapporo/",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "selector": ".property_unit",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "rowSelector": ".property_unit",
        "fileName": "suumo-detached-house-listing-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "property_name",
            "selector": "(() => { const h=ROW.querySelector('h2'); return h ? h.textContent.trim().replace(/\\s+/g,' ') : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "property_link",
            "selector": "(() => { const a=ROW.querySelector('a[href*=\"/nc_\"]'); return a ? a.href : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "price",
            "selector": "(() => { const labels=['販売価格','価格']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(() => { const labels=['所在地']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "train_access",
            "selector": "(() => { const labels=['沿線・駅','沿線']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "land_area",
            "selector": "(() => { const labels=['土地面積']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "layout",
            "selector": "(() => { const labels=['間取り']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "building_area",
            "selector": "(() => { const labels=['建物面積']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "built_date",
            "selector": "(() => { const labels=['築年月']; for (const label of labels) { const cell=Array.from(ROW.querySelectorAll('dt,th')).find(e=>e.textContent.trim().includes(label)); if (cell) { const value=(cell.parentElement&&cell.parentElement.querySelector('dd,td'))||cell.nextElementSibling; return value ? value.textContent.trim().replace(/\\s+/g,' ') : ''; } } return ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "agent_name",
            "selector": "(() => { const text=ROW.textContent.replace(/\\s+/g,' ').trim(); const phone=text.match(/0\\d{1,4}-\\d{1,4}-\\d{3,4}/); if(!phone) return ''; const before=text.slice(0,text.indexOf(phone[0])).trim(); const companyPattern=/(\\(株\\)|（株）|\\(有\\)|（有）|株式会社|有限会社|合同会社|不動産|SUMiTAS|ホーム|住宅|リバブル|住友|ハウス|センター|販売|企画|建設|地所|流通|開発|エステート|リアルティ|信託|商事|札都|カチタス|ハウスドゥ|センチュリー|三井|東急|ミサワ|土屋)/; const tokens=before.split(' ').map(t=>t.trim()).filter(Boolean); for(let i=tokens.length-1;i>=0;i--){ if(companyPattern.test(tokens[i])) return tokens[i].replace(/^担当[:：]?/,''); } const m=before.match(/((?:\\(株\\)|（株）|\\(有\\)|（有）|株式会社|有限会社|合同会社)[^\\s　]{1,40})$/); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "customer_review_comment_count",
            "selector": "(() => { const text=ROW.textContent.replace(/\\s+/g,' '); const m=text.match(/接客評価コメント\\s*(\\d+)/)||text.match(/口コミ\\s*(\\d+)/)||text.match(/評価コメント\\s*(\\d+)/); return m ? m[1] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "phone_number",
            "selector": "(() => { const m=ROW.textContent.match(/0\\d{1,4}-\\d{1,4}-\\d{3,4}/); return m ? m[0] : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "page_url",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "selector": "(//div[contains(@class,'pagination')]//a[normalize-space()='次へ'])[1]"
      }
    },
    {
      "block_id": "click-1",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "selector": "(//div[contains(@class,'pagination')]//a[normalize-space()='次へ'])[1]",
        "timeout": 15
      }
    },
    {
      "block_id": "wait-for-page-load-2",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "wait-for-element-2",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 2640,
      "position_y": 220,
      "config": {
        "selector": ".property_unit",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2640,
      "position_y": 560,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 1560,
      "position_y": 560,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "click-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 2840,
      "height": 636,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "wait-for-page-load-2",
          "wait-for-element-2",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1128,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 1488,
      "position_y": 116,
      "width": 680,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "click-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 1488,
      "position_y": 456,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts detached used-house listing data from SUUMO Sapporo listing pages, matching the Octoparse preview fields: property name/title, property URL, price, location, train/access, land area, layout, building area, build date, agent/company, review/comment count when visible, phone number, and source page URL. Pagination is implemented with a click-next loop using the Japanese “次へ” pagination link, so all result pages are scraped until no next page remains. Note: some sample detail URLs are currently expired/404 on SUUMO, so this template scrapes the active listing page.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
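    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with 13 JS-evaluated columns (property_name, property_link, price, location, train_access, land_area, layout, building_area, built_date, agent_name, customer_review_comment_count, phone_number, page_url), one row per `.property_unit` element. These selectors are fragile — update them if the site layout changes.",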
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 200,
      "width": 340,
      "height": 133,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks whether the “次へ” (next page) link matched by `(//div[contains(@class,'pagination')]//a[normalize-space()='次へ'])[1]` exists. The True branch continues to the Click block to open the next results page; the False branch goes to End because no further page remains. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 200,
      "width": 340,
      "height": 153,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-click-1",
      "element_type": "note",
      "title": "Note: Click",
      "content": "Clicks the “次へ” (next page) link located by XPath `(//div[contains(@class,'pagination')]//a[normalize-space()='次へ'])[1]`. XPath selectors break easily if the DOM structure changes, so re-verify this one if pagination stops advancing.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 200,
      "width": 340,
      "height": 122,
      "z_index": 22,
      "data": {
        "block_id": "click-1"
      }
    }
  ]
}