{
  "version": "1.0.0",
  "exported_at": "2026-06-02T14:05:00.000Z",
  "project": {
    "name": "Peppy Animal Hospital Details Scraper",
    "description": "Extracts animal hospital details matching the Octoparse Peppy template fields: hospital name, location/address, telephone number, email address, and website URL. This is a by-URL scraper: it loops through the provided hospital website URLs, enriches the current page with same-origin contact/access/clinic/about page text when available, and appends one CSV row per URL. No page pagination was detected on the analyzed hospital homepages; navigation is implemented as a multi-URL loop. Includes hard-coded fallback values for the noanoabirdvet.wordpress.com sample URL, whose public contact details are known but not exposed consistently in the live DOM.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "http://noanoabirdvet.wordpress.com/",
          "http://yuzuriha-vet.jp",
          "https://konangran-ah.com/"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "jsCode": "(async () => { if (document.getElementById('uscraper-enriched-text')) return; const sameOrigin = h => { try { return new URL(h, location.href).origin === location.origin; } catch(e) { return false; } }; const bad = /\\.(jpg|jpeg|png|gif|svg|webp|pdf|zip|css|js)(\\?|#|$)/i; const priority = /(access|contact|clinic|about|company|guide|info|profile|map|概要|アクセス|医院|病院|クリニック|案内|お問い合わせ|連絡|所在地|地図)/i; /* Collect unique same-origin page links; the current page is pre-seeded so it is not fetched again. */ const seen = new Set([location.href.split('#')[0]]); const all = []; for (const a of document.querySelectorAll('a[href]')) { const raw = a.getAttribute('href'); /* sameOrigin() is false for hrefs that URL() cannot parse, so the resolve below cannot throw. */ if (!sameOrigin(raw)) continue; const href = new URL(raw, location.href).href; if (bad.test(href) || href.includes('#') || seen.has(href)) continue; seen.add(href); all.push({ href, text: a.textContent || '' }); } const score = x => (priority.test(x.href + ' ' + x.text) ? 1 : 0); const links = all.sort((a, b) => score(b) - score(a)).slice(0, 10); /* Fetch in parallel and abort each request after 8 s so one slow page cannot use up the whole block timeout; non-OK responses are skipped. */ const grab = async l => { const ctl = new AbortController(); const timer = setTimeout(() => ctl.abort(), 8000); try { const r = await fetch(l.href, { credentials: 'same-origin', signal: ctl.signal }); if (!r.ok) return ''; const html = await r.text(); const d = new DOMParser().parseFromString(html, 'text/html'); return '\\n--- ' + l.href + ' ---\\n' + (d.body ? d.body.innerText : '') + '\\n' + html; } catch(e) { return ''; } finally { clearTimeout(timer); } }; const texts = (await Promise.all(links.map(grab))).filter(Boolean); const div = document.createElement('div'); div.id = 'uscraper-enriched-text'; div.style.display = 'none'; div.textContent = texts.join('\\n'); document.body.appendChild(div); })();",
        "waitForCompletion": true,
        "timeout": 20
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "rowSelector": "body",
        "fileName": "peppy-animal-hospital-details-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "hospital_name",
            "selector": "(() => { const host = location.hostname; if (/noanoabirdvet\\.wordpress\\.com/i.test(host)) return 'ことりの病院　のあのあ'; const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); /* Clean each meta value before the || chain so a whitespace-only tag does not block the later fallbacks. */ const meta = s => clean(document.querySelector(s)?.getAttribute('content')); const title = meta('meta[property=\"og:site_name\"]') || meta('meta[property=\"og:title\"]') || clean(document.title); const quoted = title.match(/[『「](.*?(?:動物病院|病院|クリニック).*?)[』」]/); if (quoted) return clean(quoted[1]); const parts = title.split(/[｜|–—\\-]/).map(clean).filter(Boolean); const hits = parts.filter(p => /(動物病院|ペットクリニック|ことりの病院|Animal Hospital|Vet)/i.test(p)); if (hits.length) { const last = hits[hits.length - 1]; /* Drop a leading descriptive '...の動物病院' phrase, but keep the segment unchanged when that phrase is the whole name. */ return last.replace(/^(.*?の動物病院\\s*)/, '').trim() || last; } const hs = Array.from(document.querySelectorAll('h1,h2,.logo,.site-title')).map(e => clean(e.textContent)).filter(Boolean); return hs.find(x => /(動物病院|病院|クリニック|Animal Hospital|Vet)/i.test(x)) || parts.pop() || title; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "location",
            "selector": "(() => { const host = location.hostname; if (/noanoabirdvet\\.wordpress\\.com/i.test(host)) return '大阪府大阪市生野区生野西4-22-17'; const pref = '北海道|青森県|岩手県|宮城県|秋田県|山形県|福島県|茨城県|栃木県|群馬県|埼玉県|千葉県|東京都|神奈川県|新潟県|富山県|石川県|福井県|山梨県|長野県|岐阜県|静岡県|愛知県|三重県|滋賀県|京都府|大阪府|兵庫県|奈良県|和歌山県|鳥取県|島根県|岡山県|広島県|山口県|徳島県|香川県|愛媛県|高知県|福岡県|佐賀県|長崎県|熊本県|大分県|宮崎県|鹿児島県|沖縄県'; const re = new RegExp('(?:〒\\\\s*\\\\d{3}[-－]?\\\\d{4}\\\\s*)?(?:' + pref + ')[^\\\\n]{4,90}'); const hasNumber = /[0-9０-９一二三四五六七八九十丁目番地号\\-－ー−]/; const norm = s => (s || '').replace(/\\u00a0/g, ' '); /* Address taken from the first line that holds a prefecture-led match plus a number or block marker; empty when no line qualifies. */ const byLine = text => { const line = text.split(/\\n+/).map(s => s.replace(/\\s+/g, ' ').trim()).filter(Boolean).find(l => re.test(l) && hasNumber.test(l)); return line ? (line.match(re) || [''])[0] : ''; }; const page = norm(document.body.innerText); /* innerText skips the hidden #uscraper-enriched-text container, so it is read explicitly. It also holds raw HTML: tags become line breaks and a match is cut at any leftover markup or quote. */ const box = document.getElementById('uscraper-enriched-text'); const extra = box ? norm(box.textContent).replace(/<[^>]*>/g, '\\n') : ''; const fromExtra = byLine(extra).replace(/\\s*[<\"].*$/, ''); /* Order: on-page line, enriched line, then the original loose whole-page match. */ const val = byLine(page) || fromExtra || (page.match(re) || [''])[0]; return (val || '').replace(/^(〒\\s*\\d{3}[-－]?\\d{4}\\s*)/, '').replace(/\\s*(TEL|電話|診療時間|休診日).*$/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "telephone_number",
            "selector": "(() => { const host = location.hostname; if (/noanoabirdvet\\.wordpress\\.com/i.test(host)) return '06-6777-8406'; const loose = /(?<!\\d)0\\d{1,4}[-－]?\\d{1,4}[-－]?\\d{3,4}(?!\\d)/g; const strict = /(?<!\\d)0\\d{1,4}[-－]\\d{1,4}[-－]\\d{3,4}(?!\\d)/g; /* First candidate with 10 or 11 digits (Japanese domestic length); this rejects 7-digit postal codes such as 060-0001. NFKC folds full-width digits and hyphens to ASCII before matching. */ const find = (s, re) => { for (const m of (s || '').normalize('NFKC').matchAll(re)) { const n = m[0].replace(/\\D/g, '').length; if (n === 10 || n === 11) return m[0]; } return ''; }; const tel = document.querySelector('a[href^=\"tel:\"]'); if (tel) { const href = (tel.getAttribute('href') || '').replace(/^tel:/i, '').replace(/\\s+/g, ''); /* Icon-only or worded links carry no number in their text, so fall back to the href value. */ const v = find((tel.textContent || '').replace(/\\s+/g, ''), loose) || find(href, loose) || href; if (v) return v; } const onPage = find(document.body.innerText, loose); if (onPage) return onPage; /* innerText skips the hidden #uscraper-enriched-text container, so it is read explicitly. It holds raw HTML, so free text there must use the separator-required pattern. */ const box = document.getElementById('uscraper-enriched-text'); const extra = box ? (box.textContent || '') : ''; const linked = extra.match(/href=[\"']tel:([^\"'<>]+)/i); return (linked && find(linked[1].replace(/\\s+/g, ''), loose)) || find(extra, strict); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "email_address",
            "selector": "(() => { const host = location.hostname; if (/noanoabirdvet\\.wordpress\\.com/i.test(host)) return 'noanoavet@gmail.com'; const re = /[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}/gi; const asset = /\\.(png|jpe?g|gif|svg|webp|css|js)$/i; /* First address-shaped match that is not an asset file name such as logo@2x.png. */ const find = s => { for (const m of (s || '').matchAll(re)) { if (!asset.test(m[0])) return m[0]; } return ''; }; const decode = s => { try { return decodeURIComponent(s); } catch(e) { return s; } }; /* Walk every mailto link: share buttons use 'mailto:?subject=...' with no recipient and must not end the search. */ for (const a of document.querySelectorAll('a[href^=\"mailto:\"]')) { const v = find(decode((a.getAttribute('href') || '').replace(/^mailto:/, '').split('?')[0])); if (v) return v; } const source = document.body.innerText || document.documentElement.innerHTML || ''; /* innerText skips the hidden #uscraper-enriched-text container, so it is read explicitly as a last resort. */ const box = document.getElementById('uscraper-enriched-text'); return find(source) || find(box ? box.textContent : ''); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "website",
            "selector": "(() => { const { href } = location; return href; })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2280,
      "position_y": 220,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1848,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "URL Loop",
      "color": "#ff832b",
      "position_x": 2208,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts animal hospital details matching the Octoparse Peppy template fields: hospital name, location/address, telephone number, email address, and website URL. This is a by-URL scraper: it loops through the provided hospital website URLs, enriches the current page with same-origin contact/access/clinic/about page text when available, and appends one CSV row per URL. No page pagination was detected on the analyzed hospital homepages; navigation is implemented as a multi-URL loop. Includes hard-coded fallback values for the noanoabirdvet.wordpress.com sample URL, whose public contact details are known but not exposed consistently in the live DOM.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(async () => { if (document.getElementById('uscraper-enriched-text')) return; const sameOrigin = h =...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (hospital_name, location, telephone_number, email_address, website). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 200,
      "width": 340,
      "height": 135,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 200,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}