{
  "version": "1.0.0",
  "exported_at": "2026-06-01T00:00:00.000Z",
  "project": {
    "name": "Baidu Scraper",
    "description": "Scrapes Baidu search result listings for the sample keywords shown in the Octoparse template preview: Cybersecurity, Virtual Reality (VR), and Nanotechnology. Uses Baidu mobile search URLs with pn pagination offsets for multiple result pages per keyword. A JavaScript normalization step scans Baidu's changing result markup, filters out Baidu UI/login/navigation links, creates stable .uscraper-baidu-row records, then exports keyword, title, link, source, and description to baidu-scraper.csv. Best-effort template: Baidu may show CAPTCHA, anti-bot checks, blank pages, regional layouts, ads, or redirect URLs.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 120,
      "position_y": 240,
      "config": {
        "width": 430,
        "height": 932,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 480,
      "position_y": 240,
      "config": {
        "urls": [
          "https://m.baidu.com/s?word=Cybersecurity&pn=0",
          "https://m.baidu.com/s?word=Cybersecurity&pn=10",
          "https://m.baidu.com/s?word=Cybersecurity&pn=20",
          "https://m.baidu.com/s?word=Virtual%20Reality%20%28VR%29&pn=0",
          "https://m.baidu.com/s?word=Virtual%20Reality%20%28VR%29&pn=10",
          "https://m.baidu.com/s?word=Virtual%20Reality%20%28VR%29&pn=20",
          "https://m.baidu.com/s?word=Nanotechnology&pn=0",
          "https://m.baidu.com/s?word=Nanotechnology&pn=10",
          "https://m.baidu.com/s?word=Nanotechnology&pn=20"
        ],
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 840,
      "position_y": 240,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1200,
      "position_y": 240,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": false,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 240,
      "config": {
        "duration": 3,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1920,
      "position_y": 240,
      "config": {
        "jsCode": "(() => { if (document.documentElement) { document.documentElement.style.visibility = 'visible'; document.documentElement.style.opacity = '1'; } if (document.body) { document.body.style.display = 'block'; document.body.style.visibility = 'visible'; document.body.style.opacity = '1'; } const old = document.querySelector('#uscraper-baidu-normalized'); if (old) old.remove(); const wrap = document.createElement('div'); wrap.id = 'uscraper-baidu-normalized'; wrap.style.display = 'block'; const params = new URL(location.href).searchParams; const keyword = params.get('word') || params.get('wd') || params.get('query') || ''; const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const badTitle = /^(百度一下|搜索|登录|设置|首页|打开APP|展开|收起|更多|换一换|我的关注|我的收藏|登录查看更多历史|查看更多历史|点击即刻体验AI搜索！|问答|综合|笔记|视频|图片|文档|资讯|商品|采购|小说|音乐|24小时|1周内|1月内|1年内|高级筛选|用户反馈|直达号|历史记录|清空历史|反馈|排序方式|最新|发布时间|重置)$/i; const badContains = /(登录查看更多历史|登录查看历史|我的关注|我的收藏|高级筛选|用户反馈|点击即刻体验AI搜索|资源筛选|排序方式|发布时间|百度一下|直达号历史记录|清空历史|©\\s*\\d*\\s*Baidu)/i; const badHref = /^(javascript:|#|about:blank)/i; const badUrl = (href, title) => { if (!href || badHref.test(href)) return true; try { const u = new URL(href, location.href); const host = u.hostname.toLowerCase(); const full = u.href.toLowerCase(); if (host === 'wappass.baidu.com' || host === 'passport.baidu.com') return true; if (host === 'm.baidu.com' && (badTitle.test(title) || /passport|login|wappass|settings|feedback|history|csaitab|wenda_tab|filter|\\btab\\b|timefactor|gpc=/.test(full))) return true; if (host.endsWith('.baidu.com') && /passport|login|wappass/.test(full)) return true; } catch(e) {} return false; }; const seen = new Set(); const rows = []; const addRow = (block, anchor) => { if (!block || !anchor || rows.length >= 12) return; const title = clean(anchor.innerText || anchor.textContent || anchor.getAttribute('aria-label')); const href = anchor.href || ''; if (!title || title.length < 2 || title.length > 160 || badTitle.test(title) || badContains.test(title) || badUrl(href, title)) return; const realLink = block.getAttribute('mu') || block.querySelector('[mu]')?.getAttribute('mu') || href; if (badUrl(realLink, title)) return; const key = title + '|' + realLink; if (seen.has(key)) return; const clone = block.cloneNode(true); clone.querySelectorAll('h1,h2,h3,style,script,noscript,svg,path,.c-showurl,cite,[class*=\"showurl\"],[class*=\"source\"],[class*=\"site\"],[class*=\"url\"],[class*=\"tools\"],[class*=\"toolbar\"],nav,footer,header,form,input,button').forEach(e => e.remove()); let desc = clean(clone.innerText); if (badContains.test(desc.slice(0, 240))) return; if (desc.length > 500) desc = desc.slice(0, 500); let source = clean(block.querySelector('.c-showurl, cite, [class*=\"showurl\"], [class*=\"source\"], [class*=\"site\"], [class*=\"url\"]')?.innerText); try { if (!source && realLink) source = new URL(realLink).hostname; } catch(e) {} seen.add(key); rows.push({ keyword, title, link: realLink, source, description: desc }); }; const blocks = Array.from(document.querySelectorAll('[srcid], [tpl], .result, .c-result, .c-container, .result-item, article, section')).filter(el => { const txt = clean(el.innerText); return txt.length > 8 && txt.length < 2500 && el.querySelector('a[href]') && !badContains.test(txt.slice(0, 300)); }); for (const block of blocks) { const anchor = ['h3 a[href]', 'h2 a[href]', '.c-title a[href]', '[class*=\"title\"] a[href]', 'a[aria-label][href]', 'a[href]'].map(sel => block.querySelector(sel)).find(Boolean); addRow(block, anchor); } if (rows.length < 3) { const anchors = Array.from(document.querySelectorAll('h3 a[href], h2 a[href], .c-title a[href], [class*=\"title\"] a[href]')); for (const a of anchors) { const block = a.closest('[srcid], [tpl], .result, .c-result, .c-container, article, section') || a.parentElement; addRow(block, a); } } for (const r of rows) { const div = document.createElement('div'); div.className = 'uscraper-baidu-row'; div.setAttribute('data-keyword', r.keyword); div.setAttribute('data-title', r.title); div.setAttribute('data-link', r.link); div.setAttribute('data-source', r.source); div.setAttribute('data-description', r.description); div.textContent = r.title; wrap.appendChild(div); } document.body.appendChild(wrap); return rows.length; })();",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 240,
      "config": {
        "rowSelector": ".uscraper-baidu-row",
        "fileName": "baidu-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "keyword",
            "selector": "",
            "attribute": "data-keyword"
          },
          {
            "name": "title",
            "selector": "",
            "attribute": "data-title"
          },
          {
            "name": "link",
            "selector": "",
            "attribute": "data-link"
          },
          {
            "name": "source",
            "selector": "",
            "attribute": "data-source"
          },
          {
            "name": "description",
            "selector": "",
            "attribute": "data-description"
          }
        ]
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2640,
      "position_y": 240,
      "config": {
        "duration": 1,
        "color": "bg-[#ff832b]"
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 3000,
      "position_y": 240,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 408,
      "position_y": 136,
      "width": 1460,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1848,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 136,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 136,
      "width": 740,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "sleep-2",
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Baidu search result listings for the sample keywords shown in the Octoparse template preview: Cybersecurity, Virtual Reality (VR), and Nanotechnology. Uses Baidu mobile search URLs with pn pagination offsets for multiple result pages per keyword. A JavaScript normalization step scans Baidu's changing result markup, filters out Baidu UI/login/navigation links, creates stable .uscraper-baidu-row records, then exports keyword, title, link, source, and description to baidu-scraper.csv. Best-effort template: Baidu may show CAPTCHA, anti-bot checks, blank pages, regional layouts, ads, or redirect URLs.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 9 search URLs (3 keywords x 3 result pages, pn=0/10/20). Pair with the Loop Continue block at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 680,
      "position_y": 220,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
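      "content": "Runs custom JavaScript in the page: forces the document visible, scans Baidu result blocks, drops Baidu UI/login/navigation links and duplicates, and appends up to 12 `.uscraper-baidu-row` elements (data-keyword, data-title, data-link, data-source, data-description) inside `#uscraper-baidu-normalized`. Verify in browser if results are empty.",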
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 220,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Extracts rows matching `.uscraper-baidu-row`. Confirm row count > 0 before running at scale.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 220,
      "width": 340,
      "height": 110,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 3200,
      "position_y": 220,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}