{
  "version": "1.0.0",
  "exported_at": "2026-06-01T15:45:00.000Z",
  "project": {
    "name": "Microsoft Research Scraper",
    "description": "Extracts Microsoft Careers keyword-search results equivalent to the Octoparse Microsoft Research Scraper: keyword, type, title, link, and abstract/summary. Starts with the keyword \"data science\", searches Microsoft Careers, creates stable synthetic rows from the dynamic React results DOM, exports them, then follows the enabled pagination Next button until no more pages are available. Best-effort note: Microsoft Careers is dynamic and sometimes does not expose job detail links or abstracts directly in each listing row; when unavailable, link may be blank and abstract falls back to the visible listing text.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "url": "https://apply.careers.microsoft.com/careers?start=0&sort_by=timestamp",
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 456,
      "position_y": 220,
      "config": {
        "timeout": 45
      }
    },
    {
      "block_id": "element-exists-1",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 792,
      "position_y": 220,
      "config": {
        "selector": "[id*='cookie' i] button, [aria-label*='Accept' i], button[id*='accept' i]"
      }
    },
    {
      "block_id": "click-1",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 1128,
      "position_y": 520,
      "config": {
        "selector": "[id*='cookie' i] button, [aria-label*='Accept' i], button[id*='accept' i]",
        "timeout": 8
      }
    },
    {
      "block_id": "type-text-1",
      "block_type": "process",
      "title": "Type Text",
      "description": "Type text into input",
      "position_x": 1464,
      "position_y": 520,
      "config": {
        "selector": "input[data-testid='position-query-search-search']",
        "text": "data science",
        "clearFirst": true
      }
    },
    {
      "block_id": "click-2",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 1800,
      "position_y": 520,
      "config": {
        "selector": "button[data-testid='search-jobs-button']",
        "timeout": 15
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 2136,
      "position_y": 520,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "wait-for-text-1",
      "block_type": "process",
      "title": "Wait for Text",
      "description": "Wait until text appears",
      "position_x": 2472,
      "position_y": 520,
      "config": {
        "text": "Posted",
        "selector": "main",
        "timeout": 45
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 2808,
      "position_y": 520,
      "config": {
        "jsCode": "(() => { const KEYWORD = 'data science'; const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const locRe = /\\b(United States|United Kingdom|India|Canada|Israel|Romania|Czech Republic|Netherlands|Remote|Multiple Locations|Redmond|Mountain View|London|Bangalore|Bengaluru|Hyderabad|Vancouver|Toronto|Tel Aviv|Bucharest|Tokyo|Atlanta|Reston|Prague|Dublin|Germany|France|Spain|Italy|China|Japan|Australia|Brazil|Mexico|Poland|Singapore|Ireland)\\b/i; const postedRe = /\\bPosted\\s+((?:an\\s+hour|a\\s+day|\\d+\\s+(?:minutes?|hours?|days?|weeks?))\\s+ago|[A-Z][a-z]{2}\\s+\\d{1,2},\\s+\\d{4})/i; const titleFrom = txt => { let t = clean(txt).replace(/^Manage\\s+/i, ''); let before = clean(t.split(postedRe)[0] || t); let title = clean(before.split(locRe)[0] || before); title = title.replace(/^(Jobs|Search jobs|Job cart \\d+|Sort: Latest|Turn on job alerts for this search)\\s*/i, '').trim(); return title || before.slice(0, 160); }; const linkFrom = (title, el) => { let n = el; for (let i = 0; n && i < 6; i++, n = n.parentElement) { const a = n.querySelector && n.querySelector('a[href*=\"/job/\"], a[href*=\"apply.careers.microsoft.com\"], a[href*=\"jobs.careers.microsoft.com\"]'); if (a && a.href && !a.href.startsWith('javascript:')) return a.href; const href = n.getAttribute && n.getAttribute('href'); if (href && !href.startsWith('javascript:')) return new URL(href, location.href).href; const id = n.getAttribute && (n.getAttribute('data-job-id') || n.getAttribute('data-job-number') || n.getAttribute('data-id')); if (id && /\\d{6,}/.test(id)) { const slug = clean(title).toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, ''); return 'https://jobs.careers.microsoft.com/global/en/job/' + id + '/' + slug; } } return ''; }; const old = document.getElementById('uscraper-ms-job-rows'); if (old) old.remove(); const wrap = document.createElement('div'); wrap.id = 'uscraper-ms-job-rows'; wrap.style.cssText = 'position:absolute;left:-99999px;top:auto;width:1px;height:1px;overflow:hidden;'; const rows = []; const seen = new Set(); const candidates = Array.from(document.querySelectorAll('main li, main [role=\"listitem\"], main button, main [role=\"button\"], main a')); for (const el of candidates) { const txt = clean(el.innerText || el.getAttribute('aria-label') || ''); if (!postedRe.test(txt) || txt.length < 20 || txt.length > 900) continue; const title = titleFrom(txt); if (!title || /^(Jobs|Search jobs|Apply now|Add to cart)$/i.test(title)) continue; const post = (txt.match(postedRe) || [''])[0]; const key = title + '|' + post; if (seen.has(key)) continue; seen.add(key); rows.push({ title, link: linkFrom(title, el), abstract: txt }); } if (rows.length < 3) { let text = clean((document.querySelector('main') || document.body).innerText || ''); text = text.replace(/^.*?(?:Turn on job alerts for this search\\s+Manage|Sort:\\s*Latest)/i, ''); text = text.split(/\\b\\d+\\s+of\\s+\\d+\\b/)[0]; const re = /(.+?)\\s+Posted\\s+((?:an\\s+hour|a\\s+day|\\d+\\s+(?:minutes?|hours?|days?|weeks?))\\s+ago|[A-Z][a-z]{2}\\s+\\d{1,2},\\s+\\d{4})/gi; let m; while ((m = re.exec(text)) && rows.length < 50) { const chunk = clean(m[1] + ' Posted ' + m[2]); const title = titleFrom(chunk); const key = title + '|' + m[2]; if (title && !seen.has(key)) { seen.add(key); rows.push({ title, link: '', abstract: chunk }); } } } rows.slice(0, 50).forEach(r => { const d = document.createElement('div'); d.className = 'uscraper-ms-job-row'; d.setAttribute('data-keyword', KEYWORD); d.setAttribute('data-type', 'Career Opportunity'); d.setAttribute('data-title', r.title || ''); d.setAttribute('data-link', r.link || ''); d.setAttribute('data-abstract', r.abstract || ''); wrap.appendChild(d); }); document.body.appendChild(wrap); return rows.length; })();",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 3144,
      "position_y": 520,
      "config": {
        "selector": ".uscraper-ms-job-row",
        "timeout": 10,
        "visible": false
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 3480,
      "position_y": 520,
      "config": {
        "rowSelector": ".uscraper-ms-job-row",
        "fileName": "microsoft-research-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "keyword",
            "selector": "",
            "attribute": "data-keyword"
          },
          {
            "name": "type",
            "selector": "",
            "attribute": "data-type"
          },
          {
            "name": "title",
            "selector": "",
            "attribute": "data-title"
          },
          {
            "name": "link",
            "selector": "",
            "attribute": "data-link"
          },
          {
            "name": "abstract",
            "selector": "",
            "attribute": "data-abstract"
          }
        ]
      }
    },
    {
      "block_id": "element-exists-2",
      "block_type": "process",
      "title": "Element Exists",
      "description": "Check if element exists",
      "position_x": 3816,
      "position_y": 520,
      "config": {
        "selector": "button[class*='pagination-next']:not([disabled]):not([aria-disabled='true'])"
      }
    },
    {
      "block_id": "click-3",
      "block_type": "process",
      "title": "Click",
      "description": "Click on element",
      "position_x": 4152,
      "position_y": 520,
      "config": {
        "selector": "button[class*='pagination-next']:not([disabled]):not([aria-disabled='true'])",
        "timeout": 15
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 4488,
      "position_y": 520,
      "config": {
        "duration": 4
      }
    },
    {
      "block_id": "wait-for-text-2",
      "block_type": "process",
      "title": "Wait for Text",
      "description": "Wait until text appears",
      "position_x": 4824,
      "position_y": 758,
      "config": {
        "text": "Posted",
        "selector": "main",
        "timeout": 45
      }
    },
    {
      "block_id": "end-1",
      "block_type": "output",
      "title": "End",
      "description": "Terminate execution flow",
      "position_x": 3816,
      "position_y": 520,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "true",
      "to_block_id": "click-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-1",
      "from_connector_id": "false",
      "to_block_id": "type-text-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-1",
      "from_connector_id": "right",
      "to_block_id": "type-text-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "type-text-1",
      "from_connector_id": "right",
      "to_block_id": "click-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-2",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-text-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-text-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "element-exists-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-2",
      "from_connector_id": "true",
      "to_block_id": "click-3",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "element-exists-2",
      "from_connector_id": "false",
      "to_block_id": "end-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "click-3",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "wait-for-text-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-text-2",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 5024,
      "height": 834,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-text-1",
          "wait-for-element-1",
          "sleep-2",
          "wait-for-text-2"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 720,
      "position_y": 116,
      "width": 3680,
      "height": 596,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "element-exists-1",
          "click-1",
          "click-2",
          "element-exists-2",
          "click-3"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1392,
      "position_y": 416,
      "width": 1664,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "type-text-1",
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 3408,
      "position_y": 416,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-control",
      "element_type": "group",
      "title": "Control Flow",
      "color": "#8d8d8d",
      "position_x": 3744,
      "position_y": 416,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "end-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Microsoft Careers keyword-search results equivalent to the Octoparse Microsoft Research Scraper: keyword, type, title, link, and abstract/summary. Starts with the keyword \"data science\", searches Microsoft Careers, creates stable synthetic rows from the dynamic React results DOM, exports them, then follows the enabled pagination Next button until no more pages are available. Best-effort note: Microsoft Careers is dynamic and sometimes does not expose job detail links or abstracts directly in each listing row; when unavailable, link may be blank and abstract falls back to the visible listing text.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-element-exists-1",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `[id*='cookie' i] button, [aria-label*='Accept' i], button[id*='accept' i]`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 992,
      "position_y": 200,
      "width": 340,
      "height": 154,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(() => { const KEYWORD = 'data science'; const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); c...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 3008,
      "position_y": 500,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Extracts rows matching `.uscraper-ms-job-row`. Confirm row count > 0 before running at scale.",
      "color": "#ee5396",
      "position_x": 3680,
      "position_y": 500,
      "width": 340,
      "height": 111,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-element-exists-2",
      "element_type": "note",
      "title": "Note: Element Exists",
      "content": "Condition block: checks `button[class*='pagination-next']:not([disabled]):not([aria-disabled='true'])`. True / False branches control which path runs next. Keep enough space between branches so both connector lines are visible.",
      "color": "#ee5396",
      "position_x": 4016,
      "position_y": 500,
      "width": 340,
      "height": 155,
      "z_index": 22,
      "data": {
        "block_id": "element-exists-2"
      }
    },
    {
      "id": "note-block-click-3",
      "element_type": "note",
      "title": "Note: Click",
      "content": "Pagination click — add waits after this block; the page reloads asynchronously.",
      "color": "#ee5396",
      "position_x": 4352,
      "position_y": 500,
      "width": 316,
      "height": 106,
      "z_index": 22,
      "data": {
        "block_id": "click-3"
      }
    }
  ]
}