{
  "version": "1.0.0",
  "exported_at": "2026-06-01T00:00:00.000Z",
  "project": {
    "name": "North Data Scraper",
    "description": "Scrapes North Data company detail pages equivalent to the Octoparse North Data Scraper: company URL, company title, registry identifier, up to six dated register/event entries, and the source keyword. Navigation uses a multi-URL loop over configured North Data company detail URLs and appends all rows to north-data-scraper.csv. Replace or extend the URL list for other keyword-discovered companies. Includes best-effort handling for North Data consent banners and heuristic parsing of timeline entries.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window size",
      "position_x": 120,
      "position_y": 260,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 480,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.northdata.de/SWT+Maschinenbau+GmbH,+Wain/Amtsgericht+Ulm+HRB+734855",
          "https://www.northdata.de/SH+Maschinenbau+GmbH,+Weilburg/Amtsgericht+Limburg+a.+d.+Lahn+HRB+5674",
          "https://www.northdata.de/BeLi+Maschinenbau+GmbH,+Seligenstadt/Amtsgericht+Offenbach+am+Main+HRB+50567",
          "https://www.northdata.de/AP+Maschinenbau+e.+K.,+Blaibach/Amtsgericht+Regensburg+HRA+8900",
          "https://www.northdata.de/SPS+Maschinenbau+GmbH,+Altm%C3%BCnster/301652g",
          "https://www.northdata.de/2K+Maschinenbau+GmbH,+Neum%C3%BCnster/Amtsgericht+Kiel+HRB+1827+NM",
          "https://www.northdata.de/R+%2B+W+Maschinenbau+OHG,+Bad+M%C3%BCnstereifel/Amtsgericht+Bonn+HRA+9747"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 840,
      "position_y": 260,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1200,
      "position_y": 260,
      "config": {
        "jsCode": "(()=>{Array.from(document.querySelectorAll('button,a,[role=button],input[type=button],input[type=submit]')).find(el=>/accept|akzeptieren|zustimmen|einverstanden/i.test((el.innerText||el.value||'').trim()))?.click();const clean=s=>(s||'').replace(/[\\uE000-\\uF8FF]/g,'').replace(/[\\u00ad]/g,'').replace(/\\s+/g,' ').trim();const dec=s=>{try{return decodeURIComponent(s||'').replace(/\\+/g,' ')}catch(e){return (s||'').replace(/\\+/g,' ')}};const dateRe=/\\b\\d{2}\\.\\d{2}\\.\\d{4}\\b|\\bJahr\\s+\\d{4}\\b/;const keywordRe=/\\s+(Jahres|Jah­res|Eintragung|Ein­tra­gung|Sitzverlegung|Geschäftsführer|GF:|Liquidation|Li­qui­da­ti­on|Anschrift|Kapital|Originaldokument|Patent|Wort-|Wort\\/Bildmarke|Übernahme|Verschmelzung|Komplementär|Pers\\.|Persönlich|Formwechsel|Eröffnung|Stichtag|Beschluss|Zu Eintragungsnummer|Staatliche|Inhaber:|Nicht mehr|Kmdt\\.|PhG\\.|Firmen­sitz|Fir­men­sitz|Rechtsform|Rechts­form)/i;function urlParts(){const p=location.pathname.split('/').filter(Boolean);return {title:clean(dec(p[0]||'')),registry:clean(dec(p.slice(1).join('/')||''))}}function eventNodes(root=document.body){const nodes=Array.from(root.querySelectorAll('[class*=event],[class*=publication],[class*=notice],[class*=entry],li,tr,article'));const items=[];for(const el of nodes){const txt=clean(el.innerText);if(!dateRe.test(txt)||txt.length<12||txt.length>1500)continue;if(/Dossier Watch|North Data Home|Registerbekanntmachung/i.test(txt)&&txt.length<80)continue;if(items.some(it=>it.contains(el)||el.contains(it)))continue;items.push(el)}return items}function eventData(idx){const el=eventNodes()[idx];if(!el)return {title:'',date:'',details:''};const txt=clean(el.innerText);const dm=txt.match(dateRe);const date=dm?dm[0]:'';let after=date?clean(txt.slice(txt.indexOf(date)+date.length)):txt;let title='';const ctx=after.match(/^(als|in|über)\\s+(.+?)(?=$|\\s+(Jahres|Jah­res|Eintragung|Ein­tra­gung|Sitzverlegung|Geschäftsführer|GF:|Liquidation|Li­qui­da­ti­on|Anschrift|Kapital|Originaldokument|Patent|Wort-|Wort\\/Bildmarke|Übernahme|Verschmelzung|Komplementär|Pers\\.|Persönlich|Formwechsel|Eröffnung|Stichtag|Beschluss|Staatliche|Inhaber:|Nicht mehr|Kmdt\\.|PhG\\.|Firmen­sitz|Fir­men­sitz|Rechtsform|Rechts­form))/i);if(ctx)title=clean(ctx[0]);let details=title?clean(after.replace(title,'')):after;return {title,date,details}}window.__northDataExtract=(field,idx,part)=>{const p=urlParts();if(field==='url')return location.href;if(field==='title')return p.title;if(field==='registry')return p.registry;if(field==='keyword')return 'maschinenbau';if(field==='event'){const e=eventData(idx||0);return e[part]||''}return ''};})(); true;",
        "waitForCompletion": true,
        "timeout": 10,
        "color": "bg-[#a56eff]"
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1560,
      "position_y": 260,
      "config": {
        "duration": 2,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1920,
      "position_y": 260,
      "config": {
        "selector": "body",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 260,
      "config": {
        "rowSelector": "body",
        "fileName": "north-data-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "Firma_url",
            "selector": "window.__northDataExtract('url')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Titel",
            "selector": "window.__northDataExtract('title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Unternehmensname",
            "selector": "window.__northDataExtract('registry')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel1",
            "selector": "window.__northDataExtract('event',0,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum1",
            "selector": "window.__northDataExtract('event',0,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details1",
            "selector": "window.__northDataExtract('event',0,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel2",
            "selector": "window.__northDataExtract('event',1,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum2",
            "selector": "window.__northDataExtract('event',1,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details2",
            "selector": "window.__northDataExtract('event',1,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel3",
            "selector": "window.__northDataExtract('event',2,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum3",
            "selector": "window.__northDataExtract('event',2,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details3",
            "selector": "window.__northDataExtract('event',2,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel4",
            "selector": "window.__northDataExtract('event',3,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum4",
            "selector": "window.__northDataExtract('event',3,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details4",
            "selector": "window.__northDataExtract('event',3,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel5",
            "selector": "window.__northDataExtract('event',4,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum5",
            "selector": "window.__northDataExtract('event',4,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details5",
            "selector": "window.__northDataExtract('event',4,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Titel6",
            "selector": "window.__northDataExtract('event',5,'title')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "EIntragung_Datum6",
            "selector": "window.__northDataExtract('event',5,'date')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Eintragung_Details6",
            "selector": "window.__northDataExtract('event',5,'details')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "Schluesselwort_der_Suche",
            "selector": "window.__northDataExtract('keyword')",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2640,
      "position_y": 260,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 48,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 408,
      "position_y": 156,
      "width": 1760,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1128,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes North Data company detail pages equivalent to the Octoparse North Data Scraper: company URL, company title, registry identifier, up to six dated register/event entries, and the source keyword. Navigation uses a multi-URL loop over configured North Data company detail URLs and appends all rows to north-data-scraper.csv. Replace or extend the URL list for other keyword-discovered companies. Includes best-effort handling for North Data consent banners and heuristic parsing of timeline entries.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `(()=>{Array.from(document.querySelectorAll('button,a,[role=button],input[type=button],input[type=sub...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1400,
      "position_y": 240,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (Firma_url, Titel, Unternehmensname, Eintragung_Titel1, EIntragung_Datum1). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 240,
      "width": 340,
      "height": 138,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}