{
  "version": "1.0.0",
  "exported_at": "2026-05-31T08:30:00.000Z",
  "project": {
    "name": "Gelbe Seiten Scraper",
    "description": "Scrapes Gelbe Seiten business detail pages for lead fields equivalent to the Octoparse Gelbe Seiten template: detail URL, ID, name, address, postcode, location, industry (Branche), opening status, phone, email, homepage, and detailed opening hours. Navigation uses a multi-URL loop over Gelbe Seiten business detail URLs and appends each page to one CSV. The analyzed detail pages have no internal pagination; replace or expand the urls array with detail URLs collected from keyword/city searches for larger runs.",
    "color": "bg-[#ffdc00]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 200,
      "config": {
        "urls": [
          "https://www.gelbeseiten.de/gsbiz/106aafe5-1274-4746-abe5-2091e0ed96f9",
          "https://www.gelbeseiten.de/gsbiz/40d3e9ca-0398-402c-ad5a-6710b4dec488"
        ],
        "color": "bg-[#ffdc00]",
        "tags": [
          "gelbeseiten",
          "lead-generation",
          "multi-url"
        ]
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 200,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 840,
      "position_y": 200,
      "config": {
        "selector": ".mod-TeilnehmerKopf__name",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1200,
      "position_y": 200,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1560,
      "position_y": 200,
      "config": {
        "rowSelector": "body",
        "fileName": "gelbe-seiten-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "webseite_url",
            "selector": "window.location.href",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "id",
            "selector": "(ROW.querySelector('input[name=\"c4allTeilnehmerId\"]')?.value || ROW.querySelector('[data-wipe*=\"id\"]')?.getAttribute('data-wipe')?.match(/\"id\"\\s*:\\s*\"?(\\d+)\"?/)?.[1] || '')",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "name",
            "selector": "(ROW.querySelector('.mod-TeilnehmerKopf__name, h1')?.textContent || '').replace(/\\s+/g, ' ').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "adresse",
            "selector": "(ROW.querySelector('.mod-TeilnehmerKopf__adresse .mod-TeilnehmerKopf__adresse-daten')?.textContent || '').replace(/\\s+/g, ' ').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "plz",
            "selector": "(Array.from(ROW.querySelectorAll('.mod-TeilnehmerKopf__adresse .mod-TeilnehmerKopf__adresse-daten')).map(e => e.textContent.trim()).find(t => /^\\d{5}/.test(t)) || '').match(/\\d{5}/)?.[0] || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "standort",
            "selector": "(ROW.querySelector('.mod-TeilnehmerKopf__adresse-daten--noborder')?.textContent || '').replace(/\\s+/g, ' ').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "branche",
            "selector": "Array.from(ROW.querySelectorAll('[data-selenium=\"teilnehmerkopf__branche\"]')).map(e => e.textContent.replace(/\\s+/g, ' ').trim()).filter(Boolean).join(' | ').toUpperCase()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "oeffnungsstatus",
            "selector": "(ROW.querySelector('.mod-TeilnehmerKopf__oeffnungszeiten')?.innerText || '').replace(/\\s+/g, ' ').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "telefonnummer",
            "selector": "(ROW.querySelector('#kontaktdaten [data-role=\"telefonnummer\"] a span, #kontaktdaten .contains-icon-big-tel a span, .telefonnummer')?.textContent || '').replace(/\\s+/g, ' ').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "e_mail",
            "selector": "(() => { const raw = ROW.querySelector('#email_versenden')?.getAttribute('data-link') || ROW.querySelector('a[href^=\"mailto:\"]')?.getAttribute('href') || ''; return raw.replace(/^mailto:/i, '').split('?')[0].trim(); })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "homepage",
            "selector": "(() => { const a = ROW.querySelector('#kontaktdaten .contains-icon-big-homepage a[href], .aktionsleiste a[data-wipe-realview=\"detailseite_aktionsleiste_webadresse\"][href]'); return a ? a.href : ''; })()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "konkrete_oeffnungszeiten",
            "selector": "(() => { const c = ROW.querySelector('#oeffnungszeiten .mod-Oeffnungszeiten__container, #oeffnungszeiten'); if (!c) return ''; const rows = Array.from(c.querySelectorAll(':scope > div')).map(r => r.innerText.replace(/\\s+/g, ' ').trim()).filter(Boolean); const txt = rows.length ? rows.join(' | ') : c.innerText.replace(/\\s+/g, ' ').trim(); return txt.replace(/^Öffnungszeiten\\s*/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1920,
      "position_y": 200,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 96,
      "width": 1400,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1",
          "sleep-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1488,
      "position_y": 96,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Multi-URL Loop",
      "color": "#ff832b",
      "position_x": 1848,
      "position_y": 96,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Scrapes Gelbe Seiten business detail pages for lead fields equivalent to the Octoparse Gelbe Seiten template: detail URL, ID, name, address, postcode, location, industry (Branche), opening status, phone, email, homepage, and detailed opening hours. Navigation uses a multi-URL loop over Gelbe Seiten business detail URLs and appends each page to one CSV. The analyzed detail pages have no internal pagination; replace or expand the urls array with detail URLs collected from keyword/city searches for larger runs.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with 12 JS columns (webseite_url, id, name, adresse, plz, standort, branche, oeffnungsstatus, telefonnummer, e_mail, homepage, konkrete_oeffnungszeiten). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 180,
      "width": 340,
      "height": 126,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2120,
      "position_y": 180,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}