{
  "version": "1.0.0",
  "exported_at": "2026-06-02T20:45:00.000Z",
  "project": {
    "name": "Le Monde Article Scraper",
    "description": "Extracts Le Monde article metadata equivalent to the Octoparse Le Monde Article Scraper: rubrique/category fields, title, subtitle/chapeau, author, publication date, reading time, and URL. Uses a multi-URL navigate loop so additional Le Monde article URLs can be added to the navigate.urls array and appended into one CSV. Article detail pages do not have pagination; navigation is by input URL list. Some articles may be subscriber-only, but header metadata is usually publicly available.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated"
  },
  "blocks": [
    {
      "block_id": "set-window-size-1",
      "block_type": "process",
      "title": "Set Window Size",
      "description": "Set browser window dimensions",
      "position_x": 100,
      "position_y": 260,
      "config": {
        "width": 1920,
        "height": 1080,
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 460,
      "position_y": 260,
      "config": {
        "urls": [
          "https://www.lemonde.fr/sport/article/2024/07/30/jo-2024-huit-medailles-dont-deux-en-or-en-une-seule-journee-l-equipe-de-france-commence-ses-jeux-par-une-moisson-historique_6261482_3242.html"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 820,
      "position_y": 260,
      "config": {
        "timeout": 30,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1180,
      "position_y": 260,
      "config": {
        "selector": ".article__header .ds-title, h1.ds-title",
        "timeout": 30,
        "visible": true,
        "color": "bg-[#08bdba]"
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 1540,
      "position_y": 260,
      "config": {
        "rowSelector": "section.article--single",
        "fileName": "le-monde-article-scraper.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "color": "bg-[#42be65]",
        "columns": [
          {
            "name": "rubrique_1",
            "selector": ".ds-breadcrumb__item:nth-child(1) .link__txt",
            "attribute": "text"
          },
          {
            "name": "rubrique_2",
            "selector": ".ds-breadcrumb__item:nth-child(2) .link__txt",
            "attribute": "text"
          },
          {
            "name": "titre",
            "selector": "h1.ds-title",
            "attribute": "text"
          },
          {
            "name": "chapeau",
            "selector": ".ds-chapo",
            "attribute": "text"
          },
          {
            "name": "auteur",
            "selector": "Array.from(ROW.querySelectorAll('.article__author-link')).map(a => a.textContent.trim()).filter(Boolean).join('; ') || document.querySelector('meta[property=\"og:article:author\"]')?.content || ''",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "publication",
            "selector": ".meta__date--header",
            "attribute": "text"
          },
          {
            "name": "lecture",
            "selector": "(ROW.querySelector('.meta__reading-time--header')?.innerText || ROW.querySelector('.meta__reading-time--header')?.textContent || '').replace(/Temps de\\s*/i, '').replace(/^Lecture\\s*/i, '').trim()",
            "attribute": "text",
            "isJs": true
          },
          {
            "name": "article_url",
            "selector": "location.href",
            "attribute": "text",
            "isJs": true
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 1900,
      "position_y": 260,
      "config": {
        "color": "bg-[#8d8d8d]"
      }
    }
  ],
  "connections": [
    {
      "from_block_id": "set-window-size-1",
      "from_connector_id": "right",
      "to_block_id": "navigate-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-entry",
      "element_type": "group",
      "title": "Entry & Setup",
      "color": "#4589ff",
      "position_x": 28,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "set-window-size-1"
        ]
      }
    },
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 388,
      "position_y": 156,
      "width": 1040,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "wait-for-element-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 1468,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "URL Loop",
      "color": "#ff832b",
      "position_x": 1828,
      "position_y": 156,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Extracts Le Monde article metadata equivalent to the Octoparse Le Monde Article Scraper: rubrique/category fields, title, subtitle/chapeau, author, publication date, reading time, and URL. Uses a multi-URL navigate loop so additional Le Monde article URLs can be added to the navigate.urls array and appended into one CSV. Article detail pages do not have pagination; navigation is by input URL list. Some articles may be subscriber-only, but header metadata is usually publicly available.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop: visits each URL in this block's urls list in turn (currently 1 URL). Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 660,
      "position_y": 240,
      "width": 328,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (auteur, lecture, article_url). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 1740,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2100,
      "position_y": 240,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}