{
  "version": "1.0.0",
  "exported_at": "2026-06-01T00:00:00.000Z",
  "project": {
    "name": "Goodreads Comments Scraper",
    "description": "Goodreads reviews/comments scraper equivalent to the Octoparse Goodreads Comments Scraper. It targets Goodreads book review pages and exports current_page, book_title, author, username, rating, comment_time, comment_content, and likes. Pagination uses a finite known-URL list for pages 1-10 with Goodreads ?page=N URLs, and structured export appends each page to one fresh CSV file. Columns use JavaScript IIFE expressions with multiple Goodreads selector fallbacks because Goodreads markup varies. Change navigate.urls to scrape another Goodreads book or page range. Goodreads may block scraping with CAPTCHA, rate limits, login prompts, or layout changes.",
    "color": "bg-[#4589ff]",
    "template_id": "ai-generated-goodreads-comments-scraper"
  },
  "blocks": [
    {
      "block_id": "navigate-1",
      "block_type": "process",
      "title": "Navigate",
      "description": "Go to a URL",
      "position_x": 120,
      "position_y": 220,
      "config": {
        "urls": [
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=1",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=2",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=3",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=4",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=5",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=6",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=7",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=8",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=9",
          "https://www.goodreads.com/book/show/17851885-i-am-malala/reviews?page=10"
        ],
        "color": "bg-[#4589ff]"
      }
    },
    {
      "block_id": "wait-for-page-load-1",
      "block_type": "process",
      "title": "Wait for Page Load",
      "description": "Wait for page to finish loading",
      "position_x": 480,
      "position_y": 220,
      "config": {
        "timeout": 30
      }
    },
    {
      "block_id": "sleep-1",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 840,
      "position_y": 220,
      "config": {
        "duration": 2
      }
    },
    {
      "block_id": "wait-for-element-1",
      "block_type": "process",
      "title": "Wait for Element",
      "description": "Wait until element appears",
      "position_x": 1200,
      "position_y": 220,
      "config": {
        "selector": "article.ReviewCard, div.ReviewCard, div.review, [data-testid='reviewCard']",
        "timeout": 30,
        "visible": true
      }
    },
    {
      "block_id": "inject-javascript-1",
      "block_type": "process",
      "title": "Inject JavaScript",
      "description": "Execute custom JavaScript",
      "position_x": 1560,
      "position_y": 220,
      "config": {
        "jsCode": "Array.from(document.querySelectorAll('button, a')).filter(function(el) { var t = (el.textContent || '').trim().toLowerCase(); return (t === 'show more' || t === 'more' || t.indexOf('show more') >= 0) && t.indexOf('more reviews') === -1 && t.indexOf('show more reviews') === -1; }).slice(0, 300).forEach(function(el) { try { el.click(); } catch (e) {} });",
        "waitForCompletion": true,
        "timeout": 10
      }
    },
    {
      "block_id": "sleep-2",
      "block_type": "process",
      "title": "Sleep",
      "description": "Wait for specified time",
      "position_x": 1920,
      "position_y": 220,
      "config": {
        "duration": 1
      }
    },
    {
      "block_id": "structured-export-1",
      "block_type": "process",
      "title": "Structured Export",
      "description": "Export data with custom columns",
      "position_x": 2280,
      "position_y": 220,
      "config": {
        "rowSelector": "article.ReviewCard, div.ReviewCard, div.review, [data-testid='reviewCard']",
        "fileName": "goodreads_comments_scraper_reviews.csv",
        "saveLocation": "C:\\Users\\theskd\\Documents\\UScraper\\templates",
        "includeHeaders": true,
        "fileMode": "append",
        "columns": [
          {
            "name": "current_page",
            "selector": "(() => { try { return new URL(window.location.href).searchParams.get('page') || '1'; } catch (e) { return '1'; } })()",
            "jsCode": "(() => { try { return new URL(window.location.href).searchParams.get('page') || '1'; } catch (e) { return '1'; } })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "book_title",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const el = document.querySelector('[data-testid=\"bookTitle\"], h1#bookTitle, h1.Text__title1, h1'); if (el && clean(el.textContent)) return clean(el.textContent); const title = clean(document.title).replace(/\\s*\\|\\s*Goodreads\\s*$/i, ''); return title.split(/\\s+by\\s+/i)[0] || title; })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const el = document.querySelector('[data-testid=\"bookTitle\"], h1#bookTitle, h1.Text__title1, h1'); if (el && clean(el.textContent)) return clean(el.textContent); const title = clean(document.title).replace(/\\s*\\|\\s*Goodreads\\s*$/i, ''); return title.split(/\\s+by\\s+/i)[0] || title; })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "author",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const nodes = document.querySelectorAll('.ContributorLink__name, .ContributorLinksList a, #bookAuthors a span, #bookAuthors a, .authorName span, .authorName, a[href*=\"/author/show/\"]'); const vals = Array.from(nodes).map(n => clean(n.textContent)).filter(Boolean); if (vals.length) return Array.from(new Set(vals)).join(', '); const title = clean(document.title).replace(/\\s*\\|\\s*Goodreads\\s*$/i, ''); const parts = title.split(/\\s+by\\s+/i); return parts.length > 1 ? clean(parts.slice(1).join(' by ')) : ''; })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const nodes = document.querySelectorAll('.ContributorLink__name, .ContributorLinksList a, #bookAuthors a span, #bookAuthors a, .authorName span, .authorName, a[href*=\"/author/show/\"]'); const vals = Array.from(nodes).map(n => clean(n.textContent)).filter(Boolean); if (vals.length) return Array.from(new Set(vals)).join(', '); const title = clean(document.title).replace(/\\s*\\|\\s*Goodreads\\s*$/i, ''); const parts = title.split(/\\s+by\\s+/i); return parts.length > 1 ? clean(parts.slice(1).join(' by ')) : ''; })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "username",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewerProfile__name a', '.ReviewerProfile__name', '[data-testid=\"name\"]', 'a[href*=\"/user/show/\"]', 'a.user', '.user a', '.user']; for (const sel of selectors) { const el = ROW.querySelector(sel); const txt = el ? clean(el.textContent) : ''; if (txt) return txt; } const links = Array.from(ROW.querySelectorAll('a')).map(a => clean(a.textContent)).filter(Boolean); return links[0] || ''; })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewerProfile__name a', '.ReviewerProfile__name', '[data-testid=\"name\"]', 'a[href*=\"/user/show/\"]', 'a.user', '.user a', '.user']; for (const sel of selectors) { const el = ROW.querySelector(sel); const txt = el ? clean(el.textContent) : ''; if (txt) return txt; } const links = Array.from(ROW.querySelectorAll('a')).map(a => clean(a.textContent)).filter(Boolean); return links[0] || ''; })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "rating",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const el = ROW.querySelector('[aria-label*=\"out of 5\"], [aria-label*=\"Rating\"], .RatingStars, span.staticStars, .staticStars'); let raw = el ? clean(el.getAttribute('aria-label') || el.getAttribute('title') || el.textContent) : ''; let m = raw.match(/(\\d+(?:\\.\\d+)?)\\s*out of\\s*5/i); if (m) { const n = Math.round(parseFloat(m[1])); return ({1:'did not like it',2:'it was ok',3:'liked it',4:'really liked it',5:'it was amazing'})[n] || raw; } const text = clean(ROW.innerText || ROW.textContent); m = text.match(/\\b(it was amazing|really liked it|liked it|it was ok|did not like it)\\b/i); return m ? m[1].toLowerCase() : raw.replace(/^Rating\\s*/i, '').trim(); })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const el = ROW.querySelector('[aria-label*=\"out of 5\"], [aria-label*=\"Rating\"], .RatingStars, span.staticStars, .staticStars'); let raw = el ? clean(el.getAttribute('aria-label') || el.getAttribute('title') || el.textContent) : ''; let m = raw.match(/(\\d+(?:\\.\\d+)?)\\s*out of\\s*5/i); if (m) { const n = Math.round(parseFloat(m[1])); return ({1:'did not like it',2:'it was ok',3:'liked it',4:'really liked it',5:'it was amazing'})[n] || raw; } const text = clean(ROW.innerText || ROW.textContent); m = text.match(/\\b(it was amazing|really liked it|liked it|it was ok|did not like it)\\b/i); return m ? m[1].toLowerCase() : raw.replace(/^Rating\\s*/i, '').trim(); })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "comment_time",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewCard__dateUpdated', '.reviewDate', 'a.reviewDate', 'a[href*=\"/review/show/\"]', 'span[class*=\"Text__body3\"]']; for (const sel of selectors) { const nodes = Array.from(ROW.querySelectorAll(sel)); for (const el of nodes) { const txt = clean(el.textContent); const m = txt.match(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/i); if (m) return m[0]; } } const text = clean(ROW.innerText || ROW.textContent); const m = text.match(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/i); return m ? m[0] : ''; })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewCard__dateUpdated', '.reviewDate', 'a.reviewDate', 'a[href*=\"/review/show/\"]', 'span[class*=\"Text__body3\"]']; for (const sel of selectors) { const nodes = Array.from(ROW.querySelectorAll(sel)); for (const el of nodes) { const txt = clean(el.textContent); const m = txt.match(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/i); if (m) return m[0]; } } const text = clean(ROW.innerText || ROW.textContent); const m = text.match(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/i); return m ? m[0] : ''; })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "comment_content",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewText', 'section[class*=\"ReviewText\"]', '[data-testid=\"reviewText\"]', '.reviewText', '.readable', 'span.Formatted', '.Formatted']; for (const sel of selectors) { const el = ROW.querySelector(sel); const txt = el ? clean(el.innerText || el.textContent) : ''; if (txt && txt.length > 10) return txt.replace(/\\s*\\(less\\)\\s*$/i, '').trim(); } let text = clean(ROW.innerText || ROW.textContent); text = text.replace(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/ig, ' '); text = text.replace(/\\b(it was amazing|really liked it|liked it|it was ok|did not like it)\\b/ig, ' '); text = text.replace(/\\b\\d[\\d,]*\\s+likes?\\b/ig, ' '); text = text.replace(/\\bLike\\b|\\bComment\\b|\\bMore reviews\\b/ig, ' '); return clean(text); })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.ReviewText', 'section[class*=\"ReviewText\"]', '[data-testid=\"reviewText\"]', '.reviewText', '.readable', 'span.Formatted', '.Formatted']; for (const sel of selectors) { const el = ROW.querySelector(sel); const txt = el ? clean(el.innerText || el.textContent) : ''; if (txt && txt.length > 10) return txt.replace(/\\s*\\(less\\)\\s*$/i, '').trim(); } let text = clean(ROW.innerText || ROW.textContent); text = text.replace(/\\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*\\s+\\d{1,2},\\s+\\d{4}\\b/ig, ' '); text = text.replace(/\\b(it was amazing|really liked it|liked it|it was ok|did not like it)\\b/ig, ' '); text = text.replace(/\\b\\d[\\d,]*\\s+likes?\\b/ig, ' '); text = text.replace(/\\bLike\\b|\\bComment\\b|\\bMore reviews\\b/ig, ' '); return clean(text); })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          },
          {
            "name": "likes",
            "selector": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.SocialFooter__statsContainer', '.likesCount', '.likeText', 'a.likesCount', '[class*=\"like\"]']; for (const sel of selectors) { const nodes = Array.from(ROW.querySelectorAll(sel)); for (const el of nodes) { const txt = clean(el.textContent); const m = txt.match(/\\b(\\d[\\d,]*)\\s+likes?\\b/i) || txt.match(/^\\s*(\\d[\\d,]*)\\s*$/); if (m) return m[1].replace(/,/g, ''); } } const text = clean(ROW.innerText || ROW.textContent); const m = text.match(/\\b(\\d[\\d,]*)\\s+likes?\\b/i); return m ? m[1].replace(/,/g, '') : ''; })()",
            "jsCode": "(() => { const clean = s => (s || '').replace(/\\s+/g, ' ').trim(); const selectors = ['.SocialFooter__statsContainer', '.likesCount', '.likeText', 'a.likesCount', '[class*=\"like\"]']; for (const sel of selectors) { const nodes = Array.from(ROW.querySelectorAll(sel)); for (const el of nodes) { const txt = clean(el.textContent); const m = txt.match(/\\b(\\d[\\d,]*)\\s+likes?\\b/i) || txt.match(/^\\s*(\\d[\\d,]*)\\s*$/); if (m) return m[1].replace(/,/g, ''); } } const text = clean(ROW.innerText || ROW.textContent); const m = text.match(/\\b(\\d[\\d,]*)\\s+likes?\\b/i); return m ? m[1].replace(/,/g, '') : ''; })()",
            "attribute": "text",
            "isJs": true,
            "input_type": "js"
          }
        ]
      }
    },
    {
      "block_id": "loop-continue-1",
      "block_type": "process",
      "title": "Loop Continue",
      "description": "Continue multi-input loop",
      "position_x": 2640,
      "position_y": 220,
      "config": {}
    }
  ],
  "connections": [
    {
      "from_block_id": "navigate-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-page-load-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-page-load-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-1",
      "from_connector_id": "right",
      "to_block_id": "wait-for-element-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "wait-for-element-1",
      "from_connector_id": "right",
      "to_block_id": "inject-javascript-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "inject-javascript-1",
      "from_connector_id": "right",
      "to_block_id": "sleep-2",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "sleep-2",
      "from_connector_id": "right",
      "to_block_id": "structured-export-1",
      "to_connector_id": "left"
    },
    {
      "from_block_id": "structured-export-1",
      "from_connector_id": "right",
      "to_block_id": "loop-continue-1",
      "to_connector_id": "left"
    }
  ],
  "canvas_elements": [
    {
      "id": "group-load",
      "element_type": "group",
      "title": "Page Load",
      "color": "#08bdba",
      "position_x": 48,
      "position_y": 116,
      "width": 2120,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "navigate-1",
          "wait-for-page-load-1",
          "sleep-1",
          "wait-for-element-1",
          "sleep-2"
        ]
      }
    },
    {
      "id": "group-interaction",
      "element_type": "group",
      "title": "Interaction",
      "color": "#a56eff",
      "position_x": 1488,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "inject-javascript-1"
        ]
      }
    },
    {
      "id": "group-extract",
      "element_type": "group",
      "title": "Data Extraction",
      "color": "#42be65",
      "position_x": 2208,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "structured-export-1"
        ]
      }
    },
    {
      "id": "group-pagination",
      "element_type": "group",
      "title": "Pagination Loop",
      "color": "#ff832b",
      "position_x": 2568,
      "position_y": 116,
      "width": 380,
      "height": 296,
      "z_index": 20,
      "data": {
        "memberBlockIds": [
          "loop-continue-1"
        ]
      }
    },
    {
      "id": "note-overview",
      "element_type": "note",
      "title": "Overview",
      "content": "Goodreads reviews/comments scraper equivalent to the Octoparse Goodreads Comments Scraper. It targets Goodreads book review pages and exports current_page, book_title, author, username, rating, comment_time, comment_content, and likes. Pagination uses a finite known-URL list for pages 1-10 with Goodreads ?page=N URLs, and structured export appends each page to one fresh CSV file. Columns use JavaScript IIFE expressions with multiple Goodreads selector fallbacks because Goodreads markup varies. Change navigate.urls to scrape another Goodreads book or page range. Goodreads may block scraping with CAPTCHA, rate limits, login prompts, or layout changes.",
      "color": "#f1c21b",
      "position_x": 80,
      "position_y": 20,
      "width": 480,
      "height": 160,
      "z_index": 22,
      "data": {}
    },
    {
      "id": "note-block-navigate-1",
      "element_type": "note",
      "title": "Note: Navigate",
      "content": "Multi-URL loop over 10 pages. Pair with loop-continue at the end of each iteration.",
      "color": "#ee5396",
      "position_x": 320,
      "position_y": 200,
      "width": 332,
      "height": 107,
      "z_index": 22,
      "data": {
        "block_id": "navigate-1"
      }
    },
    {
      "id": "note-block-inject-javascript-1",
      "element_type": "note",
      "title": "Note: Inject JavaScript",
      "content": "Runs custom JavaScript in the page: `Array.from(document.querySelectorAll('button, a')).filter(function(el) { var t = (el.textContent || ...` Verify in browser if results are empty.",
      "color": "#ee5396",
      "position_x": 1760,
      "position_y": 200,
      "width": 340,
      "height": 140,
      "z_index": 22,
      "data": {
        "block_id": "inject-javascript-1"
      }
    },
    {
      "id": "note-block-structured-export-1",
      "element_type": "note",
      "title": "Note: Structured Export",
      "content": "Structured export with JS columns (current_page, book_title, author, username, rating). These selectors are fragile — update if the site layout changes.",
      "color": "#ee5396",
      "position_x": 2480,
      "position_y": 200,
      "width": 340,
      "height": 130,
      "z_index": 22,
      "data": {
        "block_id": "structured-export-1"
      }
    },
    {
      "id": "note-block-loop-continue-1",
      "element_type": "note",
      "title": "Note: Loop Continue",
      "content": "Loop Continue advances a multi-URL or multi-text loop. Place at the end of the loop body with a clear back-edge to the loop start.",
      "color": "#ee5396",
      "position_x": 2840,
      "position_y": 200,
      "width": 340,
      "height": 123,
      "z_index": 22,
      "data": {
        "block_id": "loop-continue-1"
      }
    }
  ]
}