Fix SingleFile CLI: remove invalid --crawl-delay flag

The SingleFile CLI has no --crawl-delay option. Passing the invalid flag caused the process to print its help text and exit without producing any output. Added --crawl-no-parent and --crawl-replace-URLs instead, and removed the now-unused crawl_delay config key.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-20 06:34:40 +02:00 · 2026-04-18 19:28:03 +00:00 · 2026-04-18 19:28:03 +00:00 · 125602fa13
commit 125602fa13
parent da50e5f0b8
2 changed files with 3 additions and 4 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -489,7 +489,6 @@ scraper:
    executable: single-file
    chromium_path: ""              # Auto-detected from Playwright if empty
    crawl_max_depth: 10
-    crawl_delay: 2                 # Seconds between page fetches
 # Stream B: New Library Pipeline
 new_pipeline:
--- a/lib/scraper_runner.py
+++ b/lib/scraper_runner.py
@@ -368,7 +368,6 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
    executable = sf_cfg.get('executable', 'single-file')
    chromium_path = _get_chromium_path(config)
    crawl_max_depth = sf_cfg.get('crawl_max_depth', 10)
-    crawl_delay = sf_cfg.get('crawl_delay', 2)
    if not chromium_path:
        return 0, 'Chromium not found — cannot use browser crawl mode'
@@ -382,8 +381,9 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
        executable,
        '--crawl-links=true',
        '--crawl-inner-links-only=true',
+        '--crawl-no-parent=true',
+        '--crawl-replace-URLs=true',
        f'--crawl-max-depth={crawl_max_depth}',
-        f'--crawl-delay={crawl_delay * 1000}',  # milliseconds
        f'--browser-executable-path={chromium_path}',
        '--browser-headless=true',
        '--browser-args=["--no-sandbox","--disable-dev-shm-usage"]',
@@ -391,7 +391,7 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
        url,
    ]
-    logger.info(f"Job {job_id}: SingleFile crawl starting (depth={crawl_max_depth}, delay={crawl_delay}s)")
+    logger.info(f"Job {job_id}: SingleFile crawl starting (depth={crawl_max_depth})")
    sf_log = os.path.join(workspace, 'singlefile.log')
    try:
        with open(sf_log, 'w') as log_fh: