From 125602fa1369ea0b1ec7a98406e2321473e428d1 Mon Sep 17 00:00:00 2001
From: Matt <matt@echo6.co>
Date: Sat, 18 Apr 2026 19:28:03 +0000
Subject: [PATCH] Fix SingleFile CLI: remove invalid --crawl-delay flag

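The SingleFile CLI has no --crawl-delay option. Passing the invalid flag
made the process print its help text and exit without producing any
crawl output. Removed the flag and added --crawl-no-parent and
--crawl-replace-URLs to the crawl command. Also removed the now-unused
crawl_delay config key and dropped the delay from the crawl-start log
message.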

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 config.yaml           | 1 -
 lib/scraper_runner.py | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/config.yaml b/config.yaml
index c98a866..bdabf69 100644
--- a/config.yaml
+++ b/config.yaml
@@ -489,7 +489,6 @@ scraper:
     executable: single-file
     chromium_path: ""              # Auto-detected from Playwright if empty
     crawl_max_depth: 10
-    crawl_delay: 2                 # Seconds between page fetches
 
 # Stream B: New Library Pipeline
 new_pipeline:
diff --git a/lib/scraper_runner.py b/lib/scraper_runner.py
index 1599f2e..a3ff820 100644
--- a/lib/scraper_runner.py
+++ b/lib/scraper_runner.py
@@ -368,7 +368,6 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
     executable = sf_cfg.get('executable', 'single-file')
     chromium_path = _get_chromium_path(config)
     crawl_max_depth = sf_cfg.get('crawl_max_depth', 10)
-    crawl_delay = sf_cfg.get('crawl_delay', 2)
 
     if not chromium_path:
         return 0, 'Chromium not found — cannot use browser crawl mode'
@@ -382,8 +381,9 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
         executable,
         '--crawl-links=true',
         '--crawl-inner-links-only=true',
+        '--crawl-no-parent=true',
+        '--crawl-replace-URLs=true',
         f'--crawl-max-depth={crawl_max_depth}',
-        f'--crawl-delay={crawl_delay * 1000}',  # milliseconds
         f'--browser-executable-path={chromium_path}',
         '--browser-headless=true',
         '--browser-args=["--no-sandbox","--disable-dev-shm-usage"]',
@@ -391,7 +391,7 @@ def _crawl_singlefile(job, url, site_dir, config, stop_event, db):
         url,
     ]
 
-    logger.info(f"Job {job_id}: SingleFile crawl starting (depth={crawl_max_depth}, delay={crawl_delay}s)")
+    logger.info(f"Job {job_id}: SingleFile crawl starting (depth={crawl_max_depth})")
     sf_log = os.path.join(workspace, 'singlefile.log')
     try:
         with open(sf_log, 'w') as log_fh: