From 9fa60f9c86b313e7305ce5b66647216441480af6 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 14 Apr 2026 20:15:48 +0000 Subject: [PATCH] Fix: stale cleanup in processors must fail loudly on permission errors Phase 5c-2 failed because shutil.rmtree(ignore_errors=True) silently failed to clean up root-owned legacy files in processing/{hash}/, letting the processor proceed into a half-cleaned directory and then crash on subsequent file writes. Changes: removed ignore_errors=True, wrapped in try/except that logs and re-raises, so the processor fails early and visibly if stale cleanup fails. Recovery from Phase 5c-2 failure. --- lib/processors/pdf_processor.py | 14 ++++++++++++-- lib/processors/transcript_processor.py | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/processors/pdf_processor.py b/lib/processors/pdf_processor.py index 2c9224e..5d7a57f 100644 --- a/lib/processors/pdf_processor.py +++ b/lib/processors/pdf_processor.py @@ -372,8 +372,18 @@ def pre_flight(content_path, meta_path, db, config): ) proc_dir = os.path.join(processing_root, file_hash) concepts_dir = os.path.join(config['paths']['concepts'], file_hash) - shutil.rmtree(proc_dir, ignore_errors=True) - shutil.rmtree(concepts_dir, ignore_errors=True) + if os.path.exists(proc_dir): + try: + shutil.rmtree(proc_dir) + except Exception as e: + logger.error("Stale cleanup failed for %s: %s", proc_dir, e) + raise + if os.path.exists(concepts_dir): + try: + shutil.rmtree(concepts_dir) + except Exception as e: + logger.error("Stale cleanup failed for %s: %s", concepts_dir, e) + raise # ── Step 3: Hash dedupe ─────────────────────────────────────── conn = db._get_conn() diff --git a/lib/processors/transcript_processor.py b/lib/processors/transcript_processor.py index 21860cf..c5d8023 100644 --- a/lib/processors/transcript_processor.py +++ b/lib/processors/transcript_processor.py @@ -57,8 +57,18 @@ def pre_flight(content_path, meta_path, db, config): ) proc_dir = os.path.join(processing_root, file_hash) concepts_dir = os.path.join(config['paths']['concepts'], file_hash) - shutil.rmtree(proc_dir, ignore_errors=True) - shutil.rmtree(concepts_dir, ignore_errors=True) + if os.path.exists(proc_dir): + try: + shutil.rmtree(proc_dir) + except Exception as e: + logger.error("Stale cleanup failed for %s: %s", proc_dir, e) + raise + if os.path.exists(concepts_dir): + try: + shutil.rmtree(concepts_dir) + except Exception as e: + logger.error("Stale cleanup failed for %s: %s", concepts_dir, e) + raise # Hash dedupe: if hash exists in catalogue, delete the pair and return conn = db._get_conn()