Fix: stale cleanup in processors must fail loudly on permission errors

Phase 5c-2 failed because shutil.rmtree(ignore_errors=True) silently
failed to clean up root-owned legacy files in processing/{hash}/,
letting the processor proceed into a half-cleaned directory and then
crash on subsequent file writes.

Changes: removed ignore_errors=True and wrapped each shutil.rmtree call
in a try/except that logs the error and re-raises, so the processor
fails early and visibly if stale cleanup fails.

Recovery from Phase 5c-2 failure.
This commit is contained in:
Matt 2026-04-14 20:15:48 +00:00
commit 9fa60f9c86
2 changed files with 24 additions and 4 deletions

View file

@ -372,8 +372,18 @@ def pre_flight(content_path, meta_path, db, config):
)
proc_dir = os.path.join(processing_root, file_hash)
concepts_dir = os.path.join(config['paths']['concepts'], file_hash)
shutil.rmtree(proc_dir, ignore_errors=True)
shutil.rmtree(concepts_dir, ignore_errors=True)
if os.path.exists(proc_dir):
try:
shutil.rmtree(proc_dir)
except Exception as e:
logger.error("Stale cleanup failed for %s: %s", proc_dir, e)
raise
if os.path.exists(concepts_dir):
try:
shutil.rmtree(concepts_dir)
except Exception as e:
logger.error("Stale cleanup failed for %s: %s", concepts_dir, e)
raise
# ── Step 3: Hash dedupe ───────────────────────────────────────
conn = db._get_conn()

View file

@ -57,8 +57,18 @@ def pre_flight(content_path, meta_path, db, config):
)
proc_dir = os.path.join(processing_root, file_hash)
concepts_dir = os.path.join(config['paths']['concepts'], file_hash)
shutil.rmtree(proc_dir, ignore_errors=True)
shutil.rmtree(concepts_dir, ignore_errors=True)
if os.path.exists(proc_dir):
try:
shutil.rmtree(proc_dir)
except Exception as e:
logger.error("Stale cleanup failed for %s: %s", proc_dir, e)
raise
if os.path.exists(concepts_dir):
try:
shutil.rmtree(concepts_dir)
except Exception as e:
logger.error("Stale cleanup failed for %s: %s", concepts_dir, e)
raise
# Hash dedupe: if hash exists in catalogue, delete the pair and return
conn = db._get_conn()