mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Phase 5c-1: dispatcher loop, filing worker loop, service rewire
Adds dispatch_loop() alongside dispatch_once() for service-thread use. Adds filing_worker_loop() that watches for status=complete items in /opt/recon/data/processing/ and files them to library/Domain/Subdomain/. Rewires cmd_service() to run the new architecture: - Removed: scanner_loop, peertube_scanner_loop, crawler_scheduler_loop, organizer_loop (all replaced by dispatcher + new filing worker) - Kept: enrich and embed stage workers, progress, dashboard - Kept (vestigial): extract stage worker — will be removed in Phase 6 cleanup - Added: dispatcher loop thread, filing worker thread Phase 5c-1 of the refactor. Service not yet started — Phase 5c-2 will do that. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
96e1e642c4
commit
d9aed35fd7
3 changed files with 98 additions and 242 deletions
|
|
@ -156,3 +156,60 @@ def file_processed_item(doc_hash, source_file_path, db, config, dry_run=False):
|
|||
logger.error("DB/Qdrant update failed for %s: %s", doc_hash[:8], e)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def filing_worker_loop(stop_event, db, config, interval=30):
    """Poll for documents that are ready to file and file them.

    Each pass selects up to 50 rows from ``documents`` whose status is
    'complete', whose ``organized_at`` is NULL and whose path lies under
    /opt/recon/data/processing/, then hands each one to
    ``file_processed_item``. Per-batch totals are logged.

    Intended to be run as a long-lived service thread: exceptions raised
    while querying or filing are logged instead of being propagated.

    Args:
        stop_event: Event-like object; the loop ends once it is set, and
            its ``wait`` is used as the (interruptible) pause between passes.
        db: Database wrapper; a connection is obtained via ``db._get_conn()``
            and the wrapper is also passed through to ``file_processed_item``.
        config: Passed through unchanged to ``file_processed_item``.
        interval: Seconds to wait between passes.
    """
    logger.info("[filing] Worker started (interval: %ds)", interval)

    # Implicit string concatenation keeps the statement identical to a
    # single-line query while staying readable.
    pending_query = (
        "SELECT hash, path FROM documents "
        "WHERE status = 'complete' "
        "AND organized_at IS NULL "
        "AND path LIKE '/opt/recon/data/processing/%' "
        "LIMIT 50"
    )

    while not stop_event.is_set():
        try:
            connection = db._get_conn()
            pending = connection.execute(pending_query).fetchall()

            if not pending:
                logger.debug("[filing] No items ready to file")
            else:
                tally = {'filed': 0, 'skipped': 0, 'errors': 0}

                for doc in pending:
                    # Abandon the rest of the batch promptly on shutdown.
                    if stop_event.is_set():
                        break

                    try:
                        outcome = file_processed_item(
                            doc['hash'], doc['path'], db, config
                        )
                        verdict = outcome.get('action', 'unknown')

                        if verdict == 'filed':
                            tally['filed'] += 1
                        elif verdict.startswith('skip'):
                            tally['skipped'] += 1
                        elif verdict == 'error':
                            tally['errors'] += 1
                            logger.warning("[filing] Error filing %s: %s",
                                           doc['hash'][:8],
                                           outcome.get('error', 'unknown'))
                        # Any other action value is deliberately left uncounted.
                    except Exception as exc:
                        # One bad item must not stop the rest of the batch.
                        tally['errors'] += 1
                        logger.error("[filing] Exception filing %s: %s",
                                     doc['hash'][:8], exc, exc_info=True)

                logger.info("[filing] Batch: %d filed, %d skipped, %d errors",
                            tally['filed'], tally['skipped'], tally['errors'])

        except Exception as exc:
            # Catch-all boundary so a failed pass is logged and the loop
            # carries on to the next one.
            logger.error("[filing] Error in filing worker: %s", exc, exc_info=True)

        # Sleeps for `interval` seconds but returns early once stop_event is set.
        stop_event.wait(interval)

    logger.info("[filing] Worker stopped")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue