mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Phase 3: dispatcher, transcript processor, text_dir resolution
- lib/dispatcher.py: one-shot dispatcher that scans acquired/<type>/ for content+sidecar pairs and routes to registered processors
- lib/processors/transcript_processor.py: pre_flight() for transcripts (hash, dedupe, split into pages, register in DB, set text_dir)
- lib/utils.py: resolve_text_dir() helper for text_dir column fallback
- lib/enricher.py: use resolve_text_dir() instead of hardcoded path
- lib/embedder.py: use resolve_text_dir() instead of hardcoded path
- lib/processors/__init__.py, lib/acquisition/__init__.py: package inits

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
de2c59a501
commit
66fadb7487
7 changed files with 293 additions and 2 deletions
|
|
@@ -25,6 +25,7 @@ import google.generativeai as genai
|
|||
|
||||
from .utils import get_config, setup_logging
|
||||
from .status import StatusDB
|
||||
+ from .utils import resolve_text_dir
|
||||
|
||||
logger = setup_logging('recon.enricher')
|
||||
|
||||
|
|
@@ -345,7 +346,7 @@ def enrich_single(file_hash, db, config, key_rotator):
|
|||
if not doc:
|
||||
return False
|
||||
|
||||
- text_dir = os.path.join(config['paths']['text'], file_hash)
|
||||
+ text_dir = resolve_text_dir(file_hash, config, db)
|
||||
concepts_dir = os.path.join(config['paths']['concepts'], file_hash)
|
||||
window_size = config['processing']['enrich_window_size']
|
||||
delay = config['processing']['rate_limit_delay']
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue