From df29d598d377fcc1de81d912a743c292c4689f99 Mon Sep 17 00:00:00 2001
From: Matt <matt@echo6.co>
Date: Tue, 14 Apr 2026 22:49:21 +0000
Subject: [PATCH] Phase 6a: transcripts mark organized in-place, skip filing

Transcripts are derived text from PeerTube videos, not primary source
files. They do not belong in library/Domain/Subdomain/ like PDFs.

Change: transcript_processor.pre_flight() now sets organized_at =
CURRENT_TIMESTAMP at the end of successful processing, marking the
transcript as organized in place. The watch URL remains in
catalogue.path and Qdrant download_url so users clicking search
results go to the PeerTube video.

The filing worker's path LIKE filter naturally excludes transcripts
since their documents.path is the watch URL, not a filesystem path.
No filing worker changes needed.

Back-fills 2,260 drain items from Phase 5c-2 via one-time SQL.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 lib/processors/transcript_processor.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/processors/transcript_processor.py b/lib/processors/transcript_processor.py
index c5d8023..dbc3013 100644
--- a/lib/processors/transcript_processor.py
+++ b/lib/processors/transcript_processor.py
@@ -149,10 +149,16 @@ def pre_flight(content_path, meta_path, db, config):
     # Queue and advance to extracted
     db.queue_document(file_hash)
 
-    # Set text_dir and page_count on the documents row
+    # Set text_dir, page_count, and organized_at on the documents row.
+    # Transcripts are derived text from PeerTube videos, not primary sources.
+    # They don't get filed into library/Domain/Subdomain/ like PDFs -- instead,
+    # they're marked organized in-place. Their watch URL remains in catalogue.path
+    # and Qdrant download_url so users clicking search results go to PeerTube.
+    # The filing worker's path LIKE filter naturally excludes transcripts since
+    # their documents.path is the watch URL, not a filesystem path.
     conn = db._get_conn()
     conn.execute(
-        "UPDATE documents SET text_dir = ?, page_count = ? WHERE hash = ?",
+        "UPDATE documents SET text_dir = ?, page_count = ?, organized_at = CURRENT_TIMESTAMP WHERE hash = ?",
         (proc_dir, len(pages), file_hash)
     )
     conn.commit()