Fix Kiwix status badges to reflect full pipeline state

The status badge showed COMPLETE as soon as ZIM extraction finished, even
when documents were still queued for enrichment/embedding.
_build_kiwix_sources now computes an effective_status for each source from
that source's actual pipeline state:

- DETECTED: ingest not enabled (gray)
- EXTRACTING: ZIM processor running (blue)
- PROCESSING: extracted but docs still in enricher/embedder queue (amber)
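- COMPLETE: extraction finished and no docs remain in the enricher/embedder
  queue, i.e. every doc is either complete (embedded in Qdrant) or failed;
  a source with no pipeline rows keeps its stored 'complete' status (green)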

Also fixed _build_kiwix_sources pipeline query to filter by category
per-source instead of returning global kiwix stats for every source.

Progress column now shows "X / Y in Qdrant (Z%)" while a source is
processing, or "X / Y extracted" (no percentage) otherwise.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-17 15:22:44 +00:00
commit fed02186fa
3 changed files with 31 additions and 8 deletions

View file

@ -2198,19 +2198,20 @@ def _build_kiwix_sources():
for r in rows: for r in rows:
source = dict(r) source = dict(r)
zim_title = r['title'] or r['zim_filename']
total_articles += r['article_count'] or 0 total_articles += r['article_count'] or 0
total_processed += r['processed_count'] or 0 total_processed += r['processed_count'] or 0
# Get pipeline stats for this source's documents # Get pipeline stats for THIS source's documents (filtered by category)
pipeline = {} pipeline = {}
try: try:
pipe_rows = conn.execute(""" pipe_rows = conn.execute("""
SELECT d.status, COUNT(*) as cnt SELECT d.status, COUNT(*) as cnt
FROM documents d FROM documents d
JOIN catalogue c ON d.hash = c.hash JOIN catalogue c ON d.hash = c.hash
WHERE c.source = 'kiwix' WHERE c.source = 'kiwix' AND c.category = ?
GROUP BY d.status GROUP BY d.status
""").fetchall() """, (zim_title,)).fetchall()
for pr in pipe_rows: for pr in pipe_rows:
pipeline[pr['status']] = pr['cnt'] pipeline[pr['status']] = pr['cnt']
except Exception: except Exception:
@ -2219,6 +2220,19 @@ def _build_kiwix_sources():
in_pipe = sum(v for k, v in pipeline.items() if k not in ('complete', 'failed')) in_pipe = sum(v for k, v in pipeline.items() if k not in ('complete', 'failed'))
total_in_pipeline += in_pipe total_in_pipeline += in_pipe
source['pipeline'] = pipeline source['pipeline'] = pipeline
# Compute effective status reflecting full pipeline state
db_status = r['status']
if db_status == 'complete' and pipeline:
if in_pipe > 0:
source['effective_status'] = 'processing'
else:
source['effective_status'] = 'complete'
elif db_status == 'ingesting':
source['effective_status'] = 'extracting'
else:
source['effective_status'] = db_status # 'detected'
sources.append(source) sources.append(source)
# Check kiwix-serve health # Check kiwix-serve health

View file

@ -329,3 +329,5 @@ tr:hover { background: var(--bg-secondary); }
.badge-complete { background: #1a4a2e; color: #00ff41; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } .badge-complete { background: #1a4a2e; color: #00ff41; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; }
.badge-ingesting { background: #1a3a5a; color: #0ea5e9; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } .badge-ingesting { background: #1a3a5a; color: #0ea5e9; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; }
.badge-detected { background: #333; color: #888; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } .badge-detected { background: #333; color: #888; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; }
.badge-processing { background: #4a3a1a; color: #f59e0b; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; }
.badge-extracting { background: #1a3a5a; color: #0ea5e9; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; }

View file

@ -20,9 +20,15 @@
var sources = data.sources || []; var sources = data.sources || [];
var html = ''; var html = '';
sources.forEach(function(s) { sources.forEach(function(s) {
var pctDone = s.article_count > 0 ? (s.processed_count / s.article_count * 100).toFixed(1) : 0; var es = s.effective_status || s.status;
var statusBadge = s.status === 'complete' ? '<span class="badge-complete">COMPLETE</span>' : var pipe = s.pipeline || {};
s.status === 'ingesting' ? '<span class="badge-ingesting">INGESTING</span>' : var pipeComplete = pipe.complete || 0;
var pipeTotal = 0;
for (var k in pipe) pipeTotal += pipe[k];
var pctDone = pipeTotal > 0 ? (pipeComplete / pipeTotal * 100).toFixed(1) : 0;
var statusBadge = es === 'complete' ? '<span class="badge-complete">COMPLETE</span>' :
es === 'processing' ? '<span class="badge-processing">PROCESSING</span>' :
es === 'extracting' ? '<span class="badge-extracting">EXTRACTING</span>' :
'<span class="badge-detected">DETECTED</span>'; '<span class="badge-detected">DETECTED</span>';
// Derive browse URL from zim_filename // Derive browse URL from zim_filename
var zimName = s.zim_filename.replace(/_(?:maxi|mini|nopic)_[\d-]+\.zim$/, ''); var zimName = s.zim_filename.replace(/_(?:maxi|mini|nopic)_[\d-]+\.zim$/, '');
@ -38,8 +44,9 @@
'<div class="text-small text-muted">' + s.zim_filename + '</div></td>' + '<div class="text-small text-muted">' + s.zim_filename + '</div></td>' +
'<td>' + (s.language || '\u2014') + '</td>' + '<td>' + (s.language || '\u2014') + '</td>' +
'<td>' + RECON.fmt(s.article_count) + '</td>' + '<td>' + RECON.fmt(s.article_count) + '</td>' +
'<td>' + RECON.fmt(s.processed_count) + ' / ' + RECON.fmt(s.article_count) + '<td>' + (es === 'processing' ?
' (' + pctDone + '%)</td>' + RECON.fmt(pipeComplete) + ' / ' + RECON.fmt(pipeTotal) + ' in Qdrant (' + pctDone + '%)' :
RECON.fmt(s.processed_count) + ' / ' + RECON.fmt(s.article_count) + ' extracted') + '</td>' +
'<td>' + statusBadge + '</td>' + '<td>' + statusBadge + '</td>' +
'<td>' + toggle + '</td>' + '<td>' + toggle + '</td>' +
'<td><a href="' + browseUrl + '" target="_blank">Browse</a></td>' + '<td><a href="' + browseUrl + '" target="_blank">Browse</a></td>' +