Kiwix integration: ZIM processor, dashboard tab, wiki.echo6.co citations

- ZIM processor: extract articles from ZIM files, feed into existing enrichment pipeline
- Dashboard: Kiwix tab with library table, ingest toggle, upload, remove
- kiwix-serve on port 8430, wiki.echo6.co behind Authentik
- Citation URLs point to wiki.echo6.co/{zim_name}/{article_path}
- Dashboard shows WIKI type badge for ZIM-sourced content
- Appropedia EN (19,445 articles) fully ingested as proof of concept
This commit is contained in:
Matt 2026-04-17 07:00:24 +00:00
commit 2635160887
7 changed files with 521 additions and 3 deletions

View file

@@ -10,6 +10,7 @@ Dependencies: requests, qdrant-client
Config: embedding, vector_db, processing.embed_workers
"""
import json
import re
import os
import time
import traceback
@@ -290,7 +291,17 @@ def embed_single(file_hash, db, config):
page_timestamps = meta['page_timestamps']
except Exception:
pass
if doc.get('path'):
# For ZIM articles, build wiki.echo6.co URL from meta.json
if source_type == 'zim' and meta.get('article_path'):
from urllib.parse import quote as url_quote
zim_name = meta.get('zim_name', '')
if not zim_name:
# Derive from zim_file: strip flavor/date suffix
zf = meta.get('zim_file', '')
zim_name = re.sub(r'_(?:maxi|mini|nopic)_[\d-]+\.zim$', '', zf)
article_path = url_quote(meta['article_path'], safe='/:@!$&()*+,;=-._~')
download_url = f'https://wiki.echo6.co/{zim_name}/{article_path}'
elif doc.get('path'):
download_url = generate_download_url(
doc['path'], config.get('library_root', '/mnt/library')
)