mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Kiwix integration: ZIM processor, dashboard tab, wiki.echo6.co citations
- ZIM processor: extract articles from ZIM files, feed into existing enrichment pipeline
- Dashboard: Kiwix tab with library table, ingest toggle, upload, remove
- kiwix-serve on port 8430, wiki.echo6.co behind Authentik
- Citation URLs point to wiki.echo6.co/{zimname}/{article_path}
- Dashboard shows WIKI type badge for ZIM-sourced content
- Appropedia EN (19,445 articles) fully ingested as proof of concept
This commit is contained in:
parent
c60aa5e80d
commit
2635160887
7 changed files with 521 additions and 3 deletions
|
|
@@ -10,6 +10,7 @@ Dependencies: requests, qdrant-client
|
|||
Config: embedding, vector_db, processing.embed_workers
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
|
|
@@ -290,7 +291,17 @@ def embed_single(file_hash, db, config):
|
|||
page_timestamps = meta['page_timestamps']
|
||||
except Exception:
|
||||
pass
|
||||
if doc.get('path'):
|
||||
# For ZIM articles, build wiki.echo6.co URL from meta.json
|
||||
if source_type == 'zim' and meta.get('article_path'):
|
||||
from urllib.parse import quote as url_quote
|
||||
zim_name = meta.get('zim_name', '')
|
||||
if not zim_name:
|
||||
# Derive from zim_file: strip flavor/date suffix
|
||||
zf = meta.get('zim_file', '')
|
||||
zim_name = re.sub(r'_(?:maxi|mini|nopic)_[\d-]+\.zim$', '', zf)
|
||||
article_path = url_quote(meta['article_path'], safe='/:@!$&()*+,;=-._~')
|
||||
download_url = f'https://wiki.echo6.co/{zim_name}/{article_path}'
|
||||
elif doc.get('path'):
|
||||
download_url = generate_download_url(
|
||||
doc['path'], config.get('library_root', '/mnt/library')
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue