import html
import json
import os

import requests as http_requests
from flask import Flask, request, jsonify, redirect
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

from .utils import get_config, content_hash, setup_logging
from .status import StatusDB
# Module-level logger, created by the project's setup_logging helper under the
# name 'recon.api'.
logger = setup_logging('recon.api')
# Flask application object; the route decorators in this module register their
# view functions on it.
app = Flask(__name__)
# Page skeleton shared by the HTML views: render() replaces the {{CONTENT}}
# placeholder with the text built by each view.
HTML_TEMPLATE = """
RECON
{{CONTENT}}
"""
def render(content):
    """Return HTML_TEMPLATE with its ``{{CONTENT}}`` placeholder filled in.

    Args:
        content: Text substituted for every occurrence of the placeholder.

    Returns:
        The completed page as a string.
    """
    placeholder = '{{CONTENT}}'
    page = HTML_TEMPLATE.replace(placeholder, content)
    return page
@app.route('/')
def dashboard():
    """Render the dashboard: catalogue total, pipeline status table, sources.

    Reads the ``'catalogue'`` and ``'documents'`` sub-mappings of
    ``StatusDB.get_status_counts()``. When ``StatusDB.source_breakdown()``
    returns any rows, one line per source is appended with its document count
    and total size in MB.

    Returns:
        The page produced by ``render()``.
    """
    db = StatusDB()
    counts = db.get_status_counts()
    cat = counts.get('catalogue', {})
    doc = counts.get('documents', {})

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        "\nCatalogued PDFs\n",
        f"{sum(cat.values())}\n",
        "Pipeline Status\n",
        "| Status | Count |\n",
    ]

    # Statuses are listed explicitly so a stage with no documents still
    # appears, with a count of 0.
    pipeline_statuses = (
        'queued', 'extracting', 'extracted', 'enriching',
        'enriched', 'embedding', 'complete', 'failed',
    )
    parts.extend(
        f'| {status} | {doc.get(status, 0)} |\n\n'
        for status in pipeline_statuses
    )
    parts.append("\n")

    sources = db.source_breakdown()
    if sources:
        parts.append('Sources\n| Source | Count | Size |\n')
        for s in sources:
            # total_bytes may be absent or None; both are treated as zero.
            size_mb = (s.get('total_bytes', 0) or 0) / (1024 * 1024)
            # Source names are data, not markup: escape before embedding them
            # in the page so characters such as '<' or '&' cannot break it.
            name = html.escape(str(s['source']))
            parts.append(f"| {name} | {s['count']} | {size_mb:.1f} MB |\n")
        parts.append("\n")

    return render(''.join(parts))
@app.route('/search')
def search_page():
    """Render the semantic-search page for the ``q`` query parameter.

    With no ``q`` the page only shows the search prompt. Otherwise the query
    text is posted to the configured embedding service (``/api/embed``), the
    first entry of the returned ``'embeddings'`` list is used to query the
    configured Qdrant collection, and one entry per hit is rendered.

    Query parameters:
        q: Search text.
        limit: Maximum number of hits (default 20). A value that is not an
            integer falls back to the default; values below 1 are raised to 1.
        source_type: Optional exact-match filter on the ``source_type``
            payload field.

    Returns:
        The page produced by ``render()``. If embedding, searching or
        rendering fails, a page containing the error message is returned
        instead and the exception is logged.
    """
    def esc(value):
        """Coerce *value* to text and HTML-escape it."""
        return html.escape(str(value))

    query = request.args.get('q', '')
    if not query:
        return render(
            "\nSemantic Search\n"
            "Enter a query to search across all embedded concepts.\n"
        )

    config = get_config()
    # type=int makes a malformed ?limit= fall back to the default instead of
    # raising ValueError (which previously surfaced as an HTTP 500).
    limit = max(1, request.args.get('limit', 20, type=int))
    source_filter = request.args.get('source_type', None)

    try:
        embed_cfg = config['embedding']
        url = f"http://{embed_cfg['host']}:{embed_cfg['port']}/api/embed"
        resp = http_requests.post(
            url,
            json={"model": embed_cfg['model'], "input": query},
            timeout=120,
        )
        resp.raise_for_status()
        query_vector = resp.json()['embeddings'][0]

        db_cfg = config['vector_db']
        qdrant = QdrantClient(
            host=db_cfg['host'],
            port=db_cfg['port'],
            timeout=60,
        )
        search_filter = None
        if source_filter:
            search_filter = Filter(must=[
                FieldCondition(key="source_type", match=MatchValue(value=source_filter))
            ])
        results = qdrant.query_points(
            collection_name=db_cfg['collection'],
            query=query_vector,
            limit=limit,
            query_filter=search_filter,
        ).points

        # The query comes straight from the URL and the payload fields come
        # from ingested documents, so everything is escaped before it is
        # placed in the page.
        parts = [f"\nResults for: {esc(query)}\n{len(results)} results\n"]
        for r in results:
            p = r.payload or {}  # a point may carry no payload
            title = p.get('title', 'Untitled')
            summary = p.get('summary')
            if summary is None:
                # Fall back to the start of the content; tolerate None there.
                summary = (p.get('content') or '')[:200]
            domains = p.get('domain', [])
            book = p.get('book_title', p.get('filename', ''))
            source_type = p.get('source_type', 'document')
            skill_level = p.get('skill_level', 'unknown')
            # Only list-valued domains are rendered; anything else is ignored.
            domain_tags = ''.join(
                esc(d) for d in (domains if isinstance(domains, list) else [])
            )
            parts.append(
                f"\n{r.score:.4f}\n"
                f"{esc(title)}\n"
                f"{esc(book)} | {esc(source_type)} | {esc(skill_level)}\n"
                f"{esc(summary)}\n"
                f"{domain_tags}\n"
            )
        return render(''.join(parts))
    except Exception as e:
        # Top-level boundary for the page: keep the user-facing error, but
        # record the traceback instead of discarding it.
        logger.exception('Search failed')
        return render(f'Search error: {esc(e)}\n')
@app.route('/catalogue')
def catalogue_page():
    """Render the document catalogue listing.

    Query parameters:
        source: Optional value passed to ``StatusDB.get_all_documents`` as
            ``source``.
        category: Optional value passed through as ``category``.
        limit: Maximum number of documents requested (default 100). A value
            that is not an integer falls back to the default.

    Returns:
        The page produced by ``render()``: a heading, the list of sources
        returned by ``StatusDB.get_sources()`` (when non-empty), and one row
        per document.
    """
    def esc(value):
        """Coerce *value* to text and HTML-escape it."""
        return html.escape(str(value))

    db = StatusDB()
    source = request.args.get('source', None)
    category = request.args.get('category', None)
    # type=int avoids a ValueError (HTTP 500) on a malformed ?limit= value.
    limit = request.args.get('limit', 100, type=int)
    docs = db.get_all_documents(source=source, category=category, limit=limit)

    # Collect fragments and join once instead of growing a string with +=.
    parts = ['Document Catalogue\n']

    sources = db.get_sources()
    if sources:
        parts.append('\nAll')
        parts.extend(f'\n{esc(s)}' for s in sources)
        parts.append('\n')

    parts.append("\n| Filename | Source | Status | Pages | Concepts | Vectors |\n")
    for d in docs:
        # Stored values are data, not markup, so each one is escaped.
        parts.append(
            f"\n| {esc(d.get('filename', '?'))} |\n"
            f"{esc(d.get('source', ''))} |\n"
            f"{esc(d.get('status', 'unknown'))} |\n"
            f"{esc(d.get('pages_extracted', 0))} |\n"
            f"{esc(d.get('concepts_extracted', 0))} |\n"
            f"{esc(d.get('vectors_inserted', 0))} |\n"
        )
    parts.append("\n")

    return render(''.join(parts))
@app.route('/failures')
def failures_page():
    """Render the list of failed documents from ``StatusDB.get_failures()``.

    Each row shows the filename, the first 100 characters of the error
    message, and the retry count. When there are no failures the page says so.

    Returns:
        The page produced by ``render()``.
    """
    db = StatusDB()
    failures = db.get_failures()

    if not failures:
        return render('Failed Documents\n' + 'No failures.\n')

    parts = [
        'Failed Documents\n',
        '| Filename | Error | Retries | Actions |\n',
    ]
    for f in failures:
        # The key can be present with a None value, which the dict default
        # does not cover; slicing None would raise TypeError.
        error_text = str(f.get('error_message') or 'unknown')[:100]
        # Truncate first, then escape, so an HTML entity is never cut in half.
        # Error messages often contain '<' and '>', which would otherwise be
        # read as markup.
        parts.append(
            f"\n| {html.escape(str(f.get('filename', '?')))} |\n"
            f"{html.escape(error_text)} |\n"
            f"{f.get('retry_count', 0)} |\n"
            "|\n"
        )
    parts.append("\n")

    return render(''.join(parts))
@app.route('/api/search', methods=['POST'])
def api_search():
    """JSON search endpoint: embed the query and search the vector store.

    Request body (JSON object):
        query: Text to embed (required).
        limit: Maximum number of hits; a positive integer, default 20.
        source_type: Optional exact-match filter on the ``source_type``
            payload field.

    Returns:
        200 with ``{'query': ..., 'results': [{'score', 'payload'}, ...]}``.
        400 with ``{'error': ...}`` when the body is missing, is not a JSON
        object, lacks ``query``, or carries an invalid ``limit``.
        500 with ``{'error': ...}`` when embedding or the vector search fails.
    """
    # silent=True returns None for a missing or malformed JSON body, so the
    # client gets the JSON 400 below rather than the framework's error page.
    data = request.get_json(silent=True)
    # A JSON string or list would pass an `in` test and then fail on
    # data['query'], so require an object explicitly.
    if not isinstance(data, dict) or 'query' not in data:
        return jsonify({'error': 'Missing query'}), 400

    query = data['query']
    limit = data.get('limit', 20)
    # bool is a subclass of int, so exclude it explicitly. Rejecting bad
    # values here reports a client error instead of a 500 from the search.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
        return jsonify({'error': 'limit must be a positive integer'}), 400
    source_type = data.get('source_type', None)

    config = get_config()
    try:
        embed_cfg = config['embedding']
        url = f"http://{embed_cfg['host']}:{embed_cfg['port']}/api/embed"
        resp = http_requests.post(
            url,
            json={"model": embed_cfg['model'], "input": query},
            timeout=120,
        )
        resp.raise_for_status()
        query_vector = resp.json()['embeddings'][0]

        db_cfg = config['vector_db']
        qdrant = QdrantClient(
            host=db_cfg['host'],
            port=db_cfg['port'],
            timeout=60,
        )
        search_filter = None
        if source_type:
            search_filter = Filter(must=[
                FieldCondition(key="source_type", match=MatchValue(value=source_type))
            ])
        results = qdrant.query_points(
            collection_name=db_cfg['collection'],
            query=query_vector,
            limit=limit,
            query_filter=search_filter,
        ).points

        hits = [{'score': r.score, 'payload': r.payload} for r in results]
        return jsonify({'query': query, 'results': hits})
    except Exception as e:
        # Top-level boundary for the endpoint: keep the existing JSON error
        # shape, but record the traceback instead of discarding it.
        logger.exception('API search failed')
        return jsonify({'error': str(e)}), 500
@app.route('/api/status')
def api_status():
    """Return the counts from ``StatusDB.get_status_counts()`` as JSON."""
    status_counts = StatusDB().get_status_counts()
    return jsonify(status_counts)
# The rule must declare <file_hash>: the view takes it as a required argument,
# and Flask only passes arguments that appear as variables in the URL rule.
# Without it every request to this endpoint raised TypeError.
@app.route('/api/retry/<file_hash>', methods=['POST'])
def api_retry(file_hash):
    """Increment the retry counter for one document, then show the failures page.

    Args:
        file_hash: Identifier taken from the URL path and passed to
            ``StatusDB.increment_retry``.

    Returns:
        A redirect response to ``/failures``.
    """
    db = StatusDB()
    db.increment_retry(file_hash)
    return redirect('/failures')
@app.route('/api/ingest', methods=['POST'])
def api_ingest():
    """Ingest a JSON document through ``ingest_intel``.

    Returns:
        200 with ``{'intel_id': <result>}`` when ``ingest_intel`` returns a
        value other than None.
        400 with ``{'error': 'No JSON body'}`` when the body is missing,
        malformed, or empty.
        500 with ``{'error': 'Ingestion failed'}`` when ``ingest_intel``
        returns None.
    """
    # Imported at call time rather than at module load.
    # NOTE(review): presumably to avoid a circular or costly import; confirm.
    from .ingester import ingest_intel

    # silent=True returns None for a missing or malformed JSON body, so the
    # client gets the JSON 400 below rather than the framework's error page.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'No JSON body'}), 400

    result = ingest_intel(data, get_config())
    if result is None:
        return jsonify({'error': 'Ingestion failed'}), 500
    return jsonify({'intel_id': result})
def run_server():
    """Start the Flask app with ``app.run`` on the configured web host and port.

    The host and port are read from the ``'web'`` section of ``get_config()``;
    debug mode is disabled.
    """
    web_cfg = get_config()['web']
    host, port = web_cfg['host'], web_cfg['port']
    logger.info(f"Starting RECON web dashboard on {host}:{port}")
    app.run(host=host, port=port, debug=False)