"""Flask web dashboard and JSON API for the RECON knowledge base.

HTML pages: ``/`` (dashboard), ``/search``, ``/catalogue``, ``/failures``.
JSON API:   ``/api/search``, ``/api/status``, ``/api/retry/<file_hash>``,
            ``/api/ingest``.

Every value that originates from a request, the status database, or the
vector store is HTML-escaped before being placed into a page.
"""
import html
import json
import os
from urllib.parse import quote

import requests as http_requests
from flask import Flask, request, jsonify, redirect
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

from .utils import get_config, content_hash, setup_logging
from .status import StatusDB

logger = setup_logging('recon.api')
app = Flask(__name__)

# Page skeleton; ``render`` substitutes the ``{{CONTENT}}`` placeholder.
HTML_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>RECON</title>
</head>
<body>
<h1>RECON</h1>
<p>Knowledge Base Management System</p>
{{CONTENT}}
</body>
</html>
"""

# Document pipeline states, in the order they are listed on the dashboard.
_PIPELINE_STATUSES = (
    'queued', 'extracting', 'extracted', 'enriching',
    'enriched', 'embedding', 'complete', 'failed',
)


def render(content):
    """Return the full HTML page with *content* placed in the template."""
    return HTML_TEMPLATE.replace('{{CONTENT}}', content)


def _esc(value) -> str:
    """Return ``str(value)`` escaped for use in HTML text or attributes."""
    return html.escape(str(value), quote=True)


def _table(headers, rows) -> str:
    """Build an HTML table.

    *headers* are plain-text column titles (escaped here); each row in
    *rows* is an iterable of cell strings that are ALREADY safe HTML.
    """
    head = ''.join(f'<th>{_esc(h)}</th>' for h in headers)
    body = ''.join(
        '<tr>' + ''.join(f'<td>{cell}</td>' for cell in row) + '</tr>\n'
        for row in rows
    )
    return f'<table>\n<tr>{head}</tr>\n{body}</table>\n'


def _positive_int_arg(name: str, default: int) -> int:
    """Read query-string argument *name* as a positive int.

    Missing, non-numeric, or non-positive values fall back to *default*
    instead of raising (a bare ``int(...)`` turned bad input into a 500).
    """
    value = request.args.get(name, default, type=int)
    if value is None or value < 1:
        return default
    return value


def _semantic_search(config, query, limit, source_type=None):
    """Embed *query* and return the matching points from the vector store.

    Posts the query to the embedding service configured under
    ``config['embedding']``, then queries the collection configured under
    ``config['vector_db']``. When *source_type* is truthy, results are
    restricted to points whose ``source_type`` payload field equals it.

    Raises whatever ``requests`` / ``qdrant_client`` raise on failure;
    callers decide how to report the error.
    """
    embedding = config['embedding']
    vector_db = config['vector_db']

    url = f"http://{embedding['host']}:{embedding['port']}/api/embed"
    resp = http_requests.post(
        url,
        json={"model": embedding['model'], "input": query},
        timeout=120,
    )
    resp.raise_for_status()
    query_vector = resp.json()['embeddings'][0]

    qdrant = QdrantClient(
        host=vector_db['host'],
        port=vector_db['port'],
        timeout=60,
    )

    search_filter = None
    if source_type:
        search_filter = Filter(must=[
            FieldCondition(key="source_type", match=MatchValue(value=source_type))
        ])

    return qdrant.query_points(
        collection_name=vector_db['collection'],
        query=query_vector,
        limit=limit,
        query_filter=search_filter,
    ).points


def _render_result(point) -> str:
    """Return the HTML fragment for a single search hit."""
    # A point stored without payload yields None; treat it as empty.
    p = point.payload or {}
    title = p.get('title', 'Untitled')
    summary = p.get('summary')
    if summary is None:
        # Fall back to the first 200 characters of the raw content.
        summary = (p.get('content') or '')[:200]
    domains = p.get('domain', [])
    if not isinstance(domains, list):
        domains = []
    book = p.get('book_title', p.get('filename', ''))
    source_type = p.get('source_type', 'document')
    skill_level = p.get('skill_level', 'unknown')
    domain_tags = ''.join(f'<span class="tag">{_esc(d)}</span>' for d in domains)

    return (
        '<div class="result">\n'
        f'<div class="score">{point.score:.4f}</div>\n'
        f'<h3>{_esc(title)}</h3>\n'
        f'<div class="meta">{_esc(book)} | {_esc(source_type)} | {_esc(skill_level)}</div>\n'
        f'<p>{_esc(summary)}</p>\n'
        f'<div class="tags">{domain_tags}</div>\n'
        '</div>\n'
    )


@app.route('/')
def dashboard():
    """Render headline counts, per-status pipeline counts and sources."""
    db = StatusDB()
    counts = db.get_status_counts()
    cat = counts.get('catalogue', {})
    doc = counts.get('documents', {})

    cards = (
        ('Catalogued PDFs', sum(cat.values())),
        ('In Pipeline', sum(doc.values())),
        ('Complete', doc.get('complete', 0)),
        ('Failed', doc.get('failed', 0)),
    )

    parts = ['<div class="stats">\n']
    parts.extend(
        f'<div class="stat"><div class="label">{label}</div>'
        f'<div class="value">{_esc(value)}</div></div>\n'
        for label, value in cards
    )
    parts.append('</div>\n')

    parts.append('<h2>Pipeline Status</h2>\n')
    parts.append(_table(
        ('Status', 'Count'),
        ([_esc(status), _esc(doc.get(status, 0))] for status in _PIPELINE_STATUSES),
    ))

    sources = db.source_breakdown()
    if sources:
        rows = []
        for s in sources:
            # total_bytes may be missing or None; both count as zero.
            size_mb = (s.get('total_bytes', 0) or 0) / (1024 * 1024)
            rows.append([_esc(s['source']), _esc(s['count']), f'{size_mb:.1f} MB'])
        parts.append('<h2>Sources</h2>\n')
        parts.append(_table(('Source', 'Count', 'Size'), rows))

    return render(''.join(parts))


@app.route('/search')
def search_page():
    """Render the search form, or the results for query parameter ``q``.

    Optional query parameters: ``limit`` (positive int, default 20) and
    ``source_type`` (exact-match filter on the payload field).
    """
    query = request.args.get('q', '')
    if not query:
        return render(
            '<h2>Semantic Search</h2>\n'
            '<form method="get" action="/search">\n'
            '<input type="text" name="q">\n'
            '<button type="submit">Search</button>\n'
            '</form>\n'
            '<p>Enter a query to search across all embedded concepts.</p>\n'
        )

    config = get_config()
    limit = _positive_int_arg('limit', 20)
    source_filter = request.args.get('source_type', None)

    try:
        results = _semantic_search(config, query, limit, source_filter)
        parts = [
            f'<h2>Results for: {_esc(query)}</h2>\n',
            f'<p>{len(results)} results</p>\n',
        ]
        parts.extend(_render_result(r) for r in results)
        return render(''.join(parts))
    except Exception as e:
        # Page-level boundary: show the failure to the user, keep the
        # traceback in the log.
        logger.exception("Search failed for query %r", query)
        return render(f'<p class="error">Search error: {_esc(e)}</p>\n')


@app.route('/catalogue')
def catalogue_page():
    """Render the document catalogue.

    Optional query parameters: ``source``, ``category`` and ``limit``
    (positive int, default 100); all are passed to
    ``StatusDB.get_all_documents``.
    """
    db = StatusDB()
    source = request.args.get('source', None)
    category = request.args.get('category', None)
    limit = _positive_int_arg('limit', 100)
    docs = db.get_all_documents(source=source, category=category, limit=limit)

    parts = ['<h2>Document Catalogue</h2>\n']

    sources = db.get_sources()
    if sources:
        links = ['<a href="/catalogue">All</a>']
        links.extend(
            # Percent-encode for the URL, then escape for the attribute.
            f'<a href="/catalogue?source={_esc(quote(str(s), safe=""))}">{_esc(s)}</a>'
            for s in sources
        )
        parts.append('<div class="filters">' + ' '.join(links) + '</div>\n')

    rows = [
        [
            _esc(d.get('filename', '?')),
            _esc(d.get('source', '')),
            _esc(d.get('status', 'unknown')),
            _esc(d.get('pages_extracted', 0)),
            _esc(d.get('concepts_extracted', 0)),
            _esc(d.get('vectors_inserted', 0)),
        ]
        for d in docs
    ]
    parts.append(_table(
        ('Filename', 'Source', 'Status', 'Pages', 'Concepts', 'Vectors'),
        rows,
    ))
    return render(''.join(parts))


@app.route('/failures')
def failures_page():
    """Render the list of failed documents with their error and retry count."""
    db = StatusDB()
    failures = db.get_failures()

    heading = '<h2>Failed Documents</h2>\n'
    if not failures:
        return render(heading + '<p>No failures.</p>\n')

    rows = []
    for f in failures:
        # error_message may be stored as None; slicing None would raise.
        error = (f.get('error_message') or 'unknown')[:100]

        # NOTE(review): assumes each failure row exposes the document hash
        # under 'file_hash' - confirm against StatusDB.get_failures(). When
        # the key is absent the Actions cell is left empty.
        file_hash = f.get('file_hash')
        if file_hash:
            action = (
                f'<form method="post" '
                f'action="/api/retry/{_esc(quote(str(file_hash), safe=""))}">'
                '<button type="submit">Retry</button></form>'
            )
        else:
            action = ''

        rows.append([
            _esc(f.get('filename', '?')),
            _esc(error),
            _esc(f.get('retry_count', 0)),
            action,
        ])

    return render(heading + _table(('Filename', 'Error', 'Retries', 'Actions'), rows))


@app.route('/api/search', methods=['POST'])
def api_search():
    """Semantic search over the vector store.

    Expects a JSON object with ``query`` (required), ``limit`` (optional
    positive int, default 20) and ``source_type`` (optional filter).
    Returns ``{'query': ..., 'results': [{'score', 'payload'}, ...]}``,
    400 on a malformed request, or 500 with ``{'error': ...}`` when the
    embedding service or vector store fails.
    """
    config = get_config()
    # silent=True so a missing/invalid JSON body reaches our own 400
    # response instead of Flask's default error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'query' not in data:
        return jsonify({'error': 'Missing query'}), 400

    query = data['query']
    source_type = data.get('source_type', None)
    try:
        limit = int(data.get('limit', 20))
    except (TypeError, ValueError):
        limit = 0
    if limit < 1:
        return jsonify({'error': 'limit must be a positive integer'}), 400

    try:
        results = _semantic_search(config, query, limit, source_type)
    except Exception as e:
        logger.exception("API search failed")
        return jsonify({'error': str(e)}), 500

    return jsonify({
        'query': query,
        'results': [
            {'score': r.score, 'payload': r.payload}
            for r in results
        ],
    })


@app.route('/api/status')
def api_status():
    """Return the status counts from the status database as JSON."""
    db = StatusDB()
    return jsonify(db.get_status_counts())


# The URL variable is required: without it Flask calls the view with no
# arguments and the request fails with a TypeError.
@app.route('/api/retry/<file_hash>', methods=['POST'])
def api_retry(file_hash):
    """Increment the retry counter for *file_hash* and go back to /failures."""
    db = StatusDB()
    db.increment_retry(file_hash)
    return redirect('/failures')


@app.route('/api/ingest', methods=['POST'])
def api_ingest():
    """Ingest the posted JSON document via ``ingest_intel``.

    Returns ``{'intel_id': ...}`` on success, 400 when the body is missing
    or empty, and 500 when ``ingest_intel`` returns ``None``.
    """
    # Imported lazily, as in the original module.
    from .ingester import ingest_intel

    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'No JSON body'}), 400

    config = get_config()
    result = ingest_intel(data, config)
    if result is not None:
        return jsonify({'intel_id': result})
    return jsonify({'error': 'Ingestion failed'}), 500


def run_server():
    """Start the Flask server on the host/port from ``config['web']``."""
    config = get_config()
    host = config['web']['host']
    port = config['web']['port']
    logger.info("Starting RECON web dashboard on %s:%s", host, port)
    app.run(host=host, port=port, debug=False)