mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 14:44:54 +02:00
Initial commit: RECON codebase baseline
Current state of the pipeline code as of 2026-04-14 (Phase 1 scaffolding complete). Config has new_pipeline.enabled=false and crawler.sites=[] per refactor plan. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
563c16bb71
59 changed files with 18327 additions and 0 deletions
53
templates/knowledge/catalogue.html
Normal file
53
templates/knowledge/catalogue.html
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<h3 class="section-title mb-16">Document Catalogue</h3>
|
||||
|
||||
{% if sources %}
|
||||
<div class="mb-16">
|
||||
<a href="/catalogue" class="btn{% if not current_source %} active{% endif %}" style="margin-right:4px;">All</a>
|
||||
{% for s in sources %}
|
||||
{# One filter button per source. The source name is URL-encoded so that names
   containing spaces, '&', '#' or '+' still produce a valid query string; the
   visible label stays the raw (auto-escaped) name. #}
<a href="/catalogue?source={{ s|urlencode }}" class="btn{% if current_source == s %} active{% endif %}" style="margin-right:4px;">{{ s }}</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="text-dim text-xs mb-16">
|
||||
Showing {{ docs|length }}{% if total_count %} of {{ total_count }}{% endif %} documents
|
||||
{% if current_source %} in <strong>{{ current_source }}</strong>{% endif %}
|
||||
(page {{ page }} of {{ total_pages }})
|
||||
</div>
|
||||
|
||||
<table>
|
||||
<tr><th>Filename</th><th>Source</th><th>Status</th><th>Pages</th><th>Concepts</th><th>Vectors</th></tr>
|
||||
{% for d in docs %}
|
||||
<tr>
|
||||
<td>{{ d.filename or '?' }}</td>
|
||||
<td>{{ d.source or '' }}</td>
|
||||
<td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
|
||||
<td>{{ d.pages_extracted or 0 }}</td>
|
||||
<td>{{ d.concepts_extracted or 0 }}</td>
|
||||
<td>{{ d.vectors_inserted or 0 }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
{# Pagination controls, rendered only when there is more than one page.
   Shows: previous link, the first three pages, the last three pages, a window
   of two pages either side of the current one, an ellipsis marker at the edge
   of each gap, and a next link.
   current_source is URL-encoded in every link so that source names containing
   spaces, '&' or '#' survive the round trip through the query string. #}
{% if total_pages > 1 %}
<div class="pagination">
  {% if page > 1 %}
  <a href="/catalogue?page={{ page - 1 }}{% if current_source %}&source={{ current_source|urlencode }}{% endif %}&per_page={{ per_page }}" aria-label="Previous page">«</a>
  {% endif %}
  {% for p in range(1, total_pages + 1) %}
    {% if p == page %}
  <span class="current" aria-current="page">{{ p }}</span>
    {% elif p <= 3 or p > total_pages - 3 or (p >= page - 2 and p <= page + 2) %}
  <a href="/catalogue?page={{ p }}{% if current_source %}&source={{ current_source|urlencode }}{% endif %}&per_page={{ per_page }}">{{ p }}</a>
    {% elif p == 4 or p == total_pages - 3 %}
  {# Only reached when this page number was not linked above, i.e. it sits at the edge of a gap. #}
  <span class="text-dim">...</span>
    {% endif %}
  {% endfor %}
  {% if page < total_pages %}
  <a href="/catalogue?page={{ page + 1 }}{% if current_source %}&source={{ current_source|urlencode }}{% endif %}&per_page={{ per_page }}" aria-label="Next page">»</a>
  {% endif %}
</div>
{% endif %}
|
||||
{% endblock %}
|
||||
72
templates/knowledge/dashboard.html
Normal file
72
templates/knowledge/dashboard.html
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<div id="kb-dashboard">
|
||||
<div class="stat-grid">
|
||||
<div class="stat-card"><div class="label">Catalogued</div><div class="value" id="kv-catalogued">—</div><div class="sublabel">total known documents</div></div>
|
||||
<div class="stat-card"><div class="label">In Pipeline</div><div class="value" id="kv-pipeline">—</div><div class="sublabel" id="kv-pipeline-sub">processing</div></div>
|
||||
<div class="stat-card"><div class="label">Complete</div><div class="value" id="kv-complete">—</div><div class="sublabel">in Qdrant</div></div>
|
||||
<div class="stat-card"><div class="label">Failed</div><div class="value" id="kv-failed">—</div><div class="sublabel"> </div></div>
|
||||
</div>
|
||||
|
||||
<div class="mb-24">
|
||||
<div class="flex-between mb-16" style="margin-bottom:4px;font-size:11px;color:#888;">
|
||||
<span id="progress-label">Pipeline Progress</span>
|
||||
<span id="progress-pct"></span>
|
||||
</div>
|
||||
<div id="progress-bar" class="pipeline-bar"></div>
|
||||
<div id="progress-legend" class="pipeline-legend"></div>
|
||||
</div>
|
||||
|
||||
<div class="stat-grid grid-3">
|
||||
<div class="stat-card"><div class="label">Concepts</div><div class="value" id="kv-concepts">—</div><div class="sublabel">extracted</div></div>
|
||||
<div class="stat-card"><div class="label">Vectors</div><div class="value" id="kv-vectors">—</div><div class="sublabel">in Qdrant</div></div>
|
||||
<div class="stat-card"><div class="label">Pages</div><div class="value" id="kv-pages">—</div><div class="sublabel">processed</div></div>
|
||||
</div>
|
||||
|
||||
<div id="pipeline-activity" class="panel" style="display:none;">
|
||||
<h3 style="color:#ffa500;font-size:13px;margin-bottom:8px;">Pipeline Activity</h3>
|
||||
<div id="activity-content" style="font-size:12px;color:#ccc;"></div>
|
||||
</div>
|
||||
|
||||
<div id="qdrant-health" class="panel" style="padding:10px 16px;font-size:12px;color:#888;">
|
||||
Qdrant: <span id="qdrant-status">checking...</span>
|
||||
</div>
|
||||
|
||||
<div id="kb-chart-container" class="panel" style="display:none;">
|
||||
<h3 class="section-title" style="margin-bottom:8px;">Pipeline Activity (24h)</h3>
|
||||
<canvas id="kb-chart" width="800" height="200" style="width:100%;height:200px;"></canvas>
|
||||
</div>
|
||||
|
||||
<h3 class="section-title" id="sources-toggle" style="cursor:pointer;user-select:none;"><span id="sources-arrow">▶</span> Sources</h3>
|
||||
<table>
|
||||
<thead id="sources-thead" style="display:none;"><tr><th>Source</th><th>Type</th><th>Catalogued</th><th>Complete</th><th>In Pipeline</th><th>Progress</th><th>Concepts</th><th>Vectors</th></tr></thead>
|
||||
<tbody id="sources-tbody" style="display:none;"><tr><td colspan="8" class="text-dim">Loading...</td></tr></tbody>
|
||||
<tfoot id="sources-tfoot"></tfoot>
|
||||
</table>
|
||||
|
||||
<div class="grid-2 mt-24">
|
||||
<div>
|
||||
<h3 class="section-title">Domain Distribution</h3>
|
||||
<div id="domain-bars" class="text-small">Loading...</div>
|
||||
</div>
|
||||
<div>
|
||||
<h3 class="section-title">Knowledge Type</h3>
|
||||
<div id="knowledge-type-bars" class="text-small">Loading...</div>
|
||||
<div id="knowledge-type-migration" class="text-small" style="margin-top:6px;color:#666;font-size:11px;"></div>
|
||||
<h3 class="section-title" style="margin-top:16px;">Complexity</h3>
|
||||
<div id="complexity-bars" class="text-small">Loading...</div>
|
||||
<div id="complexity-migration" class="text-small" style="margin-top:6px;color:#666;font-size:11px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3 class="section-title mt-24">Recently Completed</h3>
|
||||
<table>
|
||||
<thead><tr><th>Title</th><th>Type</th><th>Concepts</th><th>Vectors</th></tr></thead>
|
||||
<tbody id="recent-tbody"><tr><td colspan="4" class="text-dim">Loading...</td></tr></tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endblock %}
|
||||
{% block scripts %}
|
||||
<script src="/static/js/charts.js"></script>
|
||||
<script src="/static/js/dashboard.js"></script>
|
||||
{% endblock %}
|
||||
56
templates/knowledge/failures.html
Normal file
56
templates/knowledge/failures.html
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<h3 style="color:#ff4444;margin-bottom:16px;">Failed Documents</h3>
|
||||
{% if not failures %}
|
||||
<p class="text-dim">No failures.</p>
|
||||
{% else %}
|
||||
<div style="margin-bottom:16px;">
|
||||
<button class="btn" id="retry-all-btn" onclick="retryAll()">Retry All ({{ failures|length }})</button>
|
||||
<span id="retry-all-status" style="margin-left:12px;font-size:12px;"></span>
|
||||
</div>
|
||||
<table>
|
||||
<tr><th>Filename</th><th>Error</th><th>Age</th><th>Retries</th><th>Actions</th></tr>
|
||||
{% for f in failures %}
|
||||
<tr>
|
||||
<td>{{ f.filename or '?' }}</td>
|
||||
<td style="color:#ff4444;font-size:11px;">{{ (f.error_message or 'unknown')[:100] }}</td>
|
||||
<td class="text-dim text-xs">{{ f.discovered_at or '' }}</td>
|
||||
<td>{{ f.retry_count or 0 }}</td>
|
||||
<td>
|
||||
<form method="post" action="/api/retry/{{ f.hash }}" style="display:inline;">
|
||||
<button class="btn" type="submit">Retry</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
{% block scripts %}
|
||||
<script>
|
||||
/**
 * Ask for confirmation, then POST to /api/retry-all and report the outcome in
 * #retry-all-status.
 *
 * On success the page reloads after two seconds; the button stays disabled
 * during that delay so the request cannot be sent twice. On any failure the
 * button is re-enabled so the user can try again.
 */
async function retryAll() {
  var btn = document.getElementById('retry-all-btn');
  var status = document.getElementById('retry-all-status');
  if (!confirm('Retry all {{ failures|length }} failed documents?')) return;

  btn.disabled = true;
  status.style.color = '#ffa500';
  status.textContent = 'Retrying...';

  var reloadScheduled = false;
  try {
    var resp = await fetch('/api/retry-all', {method: 'POST'});
    if (resp.ok) {
      var data = await resp.json();
      status.style.color = '#00ff41';
      status.textContent = 'Retried ' + data.count + ' documents';
      reloadScheduled = true;
      setTimeout(function() { location.reload(); }, 2000);
    } else {
      // Error responses are not guaranteed to be JSON (e.g. an HTML 500 page),
      // so a parse failure here falls back to the generic message instead of
      // surfacing a confusing JSON syntax error.
      var message = 'Failed';
      try {
        var errBody = await resp.json();
        if (errBody && errBody.error) message = errBody.error;
      } catch (parseErr) {
        // Non-JSON error body: keep the generic message.
      }
      status.style.color = '#ff4444';
      status.textContent = message;
    }
  } catch (e) {
    status.style.color = '#ff4444';
    status.textContent = 'Error: ' + e.message;
  } finally {
    // Keep the button disabled while the success reload is pending.
    if (!reloadScheduled) btn.disabled = false;
  }
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
83
templates/knowledge/upload.html
Normal file
83
templates/knowledge/upload.html
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<h3 class="section-title mb-16">Upload PDF</h3>
|
||||
<div class="panel">
|
||||
{# PDF upload form. Submission is handled by the inline script in the scripts
   block, which intercepts the submit event and posts the data to /api/upload.
   Each label is bound to its control with for/id so that clicking the label
   focuses the control and assistive technology announces the field name. #}
<form id="upload-form" enctype="multipart/form-data">
  <div class="mb-16">
    <label for="upload-file" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">PDF File</label>
    <input type="file" name="file" accept=".pdf" id="upload-file"
           style="background:#0a0a0a;border:1px solid #333;color:#c0c0c0;padding:8px;width:100%;font-family:inherit;">
  </div>
  <div class="mb-16">
    <label for="upload-category" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
    <input type="text" name="category" id="upload-category" list="cat-list" class="search-box"
           placeholder="Select or type a category..." style="margin-bottom:0;">
    {# NOTE(review): options_html is emitted unescaped via |safe; it must be
       built server-side from escaped values. Confirm against the view. #}
    <datalist id="cat-list">{{ options_html|safe }}</datalist>
  </div>
  <button type="submit" class="btn" id="upload-btn">Upload</button>
  {# Live region: the script writes progress and error text here. #}
  <span id="upload-status" aria-live="polite" style="margin-left:12px;font-size:12px;"></span>
</form>
|
||||
</div>
|
||||
<div id="upload-result" style="display:none;" class="panel"></div>
|
||||
|
||||
<h3 class="section-title">Recent Documents</h3>
|
||||
<table>
|
||||
<tr><th>Filename</th><th>Source</th><th>Status</th></tr>
|
||||
{% for d in recent %}
|
||||
<tr>
|
||||
<td>{{ d.filename or '?' }}</td>
|
||||
<td>{{ d.source or '' }}</td>
|
||||
<td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endblock %}
|
||||
{% block scripts %}
|
||||
<script>
|
||||
/**
 * Submit handler for #upload-form.
 *
 * Sends the selected file and category to /api/upload as multipart form data,
 * shows progress in #upload-status, and on success lists the queued document's
 * hash, filename and category in #upload-result.
 *
 * The result panel is built from DOM nodes with textContent, never innerHTML:
 * the filename and category in the response originate from the uploader, so
 * concatenating them into markup would allow script injection.
 */
document.getElementById('upload-form').addEventListener('submit', async function(e) {
  e.preventDefault();

  var btn = document.getElementById('upload-btn');
  var status = document.getElementById('upload-status');
  var result = document.getElementById('upload-result');
  var fileInput = document.getElementById('upload-file');
  var category = document.getElementById('upload-category').value;

  if (!fileInput.files.length) {
    status.style.color = '#ff4444';
    status.textContent = 'No file selected';
    return;
  }

  btn.disabled = true;
  status.style.color = '#ffa500';
  status.textContent = 'Uploading...';
  result.style.display = 'none';

  var formData = new FormData();
  formData.append('file', fileInput.files[0]);
  formData.append('category', category);

  try {
    var resp = await fetch('/api/upload', { method: 'POST', body: formData });
    if (resp.ok) {
      var data = await resp.json();
      status.style.color = '#00ff41';
      status.textContent = 'Upload successful';

      // Rebuild the panel: headline, then one dimmed line per detail.
      result.textContent = '';
      var headline = document.createElement('span');
      headline.style.color = '#00ff41';
      headline.textContent = 'Queued for processing';
      result.appendChild(headline);

      [
        'Hash: ' + data.hash,
        'File: ' + data.filename,
        'Category: ' + data.source + '/' + data.category
      ].forEach(function(line) {
        result.appendChild(document.createElement('br'));
        var detail = document.createElement('span');
        detail.className = 'text-dim';
        detail.textContent = line;
        result.appendChild(detail);
      });

      result.style.display = 'block';
      fileInput.value = '';
    } else {
      // Error responses may not be JSON (e.g. a proxy's HTML error page);
      // fall back to the generic message rather than reporting a parse error
      // as a network error.
      var message = 'Upload failed';
      try {
        var errBody = await resp.json();
        if (errBody && errBody.error) message = errBody.error;
      } catch (parseErr) {
        // Non-JSON error body: keep the generic message.
      }
      status.style.color = '#ff4444';
      status.textContent = message;
    }
  } catch (err) {
    status.style.color = '#ff4444';
    status.textContent = 'Network error: ' + err.message;
  } finally {
    btn.disabled = false;
  }
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
76
templates/knowledge/web_ingest.html
Normal file
76
templates/knowledge/web_ingest.html
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<h3 class="section-title mb-16">Web Ingest</h3>
|
||||
<div style="margin-bottom:8px;">
|
||||
<a href="#single" class="btn active" onclick="showSection('single')" id="tab-single">Single/Batch URL</a>
|
||||
<a href="#crawl" class="btn" onclick="showSection('crawl')" id="tab-crawl">Site Crawl</a>
|
||||
</div>
|
||||
|
||||
<div id="section-single">
|
||||
<div class="panel">
|
||||
<div class="mb-16">
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">URL(s) — one per line for batch</label>
|
||||
<textarea id="wi-urls" class="search-box" rows="4" placeholder="https://example.com/article" style="resize:vertical;margin-bottom:0;"></textarea>
|
||||
</div>
|
||||
<div class="mb-16">
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
|
||||
<input type="text" id="wi-category" list="wi-cat-list" class="search-box" value="Web"
|
||||
placeholder="Category..." style="margin-bottom:0;">
|
||||
<datalist id="wi-cat-list">{{ options_html|safe }}</datalist>
|
||||
</div>
|
||||
<button class="btn" id="wi-btn" onclick="doWebIngest()">Ingest</button>
|
||||
<span id="wi-status" style="margin-left:12px;font-size:12px;"></span>
|
||||
</div>
|
||||
{# Result panel for single/batch ingest. The two style attributes are merged
   into one: an HTML parser keeps only the first attribute of a given name, so
   the max-height/overflow rules in the second one were being discarded. #}
<div id="wi-results" class="panel" style="display:none;max-height:300px;overflow-y:auto;"></div>
|
||||
</div>
|
||||
|
||||
<div id="section-crawl" style="display:none;">
|
||||
<div class="panel">
|
||||
<div class="mb-16">
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Site URL</label>
|
||||
<input type="text" id="crawl-url" class="search-box" placeholder="https://example.com" style="margin-bottom:0;">
|
||||
</div>
|
||||
<div class="grid-2 mb-16">
|
||||
<div>
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
|
||||
<input type="text" id="crawl-category" list="wi-cat-list" class="search-box" value="Web" style="margin-bottom:0;">
|
||||
</div>
|
||||
<div>
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Max Pages</label>
|
||||
<input type="number" id="crawl-max-pages" class="search-box" value="500" min="1" max="5000" style="margin-bottom:0;">
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid-2 mb-16">
|
||||
<div>
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Include Paths (comma-separated)</label>
|
||||
<input type="text" id="crawl-include" class="search-box" placeholder="/docs/, /blog/" style="margin-bottom:0;">
|
||||
</div>
|
||||
<div>
|
||||
<label class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Exclude Paths (comma-separated)</label>
|
||||
<input type="text" id="crawl-exclude" class="search-box" placeholder="/search, /login" style="margin-bottom:0;">
|
||||
</div>
|
||||
</div>
|
||||
<button class="btn" id="crawl-preview-btn" onclick="doCrawl(true)">Preview</button>
|
||||
<button class="btn" id="crawl-btn" onclick="doCrawl(false)" style="margin-left:8px;">Crawl & Ingest</button>
|
||||
<span id="crawl-status" style="margin-left:12px;font-size:12px;"></span>
|
||||
</div>
|
||||
{# Result panel for site crawls. The two style attributes are merged into one:
   an HTML parser keeps only the first attribute of a given name, so the
   max-height/overflow/font-size rules in the second one were being discarded. #}
<div id="crawl-results" class="panel" style="display:none;max-height:400px;overflow-y:auto;font-size:12px;"></div>
|
||||
</div>
|
||||
|
||||
<h3 class="section-title mt-24">Recent Web Ingestions</h3>
|
||||
<table>
|
||||
<tr><th>Title</th><th>Source/Category</th><th>Status</th><th>Pages</th><th>Concepts</th></tr>
|
||||
{% for d in web_docs %}
|
||||
<tr>
|
||||
<td title="{{ d.path or '' }}" style="max-width:400px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;">{{ d.book_title or d.filename or '?' }}</td>
|
||||
<td>{{ d.source or '' }}/{{ d.category or '' }}</td>
|
||||
<td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
|
||||
<td>{{ d.pages_extracted or 0 }}</td>
|
||||
<td>{{ d.concepts_extracted or 0 }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
{% endblock %}
|
||||
{% block scripts %}
|
||||
<script src="/static/js/web-ingest.js"></script>
|
||||
{% endblock %}
|
||||
Loading…
Add table
Add a link
Reference in a new issue