Initial commit: RECON codebase baseline

Current state of the pipeline code as of 2026-04-14 (Phase 1 scaffolding complete).
Config has new_pipeline.enabled=false and crawler.sites=[] per refactor plan.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-14 14:57:23 +00:00
commit 563c16bb71
59 changed files with 18327 additions and 0 deletions

View file

@ -0,0 +1,53 @@
{% extends "base.html" %}
{% block content %}
{#
  Document catalogue page: source filter buttons, a result summary, the
  document table and a windowed pager.

  Reads from the render context: sources, current_source, docs, total_count,
  page, total_pages and per_page.
#}
<h3 class="section-title mb-16">Document Catalogue</h3>
{% if sources %}
<div class="mb-16">
  <a href="/catalogue" class="btn{% if not current_source %} active{% endif %}" style="margin-right:4px;">All</a>
  {% for s in sources %}
  {# Percent-encode the source so names containing spaces, "&", "#" or "+" survive the query string. #}
  <a href="/catalogue?source={{ s|urlencode }}" class="btn{% if current_source == s %} active{% endif %}" style="margin-right:4px;">{{ s }}</a>
  {% endfor %}
</div>
{% endif %}
<div class="text-dim text-xs mb-16">
Showing {{ docs|length }}{% if total_count %} of {{ total_count }}{% endif %} documents
{% if current_source %} in <strong>{{ current_source }}</strong>{% endif %}
(page {{ page }} of {{ total_pages }})
</div>
<table>
  <tr><th>Filename</th><th>Source</th><th>Status</th><th>Pages</th><th>Concepts</th><th>Vectors</th></tr>
  {% for d in docs %}
  <tr>
    <td>{{ d.filename or '?' }}</td>
    <td>{{ d.source or '' }}</td>
    <td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
    <td>{{ d.pages_extracted or 0 }}</td>
    <td>{{ d.concepts_extracted or 0 }}</td>
    <td>{{ d.vectors_inserted or 0 }}</td>
  </tr>
  {% endfor %}
</table>
{% if total_pages > 1 %}
{#
  Query-string tail shared by every pager link: the optional source filter
  (percent-encoded) followed by per_page. Built once so the three link
  variants below cannot drift apart.
#}
{% set pager_tail = ('&source=' ~ (current_source|urlencode) if current_source else '') ~ '&per_page=' ~ per_page %}
<div class="pagination">
  {% if page > 1 %}
  <a href="/catalogue?page={{ page - 1 }}{{ pager_tail }}">&laquo;</a>
  {% endif %}
  {% for p in range(1, total_pages + 1) %}
    {% if p == page %}
  <span class="current">{{ p }}</span>
    {# Always link the first three pages, the last three, and two pages either side of the current one. #}
    {% elif p <= 3 or p > total_pages - 3 or (p >= page - 2 and p <= page + 2) %}
  <a href="/catalogue?page={{ p }}{{ pager_tail }}">{{ p }}</a>
    {# Page 4 and page (total_pages - 3) are the first pages of each possible gap, so one ellipsis is emitted per gap. #}
    {% elif p == 4 or p == total_pages - 3 %}
  <span class="text-dim">...</span>
    {% endif %}
  {% endfor %}
  {% if page < total_pages %}
  <a href="/catalogue?page={{ page + 1 }}{{ pager_tail }}">&raquo;</a>
  {% endif %}
</div>
{% endif %}
{% endblock %}

View file

@ -0,0 +1,72 @@
{% extends "base.html" %}
{% block content %}
{#
  Knowledge-base dashboard shell. Every figure, bar and table body below is
  rendered as an empty or "Loading..." placeholder addressed by id.
  NOTE(review): presumably filled in client-side by /static/js/dashboard.js,
  which is not visible from this template; confirm before renaming any id.
#}
<div id="kb-dashboard">

  {# Headline document counts. #}
  <div class="stat-grid">
    <div class="stat-card">
      <div class="label">Catalogued</div>
      <div class="value" id="kv-catalogued"></div>
      <div class="sublabel">total known documents</div>
    </div>
    <div class="stat-card">
      <div class="label">In Pipeline</div>
      <div class="value" id="kv-pipeline"></div>
      <div class="sublabel" id="kv-pipeline-sub">processing</div>
    </div>
    <div class="stat-card">
      <div class="label">Complete</div>
      <div class="value" id="kv-complete"></div>
      <div class="sublabel">in Qdrant</div>
    </div>
    <div class="stat-card">
      <div class="label">Failed</div>
      <div class="value" id="kv-failed"></div>
      <div class="sublabel">&nbsp;</div>
    </div>
  </div>

  {# Overall pipeline progress: caption row, bar and legend. #}
  <div class="mb-24">
    <div class="flex-between mb-16" style="margin-bottom:4px;font-size:11px;color:#888;">
      <span id="progress-label">Pipeline Progress</span>
      <span id="progress-pct"></span>
    </div>
    <div id="progress-bar" class="pipeline-bar"></div>
    <div id="progress-legend" class="pipeline-legend"></div>
  </div>

  {# Extraction totals. #}
  <div class="stat-grid grid-3">
    <div class="stat-card">
      <div class="label">Concepts</div>
      <div class="value" id="kv-concepts"></div>
      <div class="sublabel">extracted</div>
    </div>
    <div class="stat-card">
      <div class="label">Vectors</div>
      <div class="value" id="kv-vectors"></div>
      <div class="sublabel">in Qdrant</div>
    </div>
    <div class="stat-card">
      <div class="label">Pages</div>
      <div class="value" id="kv-pages"></div>
      <div class="sublabel">processed</div>
    </div>
  </div>

  {# Live activity panel; rendered hidden. #}
  <div id="pipeline-activity" class="panel" style="display:none;">
    <h3 style="color:#ffa500;font-size:13px;margin-bottom:8px;">Pipeline Activity</h3>
    <div id="activity-content" style="font-size:12px;color:#ccc;"></div>
  </div>

  {# Qdrant health line; shows "checking..." until replaced. #}
  <div id="qdrant-health" class="panel" style="padding:10px 16px;font-size:12px;color:#888;">
    Qdrant: <span id="qdrant-status">checking...</span>
  </div>

  {# 24-hour activity chart; rendered hidden. #}
  <div id="kb-chart-container" class="panel" style="display:none;">
    <h3 class="section-title" style="margin-bottom:8px;">Pipeline Activity (24h)</h3>
    <canvas id="kb-chart" width="800" height="200" style="width:100%;height:200px;"></canvas>
  </div>

  {# Per-source breakdown; the header row and body are rendered hidden behind the heading. #}
  <h3 class="section-title" id="sources-toggle" style="cursor:pointer;user-select:none;"><span id="sources-arrow">&#9654;</span> Sources</h3>
  <table>
    <thead id="sources-thead" style="display:none;">
      <tr><th>Source</th><th>Type</th><th>Catalogued</th><th>Complete</th><th>In Pipeline</th><th>Progress</th><th>Concepts</th><th>Vectors</th></tr>
    </thead>
    <tbody id="sources-tbody" style="display:none;"><tr><td colspan="8" class="text-dim">Loading...</td></tr></tbody>
    <tfoot id="sources-tfoot"></tfoot>
  </table>

  {# Distribution bars, in two columns. #}
  <div class="grid-2 mt-24">
    <div>
      <h3 class="section-title">Domain Distribution</h3>
      <div id="domain-bars" class="text-small">Loading...</div>
    </div>
    <div>
      <h3 class="section-title">Knowledge Type</h3>
      <div id="knowledge-type-bars" class="text-small">Loading...</div>
      <div id="knowledge-type-migration" class="text-small" style="margin-top:6px;color:#666;font-size:11px;"></div>
      <h3 class="section-title" style="margin-top:16px;">Complexity</h3>
      <div id="complexity-bars" class="text-small">Loading...</div>
      <div id="complexity-migration" class="text-small" style="margin-top:6px;color:#666;font-size:11px;"></div>
    </div>
  </div>

  {# Most recently completed documents. #}
  <h3 class="section-title mt-24">Recently Completed</h3>
  <table>
    <thead>
      <tr><th>Title</th><th>Type</th><th>Concepts</th><th>Vectors</th></tr>
    </thead>
    <tbody id="recent-tbody"><tr><td colspan="4" class="text-dim">Loading...</td></tr></tbody>
  </table>

</div>
{% endblock %}
{% block scripts %}
  {# Page scripts, in load order: charts.js first, then dashboard.js. #}
  <script src="/static/js/charts.js"></script>
  <script src="/static/js/dashboard.js"></script>
{% endblock %}

View file

@ -0,0 +1,56 @@
{% extends "base.html" %}
{% block content %}
{#
  Failed-documents page. Lists every entry of `failures` with a per-row retry
  form, plus a bulk "Retry All" button wired to retryAll() from the scripts
  block. When `failures` is empty only a short notice is shown.
#}
<h3 style="color:#ff4444;margin-bottom:16px;">Failed Documents</h3>
{% if failures %}
<div style="margin-bottom:16px;">
  <button class="btn" id="retry-all-btn" onclick="retryAll()">Retry All ({{ failures|length }})</button>
  <span id="retry-all-status" style="margin-left:12px;font-size:12px;"></span>
</div>
<table>
  <tr><th>Filename</th><th>Error</th><th>Age</th><th>Retries</th><th>Actions</th></tr>
  {% for f in failures %}
  <tr>
    <td>{{ f.filename or '?' }}</td>
    {# Error text is cut to its first 100 characters. #}
    <td style="color:#ff4444;font-size:11px;">{{ (f.error_message or 'unknown')[:100] }}</td>
    <td class="text-dim text-xs">{{ f.discovered_at or '' }}</td>
    <td>{{ f.retry_count or 0 }}</td>
    <td>
      {# Plain form POST to the per-document retry endpoint, keyed by the document hash. #}
      <form method="post" action="/api/retry/{{ f.hash }}" style="display:inline;">
        <button class="btn" type="submit">Retry</button>
      </form>
    </td>
  </tr>
  {% endfor %}
</table>
{% else %}
<p class="text-dim">No failures.</p>
{% endif %}
{% endblock %}
{% block scripts %}
<script>
/**
 * Bulk-retry handler for the "Retry All" button.
 *
 * Asks for confirmation, POSTs to /api/retry-all and reports the outcome in
 * #retry-all-status. On success the page reloads after two seconds and the
 * button stays disabled until then, so a second click cannot start another
 * bulk retry. On any failure the button is re-enabled so the user can try
 * again.
 */
async function retryAll() {
  var btn = document.getElementById('retry-all-btn');
  var status = document.getElementById('retry-all-status');
  if (!confirm('Retry all {{ failures|length }} failed documents?')) return;

  btn.disabled = true;
  status.style.color = '#ffa500';
  status.textContent = 'Retrying...';

  var succeeded = false;
  try {
    var resp = await fetch('/api/retry-all', {method: 'POST'});

    // The body is not guaranteed to be JSON (e.g. an HTML error page), so a
    // parse failure must not mask the HTTP outcome.
    var data = null;
    try {
      data = await resp.json();
    } catch (parseErr) {
      data = null;
    }

    if (resp.ok) {
      succeeded = true;
      status.style.color = '#00ff41';
      status.textContent = 'Retried ' + (data && data.count != null ? data.count : 'all') + ' documents';
      setTimeout(function() { location.reload(); }, 2000);
    } else {
      status.style.color = '#ff4444';
      status.textContent = (data && data.error) || ('Failed (HTTP ' + resp.status + ')');
    }
  } catch (e) {
    status.style.color = '#ff4444';
    status.textContent = 'Error: ' + e.message;
  } finally {
    // Keep the button disabled while the post-success reload is pending.
    if (!succeeded) btn.disabled = false;
  }
}
</script>
{% endblock %}

View file

@ -0,0 +1,83 @@
{% extends "base.html" %}
{% block content %}
{#
  PDF upload page: a file + category form, a result panel that is rendered
  hidden, and a table of the documents in `recent`. The form carries no
  action or method of its own; submission is intercepted by the submit
  listener in the scripts block.
#}
<h3 class="section-title mb-16">Upload PDF</h3>
<div class="panel">
  <form id="upload-form" enctype="multipart/form-data">
    <div class="mb-16">
      {# `for` ties each label to its control so clicking the label focuses it and assistive tech announces it. #}
      <label for="upload-file" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">PDF File</label>
      <input type="file" name="file" accept=".pdf" id="upload-file"
             style="background:#0a0a0a;border:1px solid #333;color:#c0c0c0;padding:8px;width:100%;font-family:inherit;">
    </div>
    <div class="mb-16">
      <label for="upload-category" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
      <input type="text" name="category" id="upload-category" list="cat-list" class="search-box"
             placeholder="Select or type a category..." style="margin-bottom:0;">
      {#
        NOTE(review): options_html is emitted unescaped via |safe. It must be
        built server-side from HTML-escaped category names; confirm in the
        view that renders this template.
      #}
      <datalist id="cat-list">{{ options_html|safe }}</datalist>
    </div>
    <button type="submit" class="btn" id="upload-btn">Upload</button>
    <span id="upload-status" style="margin-left:12px;font-size:12px;"></span>
  </form>
</div>
<div id="upload-result" style="display:none;" class="panel"></div>
<h3 class="section-title">Recent Documents</h3>
<table>
  <tr><th>Filename</th><th>Source</th><th>Status</th></tr>
  {% for d in recent %}
  <tr>
    <td>{{ d.filename or '?' }}</td>
    <td>{{ d.source or '' }}</td>
    <td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
  </tr>
  {% endfor %}
</table>
{% endblock %}
{% block scripts %}
<script>
/**
 * Fill the result panel with the details of a queued upload.
 *
 * Every value is inserted through textContent, never innerHTML, so a file
 * name or category containing markup is shown literally instead of being
 * parsed as HTML.
 */
function renderUploadResult(container, data) {
  var lines = [
    {text: 'Queued for processing', color: '#00ff41'},
    {text: 'Hash: ' + data.hash, className: 'text-dim'},
    {text: 'File: ' + data.filename, className: 'text-dim'},
    {text: 'Category: ' + data.source + '/' + data.category, className: 'text-dim'}
  ];
  container.textContent = '';
  lines.forEach(function(line, index) {
    if (index > 0) container.appendChild(document.createElement('br'));
    var span = document.createElement('span');
    if (line.className) span.className = line.className;
    if (line.color) span.style.color = line.color;
    span.textContent = line.text;
    container.appendChild(span);
  });
}

/**
 * Submit handler for #upload-form: sends the selected file and category to
 * /api/upload as multipart form data and reports progress in #upload-status.
 * The native form submission is always cancelled.
 */
document.getElementById('upload-form').addEventListener('submit', async function(e) {
  e.preventDefault();
  var btn = document.getElementById('upload-btn');
  var status = document.getElementById('upload-status');
  var result = document.getElementById('upload-result');
  var fileInput = document.getElementById('upload-file');
  var category = document.getElementById('upload-category').value;

  if (!fileInput.files.length) {
    status.style.color = '#ff4444';
    status.textContent = 'No file selected';
    return;
  }

  btn.disabled = true;
  status.style.color = '#ffa500';
  status.textContent = 'Uploading...';
  result.style.display = 'none';

  var formData = new FormData();
  formData.append('file', fileInput.files[0]);
  formData.append('category', category);

  try {
    var resp = await fetch('/api/upload', { method: 'POST', body: formData });

    // The body is not guaranteed to be JSON (e.g. an HTML error page); a
    // parse failure must not be reported as a network error.
    var data = null;
    try {
      data = await resp.json();
    } catch (parseErr) {
      data = null;
    }

    if (resp.ok) {
      status.style.color = '#00ff41';
      status.textContent = 'Upload successful';
      renderUploadResult(result, data || {});
      result.style.display = 'block';
      // Clear the picker so the same file is not re-submitted by accident.
      fileInput.value = '';
    } else {
      status.style.color = '#ff4444';
      status.textContent = (data && data.error) || 'Upload failed';
    }
  } catch (err) {
    status.style.color = '#ff4444';
    status.textContent = 'Network error: ' + err.message;
  } finally {
    btn.disabled = false;
  }
});
</script>
{% endblock %}

View file

@ -0,0 +1,76 @@
{% extends "base.html" %}
{% block content %}
{#
  Web ingest page with two sections selected by the tab links: single/batch
  URL ingest (shown initially) and site crawl (rendered hidden). Below them is
  a table of the documents in `web_docs`.

  showSection(), doWebIngest() and doCrawl() are not defined in this template.
  NOTE(review): presumably provided by /static/js/web-ingest.js; confirm
  before renaming any id or handler.
#}
<h3 class="section-title mb-16">Web Ingest</h3>
<div style="margin-bottom:8px;">
  <a href="#single" class="btn active" onclick="showSection('single')" id="tab-single">Single/Batch URL</a>
  <a href="#crawl" class="btn" onclick="showSection('crawl')" id="tab-crawl">Site Crawl</a>
</div>

<div id="section-single">
  <div class="panel">
    <div class="mb-16">
      {# `for` ties each label to its control so clicking the label focuses it and assistive tech announces it. #}
      <label for="wi-urls" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">URL(s) — one per line for batch</label>
      <textarea id="wi-urls" class="search-box" rows="4" placeholder="https://example.com/article" style="resize:vertical;margin-bottom:0;"></textarea>
    </div>
    <div class="mb-16">
      <label for="wi-category" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
      <input type="text" id="wi-category" list="wi-cat-list" class="search-box" value="Web"
             placeholder="Category..." style="margin-bottom:0;">
      {#
        NOTE(review): options_html is emitted unescaped via |safe. It must be
        built server-side from HTML-escaped category names; confirm in the
        view that renders this template.
      #}
      <datalist id="wi-cat-list">{{ options_html|safe }}</datalist>
    </div>
    <button class="btn" id="wi-btn" onclick="doWebIngest()">Ingest</button>
    <span id="wi-status" style="margin-left:12px;font-size:12px;"></span>
  </div>
  {#
    One style attribute only: when an element carries the attribute twice the
    HTML parser keeps the first and drops the rest, so the height cap and
    scrolling were previously never applied.
  #}
  <div id="wi-results" class="panel" style="display:none;max-height:300px;overflow-y:auto;"></div>
</div>

<div id="section-crawl" style="display:none;">
  <div class="panel">
    <div class="mb-16">
      <label for="crawl-url" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Site URL</label>
      <input type="text" id="crawl-url" class="search-box" placeholder="https://example.com" style="margin-bottom:0;">
    </div>
    <div class="grid-2 mb-16">
      <div>
        <label for="crawl-category" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Category</label>
        {# Shares the category datalist declared in the single/batch section. #}
        <input type="text" id="crawl-category" list="wi-cat-list" class="search-box" value="Web" style="margin-bottom:0;">
      </div>
      <div>
        <label for="crawl-max-pages" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Max Pages</label>
        <input type="number" id="crawl-max-pages" class="search-box" value="500" min="1" max="5000" style="margin-bottom:0;">
      </div>
    </div>
    <div class="grid-2 mb-16">
      <div>
        <label for="crawl-include" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Include Paths (comma-separated)</label>
        <input type="text" id="crawl-include" class="search-box" placeholder="/docs/, /blog/" style="margin-bottom:0;">
      </div>
      <div>
        <label for="crawl-exclude" class="text-dim text-xs" style="text-transform:uppercase;display:block;margin-bottom:4px;">Exclude Paths (comma-separated)</label>
        <input type="text" id="crawl-exclude" class="search-box" placeholder="/search, /login" style="margin-bottom:0;">
      </div>
    </div>
    <button class="btn" id="crawl-preview-btn" onclick="doCrawl(true)">Preview</button>
    <button class="btn" id="crawl-btn" onclick="doCrawl(false)" style="margin-left:8px;">Crawl &amp; Ingest</button>
    <span id="crawl-status" style="margin-left:12px;font-size:12px;"></span>
  </div>
  {# Single merged style attribute, for the same reason as #wi-results above. #}
  <div id="crawl-results" class="panel" style="display:none;max-height:400px;overflow-y:auto;font-size:12px;"></div>
</div>

<h3 class="section-title mt-24">Recent Web Ingestions</h3>
<table>
  <tr><th>Title</th><th>Source/Category</th><th>Status</th><th>Pages</th><th>Concepts</th></tr>
  {% for d in web_docs %}
  <tr>
    {# Long titles are clipped with an ellipsis; the tooltip carries d.path. #}
    <td title="{{ d.path or '' }}" style="max-width:400px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;">{{ d.book_title or d.filename or '?' }}</td>
    <td>{{ d.source or '' }}/{{ d.category or '' }}</td>
    <td><span class="status status-{{ d.status or 'unknown' }}">{{ d.status or 'unknown' }}</span></td>
    <td>{{ d.pages_extracted or 0 }}</td>
    <td>{{ d.concepts_extracted or 0 }}</td>
  </tr>
  {% endfor %}
</table>
{% endblock %}
{% block scripts %}
  {# Page script for the web ingest page. #}
  <script src="/static/js/web-ingest.js"></script>
{% endblock %}