diff --git a/lib/api.py b/lib/api.py index cbb3377..aa13a39 100644 --- a/lib/api.py +++ b/lib/api.py @@ -60,7 +60,10 @@ PEERTUBE_SUBNAV = [ ] -KIWIX_SUBNAV = [] # Single-page, no subnav needed +KIWIX_SUBNAV = [ + {'href': '/kiwix', 'label': 'Library'}, + {'href': '/kiwix/scraper', 'label': 'Scraper'}, +] SETTINGS_SUBNAV = [ {'href': '/settings/keys', 'label': 'API Keys'}, {'href': '/settings/cookies', 'label': 'YouTube Cookies'}, @@ -1956,6 +1959,12 @@ def kiwix_dashboard(): domain='kiwix', subnav=KIWIX_SUBNAV, active_page='/kiwix') +@app.route('/kiwix/scraper') +def kiwix_scraper(): + return render_template('kiwix/scraper.html', + domain='kiwix', subnav=KIWIX_SUBNAV, active_page='/kiwix/scraper') + + @app.route('/api/kiwix/sources') def api_kiwix_sources(): """Serve pre-cached Kiwix sources data (never blocks).""" diff --git a/static/css/recon.css b/static/css/recon.css index 31d6306..a272876 100644 --- a/static/css/recon.css +++ b/static/css/recon.css @@ -331,3 +331,4 @@ tr:hover { background: var(--bg-secondary); } .badge-detected { background: #333; color: #888; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } .badge-processing { background: #4a3a1a; color: #f59e0b; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } .badge-extracting { background: #1a3a5a; color: #0ea5e9; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } +.badge-failed { background: #4a1a1a; color: #ff4444; padding: 2px 8px; border-radius: var(--radius); font-size: 11px; } diff --git a/static/js/scraper.js b/static/js/scraper.js new file mode 100644 index 0000000..6aa23d7 --- /dev/null +++ b/static/js/scraper.js @@ -0,0 +1,155 @@ +/* RECON Scraper Dashboard JS */ +(function() { + 'use strict'; + + function loadJobs() { + return RECON.fetchJSON('/api/scraper/jobs').then(function(data) { + var jobs = data.jobs || []; + + // Stats + var total = jobs.length; + var active = 0, complete = 0, failed = 0; + jobs.forEach(function(j) { + if (j.status === 'complete') complete++; + else if (j.status === 'failed') failed++; + else if (j.status === 'running' || j.status === 'pending') active++; + }); + RECON.set('sc-total', RECON.fmt(total)); + RECON.set('sc-active', RECON.fmt(active)); + RECON.set('sc-complete', RECON.fmt(complete)); + RECON.set('sc-failed', RECON.fmt(failed)); + + // Table + var html = ''; + jobs.forEach(function(j) { + var badge = statusBadge(j.status); + var mode = j.crawl_mode ? + '' + j.crawl_mode + '' : '\u2014'; + var pages = j.page_count ? RECON.fmt(j.page_count) : '\u2014'; + var zim = j.zim_filename ? + '' + j.zim_filename + '' : '\u2014'; + var actions = ''; + + if (j.status === 'running' || j.status === 'pending') { + actions = ''; + } else if (j.status === 'failed' || j.status === 'cancelled') { + actions = ''; + } + + // Truncate URL for display + var displayUrl = j.url.length > 40 ? j.url.substring(0, 40) + '\u2026' : j.url; + + html += '' + + '' + j.id + '' + + '' + escHtml(displayUrl) + '' + + '' + escHtml(j.title || '\u2014') + '' + + '' + mode + '' + + '' + pages + '' + + '' + badge + errorTooltip(j) + '' + + '' + zim + '' + + '' + actions + '' + + ''; + }); + if (!html) html = 'No scrape jobs'; + RECON.setHTML('sc-table-body', html); + }).catch(function(err) { + console.error('Scraper dashboard error:', err); + }); + } + + function statusBadge(status) { + var map = { + 'pending': 'PENDING', + 'running': 'RUNNING', + 'complete': 'COMPLETE', + 'failed': 'FAILED', + 'cancelled': 'CANCELLED' + }; + return map[status] || '' + (status || 'UNKNOWN').toUpperCase() + ''; + } + + function errorTooltip(job) { + if (!job.error_message) return ''; + var short = job.error_message.length > 80 ? + job.error_message.substring(0, 80) + '\u2026' : job.error_message; + return '
' + escHtml(short) + '
'; + } + + function escHtml(str) { + if (!str) return ''; + return str.replace(/&/g, '&').replace(//g, '>') + .replace(/"/g, '"').replace(/'/g, '''); + } + + function submit(e) { + e.preventDefault(); + var url = document.getElementById('sf-url').value.trim(); + if (!url) return false; + + var body = { url: url }; + var title = document.getElementById('sf-title').value.trim(); + var lang = document.getElementById('sf-lang').value; + var category = document.getElementById('sf-category').value.trim(); + var mode = document.getElementById('sf-mode').value; + + if (title) body.title = title; + if (lang) body.language = lang; + if (category) body.category = category; + if (mode) body.crawl_mode = mode; + + var btn = document.getElementById('sf-submit-btn'); + var feedback = document.getElementById('sf-feedback'); + btn.disabled = true; + btn.textContent = 'Submitting...'; + + RECON.postJSON('/api/scraper/submit', body).then(function(data) { + btn.disabled = false; + btn.textContent = 'Submit'; + if (data.ok) { + feedback.style.display = 'block'; + feedback.style.color = '#00ff41'; + feedback.textContent = 'Job #' + data.job_id + ' submitted successfully'; + document.getElementById('sf-url').value = ''; + document.getElementById('sf-title').value = ''; + document.getElementById('sf-category').value = ''; + setTimeout(function() { feedback.style.display = 'none'; }, 4000); + loadJobs(); + } else { + feedback.style.display = 'block'; + feedback.style.color = '#ff4444'; + feedback.textContent = 'Error: ' + (data.error || 'Unknown error'); + } + }).catch(function(err) { + btn.disabled = false; + btn.textContent = 'Submit'; + feedback.style.display = 'block'; + feedback.style.color = '#ff4444'; + feedback.textContent = 'Network error: ' + err.message; + }); + + return false; + } + + function cancel(jobId) { + if (!confirm('Cancel job #' + jobId + '?')) return; + RECON.postJSON('/api/scraper/cancel/' + jobId).then(function(data) { + if (data.ok) loadJobs(); + else alert('Error: ' + (data.error || 'Unknown')); + }); + } + + function retry(jobId) { + RECON.postJSON('/api/scraper/retry/' + jobId).then(function(data) { + if (data.ok) loadJobs(); + else alert('Error: ' + (data.error || 'Unknown')); + }); + } + + // Expose for inline onclick + window.SCRAPER = { submit: submit, cancel: cancel, retry: retry }; + + document.addEventListener('DOMContentLoaded', function() { + RECON.startRefresh(loadJobs, 10000); + }); +})(); diff --git a/templates/kiwix/scraper.html b/templates/kiwix/scraper.html new file mode 100644 index 0000000..53d3e23 --- /dev/null +++ b/templates/kiwix/scraper.html @@ -0,0 +1,91 @@ +{% extends "base.html" %} +{% block content %} +
+ +
+

Submit Scrape Job

+
+
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+ +
+
+ + +
+
Total Jobs
+
Active
+
Complete
+
Failed
+
+ + +
+

Scrape Jobs

+ + + + + + + + + + + + + + + + +
IDURLTitleModePagesStatusZIM
Loading...
+
+
+{% endblock %} +{% block scripts %} + +{% endblock %}