mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 14:44:54 +02:00
Replace mega-channel size rule with explicit skip list
The >500-video threshold was too aggressive — it skipped tiebreaking for legitimate large channels (1a-auto, forgotten-weapons, etc.) where channel context correctly resolves ties. Replace with an explicit MEGA_CHANNEL_SKIP_LIST in recon_domains.py. Only known non-topical catch-alls (currently just "Transcript") skip the tiebreaker. Removed _channel_video_count() helper and MEGA_CHANNEL_THRESHOLD constant (no longer used).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d8196e60c7
commit
299be21f42
3 changed files with 26 additions and 26 deletions
|
|
@@ -32,3 +32,15 @@ DOMAIN_CATEGORY_MAP = {
|
|||
VALID_DOMAINS = set(DOMAIN_CATEGORY_MAP.keys())
|
||||
|
||||
CATEGORY_DOMAIN_MAP = {v: k for k, v in DOMAIN_CATEGORY_MAP.items()}
|
||||
|
||||
# Channels whose tiebreaker is skipped because their content is non-topical
|
||||
# (catch-alls, miscellany dumps, etc.). Items in these channels with tied
|
||||
# domain counts go straight to tied_manual without channel-context tiebreaker.
|
||||
#
|
||||
# This is intentionally a hardcoded explicit list, not a size threshold.
|
||||
# Adding a channel here requires an explicit decision — only add channels
|
||||
# that are genuinely non-topical catch-alls where channel-wide concept
|
||||
# aggregation would produce meaningless noise.
|
||||
MEGA_CHANNEL_SKIP_LIST = {
|
||||
'Transcript', # Legacy catch-all, ~9,200 videos, no topical coherence
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue