// scorer.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.scoreSource = scoreSource;
// Hype / clickbait phrases, all lowercase. scoreLinguisticObjectivity looks for
// each one as a plain substring of the lowercased body text; every distinct
// phrase that is present costs 15 points, with the total penalty capped at 60.
// Because matching is by substring, short entries also hit longer words
// (e.g. "secret" is found inside "secretary").
const SENSATIONALISM = [
"shocking", "you won't believe", "incredible", "unbelievable", "mind-blowing",
"bombshell", "explosive", "stunning", "outrageous", "jaw-dropping",
"breaking:", "urgent:", "alert:", "exposed", "revealed", "secret",
"they don't want you to know", "doctors hate", "trick",
];
// Attribution / evidence phrases, all lowercase. scoreLinguisticObjectivity
// searches the lowercased body text for each one as a substring; every distinct
// phrase that is present adds 7 points to an evidence bonus. That bonus also
// includes points for numeric figures and is capped at 40 overall.
const EVIDENCE_MARKERS = [
"according to", "study found", "reported by", "research shows",
"data shows", "statistics show", "survey found", "analysis found",
"cited in", "published in", "peer-reviewed", "researchers found",
];
// Domain markers that earn the larger (+35) bonus in scoreAuthorityAccountability.
// Entries are dot-delimited hostname fragments; ".ac." carries a trailing dot, so
// it only applies when another label follows it (as in "ox.ac.uk").
const AUTHORITY_TLDS = [".edu", ".gov", ".ac.", ".mil"];
// Domain endings that earn the smaller (+10) bonus in scoreAuthorityAccountability.
// They are only considered when no AUTHORITY_TLDS entry matched.
const MODERATE_TLDS = [".org", ".int"];
// Domain fragments for which scoreRecency applies an age-based score. A domain
// qualifies when it contains any entry as a substring; every other domain gets
// the neutral recency score regardless of publication date.
const TIME_SENSITIVE_DOMAINS = [
"techcrunch", "wired", "arstechnica", "theverge", "zdnet", "engadget",
"webmd", "mayoclinic", "nih.gov", "cdc.gov", "wsj", "bloomberg",
"reuters", "ft.com", "economist", "forbes",
];
// URL path segments treated as clickbait indicators. scoreStructuralClarity
// awards its 15-point "clean URL" bonus only when the URL contains none of
// these. Matching is a case-sensitive substring test and each entry includes
// both surrounding slashes, so only whole path segments match.
const CLICKBAIT_URL_PATTERNS = [
"/top-10/", "/top-5/", "/shocking/", "/you-wont-believe/", "/viral/",
];
/**
 * Reduce a URL to its hostname, dropping one leading "www." label.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} The hostname without a leading "www.", or the input
 *   itself, unchanged, when the URL constructor rejects it.
 */
function extractDomain(url) {
    let hostname;
    try {
        ({ hostname } = new URL(url));
    }
    catch {
        // Unparseable input is handed back as-is rather than raising.
        return url;
    }
    return hostname.startsWith("www.") ? hostname.slice("www.".length) : hostname;
}
/**
 * Score how many independent domains accompany this source.
 *
 * Other URLs are reduced to domains with extractDomain; any that share the
 * current source's domain are ignored and duplicates count once.
 *
 * @param {string} url - URL of the source being scored.
 * @param {string[]} otherSourceUrls - URLs of the other sources in the set.
 * @returns {number} 0 when no other domain is present, 50 for exactly one,
 *   100 for two or more.
 */
function scoreCrossSourceCorroboration(url, otherSourceUrls) {
    if (otherSourceUrls.length === 0) {
        return 0;
    }
    const ownDomain = extractDomain(url);
    const independentDomains = new Set();
    otherSourceUrls.forEach((candidateUrl) => {
        const candidateDomain = extractDomain(candidateUrl);
        if (candidateDomain !== ownDomain) {
            independentDomains.add(candidateDomain);
        }
    });
    switch (independentDomains.size) {
        case 0:
            return 0;
        case 1:
            return 50;
        default:
            return 100;
    }
}
/**
 * Score the tone of the text: evidence language raises it, hype lowers it.
 *
 * Starts from a baseline of 60. Each distinct SENSATIONALISM phrase found
 * subtracts 15 (at most 60). Each distinct EVIDENCE_MARKERS phrase found adds 7
 * and each numeric figure adds 3 (only the first five figures count); the
 * combined bonus is limited to 40. Phrase lookups are case-insensitive
 * substring tests.
 *
 * @param {string} bodyText - Article text.
 * @returns {number} Score clamped to the range 0-100.
 */
function scoreLinguisticObjectivity(bodyText) {
    const haystack = bodyText.toLowerCase();
    const countPresent = (phrases) => phrases.reduce((hits, phrase) => (haystack.includes(phrase) ? hits + 1 : hits), 0);
    const hypeHits = countPresent(SENSATIONALISM);
    const evidenceHits = countPresent(EVIDENCE_MARKERS);

    const figures = bodyText.match(/\b\d+(?:\.\d+)?(?:\s*%|\s*million|\s*billion)?\b/g);
    const figureHits = figures === null ? 0 : figures.length;

    const bonus = Math.min(evidenceHits * 7 + Math.min(figureHits, 5) * 3, 40);
    const penalty = Math.min(hypeHits * 15, 60);
    const unclamped = 60 - penalty + bonus;
    return Math.max(0, Math.min(100, unclamped));
}
/**
 * Score how substantial the text is, from its length and citation cues.
 *
 * Length contributes 60 points at 800+ whitespace-separated words, 40 at 400+,
 * 20 at 200+, otherwise 0. Each distinct citation marker found in the
 * lowercased text adds 10 more, up to 40.
 *
 * @param {string} bodyText - Article text.
 * @returns {number} Score in the range 0-100.
 */
function scoreContentDepth(bodyText) {
    // [minimum word count, points], highest tier first so the first hit wins.
    const lengthTiers = [[800, 60], [400, 40], [200, 20]];
    const citationCues = ["according to", "cited in", "published in", "source:", "[1]", "[2]", "ibid", "footnote"];

    const totalWords = bodyText.split(/\s+/).filter(Boolean).length;
    const matchedTier = lengthTiers.find(([minimumWords]) => totalWords >= minimumWords);
    const lengthPoints = matchedTier ? matchedTier[1] : 0;

    const lowered = bodyText.toLowerCase();
    let cuesFound = 0;
    for (const cue of citationCues) {
        if (lowered.includes(cue)) {
            cuesFound += 1;
        }
    }
    const citationPoints = Math.min(cuesFound * 10, 40);

    return Math.min(lengthPoints + citationPoints, 100);
}
/**
 * Decide whether a domain marker such as ".edu" or ".ac." applies to a host.
 *
 * Matching is done on whole hostname labels in suffix position, never on raw
 * substrings, so look-alike hosts ("harvard.edu.evil.com", "my.education.org")
 * do not qualify.
 *
 * @param {string} domain - Hostname, or the raw input when URL parsing failed.
 * @param {string} marker - Dot-delimited marker; a trailing dot means the label
 *   is only valid as a second-level label in front of a country code.
 * @returns {boolean} True when the marker's label is the top-level label, or
 *   sits directly before a two-letter country-code label.
 */
function matchesAuthorityMarker(domain, marker) {
    const label = marker.replace(/^\.+|\.+$/g, "");
    // When URL parsing failed upstream the value may still carry a port, path,
    // query or fragment; only the host portion is relevant here.
    const host = domain.toLowerCase().split(/[/:?#]/)[0];
    const labels = host.split(".");
    if (label === "" || labels.length < 2)
        return false;
    const topLevel = labels[labels.length - 1];
    // ".edu" style markers may be the top-level label itself (mit.edu).
    if (!marker.endsWith(".") && topLevel === label)
        return true;
    // Country-code form: the label sits right before a two-letter TLD
    // (gov.uk, edu.au, ac.jp).
    return /^[a-z]{2}$/.test(topLevel) && labels[labels.length - 2] === label;
}
/**
 * Score signals that someone accountable stands behind the content.
 *
 * Starts at 30. Adds 35 for a domain matching AUTHORITY_TLDS, otherwise 10 for
 * a domain ending in one of MODERATE_TLDS. Adds 20 when a byline cue ("by ",
 * "author:", ...) occurs in the first 500 characters (case-insensitive), and 10
 * when the first 200 characters contain two consecutive capitalised words,
 * which is used as a rough stand-in for a person's name.
 *
 * @param {string} url - URL of the source.
 * @param {string} bodyText - Article text.
 * @returns {number} Score, at most 100.
 */
function scoreAuthorityAccountability(url, bodyText) {
    let score = 30;
    const domain = extractDomain(url);
    // Label-based matching: a plain substring test would also reward hosts that
    // merely embed ".edu"/".gov" somewhere, and would miss bare "gov.uk".
    if (AUTHORITY_TLDS.some(t => matchesAuthorityMarker(domain, t)))
        score += 35;
    else if (MODERATE_TLDS.some(t => domain.endsWith(t)))
        score += 10;
    const opening = bodyText.toLowerCase().slice(0, 500);
    if (["by ", "author:", "written by", "reporter:", "published by"].some(p => opening.includes(p)))
        score += 20;
    // Case-sensitive on purpose: looks for a "Firstname Lastname" shape.
    if (/[A-Z][a-z]+\s+[A-Z][a-z]+/.test(bodyText.slice(0, 200)))
        score += 10;
    return Math.min(score, 100);
}
/**
 * Score page structure from markup flags and the shape of the URL.
 *
 * Baseline 30, plus 35 for structured-data schema, plus 20 for headings, plus
 * 15 when the URL contains none of CLICKBAIT_URL_PATTERNS.
 *
 * @param {string} url - URL of the source.
 * @param {boolean} hasSchema - Whether the page has schema markup.
 * @param {boolean} hasHeadings - Whether the page has headings.
 * @returns {number} Score, at most 100.
 */
function scoreStructuralClarity(url, hasSchema, hasHeadings) {
    const schemaPoints = hasSchema ? 35 : 0;
    const headingPoints = hasHeadings ? 20 : 0;
    const looksLikeClickbait = CLICKBAIT_URL_PATTERNS.some((fragment) => url.includes(fragment));
    const cleanUrlPoints = looksLikeClickbait ? 0 : 15;
    return Math.min(30 + schemaPoints + headingPoints + cleanUrlPoints, 100);
}
/**
 * Score how recent the content is. Recency is conditional: age only matters
 * for domains containing one of TIME_SENSITIVE_DOMAINS (tech / medical /
 * finance / news). Every other domain, and any missing or unparseable date,
 * gets a neutral 70 regardless of age.
 *
 * @param {string|number|Date|null|undefined} publishedDate - Publication date
 *   in any form the Date constructor accepts.
 * @param {string} domain - Domain of the source.
 * @returns {number} 70 when neutral; otherwise 100 up to one year old, 80 up to
 *   two, 60 up to three, and 20 beyond that (a year is counted as 365 days).
 */
function scoreRecency(publishedDate, domain) {
    const NEUTRAL = 70;
    if (!publishedDate) {
        return NEUTRAL;
    }
    const isTimeSensitive = TIME_SENSITIVE_DOMAINS.some((fragment) => domain.includes(fragment));
    if (!isTimeSensitive) {
        return NEUTRAL;
    }
    const publishedMs = new Date(publishedDate).getTime();
    if (Number.isNaN(publishedMs)) {
        return NEUTRAL;
    }
    const ageYears = (Date.now() - publishedMs) / (1000 * 60 * 60 * 24 * 365);
    // [maximum age in years, score], youngest first so the first hit wins.
    const ageTiers = [[1, 100], [2, 80], [3, 60]];
    for (const [maxAgeYears, tierScore] of ageTiers) {
        if (ageYears <= maxAgeYears) {
            return tierScore;
        }
    }
    return 20;
}
/**
 * Translate a numeric score into a verdict label.
 *
 * @param {number} score - Overall score.
 * @returns {"high"|"moderate"|"low"|"very_low"} "high" from 75, "moderate"
 *   from 50, "low" from 25, "very_low" for anything else.
 */
function getVerdict(score) {
    // [minimum score, label], highest band first so the first hit wins.
    const bands = [
        [75, "high"],
        [50, "moderate"],
        [25, "low"],
    ];
    const band = bands.find(([minimum]) => score >= minimum);
    return band ? band[1] : "very_low";
}
/**
 * Compute the overall reliability score for one source.
 *
 * Six signals are evaluated and combined as a weighted sum (weights total 1.0):
 * cross-source corroboration 0.25, linguistic objectivity 0.20, authority and
 * accountability 0.20, content depth 0.15, structural clarity 0.10 and
 * recency 0.10. The sum is rounded to the nearest integer.
 *
 * @param {string} url - URL of the source.
 * @param {string} bodyText - Article text.
 * @param {string|null|undefined} publishedDate - Publication date, if known.
 * @param {boolean} hasSchema - Whether the page has schema markup.
 * @param {boolean} hasHeadings - Whether the page has headings.
 * @param {string[]} [otherSourceUrls=[]] - URLs of the other sources in the set.
 * @returns {{score: number, verdict: string, signals: Array<{name: string, score: number, weight: number}>, caveat: string}}
 *   The rounded score, its verdict label, the per-signal breakdown and a fixed caveat.
 */
function scoreSource(url, bodyText, publishedDate, hasSchema, hasHeadings, otherSourceUrls = []) {
    const domain = extractDomain(url);
    const makeSignal = (name, score, weight) => ({ name, score, weight });
    const signals = [
        makeSignal("cross_source_corroboration", scoreCrossSourceCorroboration(url, otherSourceUrls), 0.25),
        makeSignal("linguistic_objectivity", scoreLinguisticObjectivity(bodyText), 0.20),
        makeSignal("authority_accountability", scoreAuthorityAccountability(url, bodyText), 0.20),
        makeSignal("content_depth", scoreContentDepth(bodyText), 0.15),
        makeSignal("structural_clarity", scoreStructuralClarity(url, hasSchema, hasHeadings), 0.10),
        makeSignal("recency", scoreRecency(publishedDate, domain), 0.10),
    ];
    let weightedTotal = 0;
    for (const signal of signals) {
        weightedTotal += signal.score * signal.weight;
    }
    const score = Math.round(weightedTotal);
    return {
        score,
        verdict: getVerdict(score),
        signals,
        caveat: "Score measures reliability proxies, not factual correctness.",
    };
}