Project Files
src / retrieval / dedupe.ts
import { WHITESPACE_RE, normalizeWhitespace, stableHash } from "../utils/text";
/**
 * A single retrieval hit as consumed by dedupeResults.
 *
 * Only `text` and `score` are read by the deduplication code in this file;
 * the remaining optional fields are carried through untouched.
 */
export interface RetrievedItem {
/** Passage text; dedupeResults derives the duplicate-detection key from it. */
text: string;
/** Numeric score; when two items share a key, the higher-scoring one wins. */
score: number;
/** Optional citation string. NOTE(review): format is not shown in this file. */
citation?: string;
/** Optional name of the source the item came from (presumably human-readable; confirm with producer). */
sourceName?: string;
/** Optional confidence value. NOTE(review): range and relation to `score` are not shown here. */
confidence?: number;
}
// Maximum number of entries seenCache is trimmed back to at the end of each
// dedupeResults call.
const MAX_SEEN_CACHE = 2048;
// Module-level map from hashed text key to the RetrievedItem currently stored
// for that key. It is shared by every dedupeResults call made against this
// module instance, so it persists between calls.
const seenCache = new Map<string, RetrievedItem>();
/**
 * Drops retrieval results whose text duplicates an item that has already been
 * seen with an equal or higher score.
 *
 * Two items count as duplicates when their keys match. The key is
 * `stableHash` of the text after `normalizeWhitespace`, lower-casing and
 * truncation to the first 700 characters, so texts that differ only past
 * that prefix collapse to the same key.
 *
 * Duplicate tracking uses the module-level `seenCache`, which persists across
 * calls: an item seen in an earlier call suppresses an equal-or-lower-scoring
 * duplicate in a later call (until the cached entry is evicted).
 *
 * Within a single call, each key appears at most once in the returned array.
 * If a later item outscores an earlier duplicate from the same batch, it
 * takes over the earlier item's position instead of being appended.
 *
 * @param results - Items to filter; the array itself is not mutated.
 * @returns A new array holding the surviving items, in order of first appearance.
 */
export function dedupeResults(results: RetrievedItem[]): RetrievedItem[] {
  const kept: RetrievedItem[] = [];
  // Index in `kept` of the item emitted for each key during THIS call, so a
  // better-scoring duplicate can replace it rather than be emitted twice.
  const keptIndexByKey = new Map<string, number>();

  for (const result of results) {
    const normalized = normalizeWhitespace(result.text).toLowerCase().slice(0, 700);
    const key = stableHash(normalized);

    const existing = seenCache.get(key);
    if (existing && existing.score >= result.score) continue;

    // Either a brand-new key or a strictly better score: remember this item.
    seenCache.set(key, result);

    const keptIndex = keptIndexByKey.get(key);
    if (keptIndex === undefined) {
      keptIndexByKey.set(key, kept.length);
      kept.push(result);
    } else {
      // Same key was already emitted in this batch with a lower score.
      kept[keptIndex] = result;
    }
  }

  // Trim the cache back to its cap. A Map iterates in insertion order and
  // re-setting an existing key does not move it, so this evicts the keys that
  // were first inserted longest ago (FIFO).
  const overflow = seenCache.size - MAX_SEEN_CACHE;
  if (overflow > 0) {
    let removed = 0;
    for (const key of seenCache.keys()) {
      if (removed >= overflow) break;
      seenCache.delete(key);
      removed += 1;
    }
  }

  return kept;
}