"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.rerankChunks = rerankChunks;
const text_1 = require("../utils/text");
/**
 * Token cache to avoid redundant computations across chunks.
 *
 * Entries are stored under the key returned by `getCacheKey` and hold the
 * token array that `tokens` produced for that text.
 */
// Upper bound on the number of cached entries; `tokens` evicts one entry before
// inserting once the map has reached this size.
const MAX_TOKEN_CACHE = 256;
// A Map iterates its keys in insertion order, so the first key it yields is the
// oldest insertion — that is the entry `tokens` evicts (first-in, first-out).
const tokenCache = new Map();
/**
 * Derives the key under which the tokens of `input` are cached.
 *
 * The whole string is hashed. Hashing only a prefix would give every pair of
 * texts sharing that prefix (common headers, overlapping chunk windows) the
 * same key, and the token cache would then return the token list of whichever
 * text happened to be cached first.
 *
 * @param {string} input - Raw text that is about to be tokenized.
 * @returns {*} The value `stableHash` returns for the full input.
 */
function getCacheKey(input) {
    return (0, text_1.stableHash)(input);
}
/**
 * Splits text into lowercase word tokens, memoized in `tokenCache`.
 *
 * The input is lowercased, passed through `normalizeWhitespace`, and split on
 * every run of characters that are not Unicode letters, combining marks,
 * digits or underscores. For ASCII text this is the same as splitting on
 * `\W+`; unlike `\W+` it keeps accented and non-Latin words intact instead of
 * cutting them apart or discarding them. Tokens of one or two characters are
 * dropped.
 *
 * The returned array is the cached instance itself, so callers must treat it
 * as read-only.
 *
 * @param {string} input - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function tokens(input) {
    const key = getCacheKey(input);
    const cached = tokenCache.get(key);
    if (cached !== undefined) {
        return cached;
    }
    const value = (0, text_1.normalizeWhitespace)(input.toLowerCase())
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter(token => token.length > 2);
    if (tokenCache.size >= MAX_TOKEN_CACHE) {
        // Map keys iterate in insertion order, so the first key is the oldest entry.
        const oldest = tokenCache.keys().next().value;
        if (oldest !== undefined) {
            tokenCache.delete(oldest);
        }
    }
    tokenCache.set(key, value);
    return value;
}
/**
 * Collects the distinct tokens of `input`.
 *
 * @param {string} input - Text to tokenize.
 * @returns {Set<string>} Unique tokens, in order of first appearance.
 */
function tokenSet(input) {
    const uniqueTokens = new Set();
    for (const token of tokens(input)) {
        uniqueTokens.add(token);
    }
    return uniqueTokens;
}
/**
 * N-gram extraction for the fuzzy (n-gram overlap) matching step.
 *
 * Only the first `maxChars` characters of `text` are considered. They are
 * lowercased and passed through `normalizeWhitespace`, then every contiguous
 * substring of length `n` is collected.
 *
 * @param {string} text - Text to slice into character n-grams.
 * @param {number} [n=3] - Length of each gram.
 * @param {number} [maxChars=150] - How many leading characters of `text` to use.
 * @returns {Set<string>} Distinct grams. Empty when nothing is left after
 *   cleaning; a single element holding the whole cleaned text when that text
 *   is shorter than `n`.
 */
function getNGrams(text, n = 3, maxChars = 150) {
    const grams = new Set();
    const cleanText = (0, text_1.normalizeWhitespace)(text.substring(0, maxChars).toLowerCase());
    if (cleanText.length === 0) {
        // An empty string is not a gram: storing it would make the set non-empty,
        // and two blank texts would then look like a perfect n-gram match.
        return grams;
    }
    if (cleanText.length < n) {
        // Too short to slice; keep the whole text so short inputs can still match.
        grams.add(cleanText);
        return grams;
    }
    for (let i = 0; i <= cleanText.length - n; i++) {
        grams.add(cleanText.substring(i, i + n));
    }
    return grams;
}
/**
 * Measures how much of the query's n-gram set also appears in the text's
 * n-gram set.
 *
 * @param {string} query - Search query.
 * @param {string} text - Candidate text.
 * @returns {number} Share of the query's grams found among the text's grams
 *   (0 to 1), or 0 when either gram set is empty.
 */
function ngramOverlapScore(query, text) {
    const wantedGrams = getNGrams(query);
    const availableGrams = getNGrams(text);
    if (wantedGrams.size === 0 || availableGrams.size === 0) {
        return 0;
    }
    const sharedCount = [...wantedGrams].reduce(
        (count, gram) => (availableGrams.has(gram) ? count + 1 : count),
        0,
    );
    return sharedCount / wantedGrams.size;
}
/**
 * Exact-word overlap between a query and a text.
 *
 * @param {Set<string>} queryTokens - Distinct tokens of the query.
 * @param {string} text - Candidate text; tokenized via `tokenSet`.
 * @returns {number} Share of query tokens present in the text (0 to 1), or 0
 *   when the query has no tokens.
 */
function lexicalOverlap(queryTokens, text) {
    const totalQueryTokens = queryTokens.size;
    if (totalQueryTokens === 0) {
        return 0;
    }
    const wordsInText = tokenSet(text);
    const hits = [...queryTokens].reduce(
        (count, token) => (wordsInText.has(token) ? count + 1 : count),
        0,
    );
    return hits / totalQueryTokens;
}
/**
 * Small score bonus for chunks whose source name mentions a query word.
 *
 * @param {string} query - Search query; tokenized via `tokens`.
 * @param {string} [sourceName] - Name of the chunk's source, if any.
 * @returns {number} 0.06 when at least one query token occurs as a substring
 *   of the lowercased, whitespace-normalized source name; otherwise 0
 *   (including when the source name or the query tokens are empty).
 */
function sourceBoost(query, sourceName) {
    if (!sourceName) {
        return 0;
    }
    const queryWords = tokens(query);
    if (queryWords.length === 0) {
        return 0;
    }
    const { normalizeWhitespace } = text_1;
    const normalizedSource = normalizeWhitespace(sourceName.toLowerCase());
    if (!normalizedSource) {
        return 0;
    }
    for (const word of queryWords) {
        if (normalizedSource.includes(word)) {
            return 0.06;
        }
    }
    return 0;
}
/**
 * Blends a retrieval score with a text-overlap score into a confidence value.
 *
 * The score is clamped to [0, 1] and weighted 75%; the overlap is weighted
 * 25%; the blend is clamped to [0, 1] again.
 *
 * A value that is not a number (NaN, `undefined`) is treated as 0 rather than
 * being allowed to turn the confidence into NaN: a missing score leaves only
 * the overlap contribution, and an unusable overlap yields 0.
 *
 * @param {number} score - Retrieval score of the chunk.
 * @param {number} overlap - Overlap score between query and chunk text.
 * @returns {number} Confidence in [0, 1]; never NaN.
 */
function confidenceFrom(score, overlap) {
    // Math.min/Math.max return NaN as soon as one operand is NaN, so map that to 0.
    const clampToUnit = (value) => {
        const clamped = Math.max(0, Math.min(1, value));
        return Number.isNaN(clamped) ? 0 : clamped;
    };
    const base = clampToUnit(score);
    return clampToUnit(base * 0.75 + overlap * 0.25);
}
/**
 * Re-scores retrieved chunks against the query and returns them best-first.
 *
 * With two chunks or fewer the input is returned as a shallow copy, unscored
 * and in its original order. Otherwise each chunk is copied into a new object
 * whose `score` is the original score plus a text-match score (40% exact-word
 * overlap, 60% n-gram overlap), a source-name boost and 8% of the computed
 * confidence; the copy also carries that `confidence`. The input array and its
 * chunk objects are not modified.
 *
 * Ordering: higher score first, then higher confidence, then shorter text.
 *
 * @param {Array<{text: string, score: number, sourceName?: string}>} chunks -
 *   Retrieved chunks.
 * @param {string} query - Search query.
 * @returns {Array<object>} New array of chunks, reranked as described above.
 */
function rerankChunks(chunks, query) {
    if (chunks.length <= 2) {
        return [...chunks];
    }
    // Descending score, then descending confidence, then ascending text length.
    const byRelevance = (left, right) => {
        if (left.score !== right.score) {
            return right.score - left.score;
        }
        const leftConfidence = left.confidence ?? 0;
        const rightConfidence = right.confidence ?? 0;
        return rightConfidence !== leftConfidence
            ? rightConfidence - leftConfidence
            : left.text.length - right.text.length;
    };
    const queryTokens = tokenSet(query);
    const rescored = [];
    for (const chunk of chunks) {
        // Precise (whole-word) and fuzzy (character n-gram) match scores.
        const exactWordScore = lexicalOverlap(queryTokens, chunk.text);
        const fuzzyScore = ngramOverlapScore(query, chunk.text);
        // Weighted blend: 40% exact word, 60% fuzzy n-gram.
        const semanticLiteScore = (exactWordScore * 0.4) + (fuzzyScore * 0.6);
        const boost = sourceBoost(query, chunk.sourceName);
        const confidence = confidenceFrom(chunk.score, semanticLiteScore);
        const score = chunk.score + semanticLiteScore + boost + (confidence * 0.08);
        rescored.push({ ...chunk, score, confidence });
    }
    rescored.sort(byRelevance);
    return rescored;
}
rag-ultimate

rag-ultimate