Project Files
dist / retrieval / hybridSearch.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.hybridScore = hybridScore;
const text_1 = require("../utils/text");
// Upper bound on the number of memoized queries; once reached, the entry that
// was inserted first is dropped before a new one is stored.
const MAX_QUERY_TOKEN_CACHE = 128;
// Normalized query string -> token array. Map keeps insertion order, which is
// what makes the "first key" the oldest entry.
const queryTokenCache = new Map();
// Token separator: any run of characters that are not letters, combining
// marks, digits or "_". For ASCII input this is exactly /\W+/, but unlike \W
// it does not treat non-ASCII letters as separators, so words such as "über"
// or "привет" stay intact instead of being split apart or discarded.
const QUERY_TOKEN_SEPARATOR = /[^\p{L}\p{M}\p{N}_]+/u;
/**
 * Splits a query into lowercase keyword tokens, memoizing the result.
 *
 * The query is lowercased and passed through `text_1.normalizeWhitespace`
 * (defined in ../utils/text); that normalized string is the cache key. Tokens
 * of one or two UTF-16 code units are discarded.
 *
 * @param {string} query - Raw query text; must provide `toLowerCase()`.
 * @returns {string[]} Tokens longer than two code units. The array is the
 *   cached instance itself, so callers must treat it as read-only.
 */
function tokenizedQuery(query) {
    const normalizedQuery = (0, text_1.normalizeWhitespace)(query.toLowerCase());
    const cached = queryTokenCache.get(normalizedQuery);
    if (cached !== undefined) {
        return cached;
    }
    const tokens = normalizedQuery
        .split(QUERY_TOKEN_SEPARATOR)
        .filter((word) => word.length > 2);
    // FIFO eviction: drop the oldest key before inserting a new one.
    if (queryTokenCache.size >= MAX_QUERY_TOKEN_CACHE) {
        const oldest = queryTokenCache.keys().next().value;
        if (oldest !== undefined) {
            queryTokenCache.delete(oldest);
        }
    }
    queryTokenCache.set(normalizedQuery, tokens);
    return tokens;
}
// ── Lowercased chunk-text cache ──────────────────────────────────────────────
// With multiQueryCount > 1 (parallel queries) one chunk can be returned for
// several queries, which would otherwise lowercase the same large string again
// and again. Results are memoized in a bounded, first-in-first-out Map.
const MAX_LOWER_CACHE = 128;
const lowerCache = new Map();
/**
 * Returns `text.toLowerCase()`, reusing a previously computed result when the
 * exact same string has been seen recently.
 *
 * @param {string} text - Text to lowercase; also used verbatim as the cache key.
 * @returns {string} The lowercased text.
 */
function lowerCached(text) {
    const hit = lowerCache.get(text);
    if (hit === undefined) {
        const lowered = text.toLowerCase();
        if (lowerCache.size >= MAX_LOWER_CACHE) {
            // Map iterates in insertion order, so the first key is the oldest.
            const [stalest] = lowerCache.keys();
            if (stalest !== undefined) {
                lowerCache.delete(stalest);
            }
        }
        lowerCache.set(text, lowered);
        return lowered;
    }
    return hit;
}
/**
 * Computes the lexical bonus that is added on top of a semantic score.
 *
 * The bonus has two parts: the fraction of query words that occur as
 * substrings of the lowercased text, weighted by 0.22, plus a flat 0.15 when
 * the whole normalized query occurs verbatim in the lowercased text.
 *
 * @param {string[]} queryWords - Query tokens to look for.
 * @param {string} normalizedQuery - Full query used for the exact-phrase check;
 *   an empty string disables that check.
 * @param {string} text - Chunk text to search in.
 * @returns {number} 0 when there are no query words, otherwise a value in
 *   [0, 0.37] (up to floating-point rounding).
 */
function keywordBonus(queryWords, normalizedQuery, text) {
    const total = queryWords.length;
    if (total === 0) {
        return 0;
    }
    // NOTE(review): the text is only lowercased here, not whitespace-normalized,
    // so a phrase whose words are separated differently in the text (e.g. by a
    // newline) will not earn the phrase boost — confirm this is intended.
    const lowered = lowerCached(text);
    const hits = queryWords.filter((term) => lowered.includes(term)).length;
    const phraseBoost = normalizedQuery && lowered.includes(normalizedQuery) ? 0.15 : 0;
    return (hits / total) * 0.22 + phraseBoost;
}
/**
 * Combines a semantic similarity score with a keyword-overlap bonus.
 *
 * @param {string} query - Raw query text; must provide `toLowerCase()`.
 * @param {string} text - Chunk text the query is scored against.
 * @param {number} semanticScore - Score the bonus is added to (presumably a
 *   similarity value produced by the caller — TODO confirm range).
 * @returns {number} `semanticScore` plus the result of `keywordBonus`.
 */
function hybridScore(query, text, semanticScore) {
    const { normalizeWhitespace } = text_1;
    // Lowercased, whitespace-normalized query used for the exact-phrase check.
    const phrase = normalizeWhitespace(query.toLowerCase());
    const terms = tokenizedQuery(query);
    const bonus = keywordBonus(terms, phrase, text);
    return semanticScore + bonus;
}