Project Files
src / retrieval / queryExpansion.ts
// Upper bound on how many expansion results are held in memory at once. When
// the cap is reached, the earliest-inserted entry is dropped first (a FIFO
// approximation, not true LRU).
const MAX_EXPANSION_CACHE = 512;

// Memoized expansion lists, keyed by normalized query + NUL + maxCount.
const expansionCache: Map<string, string[]> = new Map();

// Clause separators: a comma, semicolon or slash, or one of the whole words
// "and", "or", "vs", "versus" (matched case-insensitively).
const CLAUSE_SPLIT_RE = /[,;\/]|\b(?:and|or|vs|versus)\b/gi;
/**
 * Canonicalizes a raw query string: leading and trailing whitespace is
 * removed and every internal run of whitespace becomes a single space.
 */
function normalizeQuery(query: string): string {
  const trimmed = query.trim();
  // After trimming there is no edge whitespace, so splitting on whitespace
  // runs and re-joining with one space collapses only the interior runs.
  const tokens = trimmed.split(/\s+/);
  return tokens.join(" ");
}
/**
 * Breaks a query into clauses at every CLAUSE_SPLIT_RE match, trimming each
 * piece and discarding pieces that are empty after trimming.
 */
function splitClauses(query: string): string[] {
  const clauses: string[] = [];
  for (const piece of query.split(CLAUSE_SPLIT_RE)) {
    const clause = piece.trim();
    if (clause) {
      clauses.push(clause);
    }
  }
  return clauses;
}
/**
 * Returns a new array with case-insensitive duplicates removed. The first
 * occurrence of each value is kept, with its original casing and position
 * relative to the other survivors. The input array is not modified.
 */
function unique(values: string[]): string[] {
  const seenKeys = new Set<string>();
  return values.filter(value => {
    const folded = value.toLowerCase();
    if (seenKeys.has(folded)) {
      return false;
    }
    seenKeys.add(folded);
    return true;
  });
}
/**
 * Produces a short, de-duplicated list of query variants for retrieval.
 *
 * The normalized query always comes first, followed (when applicable) by:
 * the first five words (queries longer than five words), the first six words
 * (when that differs from the normalized query), "details about <clause>" for
 * the first two clauses, and "technical details about <query>" (queries longer
 * than three words). Duplicates are removed case-insensitively.
 *
 * Results are memoized per (normalized query, maxCount). The cache holds at
 * most MAX_EXPANSION_CACHE entries and evicts the earliest-inserted one first.
 *
 * @param query Raw user query; whitespace is normalized before use.
 * @param maxCount Maximum number of variants to return. Values below 1 (and
 *   NaN) are treated as 1, so the normalized query is always returned.
 * @returns A fresh array of at least one and at most max(1, maxCount)
 *   variants. The array is never empty and never contains an empty string
 *   unless the normalized query itself is empty, in which case it is [""].
 */
export function expandQueries(query: string, maxCount: number): string[] {
  const base = normalizeQuery(query);
  const cacheKey = base + "\u0000" + maxCount;
  const cached = expansionCache.get(cacheKey);
  // Return a copy so a caller mutating the result cannot corrupt the cache.
  if (cached) return cached.slice();

  // NOTE(review): \W is ASCII-only in JavaScript, so non-ASCII letters act as
  // word separators here; confirm that is intended for non-English queries.
  const words = base.split(/\W+/).filter(Boolean);
  const focus = words.slice(0, 6).join(" ");
  const [firstClause, secondClause] = splitClauses(base);

  // Variants that do not apply are represented as "" and filtered out below.
  const candidates = [
    base,
    words.length > 5 ? words.slice(0, 5).join(" ") : "",
    focus !== base ? focus : "",
    firstClause ? "details about " + firstClause : "",
    secondClause ? "details about " + secondClause : "",
    words.length > 3 ? "technical details about " + base : "",
  ];

  // Drop the "" placeholders before de-duplicating: otherwise one empty string
  // survives unique(), is returned as a query, and uses up a maxCount slot.
  let limited = unique(candidates.filter(Boolean)).slice(0, Math.max(1, maxCount));

  // Empty when the normalized query is empty or maxCount is NaN (slice(0, NaN)
  // yields nothing); fall back to the normalized query alone.
  if (limited.length === 0) {
    limited = [base];
  }

  // Every insertion goes through the cap check so the cache stays bounded.
  // Map iterates in insertion order, so the first key is the oldest entry.
  if (expansionCache.size >= MAX_EXPANSION_CACHE) {
    const oldest = expansionCache.keys().next().value;
    if (oldest !== undefined) expansionCache.delete(oldest);
  }
  expansionCache.set(cacheKey, limited);
  return limited.slice();
}