// src/retrieval/engine.ts
/**
* @file retrieval/engine.ts
* Retrieval engine: combines TF-IDF similarity, memory decay, access
* frequency, and confidence into a single composite score.
*/
import { TfIdfIndex } from "./tfidf";
import { MemoryDatabase } from "../storage/db";
import { srlmRerank } from "../processing/ai";
import {
DECAY_HALF_LIFE_DAYS,
DECAY_WEIGHT,
FREQUENCY_WEIGHT,
SIMILARITY_WEIGHT,
CONFIDENCE_WEIGHT,
MIN_RELEVANCE_THRESHOLD,
MAX_SEARCH_RESULTS,
} from "../constants";
import type { MemoryRecord, ScoredMemory, RetrievalResult } from "../types";
/** Number of milliseconds in one day; converts epoch-ms deltas to days. */
const MS_PER_DAY = 86_400_000;

/**
 * Exponential recency factor in the range [0, 1].
 *
 * The factor is 1 for a memory accessed "now" and halves every
 * `halfLifeDays` days: `2 ^ (-daysSinceAccess / halfLifeDays)`.
 *
 * Degenerate inputs are mapped to the nearest meaningful value instead of
 * leaking NaN or values above 1 into the composite score:
 * - a timestamp in the future (e.g. clock skew) counts as "just accessed" → 1;
 * - a NaN timestamp counts as fully decayed → 0;
 * - a non-positive or NaN half-life counts as instant decay → 0 once any
 *   time has elapsed.
 *
 * @param lastAccessedAt Epoch milliseconds of the last access.
 * @param halfLifeDays   Days after which the factor drops to 0.5.
 * @returns Decay factor between 0 and 1 inclusive.
 */
function computeDecay(lastAccessedAt: number, halfLifeDays: number): number {
  const daysSinceAccess = (Date.now() - lastAccessedAt) / MS_PER_DAY;
  if (Number.isNaN(daysSinceAccess)) return 0;
  // Future timestamps would otherwise produce a factor greater than 1.
  if (daysSinceAccess <= 0) return 1;
  // Guards division by zero and the growth a negative half-life would cause.
  if (!(halfLifeDays > 0)) return 0;
  return Math.pow(2, -daysSinceAccess / halfLifeDays);
}
/**
 * Log-scaled access frequency normalised to the range [0, 1].
 *
 * Computes `log(1 + accessCount) / log(1 + maxAccessCount)`, so rarely used
 * memories are not drowned out by a few very hot ones. The result is clamped:
 * a count above the known maximum (possible when the maximum is stale)
 * yields 1, and non-positive or NaN counts yield 0.
 *
 * @param accessCount    Number of times this memory has been accessed.
 * @param maxAccessCount Largest access count known across memories.
 * @returns Normalised frequency between 0 and 1 inclusive.
 */
function normalizeFrequency(accessCount: number, maxAccessCount: number): number {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` would let through.
  if (!(maxAccessCount > 0) || !(accessCount > 0)) return 0;
  const ratio = Math.log(1 + accessCount) / Math.log(1 + maxAccessCount);
  // Written as a comparison so a NaN ratio (Infinity / Infinity) collapses to 1.
  return ratio < 1 ? ratio : 1;
}
/**
 * Ranks stored memories for a free-text query by blending text similarity
 * with recency decay, access frequency, and stored confidence.
 *
 * The engine keeps an in-memory TF-IDF index alongside the database. The
 * constructor does not populate it; the index stays empty until
 * `rebuildIndex()` or `indexMemory()` is called.
 */
export class RetrievalEngine {
  /** Similarity assigned to every hit when falling back to the database's FTS search. */
  private static readonly FTS_FLAT_SIMILARITY = 0.5;
  /** Upper bound on the over-fetched candidate pool used for small SRLM limits. */
  private static readonly SRLM_POOL_CAP = 30;
  /** Maximum number of top candidates sent to the SRLM reranker. */
  private static readonly SRLM_MAX_CANDIDATES = 12;
  /** Weight of the original composite score when an SRLM score is available. */
  private static readonly SRLM_BASE_WEIGHT = 0.6;
  /** Weight of the SRLM score when blending. */
  private static readonly SRLM_RERANK_WEIGHT = 0.4;
  /** Multiplier applied to candidates the reranker returned no score for. */
  private static readonly SRLM_UNSCORED_PENALTY = 0.5;

  private readonly tfIdf = new TfIdfIndex();
  private readonly db: MemoryDatabase;
  /** Largest access count seen so far; denominator for frequency normalisation. */
  private maxAccessCount = 1;

  constructor(db: MemoryDatabase) {
    this.db = db;
  }

  /**
   * Clears the TF-IDF index and re-adds every memory returned by the
   * database, resetting the running maximum access count along the way.
   */
  rebuildIndex(): void {
    this.tfIdf.clear();
    this.maxAccessCount = 1;
    // NOTE(review): 10_000 is presumably a row cap; memories beyond it would
    // not be indexed — confirm against MemoryDatabase.getAll.
    for (const mem of this.db.getAll(10_000)) {
      this.tfIdf.addDocument(mem.id, `${mem.content} ${mem.tags.join(" ")} ${mem.category}`);
      if (mem.accessCount > this.maxAccessCount) this.maxAccessCount = mem.accessCount;
    }
  }

  /**
   * Adds a single memory to the TF-IDF index. The indexed text is the
   * content, the space-joined tags, and the category — the same layout
   * `rebuildIndex()` uses.
   */
  indexMemory(id: string, content: string, tags: string[], category: string): void {
    this.tfIdf.addDocument(id, `${content} ${tags.join(" ")} ${category}`);
  }

  /** Removes a memory from the TF-IDF index by id. */
  removeFromIndex(id: string): void {
    this.tfIdf.removeDocument(id);
  }

  /**
   * Retrieves the best-matching memories for `query`.
   *
   * TF-IDF candidates are over-fetched (3x `limit`, capped at 100) so the
   * composite re-ranking has room to reorder them. If the TF-IDF index yields
   * nothing, the database's `ftsSearch` is used instead and every hit gets a
   * flat similarity.
   *
   * @param query        Free-text query.
   * @param limit        Maximum number of memories to return; non-positive or
   *                     NaN values are treated as 0.
   * @param halfLifeDays Half-life passed to the recency decay.
   * @param touchAccess  When true, the returned memories are reported to the
   *                     database via `touchAccessBatch`.
   * @returns Ranked memories plus match count, query terms, and elapsed time.
   *          When neither search finds anything, `queryTerms` is empty.
   */
  retrieve(
    query: string,
    limit: number = MAX_SEARCH_RESULTS,
    halfLifeDays: number = DECAY_HALF_LIFE_DAYS,
    touchAccess: boolean = true,
  ): RetrievalResult {
    const start = performance.now();
    const cap = RetrievalEngine.clampLimit(limit);
    const candidateLimit = Math.min(cap * 3, 100);
    const tfIdfResults = this.tfIdf.search(query, candidateLimit);

    if (tfIdfResults.length === 0) {
      const ftsResults = this.db.ftsSearch(query, cap);
      if (ftsResults.length === 0) {
        return { memories: [], totalMatched: 0, queryTerms: [], timeTakenMs: performance.now() - start };
      }
      return this.scoreAndRank(
        ftsResults,
        RetrievalEngine.FTS_FLAT_SIMILARITY,
        cap,
        halfLifeDays,
        start,
        query,
        undefined,
        touchAccess,
      );
    }

    const memories = this.db.getByIds(tfIdfResults.map(([id]) => id));
    const similarityMap = new Map<string, number>(tfIdfResults);
    return this.scoreAndRank(memories, null, cap, halfLifeDays, start, query, similarityMap, touchAccess);
  }

  /**
   * Scores each memory, drops those below the relevance threshold, sorts by
   * composite score (descending), and returns the top `limit`.
   *
   * @param memories       Candidate records to score.
   * @param flatSimilarity Similarity to use for every candidate, or `null` to
   *                       look each one up in `similarityMap`.
   * @param limit          Maximum number of results to keep.
   * @param halfLifeDays   Half-life passed to the recency decay.
   * @param startTime      `performance.now()` reading taken when the request began.
   * @param query          Original query; used only to derive `queryTerms`.
   * @param similarityMap  Per-id similarity; ids missing from it score 0.
   * @param touchAccess    When true, the kept results are reported via `touchAccessBatch`.
   */
  private scoreAndRank(
    memories: MemoryRecord[],
    flatSimilarity: number | null,
    limit: number,
    halfLifeDays: number,
    startTime: number,
    query: string,
    similarityMap?: Map<string, number>,
    touchAccess: boolean = true,
  ): RetrievalResult {
    // Refresh the running maximum for every candidate set (TF-IDF and FTS
    // alike) so frequency is normalised against an up-to-date denominator.
    for (const mem of memories) {
      if (mem.accessCount > this.maxAccessCount) this.maxAccessCount = mem.accessCount;
    }

    const scored: ScoredMemory[] = [];
    for (const mem of memories) {
      const similarity = flatSimilarity ?? similarityMap?.get(mem.id) ?? 0;
      // The per-candidate similarity gate only applies to looked-up scores;
      // a flat similarity is accepted as-is.
      if (flatSimilarity === null && similarity < MIN_RELEVANCE_THRESHOLD) continue;

      const decay = computeDecay(mem.lastAccessedAt, halfLifeDays);
      const frequency = normalizeFrequency(mem.accessCount, this.maxAccessCount);
      const composite =
        SIMILARITY_WEIGHT * similarity +
        DECAY_WEIGHT * decay +
        FREQUENCY_WEIGHT * frequency +
        CONFIDENCE_WEIGHT * mem.confidence;
      if (composite < MIN_RELEVANCE_THRESHOLD) continue;

      scored.push({ ...mem, relevanceScore: similarity, decayScore: decay, compositeScore: composite });
    }

    scored.sort((a, b) => b.compositeScore - a.compositeScore);
    const results = scored.slice(0, RetrievalEngine.clampLimit(limit));
    if (touchAccess) this.touchAccessed(results);

    const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
    return { memories: results, totalMatched: scored.length, queryTerms, timeTakenMs: performance.now() - startTime };
  }

  /** Statistics exposed by the underlying TF-IDF index. */
  get indexStats() {
    return this.tfIdf.stats;
  }

  /**
   * Retrieves memories and refines the ranking with the SRLM reranker.
   *
   * A candidate pool larger than `limit` is fetched first, the top candidates
   * are sent to `srlmRerank`, and composite scores are blended with the
   * returned scores. Candidates without an SRLM score are penalised. If the
   * reranker throws or returns no scores, the base ranking is used unchanged.
   *
   * Only the memories actually returned are reported via `touchAccessBatch`;
   * candidates that are fetched but cut after reranking are not.
   *
   * @param query        Free-text query.
   * @param limit        Maximum number of memories to return; non-positive or
   *                     NaN values are treated as 0.
   * @param halfLifeDays Half-life passed to the recency decay.
   * @param K            Forwarded unchanged to `srlmRerank`.
   */
  async retrieveWithSRLM(
    query: string,
    limit: number = MAX_SEARCH_RESULTS,
    halfLifeDays: number = DECAY_HALF_LIFE_DAYS,
    K: number = 3,
  ): Promise<RetrievalResult> {
    const cap = RetrievalEngine.clampLimit(limit);
    // Over-fetch for reranking headroom, but never fetch fewer than the caller
    // asked for — otherwise limits above the pool cap could not be honoured.
    const poolSize = Math.max(cap, Math.min(cap * 2, RetrievalEngine.SRLM_POOL_CAP));
    // Access tracking is deferred until the final result set is known.
    const baseResult = this.retrieve(query, poolSize, halfLifeDays, false);
    if (baseResult.memories.length === 0) return baseResult;

    try {
      const candidates = baseResult.memories
        .slice(0, RetrievalEngine.SRLM_MAX_CANDIDATES)
        .map((m) => ({ id: m.id, content: m.content }));
      const srlmScores = await srlmRerank(query, candidates, K);
      if (srlmScores.size > 0) {
        for (const mem of baseResult.memories) {
          const srlmScore = srlmScores.get(mem.id);
          mem.compositeScore =
            srlmScore !== undefined
              ? RetrievalEngine.SRLM_BASE_WEIGHT * mem.compositeScore +
                RetrievalEngine.SRLM_RERANK_WEIGHT * srlmScore
              : mem.compositeScore * RetrievalEngine.SRLM_UNSCORED_PENALTY;
        }
        baseResult.memories.sort((a, b) => b.compositeScore - a.compositeScore);
      }
    } catch {
      /* AI unavailable — fall back to base scores */
    }

    baseResult.memories = baseResult.memories.slice(0, cap);
    this.touchAccessed(baseResult.memories);
    return baseResult;
  }

  /**
   * Best-effort access tracking: reports the given memories to the database
   * and deliberately ignores failures so a tracking error never fails a read.
   */
  private touchAccessed(memories: ScoredMemory[]): void {
    if (memories.length === 0) return;
    try {
      this.db.touchAccessBatch(memories.map((m) => m.id));
    } catch {
      /* ignore */
    }
  }

  /**
   * Maps negative and NaN limits to 0 so `slice(0, limit)` cannot be handed a
   * negative end index (which would drop items from the end of the list).
   */
  private static clampLimit(limit: number): number {
    return limit > 0 ? limit : 0;
  }
}