Project Files
scripts/smoke-core-pipeline.ts
import {
buildRagEvidenceBlocks,
dedupeRagCandidates,
fuseRagCandidates,
mergeHybridRagCandidates,
rerankRagCandidates,
} from "../packages/core/src/retrievalPipeline";
import {
toEvidenceBlocks,
toRetrievalResultEntries,
} from "../packages/adapter-lmstudio/src/lmstudioCoreBridge";
import type { RagCandidate } from "../packages/core/src/contracts";
/**
 * Minimal assertion helper for this script.
 *
 * @param condition - Value to check; any truthy value passes.
 * @param message - Text of the Error thrown when the check fails.
 * @throws Error with `message` when `condition` is falsy.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Builds a fixture candidate for the smoke test.
 *
 * The source id and name are stored both as top-level fields and, mirrored,
 * under `metadata.source` (as `identifier` and `name`).
 *
 * @param sourceId - Identifier of the originating source.
 * @param sourceName - Display name of the originating source.
 * @param content - Text content of the candidate.
 * @param score - Score assigned to the candidate.
 * @returns A candidate object populated from the arguments.
 */
function makeCandidate(
  sourceId: string,
  sourceName: string,
  content: string,
  score: number
): RagCandidate {
  const source = { identifier: sourceId, name: sourceName };
  const candidate: RagCandidate = {
    sourceId,
    sourceName,
    content,
    score,
    metadata: { source },
  };
  return candidate;
}
/**
 * Drives the core retrieval pipeline over a small in-memory fixture and
 * asserts on the output of each stage, in this order: fuse, hybrid merge,
 * rerank, dedupe, evidence-block construction, and the LM Studio bridge
 * conversions. Prints a summary of per-stage candidate counts on success.
 *
 * @throws Error from `assert` as soon as any stage expectation fails.
 */
function main() {
  const docId = "architecture.md";
  // Every fixture candidate uses the same value for source id and source name.
  const fromDoc = (content: string, score: number): RagCandidate =>
    makeCandidate(docId, docId, content, score);

  // Two semantic retrieval runs over the same document.
  const firstRun = [
    fromDoc(
      "# Session Service\nThe session service uses PostgreSQL and accepts higher write latency to preserve failover consistency.",
      0.82
    ),
    fromDoc("Analytics uses ClickHouse for aggregate dashboards.", 0.61),
  ];
  const secondRun = [
    fromDoc("The analytics backend uses ClickHouse for aggregate dashboards.", 0.77),
    fromDoc(
      "Session service tradeoff: higher write latency in exchange for failover consistency.",
      0.75
    ),
  ];

  const fusedCandidates = fuseRagCandidates(
    [firstRun, secondRun],
    "reciprocal-rank-fusion",
    4
  );
  assert(
    fusedCandidates.length >= 3,
    "Expected fused candidates from multiple retrieval runs."
  );

  // A single lexical hit is merged with the fused semantic candidates.
  const lexicalHits = [
    fromDoc(
      "Tradeoff summary: PostgreSQL gives durable state while accepting higher write latency during failover.",
      0.93
    ),
  ];
  const hybridCandidates = mergeHybridRagCandidates(fusedCandidates, lexicalHits, {
    semanticWeight: 0.65,
    lexicalWeight: 0.35,
    maxCandidates: 4,
  });
  assert(
    hybridCandidates.length >= 3,
    "Expected hybrid merge to retain multiple candidates."
  );

  const rerankedCandidates = rerankRagCandidates(
    "Compare the session service tradeoff and analytics backend.",
    hybridCandidates,
    { topK: 3, strategy: "heuristic-v1" }
  );
  assert(
    rerankedCandidates.length === 3,
    "Expected heuristic rerank to return top-k candidates."
  );
  // The length check above guarantees index 0 exists.
  const topRanked = rerankedCandidates[0]!;
  assert(
    topRanked.features.lexicalOverlap > 0,
    "Expected reranked candidates to include computed lexical-overlap features."
  );

  // Unwrap each reranked entry, replacing its score with the rerank score.
  const rescored = rerankedCandidates.map(({ candidate, rerankScore }) => ({
    ...candidate,
    score: rerankScore,
  }));
  // NOTE(review): 0.8 and 3 presumably are a similarity threshold and a
  // result cap — confirm against dedupeRagCandidates.
  const dedupedCandidates = dedupeRagCandidates(rescored, 0.8, 3);
  assert(
    dedupedCandidates.length >= 2,
    "Expected dedupe to retain at least two distinct evidence candidates."
  );

  const evidenceBlocks = buildRagEvidenceBlocks(dedupedCandidates);
  const lmStudioEvidenceBlocks = toEvidenceBlocks(evidenceBlocks);
  // Source metadata is rebuilt from the top-level id/name before conversion.
  const roundTrippedEntries = toRetrievalResultEntries(
    dedupedCandidates.map((entry) => {
      const source = { identifier: entry.sourceId, name: entry.sourceName };
      return { ...entry, metadata: { source } };
    })
  );
  assert(
    lmStudioEvidenceBlocks[0]?.fileName === docId,
    "Expected bridge conversion to preserve source file names."
  );
  assert(
    roundTrippedEntries[0]?.source.identifier === docId,
    "Expected bridge conversion to round-trip source identifiers."
  );

  console.log("Core pipeline smoke test passed.\n");
  const stageCounts: Array<[string, number]> = [
    ["Fused", fusedCandidates.length],
    ["Hybrid", hybridCandidates.length],
    ["Reranked", rerankedCandidates.length],
    ["Deduped", dedupedCandidates.length],
  ];
  for (const [stage, count] of stageCounts) {
    console.log(`${stage}: ${count}`);
  }
}
// Script entry point: run the smoke test; on any thrown value, report it on
// stderr and exit with status 1.
try {
  main();
} catch (failure) {
  let reason: string;
  if (failure instanceof Error) {
    reason = failure.message;
  } else {
    // Non-Error throwables are stringified for the report.
    reason = String(failure);
  }
  console.error(`Core pipeline smoke test failed: ${reason}`);
  process.exit(1);
}