Project Files
packages / adapter-lmstudio / src / evidence.ts
import type { RetrievalResultEntry } from "@lmstudio/sdk";
import type { EvidenceBlock } from "./types/evidence";
/**
 * Collapses every run of whitespace in `value` into a single space and
 * removes leading/trailing whitespace.
 *
 * @param value - Raw text to clean up.
 * @returns The text with single-space separators and no surrounding whitespace.
 */
function normalizeWhitespace(value: string) {
  // After trimming there is no leading/trailing whitespace, so splitting on
  // whitespace runs never produces empty edge pieces.
  const pieces = value.trim().split(/\s+/);
  return pieces.join(" ");
}
/**
 * Splits text into lowercase word tokens.
 *
 * Any character that is not a letter, combining mark, digit or whitespace is
 * treated as a separator. Letters and digits from every script are kept, so
 * accented and non-Latin text still produces tokens instead of being erased.
 *
 * @param value - Text to tokenize.
 * @returns The non-empty tokens in order of appearance (duplicates retained).
 */
function tokenize(value: string): string[] {
  return (
    value
      .toLowerCase()
      // \p{L}/\p{M}/\p{N}: letters, combining marks and numbers of any script.
      // An ASCII-only class would strip e.g. "é" or Cyrillic/CJK characters.
      .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
      // Splitting on whitespace runs and discarding empties also takes care of
      // leading/trailing/repeated whitespace, so no separate pre-pass is needed.
      .split(/\s+/)
      .filter((token) => token.length > 0)
  );
}
/**
 * Computes the Jaccard similarity of two texts over their unique tokens:
 * the number of shared tokens divided by the number of distinct tokens
 * appearing in either text.
 *
 * @param left - First text.
 * @param right - Second text.
 * @returns A value in [0, 1]; 0 when either text yields no tokens.
 */
function computeSimilarity(left: string, right: string) {
  const leftVocabulary = new Set(tokenize(left));
  const rightVocabulary = new Set(tokenize(right));

  if (leftVocabulary.size === 0 || rightVocabulary.size === 0) {
    return 0;
  }

  const sharedCount = [...leftVocabulary].filter((token) =>
    rightVocabulary.has(token)
  ).length;

  // |A ∪ B| = |A| + |B| − |A ∩ B|; always >= 1 here because both sets are non-empty.
  const combinedCount = leftVocabulary.size + rightVocabulary.size - sharedCount;

  return sharedCount / combinedCount;
}
/**
 * Filters out near-duplicate retrieval entries, keeping the first occurrence,
 * and caps the result at `maxEvidenceBlocks` entries.
 *
 * An entry counts as a near-duplicate only when an already-kept entry has the
 * same `source.identifier` and a token similarity to it of at least
 * `threshold`. Entries from different sources are never compared.
 *
 * @param entries - Candidate entries, processed in the given order.
 * @param threshold - Minimum similarity at which two same-source entries are
 *   considered duplicates.
 * @param maxEvidenceBlocks - Maximum number of entries to return. A value of
 *   zero or less yields an empty result.
 * @returns The kept entries, in their original relative order.
 */
export function dedupeEvidenceEntries(
  entries: Array<RetrievalResultEntry>,
  threshold: number,
  maxEvidenceBlocks: number
): Array<RetrievalResultEntry> {
  const deduped: Array<RetrievalResultEntry> = [];

  for (const entry of entries) {
    // Enforce the cap before admitting anything; checking only after a push
    // would let one entry through even when the cap is zero or negative.
    if (deduped.length >= maxEvidenceBlocks) {
      break;
    }

    const isNearDuplicate = deduped.some(
      (existing) =>
        // Cheap identifier comparison first; similarity is only computed for
        // entries that come from the same source.
        existing.source.identifier === entry.source.identifier &&
        computeSimilarity(existing.content, entry.content) >= threshold
    );

    if (!isNearDuplicate) {
      deduped.push(entry);
    }
  }

  return deduped;
}
/**
 * Converts retrieval entries into evidence blocks.
 *
 * Each block gets a 1-based `Citation N` label following input order, the
 * source's name as `fileName`, the entry content with whitespace normalized,
 * the entry's score, and a reference back to the original entry.
 *
 * @param entries - Retrieval entries to convert.
 * @returns One evidence block per entry, in the same order.
 */
export function buildEvidenceBlocks(
  entries: Array<RetrievalResultEntry>
): Array<EvidenceBlock> {
  const blocks: Array<EvidenceBlock> = [];

  for (const [position, entry] of entries.entries()) {
    const citationNumber = position + 1;
    blocks.push({
      label: `Citation ${citationNumber}`,
      fileName: entry.source.name,
      content: normalizeWhitespace(entry.content),
      score: entry.score,
      entry,
    });
  }

  return blocks;
}
/**
 * Renders evidence blocks as plain text.
 *
 * Each block becomes a header line of the form
 * `<label> (file: <fileName>, score: <score to 3 decimals>):` followed by the
 * content wrapped in double quotes on the next line. Blocks are separated by
 * a blank line.
 *
 * @param blocks - Evidence blocks to render.
 * @returns The rendered text, or an empty string when there are no blocks.
 */
export function formatEvidenceBlocks(blocks: Array<EvidenceBlock>) {
  const rendered: string[] = [];

  for (const { label, fileName, score, content } of blocks) {
    const header = `${label} (file: ${fileName}, score: ${score.toFixed(3)}):`;
    rendered.push(`${header}\n"${content}"`);
  }

  // Joining an empty list yields "", which covers the no-blocks case.
  return rendered.join("\n\n");
}