Project Files
dist / retrieval / tokenBudget.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.fitToBudget = fitToBudget;
const text_1 = require("../utils/text");
function sourceKey(chunk, index) {
return chunk.sourceName || chunk.citation || `chunk-${index}`;
}
function fitToBudget(chunks, maxChars) {
const sorted = [...chunks].sort((a, b) => {
if (b.score !== a.score)
return b.score - a.score;
if ((b.confidence ?? 0) !== (a.confidence ?? 0))
return (b.confidence ?? 0) - (a.confidence ?? 0);
return a.text.length - b.text.length;
});
const kept = [];
const sourceCounts = new Map();
const normalizedLengths = sorted.map(chunk => (0, text_1.normalizeWhitespace)(chunk.text).length);
let total = 0;
// ── Loop prevention: suffix-minimum pre-computation ───────────────────────
// After we've consumed most of the budget, the loop keeps testing chunks that
// can't possibly fit, iterating all the way to the end of a potentially large
// sorted array. Pre-computing the running minimum chunk length from right to
// left lets us detect this condition in O(1) and break early.
//
// suffixMin[i] = min(normalizedLengths[i], normalizedLengths[i+1], ..., end)
// If (maxChars - total) < suffixMin[i] + separatorOverhead, no chunk from
// index i onward can ever fit and the loop can terminate immediately.
const n = sorted.length;
const suffixMin = new Array(n);
if (n > 0) {
suffixMin[n - 1] = normalizedLengths[n - 1];
for (let i = n - 2; i >= 0; i--) {
suffixMin[i] = Math.min(normalizedLengths[i], suffixMin[i + 1]);
}
}
for (let i = 0; i < n; i++) {
// Early exit: if even the smallest remaining chunk (plus separator overhead)
// cannot fit in the remaining budget, no further chunk ever will.
const separatorOverhead = kept.length === 0 ? 0 : 12;
if (maxChars - total < suffixMin[i] + separatorOverhead)
break;
const chunk = sorted[i];
const key = sourceKey(chunk, i);
const count = sourceCounts.get(key) ?? 0;
if (count >= 2)
continue;
const normalizedLength = normalizedLengths[i];
const extra = kept.length === 0 ? normalizedLength : normalizedLength + 12;
if (total + extra > maxChars)
continue;
sourceCounts.set(key, count + 1);
kept.push(chunk);
total += extra;
if (total >= maxChars)
break;
}
if (kept.length === 0 && sorted.length > 0) {
const first = sorted[0];
kept.push({
...first,
text: first.text.slice(0, Math.max(0, maxChars - 1)).trimEnd() + (first.text.length > maxChars ? "…" : ""),
});
}
return kept;
}