"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.preprocess = preprocess;
const sdk_1 = require("@lmstudio/sdk");
const config_1 = require("./config");
const MemoryService_1 = require("./memory/MemoryService");
const embeddingCache_1 = require("./retrieval/embeddingCache");
const queryExpansion_1 = require("./retrieval/queryExpansion");
const hybridSearch_1 = require("./retrieval/hybridSearch");
const dedupe_1 = require("./retrieval/dedupe");
// ── CHANGE 1 ──────────────────────────────────────────────────────────────────
// Import the single-chunk helper directly so we can compress inside the same
// .map() pass that builds BudgetedChunks (see CHANGE 3 below), eliminating
// the intermediate string[] allocation that compressChunks() created.
const compression_1 = require("./retrieval/compression");
const tokenBudget_1 = require("./retrieval/tokenBudget");
const reranker_1 = require("./retrieval/reranker");
const promptInjection_1 = require("./security/promptInjection");
const document_context_1 = require("./templates/document_context");
const text_1 = require("./utils/text");
// Finished retrieval results, keyed by the hash produced by buildCacheKey().
// Each value is a `{ value, expiresAt }` record written by retrieveAcrossQueries().
const retrievalCache = new Map();
// Lifetime of a retrievalCache entry in milliseconds (300 000 ms = 5 minutes).
const CACHE_TTL_MS = 300_000;
// Hard cap on live cache entries *after* TTL sweeping.
const CACHE_MAX_SIZE = 64;
// Upper bound on how many files selectRetrievalFiles() hands to retrieval.
const MAX_RETRIEVAL_FILES = 8;
// preprocess() only saves a prompt to memory when it is at least this many
// characters long.
const MEMORY_MIN_PROMPT_LENGTH = 20;
// Per-file memoization, keyed by the file object itself (WeakMap, so entries do
// not keep a file object alive on their own).
const fileSignatureCache = new WeakMap(); // file -> signature string (fileSignature)
const fileTimestampCache = new WeakMap(); // file -> epoch ms or null (getFileTimestamp)
const fileTextLikeCache = new WeakMap(); // file -> boolean (isTextLikeFile)
// Memoized selectRetrievalFiles() outcomes, keyed by the ";"-joined file
// signatures of the input list.
const fileSelectionCache = new Map();
// Once fileSelectionCache holds this many entries, the oldest-inserted key is
// dropped before a new one is added.
const FILE_SELECTION_CACHE_MAX = 128;
// ── Loop prevention: per-query retrieval timeout ──────────────────────────
// If ctl.client.files.retrieve() stalls (e.g. the embedding model is loading
// or the LM Studio RPC layer is backed up), every query in the Promise.all
// waits indefinitely.  A hard timeout converts a hang into a non-abort error,
// which the per-query catch block handles by returning an empty result set —
// letting the other queries (if any) still contribute, and unblocking the
// pipeline so the next user message isn't also stalled.
const RETRIEVAL_TIMEOUT_MS = 8_000;
/**
 * Races `promise` against a deadline of `ms` milliseconds.
 *
 * @param {Promise<*>} promise - The operation to guard.
 * @param {number} ms - Deadline in milliseconds.
 * @returns {Promise<*>} Settles like `promise` if it settles first; otherwise
 *   rejects with `Error("retrieval timed out after <ms>ms")`. The underlying
 *   operation is not cancelled by the timeout.
 */
function withTimeout(promise, ms) {
    let handle;
    const deadline = new Promise((_resolve, reject) => {
        handle = setTimeout(() => {
            reject(new Error(`retrieval timed out after ${ms}ms`));
        }, ms);
    });
    // Whichever side wins, drop the pending timer so it cannot keep the event
    // loop alive after the race has been decided.
    return Promise.race([promise, deadline]).finally(() => {
        clearTimeout(handle);
    });
}
// ── Loop prevention: in-flight retrieval deduplication ───────────────────
// If preprocess() is called concurrently for the same prompt+files combination
// (LM Studio occasionally fires the preprocessor more than once before the
// first call has resolved), without this guard each concurrent call launches
// its own full Promise.all(queries) fan-out — doubling or tripling the number
// of embedding-model round-trips.  The resulting load spike can push latencies
// past the RETRIEVAL_TIMEOUT_MS threshold, which causes failures, which the
// caller can retry, creating a tight loop of stalled → timeout → retry.
// Sharing a single in-flight Promise collapses concurrent callers onto one
// retrieval operation and breaks that cycle.
// Cache key -> promise of the retrieval currently running for that key.
// retrieveAcrossQueries() registers the promise when it starts and removes the
// entry again once the promise settles.
const inFlightRetrievals = new Map();
// ── Loop prevention: consecutive-failure circuit breaker ─────────────────
// If the embedding model is unavailable (not loaded, wrong name, OOM crash),
// every preprocess() call fails, the error is caught, the call returns
// userMessage, and LM Studio immediately fires the next turn's preprocessor —
// which fails again, instantly, forever.  Tracking failures and backing off
// converts that tight loop into a graceful degradation: after
// MAX_CONSECUTIVE_FAILURES the plugin stops attempting retrieval until
// FAILURE_RESET_MS has elapsed, then tries once more.
// Number of back-to-back failed retrievals at which preprocess() stops
// attempting retrieval.
const MAX_CONSECUTIVE_FAILURES = 3;
// Length of the back-off window in milliseconds (30 000 ms = 30 seconds).
const FAILURE_RESET_MS = 30_000;
// Mutable module state shared by every preprocess() call: incremented on each
// non-abort retrieval failure, set back to 0 after a successful retrieval.
let consecutiveFailures = 0;
// Date.now() captured at the most recent failure; stays 0 until one occurs.
let lastFailureTime = 0;
/**
 * Tells whether a caught value represents a cancelled operation.
 *
 * @param {*} error - Any thrown/rejected value.
 * @returns {boolean} `true` only for objects that expose a `name` property
 *   (own or inherited) equal to "AbortError".
 */
function isAbortError(error) {
    if (!error || typeof error !== "object") {
        return false;
    }
    if (!("name" in error)) {
        return false;
    }
    return error.name === "AbortError";
}
/**
 * Shrinks `retrievalCache` in two steps:
 *  1. Remove every entry whose `expiresAt` is at or before the current time.
 *  2. If more than CACHE_MAX_SIZE entries remain, remove the earliest-inserted
 *     ones until exactly CACHE_MAX_SIZE are left.
 *
 * @returns {void}
 */
function pruneRetrievalCache() {
    const cutoff = Date.now();
    // Removing the entry currently being visited is safe during Map iteration.
    retrievalCache.forEach((entry, key) => {
        if (entry.expiresAt <= cutoff) {
            retrievalCache.delete(key);
        }
    });
    let excess = retrievalCache.size - CACHE_MAX_SIZE;
    if (excess <= 0) {
        return;
    }
    // Map iteration follows insertion order, so the first keys are the oldest.
    for (const key of retrievalCache.keys()) {
        if (excess <= 0) {
            break;
        }
        retrievalCache.delete(key);
        excess -= 1;
    }
}
/**
 * Extracts a timestamp from a file object, memoized per object in
 * `fileTimestampCache`.
 *
 * The properties `lastModified`, `updatedAt`, `modifiedAt`, `mtime`,
 * `timestamp`, `createdAt` and `created` are inspected in that order; the
 * first one holding a finite number, or a string that `Date.parse` accepts,
 * wins.
 *
 * @param {object} file - File-like object to inspect.
 * @returns {number|null} The number as found (or the parsed epoch
 *   milliseconds for strings), or `null` when no property qualifies.
 */
function getFileTimestamp(file) {
    const memo = fileTimestampCache.get(file);
    if (memo !== undefined) {
        return memo;
    }
    // Converts one property value to a timestamp, or null if it is unusable.
    const toMillis = (value) => {
        if (typeof value === "number") {
            return Number.isFinite(value) ? value : null;
        }
        if (typeof value === "string") {
            const parsed = Date.parse(value);
            return Number.isNaN(parsed) ? null : parsed;
        }
        return null;
    };
    const fields = [
        "lastModified",
        "updatedAt",
        "modifiedAt",
        "mtime",
        "timestamp",
        "createdAt",
        "created",
    ];
    let timestamp = null;
    for (const field of fields) {
        timestamp = toMillis(file[field]);
        if (timestamp !== null) {
            break;
        }
    }
    // `null` is cached too, so files without a timestamp are not re-scanned.
    fileTimestampCache.set(file, timestamp);
    return timestamp;
}
/**
 * Sort comparator that orders files newest-first by `getFileTimestamp`.
 * Files without a timestamp sort after files that have one; two files without
 * a timestamp compare as equal.
 *
 * @param {object} a - First file.
 * @param {object} b - Second file.
 * @returns {number} Negative when `a` sorts first, positive when `b` does, 0
 *   when neither has a timestamp (or both timestamps are equal).
 */
function compareFileByTimestamp(a, b) {
    const left = getFileTimestamp(a);
    const right = getFileTimestamp(b);
    const hasLeft = left !== null;
    const hasRight = right !== null;
    if (hasLeft && hasRight) {
        return right - left;
    }
    if (hasLeft) {
        return -1;
    }
    return hasRight ? 1 : 0;
}
/**
 * Heuristic test for "this file probably contains text", memoized per object
 * in `fileTextLikeCache`.
 *
 * A file qualifies when any of the following holds:
 *  - it has a non-empty `type` other than "image" (case-insensitive);
 *  - its name (first non-nullish of `name`, `fileName`, `filename`,
 *    `originalName`, `path`) ends in one of the listed text extensions;
 *  - it carries a truthy `content`, `text`, `document` or `snippet` property.
 *
 * @param {object} file - File-like object to classify.
 * @returns {boolean} Whether the file is treated as text-like.
 */
function isTextLikeFile(file) {
    const memo = fileTextLikeCache.get(file);
    if (memo !== undefined) {
        return memo;
    }
    const kind = String(file.type ?? "").toLowerCase();
    const label = String(file.name ??
        file.fileName ??
        file.filename ??
        file.originalName ??
        file.path ??
        "").toLowerCase();
    let verdict;
    if (kind !== "" && kind !== "image") {
        verdict = true;
    }
    else if (/\.(txt|md|markdown|json|js|ts|py|java|c|cpp|cs|html|css|csv|yml|yaml|xml|log)$/i.test(label)) {
        verdict = true;
    }
    else {
        // Last resort: inline payload fields count as evidence of text.
        verdict = Boolean(file.content || file.text || file.document || file.snippet);
    }
    fileTextLikeCache.set(file, verdict);
    return verdict;
}
/**
 * Chooses which files are passed to retrieval.
 *
 * Selection rules:
 *  - no text-like files at all  -> the first MAX_RETRIEVAL_FILES inputs;
 *  - at most MAX_RETRIEVAL_FILES text-like files -> all of them, input order;
 *  - otherwise -> the MAX_RETRIEVAL_FILES text-like files that sort first
 *    under compareFileByTimestamp.
 *
 * The decision is memoized in `fileSelectionCache` under the ";"-joined file
 * signatures. Only the *positions* of the chosen files are cached, never the
 * file objects themselves: the module-level Map therefore does not pin file
 * objects in memory (which would defeat the WeakMap-based per-file caches), and
 * a cache hit returns objects from the caller's own `files` array rather than
 * objects left over from an earlier call.
 *
 * @param {object[]} files - Candidate files, in caller order.
 * @returns {object[]} A new array holding the selected files.
 */
function selectRetrievalFiles(files) {
    const cacheKey = files.map(fileSignature).join(";");
    const cachedIndices = fileSelectionCache.get(cacheKey);
    if (cachedIndices) {
        // Same ordered signatures => same positions are valid for this list.
        return cachedIndices.map(index => files[index]);
    }
    const indexed = files.map((file, index) => ({ file, index }));
    const textLike = indexed.filter(entry => isTextLikeFile(entry.file));
    let chosen;
    if (textLike.length === 0) {
        chosen = indexed.slice(0, MAX_RETRIEVAL_FILES);
    }
    else if (textLike.length <= MAX_RETRIEVAL_FILES) {
        chosen = textLike;
    }
    else {
        chosen = [...textLike]
            .sort((a, b) => compareFileByTimestamp(a.file, b.file))
            .slice(0, MAX_RETRIEVAL_FILES);
    }
    // Evict the oldest-inserted key once the cache is full.
    if (fileSelectionCache.size >= FILE_SELECTION_CACHE_MAX) {
        const oldest = fileSelectionCache.keys().next().value;
        if (oldest !== undefined)
            fileSelectionCache.delete(oldest);
    }
    fileSelectionCache.set(cacheKey, chosen.map(entry => entry.index));
    return chosen.map(entry => entry.file);
}
/**
 * Pulls the textual payload out of a retrieval item.
 *
 * @param {*} value - A string, or an object carrying the text under one of
 *   `content`, `text`, `chunk`, `document`, `snippet`, `value`.
 * @returns {string} The string itself; for objects, the first non-nullish
 *   candidate property converted with `String()`; otherwise "".
 */
function toText(value) {
    if (typeof value === "string") {
        return value;
    }
    if (value === null || typeof value !== "object") {
        return "";
    }
    const fields = ["content", "text", "chunk", "document", "snippet", "value"];
    for (const field of fields) {
        const candidate = value[field];
        // Only null/undefined fall through; "" and 0 are accepted as-is.
        if (candidate !== undefined && candidate !== null) {
            return String(candidate);
        }
    }
    return "";
}
/**
 * Reads a numeric relevance score from a retrieval item.
 *
 * @param {*} value - Object carrying the score under one of `score`,
 *   `affinity`, `relevance`, `similarity` (first non-nullish wins).
 * @returns {number} The score converted with `Number()`, or 0 when `value` is
 *   not an object, no candidate is present, or the result is not finite.
 */
function toScore(value) {
    if (value === null || typeof value !== "object") {
        return 0;
    }
    let raw = 0;
    for (const field of ["score", "affinity", "relevance", "similarity"]) {
        const candidate = value[field];
        if (candidate !== undefined && candidate !== null) {
            raw = candidate;
            break;
        }
    }
    const numeric = Number(raw);
    if (!Number.isFinite(numeric)) {
        return 0;
    }
    return numeric;
}
/**
 * Derives a human-readable source name for a retrieval item.
 *
 * Lookup order:
 *  1. the first non-nullish of `fileName`, `filename`, `originalName`,
 *     `sourceName`, `path` on the item itself (an explicit "" is respected);
 *  2. otherwise the item's `source`: used directly when it is a string, or —
 *     when it is an object such as a file handle — the first non-nullish of
 *     its `name`, `fileName`, `filename`, `originalName`, `path`.
 *
 * Step 2 matters for result entries that describe their origin only through a
 * nested `source` handle; without it such entries would get no citation name.
 *
 * @param {*} value - Retrieval item.
 * @returns {string} The source name, or "" when none can be determined.
 */
function toCitation(value) {
    if (!value || typeof value !== "object")
        return "";
    const obj = value;
    const direct = obj.fileName ??
        obj.filename ??
        obj.originalName ??
        obj.sourceName ??
        obj.path;
    if (direct !== undefined && direct !== null)
        return String(direct);
    const source = obj.source;
    if (typeof source === "string")
        return source;
    if (source && typeof source === "object") {
        return String(source.name ??
            source.fileName ??
            source.filename ??
            source.originalName ??
            source.path ??
            "");
    }
    return "";
}
/**
 * Converts a raw retrieval item into the chunk record used by the rest of the
 * pipeline.
 *
 * @param {*} value - Raw retrieval item.
 * @returns {{text: string, score: number, citation: (string|undefined),
 *   sourceName: (string|undefined), confidence: number}} `text` is the
 *   trimmed payload passed through `truncate(…, 5000)`; `citation` and
 *   `sourceName` both hold the citation name (or `undefined` when empty);
 *   `confidence` is `score` clamped to the range [0, 1].
 */
function normalizeResult(value) {
    const trimmed = toText(value).trim();
    const body = (0, text_1.truncate)(trimmed, 5000);
    const origin = toCitation(value) || undefined;
    const relevance = toScore(value);
    const bounded = Math.max(0, Math.min(1, relevance));
    return {
        text: body,
        score: relevance,
        citation: origin,
        sourceName: origin,
        confidence: bounded,
    };
}
/**
 * Produces a string identifying a file for cache-key purposes, memoized per
 * object in `fileSignatureCache`.
 *
 * The signature joins, with "|", the non-empty values among:
 *  - `type`;
 *  - the first non-nullish of `name`, `fileName`, `filename`, `originalName`,
 *    `path`;
 *  - the first non-nullish of `id`, `uuid`, `hash`, `source`.
 * When all three are empty, it falls back to `stableHash` over the object's
 * sorted `key:value` pairs.
 *
 * @param {object} file - File-like object.
 * @returns {string} The signature.
 */
function fileSignature(file) {
    const memo = fileSignatureCache.get(file);
    if (memo) {
        return memo;
    }
    const kind = String(file.type ?? "");
    const label = String(file.name ??
        file.fileName ??
        file.filename ??
        file.originalName ??
        file.path ??
        "");
    const identity = String(file.id ?? file.uuid ?? file.hash ?? file.source ?? "");
    const present = [kind, label, identity].filter(part => part !== "");
    let signature;
    if (present.length > 0) {
        signature = present.join("|");
    }
    else {
        // Nothing recognisable: hash every enumerable own property instead.
        const pairs = Object.keys(file)
            .sort()
            .map(key => `${key}:${String(file[key])}`);
        signature = (0, text_1.stableHash)(pairs.join("|"));
    }
    fileSignatureCache.set(file, signature);
    return signature;
}
/**
 * Builds the retrieval cache key for one prompt/file-set/settings combination.
 *
 * @param {string} prompt - Prompt text used for retrieval.
 * @param {object[]} files - Files searched; contribute via `fileSignature`.
 * @param {number} limit - Per-query result limit.
 * @param {number} multiQueryCount - Number of expanded queries.
 * @param {number} threshold - Minimum score kept.
 * @returns {*} `stableHash` of the five fields joined with a NUL character.
 */
function buildCacheKey(prompt, files, limit, multiQueryCount, threshold) {
    const signatures = files.map(fileSignature).join(";");
    const fields = [
        prompt,
        signatures,
        String(limit),
        String(multiQueryCount),
        String(threshold),
    ];
    // NUL cannot be confused with the ";" and "|" separators used inside fields.
    const material = fields.join("\u0000");
    return (0, text_1.stableHash)(material);
}
/**
 * Normalizes the value resolved by `ctl.client.files.retrieve()` to an array.
 * Accepts either a bare array of items or a result object that carries the
 * items in an `entries` array; anything else yields an empty array.
 */
function extractRetrievalItems(response) {
    if (Array.isArray(response))
        return response;
    if (response && typeof response === "object" && Array.isArray(response.entries))
        return response.entries;
    return [];
}
/**
 * Runs retrieval for `prompt` (plus its expanded query variants) over `files`
 * and returns the scored, de-duplicated, ranked chunks.
 *
 * Results are served from `retrievalCache` while fresh, and concurrent calls
 * for the same cache key share one in-flight promise. Note that a shared
 * in-flight retrieval uses the abort signal of the call that started it.
 *
 * @param {object} ctl - Preprocessor controller; provides plugin config, the
 *   client used for retrieval and the abort signal.
 * @param {string} prompt - Prompt to retrieve context for.
 * @param {object[]} files - Files to search.
 * @returns {Promise<object[]>} Ranked chunk records (see normalizeResult).
 * @throws Rethrows abort errors unchanged; rejects with an `Error` (original
 *   failure attached as `cause`) when every query failed, so that callers can
 *   count the attempt as a failure instead of receiving a misleading empty
 *   result.
 */
async function retrieveAcrossQueries(ctl, prompt, files) {
    const config = ctl.getPluginConfig(config_1.configSchematics);
    const retrievalLimit = Math.max(1, Math.min(24, Number(config.get("retrievalLimit")) || 6));
    const multiQueryCount = Math.max(1, Math.min(6, Number(config.get("multiQueryCount")) || 2));
    // 0 is a legitimate threshold ("keep everything"), so only fall back to the
    // 0.55 default when the setting is missing or not numeric — `|| 0.55` would
    // silently replace an explicit 0.
    const configuredThreshold = config.get("retrievalAffinityThreshold");
    const parsedThreshold = configuredThreshold === null ||
        configuredThreshold === undefined ||
        configuredThreshold === ""
        ? Number.NaN
        : Number(configuredThreshold);
    const threshold = Math.max(0, Math.min(1, Number.isNaN(parsedThreshold) ? 0.55 : parsedThreshold));
    const cacheKey = buildCacheKey(prompt, files, retrievalLimit, multiQueryCount, threshold);
    const cached = retrievalCache.get(cacheKey);
    if (cached && cached.expiresAt > Date.now()) {
        return cached.value;
    }
    // In-flight deduplication: return the existing promise rather than launching
    // a second identical retrieval fan-out.
    const inflight = inFlightRetrievals.get(cacheKey);
    if (inflight !== undefined)
        return inflight;
    const promise = (async () => {
        const embeddingModel = await (0, embeddingCache_1.getEmbeddingModel)(ctl);
        const queries = (0, queryExpansion_1.expandQueries)(prompt, multiQueryCount);
        // Non-abort errors collected from individual queries.
        const failures = [];
        // All queries run concurrently: the retrieve call is I/O-bound, so this
        // cuts latency whenever more than one query is issued (multiQueryCount
        // falls back to 2 above). Each call is wrapped with withTimeout() so a
        // stalled embedding model cannot hang the plugin, and an AbortError from
        // any query rejects the whole Promise.all immediately.
        const queryResults = await Promise.all(queries.map(async (query) => {
            try {
                const response = await withTimeout(ctl.client.files.retrieve(query, files, {
                    embeddingModel,
                    limit: retrievalLimit,
                    signal: ctl.abortSignal,
                }), RETRIEVAL_TIMEOUT_MS);
                return { query, items: extractRetrievalItems(response) };
            }
            catch (error) {
                if (isAbortError(error))
                    throw error;
                // A failed query contributes nothing, but the other queries can
                // still supply results.
                failures.push(error);
                return { query, items: [] };
            }
        }));
        // Total failure is reported as an error rather than as "no matches".
        if (queries.length > 0 && failures.length === queries.length) {
            throw new Error(`retrieval failed for all ${queries.length} queries`, { cause: failures[0] });
        }
        const results = [];
        for (const { query, items } of queryResults) {
            for (const item of items) {
                const normalized = normalizeResult(item);
                if (!normalized.text)
                    continue;
                normalized.score = (0, hybridSearch_1.hybridScore)(query, normalized.text, normalized.score);
                if (normalized.score < threshold)
                    continue;
                results.push(normalized);
            }
        }
        const deduped = (0, dedupe_1.dedupeResults)(results);
        const ranked = deduped.length <= 4
            ? deduped
            : (0, reranker_1.rerankChunks)(deduped, prompt);
        // Cache only complete results: a partially failed fan-out (e.g. one
        // timeout) must not pin a degraded answer for the whole TTL.
        if (failures.length === 0) {
            // Delete first so a refreshed key moves to the newest insertion slot,
            // then prune *after* inserting so the size cap holds afterwards.
            retrievalCache.delete(cacheKey);
            retrievalCache.set(cacheKey, {
                value: ranked,
                expiresAt: Date.now() + CACHE_TTL_MS,
            });
            pruneRetrievalCache();
        }
        return ranked;
    })();
    inFlightRetrievals.set(cacheKey, promise);
    // Always remove the in-flight entry on settlement so a failed retrieval does
    // not block the key for future callers. then(release, release) is used
    // instead of finally(): the promise returned by finally() would re-reject
    // with the same reason and, having no handler, surface as an unhandled
    // rejection on every failed or aborted retrieval.
    const release = () => {
        inFlightRetrievals.delete(cacheKey);
    };
    promise.then(release, release);
    return promise;
}
/**
 * Computes the context budget passed to `fitToBudget`.
 *
 * Starts from 12 000 when more than six sources are selected (10 000
 * otherwise), adds 40% of the whitespace-normalized prompt length (capped at
 * 8 000), and bounds the total to the range [6 000, 18 000].
 *
 * NOTE(review): the only caller visible in this file passes a source count of
 * at most 5, so the "more than six" branch looks unreachable from there —
 * confirm whether the cut-off is intended.
 *
 * @param {string} prompt - Prompt text.
 * @param {number} selectedSources - Number of chunks selected for the context.
 * @returns {number} The budget.
 */
function adaptiveBudget(prompt, selectedSources) {
    const promptLength = (0, text_1.normalizeWhitespace)(prompt).length;
    const starting = selectedSources > 6 ? 12_000 : 10_000;
    const bonus = Math.min(8_000, Math.floor(promptLength * 0.4));
    const uncapped = starting + bonus;
    const capped = Math.min(18_000, uncapped);
    return Math.max(6_000, capped);
}
/**
 * Prompt preprocessor entry point: augments the user's message with context
 * retrieved from the files attached to the conversation.
 *
 * The original `userMessage` is returned untouched when:
 *  - its text is blank;
 *  - it already contains RAG_AUGMENTED_MARKER (it was augmented before);
 *  - the failure circuit breaker is open;
 *  - the conversation has no non-image files;
 *  - retrieval fails with a non-abort error, or yields nothing that fits.
 *
 * @param {object} ctl - Preprocessor controller (config, history, client).
 * @param {object} userMessage - Incoming message; must expose `getText()`.
 * @returns {Promise<*>} `userMessage` itself, or the augmented prompt built by
 *   `buildDocumentContext` and passed through the sdk `text` helper.
 * @throws Rethrows abort errors raised during retrieval.
 */
async function preprocess(ctl, userMessage) {
    const rawPrompt = userMessage.getText();
    if (!rawPrompt.trim())
        return userMessage;
    // ── Loop prevention: augmentation guard ──────────────────────────────────
    // Reject messages that were already augmented by this plugin. This prevents
    // double augmentation when stored preprocessed text is replayed through the
    // pipeline on a later turn or during context reconstruction.
    if (rawPrompt.includes(document_context_1.RAG_AUGMENTED_MARKER))
        return userMessage;
    // ── Loop prevention: circuit breaker ─────────────────────────────────────
    // After MAX_CONSECUTIVE_FAILURES failures in a row, skip retrieval until
    // FAILURE_RESET_MS has elapsed since the last failure.
    if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
        if (Date.now() - lastFailureTime < FAILURE_RESET_MS)
            return userMessage;
        // Back-off window has elapsed: allow exactly one probe. Leaving the
        // counter one short of the limit means a failed probe re-opens the
        // breaker immediately, while a successful one resets it to 0 below.
        // (Resetting to 0 here would permit MAX_CONSECUTIVE_FAILURES more
        // failing attempts instead of the single retry that is intended.)
        consecutiveFailures = MAX_CONSECUTIVE_FAILURES - 1;
    }
    const config = ctl.getPluginConfig(config_1.configSchematics);
    const prompt = config.get("enablePromptInjectionProtection")
        ? (0, promptInjection_1.sanitizePrompt)(rawPrompt)
        : rawPrompt;
    const history = await ctl.pullHistory();
    history.append(userMessage);
    const files = history
        .getAllFiles(ctl.client)
        .filter((file) => file.type !== "image");
    if (!files.length)
        return userMessage;
    const retrievalFiles = selectRetrievalFiles(files);
    let retrieved = [];
    try {
        retrieved = await retrieveAcrossQueries(ctl, prompt, retrievalFiles);
        // Successful retrieval — reset the failure counter.
        consecutiveFailures = 0;
    }
    catch (error) {
        if (isAbortError(error))
            throw error;
        consecutiveFailures++;
        lastFailureTime = Date.now();
        return userMessage;
    }
    if (!retrieved.length)
        return userMessage;
    // Configured count is limited to 1..5 and never exceeds what was retrieved.
    const selectedSourceCount = Math.min(retrieved.length, Math.max(1, Math.min(5, Number(config.get("selectedSourceCount")) || 5)));
    // Compress (when enabled and at least three chunks were retrieved) and copy
    // the top chunks in a single pass, so cached results are never mutated.
    const useCompression = Boolean(config.get("enableCompression"));
    const enriched = retrieved
        .slice(0, selectedSourceCount)
        .map(item => ({
        ...item,
        text: useCompression && retrieved.length >= 3 ? (0, compression_1.compressChunk)(item.text) : item.text,
    }));
    const selected = (0, tokenBudget_1.fitToBudget)(enriched, adaptiveBudget(prompt, selectedSourceCount));
    if (!selected.length)
        return userMessage;
    // One citation line per distinct source, numbered by the 1-based position
    // of the first selected chunk that came from it.
    const activeSources = new Map();
    selected.forEach((item, index) => {
        const key = item.sourceName || item.citation || `chunk-${index + 1}`;
        if (!activeSources.has(key))
            activeSources.set(key, index + 1);
    });
    const citations = [...activeSources.entries()]
        .map(([name, index]) => `[${index}] ${name}`)
        .join("\n");
    let memory = "";
    if (config.get("enableMemory")) {
        // Memory is keyed by a topic extracted from the *unsanitized* prompt.
        const topic = MemoryService_1.MemoryService.extractTopic(rawPrompt);
        memory = MemoryService_1.MemoryService.retrieve(topic);
        if (topic && rawPrompt.length >= MEMORY_MIN_PROMPT_LENGTH) {
            MemoryService_1.MemoryService.save(topic, (0, text_1.truncate)(rawPrompt, 240));
        }
    }
    // NOTE(review): the sdk `text` helper is normally used as a template tag;
    // confirm that calling it with a plain string is supported by the installed
    // SDK version.
    return (0, sdk_1.text)((0, document_context_1.buildDocumentContext)({
        citations,
        context: selected.map(item => item.text).join("\n\n---\n\n"),
        memory,
        prompt,
    }));
}
// rag-ultimate

// rag-ultimate