// Project file: src/promptPreprocessor.ts
/**
* Prompt Preprocessor for user-docs
*
* Injected via withPromptPreprocessor. On every user turn it retrieves the most
* relevant document chunks for the user message and prepends them as context so
* the LLM has the documentation in scope without the user having to call find_doc.
*
* Workflow guidance (what to do with image candidates) is handled entirely by
* the find_doc tool result — no separate injection needed here.
*
* Mirrors rag-vc/src/promptPreprocessor.ts — shared Retriever singleton
* (initialised by toolsProvider) is reused here, no double-indexing.
*/
import { type ChatMessage, type PromptPreprocessorController } from "@lmstudio/sdk";
import path from "path";
import { getRetriever, isRetrieverReady } from "./rag/retrieverSingleton.js";
import { ragDebug } from "./utils/ragLogger.js";
// ─── Preprocessor ─────────────────────────────────────────────────────────────
/**
 * Prompt preprocessor: prepends retrieved documentation chunks to the user's
 * message so the model sees them as context for this turn.
 *
 * The message is returned untouched when the shared retriever is not ready,
 * when the message has no text to search with, or when the search yields no
 * results. Any exception raised while retrieving or formatting is surfaced as
 * a status entry on the controller instead of being thrown to the caller.
 *
 * @param ctl - Controller supplied by the host; used to report failures and,
 *   when it exposes `getWorkingDirectory`, to derive the active chat id.
 * @param userMessage - The incoming user message. It is modified in place via
 *   `replaceText` when context is injected.
 * @returns The same `userMessage` object, with or without prepended context.
 */
export async function preprocess(
  ctl: PromptPreprocessorController,
  userMessage: ChatMessage
): Promise<ChatMessage | string> {
  const userPrompt = userMessage.getText();

  // ── RAG context injection ─────────────────────────────────────────────────
  if (!isRetrieverReady()) {
    // Retriever not ready yet — pass through unchanged.
    // (toolsProvider is initialising it in the background.)
    return userMessage;
  }

  try {
    const retriever = getRetriever();

    try {
      // Probe for the method structurally instead of casting to `any`; it must
      // be invoked as a method call so `this` stays bound to the controller.
      const host = ctl as { getWorkingDirectory?: () => unknown };
      const workingDir = host.getWorkingDirectory?.();
      if (typeof workingDir === "string" && workingDir.trim()) {
        // Only a folder name made of exactly 13 digits is accepted as a chat id
        // (presumably an epoch-millisecond id — TODO confirm against the host).
        const chatId = path.basename(workingDir);
        if (/^\d{13}$/.test(chatId)) retriever.setActiveChatId(chatId);
      }
    } catch {
      // Best-effort only; some controller versions do not expose a working dir here.
    }

    // Nothing to search with (e.g. a turn without text): skip retrieval rather
    // than prepending chunks matched against an empty query.
    if (userPrompt.trim() === "") {
      return userMessage;
    }

    const results = await retriever.search(userPrompt);
    if (results.length === 0) {
      return userMessage;
    }
    ragDebug("RAG", `Preprocessor: ${results.length} chunks retrieved`);

    // Group by document, keeping first-seen document order (Map insertion order).
    const resultsByDoc = new Map<string, typeof results>();
    for (const result of results) {
      const docPath = result.document.path;
      const bucket = resultsByDoc.get(docPath);
      if (bucket) {
        bucket.push(result);
      } else {
        resultsByDoc.set(docPath, [result]);
      }
    }

    // Citation numbers run continuously across documents, in grouped order.
    const parts: string[] = [
      "The following information was retrieved from your documentation:\n\n",
    ];
    let citationNum = 1;
    for (const [docPath, docResults] of resultsByDoc) {
      parts.push(`=== ${path.basename(docPath)} ===\n\n`);
      for (const result of docResults) {
        const tag = result.chunk.metadata?.isTable ? " [TABLE]" : "";
        parts.push(`[${citationNum}]${tag}\n${result.chunk.content}\n\n`);
        citationNum++;
      }
    }
    parts.push(
      "Use the citations above when answering. If they do not contain relevant information, say so.\n\n"
    );

    userMessage.replaceText(parts.join("") + userPrompt);
    return userMessage;
  } catch (err) {
    // NOTE(review): failures are reported with status "canceled"; confirm
    // whether the SDK's error status would be more appropriate here.
    ctl.createStatus({
      status: "canceled",
      text: `RAG Error: ${err instanceof Error ? err.message : "Unknown error"}`,
    });
    return userMessage;
  }
}