// Forked from mindstudio/big-rag
// Project Files
// src / promptPreprocessor.ts
import {
type ChatMessage,
type PromptPreprocessorController,
} from "@lmstudio/sdk";
import {
configSchematics,
DEFAULT_PROMPT_TEMPLATE,
getDefaultPromptTemplate,
type ResponseLanguage,
} from "./config";
import { VectorStore, type FilenameSearchResult } from "./vectorstore/vectorStore";
import { type OcrSettings } from "./parsers/documentParser";
import { performSanityChecks } from "./utils/sanityChecks";
import { tryStartIndexing, finishIndexing } from "./utils/indexingLock";
import * as path from "path";
import { runIndexingJob } from "./ingestion/runIndexing";
/**
 * Stop the preprocessor as soon as the request has been cancelled.
 *
 * @param signal - Abort signal belonging to the current request.
 * @throws The signal's own `reason` when it is set; otherwise a
 *   `DOMException` named "AbortError".
 */
function checkAbort(signal: AbortSignal): void {
  const stillRunning = !signal.aborted;
  if (stillRunning) {
    return;
  }
  // Prefer the caller-supplied reason; only build a generic error when none exists.
  throw signal.reason ?? new DOMException("Aborted", "AbortError");
}
/**
 * Tell whether a caught value represents a cancellation.
 *
 * A value qualifies when it is a `DOMException` or `Error` whose `name` is
 * "AbortError", or an `Error` whose message is exactly "Aborted".
 *
 * @param error - Anything caught in a `catch` clause.
 * @returns `true` for abort/cancellation errors, `false` for everything else.
 */
function isAbortError(error: unknown): boolean {
  const namedAbort =
    (error instanceof DOMException || error instanceof Error) && error.name === "AbortError";
  const messageAbort = error instanceof Error && error.message === "Aborted";
  return namedAbort || messageAbort;
}
/**
 * Produce a short preview of `text`: blank lines are dropped, at most
 * `maxLines` non-blank lines are kept, and the result is capped at
 * `maxChars` UTF-16 code units.
 *
 * An ellipsis ("…") is appended only when content was actually cut, i.e. when
 * non-blank lines were left out or the joined lines exceeded `maxChars`.
 * Discarded blank lines, CRLF line endings or a trailing newline do not count
 * as truncation.
 *
 * @param text - Text to summarise.
 * @param maxLines - Maximum number of non-blank lines to keep (default 3).
 * @param maxChars - Maximum length of the kept text, excluding the ellipsis (default 400).
 * @returns The preview, with a trailing "…" when something was truncated.
 */
function summarizeText(text: string, maxLines: number = 3, maxChars: number = 400): string {
  const lines = text.split(/\r?\n/).filter((line) => line.trim() !== "");
  const joined = lines.slice(0, maxLines).join("\n");

  const truncatedByLines = lines.length > maxLines;
  const truncatedByChars = joined.length > maxChars;
  if (!truncatedByLines && !truncatedByChars) {
    return joined;
  }

  let clipped = truncatedByChars ? joined.slice(0, maxChars) : joined;
  // Never leave half of a surrogate pair dangling at the cut point.
  if (truncatedByChars && /[\uD800-\uDBFF]$/.test(clipped)) {
    clipped = clipped.slice(0, -1);
  }
  return `${clipped.trimEnd()}…`;
}
/**
* Result of scanning a user prompt for "search by filename" intent
* (the return shape of extractFilenameSearchIntent).
*
* Phrasings the detection is meant to recognise (Russian + English):
* - "найди файлы с именем X", "найди файлы с названием X"
* - "найди все файлы X", "найди файл X"
* - "покажи файлы X", "покажи все файлы X"
* - "список файлов X", "перечисли файлы X"
* - "find files named X", "find all files X", "find files with name X"
* - "show files X", "list files X"
* - "файлы X", "files X" (short forms)
* - "с именем X", "с названием X", "по имени X"
* - "named X", "called X"
* - "в названии которых есть X"
*
* NOTE(review): the list above describes intended coverage; confirm each
* phrasing against the regexes in extractFilenameSearchIntent before relying on it.
*/
interface FilenameSearchIntent {
/** Words to look for in file names; empty when no filename pattern matched or only stop words were captured. */
filenameKeywords: string[];
/** Words taken from a content clause of the query (e.g. "... that contain X"), used to search file content. */
contentKeywords: string[];
/** True when filename keywords were found and no content keywords were extracted (pure file-listing request). */
isFilenameOnly: boolean;
}
/**
 * Detect whether a user prompt asks to look files up by name and, if so,
 * split it into filename keywords and optional content keywords.
 *
 * The filename patterns are tried in order and the first match wins; its
 * capture is reduced to keywords with `extractMeaningfulWords`. A second set
 * of "content clause" patterns (e.g. "... в которых встречается слово X",
 * "... that contains X") is then tried against the whole query.
 *
 * @param query - Raw user prompt.
 * @returns The detected intent. `filenameKeywords` is empty and
 *   `isFilenameOnly` is false when no filename pattern matched or the match
 *   consisted only of stop words. Otherwise `isFilenameOnly` is true exactly
 *   when no content keywords were found.
 */
export function extractFilenameSearchIntent(query: string): FilenameSearchIntent {
  const normalizedQuery = query.trim();

  // Trimmed capture group 1 of the first pattern that matches, or null.
  const firstCapture = (patterns: readonly RegExp[]): string | null => {
    for (const pattern of patterns) {
      const captured = normalizedQuery.match(pattern)?.[1];
      if (captured) {
        return captured.trim();
      }
    }
    return null;
  };

  // NOTE: the "by name" alternatives used to read `имем(...)`, a typo that can
  // never match the real word forms "именем" / "именами".
  const filenamePatterns: readonly RegExp[] = [
    // Russian: "найди файлы с именем/названием X", "найди все файлы X"
    /найди\s+(?:все\s+)?файл[ыья]*\s+(?:с\s+имен(?:ем|ами)|с\s+названи(?:ем|и|ю|я)|по\s+имени|по\s+названи(?:ю|я))\s+(.+?)(?:\s+в\s+котор|$)/iu,
    /найди\s+(?:все\s+)?файл[ыья]*\s+(.+?)(?:\s+и\s+|\s+в\s+котор|\s+где|\s+из\s+|\s+для\s+|\s+по\s+|\s+за\s+|\s+на\s+|\s+от\s+|\s+с\s+|$)/iu,
    // Russian: "в названии которых есть X"
    /в\s+названи[ие]\s+котор[а-яё]*\s+(?:есть|содержит|встречается)\s+(.+?)(?:\s+и\s+|\s+из\s+|\s+для\s+|\s+по\s+|\s+за\s+|\s+от\s+|\s|$)/iu,
    // Russian: "файлы с именем X" / "файлы с названием X"
    /файл[ыья]*\s+(?:с\s+имен(?:ем|ами)|с\s+названи(?:ем|и|ю|я))\s+(.+?)(?:\s+и\s+|\s+в\s+котор|\s+где|\s+из\s+|\s+для\s+|\s+по\s+|\s+за\s+|\s+от\s+|\s*$)/iu,
    // Russian: "с именем X" / "с названием X" / "по имени X"
    /(?:с\s+имен(?:ем|ами)|с\s+названи(?:ем|и|ю|я)|по\s+имени|по\s+названи(?:ю|я))\s+(.+?)(?:\s+и\s+|\s+в\s+котор|\s+где|\s+из\s+|\s+для\s+|\s+по\s+|\s+за\s+|\s+от\s+|\s*$)/iu,
    // English: "find files named/called X", "find all files X"
    /find\s+(?:all\s+)?files?\s+(?:named|called|with\s+(?:the\s+)?name|whose\s+name(?:s?)\s+(?:is|contains?|include))\s+(.+?)(?:\s+which|\s+that|\s+where|$)/iu,
    /find\s+(?:all\s+)?files?\s+(.+?)(?:\s+which|\s+that|\s+where|\s+containing|$)/iu,
    // English: "show/list files X"
    /(?:show|list)\s+(?:all\s+)?files?\s+(?:named|called|with\s+name)?\s*(.+?)(?:\s+which|\s+that|\s+where|$)/iu,
    // English: "files named X" / "files called X"
    /files?\s+(?:named|called)\s+(.+?)(?:\s+which|\s+that|\s+where|$)/iu,
    // English: "in the name(s) X"
    /(?:in|with)\s+(?:the\s+)?name(?:s?)\s+(.+?)(?:\s+which|\s+that|\s+where|$)/iu,
  ];

  const filenameMatch = firstCapture(filenamePatterns);
  if (filenameMatch === null) {
    return { filenameKeywords: [], contentKeywords: [], isFilenameOnly: false };
  }

  const filenameWords = extractMeaningfulWords(filenameMatch);
  if (filenameWords.length === 0) {
    return { filenameKeywords: [], contentKeywords: [], isFilenameOnly: false };
  }

  // Content constraints in the rest of the query, e.g.
  // "найди файлы с именем протокол в которых встречается слово двигатель".
  // The English verbs accept the third-person "s" ("contains", "mentions",
  // "includes"); the previous `conta(?:in|ys)` / `that\s+conta` forms could not
  // match "contains" at all. "содержащ*" now accepts any participle ending.
  const contentPatterns: readonly RegExp[] = [
    /(?:в\s+котор[а-яё]*|где|that|which|where)\s+(?:есть|содержит|встречается|упоминается|contains?|mentions?|includes?|has|have)\s+(?:(?:слово|word)\s+)?(.+?)$/iu,
    /(?:содерж(?:ат|ит|ащ[а-яё]*)|в\s+котор[а-яё]*\s+(?:встречается|упоминается)|in\s+which|that\s+contains?)\s+(.+?)$/iu,
  ];

  const contentMatch = firstCapture(contentPatterns);
  const contentKeywords = contentMatch ? extractMeaningfulWords(contentMatch) : [];

  // Without content keywords the request is a pure file listing.
  const isFilenameOnly = contentKeywords.length === 0;

  console.info(
    `[BigRAG] Filename search intent detected: filenameKeywords=[${filenameWords.join(", ")}], ` +
      `contentKeywords=[${contentKeywords.join(", ")}], isFilenameOnly=${isFilenameOnly}`,
  );

  return {
    filenameKeywords: filenameWords,
    contentKeywords,
    isFilenameOnly,
  };
}
/**
 * Words that carry no filename/content meaning in a search query
 * (Russian + English). Built once at module load instead of on every call.
 * Entries are lower-case; lookups lower-case the candidate word first.
 */
const FILENAME_QUERY_STOP_WORDS: ReadonlySet<string> = new Set([
  // Russian stop words
  "и", "в", "на", "с", "что", "это", "как", "не", "но", "он", "она", "они",
  "мы", "вы", "тут", "там", "где", "когда", "зачем", "почему", "все", "который",
  "которая", "которое", "которые", "тот", "та", "то", "те", "этот", "эта", "это",
  "эти", "мой", "моя", "моё", "мои", "ваш", "ваша", "ваше", "ваши", "наш", "наша",
  "наше", "наши", "свой", "своя", "своё", "свои", "его", "её", "их", "ему", "ей",
  "им", "ним", "ней", "ними", "а", "или", "ли", "же", "бы", "уже", "ещё", "еще",
  "тоже", "также", "только", "даже", "уже", "ещё", "если", "чтобы", "потому",
  "поэтому", "тогда", "так", "тут", "вот", "там", "здесь", "где", "куда", "откуда",
  "какой", "какая", "какое", "какие", "чей", "чья", "чьё", "чьи", "сколько",
  "несколько", "много", "мало", "каждый", "любой", "другой", "сам", "сама", "само",
  "сами", "самый", "один", "одна", "одно", "одни", "два", "две", "три",
  "файл", "файла", "файлу", "файлом", "файлов", "файлы", "файлов",
  "найди", "найти", "покажи", "показать", "перечисли", "перечислить",
  // "именем"/"именами" replace the former typo "имем", which let the word
  // "именем" from "с именем X" leak through as a filename keyword.
  "список", "имя", "имени", "именем", "именами", "название", "названия", "названию",
  "названием", "который", "которая", "которое", "которые",
  "слово", "слова", "слову", "словом", "словах",
  "встречается", "встречаются", "упоминается", "упоминаются",
  "содержит", "содержат", "содержащий", "содержащая", "содержащее", "содержащие",
  "есть", "имеет", "имеют",
  // Russian action verbs (common in queries but not filename material)
  "выведи", "вывести", "покажи", "показать", "дай", "дать", "предоставь", "предоставить",
  "напиши", "написать", "расскажи", "рассказать", "объясни", "объяснить",
  "перечисли", "перечислить", "укажи", "указать", "назови", "назвать",
  "прочитай", "прочитать", "прочти", "прочесть",
  "открой", "открыть", "загрузи", "загрузить", "скачай", "скачать",
  "скопируй", "скопировать", "вставь", "вставить",
  "выдай", "выдать", "выбери", "выбрать", "найди", "найти",
  "получи", "получить", "создай", "создать", "удали", "удалить",
  "измени", "изменить", "обнови", "обновить", "проверь", "проверить",
  "сравни", "сравнить", "проанализируй", "проанализировать",
  // Russian query filler words
  "полностью", "полный", "полного", "полное", "полной",
  "весь", "всего", "всей", "всё", "все", "всем", "всеми",
  "какой", "какая", "какое", "какие", "каков", "какова",
  "него", "неё", "них", "него", "ней", "ними",
  "который", "которая", "которое", "которые",
  "через", "между", "перед", "после", "под", "над", "около",
  "среди", "вокруг", "вдоль", "близ", "без", "кроме",
  "из", "от", "до", "для", "ради", "благодаря",
  "текст", "текста", "тексту", "текстом", "тексте",
  "содержимое", "содержания", "содержание",
  "документ", "документа", "документу", "документом", "документе",
  "документы", "документов", "документам",
  "информация", "сведения", "данные", "данных",
  "можешь", "можно", "нужно", "необходимо", "хочу", "хочешь",
  "пожалуйста", "спасибо", "ок", "окей",
  // English stop words
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "must", "shall", "can", "need", "dare",
  "to", "of", "in", "for", "on", "with", "at", "by", "from", "as",
  "into", "through", "during", "before", "after", "above", "below",
  "between", "out", "off", "over", "under", "again", "further", "then",
  "once", "here", "there", "when", "where", "why", "how", "all", "each",
  "every", "both", "few", "more", "most", "other", "some", "such", "no",
  "not", "only", "own", "same", "so", "than", "too", "very",
  "just", "because", "but", "and", "or", "if", "while", "about",
  "find", "show", "list", "file", "files", "named", "called", "name",
  "names", "whose", "which", "that", "this", "these", "those",
  "contain", "contains", "including", "include", "includes",
  "mention", "mentions", "mentioned", "have", "has", "word", "words",
]);

/**
 * Extract meaningful words from text, removing common stop words (Russian + English).
 *
 * The text is split on every run of characters that are neither Unicode
 * letters nor Unicode digits. Tokens of length 1 or less are dropped, then
 * tokens whose lower-cased form is a stop word are dropped. Surviving words
 * keep their original casing and order; duplicates are not removed.
 *
 * @param text - Fragment of a user query.
 * @returns The remaining words, possibly an empty array.
 */
function extractMeaningfulWords(text: string): string[] {
  return text
    .split(/[^\p{L}\p{N}]+/u)
    .filter((word) => word.length > 1 && !FILENAME_QUERY_STOP_WORDS.has(word.toLowerCase()));
}
/**
 * Decide whether a query asks to see what is inside the matched files rather
 * than just a list of their names. Intended to be consulted only after
 * filename search intent has been detected.
 *
 * @param query - Raw user prompt.
 * @returns `true` as soon as any "show me the content" phrasing (Russian or
 *   English) is found in the query, otherwise `false`.
 */
export function hasContentDisplayIntent(query: string): boolean {
  const contentDisplayPatterns: RegExp[] = [
    // Russian: explicit content display verbs
    /(?:^|[\s(])выведи(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])вывести(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])прочитай(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])прочти(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])прочесть(?:$|[\s,;:.!?)])/iu,
    // Russian: "полностью" / "целиком" — strong signal for full content
    /(?:^|[\s(])полностью(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])целиком(?:$|[\s,;:.!?)])/iu,
    // Russian: content-related nouns
    /(?:^|[\s(])содержани[ея](?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])содержимое(?:$|[\s,;:.!?)])/iu,
    // Russian: "весь текст" / "всё содержимое"
    /(?:^|[\s(])весь\s+текст(?:$|[\s,;:.!?)])/iu,
    /(?:^|[\s(])вс[ёе]\s+содерж/iu,
    // Russian: "что внутри" / "что в нём" / "что в файле"
    /(?:^|[\s(])что\s+(?:внутри|в\s+н[ёе]м|в\s+(?:этом|том|файл|документ))/iu,
    // English
    /\bdisplay\b/iu,
    /\bread\s+(?:the\s+)?(?:file|document|content|it|full|entire)/iu,
    /\bshow\s+(?:the\s+)?(?:content|text|full|entire|whole)/iu,
    /\bfull\s+(?:content|text)/iu,
    /\bentire\s+(?:content|text|file|document)/iu,
    /\bwhole\s+(?:content|text|file|document)/iu,
    /\bwhat(?:'s| is)\s+(?:inside|in)/iu,
  ];

  for (const candidate of contentDisplayPatterns) {
    if (candidate.test(query)) {
      return true;
    }
  }
  return false;
}
/**
* Localized prompt strings for RAG context injection.
* The inline instructions sent to the model are collected here so they can be
* switched between Russian and English (see getPromptStrings).
*
* NOTE(review): preprocess still assembles a few English-only fragments inline
* (e.g. "Matching files:" and the "--- File N: ..." separators) — confirm
* whether those should be localized too.
*/
interface PromptStrings {
/** Header above retrieved passages in standard vector search */
passagesFound: string;
/** Citation label: "Citation N (from file, score: X):" or "Цитата N (из файла, релевантность: X):" */
citationLabel: (n: number, fileName: string, score: string) => string;
/** "User Query:" label */
userQueryLabel: string;
/**
* File content request header: user asked to find and display file content.
* `keywords` is the comma-joined filename keyword list, `fileCount` the number of matched files.
*/
fileContentRequest: (keywords: string, fileCount: number) => string;
/** Instruction to present file content to user */
fileContentPresent: string;
/** File content unavailable (files found but no indexed content); same arguments as fileContentRequest */
fileContentUnavailable: (keywords: string, fileCount: number) => string;
/** Instruction to inform user about unavailable content */
fileInfoUnavailable: string;
/** Filename-only search header; same arguments as fileContentRequest */
fileNameSearch: (keywords: string, fileCount: number) => string;
/** Filename-only search listing instruction (the text carries its own leading blank line) */
fileNameListInstruction: string;
/** File about search (found files but no content passages) */
fileAboutSearch: (keywords: string, fileCount: number) => string;
/** Instruction for file about search (the text carries its own leading blank line) */
fileAboutListInstruction: string;
/** No results message */
noResultsPrefix: string;
/** No results instruction */
noResultsInstruction: string;
/** "Files matched by name" header */
filesMatchedByName: (count: number) => string;
}
/**
 * Build the set of model-facing prompt strings for the requested language.
 *
 * @param lang - Response language; `"ru"` selects the Russian strings, any
 *   other value falls back to English.
 * @returns A freshly created `PromptStrings` object.
 */
function getPromptStrings(lang: ResponseLanguage): PromptStrings {
  switch (lang) {
    case "ru": {
      const russian: PromptStrings = {
        passagesFound: "Следующие фрагменты найдены в проиндексированных документах:\n\n",
        citationLabel: (n, fileName, score) => {
          return `Цитата ${n} (из ${fileName}, релевантность: ${score}): `;
        },
        userQueryLabel: "Запрос пользователя:",
        fileContentRequest: (keywords, fileCount) => {
          const request = `Пользователь просит найти и показать содержимое файлов, соответствующих: "${keywords}".\n`;
          const found = `Найдено ${fileCount} файл(ов). Ниже приведено проиндексированное содержимое каждого файла (фрагменты расположены по порядку в документе).\n\n`;
          return request + found;
        },
        fileContentPresent: [
          `Представь содержимое файла(ов) выше пользователю. `,
          `Если содержимое выглядит как полный документ, отобрази его в читаемом формате. `,
          `Ответь на том же языке, на котором задан запрос пользователя.`,
        ].join(""),
        fileContentUnavailable: (keywords, fileCount) => {
          const request = `Пользователь просит найти и показать содержимое файлов, соответствующих: "${keywords}".\n`;
          const found = `Найдено ${fileCount} файл(ов), но проиндексированное содержимое для них недоступно.\n\n`;
          return request + found;
        },
        fileInfoUnavailable: `Сообщи пользователю, что файлы найдены, но их содержимое недоступно в индексе.`,
        fileNameSearch: (keywords, fileCount) => {
          const request = `Пользователь просит найти файлы по имени, соответствующие: "${keywords}".\n`;
          const found = `Найдено ${fileCount} файл(ов) в проиндексированных документах:\n\n`;
          return request + found;
        },
        fileNameListInstruction: `\n\nПеречисли файлы выше для пользователя. Если список пуст, сообщи, что файлы с таким именем не найдены.`,
        fileAboutSearch: (keywords, fileCount) => {
          const request = `Пользователь спрашивает о файлах, соответствующих: "${keywords}".\n`;
          const found = `Найдено ${fileCount} файл(ов), но релевантные фрагменты содержимого не найдены.\n\n`;
          return request + found;
        },
        fileAboutListInstruction: `\n\nПеречисли найденные файлы пользователю и упомяни, что релевантные фрагменты содержимого не найдены.`,
        noResultsPrefix: `Важно: Релевантный контент не найден в проиндексированных документах по запросу пользователя. `,
        noResultsInstruction: `В одном предложении сообщи пользователю об этом. Затем ответь на запрос самостоятельно.`,
        filesMatchedByName: (count) => {
          return `Файлы, найденные по имени (${count}): `;
        },
      };
      return russian;
    }
    default: {
      // English is the fallback for every language other than "ru".
      const english: PromptStrings = {
        passagesFound: "The following passages were found in your indexed documents:\n\n",
        citationLabel: (n, fileName, score) => {
          return `Citation ${n} (from ${fileName}, score: ${score}): `;
        },
        userQueryLabel: "User Query:",
        fileContentRequest: (keywords, fileCount) => {
          const request = `The user asked to find and display the content of files matching: "${keywords}".\n`;
          const found = `Found ${fileCount} matching file(s). Below is the indexed content of each file (chunks are ordered by position in the document).\n\n`;
          return request + found;
        },
        fileContentPresent: [
          `Present the content of the file(s) above to the user. `,
          `If the content appears to be a complete document, display it in a readable format. `,
          `Respond in the same language as the user's query.`,
        ].join(""),
        fileContentUnavailable: (keywords, fileCount) => {
          const request = `The user asked to find and display content of files matching: "${keywords}".\n`;
          const found = `Found ${fileCount} matching file(s), but no indexed content is available for them.\n\n`;
          return request + found;
        },
        fileInfoUnavailable: `Inform the user that the files were found but their content is not available in the index.`,
        fileNameSearch: (keywords, fileCount) => {
          const request = `The user asked to find files by name matching: "${keywords}".\n`;
          const found = `Found ${fileCount} matching file(s) in the indexed documents:\n\n`;
          return request + found;
        },
        fileNameListInstruction: `\n\nList the files above for the user. If the list is empty, inform the user that no files with that name were found.`,
        fileAboutSearch: (keywords, fileCount) => {
          const request = `The user asked about files matching: "${keywords}".\n`;
          const found = `Found ${fileCount} matching file(s), but no relevant content passages within them.\n\n`;
          return request + found;
        },
        fileAboutListInstruction: `\n\nList the matching files for the user and mention that no specific content passages were found.`,
        noResultsPrefix: `Important: No relevant content was found in the indexed documents for the user query. `,
        noResultsInstruction: `In less than one sentence, inform the user of this. Then respond to the query to the best of your ability.`,
        filesMatchedByName: (count) => {
          return `Files matched by name (${count}): `;
        },
      };
      return english;
    }
  }
}
// Global state for vector store (persists across requests)
// Vector store instance; created inside preprocess() when missing and reused afterwards.
let vectorStore: VectorStore | null = null;
// Vector store directory the current instance was opened with (despite the name, preprocess()
// assigns the vector store path here); a different configured path triggers re-initialization.
let lastIndexedDir = "";
// Set to true after performSanityChecks has passed once, so later requests skip the checks.
let sanityChecksPassed = false;
// Placeholder tokens substituted in the prompt template.
const RAG_CONTEXT_MACRO = "{{rag_context}}";
const USER_QUERY_MACRO = "{{user_query}}";
/**
 * Return a prompt template that is guaranteed to contain both the RAG context
 * macro and the user query macro.
 *
 * A missing, non-string or blank `template` is replaced by the default
 * template (language-specific when `lang` is given). If the context macro is
 * absent it is prepended; if the query macro is absent it is appended under a
 * query label. Each repair is reported with `console.warn`.
 *
 * @param template - Configured template, possibly empty.
 * @param fallbackQueryLabel - Label placed above the appended query macro;
 *   defaults to "User Query:".
 * @param lang - Language used to pick the default template.
 * @returns The normalized template text.
 */
function normalizePromptTemplate(
  template: string | null | undefined,
  fallbackQueryLabel?: string,
  lang?: ResponseLanguage,
): string {
  const defaultTemplate = lang ? getDefaultPromptTemplate(lang) : DEFAULT_PROMPT_TEMPLATE;
  const configured =
    typeof template === "string" && template.trim().length > 0 ? template : null;

  let result = configured === null ? defaultTemplate : configured;

  const lacksContextMacro = !result.includes(RAG_CONTEXT_MACRO);
  if (lacksContextMacro) {
    console.warn(
      `[BigRAG] Prompt template missing ${RAG_CONTEXT_MACRO}. Prepending RAG context block.`,
    );
    result = [RAG_CONTEXT_MACRO, result].join("\n\n");
  }

  // Checked after the prepend above, exactly like the sequential original.
  const lacksQueryMacro = !result.includes(USER_QUERY_MACRO);
  if (lacksQueryMacro) {
    console.warn(
      `[BigRAG] Prompt template missing ${USER_QUERY_MACRO}. Appending user query block.`,
    );
    const label = fallbackQueryLabel ?? "User Query:";
    result = [result, label, USER_QUERY_MACRO].join("\n\n");
  }

  return result;
}
/**
 * Substitute placeholder tokens in a prompt template.
 *
 * All tokens are replaced in a single pass over the ORIGINAL template, so a
 * replacement value is never scanned again. This matters because the values
 * are untrusted text (retrieved document passages, the user's prompt): with
 * one-token-at-a-time replacement, a passage containing the literal
 * `{{user_query}}` would have the user's query spliced into it.
 *
 * @param template - Template text containing zero or more tokens.
 * @param replacements - Map from literal token (e.g. `{{rag_context}}`) to
 *   the text that should replace every occurrence of it. Empty-string tokens
 *   are ignored. When one token is a prefix of another, the longer one wins.
 * @returns The template with every token occurrence replaced; values are
 *   inserted verbatim (no `$&`-style pattern expansion).
 */
function fillPromptTemplate(template: string, replacements: Record<string, string>): string {
  const tokens = Object.keys(replacements)
    .filter((token) => token.length > 0)
    // Longest first so the alternation prefers the most specific token.
    .sort((a, b) => b.length - a.length);
  if (tokens.length === 0) {
    return template;
  }

  const escapeForRegExp = (literal: string): string =>
    literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const tokenMatcher = new RegExp(tokens.map(escapeForRegExp).join("|"), "g");

  // A replacer function keeps "$" sequences in the values literal.
  return template.replace(tokenMatcher, (token) => replacements[token] ?? token);
}
/**
 * Best-effort check that the final prompt, appended to the chat history as a
 * user message, still fits the model's context length.
 *
 * When the token count exceeds the context length, a warning is logged, an
 * "error" status is created on the controller and a system notification is
 * attempted. Nothing is thrown: any failure along the way is logged with
 * `console.warn` and the function returns normally.
 *
 * @param ctl - Preprocessor controller supplying the token source, history,
 *   status API and client.
 * @param finalPrompt - Prompt text that is about to be sent to the model.
 */
async function warnIfContextOverflow(
  ctl: PromptPreprocessorController,
  finalPrompt: string,
): Promise<void> {
  try {
    const tokenSource = await ctl.tokenSource();
    // Duck-type the token source: all three utilities must be callable.
    if (
      !tokenSource ||
      !("applyPromptTemplate" in tokenSource) ||
      typeof tokenSource.applyPromptTemplate !== "function" ||
      !("countTokens" in tokenSource) ||
      typeof tokenSource.countTokens !== "function" ||
      !("getContextLength" in tokenSource) ||
      typeof tokenSource.getContextLength !== "function"
    ) {
      console.warn("[BigRAG] Token source does not expose prompt utilities; skipping context check.");
      return;
    }

    const [contextLength, history] = await Promise.all([
      tokenSource.getContextLength(),
      ctl.pullHistory(),
    ]);

    const projectedHistory = history.withAppended({
      role: "user",
      content: finalPrompt,
    });
    const renderedPrompt = await tokenSource.applyPromptTemplate(projectedHistory);
    const promptTokens = await tokenSource.countTokens(renderedPrompt);

    const overflows = promptTokens > contextLength;
    if (!overflows) {
      return;
    }

    const warningSummary =
      `⚠️ Prompt needs ${promptTokens.toLocaleString()} tokens but model max is ${contextLength.toLocaleString()}.`;
    console.warn("[BigRAG]", warningSummary);
    ctl.createStatus({
      status: "error",
      text: `${warningSummary} Reduce retrieved passages or increase the model's context length.`,
    });

    // The notification is optional; a failure here must not mask the warning above.
    try {
      await ctl.client.system.notify({
        title: "Context window exceeded",
        description: `${warningSummary} Prompt may be truncated or rejected.`,
        noAutoDismiss: true,
      });
    } catch (notifyError) {
      console.warn("[BigRAG] Unable to send context overflow notification:", notifyError);
    }
  } catch (error) {
    console.warn("[BigRAG] Failed to evaluate context usage:", error);
  }
}
/**
* Main prompt preprocessor function
*/
export async function preprocess(
ctl: PromptPreprocessorController,
userMessage: ChatMessage,
): Promise<ChatMessage | string> {
const userPrompt = userMessage.getText();
const pluginConfig = ctl.getPluginConfig(configSchematics);
// Get configuration
const documentsDir = pluginConfig.get("documentsDirectory");
const vectorStoreDir = pluginConfig.get("vectorStoreDirectory");
const retrievalLimit = pluginConfig.get("retrievalLimit");
const retrievalThreshold = pluginConfig.get("retrievalAffinityThreshold");
const chunkSize = pluginConfig.get("chunkSize");
const chunkOverlap = pluginConfig.get("chunkOverlap");
const maxConcurrent = pluginConfig.get("maxConcurrentFiles");
const enableOCR = pluginConfig.get("enableOCR");
const ocrSettings: OcrSettings = {
language: pluginConfig.get("ocrLanguage") || "eng+rus",
dataPath: pluginConfig.get("ocrDataPath") || "",
pageSegMode: pluginConfig.get("ocrPageSegMode") ?? 3,
minTextLength: pluginConfig.get("ocrMinTextLength") ?? 20,
maxPages: pluginConfig.get("ocrMaxPages") ?? 200,
maxImagesPerPage: pluginConfig.get("ocrMaxImagesPerPage") ?? 10,
minImageArea: pluginConfig.get("ocrMinImageArea") ?? 2500,
maxImagePixels: pluginConfig.get("ocrMaxImagePixels") ?? 100_000_000,
imageTimeoutMs: pluginConfig.get("ocrImageTimeoutMs") ?? 60_000,
};
const embeddingModelId = pluginConfig.get("embeddingModel") || "gpustack/text-embedding-bge-m3";
const skipPreviouslyIndexed = pluginConfig.get("manualReindex.skipPreviouslyIndexed");
const parseDelayMs = pluginConfig.get("parseDelayMs") ?? 0;
const fileTypeFilter = {
indexHTML: pluginConfig.get("indexHTML") ?? true,
indexPDF: pluginConfig.get("indexPDF") ?? true,
indexEPUB: pluginConfig.get("indexEPUB") ?? true,
indexText: pluginConfig.get("indexText") ?? true,
indexDocx: pluginConfig.get("indexDocx") ?? true,
indexXlsx: pluginConfig.get("indexXlsx") ?? true,
indexPptx: pluginConfig.get("indexPptx") ?? true,
indexImages: pluginConfig.get("indexImages") ?? true,
};
const reindexRequested = pluginConfig.get("manualReindex.trigger");
const enableFilenameSearch = pluginConfig.get("enableFilenameSearch") ?? true;
const responseLanguage: ResponseLanguage =
(pluginConfig.get("responseLanguage") as string) === "en" ? "en" : "ru";
const promptStrings = getPromptStrings(responseLanguage);
// Validate configuration
if (!documentsDir || documentsDir === "") {
console.warn("[BigRAG] Documents directory not configured. Please set it in plugin settings.");
return userMessage;
}
if (!vectorStoreDir || vectorStoreDir === "") {
console.warn("[BigRAG] Vector store directory not configured. Please set it in plugin settings.");
return userMessage;
}
try {
// Perform sanity checks on first run
if (!sanityChecksPassed) {
const checkStatus = ctl.createStatus({
status: "loading",
text: "Performing sanity checks...",
});
const sanityResult = await performSanityChecks(documentsDir, vectorStoreDir);
// Log warnings
for (const warning of sanityResult.warnings) {
console.warn("[BigRAG]", warning);
}
// Log errors and abort if critical
if (!sanityResult.passed) {
for (const error of sanityResult.errors) {
console.error("[BigRAG]", error);
}
const failureReason =
sanityResult.errors[0] ??
sanityResult.warnings[0] ??
"Unknown reason. Please review plugin settings.";
checkStatus.setState({
status: "canceled",
text: `Sanity checks failed: ${failureReason}`,
});
return userMessage;
}
checkStatus.setState({
status: "done",
text: "Sanity checks passed",
});
sanityChecksPassed = true;
}
checkAbort(ctl.abortSignal);
// Initialize vector store if needed
if (!vectorStore || lastIndexedDir !== vectorStoreDir) {
const status = ctl.createStatus({
status: "loading",
text: "Initializing vector store...",
});
vectorStore = new VectorStore(vectorStoreDir);
await vectorStore.initialize();
console.info(
`[BigRAG] Vector store ready (path=${vectorStoreDir}). Waiting for queries...`,
);
lastIndexedDir = vectorStoreDir;
status.setState({
status: "done",
text: "Vector store initialized",
});
}
checkAbort(ctl.abortSignal);
await maybeHandleConfigTriggeredReindex({
ctl,
documentsDir,
vectorStoreDir,
chunkSize,
chunkOverlap,
maxConcurrent,
enableOCR,
ocrSettings,
embeddingModelId,
parseDelayMs,
fileTypeFilter,
reindexRequested,
skipPreviouslyIndexed: pluginConfig.get("manualReindex.skipPreviouslyIndexed"),
});
checkAbort(ctl.abortSignal);
// Check if we need to index
const stats = await vectorStore.getStats();
console.debug(`[BigRAG] Vector store stats before auto-index check: totalChunks=${stats.totalChunks}, uniqueFiles=${stats.uniqueFiles}`);
if (stats.totalChunks === 0) {
if (!tryStartIndexing("auto-trigger")) {
console.warn("[BigRAG] Indexing already running, skipping automatic indexing.");
} else {
const indexStatus = ctl.createStatus({
status: "loading",
text: "Starting initial indexing...",
});
try {
const { indexingResult } = await runIndexingJob({
client: ctl.client,
abortSignal: ctl.abortSignal,
documentsDir,
vectorStoreDir,
chunkSize,
chunkOverlap,
maxConcurrent,
enableOCR,
ocrSettings,
embeddingModelId,
autoReindex: false,
parseDelayMs,
fileTypeFilter,
vectorStore,
forceReindex: true,
onProgress: (progress) => {
if (progress.status === "scanning") {
indexStatus.setState({
status: "loading",
text: `Scanning: ${progress.currentFile}`,
});
} else if (progress.status === "indexing") {
const success = progress.successfulFiles ?? 0;
const failed = progress.failedFiles ?? 0;
const skipped = progress.skippedFiles ?? 0;
indexStatus.setState({
status: "loading",
text: `Indexing: ${progress.processedFiles}/${progress.totalFiles} files ` +
`(success=${success}, failed=${failed}, skipped=${skipped}) ` +
`(${progress.currentFile})`,
});
} else if (progress.status === "complete") {
indexStatus.setState({
status: "done",
text: `Indexing complete: ${progress.processedFiles} files processed`,
});
} else if (progress.status === "error") {
indexStatus.setState({
status: "canceled",
text: `Indexing error: ${progress.error}`,
});
}
},
});
console.log(`[BigRAG] Indexing complete: ${indexingResult.successfulFiles}/${indexingResult.totalFiles} files successfully indexed (${indexingResult.failedFiles} failed)`);
} catch (error) {
indexStatus.setState({
status: "canceled",
text: `Indexing failed: ${error instanceof Error ? error.message : String(error)}`,
});
console.error("[BigRAG] Indexing failed:", error);
} finally {
finishIndexing();
}
}
}
checkAbort(ctl.abortSignal);
// Log manual reindex toggle states for visibility on each chat
const toggleStatusText =
`Manual Reindex Trigger: ${reindexRequested ? "ON" : "OFF"} | ` +
`Skip Previously Indexed: ${skipPreviouslyIndexed ? "ON" : "OFF"}`;
console.info(`[BigRAG] ${toggleStatusText}`);
ctl.createStatus({
status: "done",
text: toggleStatusText,
});
// Perform retrieval
const retrievalStatus = ctl.createStatus({
status: "loading",
text: "Loading embedding model for retrieval...",
});
const embeddingModel = await ctl.client.embedding.model(
embeddingModelId,
{ signal: ctl.abortSignal }
);
checkAbort(ctl.abortSignal);
retrievalStatus.setState({
status: "loading",
text: "Searching for relevant content...",
});
// Embed the query
const queryEmbeddingResult = await embeddingModel.embed(userPrompt);
checkAbort(ctl.abortSignal);
const queryEmbedding = queryEmbeddingResult.embedding;
// ─── Filename search ──────────────────────────────────────────────
// Check if the user is asking to search by filename
const filenameIntent = enableFilenameSearch
? extractFilenameSearchIntent(userPrompt)
: { filenameKeywords: [] as string[], contentKeywords: [] as string[], isFilenameOnly: false };
let filenameResults: FilenameSearchResult[] = [];
if (filenameIntent.filenameKeywords.length > 0) {
retrievalStatus.setState({
status: "loading",
text: `Searching files by name: [${filenameIntent.filenameKeywords.join(", ")}]...`,
});
filenameResults = await vectorStore.searchByFilenames(filenameIntent.filenameKeywords);
checkAbort(ctl.abortSignal);
console.info(
`[BigRAG] Filename search found ${filenameResults.length} files matching [${filenameIntent.filenameKeywords.join(", ")}]`,
);
}
// ─── Filename-only listing ─────────────────────────────────────────
if (filenameIntent.isFilenameOnly && filenameResults.length > 0) {
// Check if the user also wants to see the file content (not just list names)
const wantsContent = hasContentDisplayIntent(userPrompt);
console.info(
`[BigRAG] Filename-only search. wantsContent=${wantsContent} for query: "${userPrompt.slice(0, 120)}"`,
);
if (wantsContent) {
// Retrieve ALL chunks from matched files and include in context
const matchingPaths = filenameResults.map((fr) => fr.filePath);
retrievalStatus.setState({
status: "loading",
text: `Retrieving content from ${matchingPaths.length} matched file(s)...`,
});
const fileChunks = await vectorStore.getChunksForFiles(
matchingPaths,
retrievalLimit, // max chunks per file
);
checkAbort(ctl.abortSignal);
if (fileChunks.length > 0) {
retrievalStatus.setState({
status: "done",
text: `Retrieved ${fileChunks.length} passages from ${matchingPaths.length} file(s)`,
});
// Build context with all file content
const kwJoined = filenameIntent.filenameKeywords.join(", ");
let contentBlock = promptStrings.fileContentRequest(kwJoined, filenameResults.length);
// Group chunks by file
const chunksByFile = new Map<string, typeof fileChunks>();
for (const chunk of fileChunks) {
let arr = chunksByFile.get(chunk.filePath);
if (!arr) {
arr = [];
chunksByFile.set(chunk.filePath, arr);
}
arr.push(chunk);
}
let fileIdx = 0;
for (const [filePath, chunks] of chunksByFile) {
fileIdx++;
const baseName = path.basename(filePath);
// Sort by chunkIndex for correct reading order
chunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
contentBlock += `--- File ${fileIdx}: ${baseName} (${chunks.length} chunk(s)) ---\n\n`;
for (const chunk of chunks) {
contentBlock += `${chunk.text}\n\n`;
}
}
contentBlock += `\n${promptStrings.fileContentPresent}`;
console.info(
`[BigRAG] Filename content retrieval: ${fileChunks.length} chunks from ${chunksByFile.size} file(s)`,
);
// Log chunks for debugging
const chunkLogEntries = fileChunks.map((c) =>
` file=${path.basename(c.filePath)} chunk=${c.chunkIndex} len=${c.text.length}`
);
console.info(`[BigRAG] Retrieved chunks:\n${chunkLogEntries.join("\n")}`);
const promptTemplate = normalizePromptTemplate(
pluginConfig.get("promptTemplate"),
promptStrings.userQueryLabel,
responseLanguage,
);
return fillPromptTemplate(promptTemplate, {
[RAG_CONTEXT_MACRO]: contentBlock.trimEnd(),
[USER_QUERY_MACRO]: userPrompt,
});
} else {
// No chunks found (shouldn't happen if files are indexed, but handle gracefully)
retrievalStatus.setState({
status: "done",
text: `Found ${matchingPaths.length} file(s) but no indexed content`,
});
const fileListContext = filenameResults.map((fr, idx) => {
const baseName = path.basename(fr.filePath);
const dirName = path.dirname(fr.filePath);
return `${idx + 1}. ${baseName} — path: ${dirName}`;
}).join("\n");
const kwJoined2 = filenameIntent.filenameKeywords.join(", ");
return (
promptStrings.fileContentUnavailable(kwJoined2, filenameResults.length) +
`Matching files:\n${fileListContext}\n\n` +
`${promptStrings.fileInfoUnavailable}\n\n` +
`${promptStrings.userQueryLabel}\n\n${userPrompt}`
);
}
}
// ── Filename listing only (no content display) ──
retrievalStatus.setState({
status: "done",
text: `Found ${filenameResults.length} files by name`,
});
const fileListEntries = filenameResults.map((fr, idx) => {
return `#${idx + 1} ${path.basename(fr.filePath)} (${fr.matchedKeywords.join(", ")})`;
});
const fileListLog = fileListEntries.join("\n");
console.info(`[BigRAG] Filename search results:\n${fileListLog}`);
ctl.createStatus({
status: "done",
text: `Filename search results (${filenameResults.length}):`,
});
for (const entry of fileListEntries) {
ctl.createStatus({ status: "done", text: entry });
}
// Build context: list of matching files
const fileListContext = filenameResults.map((fr, idx) => {
const baseName = path.basename(fr.filePath);
const dirName = path.dirname(fr.filePath);
return `${idx + 1}. ${baseName} — path: ${dirName}`;
}).join("\n");
const kwJoined3 = filenameIntent.filenameKeywords.join(", ");
const filenameContextBlock =
promptStrings.fileNameSearch(kwJoined3, filenameResults.length) +
fileListContext +
promptStrings.fileNameListInstruction +
`\n\n${promptStrings.userQueryLabel}\n\n${userPrompt}`;
return filenameContextBlock;
}
// ─── Filename + content search ────────────────────────────────────
// If filename search found files AND there are content keywords,
// search content within those specific files
let filenameContentResults: typeof results = [];
if (filenameIntent.filenameKeywords.length > 0 && !filenameIntent.isFilenameOnly && filenameResults.length > 0) {
const matchingPaths = filenameResults.map((fr) => fr.filePath);
retrievalStatus.setState({
status: "loading",
text: `Searching content within ${matchingPaths.length} matched files...`,
});
if (filenameIntent.contentKeywords.length > 0) {
// Embed content keywords for vector search within files
const contentQuery = filenameIntent.contentKeywords.join(" ");
const contentEmbeddingResult = await embeddingModel.embed(contentQuery);
checkAbort(ctl.abortSignal);
const contentEmbedding = contentEmbeddingResult.embedding;
filenameContentResults = await vectorStore.searchInFiles(
contentEmbedding,
matchingPaths,
retrievalLimit,
retrievalThreshold * 0.6, // Lower threshold for within-file search
);
} else {
// No content keywords — just get first chunks from each file
filenameContentResults = await vectorStore.getChunksForFiles(
matchingPaths,
Math.max(1, Math.floor(retrievalLimit / Math.max(matchingPaths.length, 1))),
);
}
console.info(
`[BigRAG] Content search within filename-matched files returned ${filenameContentResults.length} results`,
);
}
// ─── Standard vector search ───────────────────────────────────────
const queryPreview =
userPrompt.length > 160 ? `${userPrompt.slice(0, 160)}...` : userPrompt;
console.info(
`[BigRAG] Executing vector search for "${queryPreview}" (limit=${retrievalLimit}, threshold=${retrievalThreshold})`,
);
let results = await vectorStore.search(
queryEmbedding,
retrievalLimit,
retrievalThreshold
);
checkAbort(ctl.abortSignal);
// ─── Fallback: retry with simplified query if no results ──────────
if (results.length === 0) {
const meaningfulWords = extractMeaningfulWords(userPrompt);
console.info(
`[BigRAG] Initial search returned 0. Extracting meaningful words: [${meaningfulWords.join(", ")}]`,
);
// Attempt 1: embed meaningful words only and retry with same threshold
if (meaningfulWords.length > 0) {
const simplifiedQuery = meaningfulWords.join(" ");
console.info(`[BigRAG] Retrying with simplified query: "${simplifiedQuery}"`);
const fallbackEmbeddingResult = await embeddingModel.embed(simplifiedQuery);
checkAbort(ctl.abortSignal);
const fallbackEmbedding = fallbackEmbeddingResult.embedding;
results = await vectorStore.search(
fallbackEmbedding,
retrievalLimit,
retrievalThreshold,
);
checkAbort(ctl.abortSignal);
if (results.length > 0) {
console.info(
`[BigRAG] Simplified query returned ${results.length} results.`,
);
}
}
// Attempt 2: if still 0, retry the original query with a lowered threshold (60% of the configured value, capped at 0.3)
if (results.length === 0) {
const loweredThreshold = Math.min(retrievalThreshold * 0.6, 0.3);
console.info(
`[BigRAG] Retrying with lowered threshold=${loweredThreshold.toFixed(3)}`,
);
results = await vectorStore.search(
queryEmbedding,
retrievalLimit,
loweredThreshold,
);
checkAbort(ctl.abortSignal);
if (results.length > 0) {
console.info(
`[BigRAG] Lowered threshold returned ${results.length} results.`,
);
}
}
// Attempt 3: simplified query + lowered threshold
if (results.length === 0 && meaningfulWords.length > 0) {
const simplifiedQuery = meaningfulWords.join(" ");
const loweredThreshold = Math.min(retrievalThreshold * 0.6, 0.3);
const fallbackEmbeddingResult = await embeddingModel.embed(simplifiedQuery);
checkAbort(ctl.abortSignal);
const fallbackEmbedding = fallbackEmbeddingResult.embedding;
results = await vectorStore.search(
fallbackEmbedding,
retrievalLimit,
loweredThreshold,
);
checkAbort(ctl.abortSignal);
if (results.length > 0) {
console.info(
`[BigRAG] Simplified query + lowered threshold returned ${results.length} results.`,
);
}
}
}
// ─── Merge results ────────────────────────────────────────────────
// Merge filename content results with regular vector search results,
// deduplicating by (filePath + chunkIndex)
if (filenameContentResults.length > 0) {
const seen = new Set<string>();
for (const r of results) {
seen.add(`${r.filePath}:${r.chunkIndex}`);
}
for (const r of filenameContentResults) {
const key = `${r.filePath}:${r.chunkIndex}`;
if (!seen.has(key)) {
results.push(r);
seen.add(key);
}
}
// Re-sort by score
results.sort((a, b) => b.score - a.score);
results = results.slice(0, retrievalLimit);
}
if (results.length > 0) {
const topHit = results[0];
console.info(
`[BigRAG] Vector search returned ${results.length} results. Top hit: file=${topHit.fileName} score=${topHit.score.toFixed(3)}`,
);
const docSummaries = results
.map(
(result, idx) =>
`#${idx + 1} file=${path.basename(result.filePath)} shard=${result.shardName} score=${result.score.toFixed(3)}`,
)
.join("\n");
console.info(`[BigRAG] Relevant documents:\n${docSummaries}`);
} else {
console.warn("[BigRAG] All search attempts (including fallbacks) returned 0 results.");
}
if (results.length === 0) {
// If we found files by name but no content, return the file listing
if (filenameResults.length > 0) {
retrievalStatus.setState({
status: "done",
text: `Found ${filenameResults.length} files by name (no matching content)`,
});
const fileListContext = filenameResults.map((fr, idx) => {
const baseName = path.basename(fr.filePath);
const dirName = path.dirname(fr.filePath);
return `${idx + 1}. ${baseName} — path: ${dirName}`;
}).join("\n");
const kwJoined4 = filenameIntent.filenameKeywords.join(", ");
const filenameContextBlock =
promptStrings.fileAboutSearch(kwJoined4, filenameResults.length) +
`Matching files:\n` + fileListContext +
promptStrings.fileAboutListInstruction +
`\n\n${promptStrings.userQueryLabel}\n\n${userPrompt}`;
return filenameContextBlock;
}
retrievalStatus.setState({
status: "canceled",
text: "No relevant content found in indexed documents",
});
const noteAboutNoResults =
promptStrings.noResultsPrefix +
promptStrings.noResultsInstruction;
return noteAboutNoResults + `\n\n${promptStrings.userQueryLabel}\n\n${userPrompt}`;
}
// Format results
retrievalStatus.setState({
status: "done",
text: `Retrieved ${results.length} relevant passages`,
});
ctl.debug("Retrieval results:", results);
let ragContextFull = "";
let ragContextPreview = "";
ragContextFull += promptStrings.passagesFound;
ragContextPreview += promptStrings.passagesFound;
// Add filename search results header if applicable
if (filenameResults.length > 0 && !filenameIntent.isFilenameOnly) {
const fileMatchInfo =
promptStrings.filesMatchedByName(filenameResults.length) +
filenameResults.map((fr) => path.basename(fr.filePath)).join(", ") +
"\n\n";
ragContextFull += fileMatchInfo;
ragContextPreview += fileMatchInfo;
}
let citationNumber = 1;
for (const result of results) {
const fileName = path.basename(result.filePath);
const citationLabel = promptStrings.citationLabel(citationNumber, fileName, result.score.toFixed(3));
ragContextFull += `\n${citationLabel}"${result.text}"\n\n`;
ragContextPreview += `\n${citationLabel}"${summarizeText(result.text)}"\n\n`;
citationNumber++;
}
const promptTemplate = normalizePromptTemplate(
pluginConfig.get("promptTemplate"),
promptStrings.userQueryLabel,
responseLanguage,
);
const finalPrompt = fillPromptTemplate(promptTemplate, {
[RAG_CONTEXT_MACRO]: ragContextFull.trimEnd(),
[USER_QUERY_MACRO]: userPrompt,
});
const finalPromptPreview = fillPromptTemplate(promptTemplate, {
[RAG_CONTEXT_MACRO]: ragContextPreview.trimEnd(),
[USER_QUERY_MACRO]: userPrompt,
});
ctl.debug("Processed content (preview):", finalPromptPreview);
const passagesLogEntries = results.map((result, idx) => {
const fileName = path.basename(result.filePath);
return `#${idx + 1} file=${fileName} shard=${result.shardName} score=${result.score.toFixed(3)}\n${summarizeText(result.text)}`;
});
const passagesLog = passagesLogEntries.join("\n\n");
console.info(`[BigRAG] RAG passages (${results.length}) preview:\n${passagesLog}`);
ctl.createStatus({
status: "done",
text: `RAG passages (${results.length}):`,
});
for (const entry of passagesLogEntries) {
ctl.createStatus({
status: "done",
text: entry,
});
}
console.info(`[BigRAG] Final prompt sent to model (preview):\n${finalPromptPreview}`);
ctl.createStatus({
status: "done",
text: `Final prompt sent to model (preview):\n${finalPromptPreview}`,
});
await warnIfContextOverflow(ctl, finalPrompt);
return finalPrompt;
} catch (error) {
// IMPORTANT: Re-throw abort errors so LM Studio can stop the preprocessor promptly.
// Swallowing AbortError causes the "did not abort in time" warning.
if (isAbortError(error)) {
throw error;
}
console.error("[PromptPreprocessor] Preprocessing failed.", error);
return userMessage;
}
}
/**
 * Options accepted by `maybeHandleConfigTriggeredReindex`.
 *
 * Most fields are passed straight through to `runIndexingJob`; their exact
 * semantics are defined there, so the per-field notes below only state how
 * this file uses them.
 */
interface ConfigReindexOpts {
/** Controller used to create status entries; its `client` and `abortSignal` are handed to the indexing job. */
ctl: PromptPreprocessorController;
/** Forwarded to the indexing job. Presumably the folder holding the documents to index — confirm against `runIndexingJob`. */
documentsDir: string;
/** Forwarded to the indexing job. Presumably the folder where the vector store is persisted — confirm against `runIndexingJob`. */
vectorStoreDir: string;
/** Forwarded to the indexing job unchanged. */
chunkSize: number;
/** Forwarded to the indexing job unchanged. */
chunkOverlap: number;
/** Forwarded to the indexing job unchanged. */
maxConcurrent: number;
/** Forwarded to the indexing job unchanged. */
enableOCR: boolean;
/** Forwarded to the indexing job unchanged. */
ocrSettings: OcrSettings;
/** Forwarded to the indexing job unchanged. */
embeddingModelId: string;
/** Forwarded to the indexing job unchanged. Presumably a delay in milliseconds, per the name — confirm. */
parseDelayMs: number;
/** Per-file-type switches forwarded to the indexing job. Presumably each flag enables indexing of that file type — confirm. */
fileTypeFilter: {
indexHTML: boolean;
indexPDF: boolean;
indexEPUB: boolean;
indexText: boolean;
indexDocx: boolean;
indexXlsx: boolean;
indexPptx: boolean;
indexImages: boolean;
};
/** When false, `maybeHandleConfigTriggeredReindex` returns immediately without doing anything. */
reindexRequested: boolean;
/**
 * Passed to the indexing job as `autoReindex`, and negated as `forceReindex`.
 * Also echoed as ON/OFF in the reminder status text.
 */
skipPreviouslyIndexed: boolean;
}
/**
 * Runs a manual reindex when the config-level reindex trigger is enabled.
 *
 * Flow:
 * 1. Returns immediately when `reindexRequested` is false.
 * 2. Posts a reminder status describing the trigger / skip settings.
 * 3. Tries to take the indexing lock via `tryStartIndexing`; if the lock is
 *    unavailable, posts a "canceled" status and returns.
 * 4. Runs `runIndexingJob`, mirroring its progress callbacks into a single
 *    status entry, then posts a per-line summary and sends a notification.
 * 5. Always releases the lock through `finishIndexing` in `finally`.
 *
 * Ordinary indexing failures are reported through the status entry and the
 * console and are NOT propagated, so a failed reindex never blocks the chat.
 * Cancellation is the exception: it is re-thrown so the preprocessor can stop
 * promptly instead of continuing after the user aborted the request.
 *
 * @param opts - See `ConfigReindexOpts`; apart from `ctl` and
 *   `reindexRequested`, the values are handed to `runIndexingJob`.
 * @returns A promise that resolves once the reindex attempt has finished
 *   (or was skipped).
 * @throws Re-throws the caught error when the request was aborted while the
 *   indexing job was running.
 */
async function maybeHandleConfigTriggeredReindex({
  ctl,
  documentsDir,
  vectorStoreDir,
  chunkSize,
  chunkOverlap,
  maxConcurrent,
  enableOCR,
  ocrSettings,
  embeddingModelId,
  parseDelayMs,
  fileTypeFilter,
  reindexRequested,
  skipPreviouslyIndexed,
}: ConfigReindexOpts): Promise<void> {
  if (!reindexRequested) {
    return;
  }

  const reminderText =
    `Manual Reindex Trigger is ON. Skip Previously Indexed Files is currently ${skipPreviouslyIndexed ? "ON" : "OFF"}. ` +
    "The index will be rebuilt each chat when 'Skip Previously Indexed Files' is OFF. If 'Skip Previously Indexed Files' is ON, the index will only be rebuilt for new or changed files.";
  console.info(`[BigRAG] ${reminderText}`);
  ctl.createStatus({
    status: "done",
    text: reminderText,
  });

  // Only one indexing job may run at a time; bail out if the lock is taken.
  if (!tryStartIndexing("config-trigger")) {
    ctl.createStatus({
      status: "canceled",
      text: "Manual reindex already running. Please wait for it to finish.",
    });
    return;
  }

  const status = ctl.createStatus({
    status: "loading",
    text: "Manual reindex requested from config...",
  });

  try {
    const { indexingResult } = await runIndexingJob({
      client: ctl.client,
      abortSignal: ctl.abortSignal,
      documentsDir,
      vectorStoreDir,
      chunkSize,
      chunkOverlap,
      maxConcurrent,
      enableOCR,
      ocrSettings,
      embeddingModelId,
      autoReindex: skipPreviouslyIndexed,
      parseDelayMs,
      fileTypeFilter,
      // A forced rebuild is the opposite of "skip previously indexed files".
      forceReindex: !skipPreviouslyIndexed,
      vectorStore: vectorStore ?? undefined,
      // Mirror each progress phase into the single status entry created above.
      onProgress: (progress) => {
        if (progress.status === "scanning") {
          status.setState({
            status: "loading",
            text: `Scanning: ${progress.currentFile}`,
          });
        } else if (progress.status === "indexing") {
          const success = progress.successfulFiles ?? 0;
          const failed = progress.failedFiles ?? 0;
          const skipped = progress.skippedFiles ?? 0;
          status.setState({
            status: "loading",
            text: `Indexing: ${progress.processedFiles}/${progress.totalFiles} files ` +
              `(success=${success}, failed=${failed}, skipped=${skipped}) ` +
              `(${progress.currentFile})`,
          });
        } else if (progress.status === "complete") {
          status.setState({
            status: "done",
            text: `Indexing complete: ${progress.processedFiles} files processed`,
          });
        } else if (progress.status === "error") {
          status.setState({
            status: "canceled",
            text: `Indexing error: ${progress.error}`,
          });
        }
      },
    });

    status.setState({
      status: "done",
      text: "Manual reindex complete!",
    });

    const summaryLines = [
      `Processed: ${indexingResult.successfulFiles}/${indexingResult.totalFiles}`,
      `Failed: ${indexingResult.failedFiles}`,
      `Skipped (unchanged): ${indexingResult.skippedFiles}`,
      `Updated existing files: ${indexingResult.updatedFiles}`,
      `New files added: ${indexingResult.newFiles}`,
    ];
    for (const line of summaryLines) {
      ctl.createStatus({
        status: "done",
        text: line,
      });
    }

    if (indexingResult.totalFiles > 0 && indexingResult.skippedFiles === indexingResult.totalFiles) {
      ctl.createStatus({
        status: "done",
        text: "All files were already up to date (skipped).",
      });
    }

    console.log(
      `[BigRAG] Manual reindex summary:\n ${summaryLines.join("\n ")}`,
    );
    await notifyManualResetNeeded(ctl);
  } catch (error) {
    // Cancellation must propagate: swallowing it here would let the
    // preprocessor keep working after the user aborted the request. The error
    // shape check uses the same criteria as this file's `isAbortError` helper;
    // the signal check additionally covers custom abort reasons.
    const wasAborted =
      ctl.abortSignal.aborted ||
      (error instanceof Error &&
        (error.name === "AbortError" || error.message === "Aborted"));
    if (wasAborted) {
      status.setState({
        status: "canceled",
        text: "Manual reindex canceled.",
      });
      throw error;
    }

    status.setState({
      status: "error",
      text: `Manual reindex failed: ${error instanceof Error ? error.message : String(error)}`,
    });
    console.error("[BigRAG] Manual reindex failed:", error);
  } finally {
    // Release the indexing lock on every path, including cancellation.
    finishIndexing();
  }
}
/**
 * Sends a best-effort system notification after a manual reindex has finished,
 * reminding the user how the reindex trigger interacts with the
 * "Skip Previously Indexed Files" setting.
 *
 * Any failure while sending the notification is logged as a warning and
 * otherwise ignored, so this function never rejects.
 *
 * @param ctl - Controller whose `client.system.notify` is used to deliver the
 *   notification.
 */
async function notifyManualResetNeeded(ctl: PromptPreprocessorController) {
  const notification = {
    title: "Manual reindex completed",
    description:
      "Manual Reindex Trigger is ON. The index will be rebuilt each chat when 'Skip Previously Indexed Files' is OFF. If 'Skip Previously Indexed Files' is ON, the index will only be rebuilt for new or changed files.",
  };

  try {
    await ctl.client.system.notify(notification);
  } catch (notifyError) {
    // The notification is purely informational; never fail the caller over it.
    console.warn("[BigRAG] Unable to send notification about manual reindex reset:", notifyError);
  }
}