Project Files
src / tools / find_doc.ts
import {
  tool,
  type Tool,
  type ToolsProviderController,
  type ToolCallContext,
} from "@lmstudio/sdk";
import { z } from "zod";
import path from "path";
import fs from "fs";
import { createHash } from "crypto";
import { fileURLToPath } from "url";
import {
  setActiveChatContext,
  readState,
  writeStateAtomic,
  generatePreviewFromBuffer,
  materializeToolResultImageToFiles,
  appendPictures,
  getSelfPluginIdentifier,
  formatToolMetaBlock,
} from "../core-bundle.mjs";
import { configSchematics, globalConfigSchematics } from "../config.js";
import { ensureRetriever } from "../rag/retrieverSingleton.js";
import { TextChunker } from "../rag/chunker.js";
import { DocumentLoader } from "../documents/loader.js";
import type { SearchResult } from "../types.js";
import type { IndexedDocument } from "../sources/types.js";
import { loadRemoteIndexedDocuments } from "../sources/registry.js";
import { parseRemoteImageRefsFromChunk } from "../sources/remoteImageResolver.js";
import { sanitizeEmbeddedImagePayloads } from "../helpers/documentImages.js";
import {
  isLmStudioConversationSource,
  loadConversationFile,
} from "../sources/adapters/lmStudioConversationSourceAdapter.js";
import { defaultLmStudioHome } from "../sources/lmStudioConversationMarkdown.js";
// ─── parseMarkdownImageRefs ───────────────────────────────────────────────────
// Extract image refs from a chunk, resolve them relative to documentPath, and
// return absolute paths of files that actually exist on disk (or were just
// decoded from base64 into tempDir).
//
// Supported formats:
//   Linked file  : ![alt](path/to/image.png) or ![alt](<path with spaces.png>)
//   Inline base64: ![alt](data:image/png;base64,<data>)
//   HTML img tag : <img src="data:image/jpeg;base64,<data>">
//   Obsidian wiki: ![[image.png]]
//
// Linked destinations are tried literally first and, if that file does not
// exist, percent-decoded (Markdown commonly writes spaces as %20). Only regular
// files are accepted; http(s) and data: destinations are never treated as paths.
// Base64 images are only decoded when tempDir is provided.
//
// Returns de-duplicated absolute paths in discovery order.
export function parseMarkdownImageRefs(
  chunkContent: string,
  documentPath: string,
  tempDir?: string // if provided, base64 images are decoded and saved here
): string[] {
  const docDir = path.dirname(documentPath);
  const refs: string[] = [];
  const seen = new Set<string>();

  const addAbs = (abs: string): void => {
    if (!seen.has(abs)) {
      seen.add(abs);
      refs.push(abs);
    }
  };

  // A reference that resolves to a directory (or nothing) is not an image.
  const isRegularFile = (abs: string): boolean => {
    try {
      return fs.statSync(abs).isFile();
    } catch {
      return false;
    }
  };

  // Resolve a raw link destination against the document directory.
  const addLinkedFile = (raw: string): void => {
    if (!raw) return;
    const literal = path.resolve(docDir, raw);
    if (isRegularFile(literal)) {
      addAbs(literal);
      return;
    }
    // The literal name wins so files that really contain "%" keep working;
    // decoding is only a fallback.
    if (!raw.includes("%")) return;
    let decoded: string;
    try {
      decoded = decodeURIComponent(raw);
    } catch {
      return; // malformed escape sequence — nothing more to try
    }
    const abs = path.resolve(docDir, decoded);
    if (isRegularFile(abs)) addAbs(abs);
  };

  // ── 1. Linked file paths: ![alt](<path>) and ![alt](path "title") ──────────
  const reAngle = /!\[.*?\]\(<([^>]+)>\)/g;
  const rePlain = /!\[.*?\]\((?!<)(?!data:)([^)\s"]+)(?:\s+"[^"]*")?\)/g;
  for (const re of [reAngle, rePlain]) {
    let m: RegExpExecArray | null;
    while ((m = re.exec(chunkContent)) !== null) {
      const raw = (m[1] ?? "").trim();
      if (/^https?:\/\//i.test(raw) || /^data:/i.test(raw)) continue;
      addLinkedFile(raw);
    }
  }

  // ── 2. Obsidian wikilinks: ![[image.png]] ──────────────────────────────────
  const reWiki = /!\[\[([^\]]+)\]\]/g;
  let mw: RegExpExecArray | null;
  while ((mw = reWiki.exec(chunkContent)) !== null) {
    const raw = ((mw[1] ?? "").split("|")[0] ?? "").trim(); // strip optional alias
    if (/^https?:\/\//i.test(raw)) continue;
    addLinkedFile(raw);
  }

  if (!tempDir) return refs;

  // ── 3. Inline base64: ![alt](data:image/<mime>;base64,<data>) ──────────────
  // ── 4. HTML img tags: <img src="data:image/<mime>;base64,<data>"> ──────────
  // Both patterns capture the mime subtype in group 2 and the payload in group 3.
  const reB64Md = /!\[.*?\]\((data:image\/([a-zA-Z0-9+.-]+);base64,([A-Za-z0-9+/=\s]+))\)/g;
  const reB64Html = /<img\s[^>]*src=["'](data:image\/([a-zA-Z0-9+.-]+);base64,([A-Za-z0-9+/=\s]+))["'][^>]*>/gi;
  for (const re of [reB64Md, reB64Html]) {
    let mb: RegExpExecArray | null;
    while ((mb = re.exec(chunkContent)) !== null) {
      const mime = (mb[2] ?? "").toLowerCase();
      const b64 = (mb[3] ?? "").replace(/\s/g, "");
      const abs = decodeBase64ToFile(b64, mime, documentPath, tempDir);
      if (abs) addAbs(abs);
    }
  }
  return refs;
}
// Decode a base64 image and write it to tempDir under a deterministic,
// content-addressed filename: doc-b64-<document stem>-<sha256 prefix>.<ext>.
//
// The digest covers the whole payload. Hashing only the leading characters is
// not enough: every PNG (and every JFIF JPEG) starts with the same signature
// bytes, so a prefix-based name makes all images of one document collide.
//
// Parameters:
//   b64          – base64 payload with whitespace already removed
//   mime         – image subtype from the data URI (e.g. "png", "jpeg", "svg+xml")
//   documentPath – source document; its sanitized stem becomes the name prefix
//   tempDir      – directory the decoded file is written to
//
// Returns the absolute path on success, null when the payload is empty or the
// file could not be written. An already existing file is reused as-is.
function decodeBase64ToFile(
  b64: string,
  mime: string,
  documentPath: string, // used to derive a stable name prefix
  tempDir: string
): string | null {
  try {
    if (!b64) return null;
    // "jpeg" → "jpg"; anything else is reduced to its alphanumeric characters.
    const cleanedMime = mime.replace(/[^a-z0-9]/gi, "");
    const ext = mime === "jpeg" ? "jpg" : cleanedMime || "bin";
    const stem = path
      .basename(documentPath, path.extname(documentPath))
      .replace(/[^a-z0-9_-]/gi, "_");
    const hash = createHash("sha256").update(b64).digest("hex").slice(0, 16);
    const abs = path.join(tempDir, `doc-b64-${stem}-${hash}.${ext}`);
    if (!fs.existsSync(abs)) {
      const buf = Buffer.from(b64, "base64");
      if (buf.length === 0) return null; // nothing decodable in the payload
      fs.writeFileSync(abs, buf);
    }
    return abs;
  } catch {
    return null;
  }
}
// Scan a whole document for embedded base64 images and decode each one into
// tempDir. Used as a fallback when a chunk was cut in the middle of a base64
// payload. When contentOverride is given it is scanned instead of reading
// docPath from disk. Returns de-duplicated file paths, or [] if the document
// cannot be read.
function extractBase64ImagesFromDocument(docPath: string, tempDir: string, contentOverride?: string): string[] {
  // Group 1 is the mime subtype, group 2 the base64 payload.
  const patterns: RegExp[] = [
    // Markdown image with a data: URI, with or without angle brackets around it.
    // [^>)]+ stops at the first > or ) so a match never spans multiple images.
    /!\[.*?\]\(<?data:image\/([a-zA-Z0-9+.-]+);base64,([^>)]+)>?\)/g,
    // <img src="data:image/...">
    /<img\s[^>]*src=["']data:image\/([a-zA-Z0-9+.-]+);base64,([^"']+)["'][^>]*>/gi,
  ];
  try {
    const text = contentOverride ?? fs.readFileSync(docPath, "utf8");
    const collected: string[] = [];
    const known = new Set<string>();
    for (const pattern of patterns) {
      for (const hit of text.matchAll(pattern)) {
        const payload = hit[2].replace(/\s/g, "");
        const subtype = hit[1].toLowerCase();
        const written = decodeBase64ToFile(payload, subtype, docPath, tempDir);
        if (written && !known.has(written)) {
          known.add(written);
          collected.push(written);
        }
      }
    }
    return collected;
  } catch {
    return [];
  }
}
// Clean up chunk text before it is placed in the tool result.
// Drops <details> blocks (exported tool-call logs), tool-call artifact lines,
// JSON code fences and embedded image payloads, then squeezes the runs of
// newlines left behind and trims the result.
function sanitizeChunkContent(content: string): string {
  // Applied in order; each match is replaced by the empty string.
  const artifactPatterns: RegExp[] = [
    // Complete <details>…</details> blocks (multiline)
    /<details[\s\S]*?<\/details>/gi,
    // Unterminated <details> (chunk was cut before </details>): drop to the end
    /<details[\s\S]*/gi,
    // Standalone <summary>…</summary> remnants
    /<summary[\s\S]*?<\/summary>/gi,
    // Tool-call artifact lines: "- Arguments: …" / "- Result: …"
    /^[ \t]*-[ \t]+(Arguments|Result):.*$/gim,
    // Leftover ```json … ``` fences
    /```json\s*[\s\S]*?```/g,
  ];
  const withoutArtifacts = artifactPatterns.reduce(
    (text, pattern) => text.replace(pattern, ""),
    content
  );
  const withoutPayloads = sanitizeEmbeddedImagePayloads(withoutArtifacts, "[embedded image -- extracted]");
  // Three or more consecutive newlines become exactly two.
  return withoutPayloads.replace(/\n{3,}/g, "\n\n").trim();
}
// Decide whether the whole query is a direct source identifier rather than a
// keyword search. Accepted forms: github:// identifiers, LM Studio
// conversation sources, https URLs, and http URLs on localhost / 127.0.0.1.
// Returns the source (URLs in normalized form) or null for anything else,
// including queries that contain whitespace.
function parseDirectRemoteSourceQuery(query: string): string | null {
  const candidate = query.trim();
  if (candidate.length === 0 || /\s/.test(candidate)) return null;
  if (/^github:\/\//i.test(candidate) || isLmStudioConversationSource(candidate)) {
    return candidate;
  }

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    return null;
  }
  const isLoopbackHost = parsed.hostname === "localhost" || parsed.hostname === "127.0.0.1";
  const isAllowed = parsed.protocol === "https:" || (parsed.protocol === "http:" && isLoopbackHost);
  return isAllowed ? parsed.toString() : null;
}
// Turn directly loaded remote documents into synthetic search results.
// Each document is chunked with the configured chunk size/overlap; documents
// get ids -1, -2, … and chunks get ids -1, -2, … across all documents. Every
// result carries score 1 / distance 0, and at most `limit` results are returned.
function buildDirectRemoteResults(docs: IndexedDocument[], chunkSize: number, chunkOverlap: number, limit: number): SearchResult[] {
  const results: SearchResult[] = [];
  let nextChunkId = -1;

  for (const [position, doc] of docs.entries()) {
    if (results.length >= limit) break;

    const documentId = -(position + 1);
    const chunker = new TextChunker({
      chunkSize,
      chunkOverlap,
      documentPath: doc.sourceId,
      documentId,
    });

    // Decorate every chunk with the source details of its document.
    const decorated = chunker.chunk(doc.content).map((piece) => ({
      ...piece,
      id: nextChunkId--,
      metadata: {
        ...piece.metadata,
        sourceKind: doc.sourceKind,
        canonicalUrl: doc.canonicalUrl,
        baseUrl: doc.baseUrl,
        version: doc.version,
        imageRefs: doc.imageRefs,
      },
    }));

    // The guard at the top of the loop keeps `room` positive here.
    const room = limit - results.length;
    for (const chunk of decorated.slice(0, room)) {
      results.push({
        chunk,
        document: {
          id: documentId,
          path: doc.sourceId,
          hash: doc.contentHash,
          title: doc.title,
          metadata: doc.metadata,
          updatedAt: new Date(),
        },
        score: 1,
        distance: 0,
      });
    }
  }
  return results;
}
// Derive the short handle "l<13-digit chat id>" from a conversation source,
// given either an lmstudio-conversation:// URI or a path ending in
// <chat id>.conversation.json. Returns null for any other source.
function conversationReadHandle(source: string): string | null {
  const shapes: RegExp[] = [
    /^lmstudio-conversation:\/\/(\d{13})$/i,
    /(?:^|\/)(\d{13})\.conversation\.json$/i,
  ];
  for (const shape of shapes) {
    const hit = shape.exec(source);
    if (hit) return `l${hit[1]}`;
  }
  return null;
}
// A document resolved directly from a filename-like query (bypassing the
// retriever). Produced by resolveFilenameFastPathDocs and consumed by
// buildFilenameFastPathResults and the image-extraction step of find_doc.
interface FilenameFastPathDoc {
// File path for local documents, or the conversation's sourceId.
path: string;
// Optional document title taken from the loader's metadata.
title?: string;
// Full document text; this is what gets chunked.
content: string;
// Loader hash for files; version (or fetchedAt) for conversations.
hash: string;
// Which loader produced the document.
sourceKind: "file" | "conversation";
// Loader-provided metadata, passed through to the result document.
metadata: Record<string, unknown>;
// Only set for conversation documents; read via localImageRefsFromMetadata,
// which accepts strings or objects with a string `url` field.
imageRefs?: unknown;
}
// ─── Tool ─────────────────────────────────────────────────────────────────────
// Build the `find_doc` tool.
//
// The tool implementation runs these steps:
//   1. resolve the chat working directory (previews and decoded images go there),
//   2. make sure the retriever is ready and read plugin/global configuration,
//   3. retrieve chunks — from a direct remote source, a filename fast path, or
//      a retriever search,
//   4. render the chunks as cited text grouped by document,
//   5. collect local and remote image references (bounded by maxImagesPerResponse),
//   6. generate previews / fetch remote images and register them in chat state,
//   7. return the text plus a hint listing the registered pN keys.
//
// Every failure is reported as a plain string result rather than thrown.
export function createFindDocTool(ctl: ToolsProviderController): Tool {
  return tool({
    name: "find_doc",
    description: `Search personal documentation and return relevant text context plus associated screenshots.
Use this when the user asks about setup, configuration, workflows, or any topic that may be covered in their local documentation. The tool retrieves the most relevant document sections and, when available, the images embedded in those sections.
Parameters:
- query: Compact search query, not a prompt. Use only the essential keywords, product names, feature names, error text, file/tool names, and short synonyms. Prefer 3-12 words. Multilingual keyword variants are useful when the user's wording and the docs may differ. Do not include instructions, reasoning, politeness, answer format requests, or full sentences unless the exact sentence is quoted error text or a direct URL/source.
- show_images: When true (default), discovered images are registered as pN entries for downstream analysis or annotation.
Workflow reference: call find_doc("user-docs workflow tools") to load the file "USER-DOCS.md" — it contains the full usage guide for this plugin, including when to use each tool and how to compose answers with images.
${formatToolMetaBlock()}`,
    parameters: {
      query: z.string().describe("Compact documentation search query, not a prompt. Use essential keywords/names/error text/synonyms only; prefer 3-12 words. Direct URLs or source identifiers are allowed as the whole query."),
      show_images: z
        .boolean()
        .optional()
        .default(true)
        // Images are registered under pN keys (see the result hint below).
        .describe("When true: discovered images from retrieved chunks are registered as pN candidates."),
    },
    implementation: async (args: any, ctx: ToolCallContext) => {
      // Status updates are informational only; a failing update must never
      // abort the tool call.
      const status = (message: string): void => {
        try {
          ctx.status(message);
        } catch {
          // best-effort
        }
      };

      // ── 1. Resolve working directory ────────────────────────────────────────
      // Resolved once, inside the guard, so a throwing getWorkingDirectory()
      // ends in the error result below instead of escaping the tool.
      let activeChatId: string | null = null;
      let chatWd = "";
      try {
        const workingDir = ctl.getWorkingDirectory();
        if (typeof workingDir === "string" && workingDir.trim().length > 0) {
          chatWd = workingDir;
          const chatId = path.basename(workingDir);
          // Chat directories are named after a 13-digit id.
          if (/^\d{13}$/.test(chatId)) {
            activeChatId = chatId;
            setActiveChatContext({ chatId, workingDir, requestId: `tool-${Date.now()}` });
          }
        }
      } catch {
        // best-effort
      }
      if (!chatWd) {
        return "find_doc failed: could not resolve LM Studio chat working directory.";
      }

      // ── 2. Ensure retriever is ready ────────────────────────────────────────
      let retriever: Awaited<ReturnType<typeof ensureRetriever>>;
      try {
        retriever = await ensureRetriever(ctl, status);
        retriever.setActiveChatId(activeChatId);
      } catch (err) {
        return `find_doc: Document index setup failed.\n\n${
          err instanceof Error ? err.message : String(err)
        }`;
      }

      const pluginConfig = ctl.getPluginConfig(configSchematics);
      const query: string = typeof args?.query === "string" ? args.query.trim() : "";
      if (!query) return "find_doc: query must not be empty.";
      const showImages: boolean = args?.show_images !== false;

      // The global-config accessor is looked up under either of two names.
      const getter: any =
        (ctl as any).getGlobalPluginConfig || (ctl as any).getGlobalConfig;
      const globalConfig = getter ? getter.call(ctl, globalConfigSchematics) : null;
      const remoteFetchTimeoutMs: number = globalConfig?.get("remoteFetchTimeoutMs") ?? 10000;
      const remoteMaxBytes: number = globalConfig?.get("remoteMaxBytes") ?? 15728640;
      const remoteMaxPages: number = globalConfig?.get("remoteMaxPages") ?? 50;
      const githubToken: string = globalConfig?.get("githubToken") ?? "";
      const huggingFaceToken: string = globalConfig?.get("huggingFaceToken") ?? "";
      const chunkSize: number = globalConfig?.get("chunkSize") ?? 1000;
      const chunkOverlap: number = globalConfig?.get("chunkOverlap") ?? 100;
      const retrievalLimit: number = globalConfig?.get("retrievalLimit") ?? 5;
      const directRemoteSource = parseDirectRemoteSourceQuery(query);

      // ── 3. Retrieval ────────────────────────────────────────────────────────
      // If the FileWatcher queued new/changed files since last search, wait for
      // them to finish indexing before running the query. Indexing time must not
      // count against retrieval/fetch timeouts.
      const queueSize = retriever.getStats().indexingQueue ?? 0;
      if (queueSize > 0) {
        status(`Waiting for index to catch up (${queueSize} file${queueSize !== 1 ? "s" : ""} pending)…`);
        let lastRemaining = -1;
        await retriever.waitForIndexing(null, (remaining, currentFile) => {
          // Only report when the remaining count actually changed.
          if (remaining === lastRemaining) return;
          lastRemaining = remaining;
          const name = currentFile ? path.basename(currentFile) : "document";
          status(`Indexing: ${name} (${remaining} file${remaining !== 1 ? "s" : ""} remaining)…`);
        });
      }

      status("Searching documentation…");
      let results: Awaited<ReturnType<typeof retriever.search>>;
      let filenameFastPathDocs: FilenameFastPathDoc[] = [];
      try {
        if (directRemoteSource) {
          status(`Loading remote source: ${directRemoteSource}`);
          const docs = await loadRemoteIndexedDocuments({
            remoteSources: [directRemoteSource],
            fetchTimeoutMs: remoteFetchTimeoutMs,
            maxBytes: remoteMaxBytes,
            maxPages: Math.max(1, Math.min(remoteMaxPages, retrievalLimit)),
            githubToken,
            huggingFaceToken,
          });
          // Conversation sources are returned directly and not added to the index.
          if (!isLmStudioConversationSource(directRemoteSource)) {
            for (const doc of docs) {
              await retriever.indexIndexedDocument(doc);
            }
          }
          results = buildDirectRemoteResults(docs, chunkSize, chunkOverlap, retrievalLimit);
          if (results.length === 0) {
            return `find_doc: Remote source could not be loaded or contained no indexable text: ${directRemoteSource}`;
          }
        } else {
          filenameFastPathDocs = await resolveFilenameFastPathDocs(query, globalConfig, retrievalLimit);
          if (filenameFastPathDocs.length > 0) {
            status(`Resolved filename directly: ${filenameFastPathDocs.map((doc) => path.basename(doc.path)).join(", ")}`);
            results = buildFilenameFastPathResults(filenameFastPathDocs, chunkSize, chunkOverlap, retrievalLimit);
          } else {
            results = await retriever.search(query);
          }
        }
      } catch (err) {
        console.error("[find_doc] retrieval error:", err);
        return `find_doc failed: ${err instanceof Error ? err.message : String(err)}`;
      }

      status(
        `Found ${results.length} chunk${results.length !== 1 ? "s" : ""} in ` +
          `${new Set(results.map((r) => r.document.path)).size} document(s)`
      );
      if (results.length === 0) {
        const dirs: string[] = retriever.getStats().watchedDirectories ?? [];
        const dirList = dirs.length > 0 ? dirs.join(", ") : "(none configured)";
        return `No relevant documentation found for: "${query}".\nIndexed directories: ${dirList}`;
      }

      // ── 4. Build text context ───────────────────────────────────────────────
      // Group chunks by document (first-seen order) and number them globally.
      const resultsByDoc = new Map<string, typeof results>();
      for (const r of results) {
        const dp = r.document.path;
        if (!resultsByDoc.has(dp)) resultsByDoc.set(dp, []);
        resultsByDoc.get(dp)!.push(r);
      }
      let contextText = "Retrieved from indexed documentation:\n\n";
      let citationNum = 1;
      for (const [docPath, docResults] of resultsByDoc) {
        const docName = path.basename(docPath);
        const readHandle = conversationReadHandle(docPath);
        contextText += readHandle
          ? `=== ${docName} (read_doc filename: ${readHandle}) ===\n\n`
          : `=== ${docName} ===\n\n`;
        for (const r of docResults) {
          const tag = r.chunk.metadata?.isTable ? " [TABLE]" : "";
          contextText += `[${citationNum}]${tag}\n${sanitizeChunkContent(r.chunk.content)}\n\n`;
          citationNum++;
        }
      }

      // ── 5. Image extraction ─────────────────────────────────────────────────
      if (!showImages) {
        return contextText;
      }
      const maxImages: number = pluginConfig.get("maxImagesPerResponse") ?? 5;
      const previewMaxSum: number = pluginConfig.get("imagePreviewMaxSum") ?? 3072;
      const previewQuality: number = pluginConfig.get("imagePreviewQuality") ?? 85;

      // Collect unique absolute image paths from all chunks
      const imagePathSet = new Set<string>();
      const remoteImageRefs: Array<{ url: string; altText?: string }> = [];
      const remoteImageUrlSet = new Set<string>();
      const docsNeedingBase64Scan = new Set<string>();

      // Local and remote candidates share one budget.
      const imageBudgetSpent = (): boolean =>
        imagePathSet.size + remoteImageRefs.length >= maxImages;
      // Add local paths one by one, stopping as soon as the budget is used up.
      const addLocalImages = (paths: Iterable<string>): void => {
        for (const abs of paths) {
          imagePathSet.add(abs);
          if (imageBudgetSpent()) break;
        }
      };

      // Fast-path documents are scanned as a whole, not per chunk.
      for (const doc of filenameFastPathDocs) {
        if (imageBudgetSpent()) break;
        addLocalImages(parseMarkdownImageRefs(doc.content, doc.path, chatWd));
        if (!imageBudgetSpent() && doc.content.includes("data:image/")) {
          addLocalImages(extractBase64ImagesFromDocument(doc.path, chatWd, doc.content));
        }
        if (doc.sourceKind === "conversation" && !imageBudgetSpent()) {
          addLocalImages(localImageRefsFromMetadata(doc.imageRefs));
        }
      }

      for (const r of results) {
        if (!r.chunk.metadata?.documentPath) continue;
        const docPath = r.chunk.metadata.documentPath;
        const sourceKind = typeof r.chunk.metadata?.sourceKind === "string" ? r.chunk.metadata.sourceKind : "file";
        if (sourceKind === "github" || sourceKind === "huggingface" || sourceKind === "https") {
          const refs = parseRemoteImageRefsFromChunk(r.chunk.content, r.chunk.metadata);
          for (const ref of refs) {
            if (!remoteImageUrlSet.has(ref.url)) {
              remoteImageUrlSet.add(ref.url);
              remoteImageRefs.push({ url: ref.url, altText: ref.altText });
            }
            if (imageBudgetSpent()) break;
          }
        } else {
          const refs = parseMarkdownImageRefs(r.chunk.content, docPath, chatWd);
          addLocalImages(refs);
          // Conversation chunks without inline refs fall back to metadata refs.
          if (sourceKind === "conversation" && refs.length === 0) {
            addLocalImages(localImageRefsFromMetadata(r.chunk.metadata?.imageRefs));
          }
        }
        // Chunk contains a data: URI start but it was likely truncated — schedule
        // a full-document scan so we get the complete base64 payload.
        if (sourceKind === "file" && r.chunk.content.includes("data:image/")) {
          docsNeedingBase64Scan.add(docPath);
        }
        if (imageBudgetSpent()) break;
      }

      // Full-document scan for documents with embedded base64 images
      for (const docPath of docsNeedingBase64Scan) {
        if (imageBudgetSpent()) break;
        addLocalImages(extractBase64ImagesFromDocument(docPath, chatWd));
      }

      const imagePaths = Array.from(imagePathSet);
      if (imagePaths.length === 0 && remoteImageRefs.length === 0) {
        return contextText + "\n(No image references found in retrieved chunks.)";
      }

      // ── 6. Preview generation + state registration ──────────────────────────
      await fs.promises.mkdir(chatWd, { recursive: true }).catch(() => {});
      const previewSpec = {
        maxDim: previewMaxSum,
        maxSum: previewMaxSum,
        mode: "sum" as const,
        quality: previewQuality,
      };
      const pluginId = getSelfPluginIdentifier() ?? "unknown";
      const imageRecords: any[] = [];
      const remoteSkips: Record<string, number> = {};

      // Local images: a failed preview is logged and skipped.
      for (let imgIdx = 0; imgIdx < imagePaths.length; imgIdx++) {
        const srcAbs = imagePaths[imgIdx];
        try {
          status(`Generating image preview ${imgIdx + 1}/${imagePaths.length}…`);
          const srcBuf = await fs.promises.readFile(srcAbs);
          const originalFilename = path.basename(srcAbs);
          const baseStem = path.basename(srcAbs, path.extname(srcAbs)).replace(/ /g, "_");
          const destFilename = `doc-img-${baseStem}.jpg`;
          const preview = await generatePreviewFromBuffer(
            srcBuf,
            chatWd,
            originalFilename,
            previewSpec,
            { customFilename: destFilename }
          );
          imageRecords.push({
            filename: preview.previewFilename,
            preview: preview.previewFilename,
            sourceTool: `${pluginId}/find_doc`,
            sourceUrl: `file://${srcAbs}`,
          });
        } catch (err) {
          console.warn(`[find_doc] preview generation failed for ${srcAbs}:`, String(err));
        }
      }

      // Remote images: failures are tallied by reason and reported in the result.
      for (let imgIdx = 0; imgIdx < remoteImageRefs.length && imageRecords.length < maxImages; imgIdx++) {
        const ref = remoteImageRefs[imgIdx];
        try {
          status(`Fetching remote image ${imgIdx + 1}/${remoteImageRefs.length}…`);
          // File names are derived from the URL so repeated calls reuse them.
          const hash = Buffer.from(ref.url).toString("base64url").slice(0, 16);
          const originalBaseName = `doc-remote-${hash}.png`;
          const previewBaseName = `doc-remote-preview-${hash}.jpg`;
          const originalAbs = path.join(chatWd, originalBaseName);
          const previewAbs = path.join(chatWd, previewBaseName);
          await materializeToolResultImageToFiles({
            url: ref.url,
            originalAbs,
            previewAbs,
            preview: {
              maxDim: previewMaxSum,
              quality: previewQuality,
            },
            timeoutMs: remoteFetchTimeoutMs,
            maxBytes: remoteMaxBytes,
          });
          imageRecords.push({
            filename: originalBaseName,
            preview: previewBaseName,
            sourceTool: `${pluginId}/find_doc`,
            sourceUrl: ref.url,
            title: ref.altText,
          });
        } catch (err) {
          const reason = err instanceof Error ? err.message : String(err);
          remoteSkips[reason] = (remoteSkips[reason] ?? 0) + 1;
          console.warn(`[find_doc] remote image materialization failed for ${ref.url}:`, reason);
        }
      }

      let assignedKeys: string[] = [];
      if (imageRecords.length > 0) {
        try {
          const state = await readState(chatWd);
          const appendResult = appendPictures(state, imageRecords);
          if (appendResult.changed) {
            await writeStateAtomic(chatWd, state);
            console.log("[find_doc] state written, nextPictureP:", state.counters?.nextPictureP);
          }
          // Map our records back to their p-index via sourceUrl; the first
          // state entry per sourceUrl wins.
          const pictures: any[] = Array.isArray((state as any)?.pictures) ? (state as any).pictures : [];
          const recordsBySourceUrl = new Map<string, any>();
          for (const rec of pictures) {
            const sourceUrl = typeof rec?.sourceUrl === "string" ? rec.sourceUrl.trim() : "";
            if (sourceUrl && typeof rec?.p === "number" && !recordsBySourceUrl.has(sourceUrl)) {
              recordsBySourceUrl.set(sourceUrl, rec);
            }
          }
          const resolvedRecords = imageRecords
            .map((rec) => {
              const sourceUrl = typeof rec?.sourceUrl === "string" ? rec.sourceUrl.trim() : "";
              return sourceUrl ? recordsBySourceUrl.get(sourceUrl) : undefined;
            })
            .filter((r): r is any => !!r);
          assignedKeys = resolvedRecords
            .map((r) => (typeof r.p === "number" ? `p${r.p}` : null))
            .filter((k): k is string => k !== null);
        } catch (err) {
          console.warn("[find_doc] state update failed:", String(err));
        }
      }

      // ── 7. Tool result ───────────────────────────────────────────────────────
      const n = imageRecords.length;
      status(
        n > 0
          ? `Done — ${n} image${n !== 1 ? "s" : ""} registered`
          : `Done — ${results.length} chunk${results.length !== 1 ? "s" : ""} retrieved`
      );
      let imageHint = "";
      if (assignedKeys.length > 0) {
        const keyList = assignedKeys.map((k) => `"${k}"`).join(", ");
        imageHint =
          `\n\n${assignedKeys.length} image candidate${assignedKeys.length > 1 ? "s" : ""} from the documentation ` +
          `registered as ${assignedKeys.join(", ")}. Call: review_image({"targets":[${keyList}]})`;
      } else if (n > 0) {
        imageHint = `\n\n${n} image candidate${n > 1 ? "s" : ""} from the documentation registered (p-index unavailable).`;
      }
      const remoteSkipEntries = Object.entries(remoteSkips);
      if (remoteSkipEntries.length > 0) {
        imageHint += `\nRemote image candidates skipped: ${remoteSkipEntries
          .map(([reason, count]) => `${count} (${reason})`)
          .join(", ")}.`;
      }
      return contextText + imageHint;
    },
  });
}
// Turn a metadata `imageRefs` value into existing local file paths.
// Entries may be plain strings or objects with a string `url` field; anything
// else is ignored. Non-array input yields []. The result is de-duplicated and
// keeps first-seen order.
function localImageRefsFromMetadata(value: unknown): string[] {
  if (!Array.isArray(value)) return [];

  const unique = new Set<string>();
  for (const entry of value) {
    let candidate = "";
    if (typeof entry === "string") {
      candidate = entry;
    } else if (entry && typeof entry === "object" && typeof (entry as any).url === "string") {
      candidate = (entry as any).url;
    }
    const resolved = localImageRefToPath(candidate);
    // The disk check only runs for paths not collected yet.
    if (resolved && !unique.has(resolved) && fs.existsSync(resolved)) {
      unique.add(resolved);
    }
  }
  return [...unique];
}
// Convert an image reference into a local filesystem path.
//   - data: and http(s) references are not local  → null
//   - file:// URLs are converted with fileURLToPath, which percent-decodes the
//     path and handles platform specifics such as Windows drive letters
//   - other values are returned unchanged when they are absolute paths,
//     otherwise null
function localImageRefToPath(value: string): string | null {
  const ref = String(value ?? "").trim();
  if (!ref || /^data:/i.test(ref) || /^https?:\/\//i.test(ref)) return null;
  if (/^file:\/\//i.test(ref)) {
    let url: URL;
    try {
      url = new URL(ref);
    } catch {
      return null;
    }
    try {
      return fileURLToPath(url);
    } catch {
      // URLs that cannot be decoded strictly (for example a stray "%" from a
      // hand-built "file://" + path string): fall back to the raw pathname.
      return url.pathname;
    }
  }
  return path.isAbsolute(ref) ? ref : null;
}
// Filename fast path: when the query names a document instead of describing a
// topic, load that document directly.
//   - Conversation identifiers are loaded through loadConversationFile (without
//     thinking blocks or tool calls); a load failure yields [].
//   - Names ending in a document extension are looked up under the configured
//     notesDirectory / contentDirectories, at most `limit` files; files that
//     fail to load are logged and skipped.
// Returns [] when the query is not filename-like.
async function resolveFilenameFastPathDocs(
  query: string,
  globalConfig: any,
  limit: number
): Promise<FilenameFastPathDoc[]> {
  const requested = normalizeFilenameQuery(query);
  if (!requested) return [];

  const conversationPath = resolveConversationFilenameQuery(requested);
  if (conversationPath) {
    try {
      const lmStudioHome = inferLmStudioHomeFromConversationFile(conversationPath);
      const conversation = await loadConversationFile(conversationPath, lmStudioHome, {
        includeThinking: false,
        includeToolCalls: false,
      });
      const conversationDoc: FilenameFastPathDoc = {
        path: conversation.sourceId,
        title: conversation.title,
        content: conversation.rawContent,
        hash: conversation.version ?? conversation.fetchedAt,
        sourceKind: "conversation",
        metadata: conversation.metadata ?? {},
        imageRefs: (conversation as any).imageRefs,
      };
      return [conversationDoc];
    } catch {
      return [];
    }
  }

  if (!looksLikeDocumentFilename(requested)) return [];

  // Search roots: notes directory first, then content directories; blank
  // entries and duplicates are dropped.
  const notesDirectory: string = globalConfig?.get("notesDirectory") ?? "";
  const configuredDirs = globalConfig?.get("contentDirectories");
  const contentDirectories: string[] = Array.isArray(configuredDirs) ? configuredDirs : [];
  const usableRoots = [notesDirectory, ...contentDirectories].filter(
    (dir) => typeof dir === "string" && dir.trim()
  );
  const roots = [...new Set(usableRoots)];

  const loaded: FilenameFastPathDoc[] = [];
  for (const filePath of resolveLocalFilenameMatches(requested, roots, limit)) {
    try {
      const parsed = await DocumentLoader.load(filePath, { extractTables: true });
      loaded.push({
        path: filePath,
        title: parsed.metadata.title,
        content: parsed.content,
        hash: parsed.hash,
        sourceKind: "file",
        metadata: parsed.metadata,
      });
    } catch (err) {
      console.warn(`[find_doc] filename fastpath failed for ${filePath}:`, String(err));
    }
  }
  return loaded;
}
// Turn fast-path documents into synthetic search results.
// Documents get ids -1000, -1001, … and chunks get ids -100000, -100001, …;
// every result carries score 1 / distance 0. Conversation chunks additionally
// carry the document's imageRefs in their metadata. At most `limit` results
// are returned.
function buildFilenameFastPathResults(
  docs: FilenameFastPathDoc[],
  chunkSize: number,
  chunkOverlap: number,
  limit: number
): SearchResult[] {
  const results: SearchResult[] = [];
  let nextDocumentId = -1000;
  let nextChunkId = -100000;

  for (const doc of docs) {
    const documentId = nextDocumentId--;
    const chunker = new TextChunker({
      chunkSize,
      chunkOverlap,
      documentPath: doc.path,
      documentId,
    });
    const conversationExtras = doc.sourceKind === "conversation" ? { imageRefs: doc.imageRefs } : {};
    const decorated = chunker.chunk(doc.content).map((piece) => ({
      ...piece,
      id: nextChunkId--,
      metadata: {
        ...piece.metadata,
        sourceKind: doc.sourceKind,
        ...conversationExtras,
      },
    }));

    // Clamp so a non-positive limit takes nothing (a negative slice end would
    // count from the back).
    const room = Math.max(0, limit - results.length);
    for (const chunk of decorated.slice(0, room)) {
      results.push({
        chunk,
        document: {
          id: documentId,
          path: doc.path,
          hash: doc.hash,
          title: doc.title,
          metadata: doc.metadata,
          updatedAt: new Date(),
        },
        score: 1,
        distance: 0,
      });
    }
    if (results.length >= limit) break;
  }
  return results;
}
// Normalize a query that may be a filename: trim it, drop one leading and one
// trailing quote character, and convert backslashes to forward slashes.
// Returns null when nothing is left or the text spans multiple lines.
function normalizeFilenameQuery(query: string): string | null {
  const unquoted = query.trim().replace(/^['"]|['"]$/g, "");
  if (unquoted.length === 0 || unquoted.includes("\n")) return null;
  return unquoted.split("\\").join("/");
}
// True when the last path segment ends in .md, .markdown, .txt or .pdf
// (case-insensitive).
function looksLikeDocumentFilename(value: string): boolean {
  const documentSuffixes = [".md", ".markdown", ".txt", ".pdf"];
  const lastSegment = path.basename(value).toLowerCase();
  return documentSuffixes.some((suffix) => lastSegment.endsWith(suffix));
}
// Resolve a requested filename to concrete, loader-supported files.
// Resolution order:
//   1. an existing absolute path is used on its own,
//   2. the request resolved against each root, if it stays inside that root,
//   3. a recursive search of each root for files with the same basename
//      (compared case-insensitively).
// Returns at most `limit` unique paths in discovery order.
function resolveLocalFilenameMatches(requested: string, roots: string[], limit: number): string[] {
  const matches: string[] = [];
  const seen = new Set<string>();

  const remember = (candidate: string): void => {
    if (!seen.has(candidate) && DocumentLoader.isSupported(candidate)) {
      seen.add(candidate);
      matches.push(candidate);
    }
  };

  // 1. Existing absolute path: no root lookup at all.
  if (path.isAbsolute(requested) && fs.existsSync(requested)) {
    remember(requested);
    return matches;
  }

  // 2. Path relative to a root.
  for (const root of roots) {
    const candidate = path.resolve(root, requested);
    if (isInsideDirectory(candidate, root) && fs.existsSync(candidate)) {
      remember(candidate);
    }
    if (matches.length >= limit) return matches;
  }

  // 3. Basename search below each root.
  const wantedBase = path.basename(requested).toLowerCase();
  if (wantedBase) {
    for (let i = 0; i < roots.length && matches.length < limit; i++) {
      collectFilesByBasename(roots[i], wantedBase, matches, seen, limit);
    }
  }
  return matches;
}
// Depth-first search below `root` for loader-supported files whose lowercased
// name equals `requestedBase`. Hits are appended to `matches` and recorded in
// `seen`; the search stops once `matches` holds `limit` entries. Entries named
// node_modules or starting with "." are skipped, and unreadable directories
// are ignored.
function collectFilesByBasename(
  root: string,
  requestedBase: string,
  matches: string[],
  seen: Set<string>,
  limit: number
): void {
  if (matches.length >= limit) return;

  let listing: fs.Dirent[];
  try {
    listing = fs.readdirSync(root, { withFileTypes: true });
  } catch {
    return;
  }

  for (const item of listing) {
    if (matches.length >= limit) return;
    const isSkipped = item.name === "node_modules" || item.name.startsWith(".");
    if (isSkipped) continue;

    const fullPath = path.join(root, item.name);
    if (item.isDirectory()) {
      collectFilesByBasename(fullPath, requestedBase, matches, seen, limit);
    } else if (
      item.isFile() &&
      item.name.toLowerCase() === requestedBase &&
      !seen.has(fullPath) &&
      DocumentLoader.isSupported(fullPath)
    ) {
      seen.add(fullPath);
      matches.push(fullPath);
    }
  }
}
// Map a conversation identifier to the path of its conversation file.
// Accepted forms:
//   l<13 digits>                               (short handle)
//   lmstudio-conversation(s)://<13 digits>     (URI)
//   <13 digits>.conversation.json              (bare filename)
//   an absolute path recognized as a conversation source (returned unchanged)
// The first three resolve below <LM Studio home>/conversations. Returns null
// for anything else.
function resolveConversationFilenameQuery(value: string): string | null {
  const conversationFile = (fileName: string): string =>
    path.join(defaultLmStudioHome(), "conversations", fileName);

  const idMatch =
    /^l(\d{13})$/i.exec(value) ?? /^lmstudio-conversations?:\/\/(\d{13})$/i.exec(value);
  if (idMatch) return conversationFile(`${idMatch[1]}.conversation.json`);

  // A bare filename is used as given, including its original letter case.
  if (/^\d{13}\.conversation\.json$/i.test(value)) return conversationFile(value);

  const isAbsoluteConversation = path.isAbsolute(value) && isLmStudioConversationSource(value);
  return isAbsoluteConversation ? value : null;
}
// Work out the LM Studio home directory for a conversation file: when the file
// sits in a directory named "conversations", the home is that directory's
// parent; otherwise the default LM Studio home is used.
function inferLmStudioHomeFromConversationFile(filePath: string): string {
  const parentDir = path.dirname(filePath);
  if (path.basename(parentDir) !== "conversations") {
    return defaultLmStudioHome();
  }
  return path.dirname(parentDir);
}
// True when filePath is `root` itself or lies somewhere below it.
// The check looks for a real ".." path segment at the start of the relative
// path, so entries whose names merely begin with two dots (e.g. "..notes")
// still count as inside. On Windows a path on another drive comes back from
// path.relative as absolute and is rejected.
function isInsideDirectory(filePath: string, root: string): boolean {
  const relative = path.relative(path.resolve(root), filePath);
  if (relative === "") return true;
  if (path.isAbsolute(relative)) return false;
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  return !escapesRoot;
}