// src/toolsProvider.ts
import { text, tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk";
import { spawn } from "child_process";
import { closeSync, existsSync, openSync, writeSync } from "fs";
import { basename, dirname, extname, join, resolve } from "path";
import { StringDecoder } from "string_decoder";
import { z } from "zod";
import { globalConfigSchematics } from "./config";
/**
 * Path of the Python OCR backend script: `python/ocr_backend.py`, resolved
 * one directory above the directory that contains this module.
 */
const BACKEND = resolve(__dirname, "..", "python", "ocr_backend.py");
/** Extracts a human-readable message from an arbitrary thrown value. */
function describeError(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Builds the list of tools this plugin exposes.
 *
 * Registers a single tool, `process_pdf`, which runs the Python backend script
 * (`BACKEND`) as a child process, streams the process' stdout into
 * `<pdf name>.ocr.txt` next to the PDF, forwards its stderr lines as live
 * status updates, and finally returns the output path plus a short preview.
 *
 * @param ctl Controller used to read the global plugin configuration.
 * @returns The tools to register (currently only `process_pdf`).
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  const tools: Tool[] = [];

  // NOTE: the bodies of the `text` templates below are intentionally kept
  // flush-left so the emitted strings stay exactly as they were.
  const processPdf = tool({
    name: "process_pdf",
    description: text`
Performs OCR on a PDF file. Use this tool whenever the user asks to
process, recognize, read, or extract text from a PDF document.
The recognized text is saved to a .txt file next to the PDF. Progress is
reported live while it runs. When finished, the tool returns the output
path and a short preview. Do NOT retry while it is running.
'file_path' is the absolute path to the PDF file on disk.
'pages' (optional) is a page range, e.g. "1-5" or "2,4,7".
'mode' (optional) is "ocr" (plain text) or "layout" (tables/formulas).
'language' (optional) language codes like "ru", "en" or "ru,en".
`,
    parameters: {
      file_path: z.string(),
      pages: z.string().optional(),
      mode: z.enum(["ocr", "layout"]).optional(),
      language: z.string().optional(),
    },
    implementation: async ({ file_path, pages, mode, language }, { status, warn, signal }) => {
      if (!existsSync(file_path)) return `Error: file not found: ${file_path}`;
      if (!existsSync(BACKEND)) return `Error: python backend not found: ${BACKEND}`;

      // Collect the OCR engines enabled in the plugin settings.
      const cfg = ctl.getGlobalPluginConfig(globalConfigSchematics);
      const engines: string[] = [];
      if (cfg.get("useQwenVL")) engines.push("qwen");
      if (cfg.get("useEasyOCR")) engines.push("easyocr");
      if (cfg.get("useTesseract")) engines.push("tesseract");
      if (engines.length === 0) {
        return "Error: no OCR engine is enabled in the plugin settings.";
      }
      if (engines.includes("qwen")) {
        warn(
          "The Qwen3-VL engine calls the LM Studio server and can conflict with the chat model. " +
            "If processing hangs, disable 'Use Qwen3-VL' in the plugin settings and keep EasyOCR.",
        );
      }

      // The result is written next to the PDF as "<name>.ocr.txt".
      const dir = dirname(file_path);
      const base = basename(file_path, extname(file_path));
      const outPath = join(dir, base + ".ocr.txt");

      // `||` (not `??`) is deliberate: empty-string settings fall back to the defaults too.
      const pythonPath = String(cfg.get("pythonPath") || "python");
      const lang = language && language.trim() ? language.trim() : String(cfg.get("languages") || "ru,en");
      const args = [
        BACKEND,
        "--pdf", file_path,
        "--engines", engines.join(","),
        "--lang", lang,
        "--dpi", String(cfg.get("dpi") || 200),
        "--mode", mode || "ocr",
        "--lmstudio-url", String(cfg.get("lmStudioServerUrl") || "http://localhost:1234/v1"),
        "--model", String(cfg.get("qwenModelId") || "qwen/qwen3-vl-8b"),
      ];
      if (pages) args.push("--pages", pages);

      // An "abort" listener attached to an already-aborted signal never fires,
      // so bail out here — before truncating any previous result file.
      if (signal.aborted) {
        return "OCR was cancelled before it started.";
      }

      let out: number;
      try {
        out = openSync(outPath, "w");
      } catch (e) {
        return `Error: cannot create output file next to the PDF: ${describeError(e)}`;
      }

      let preview = "";
      let stderrTail = "";
      let spawnFailure: string | undefined;
      let writeFailure: string | undefined;
      let code = -1;
      // Guards writeSync against stdout data that arrives after the fd is closed.
      let outputOpen = true;
      let onAbort: (() => void) | undefined;

      try {
        status(`Starting OCR (engines: ${engines.join(", ")})...`);
        const child = spawn(pythonPath, args, {
          stdio: ["ignore", "pipe", "pipe"],
          env: { ...process.env, PYTHONUTF8: "1", PYTHONIOENCODING: "utf-8" },
        });

        const killChild = () => {
          try {
            child.kill();
          } catch {
            /* best effort: the process may already be gone */
          }
        };
        onAbort = killChild;
        signal.addEventListener("abort", killChild);

        // Streaming decoders keep multi-byte UTF-8 characters intact when they
        // are split across chunk boundaries (Buffer#toString per chunk would
        // turn the halves into U+FFFD).
        const previewDecoder = new StringDecoder("utf8");
        const logDecoder = new StringDecoder("utf8");

        child.stdout.on("data", (chunk: Buffer) => {
          if (!outputOpen || writeFailure !== undefined) return;
          try {
            writeSync(out, chunk);
          } catch (e) {
            // Throwing from an event handler would surface as an uncaught
            // exception; record the failure and stop the backend instead.
            writeFailure = describeError(e);
            killChild();
            return;
          }
          if (preview.length < 4000) preview += previewDecoder.write(chunk);
        });

        // Partial last line of the previous stderr chunk, completed by the next one.
        let pendingLine = "";
        const reportLine = (line: string) => {
          const t = line.trim();
          if (t) status(t.replace(/^\[backend\]\s*/, ""));
        };
        child.stderr.on("data", (chunk: Buffer) => {
          const s = logDecoder.write(chunk);
          stderrTail = (stderrTail + s).slice(-2000);
          // Split on CR as well as LF so carriage-return-only progress updates
          // are still reported instead of piling up in the pending buffer.
          const lines = (pendingLine + s).split(/[\r\n]+/);
          pendingLine = lines.pop() ?? "";
          lines.forEach(reportLine);
          // Never buffer an unbounded amount of text that has no line break.
          if (pendingLine.length > 4096) {
            reportLine(pendingLine);
            pendingLine = "";
          }
        });

        code = await new Promise<number>((res) => {
          child.once("close", (c) => res(c ?? -1));
          child.once("error", (err) => {
            // Typically a spawn failure (e.g. interpreter not found); keep the
            // reason so it can be reported instead of a bare "exit code -1".
            spawnFailure = describeError(err);
            res(-1);
          });
        });

        // Flush whatever is still buffered from stderr.
        const logRest = logDecoder.end();
        stderrTail = (stderrTail + logRest).slice(-2000);
        reportLine(pendingLine + logRest);

        // If the promise was settled by "error" the process might still be
        // around; make sure it does not keep running unattended.
        if (spawnFailure !== undefined) killChild();
      } finally {
        if (onAbort) signal.removeEventListener("abort", onAbort);
        outputOpen = false;
        closeSync(out);
      }

      if (signal.aborted) {
        return `OCR was cancelled. Partial result (if any) is saved to: ${outPath}`;
      }
      if (spawnFailure !== undefined) {
        return (
          `Error: failed to run the OCR backend with "${pythonPath}": ${spawnFailure}. ` +
          "Check the Python path in the plugin settings."
        );
      }
      if (writeFailure !== undefined) {
        return `Error: failed to write OCR output to ${outPath}: ${writeFailure}`;
      }
      if (code !== 0) {
        return `OCR failed (exit code ${code}). Last log lines:\n${stderrTail}`;
      }

      const trimmed = preview.trim();
      const head =
        trimmed.length > 1500 ? trimmed.slice(0, 1500) + "\n...(truncated, full text in file)" : trimmed;
      return text`
OCR finished. Full text saved to:
${outPath}
Preview:
${head || "(empty)"}
`;
    },
  });

  tools.push(processPdf);
  return tools;
}