// toolsProvider.js
"use strict";
/**
* Document Parser Plugin — toolsProvider
*
* Tools:
* parse_document — extract full text from PDF/DOCX/spreadsheet/txt
* search_document — keyword/regex scan through a document
*/
/**
 * Module-interop helper (same shape as the `__createBinding` helper emitted by
 * the TypeScript compiler). Re-exposes property `k` of `m` on `o` under the
 * name `k2` (defaults to `k`).
 *
 * A pre-existing `this.__createBinding` wins; otherwise one of two local
 * implementations is chosen depending on whether `Object.create` exists.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    // Descriptor-based variant: installs a getter so later writes to the
    // source property stay visible, unless the source descriptor can be
    // reused as-is.
    function defineBinding(target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            // Property is missing (or inherited): always read through a getter.
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor property: only reused when the source is flagged __esModule.
            needsGetter = !source.__esModule;
        }
        else {
            // Data property: reused only when it can never change.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    // Plain-assignment variant: copies the current value once.
    function copyBinding(target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    }
    return Object.create ? defineBinding : copyBinding;
})();
/**
 * Module-interop helper: stores `v` as the "default" member of `o`.
 *
 * A pre-existing `this.__setModuleDefault` wins; otherwise the property is
 * defined via a descriptor when `Object.create` exists, or by plain
 * assignment when it does not.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    function defineDefault(target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    function assignDefault(target, value) {
        target["default"] = value;
    }
    return Object.create ? defineDefault : assignDefault;
})();
/**
 * Module-interop helper: wraps a required module in a namespace-like object.
 *
 * - A module flagged `__esModule` is returned untouched.
 * - Otherwise a fresh object is built: every own property of `mod` except
 *   "default" is bound onto it via `__createBinding`, and `mod` itself is
 *   attached as "default" via `__setModuleDefault`.
 *
 * A pre-existing `this.__importStar` wins over the local implementation.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Key lister, resolved on first use and then cached.
    var listOwnKeys = null;
    function resolveKeyLister() {
        return Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var key in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            if (listOwnKeys === null) {
                listOwnKeys = resolveKeyLister();
            }
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * Module-interop helper: guarantees the result has a "default" member.
 * Modules flagged `__esModule` pass through unchanged; anything else is
 * wrapped as `{ default: mod }`. A pre-existing `this.__importDefault` wins.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.toolsProvider = void 0;
const sdk_1 = require("@lmstudio/sdk");
const promises_1 = require("fs/promises");
const path_1 = require("path");
const zod_1 = require("zod");
const config_1 = require("./config");
const peers_1 = require("./peers");
const pdf_parse_1 = __importDefault(require("pdf-parse"));
const mammoth_1 = __importDefault(require("mammoth"));
const XLSX = __importStar(require("xlsx"));
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Serialise a value as human-readable JSON (two-space indentation).
 *
 * @param {*} obj - Value to serialise.
 * @returns {string|undefined} Whatever `JSON.stringify` yields for `obj`.
 */
function json(obj) {
    const indentWidth = 2;
    return JSON.stringify(obj, undefined, indentWidth);
}
/**
 * Wrap a tool implementation so it always resolves to a value instead of
 * rejecting.
 *
 * - If `ctx.signal.aborted` is already set, `fn` is not called and a compact
 *   JSON "cancelled" payload is returned.
 * - If `fn` throws or rejects, an indented JSON payload carrying the error
 *   message is returned.
 *
 * @param {string} name - Tool name echoed in error payloads.
 * @param {Function} fn - Implementation, called as `fn(params, ctx)`.
 * @returns {Function} Async `(params, ctx)` wrapper around `fn`.
 */
function safe_impl(name, fn) {
    // Keys are listed in this order so the serialised payload stays stable.
    const failure = (error) => ({ tool_error: true, tool: name, error });
    return async (params, ctx) => {
        if (ctx.signal.aborted) {
            return JSON.stringify(failure("cancelled"));
        }
        let outcome;
        try {
            outcome = await fn(params, ctx);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            outcome = JSON.stringify(failure(reason), null, 2);
        }
        return outcome;
    };
}
/**
 * Resolve a caller-supplied path and check it against the size limit.
 *
 * @param {string} filePath - Relative or absolute path. Relative paths are
 *   resolved against `workspace` when it is non-empty, otherwise against the
 *   process working directory.
 * @param {string} workspace - Base directory; a falsy value disables it.
 * @param {number} maxMb - Maximum allowed file size in mebibytes.
 * @returns {Promise<string>} The absolute path.
 * @throws {Error} If `stat` fails (e.g. the file is missing), if the target
 *   is not a regular file, or if it is larger than `maxMb`.
 */
async function resolvePath(filePath, workspace, maxMb) {
    const fullPath = workspace ? (0, path_1.resolve)(workspace, filePath) : (0, path_1.resolve)(filePath);
    const info = await (0, promises_1.stat)(fullPath);
    // The size check below is only meaningful for regular files: directories,
    // FIFOs and device nodes do not report their readable length in `size`,
    // so they would pass the limit and then fail (or never finish) when read.
    if (!info.isFile()) {
        throw new Error(`Not a regular file: ${fullPath}`);
    }
    const sizeMb = info.size / (1024 * 1024);
    if (sizeMb > maxMb) {
        throw new Error(`File is ${sizeMb.toFixed(1)} MB, exceeds limit of ${maxMb} MB`);
    }
    return fullPath;
}
/**
 * Render one worksheet as a Markdown table.
 *
 * The first row returned by `sheet_to_json` is used as the table header and
 * every later row becomes a body row.
 *
 * @param {object} sheet - Worksheet object passed to `XLSX.utils.sheet_to_json`.
 * @returns {string} The Markdown table, or "(empty sheet)" when there are no rows.
 */
function sheetToMarkdown(sheet) {
    const rows = XLSX.utils.sheet_to_json(sheet, { header: 1, defval: "" });
    if (rows.length === 0) {
        return "(empty sheet)";
    }
    // A literal "|" would be read as a column separator and a line break
    // would end the table row, so pipes are escaped and line breaks are
    // flattened to a single space to keep each row on one line.
    const toCell = (value) => String(value).replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
    const toRow = (cells) => `| ${cells.map(toCell).join(" | ")} |`;
    const [header, ...body] = rows;
    const separator = header.map(() => "---");
    return [toRow(header), toRow(separator), ...body.map(toRow)].join("\n");
}
/**
 * Read a file and return its textual content according to `format`.
 *
 * - "pdf": page count line followed by the text reported by the PDF parser.
 * - "docx": raw text reported by mammoth.
 * - "spreadsheet": one "## Sheet: <name>" section per sheet, each rendered
 *   with `sheetToMarkdown`, separated by horizontal rules.
 * - anything else: the file decoded as UTF-8.
 *
 * @param {string} fullPath - Path of the file to read.
 * @param {string} format - One of the format names above.
 * @param {string} [sheetName=""] - Blank = all sheets, non-blank = that
 *   single sheet only (spreadsheets only).
 * @returns {Promise<string>} Extracted text.
 * @throws {Error} If `sheetName` is given but absent from the workbook.
 */
async function extractText(fullPath, format, sheetName = "") {
    switch (format) {
        case "pdf": {
            const bytes = await (0, promises_1.readFile)(fullPath);
            const parsed = await (0, pdf_parse_1.default)(bytes);
            return `Pages: ${parsed.numpages}\n\n${parsed.text}`;
        }
        case "docx": {
            const extracted = await mammoth_1.default.extractRawText({ path: fullPath });
            return extracted.value;
        }
        case "spreadsheet": {
            const workbook = XLSX.readFile(fullPath);
            const wanted = sheetName ? [sheetName] : workbook.SheetNames;
            const rendered = [];
            for (const name of wanted) {
                const sheet = workbook.Sheets[name];
                if (!sheet) {
                    throw new Error(`Sheet "${name}" not found. Available: ${workbook.SheetNames.join(", ")}`);
                }
                rendered.push(`## Sheet: ${name}\n\n${sheetToMarkdown(sheet)}`);
            }
            return rendered.join("\n\n---\n\n");
        }
        default:
            // Every unrecognised format is treated as plain text.
            return (0, promises_1.readFile)(fullPath, "utf-8");
    }
}
/**
 * Map a file extension to one of the parser format names.
 *
 * @param {string} ext - Extension including the leading dot. The comparison
 *   is case-sensitive, so callers are expected to lower-case it first.
 * @returns {"pdf"|"docx"|"spreadsheet"|"txt"} "txt" for anything unrecognised.
 */
function detectFormat(ext) {
    switch (ext) {
        case ".pdf":
            return "pdf";
        case ".docx":
            return "docx";
        case ".xlsx":
        case ".xls":
        case ".ods":
        case ".csv":
            return "spreadsheet";
        default:
            return "txt";
    }
}
// ---------------------------------------------------------------------------
// Tools
// ---------------------------------------------------------------------------
/**
 * Compile the `pattern` argument of search_document into a RegExp.
 *
 * A pattern written as "/body/flags" is compiled as a regular expression
 * with the given flags ("gi" when no flags are given). Anything else is
 * matched literally, case-insensitively.
 *
 * NOTE(review): the pattern comes straight from the tool caller, so a
 * pathological expression can make the scan slow — no guard is applied here.
 *
 * @param {string} pattern - Search string or "/regex/flags".
 * @returns {RegExp} Compiled expression.
 * @throws {SyntaxError} If the regex body or flags are invalid.
 */
function buildSearchRegex(pattern) {
    const reMatch = pattern.match(/^\/(.+)\/([gimsuy]*)$/);
    if (reMatch) {
        return new RegExp(reMatch[1], reMatch[2] || "gi");
    }
    return new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "gi");
}
/**
 * Scan `lines` for `regex` and collect matching lines with context.
 *
 * @param {string[]} lines - Document lines.
 * @param {RegExp} regex - Expression to test against each line.
 * @param {number} contextLines - Lines to include before/after each match.
 * @param {number} maxMatches - Stop once this many matches were collected.
 * @returns {Array<{line: number, text: string, context: string[]}>} Matches
 *   with 1-based line numbers.
 */
function collectMatches(lines, regex, contextLines, maxMatches) {
    const matches = [];
    for (let i = 0; i < lines.length && matches.length < maxMatches; i++) {
        // With the "g" or "y" flag, test() resumes from lastIndex; every
        // line must be tested from its start.
        regex.lastIndex = 0;
        if (!regex.test(lines[i])) {
            continue;
        }
        const start = Math.max(0, i - contextLines);
        const end = Math.min(lines.length - 1, i + contextLines);
        matches.push({
            line: i + 1,
            text: lines[i],
            context: lines.slice(start, end + 1),
        });
    }
    regex.lastIndex = 0;
    return matches;
}
/**
 * Tools provider for the document parser plugin.
 *
 * Calls `detectRagPeer(ctl)` (from ./peers), reads the plugin configuration
 * once, and returns two tools:
 *   - parse_document: extract text from a file, truncated to `max_chars`.
 *   - search_document: find lines matching a keyword or regex, with context.
 *
 * Both implementations are wrapped in `safe_impl`, and both resolve the path
 * with the configured "workspacePath" and "maxFileSizeMb" values.
 *
 * @param {object} ctl - Controller providing `getPluginConfig`.
 * @returns {Promise<Array>} The two tool definitions.
 */
const toolsProvider = async (ctl) => {
    await (0, peers_1.detectRagPeer)(ctl);
    const cfg = ctl.getPluginConfig(config_1.pluginConfigSchematics);
    return [
        (0, sdk_1.tool)({
            name: "parse_document",
            description: (0, sdk_1.text) `
        Extract text content from a local file.
        format: "auto" (detect from extension), "pdf", "docx", "spreadsheet", "txt"
        Returns plain text. Spreadsheets return one Markdown table per sheet.
        Set sheet_name to target a single sheet (spreadsheet only).
        Set max_chars to truncate output (default 80000 — roughly 60 pages).
      `,
            parameters: {
                path: zod_1.z.string().describe("Relative (within workspace) or absolute file path"),
                format: zod_1.z.enum(["auto", "pdf", "docx", "spreadsheet", "txt"]).default("auto"),
                sheet_name: zod_1.z.string().default("").describe("Target sheet name (spreadsheets only; blank = all sheets)"),
                max_chars: zod_1.z.coerce.number().int().min(1000).max(500000).default(80000),
            },
            implementation: safe_impl("parse_document", async ({ path, format, sheet_name, max_chars }, ctx) => {
                ctx.status(`Resolving ${path}`);
                const fullPath = await resolvePath(path, cfg.get("workspacePath"), cfg.get("maxFileSizeMb"));
                const ext = (0, path_1.extname)(fullPath).toLowerCase();
                const detected = format === "auto" ? detectFormat(ext) : format;
                ctx.status(`Parsing as ${detected}`);
                let output = await extractText(fullPath, detected, sheet_name);
                if (output.length > max_chars) {
                    // The notice reports the length before truncation.
                    const totalChars = output.length;
                    output = output.slice(0, max_chars) + `\n\n[truncated — ${totalChars} total chars, showing first ${max_chars}]`;
                }
                // `chars` is the length of the returned content, notice included.
                return json({ path: fullPath, format: detected, chars: output.length, content: output });
            }),
        }),
        (0, sdk_1.tool)({
            name: "search_document",
            description: (0, sdk_1.text) `
        Search for a keyword or regex pattern inside a document (PDF/DOCX/spreadsheet/txt).
        Returns matching lines with surrounding context.
        pattern: literal string or JavaScript regex (e.g. "/\\d{4}-\\d{2}-\\d{2}/i")
        context_lines: number of lines before/after each match to include (default 2)
        max_matches: stop after this many matches (default 50)
      `,
            parameters: {
                path: zod_1.z.string().describe("File path (relative to workspace or absolute)"),
                pattern: zod_1.z.string().describe("Search string or /regex/flags"),
                format: zod_1.z.enum(["auto", "pdf", "docx", "spreadsheet", "txt"]).default("auto"),
                context_lines: zod_1.z.coerce.number().int().min(0).max(10).default(2),
                max_matches: zod_1.z.coerce.number().int().min(1).max(500).default(50),
            },
            implementation: safe_impl("search_document", async ({ path, pattern, format, context_lines, max_matches }, ctx) => {
                ctx.status(`Loading ${path}`);
                const fullPath = await resolvePath(path, cfg.get("workspacePath"), cfg.get("maxFileSizeMb"));
                const ext = (0, path_1.extname)(fullPath).toLowerCase();
                const detected = format === "auto" ? detectFormat(ext) : format;
                const rawText = await extractText(fullPath, detected, "");
                // Split on LF and CRLF alike; otherwise every line of a CRLF
                // document ends in "\r", which defeats "$"-anchored patterns
                // and leaks into the returned text.
                const lines = rawText.split(/\r?\n/);
                const regex = buildSearchRegex(pattern);
                ctx.status(`Scanning ${lines.length} lines for "${pattern}"`);
                const matches = collectMatches(lines, regex, context_lines, max_matches);
                return json({ path: fullPath, pattern, total_lines: lines.length, matches_found: matches.length, matches });
            }),
        }),
    ];
};
exports.toolsProvider = toolsProvider;