// src/tools/analysis.ts
import { tool, type Tool } from "@lmstudio/sdk";
import { z } from "zod";
import * as fs from "fs";
import * as path from "path";
import { DocxDocument } from "../utils/document";
import { checkReadable, resolvePath } from "../utils/config";
/**
 * Build the read-only document-analysis tools (info, headings, comments,
 * images, diff) for .docx files.
 *
 * @param workingDir - Optional base directory; all tool file paths are
 *   resolved against it via `resolvePath`.
 * @returns The array of Tool definitions to register with the SDK.
 */
export function getAnalysisTools(workingDir?: string): Tool[] {
  const resolve = (p: string) => resolvePath(p, workingDir);

  // True for Word heading paragraph styles ("Heading 1", "heading2", ...).
  const isHeading = (style: string): boolean =>
    style.toLowerCase().startsWith("heading");

  // Numeric level of a heading style ("Heading 2" -> 2). A bare "Heading"
  // style with no digits is treated as level 1 so the maxLevel filter and the
  // reported `level` field always agree (previously the filter defaulted to 99
  // while the output reported 1 for the same paragraph).
  const headingLevel = (style: string): number => {
    const digits = style.replace(/\D/g, "");
    return digits ? parseInt(digits, 10) : 1;
  };

  return [
    // 1. word_info
    tool({
      name: "word_info",
      description: "Get file metadata, word count, paragraph count, image count, and structure info from a .docx file.",
      parameters: {
        filePath: z.string(),
      },
      implementation: async ({ filePath }) => {
        const resolved = resolve(filePath);
        const err = checkReadable(resolved);
        if (err) return `Error: ${err}`;
        try {
          const doc = await DocxDocument.load(resolved);
          const paras = doc.getParagraphs();
          const tables = doc.getTables();
          const images = await doc.getImages();
          const comments = await doc.getComments();
          const meta = await doc.getMetadata();
          // Whitespace-only paragraphs are excluded from word/char counts.
          const nonEmpty = paras.filter(p => p.text.trim().length > 0);
          const wordCount = nonEmpty.reduce(
            (sum, p) => sum + p.text.trim().split(/\s+/).length, 0);
          const charCount = nonEmpty.reduce((sum, p) => sum + p.text.length, 0);
          // Tally headings keyed by their full style name (e.g. "Heading 1").
          const headingsByLevel: Record<string, number> = {};
          for (const p of paras) {
            if (!isHeading(p.style)) continue;
            headingsByLevel[p.style] = (headingsByLevel[p.style] ?? 0) + 1;
          }
          const fileStat = fs.statSync(resolved);
          const ext = path.extname(resolved).toLowerCase();
          return JSON.stringify({
            file: resolved,
            format: ext,
            file_size_kb: Math.round(fileStat.size / 1024 * 10) / 10,
            metadata: meta,
            stats: {
              word_count: wordCount,
              character_count: charCount,
              total_paragraphs: paras.length,
              non_empty_paragraphs: nonEmpty.length,
              tables: tables.length,
              images: images.length,
              comments: comments.length,
              headings_by_level: headingsByLevel,
            },
            // .docm is the macro-enabled variant; only it can carry VBA.
            has_vba: ext === ".docm",
          }, null, 2);
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 2. word_list_headings
    tool({
      name: "word_list_headings",
      description: "Extract the heading outline of a .docx document — title, level, and paragraph index.",
      parameters: {
        filePath: z.string(),
        maxLevel: z.number().int().min(1).max(9).optional().describe("Max heading level to include (default: all)"),
      },
      implementation: async ({ filePath, maxLevel }) => {
        const resolved = resolve(filePath);
        const err = checkReadable(resolved);
        if (err) return `Error: ${err}`;
        try {
          const doc = await DocxDocument.load(resolved);
          // Parse each heading's level exactly once so filtering and the
          // reported level can never disagree.
          const headings = doc.getParagraphs()
            .filter(p => isHeading(p.style))
            .map(p => ({
              index: p.index,
              level: headingLevel(p.style),
              style: p.style,
              text: p.text,
            }))
            .filter(h => maxLevel === undefined || h.level <= maxLevel);
          return JSON.stringify({ file: resolved, heading_count: headings.length, headings }, null, 2);
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 3. word_extract_comments
    tool({
      name: "word_extract_comments",
      description: "Extract all review comments from a .docx — author, date, and text of each comment.",
      parameters: {
        filePath: z.string(),
        author: z.string().optional().describe("Filter by author name (case-insensitive partial match)"),
      },
      implementation: async ({ filePath, author }) => {
        const resolved = resolve(filePath);
        const err = checkReadable(resolved);
        if (err) return `Error: ${err}`;
        try {
          const doc = await DocxDocument.load(resolved);
          let comments = await doc.getComments();
          if (author) {
            // Case-insensitive substring match on the comment author.
            const needle = author.toLowerCase();
            comments = comments.filter(c => c.author.toLowerCase().includes(needle));
          }
          return JSON.stringify({
            file: resolved,
            total_comments: comments.length,
            filtered_by_author: author ?? null,
            comments,
          }, null, 2);
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 4. word_extract_images
    tool({
      name: "word_extract_images",
      description: "List all embedded images in a .docx — name, type, and size. Optionally extract them to disk.",
      parameters: {
        filePath: z.string(),
        extractTo: z.string().optional().describe("Directory to extract image files to (optional)"),
      },
      implementation: async ({ filePath, extractTo }) => {
        const resolved = resolve(filePath);
        const err = checkReadable(resolved);
        if (err) return `Error: ${err}`;
        const extractDir = extractTo ? resolve(extractTo) : undefined;
        try {
          const doc = await DocxDocument.load(resolved);
          const images = await doc.getImages();
          const extracted: string[] = [];
          if (extractDir) {
            if (!fs.existsSync(extractDir)) fs.mkdirSync(extractDir, { recursive: true });
            const zip = doc.getZip();
            for (const img of images) {
              const mediaPath = `word/media/${img.name}`;
              const file = zip.file(mediaPath);
              if (!file) continue;
              const data = await file.async("nodebuffer");
              // basename() guards against zip-slip: a crafted archive entry
              // name containing "../" must not escape the target directory.
              const outPath = path.join(extractDir, path.basename(img.name));
              fs.writeFileSync(outPath, data);
              extracted.push(outPath);
            }
          }
          return JSON.stringify({
            file: resolved,
            image_count: images.length,
            images,
            extracted_to: extractDir ?? null,
            extracted_files: extracted,
          }, null, 2);
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 5. word_diff
    tool({
      name: "word_diff",
      description: "Compare two .docx files and report paragraphs added, removed, and unchanged.",
      parameters: {
        fileA: z.string().describe("Original (base) file"),
        fileB: z.string().describe("Modified (new) file"),
      },
      implementation: async ({ fileA, fileB }) => {
        const pathA = resolve(fileA);
        const pathB = resolve(fileB);
        const errA = checkReadable(pathA);
        if (errA) return `Error (fileA): ${errA}`;
        const errB = checkReadable(pathB);
        if (errB) return `Error (fileB): ${errB}`;
        try {
          const docA = await DocxDocument.load(pathA);
          const docB = await DocxDocument.load(pathB);
          // Set-membership diff on paragraph text: order-insensitive, and a
          // paragraph repeated in one file counts once per occurrence.
          const parasA = docA.getParagraphs().filter(p => p.text.trim()).map(p => p.text);
          const parasB = docB.getParagraphs().filter(p => p.text.trim()).map(p => p.text);
          const setA = new Set(parasA);
          const setB = new Set(parasB);
          const added = parasB.filter(t => !setA.has(t));
          const removed = parasA.filter(t => !setB.has(t));
          const common = parasA.filter(t => setB.has(t));
          return JSON.stringify({
            file_a: pathA,
            file_b: pathB,
            summary: {
              total_a: parasA.length,
              total_b: parasB.length,
              added: added.length,
              removed: removed.length,
              unchanged: common.length,
            },
            added: added.map(t => ({ type: "added", text: t })),
            removed: removed.map(t => ({ type: "removed", text: t })),
          }, null, 2);
        } catch (e) { return `Error: ${e}`; }
      },
    }),
  ];
}
export function getMacroTools(workingDir?: string): Tool[] {
const resolve = (p: string) => resolvePath(p, workingDir);
return [
// 6. word_read_vba
tool({
name: "word_read_vba",
description: "Extract VBA module information from a .docm (macro-enabled Word document).",
parameters: {
filePath: z.string().describe("Path to a .docm file"),
},
implementation: async ({ filePath }) => {
filePath = resolve(filePath);
if (!fs.existsSync(filePath)) return `Error: File not found: "${filePath}"`;
if (!filePath.toLowerCase().endsWith(".docm")) {
return `Error: VBA is only available in .docm files. Got: "${filePath}"`;
}
try {
const doc = await DocxDocument.load(filePath);
const modules = await doc.readVba();
return JSON.stringify({
file: filePath,
vba_modules: Object.keys(modules).length,
modules,
}, null, 2);
} catch (e) { return `Error: ${e}`; }
},
}),
];
}