// src/tools/content.ts
import { tool, type Tool } from "@lmstudio/sdk";
import { z } from "zod";
import * as fs from "fs";
import * as path from "path";
import { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell,
HeadingLevel, AlignmentType, LevelFormat, WidthType, BorderStyle,
ShadingType, PageBreak } from "docx";
import {
DocxDocument, blocksToXml, stripSectPr, extractBodyContent,
findTopLevelParaSpans, ContentBlock,
} from "../utils/document";
import { checkReadable, backupFile, ensureDir, resolvePath } from "../utils/config";
// ─── Schema ───────────────────────────────────────────────────────────────────
const contentBlockSchema = z.array(z.union([
z.object({ type: z.literal("heading"), text: z.string(),
level: z.number().int().min(1).max(6).optional(), bold: z.boolean().optional() }),
z.object({ type: z.literal("paragraph"), text: z.string(),
bold: z.boolean().optional(), italic: z.boolean().optional(),
align: z.enum(["left","center","right","justify"]).optional() }),
z.object({ type: z.literal("list"), items: z.array(z.string()),
ordered: z.boolean().optional() }),
z.object({ type: z.literal("table"), headers: z.array(z.string()),
rows: z.array(z.array(z.string())) }),
z.object({ type: z.literal("pagebreak") }),
])).describe("Content blocks to write");
// ─── docx-library helpers (only used for word_write / word_split_by_heading) ──
const HEADING_MAP: Record<number, typeof HeadingLevel[keyof typeof HeadingLevel]> = {
1: HeadingLevel.HEADING_1, 2: HeadingLevel.HEADING_2, 3: HeadingLevel.HEADING_3,
4: HeadingLevel.HEADING_4, 5: HeadingLevel.HEADING_5, 6: HeadingLevel.HEADING_6,
};
function buildDocxContent(blocks: ContentBlock[]): (Paragraph | Table)[] {
const children: (Paragraph | Table)[] = [];
const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
const borders = { top: border, bottom: border, left: border, right: border };
for (const block of blocks) {
if (block.type === "heading") {
children.push(new Paragraph({
heading: HEADING_MAP[block.level ?? 1],
children: [new TextRun({ text: block.text ?? "", bold: block.bold })],
}));
} else if (block.type === "paragraph") {
const align: Record<string, typeof AlignmentType[keyof typeof AlignmentType]> = {
left: AlignmentType.LEFT, center: AlignmentType.CENTER,
right: AlignmentType.RIGHT, justify: AlignmentType.JUSTIFIED,
};
children.push(new Paragraph({
alignment: block.align ? align[block.align] : undefined,
children: [new TextRun({ text: block.text ?? "", bold: block.bold, italics: block.italic })],
}));
} else if (block.type === "list") {
for (const item of block.items ?? []) {
children.push(new Paragraph({
numbering: { reference: block.ordered ? "numbers" : "bullets", level: 0 },
children: [new TextRun(item)],
}));
}
} else if (block.type === "table") {
const colCount = (block.headers ?? []).length;
const colWidth = Math.floor(9360 / Math.max(colCount, 1));
const colWidths = Array(colCount).fill(colWidth);
const makeRow = (cells: string[], isHeader = false) =>
new TableRow({ children: cells.map((text, ci) =>
new TableCell({
borders,
width: { size: colWidths[ci], type: WidthType.DXA },
shading: isHeader ? { fill: "4472C4", type: ShadingType.CLEAR } : undefined,
margins: { top: 80, bottom: 80, left: 120, right: 120 },
children: [new Paragraph({ children: [new TextRun({
text, bold: isHeader, color: isHeader ? "FFFFFF" : undefined,
})] })],
})
)});
children.push(new Table({
width: { size: 9360, type: WidthType.DXA },
columnWidths: colWidths,
rows: [makeRow(block.headers ?? [], true), ...(block.rows ?? []).map(r => makeRow(r))],
}));
} else if (block.type === "pagebreak") {
children.push(new Paragraph({ children: [new PageBreak()] }));
}
}
return children;
}
// ─── Tools ───────────────────────────────────────────────────────────────────
export function getContentTools(workingDir?: string): Tool[] {
const resolve = (p: string) => resolvePath(p, workingDir);
return [
// 1. word_read
tool({
name: "word_read",
description: "Read paragraphs from a .docx file. Returns structured data: text, style, formatting runs. Supports pagination.",
parameters: {
filePath: z.string(),
startPara: z.number().optional().describe("First paragraph index, 0-based (default: 0)"),
endPara: z.number().optional().describe("Last paragraph index exclusive (default: all)"),
maxParas: z.number().optional().describe("Max paragraphs to return (default: 500)"),
includeEmpty: z.boolean().optional().describe("Include empty paragraphs (default: false)"),
},
implementation: async ({ filePath, startPara, endPara, maxParas, includeEmpty }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const doc = await DocxDocument.load(filePath);
let paras = doc.getParagraphs();
if (!includeEmpty) paras = paras.filter(p => p.text.trim().length > 0);
const s = startPara ?? 0;
const e = endPara ?? paras.length;
const slice = paras.slice(s, e).slice(0, maxParas ?? 500);
return JSON.stringify({
file: filePath, total_paragraphs: paras.length,
returned: slice.length, range: `${s}–${Math.min(e, paras.length) - 1}`,
paragraphs: slice,
}, null, 2);
} catch (e) { return `Error: ${e}`; }
},
}),
// 2. word_write
tool({
name: "word_write",
description: "Create a new .docx file from structured content blocks (headings, paragraphs, lists, tables, page breaks).",
parameters: {
filePath: z.string(),
content: contentBlockSchema,
title: z.string().optional().describe("Document title (metadata)"),
author: z.string().optional().describe("Document author (metadata)"),
margins: z.object({
top: z.number().optional(), bottom: z.number().optional(),
left: z.number().optional(), right: z.number().optional(),
}).optional().describe("Page margins in DXA (1440 = 1 inch, default: 1440 all sides)"),
},
implementation: async ({ filePath, content, title, author, margins }) => {
filePath = resolve(filePath);
try {
ensureDir(filePath);
const m = margins ?? {};
const doc = new Document({
creator: author, title,
numbering: { config: [
{ reference: "bullets", levels: [{ level: 0, format: LevelFormat.BULLET, text: "•",
alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
{ reference: "numbers", levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.",
alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
]},
styles: { default: { document: { run: { font: "Calibri", size: 24 } } } },
sections: [{ properties: {
page: { size: { width: 12240, height: 15840 },
margin: { top: m.top??1440, bottom: m.bottom??1440, left: m.left??1440, right: m.right??1440 } }
}, children: buildDocxContent(content as ContentBlock[]) }],
});
const buf = await Packer.toBuffer(doc);
fs.writeFileSync(filePath, buf);
return JSON.stringify({ success: true, file: filePath, blocks_written: content.length,
file_size_kb: Math.round(buf.length / 1024 * 10) / 10 });
} catch (e) { return `Error: ${e}`; }
},
}),
// 3. word_append
tool({
name: "word_append",
description: "Append content blocks (headings, paragraphs, lists, tables) to the end of an existing .docx. Creates a .bak backup.",
parameters: {
filePath: z.string(),
content: contentBlockSchema,
},
implementation: async ({ filePath, content }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const doc = await DocxDocument.load(filePath);
// Build XML directly — no temp-doc round-trips, no relationship ID conflicts
const newXml = blocksToXml(content as ContentBlock[]);
// Inject immediately before </w:body>
doc.setXml(doc.getXml().replace(/<\/w:body>/, `${newXml}</w:body>`));
await doc.save(filePath);
return JSON.stringify({ success: true, file: filePath, blocks_appended: content.length });
} catch (e) { return `Error: ${e}`; }
},
}),
// 4. word_find_replace
tool({
name: "word_find_replace",
description: "Find and replace text across the entire document. Creates a .bak backup.",
parameters: {
filePath: z.string(),
find: z.string().describe("Text to search for"),
replace: z.string().describe("Replacement text"),
regex: z.boolean().optional().describe("Treat 'find' as a regular expression (default: false)"),
caseSensitive: z.boolean().optional().describe("Case-sensitive matching (default: false)"),
},
implementation: async ({ filePath, find, replace, regex, caseSensitive }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const doc = await DocxDocument.load(filePath);
const count = doc.findReplace(find, replace, { regex, caseSensitive });
await doc.save(filePath);
return JSON.stringify({ success: true, file: filePath, find, replace, replacements_made: count });
} catch (e) { return `Error: ${e}`; }
},
}),
// 5. word_insert_after_heading
tool({
name: "word_insert_after_heading",
description: "Insert content blocks immediately after a specific heading (matched by text). Creates a .bak backup.",
parameters: {
filePath: z.string(),
headingText: z.string().describe("Text of the heading to insert after (case-insensitive, partial match)"),
content: contentBlockSchema,
},
implementation: async ({ filePath, headingText, content }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const doc = await DocxDocument.load(filePath);
const xml = doc.getXml();
// Work entirely within the body content to find the target span
const bodyContent = extractBodyContent(xml);
const spans = findTopLevelParaSpans(bodyContent);
// Find the heading paragraph by style + text content
const needle = headingText.toLowerCase();
const target = spans.find(span => {
const pXml = bodyContent.slice(span.start, span.end);
const hasHeadingStyle = /w:pStyle w:val="Heading\d"/i.test(pXml);
// Extract plain text from <w:t> elements to compare
const texts: string[] = [];
pXml.replace(/<w:t(?:[^>]*)>([\s\S]*?)<\/w:t>/g, (_, t) => { texts.push(t); return _; });
const plainText = texts.join("").toLowerCase();
return hasHeadingStyle && plainText.includes(needle);
});
if (!target) {
const headings = spans
.filter(span => /w:pStyle w:val="Heading\d"/i.test(bodyContent.slice(span.start, span.end)))
.map(span => {
const texts: string[] = [];
bodyContent.slice(span.start, span.end).replace(/<w:t(?:[^>]*)>([\s\S]*?)<\/w:t>/g, (_, t) => { texts.push(t); return _; });
return `"${texts.join("")}"`;
});
return `Error: Heading not found: "${headingText}". Available headings: ${headings.join(", ")}`;
}
const newXml = blocksToXml(content as ContentBlock[]);
// Rebuild body: everything up to and including the heading paragraph, then new content, then the rest
const newBodyContent =
bodyContent.slice(0, target.end) +
newXml +
bodyContent.slice(target.end);
// Replace body content in the full XML (use greedy to get full body)
const updatedXml = xml.replace(
/(<w:body>)([\s\S]*)(<\/w:body>)/,
`$1${newBodyContent}$3`
);
doc.setXml(updatedXml);
await doc.save(filePath);
return JSON.stringify({ success: true, file: filePath, blocks_inserted: content.length });
} catch (e) { return `Error: ${e}`; }
},
}),
// 6. word_delete_paragraphs
tool({
name: "word_delete_paragraphs",
description: "Delete paragraphs matching a text pattern or style. Creates a .bak backup.",
parameters: {
filePath: z.string(),
filter: z.object({
textContains: z.string().optional().describe("Delete paragraphs containing this text (case-insensitive)"),
textRegex: z.string().optional().describe("Delete paragraphs matching this regex"),
style: z.string().optional().describe("Delete paragraphs with this style (e.g. 'Heading1', 'Normal')"),
emptyOnly: z.boolean().optional().describe("Delete only empty paragraphs"),
}),
},
implementation: async ({ filePath, filter }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const doc = await DocxDocument.load(filePath);
const xml = doc.getXml();
const bodyContent = extractBodyContent(xml);
const spans = findTopLevelParaSpans(bodyContent);
// Determine which spans to delete
const toDelete = new Set<number>();
for (const span of spans) {
const pXml = bodyContent.slice(span.start, span.end);
const texts: string[] = [];
pXml.replace(/<w:t(?:[^>]*)>([\s\S]*?)<\/w:t>/g, (_, t) => { texts.push(t); return _; });
const plainText = texts.join("");
if (filter.emptyOnly) {
if (plainText.trim() === "") toDelete.add(span.start);
continue;
}
let matches = true;
if (filter.textContains && !plainText.toLowerCase().includes(filter.textContains.toLowerCase())) matches = false;
if (filter.textRegex && !new RegExp(filter.textRegex, "i").test(plainText)) matches = false;
if (filter.style) {
const styleMatch = /w:pStyle w:val="([^"]+)"/.exec(pXml);
const paraStyle = styleMatch ? styleMatch[1] : "Normal";
if (paraStyle.toLowerCase() !== filter.style.toLowerCase()) matches = false;
}
if (matches) toDelete.add(span.start);
}
if (toDelete.size === 0) return `No paragraphs matched the filter.`;
// Rebuild body content skipping deleted spans
let newBody = "";
let cursor = 0;
for (const span of spans) {
if (toDelete.has(span.start)) {
newBody += bodyContent.slice(cursor, span.start); // add gap before span
cursor = span.end; // skip the span
}
}
newBody += bodyContent.slice(cursor); // remainder
const updatedXml = xml.replace(
/(<w:body>)([\s\S]*)(<\/w:body>)/,
`$1${newBody}$3`
);
doc.setXml(updatedXml);
await doc.save(filePath);
return JSON.stringify({
success: true, file: filePath,
paragraphs_before: spans.length,
paragraphs_deleted: toDelete.size,
paragraphs_after: spans.length - toDelete.size,
});
} catch (e) { return `Error: ${e}`; }
},
}),
// 7. word_extract_tables
tool({
name: "word_extract_tables",
description: "Extract all tables from a .docx as JSON arrays of rows.",
parameters: {
filePath: z.string(),
tableIndex: z.number().optional().describe("Extract only this table (0-based). Default: all."),
},
implementation: async ({ filePath, tableIndex }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const doc = await DocxDocument.load(filePath);
const tables = doc.getTables();
const result = tableIndex !== undefined ? [tables[tableIndex]].filter(Boolean) : tables;
return JSON.stringify({
file: filePath, total_tables: tables.length, returned: result.length,
tables: result.map(t => ({
index: t.index, rows: t.rows.length, cols: t.rows[0]?.length ?? 0, data: t.rows,
})),
}, null, 2);
} catch (e) { return `Error: ${e}`; }
},
}),
// 8. word_insert_table
tool({
name: "word_insert_table",
description: "Insert a table at the end of a .docx or after a specific heading. Creates a .bak backup.",
parameters: {
filePath: z.string(),
headers: z.array(z.string()).describe("Column header labels"),
rows: z.array(z.array(z.string())).describe("Table data rows"),
afterHeading: z.string().optional().describe("Insert after this heading text (default: end of document)"),
},
implementation: async ({ filePath, headers, rows, afterHeading }) => {
filePath = resolve(filePath);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const doc = await DocxDocument.load(filePath);
const tableXml = blocksToXml([{ type: "table", headers, rows }]);
if (!afterHeading) {
doc.setXml(doc.getXml().replace(/<\/w:body>/, `${tableXml}</w:body>`));
} else {
const xml = doc.getXml();
const bodyContent = extractBodyContent(xml);
const spans = findTopLevelParaSpans(bodyContent);
const needle = afterHeading.toLowerCase();
const target = spans.find(span => {
const pXml = bodyContent.slice(span.start, span.end);
if (!/w:pStyle w:val="Heading\d"/i.test(pXml)) return false;
const texts: string[] = [];
pXml.replace(/<w:t(?:[^>]*)>([\s\S]*?)<\/w:t>/g, (_, t) => { texts.push(t); return _; });
return texts.join("").toLowerCase().includes(needle);
});
if (!target) return `Error: Heading "${afterHeading}" not found.`;
const newBody =
bodyContent.slice(0, target.end) + tableXml + bodyContent.slice(target.end);
doc.setXml(xml.replace(/(<w:body>)([\s\S]*)(<\/w:body>)/, `$1${newBody}$3`));
}
await doc.save(filePath);
return JSON.stringify({
success: true, file: filePath,
table_rows: rows.length + 1, table_cols: headers.length,
inserted: afterHeading ? `after heading "${afterHeading}"` : "at end of document",
});
} catch (e) { return `Error: ${e}`; }
},
}),
// 9. word_merge_docs
tool({
name: "word_merge_docs",
description: "Merge multiple .docx files into a single document. Each source is appended with an optional page break between them.",
parameters: {
inputFiles: z.array(z.string()).min(2),
outputFile: z.string(),
pageBreakBetween: z.boolean().optional().describe("Insert a page break between documents (default: true)"),
},
implementation: async ({ inputFiles, outputFile, pageBreakBetween }) => {
const resolved = inputFiles.map(f => resolve(f));
const missing = resolved.filter(f => !fs.existsSync(f));
if (missing.length) return `Error: Files not found: ${missing.join(", ")}`;
outputFile = resolve(outputFile);
try {
ensureDir(outputFile);
const base = await DocxDocument.load(resolved[0]);
const summary: { file: string; paragraphs: number }[] = [];
for (let i = 1; i < resolved.length; i++) {
const src = await DocxDocument.load(resolved[i]);
const paras = src.getParagraphs();
summary.push({ file: resolved[i], paragraphs: paras.length });
// Extract body content and strip ALL sectPr forms before injection
const bodyContent = extractBodyContent(src.getXml());
let content = stripSectPr(bodyContent).trim();
if (pageBreakBetween !== false) {
content = `<w:p><w:r><w:br w:type="page"/></w:r></w:p>${content}`;
}
base.setXml(base.getXml().replace(/<\/w:body>/, `${content}</w:body>`));
}
await base.save(outputFile);
const totalParas = base.getParagraphs().length;
return JSON.stringify({
success: true, output: outputFile,
files_merged: resolved.length, total_paragraphs: totalParas, sources: summary,
});
} catch (e) { return `Error: ${e}`; }
},
}),
// 10. word_split_by_heading
tool({
name: "word_split_by_heading",
description: "Split a .docx document into multiple files — one per top-level heading section.",
parameters: {
filePath: z.string(),
outputDir: z.string().describe("Directory where split files will be saved"),
headingLevel: z.number().int().min(1).max(3).optional().describe("Heading level to split on (default: 1)"),
},
implementation: async ({ filePath, outputDir, headingLevel }) => {
filePath = resolve(filePath);
outputDir = resolve(outputDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
const doc = await DocxDocument.load(filePath);
const paras = doc.getParagraphs();
const level = headingLevel ?? 1;
const targetStyle = `Heading${level}`;
const splitIndices = paras
.filter(p => p.style === targetStyle && p.text.trim())
.map(p => p.index);
if (splitIndices.length === 0) {
return `Error: No Heading${level} paragraphs found in document.`;
}
const outputFiles: { file: string; heading: string; paragraphs: number }[] = [];
for (let i = 0; i < splitIndices.length; i++) {
const start = splitIndices[i];
const end = splitIndices[i + 1] ?? paras.length;
const section = paras.slice(start, end);
const heading = section[0]?.text ?? `Section${i + 1}`;
const safeName = heading.replace(/[^a-zA-Z0-9 _-]/g, "").trim().slice(0, 50) || `section_${i + 1}`;
const outPath = path.join(outputDir, `${safeName}.docx`);
const blocks: ContentBlock[] = section.map(p => {
if (p.style.toLowerCase().startsWith("heading")) {
const lvl = parseInt(p.style.replace(/\D/g, "") || "1");
return { type: "heading" as const, text: p.text, level: lvl };
}
return { type: "paragraph" as const, text: p.text };
});
const newDoc = new Document({
numbering: { config: [
{ reference: "bullets", levels: [{ level: 0, format: LevelFormat.BULLET, text: "•",
alignment: AlignmentType.LEFT, style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] },
]},
sections: [{ children: buildDocxContent(blocks) }],
});
const buf = await Packer.toBuffer(newDoc);
fs.writeFileSync(outPath, buf);
outputFiles.push({ file: outPath, heading, paragraphs: section.length });
}
return JSON.stringify({
success: true, source: filePath, split_on: `Heading${level}`,
files_created: outputFiles.length, output_dir: outputDir, files: outputFiles,
});
} catch (e) { return `Error: ${e}`; }
},
}),
];
}