// Project Files
// src/tools/formatting.ts
import { tool, type Tool } from "@lmstudio/sdk";
import { z } from "zod";
import * as fs from "fs";
import JSZip from "jszip";
import { DocxDocument, escapeXml, extractBodyContent, findTopLevelParaSpans, blocksToXml } from "../utils/document";
import { checkReadable, backupFile, resolvePath } from "../utils/config";
// ─────────────────────────────────────────────────────────────────────────────
// Module-private helpers. All of them are pure string → string functions so
// the XML surgery can be reasoned about (and tested) without touching disk.
// ─────────────────────────────────────────────────────────────────────────────

/** Portrait page dimensions in DXA (1440 DXA = 1 inch). */
const PAPER_SIZES = {
  letter: { w: 12240, h: 15840 },
  a4: { w: 11906, h: 16838 },
  legal: { w: 12240, h: 20160 },
} as const;

type PaperSize = keyof typeof PAPER_SIZES;
type PageOrientation = "portrait" | "landscape";

interface PageMargins {
  top?: number;
  bottom?: number;
  left?: number;
  right?: number;
}

interface RunFormatting {
  bold?: boolean;
  italic?: boolean;
  underline?: boolean;
  /** Already validated colour value (6 hex digits or "auto"). */
  color?: string;
}

interface RunPropertyEdit {
  /** Run-property elements to insert at the start of <w:rPr>. */
  fragment: string;
  /** Patterns matching pre-existing elements that the fragment supersedes. */
  stale: RegExp[];
}

const HEADER_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header";
const FOOTER_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer";
const HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
const FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";

// Every prefix listed in mc:Ignorable must be bound to a namespace on the same
// element, so w14/w15/wp14 are declared here alongside the prefixes in use.
const HEADER_FOOTER_NS =
  `xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" ` +
  `xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ` +
  `xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" ` +
  `xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ` +
  `xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" ` +
  `xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" ` +
  `xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" ` +
  `mc:Ignorable="w14 w15 wp14"`;

/** Elements that must precede <w:pgSz> inside <w:sectPr>. */
const BEFORE_PAGE_SIZE = ["headerReference", "footerReference", "footnotePr", "endnotePr", "type"] as const;
/** Elements that must precede <w:pgMar> inside <w:sectPr>. */
const BEFORE_PAGE_MARGINS = [...BEFORE_PAGE_SIZE, "pgSz"] as const;

/** Decodes the predefined XML entities and numeric character references in a single pass. */
function decodeXmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|lt|gt|amp|quot|apos);/g, (whole: string, ref: string) => {
    switch (ref) {
      case "lt": return "<";
      case "gt": return ">";
      case "amp": return "&";
      case "quot": return '"';
      case "apos": return "'";
    }
    const codePoint = ref.startsWith("#x") ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
    // String.fromCodePoint throws on out-of-range values; leave such references untouched.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Concatenates the decoded content of every <w:t> element in `xml`.
 * Only real <w:t> elements are read — not <w:tab/>, <w:tbl>, <w:tabs>, … —
 * and surrounding markup is never part of the result.
 */
function extractVisibleText(xml: string): string {
  // Attributes are matched lazily so a self-closing <w:t .../> is recognised
  // before it can be mistaken for an opening tag.
  const textElement = /<w:t(?:\s[^>]*?)?(?:\/>|>([\s\S]*?)<\/w:t>)/g;
  let text = "";
  for (let m = textElement.exec(xml); m !== null; m = textElement.exec(xml)) {
    text += m[1] ?? "";
  }
  return decodeXmlEntities(text);
}

/** Returns the colour without a leading '#', or null when it is not 6 hex digits / "auto". */
function normalizeHexColor(color: string): string | null {
  const trimmed = color.trim();
  if (trimmed === "auto") return trimmed;
  const m = /^#?([0-9a-fA-F]{6})$/.exec(trimmed);
  return m?.[1] ?? null;
}

/** Builds the <w:rPr> children to inject plus the patterns of elements they replace. */
function buildRunPropertyEdit(fmt: RunFormatting): RunPropertyEdit {
  const parts: string[] = [];
  const stale: RegExp[] = [];
  if (fmt.bold) {
    parts.push("<w:b/>");
    stale.push(/<w:b(?:\s[^>]*)?\/>/g); // does not touch <w:bCs/> or <w:bdr/>
  }
  if (fmt.italic) {
    parts.push("<w:i/>");
    stale.push(/<w:i(?:\s[^>]*)?\/>/g); // does not touch <w:iCs/>
  }
  if (fmt.underline) {
    parts.push('<w:u w:val="single"/>');
    stale.push(/<w:u(?:\s[^>]*)?\/>/g);
  }
  if (fmt.color) {
    parts.push(`<w:color w:val="${fmt.color}"/>`);
    stale.push(/<w:color(?:\s[^>]*)?\/>/g);
  }
  return { fragment: parts.join(""), stale };
}

/**
 * Merges `edit` into the run whose inner XML is `runInner`.
 * The run's own <w:rPr> is the one that starts the run; anything deeper
 * (e.g. inside a text box) belongs to a nested run and is left alone.
 */
function mergeRunProperties(runInner: string, edit: RunPropertyEdit): string {
  const opening = /^\s*<w:rPr(?:\s[^>]*)?\/?>/.exec(runInner);
  if (opening === null) return `<w:rPr>${edit.fragment}</w:rPr>${runInner}`;

  const tag = opening[0];
  const rest = runInner.slice(tag.length);
  if (tag.endsWith("/>")) {
    // Expand an empty <w:rPr/> instead of adding a second rPr next to it.
    return `${tag.slice(0, -2)}>${edit.fragment}</w:rPr>${rest}`;
  }

  // Current properties end at </w:rPr>, or earlier at a tracked-change record
  // whose nested <w:rPr> describes the *previous* formatting.
  const ownEnd = rest.search(/<w:rPrChange[\s>]|<\/w:rPr>/);
  if (ownEnd === -1) return `${tag}${edit.fragment}${rest}`;

  let own = rest.slice(0, ownEnd);
  for (const pattern of edit.stale) own = own.replace(pattern, "");
  return `${tag}${edit.fragment}${own}${rest.slice(ownEnd)}`;
}

/**
 * Applies `edit` to every <w:r> whose visible text matches `matcher`
 * (which must not carry the `g` flag). Runs are never split.
 */
function formatMatchingRuns(xml: string, matcher: RegExp, edit: RunPropertyEdit): { xml: string; count: number } {
  let count = 0;
  // The look-behind keeps a self-closing <w:r .../> from being read as an opening tag.
  const runPattern = /(<w:r(?:\s[^>]*)?(?<!\/)>)([\s\S]*?)(<\/w:r>)/g;
  const updated = xml.replace(runPattern, (whole: string, open: string, inner: string, close: string) => {
    if (!matcher.test(extractVisibleText(inner))) return whole;
    count++;
    return `${open}${mergeRunProperties(inner, edit)}${close}`;
  });
  return { xml: updated, count };
}

/**
 * Sets the paragraph style of one paragraph's XML.
 * `styleAttr` must already be escaped for use inside an attribute value.
 */
function applyParagraphStyle(paraXml: string, styleAttr: string): string {
  const styleTag = `<w:pStyle w:val="${styleAttr}"/>`;
  const open = /<w:p(?:\s[^>]*)?\/?>/.exec(paraXml);
  if (open === null) return paraXml;

  const openTag = open[0];
  const head = paraXml.slice(0, open.index);
  const tail = paraXml.slice(open.index + openTag.length);
  if (openTag.endsWith("/>")) {
    // Self-closing empty paragraph: expand it so the pPr lands inside.
    return `${head}${openTag.slice(0, -2)}><w:pPr>${styleTag}</w:pPr></w:p>${tail}`;
  }

  // The paragraph's own <w:pPr> is its first child.
  const pPr = /^\s*<w:pPr(?:\s[^>]*)?\/?>/.exec(tail);
  if (pPr === null) return `${head}${openTag}<w:pPr>${styleTag}</w:pPr>${tail}`;

  const pPrTag = pPr[0];
  const afterPPr = tail.slice(pPrTag.length);
  if (pPrTag.endsWith("/>")) {
    return `${head}${openTag}${pPrTag.slice(0, -2)}>${styleTag}</w:pPr>${afterPPr}`;
  }

  // Only touch current properties, not the "before" copy inside <w:pPrChange>.
  const ownEnd = afterPPr.search(/<w:pPrChange[\s>]|<\/w:pPr>/);
  const own = ownEnd === -1 ? afterPPr : afterPPr.slice(0, ownEnd);
  const rest = ownEnd === -1 ? "" : afterPPr.slice(ownEnd);
  // pStyle is the first child of pPr: drop any existing one and put the new one first.
  const withoutStyle = own.replace(/<w:pStyle(?:\s[^>]*)?\/>/g, "");
  return `${head}${openTag}${pPrTag}${styleTag}${withoutStyle}${rest}`;
}

/**
 * Replaces everything between <w:body> and </w:body>. Returns null when the
 * body cannot be located. Uses a replacer function so `$` sequences in the
 * document text are inserted literally.
 */
function replaceBodyContent(xml: string, newBody: string): string | null {
  const bodyPattern = /(<w:body>)([\s\S]*)(<\/w:body>)/;
  if (!bodyPattern.test(xml)) return null;
  return xml.replace(bodyPattern, (_whole: string, open: string, _old: string, close: string) => `${open}${newBody}${close}`);
}

/** Inserts `fragment` right after the opening <w:body> tag; null when there is none. */
function insertAtBodyStart(xml: string, fragment: string): string | null {
  const open = /<w:body(?:\s[^>]*)?>/.exec(xml);
  if (open === null) return null;
  const at = open.index + open[0].length;
  return xml.slice(0, at) + fragment + xml.slice(at);
}

/** Returns the index just past any leading run of the named <w:…> elements starting at `from`. */
function skipLeadingElements(xml: string, from: number, names: readonly string[]): number {
  if (names.length === 0) return from;
  const element = new RegExp(
    `\\s*<w:(${names.join("|")})(?:\\s[^>]*?)?(?:\\/>|>[\\s\\S]*?<\\/w:\\1>)`,
    "y",
  );
  let pos = from;
  for (;;) {
    element.lastIndex = pos;
    if (element.exec(xml) === null) return pos;
    pos = element.lastIndex;
  }
}

/**
 * Inserts `element` into the first <w:sectPr>, after any leading children
 * named in `preceding` (to respect the schema's child order). A self-closing
 * sectPr is expanded; when no sectPr exists one is appended before </w:body>.
 * Returns null when neither a sectPr nor </w:body> can be found.
 */
function insertIntoFirstSectPr(xml: string, element: string, preceding: readonly string[]): string | null {
  const open = /<w:sectPr(?:\s[^>]*)?\/?>/.exec(xml);
  if (open === null) {
    const bodyEnd = xml.lastIndexOf("</w:body>");
    if (bodyEnd === -1) return null;
    return `${xml.slice(0, bodyEnd)}<w:sectPr>${element}</w:sectPr>${xml.slice(bodyEnd)}`;
  }
  const tag = open[0];
  const afterTag = open.index + tag.length;
  if (tag.endsWith("/>")) {
    return `${xml.slice(0, open.index)}${tag.slice(0, -2)}>${element}</w:sectPr>${xml.slice(afterTag)}`;
  }
  const at = skipLeadingElements(xml, afterTag, preceding);
  return xml.slice(0, at) + element + xml.slice(at);
}

/** Builds a complete header ("hdr") or footer ("ftr") part; `escapedText` must already be XML-escaped. */
function buildHeaderFooterPart(kind: "hdr" | "ftr", escapedText: string, jc: string): string {
  return (
    `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
    `<w:${kind} ${HEADER_FOOTER_NS}>` +
    `<w:p><w:pPr><w:jc w:val="${jc}"/></w:pPr>` +
    `<w:r><w:t xml:space="preserve">${escapedText}</w:t></w:r></w:p>` +
    `</w:${kind}>`
  );
}

/** Adds an <Override> for `partName` to [Content_Types].xml unless one is already present. */
function ensureContentTypeOverride(ctXml: string, partName: string, contentType: string): string {
  if (ctXml.includes(`PartName="${partName}"`)) return ctXml;
  const closeAt = ctXml.lastIndexOf("</Types>");
  if (closeAt === -1) throw new Error("[Content_Types].xml has no </Types> closing tag");
  const entry = `<Override PartName="${partName}" ContentType="${contentType}"/>`;
  return ctXml.slice(0, closeAt) + entry + ctXml.slice(closeAt);
}

/**
 * Returns the relationship Id that points at `target`, creating the
 * relationship when it does not exist yet. Re-using the existing Id matters:
 * a document that already owns header1.xml references it under its own Id
 * (e.g. "rId7"), and pointing document.xml at a different, non-existent Id
 * would leave a dangling reference.
 */
function ensureRelationship(
  relsXml: string,
  target: string,
  type: string,
  preferredId: string,
): { xml: string; id: string } {
  const usedIds = new Set<string>();
  const relTag = /<Relationship\s[^>]*>/g;
  for (let m = relTag.exec(relsXml); m !== null; m = relTag.exec(relsXml)) {
    const id = /\sId="([^"]*)"/.exec(m[0])?.[1];
    if (id === undefined) continue;
    usedIds.add(id);
    const existingTarget = /\sTarget="([^"]*)"/.exec(m[0])?.[1];
    if (existingTarget === target || existingTarget === `/word/${target}`) {
      return { xml: relsXml, id };
    }
  }
  let id = preferredId;
  for (let n = 2; usedIds.has(id); n++) id = `${preferredId}_${n}`;
  const closeAt = relsXml.lastIndexOf("</Relationships>");
  if (closeAt === -1) throw new Error("word/_rels/document.xml.rels has no </Relationships> closing tag");
  const entry = `<Relationship Id="${id}" Type="${type}" Target="${target}"/>`;
  return { xml: relsXml.slice(0, closeAt) + entry + relsXml.slice(closeAt), id };
}

/**
 * Points the document at the given header/footer relationships. For each kind
 * that is supplied, every existing reference of that kind is removed first so
 * references are never duplicated; the new ones go at the start of the first
 * <w:sectPr>. Returns null when no section properties can be placed.
 */
function setSectionReferences(docXml: string, headerRelId?: string, footerRelId?: string): string | null {
  let xml = docXml;
  let refs = "";
  if (headerRelId !== undefined) {
    xml = xml.replace(/<w:headerReference(?:\s[^>]*)?\/>/g, "");
    refs += `<w:headerReference w:type="default" r:id="${headerRelId}"/>`;
  }
  if (footerRelId !== undefined) {
    xml = xml.replace(/<w:footerReference(?:\s[^>]*)?\/>/g, "");
    refs += `<w:footerReference w:type="default" r:id="${footerRelId}"/>`;
  }
  return insertIntoFirstSectPr(xml, refs, []);
}

/** Sets (or appends) one attribute inside a raw attribute string such as ` w:w="1" w:h="2"`. */
function setXmlAttr(attrs: string, name: string, value: string | number): string {
  const existing = new RegExp(`(\\s)${name}="[^"]*"`);
  return existing.test(attrs)
    ? attrs.replace(existing, (_whole: string, ws: string) => `${ws}${name}="${value}"`)
    : `${attrs} ${name}="${value}"`;
}

/** Reads an integer attribute from a raw attribute string. */
function readIntAttr(attrs: string, name: string): number | undefined {
  const raw = new RegExp(`\\s${name}="([^"]*)"`).exec(attrs)?.[1];
  if (raw === undefined) return undefined;
  const value = parseInt(raw, 10);
  return Number.isFinite(value) ? value : undefined;
}

/**
 * Computes the attribute string of <w:pgSz> after a paper-size and/or
 * orientation change. Width and height are always written consistently with
 * the resulting orientation (w:orient alone does not rotate the page), and an
 * orientation that is not being changed is preserved.
 */
function updatePageSizeAttrs(
  attrs: string,
  paperSize: PaperSize | undefined,
  orientation: PageOrientation | undefined,
): string {
  const curW = readIntAttr(attrs, "w:w");
  const curH = readIntAttr(attrs, "w:h");
  const curOrient = /\sw:orient="([^"]*)"/.exec(attrs)?.[1];
  const currentlyLandscape =
    curW !== undefined && curH !== undefined ? curW > curH : curOrient === "landscape";
  const landscape = orientation !== undefined ? orientation === "landscape" : currentlyLandscape;

  let shortSide: number;
  let longSide: number;
  if (paperSize !== undefined) {
    shortSide = PAPER_SIZES[paperSize].w;
    longSide = PAPER_SIZES[paperSize].h;
  } else if (curW !== undefined && curH !== undefined) {
    shortSide = Math.min(curW, curH);
    longSide = Math.max(curW, curH);
  } else {
    // No usable dimensions in the document: fall back to Letter so that an
    // orientation change still has concrete numbers to act on.
    shortSide = PAPER_SIZES.letter.w;
    longSide = PAPER_SIZES.letter.h;
  }

  let next = setXmlAttr(attrs, "w:w", landscape ? longSide : shortSide);
  next = setXmlAttr(next, "w:h", landscape ? shortSide : longSide);
  if (orientation !== undefined || curOrient !== undefined) {
    next = setXmlAttr(next, "w:orient", landscape ? "landscape" : "portrait");
  }
  // w:code is a printer paper code tied to the old size; drop it when the size changes.
  if (paperSize !== undefined) next = next.replace(/\sw:code="[^"]*"/, "");
  return next;
}

/** Applies a paper-size/orientation change to the first <w:pgSz>, creating it if absent. */
function applyPageSize(
  xml: string,
  paperSize: PaperSize | undefined,
  orientation: PageOrientation | undefined,
): string | null {
  const pgSz = /<w:pgSz(\s[^>]*?)?\/>/;
  if (pgSz.test(xml)) {
    return xml.replace(pgSz, (_whole: string, attrs: string | undefined) =>
      `<w:pgSz${updatePageSizeAttrs(attrs ?? "", paperSize, orientation)}/>`);
  }
  const element = `<w:pgSz${updatePageSizeAttrs("", paperSize, orientation)}/>`;
  return insertIntoFirstSectPr(xml, element, BEFORE_PAGE_SIZE);
}

/**
 * Validates margins and rounds them to whole DXA units. Returns an error
 * message (string) when a value is unusable.
 */
function normalizeMargins(margins: PageMargins): PageMargins | string {
  const result: PageMargins = {};
  for (const side of ["top", "right", "bottom", "left"] as const) {
    const value = margins[side];
    if (value === undefined) continue;
    if (!Number.isFinite(value)) return `Error: Margin '${side}' must be a finite number.`;
    // Left/right margins are unsigned in the schema; top/bottom may be negative.
    if ((side === "left" || side === "right") && value < 0) {
      return `Error: Margin '${side}' must not be negative.`;
    }
    result[side] = Math.round(value);
  }
  return result;
}

/** Writes the given (already normalised) margins onto a <w:pgMar> attribute string. */
function updatePageMarginAttrs(attrs: string, margins: PageMargins): string {
  let next = attrs;
  for (const side of ["top", "right", "bottom", "left"] as const) {
    const value = margins[side];
    if (value !== undefined) next = setXmlAttr(next, `w:${side}`, value);
  }
  return next;
}

/** Applies margins to the first <w:pgMar>, creating a complete one (after <w:pgSz>) if absent. */
function applyPageMargins(xml: string, margins: PageMargins): string | null {
  const pgMar = /<w:pgMar(\s[^>]*?)?\/>/;
  if (pgMar.test(xml)) {
    return xml.replace(pgMar, (_whole: string, attrs: string | undefined) =>
      `<w:pgMar${updatePageMarginAttrs(attrs ?? "", margins)}/>`);
  }
  // All seven attributes are required on <w:pgMar>; start from 1-inch margins.
  const defaults =
    ` w:top="1440" w:right="1440" w:bottom="1440" w:left="1440"` +
    ` w:header="720" w:footer="720" w:gutter="0"`;
  const element = `<w:pgMar${updatePageMarginAttrs(defaults, margins)}/>`;
  return insertIntoFirstSectPr(xml, element, BEFORE_PAGE_MARGINS);
}

/** Builds the TOC heading paragraph plus the TOC field; `escapedTitle` must already be XML-escaped. */
function buildTocXml(escapedTitle: string, levels: number): string {
  // The template literal's "\\o" yields a single backslash in the emitted XML.
  return (
    `<w:p><w:pPr><w:pStyle w:val="TOCHeading"/></w:pPr>` +
    `<w:r><w:t xml:space="preserve">${escapedTitle}</w:t></w:r></w:p>` +
    `<w:sdt>` +
    `<w:sdtPr><w:docPartObj>` +
    `<w:docPartGallery w:val="Table of Contents"/>` +
    `<w:docPartUnique/>` +
    `</w:docPartObj></w:sdtPr>` +
    `<w:sdtContent>` +
    `<w:p><w:pPr><w:pStyle w:val="TOC1"/></w:pPr>` +
    `<w:r><w:fldChar w:fldCharType="begin" w:dirty="true"/></w:r>` +
    `<w:r><w:instrText xml:space="preserve"> TOC \\o "1-${levels}" \\h \\z \\u </w:instrText></w:r>` +
    `<w:r><w:fldChar w:fldCharType="separate"/></w:r>` +
    `<w:r><w:t>Right-click to update Table of Contents.</w:t></w:r>` +
    `<w:r><w:fldChar w:fldCharType="end"/></w:r>` +
    `</w:p>` +
    `</w:sdtContent>` +
    `</w:sdt>`
  );
}

/**
 * Builds the formatting tools for .docx files: run formatting, paragraph
 * styles, header/footer, page layout and table-of-contents insertion.
 *
 * Every tool resolves `filePath` against `workingDir`, checks readability,
 * calls `backupFile` before modifying anything, and reports failures as an
 * `Error: …` string instead of throwing.
 *
 * @param workingDir Base directory passed to `resolvePath` for relative paths.
 * @returns The five tool definitions.
 */
export function getFormattingTools(workingDir?: string): Tool[] {
  const resolve = (p: string) => resolvePath(p, workingDir);

  return [
    // 1. word_format_text
    tool({
      name: "word_format_text",
      description: "Apply bold, italic, underline, or color to all runs containing matching text. Formats the whole run. Creates a .bak backup.",
      parameters: {
        filePath: z.string(),
        find: z.string().describe("Text to find (all runs containing this text are formatted)"),
        bold: z.boolean().optional(),
        italic: z.boolean().optional(),
        underline: z.boolean().optional(),
        color: z.string().optional().describe("6-char hex color, e.g. 'FF0000' for red"),
        caseSensitive: z.boolean().optional(),
      },
      implementation: async ({ filePath, find, bold, italic, underline, color, caseSensitive }) => {
        filePath = resolve(filePath);
        const err = checkReadable(filePath);
        if (err) return `Error: ${err}`;

        // Validate arguments before creating a backup or touching the file.
        if (find.length === 0) return `Error: 'find' must not be empty.`;
        let hexColor: string | undefined;
        if (color) {
          const normalized = normalizeHexColor(color);
          if (normalized === null) {
            return `Error: Invalid color "${color}" — expected a 6-digit hex value such as 'FF0000'.`;
          }
          hexColor = normalized;
        }
        const edit = buildRunPropertyEdit({ bold, italic, underline, color: hexColor });
        if (edit.fragment === "") {
          return `Error: No formatting properties specified (bold, italic, underline, or color).`;
        }

        try {
          backupFile(filePath);
          const doc = await DocxDocument.load(filePath);
          const escaped = find.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
          // No `g` flag: the matcher is reused with .test() and must stay stateless.
          const matcher = new RegExp(escaped, caseSensitive ? "" : "i");
          const result = formatMatchingRuns(doc.getXml(), matcher, edit);
          doc.setXml(result.xml);
          await doc.save(filePath);
          return JSON.stringify({
            success: true, file: filePath, find,
            formatting_applied: { bold, italic, underline, color: hexColor },
            runs_formatted: result.count,
          });
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 2. word_set_paragraph_style
    tool({
      name: "word_set_paragraph_style",
      description: "Change the style of paragraphs containing specific text. Creates a .bak backup.",
      parameters: {
        filePath: z.string(),
        textContains: z.string().describe("Match paragraphs containing this text (case-insensitive)"),
        newStyle: z.string().describe("Style to apply, e.g. 'Heading1', 'Normal', 'Quote'"),
      },
      implementation: async ({ filePath, textContains, newStyle }) => {
        filePath = resolve(filePath);
        const err = checkReadable(filePath);
        if (err) return `Error: ${err}`;
        try {
          backupFile(filePath);
          const doc = await DocxDocument.load(filePath);
          const xml = doc.getXml();
          const bodyContent = extractBodyContent(xml);
          const spans = findTopLevelParaSpans(bodyContent);
          const needle = textContains.toLowerCase();
          const styleAttr = escapeXml(newStyle);

          // Single pass: copy the gaps between paragraphs verbatim and restyle
          // each paragraph whose visible text contains the needle.
          const pieces: string[] = [];
          let changed = 0;
          let cursor = 0;
          for (const span of spans) {
            pieces.push(bodyContent.slice(cursor, span.start));
            const paraXml = bodyContent.slice(span.start, span.end);
            if (extractVisibleText(paraXml).toLowerCase().includes(needle)) {
              changed++;
              pieces.push(applyParagraphStyle(paraXml, styleAttr));
            } else {
              pieces.push(paraXml);
            }
            cursor = span.end;
          }
          if (changed === 0) return `No paragraphs found containing: "${textContains}"`;
          pieces.push(bodyContent.slice(cursor));

          const updatedXml = replaceBodyContent(xml, pieces.join(""));
          if (updatedXml === null) return `Error: Could not locate <w:body> in the document XML.`;
          doc.setXml(updatedXml);
          await doc.save(filePath);
          return JSON.stringify({ success: true, file: filePath, paragraphs_changed: changed, new_style: newStyle });
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 3. word_add_header_footer
    tool({
      name: "word_add_header_footer",
      description: "Add or replace header and/or footer of a .docx document. Creates a .bak backup.",
      parameters: {
        filePath: z.string(),
        headerText: z.string().optional().describe("Header text (omit to leave header unchanged)"),
        footerText: z.string().optional().describe("Footer text (omit to leave footer unchanged)"),
        align: z.enum(["left", "center", "right"]).optional().describe("Text alignment (default: center)"),
      },
      implementation: async ({ filePath, headerText, footerText, align }) => {
        filePath = resolve(filePath);
        const err = checkReadable(filePath);
        if (err) return `Error: ${err}`;
        if (headerText === undefined && footerText === undefined) {
          return `Error: Provide at least one of headerText or footerText.`;
        }
        try {
          backupFile(filePath);
          const jc = align ?? "center";
          const zip = await JSZip.loadAsync(fs.readFileSync(filePath));

          // All three parts must be updated together; a package missing any of
          // them cannot be given a working header/footer.
          const ctFile = zip.file("[Content_Types].xml");
          const relsFile = zip.file("word/_rels/document.xml.rels");
          const docFile = zip.file("word/document.xml");
          if (!ctFile || !relsFile || !docFile) {
            return `Error: ${filePath} is not a complete .docx package ` +
              `(missing [Content_Types].xml, word/_rels/document.xml.rels, or word/document.xml).`;
          }
          let contentTypes = await ctFile.async("string");
          let rels = await relsFile.async("string");
          const docXml = await docFile.async("string");

          const changes: string[] = [];
          let headerRelId: string | undefined;
          let footerRelId: string | undefined;

          if (headerText !== undefined) {
            zip.file("word/header1.xml", buildHeaderFooterPart("hdr", escapeXml(headerText), jc));
            contentTypes = ensureContentTypeOverride(contentTypes, "/word/header1.xml", HEADER_CONTENT_TYPE);
            const rel = ensureRelationship(rels, "header1.xml", HEADER_REL_TYPE, "rIdHdr1");
            rels = rel.xml;
            headerRelId = rel.id;
            changes.push("header");
          }
          if (footerText !== undefined) {
            zip.file("word/footer1.xml", buildHeaderFooterPart("ftr", escapeXml(footerText), jc));
            contentTypes = ensureContentTypeOverride(contentTypes, "/word/footer1.xml", FOOTER_CONTENT_TYPE);
            const rel = ensureRelationship(rels, "footer1.xml", FOOTER_REL_TYPE, "rIdFtr1");
            rels = rel.xml;
            footerRelId = rel.id;
            changes.push("footer");
          }

          const updatedDoc = setSectionReferences(docXml, headerRelId, footerRelId);
          if (updatedDoc === null) {
            return `Error: Could not locate <w:sectPr> or </w:body> in word/document.xml.`;
          }

          zip.file("[Content_Types].xml", contentTypes);
          zip.file("word/_rels/document.xml.rels", rels);
          zip.file("word/document.xml", updatedDoc);
          const buf = await zip.generateAsync({
            type: "nodebuffer", compression: "DEFLATE", compressionOptions: { level: 6 },
          });
          fs.writeFileSync(filePath, buf);
          return JSON.stringify({ success: true, file: filePath, changes_applied: changes });
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 4. word_set_page_layout
    tool({
      name: "word_set_page_layout",
      description: "Change page margins, orientation, or paper size of a .docx. Creates a .bak backup.",
      parameters: {
        filePath: z.string(),
        orientation: z.enum(["portrait", "landscape"]).optional(),
        paperSize: z.enum(["letter", "a4", "legal"]).optional(),
        margins: z.object({
          top: z.number().optional().describe("Top margin in DXA (1440 = 1 inch)"),
          bottom: z.number().optional().describe("Bottom margin in DXA"),
          left: z.number().optional().describe("Left margin in DXA"),
          right: z.number().optional().describe("Right margin in DXA"),
        }).optional(),
      },
      implementation: async ({ filePath, orientation, paperSize, margins }) => {
        filePath = resolve(filePath);
        const err = checkReadable(filePath);
        if (err) return `Error: ${err}`;

        // Validate arguments before creating a backup or touching the file.
        let marginUpdate: PageMargins | undefined;
        if (margins) {
          const checked = normalizeMargins(margins);
          if (typeof checked === "string") return checked;
          if (Object.keys(checked).length > 0) marginUpdate = checked;
        }
        const wantsPageSize = paperSize !== undefined || orientation !== undefined;
        if (!wantsPageSize && marginUpdate === undefined) {
          return `Error: No layout changes specified (orientation, paperSize, or margins).`;
        }

        try {
          backupFile(filePath);
          const doc = await DocxDocument.load(filePath);
          let xml = doc.getXml();
          const changes: string[] = [];

          // Note: only the first <w:pgSz>/<w:pgMar> (first section) is modified.
          if (wantsPageSize) {
            const next = applyPageSize(xml, paperSize, orientation);
            if (next === null) return `Error: Could not locate <w:sectPr> or </w:body> in the document XML.`;
            xml = next;
            changes.push(`paper=${paperSize ?? "unchanged"} orientation=${orientation ?? "unchanged"}`);
          }
          if (marginUpdate !== undefined) {
            const next = applyPageMargins(xml, marginUpdate);
            if (next === null) return `Error: Could not locate <w:sectPr> or </w:body> in the document XML.`;
            xml = next;
            changes.push(`margins=${JSON.stringify(marginUpdate)}`);
          }

          doc.setXml(xml);
          await doc.save(filePath);
          return JSON.stringify({ success: true, file: filePath, changes });
        } catch (e) { return `Error: ${e}`; }
      },
    }),

    // 5. word_add_toc
    tool({
      name: "word_add_toc",
      description: "Insert a Table of Contents placeholder at the start of a .docx. Populate with F9 in Word/LibreOffice. Creates a .bak backup.",
      parameters: {
        filePath: z.string(),
        title: z.string().optional().describe("TOC title text (default: 'Table of Contents')"),
        maxLevel: z.number().int().min(1).max(9).optional().describe("Max heading level to include (default: 3)"),
      },
      implementation: async ({ filePath, title, maxLevel }) => {
        filePath = resolve(filePath);
        const err = checkReadable(filePath);
        if (err) return `Error: ${err}`;
        try {
          backupFile(filePath);
          const doc = await DocxDocument.load(filePath);
          const levels = maxLevel ?? 3;
          const tocXml = buildTocXml(escapeXml(title ?? "Table of Contents"), levels);
          // Spliced by index, not via a replacement string, so '$' in the title stays literal.
          const updatedXml = insertAtBodyStart(doc.getXml(), tocXml);
          if (updatedXml === null) return `Error: Could not locate <w:body> in the document XML.`;
          doc.setXml(updatedXml);
          await doc.save(filePath);
          return JSON.stringify({
            success: true, file: filePath,
            toc_title: title ?? "Table of Contents", max_heading_level: levels,
            note: "Open in Word or LibreOffice and press F9 to populate the TOC.",
          });
        } catch (e) { return `Error: ${e}`; }
      },
    }),
  ];
}