// Project Files
// src/docxWrite.ts
import * as fs from "node:fs/promises";
import { marked, type Token, type Tokens } from "marked";
import {
AlignmentType,
Document,
ExternalHyperlink,
HeadingLevel,
LevelFormat,
PageBreak,
Packer,
Paragraph,
ShadingType,
TextRun,
convertMillimetersToTwip,
type ISectionOptions,
type IRunOptions,
type ParagraphChild,
} from "docx";
/**
 * Formatting options accepted by `writeDocx`.
 */
export interface WriteDocxOptions {
  /** Optional title passed to the generated document. */
  title?: string;
  /** Font applied to the document's default run style. */
  fontFamily: string;
  /** Default font size in points (doubled into half-points for docx). */
  fontSizePt: number;
  /** Margin preset applied to all four page edges. */
  pageMargin: "normal" | "narrow" | "wide";
  /**
   * When true, ordered Markdown lists use decimal numbering; when false,
   * every list is rendered with bullets.
   */
  preserveListStyles: boolean;
}
/**
 * Summary returned by `writeDocx` after the file has been written.
 */
export interface WriteDocxResult {
  /** Length of the packed document buffer that was written. */
  bytes: number;
  /** Number of top-level paragraphs placed in the document section. */
  paragraphs: number;
  /** Human-readable notes about content that could not be rendered faithfully. */
  warnings: string[];
}
/** Any one of the values exposed by docx's `HeadingLevel`. */
type HeadingLevelValue = (typeof HeadingLevel)[keyof typeof HeadingLevel];

/** Maps a Markdown heading depth (1–6) to the matching docx heading level. */
const HEADING_LEVELS: Record<number, HeadingLevelValue> = {
  1: HeadingLevel.HEADING_1,
  2: HeadingLevel.HEADING_2,
  3: HeadingLevel.HEADING_3,
  4: HeadingLevel.HEADING_4,
  5: HeadingLevel.HEADING_5,
  6: HeadingLevel.HEADING_6,
};
/** Page margin, in millimetres, for each `pageMargin` preset. */
const MARGIN_MM: Record<WriteDocxOptions["pageMargin"], number> = {
  normal: 25.4, // 1 inch
  narrow: 12.7, // 0.5 inch
  wide: 50.8, // 2 inches
};

/** Name of the numbering definition shared by every numbered list paragraph. */
const NUMBERED_REF = "lms-numbered";
/**
 * Renders Markdown into a Word (.docx) document and writes it to disk.
 *
 * The Markdown is tokenised with `marked`, the block tokens are converted to
 * docx paragraphs by `buildBlocks`, and the packed document buffer is written
 * with `fs.writeFile`.
 *
 * @param absPath - Destination path, handed to `fs.writeFile` as-is.
 * @param markdown - Markdown source to convert.
 * @param opts - Font, size, margin, list-style and title settings.
 * @returns The size of the written buffer, the number of top-level paragraphs
 *   produced, and any warnings collected during conversion.
 * @throws Propagates any rejection from `Packer.toBuffer` or `fs.writeFile`.
 */
export async function writeDocx(
  absPath: string,
  markdown: string,
  opts: WriteDocxOptions,
): Promise<WriteDocxResult> {
  const tokens = marked.lexer(markdown);
  const warnings: string[] = [];
  const children = buildBlocks(tokens, { warnings, opts, listDepth: 0 });

  // The same margin is applied to all four edges.
  const margin = convertMillimetersToTwip(MARGIN_MM[opts.pageMargin]);
  // docx expresses run sizes in half-points.
  const sizeHalfPt = opts.fontSizePt * 2;

  const section: ISectionOptions = {
    properties: {
      page: {
        margin: { top: margin, right: margin, bottom: margin, left: margin },
      },
    },
    children,
  };

  const doc = new Document({
    title: opts.title,
    styles: {
      default: {
        document: {
          run: { font: opts.fontFamily, size: sizeHalfPt },
        },
      },
    },
    creator: "lms-plugin-docx",
    numbering: {
      config: [
        {
          reference: NUMBERED_REF,
          // Define all nine levels Word supports (0–8). List paragraphs use
          // their Markdown nesting depth as the level, so an ordered list
          // nested deeper than the defined levels would otherwise point at a
          // level with no number format.
          levels: [0, 1, 2, 3, 4, 5, 6, 7, 8].map((i) => ({
            level: i,
            format: LevelFormat.DECIMAL,
            text: `%${i + 1}.`,
            alignment: AlignmentType.START,
            style: {
              paragraph: {
                indent: { left: 720 * (i + 1), hanging: 360 },
              },
            },
          })),
        },
      ],
    },
    sections: [section],
  });

  const buf = await Packer.toBuffer(doc);
  await fs.writeFile(absPath, buf);

  return {
    bytes: buf.length,
    paragraphs: countParagraphs(children),
    warnings,
  };
}
/**
 * State threaded through the token walkers while a document is being built.
 */
interface WalkCtx {
  /** Options supplied by the caller of `writeDocx`. */
  opts: WriteDocxOptions;
  /** Nesting depth of the list being rendered; 0 for a top-level list. */
  listDepth: number;
  /** Collects notes about tokens that were rendered in a degraded form. */
  warnings: string[];
}
/**
 * Converts a sequence of block-level tokens into docx paragraphs.
 *
 * @param tokens - Block tokens, processed in order.
 * @param ctx - Walker state (options, warnings sink, list depth).
 * @returns The paragraphs produced for all tokens, in document order.
 */
function buildBlocks(tokens: Token[], ctx: WalkCtx): Paragraph[] {
  const paragraphs: Paragraph[] = [];
  tokens.forEach((token) => {
    pushBlock(token, ctx, paragraphs);
  });
  return paragraphs;
}
/**
 * Appends the paragraph(s) for one block-level token to `out`.
 *
 * Tokens without a dedicated case are rendered as plain text and reported
 * through `ctx.warnings`.
 *
 * @param tok - Block token to render.
 * @param ctx - Walker state; `ctx.warnings` receives degradation notes.
 * @param out - Paragraph list that is appended to in place.
 */
function pushBlock(tok: Token, ctx: WalkCtx, out: Paragraph[]): void {
  switch (tok.type) {
    case "space":
      // Blank lines carry no content of their own.
      break;

    case "heading": {
      const heading = tok as Tokens.Heading;
      // Clamp so an out-of-range depth still maps to a defined heading level.
      const depth = Math.min(Math.max(heading.depth, 1), 6) as 1 | 2 | 3 | 4 | 5 | 6;
      const runs = inlineRuns(heading.tokens ?? [], {}, ctx);
      out.push(new Paragraph({ heading: HEADING_LEVELS[depth], children: runs }));
      break;
    }

    case "paragraph": {
      const paragraph = tok as Tokens.Paragraph;
      const runs = inlineRuns(paragraph.tokens ?? [], {}, ctx);
      out.push(new Paragraph({ children: runs }));
      break;
    }

    case "blockquote": {
      const quote = tok as Tokens.Blockquote;
      // Quotes become indented italic paragraphs rather than Word's "Quote"
      // style: mammoth's markdown writer has no <blockquote> handler and
      // would merge the text into the following paragraph. Italic + indent
      // keeps paragraph boundaries on round-trip (`>` comes back as
      // `*italic*`).
      for (const child of quote.tokens ?? []) {
        if (child.type !== "paragraph") {
          pushBlock(child, ctx, out);
          continue;
        }
        const quoted = child as Tokens.Paragraph;
        const runs = inlineRuns(quoted.tokens ?? [], { italics: true }, ctx);
        out.push(new Paragraph({ indent: { left: 360 }, children: runs }));
      }
      break;
    }

    case "list":
      pushList(tok as Tokens.List, ctx, out);
      break;

    case "code": {
      const code = tok as Tokens.Code;
      // Word has no real "code block" style, so every source line becomes a
      // shaded paragraph holding a single monospace run.
      code.text.split("\n").forEach((sourceLine) => {
        // A blank line is written as one space.
        const text = sourceLine.length ? sourceLine : " ";
        out.push(
          new Paragraph({
            shading: { type: ShadingType.CLEAR, color: "auto", fill: "F5F5F5" },
            children: [new TextRun({ text, font: "Consolas" })],
          }),
        );
      });
      break;
    }

    case "hr":
      // A horizontal rule is rendered as a page break.
      out.push(new Paragraph({ children: [new PageBreak()] }));
      break;

    case "html":
    case "text": {
      const content = ("text" in tok ? tok.text : "") as string;
      if (content.trim().length > 0) {
        out.push(new Paragraph({ children: [new TextRun({ text: content })] }));
      }
      break;
    }

    case "table": {
      // i1 — render as fenced text. i2 will produce a real Table.
      const table = tok as Tokens.Table;
      ctx.warnings.push("table rendered as plain text (i2 will support real tables)");
      const headerLine = table.header.map((cell) => cell.text).join(" | ");
      const bodyLines = table.rows.map((row) => row.map((cell) => cell.text).join(" | "));
      for (const rowLine of [headerLine, ...bodyLines]) {
        out.push(
          new Paragraph({
            children: [new TextRun({ text: rowLine, font: "Consolas" })],
          }),
        );
      }
      break;
    }

    default: {
      const generic = tok as { type: string; raw?: string; text?: string };
      ctx.warnings.push(`unsupported block token "${generic.type}" rendered as plain text`);
      // Prefer the parsed text; fall back to the raw source.
      const fallback = generic.text ?? generic.raw ?? "";
      if (fallback.trim().length) {
        out.push(new Paragraph({ children: [new TextRun({ text: fallback })] }));
      }
    }
  }
}
/**
 * Appends the paragraphs for a Markdown list (and any nested lists) to `out`.
 *
 * The first block of each item carries the bullet or number marker; later
 * blocks in the same item become indented continuation paragraphs. Ordered
 * lists are numbered only when `ctx.opts.preserveListStyles` is set; every
 * other list is bulleted.
 *
 * NOTE(review): every numbered paragraph points at the same numbering
 * reference without a per-list `instance`, so separate ordered lists may
 * continue one another's numbering in Word — confirm against the docx
 * numbering documentation.
 *
 * @param list - The list token to render.
 * @param ctx - Walker state; `ctx.listDepth` selects the list level.
 * @param out - Paragraph list that is appended to in place.
 */
function pushList(list: Tokens.List, ctx: WalkCtx, out: Paragraph[]): void {
  // Word lists have nine levels (0–8); deeper Markdown nesting is rendered at
  // the deepest available level instead of referencing a level that cannot
  // exist.
  const MAX_LIST_LEVEL = 8;
  const level = Math.min(ctx.listDepth, MAX_LIST_LEVEL);
  const useNumbering = list.ordered && ctx.opts.preserveListStyles;

  // Fresh marker options for each paragraph that shows a bullet or number.
  const marker = () =>
    useNumbering
      ? { numbering: { reference: NUMBERED_REF, level } }
      : { bullet: { level } };
  // Marker-only paragraph for items that have no text of their own.
  const placeholder = () =>
    new Paragraph({ ...marker(), children: [new TextRun({ text: " " })] });

  const nestedCtx: WalkCtx = { ...ctx, listDepth: level + 1 };

  for (const item of list.items) {
    let leadEmitted = false;

    for (const inner of item.tokens ?? []) {
      // Blank-line tokens separate blocks in loose lists; they carry no
      // content and must not become (or steal the marker from) a paragraph.
      if (inner.type === "space") continue;

      if (inner.type === "list") {
        // An item that starts with a nested list still needs its own marker,
        // and that marker has to precede the children.
        if (!leadEmitted) {
          out.push(placeholder());
          leadEmitted = true;
        }
        pushList(inner as Tokens.List, nestedCtx, out);
        continue;
      }

      out.push(
        new Paragraph({
          children: leadingRuns(inner, ctx),
          ...(leadEmitted ? { indent: { left: 720 * (level + 1) } } : marker()),
        }),
      );
      leadEmitted = true;
    }

    if (!leadEmitted) {
      // Empty list item — emit a placeholder so the marker is still visible.
      out.push(placeholder());
    }
  }
}
/**
 * Extracts the inline runs for a block-level token that sits inside a list
 * item.
 *
 * `text` and `paragraph` tokens are rendered through `inlineRuns`; any other
 * block (code, blockquote, …) is flattened to a single plain run in i1.
 *
 * @param tok - Block token taken from a list item's children.
 * @param ctx - Walker state forwarded to `inlineRuns`.
 * @returns The runs that make up the list paragraph.
 */
function leadingRuns(tok: Token, ctx: WalkCtx): ParagraphChild[] {
  switch (tok.type) {
    case "text": {
      const textTok = tok as Tokens.Text;
      // A text token without parsed children is wrapped so it can still go
      // through the inline walker.
      const inline =
        textTok.tokens ?? [{ type: "text", raw: textTok.text, text: textTok.text } as Token];
      return inlineRuns(inline, {}, ctx);
    }
    case "paragraph": {
      const paragraph = tok as Tokens.Paragraph;
      return inlineRuns(paragraph.tokens ?? [], {}, ctx);
    }
    default: {
      const flattened = "text" in tok ? (tok as { text?: string }).text ?? "" : "";
      return [new TextRun({ text: flattened })];
    }
  }
}
/**
 * Inline formatting inherited from enclosing tokens while walking inline
 * content.
 */
interface RunStyle {
  /** Render the run in bold. */
  bold?: boolean;
  /** Render the run in italics. */
  italics?: boolean;
  /** Render the run in the monospace (Consolas) font. */
  code?: boolean;
}
/**
 * Converts a sequence of inline tokens into paragraph children.
 *
 * @param tokens - Inline tokens, processed in order.
 * @param style - Formatting inherited from the enclosing context.
 * @param ctx - Walker state forwarded to `inlineOne`.
 * @returns The runs and hyperlinks produced for all tokens.
 */
function inlineRuns(tokens: Token[], style: RunStyle, ctx: WalkCtx): ParagraphChild[] {
  const children: ParagraphChild[] = [];
  tokens.forEach((token) => {
    inlineOne(token, style, ctx, children);
  });
  return children;
}
/**
 * Run style plus the flags that only the inline walker needs to carry down
 * through nested tokens.
 */
interface InlineStyle extends RunStyle {
  /** Inside `~~…~~`; the run is struck through. */
  strike?: boolean;
  /** Inside a link; the run uses the "Hyperlink" character style. */
  hyperlink?: boolean;
}

/** Builds one TextRun for `text`, applying every flag set in `style`. */
function styledRun(text: string, style: InlineStyle): TextRun {
  return new TextRun({
    ...runOpts(text, style),
    // Only add the keys when the flag is set so plain runs stay unchanged.
    ...(style.strike ? { strike: true } : {}),
    ...(style.hyperlink ? { style: "Hyperlink" } : {}),
  });
}

/** Recursive worker behind `inlineOne`; threads the extended style flags. */
function walkInline(
  tok: Token,
  style: InlineStyle,
  ctx: WalkCtx,
  out: ParagraphChild[],
): void {
  switch (tok.type) {
    case "text": {
      const t = tok as Tokens.Text;
      if (t.tokens && t.tokens.length) {
        for (const inner of t.tokens) walkInline(inner, style, ctx, out);
      } else {
        out.push(styledRun(t.text, style));
      }
      return;
    }
    case "strong": {
      const s = tok as Tokens.Strong;
      for (const inner of s.tokens ?? []) {
        walkInline(inner, { ...style, bold: true }, ctx, out);
      }
      return;
    }
    case "em": {
      const e = tok as Tokens.Em;
      for (const inner of e.tokens ?? []) {
        walkInline(inner, { ...style, italics: true }, ctx, out);
      }
      return;
    }
    case "codespan": {
      const c = tok as Tokens.Codespan;
      // Keep the surrounding bold/italic/strike while switching to monospace.
      out.push(styledRun(c.text, { ...style, code: true }));
      return;
    }
    case "link": {
      const l = tok as Tokens.Link;
      const linkStyle: InlineStyle = { ...style, hyperlink: true };
      const collected: ParagraphChild[] = [];
      for (const inner of l.tokens ?? []) {
        walkInline(inner, linkStyle, ctx, collected);
      }
      // Only text runs are kept as link children; a nested hyperlink is
      // dropped.
      const linkRuns = collected.filter((r): r is TextRun => r instanceof TextRun);
      out.push(
        new ExternalHyperlink({
          link: l.href,
          children: linkRuns.length ? linkRuns : [styledRun(l.text, linkStyle)],
        }),
      );
      return;
    }
    case "br": {
      out.push(new TextRun({ text: "", break: 1 }));
      return;
    }
    case "del": {
      const d = tok as Tokens.Del;
      for (const inner of d.tokens ?? []) {
        walkInline(inner, { ...style, strike: true }, ctx, out);
      }
      return;
    }
    case "image": {
      const i = tok as Tokens.Image;
      ctx.warnings.push(`inline image "${i.href}" rendered as text (i3 will embed)`);
      out.push(styledRun(`[image: ${i.text || i.href}]`, style));
      return;
    }
    case "html":
    case "escape": {
      const text = "text" in tok ? (tok as { text: string }).text : "";
      out.push(styledRun(text, style));
      return;
    }
    default: {
      // Unknown inline token: keep its text if it has any.
      const text = "text" in tok ? ((tok as { text?: string }).text ?? "") : "";
      if (text) out.push(styledRun(text, style));
    }
  }
}

/**
 * Appends the docx run(s) for one inline token to `out`.
 *
 * Formatting nests: `strong`, `em`, `del` and `link` tokens pass their
 * formatting on to every run produced by their children, and `codespan`
 * keeps the inherited formatting while switching to the monospace font.
 * Images are rendered as a text placeholder and reported through
 * `ctx.warnings`.
 *
 * @param tok - Inline token to render.
 * @param style - Formatting inherited from enclosing tokens.
 * @param ctx - Walker state; `ctx.warnings` receives degradation notes.
 * @param out - Child list that is appended to in place.
 */
function inlineOne(
  tok: Token,
  style: RunStyle,
  ctx: WalkCtx,
  out: ParagraphChild[],
): void {
  walkInline(tok, style, ctx, out);
}
/**
 * Builds the docx run options for a piece of text.
 *
 * @param text - Text content of the run.
 * @param style - Inline formatting flags to apply.
 * @returns Options carrying the text, bold/italics flags, and the Consolas
 *   font when `style.code` is set (otherwise no font override).
 */
function runOpts(text: string, style: RunStyle): IRunOptions {
  const { bold, italics, code } = style;
  const font = code ? "Consolas" : undefined;
  return { text, bold, italics, font };
}
/**
 * Counts the paragraphs that were produced for the document section.
 *
 * @param blocks - Top-level paragraphs of the section.
 * @returns The number of entries in `blocks`.
 */
function countParagraphs(blocks: Paragraph[]): number {
  const { length: total } = blocks;
  return total;
}