// Project file: src/parsers/xlsxParser.ts
import ExcelJS from "exceljs";
import * as XLSX from "xlsx";
/**
 * Convert a single ExcelJS cell value into plain text.
 *
 * ExcelJS represents non-primitive cells as plain objects (rich text,
 * formulas, hyperlinks, errors). Each known shape is unwrapped explicitly so
 * that no cell is ever rendered as the literal string "[object Object]".
 *
 * @param value - The raw `cell.value` (or a nested formula result / hyperlink text).
 * @returns The textual content of the cell; `""` for empty, unresolvable or
 *          unrecognised values.
 */
function cellValueToText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  if (value instanceof Date) {
    // toISOString() throws RangeError on an invalid Date; one bad cell must
    // not abort parsing of the whole workbook.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }
  if (typeof value !== "object") {
    // string | number | boolean (and any other primitive)
    return String(value);
  }

  const obj = value as Record<string, unknown>;

  // Rich text: concatenate the text of every run.
  if (Array.isArray(obj.richText)) {
    return obj.richText
      .map((run: { text?: unknown }) => String(run?.text ?? ""))
      .join("");
  }
  // Formula with a cached result: the result may itself be a Date or an
  // error object, so unwrap it recursively instead of stringifying it.
  if ("result" in obj) {
    return cellValueToText(obj.result);
  }
  // Formula that was never calculated: there is no value to show.
  if ("formula" in obj || "sharedFormula" in obj) {
    return "";
  }
  // Error cell, e.g. { error: "#N/A" }.
  if ("error" in obj) {
    return String(obj.error ?? "");
  }
  // Hyperlink cell: keep the display text (which may itself be rich text).
  if ("text" in obj) {
    return cellValueToText(obj.text);
  }
  // Unknown object shape: emit nothing rather than "[object Object]".
  return "";
}

/**
 * Parse an .xlsx workbook with ExcelJS into tab-separated plain text.
 *
 * Every sheet that has at least one non-empty row is emitted as a
 * `Sheet: <name>` header followed by one line per row, with cells joined by
 * tabs. Sheets are separated by a blank line. Rich text, formula results,
 * hyperlinks, error values and dates are converted to readable text.
 *
 * @param filePath - Path of the .xlsx file to read.
 * @returns The extracted text, or `""` if the file cannot be read or parsed
 *          (the error is logged, not thrown).
 */
export async function parseXLSX(filePath: string): Promise<string> {
  try {
    const workbook = new ExcelJS.Workbook();
    await workbook.xlsx.readFile(filePath);

    const sheetParts: string[] = [];
    workbook.eachSheet((sheet) => {
      const rows: string[] = [];
      sheet.eachRow({ includeEmpty: false }, (row) => {
        const cells: string[] = [];
        // includeEmpty keeps column positions aligned across rows.
        row.eachCell({ includeEmpty: true }, (cell) => {
          cells.push(cellValueToText(cell.value));
        });
        rows.push(cells.join("\t"));
      });
      if (rows.length > 0) {
        sheetParts.push(`Sheet: ${sheet.name}\n${rows.join("\n")}`);
      }
    });

    return sheetParts.join("\n\n").trim();
  } catch (error) {
    // Best-effort parser: log with context and fall back to empty text.
    console.error(`Error parsing XLSX file ${filePath}:`, error);
    return "";
  }
}
/**
 * Extract tab-separated text from a legacy binary Excel file (.xls) via SheetJS.
 *
 * Each sheet containing at least one non-blank row is rendered as a
 * `Sheet: <name>` header followed by its rows (cells joined by tabs); sheets
 * are separated by a blank line. On any failure the error is logged and `""`
 * is returned.
 *
 * Security note: the SheetJS (`xlsx`) package has known CVEs for
 * prototype pollution and ReDoS on untrusted input. Formula parsing is
 * switched off (`cellFormula: false`) to remove the ReDoS vector. This parser
 * is intended only for LOCAL files the user placed in their own document
 * directory and should never be exposed to untrusted input.
 */
export function parseXLS(filePath: string): string {
  // A row counts as content when some cell is non-nullish and non-blank.
  const hasContent = (row: string[]): boolean =>
    row.some((cell) => cell != null && String(cell).trim().length > 0);

  try {
    const book = XLSX.readFile(filePath, {
      cellFormula: false, // disables the ReDoS vector
      cellHTML: false,
    });

    const sections = book.SheetNames.flatMap((name) => {
      const grid = XLSX.utils.sheet_to_json<string[]>(book.Sheets[name], {
        header: 1,
        defval: "",
        raw: false,
      }) as string[][];

      const lines = grid
        .filter(hasContent)
        .map((row) => row.map((cell) => String(cell)).join("\t"));

      // Sheets without any content contribute nothing to the output.
      return lines.length === 0 ? [] : [`Sheet: ${name}\n${lines.join("\n")}`];
    });

    return sections.join("\n\n").trim();
  } catch (error) {
    console.error(`Error parsing XLS file ${filePath}:`, error);
    return "";
  }
}