Project Files
src / tools / data.ts
import { tool, type Tool } from "@lmstudio/sdk";
import { z } from "zod";
import * as fs from "fs";
import * as path from "path";
import ExcelJS from "exceljs";
import {
loadWorkbook, saveWorkbook, getSheet, worksheetToJson,
jsonToWorksheet, matchesFilters, preview, clearWorksheet,
type FilterCondition,
} from "../utils/workbook";
import { checkReadable, ensureDir, backupFile, resolvePath } from "../utils/config";
const filterSchema = z.array(z.object({
column: z.string(),
operator: z.enum(["=","!=",">","<",">=","<=","contains","startsWith","endsWith","regex","isNull","isNotNull"]),
value: z.union([z.string(), z.number()]).optional(),
})).describe("Filter conditions (AND logic)");
export function getDataTools(workingDir: string): Tool[] {
return [
// ── 1. excel_read ────────────────────────────────────────────────────────
tool({
name: "excel_read",
description: "Read rows from any Excel or CSV file (.xlsx .xlsm .xls .ods .csv .tsv). Supports row range and pagination.",
parameters: {
filePath: z.string(),
sheet: z.string().optional().describe("Sheet name (default: first sheet)"),
startRow: z.number().optional().describe("First data row index, 0-based (default: 0)"),
endRow: z.number().optional().describe("Last row index exclusive (default: all)"),
maxRows: z.number().optional().describe("Max rows to return (default: 200)"),
},
implementation: async ({ filePath, sheet, startRow, endRow, maxRows }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const all = worksheetToJson(ws);
const s = startRow ?? 0;
const e = endRow ?? all.length;
const rows = all.slice(s, e).slice(0, maxRows ?? 200);
return JSON.stringify({ file: filePath, sheet: ws.name, total_rows: all.length,
returned_rows: rows.length, range: `${s}–${Math.min(e, all.length)-1}`,
columns: rows[0] ? Object.keys(rows[0]) : [], data: rows }, null, 2);
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 2. excel_write ───────────────────────────────────────────────────────
tool({
name: "excel_write",
description: "Create a new .xlsx file from row objects. Applies bold headers, auto-width columns. String values starting with '=' are Excel formulas.",
parameters: {
filePath: z.string(),
data: z.array(z.record(z.union([z.string(), z.number(), z.null()]))),
sheetName: z.string().optional().describe("Sheet name (default: Sheet1)"),
boldHeaders: z.boolean().optional(),
autoWidth: z.boolean().optional(),
},
implementation: async ({ filePath, data, sheetName, boldHeaders, autoWidth }) => {
try {
filePath = resolvePath(filePath, workingDir);
ensureDir(filePath);
const wb = new ExcelJS.Workbook();
const ws = wb.addWorksheet(sheetName ?? "Sheet1");
jsonToWorksheet(ws, data, { boldHeaders: boldHeaders ?? true, autoWidth: autoWidth ?? true });
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
rows_written: data.length, columns: data[0] ? Object.keys(data[0]) : [],
preview: preview(data) });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 3. excel_append_rows ─────────────────────────────────────────────────
tool({
name: "excel_append_rows",
description: "Append rows to an existing sheet. Validates that column names match existing headers.",
parameters: {
filePath: z.string(),
rows: z.array(z.record(z.union([z.string(), z.number(), z.null()]))),
sheet: z.string().optional(),
},
implementation: async ({ filePath, rows, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const existing = worksheetToJson(ws);
if (existing.length > 0 && rows.length > 0) {
const known = new Set(Object.keys(existing[0]));
const unknown = Object.keys(rows[0]).filter(k => !known.has(k));
if (unknown.length) return `Error: Unknown columns: ${unknown.join(", ")}. Expected: ${[...known].join(", ")}`;
}
const updated = [...existing, ...rows];
clearWorksheet(ws); // clear
jsonToWorksheet(ws, updated);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
rows_before: existing.length, rows_appended: rows.length, total_rows: updated.length,
preview_appended: preview(rows) });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 4. excel_filter ──────────────────────────────────────────────────────
tool({
name: "excel_filter",
description: "Return rows matching filter conditions. Supports =, !=, >, <, >=, <=, contains, startsWith, endsWith, regex, isNull, isNotNull.",
parameters: {
filePath: z.string(),
filters: filterSchema,
sheet: z.string().optional(),
},
implementation: async ({ filePath, filters, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
const matched = rows.filter(r => matchesFilters(r, filters as FilterCondition[]));
return JSON.stringify({ file: filePath, sheet: ws.name, filters,
total_rows: rows.length, matched_rows: matched.length, data: matched }, null, 2);
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 5. excel_sort ────────────────────────────────────────────────────────
tool({
name: "excel_sort",
description: "Sort a sheet by one or more columns and save.",
parameters: {
filePath: z.string(),
sortBy: z.array(z.object({ column: z.string(), direction: z.enum(["asc","desc"]).optional() })),
sheet: z.string().optional(),
},
implementation: async ({ filePath, sortBy, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
const sorted = [...rows].sort((a, b) => {
for (const { column, direction } of sortBy) {
const d = direction === "desc" ? -1 : 1;
const av = a[column]; const bv = b[column];
if (av === bv) continue;
if (typeof av === "number" && typeof bv === "number") return (av - bv) * d;
return String(av ?? "").localeCompare(String(bv ?? "")) * d;
}
return 0;
});
clearWorksheet(ws);
jsonToWorksheet(ws, sorted);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
sorted_by: sortBy, total_rows: sorted.length, preview: preview(sorted) });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 6. excel_delete_rows ─────────────────────────────────────────────────
tool({
name: "excel_delete_rows",
description: "Delete rows matching filter conditions. Creates a .bak backup before deleting.",
parameters: {
filePath: z.string(),
filters: filterSchema,
sheet: z.string().optional(),
},
implementation: async ({ filePath, filters, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
const kept = rows.filter(r => !matchesFilters(r, filters as FilterCondition[]));
clearWorksheet(ws);
jsonToWorksheet(ws, kept);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
rows_before: rows.length, rows_deleted: rows.length - kept.length, rows_after: kept.length,
preview_remaining: preview(kept) });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 7. excel_update_rows ─────────────────────────────────────────────────
tool({
name: "excel_update_rows",
description: "Update cell values on rows matching filter conditions (like SQL UPDATE ... WHERE). Creates a .bak backup.",
parameters: {
filePath: z.string(),
filters: filterSchema,
updates: z.record(z.union([z.string(), z.number(), z.null()])).describe("Column/value pairs to set on matched rows"),
sheet: z.string().optional(),
},
implementation: async ({ filePath, filters, updates, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
let count = 0;
const newRows = rows.map(r => {
if (!matchesFilters(r, filters as FilterCondition[])) return r;
count++;
return { ...r, ...updates };
});
clearWorksheet(ws);
jsonToWorksheet(ws, newRows);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
filters, updates, rows_updated: count });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 8. excel_merge_files ─────────────────────────────────────────────────
tool({
name: "excel_merge_files",
description: "Merge multiple Excel/CSV files into a single workbook. Each source file becomes a sheet.",
parameters: {
inputFiles: z.array(z.string()),
outputFile: z.string(),
},
implementation: async ({ inputFiles, outputFile }) => {
inputFiles = inputFiles.map(f => resolvePath(f, workingDir));
outputFile = resolvePath(outputFile, workingDir);
const missing = inputFiles.filter(f => !fs.existsSync(f));
if (missing.length) return `Error: Files not found: ${missing.join(", ")}`;
try {
const outWb = new ExcelJS.Workbook();
const summary: { file: string; sheet: string; rows: number }[] = [];
for (const fp of inputFiles) {
const wb = await loadWorkbook(fp);
for (const ws of wb.worksheets) {
const rows = worksheetToJson(ws);
let name = `${path.basename(fp, path.extname(fp))}_${ws.name}`.slice(0, 31);
let safe = name; let i = 2;
while (outWb.worksheets.some(s => s.name === safe)) safe = `${name}_${i++}`;
const newWs = outWb.addWorksheet(safe);
jsonToWorksheet(newWs, rows);
summary.push({ file: fp, sheet: safe, rows: rows.length });
}
}
ensureDir(outputFile);
await saveWorkbook(outWb, outputFile);
return JSON.stringify({ success: true, output: outputFile, sheets_created: summary });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 9. excel_find_replace ────────────────────────────────────────────────
tool({
name: "excel_find_replace",
description: "Find and replace values across an entire sheet or a specific column.",
parameters: {
filePath: z.string(),
find: z.string().describe("Value to search for (case-insensitive)"),
replace: z.string().describe("Replacement value"),
column: z.string().optional().describe("Restrict to this column (default: all columns)"),
sheet: z.string().optional(),
exactMatch: z.boolean().optional().describe("Match entire cell value only (default: false = substring)"),
},
implementation: async ({ filePath, find, replace, column, sheet, exactMatch }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
let count = 0;
const regex = new RegExp(exactMatch ? `^${find}$` : find, "gi");
const newRows = rows.map(r => {
const nr = { ...r };
const cols = column ? [column] : Object.keys(nr);
for (const c of cols) {
const old = String(nr[c] ?? "");
const replaced = old.replace(regex, replace);
if (replaced !== old) { nr[c] = replaced; count++; }
}
return nr;
});
clearWorksheet(ws);
jsonToWorksheet(ws, newRows);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
find, replace, replacements_made: count });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 10. excel_deduplicate ────────────────────────────────────────────────
tool({
name: "excel_deduplicate",
description: "Remove duplicate rows. Can deduplicate on specific columns only, or the full row.",
parameters: {
filePath: z.string(),
columns: z.array(z.string()).optional().describe("Columns to check for duplicates (default: all columns)"),
keep: z.enum(["first","last"]).optional().describe("Which occurrence to keep (default: first)"),
sheet: z.string().optional(),
},
implementation: async ({ filePath, columns, keep, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
backupFile(filePath);
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
const seen = new Set<string>();
const keepLast = keep === "last";
const source = keepLast ? [...rows].reverse() : rows;
const deduped = source.filter(r => {
const key = JSON.stringify(columns ? columns.map(c => r[c]) : r);
if (seen.has(key)) return false;
seen.add(key); return true;
});
const result = keepLast ? deduped.reverse() : deduped;
clearWorksheet(ws);
jsonToWorksheet(ws, result);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
rows_before: rows.length, duplicates_removed: rows.length - result.length,
rows_after: result.length });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 11. excel_rename_columns ─────────────────────────────────────────────
tool({
name: "excel_rename_columns",
description: "Rename one or more column headers without modifying data.",
parameters: {
filePath: z.string(),
renames: z.record(z.string()).describe("{ oldName: newName } pairs"),
sheet: z.string().optional(),
},
implementation: async ({ filePath, renames, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
const newRows = rows.map(r => {
const nr: Record<string, unknown> = {};
for (const [k, v] of Object.entries(r)) nr[renames[k] ?? k] = v;
return nr;
});
clearWorksheet(ws);
jsonToWorksheet(ws, newRows);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name, renames,
columns_after: newRows[0] ? Object.keys(newRows[0]) : [] });
} catch (e) { return `Error: ${e}`; }
},
}),
// ── 12. excel_add_column ─────────────────────────────────────────────────
tool({
name: "excel_add_column",
description: "Add a computed column. The expression is evaluated as JS with 'row' as the current row object. Example: 'row.Price * row.Qty' or 'row.First + \" \" + row.Last'.",
parameters: {
filePath: z.string(),
columnName: z.string().describe("Name of the new column"),
expression: z.string().describe("JS expression using 'row', e.g. 'row.Price * 1.2'"),
sheet: z.string().optional(),
},
implementation: async ({ filePath, columnName, expression, sheet }) => {
filePath = resolvePath(filePath, workingDir);
const err = checkReadable(filePath);
if (err) return `Error: ${err}`;
try {
const wb = await loadWorkbook(filePath);
const ws = getSheet(wb, sheet);
if (typeof ws === "string") return `Error: ${ws}`;
const rows = worksheetToJson(ws);
// eslint-disable-next-line no-new-func
const fn = new Function("row", `return (${expression})`);
const newRows = rows.map(row => ({ ...row, [columnName]: fn(row) }));
clearWorksheet(ws);
jsonToWorksheet(ws, newRows);
await saveWorkbook(wb, filePath);
return JSON.stringify({ success: true, file: filePath, sheet: ws.name,
new_column: columnName, expression, preview: preview(newRows) });
} catch (e) { return `Error: ${e}`; }
},
}),
];
}