Project Files
dist / tools / analysis.js
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getAnalysisTools = getAnalysisTools;
const sdk_1 = require("@lmstudio/sdk");
const zod_1 = require("zod");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const exceljs_1 = __importDefault(require("exceljs"));
const XLSX = __importStar(require("xlsx"));
const workbook_1 = require("../utils/workbook");
const config_1 = require("../utils/config");
function getAnalysisTools() {
return [
// ── 29. excel_summarize ──────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_summarize",
description: "Full statistical summary per column: min/max/avg/sum/stddev, unique count, null count, outliers (±3σ), duplicate rows, empty columns.",
parameters: {
filePath: zod_1.z.string(),
sheet: zod_1.z.string().optional(),
},
implementation: async ({ filePath, sheet }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const ws = (0, workbook_1.getSheet)(wb, sheet);
if (typeof ws === "string")
return `Error: ${ws}`;
const rows = (0, workbook_1.worksheetToJson)(ws);
if (!rows.length)
return JSON.stringify({ file: filePath, empty: true });
const dups = rows.length - new Set(rows.map(r => JSON.stringify(r))).size;
const headers = Object.keys(rows[0]);
const empty = [];
const stats = {};
for (const col of headers) {
const vals = rows.map(r => r[col]).filter(v => v !== null && v !== "");
if (!vals.length) {
empty.push(col);
continue;
}
const nums = vals.filter(v => typeof v === "number");
if (nums.length > 0) {
const sum = nums.reduce((a, b) => a + b, 0);
const avg = sum / nums.length;
const stddev = Math.sqrt(nums.reduce((a, b) => a + (b - avg) ** 2, 0) / nums.length);
const outliers = nums.filter(n => Math.abs(n - avg) > 3 * stddev);
stats[col] = {
type: "numeric", count: nums.length, nulls: rows.length - nums.length,
min: Math.min(...nums), max: Math.max(...nums),
avg: +avg.toFixed(4), sum: +sum.toFixed(4), stddev: +stddev.toFixed(4),
outliers: outliers.length ? outliers.slice(0, 10) : "none",
};
}
else {
const unique = new Set(vals.map(String));
stats[col] = {
type: "text", count: vals.length, nulls: rows.length - vals.length,
unique_values: unique.size, sample: [...unique].slice(0, 5),
};
}
}
return JSON.stringify({
file: filePath, sheet: ws.name,
total_rows: rows.length, total_columns: headers.length,
duplicate_rows: dups, empty_columns: empty.length ? empty : "none",
column_stats: stats,
}, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 30. excel_pivot ──────────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_pivot",
description: "Create a pivot table: group by one column, aggregate one or more numeric columns (sum/avg/count/min/max). Saves result to a new sheet.",
parameters: {
filePath: zod_1.z.string(),
groupBy: zod_1.z.string().describe("Column to group by"),
valueCols: zod_1.z.array(zod_1.z.object({
column: zod_1.z.string(),
aggregation: zod_1.z.enum(["sum", "avg", "count", "min", "max"]),
})).describe("Columns and aggregation functions"),
outputSheet: zod_1.z.string().optional().describe("Sheet name for result (default: Pivot)"),
sheet: zod_1.z.string().optional(),
},
implementation: async ({ filePath, groupBy, valueCols, outputSheet, sheet }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const ws = (0, workbook_1.getSheet)(wb, sheet);
if (typeof ws === "string")
return `Error: ${ws}`;
const rows = (0, workbook_1.worksheetToJson)(ws);
const groups = new Map();
for (const row of rows) {
const key = String(row[groupBy] ?? "(blank)");
if (!groups.has(key))
groups.set(key, Object.fromEntries(valueCols.map(v => [v.column, []])));
for (const { column } of valueCols) {
const n = Number(row[column]);
if (!isNaN(n))
groups.get(key)[column].push(n);
}
}
const agg = (nums, fn) => {
if (!nums.length)
return 0;
switch (fn) {
case "sum": return nums.reduce((a, b) => a + b, 0);
case "avg": return nums.reduce((a, b) => a + b, 0) / nums.length;
case "count": return nums.length;
case "min": return Math.min(...nums);
case "max": return Math.max(...nums);
default: return 0;
}
};
const pivotRows = [...groups.entries()].map(([key, colNums]) => {
const obj = { [groupBy]: key };
for (const { column, aggregation } of valueCols)
obj[`${aggregation}(${column})`] = +agg(colNums[column], aggregation).toFixed(4);
return obj;
});
const pivotName = outputSheet ?? "Pivot";
const pivotWs = wb.getWorksheet(pivotName) ?? wb.addWorksheet(pivotName);
pivotWs.spliceRows(1, pivotWs.rowCount);
(0, workbook_1.jsonToWorksheet)(pivotWs, pivotRows);
await (0, workbook_1.saveWorkbook)(wb, filePath);
return JSON.stringify({ success: true, file: filePath, pivot_sheet: pivotName,
group_by: groupBy, groups: groups.size, preview: (0, workbook_1.preview)(pivotRows, 10) }, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 31. excel_diff ───────────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_diff",
description: "Compare two files and return added rows, removed rows, and modified rows (with before/after values per changed cell).",
parameters: {
fileA: zod_1.z.string().describe("Original file"),
fileB: zod_1.z.string().describe("Modified file"),
keyColumn: zod_1.z.string().describe("Column that uniquely identifies each row (primary key)"),
sheetA: zod_1.z.string().optional(), sheetB: zod_1.z.string().optional(),
exportDiff: zod_1.z.string().optional().describe("Optional .xlsx path to export a color-coded diff file"),
},
implementation: async ({ fileA, fileB, keyColumn, sheetA, sheetB, exportDiff }) => {
const e1 = (0, config_1.checkReadable)(fileA);
if (e1)
return `Error: ${e1}`;
const e2 = (0, config_1.checkReadable)(fileB);
if (e2)
return `Error: ${e2}`;
try {
const wbA = await (0, workbook_1.loadWorkbook)(fileA);
const wbB = await (0, workbook_1.loadWorkbook)(fileB);
const wsA = (0, workbook_1.getSheet)(wbA, sheetA);
if (typeof wsA === "string")
return `Error: ${wsA}`;
const wsB = (0, workbook_1.getSheet)(wbB, sheetB);
if (typeof wsB === "string")
return `Error: ${wsB}`;
const rowsA = (0, workbook_1.worksheetToJson)(wsA);
const rowsB = (0, workbook_1.worksheetToJson)(wsB);
const mapA = new Map(rowsA.map(r => [String(r[keyColumn]), r]));
const mapB = new Map(rowsB.map(r => [String(r[keyColumn]), r]));
const added = rowsB.filter(r => !mapA.has(String(r[keyColumn])));
const removed = rowsA.filter(r => !mapB.has(String(r[keyColumn])));
const modified = [];
for (const [key, rowB] of mapB) {
const rowA = mapA.get(key);
if (!rowA)
continue;
const changes = {};
for (const col of new Set([...Object.keys(rowA), ...Object.keys(rowB)]))
if (String(rowA[col] ?? "") !== String(rowB[col] ?? ""))
changes[col] = { from: rowA[col], to: rowB[col] };
if (Object.keys(changes).length)
modified.push({ key, changes });
}
// Optional color-coded export
if (exportDiff) {
(0, config_1.ensureDir)(exportDiff);
const ewb = new exceljs_1.default.Workbook();
const addSheet = (name, data, color) => {
if (!data.length)
return;
const ws = ewb.addWorksheet(name);
(0, workbook_1.jsonToWorksheet)(ws, data);
ws.eachRow((row, i) => { if (i > 1)
row.fill = { type: "pattern", pattern: "solid", fgColor: { argb: color } }; });
};
addSheet("Added", added, "FFC6EFCE");
addSheet("Removed", removed, "FFFFC7CE");
if (modified.length) {
const modRows = modified.map(m => ({ key: m.key, changes: JSON.stringify(m.changes) }));
addSheet("Modified", modRows, "FFFFFFCC");
}
await (0, workbook_1.saveWorkbook)(ewb, exportDiff);
}
return JSON.stringify({ fileA, fileB, key_column: keyColumn,
summary: { added: added.length, removed: removed.length, modified: modified.length },
added: added.slice(0, 20), removed: removed.slice(0, 20), modified: modified.slice(0, 20),
...(exportDiff ? { diff_file: exportDiff } : {}),
}, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 32. excel_validate_data ──────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_validate_data",
description: "Check data quality rules: required columns not null, values in allowed list, numeric range, regex pattern, unique constraint. Returns all violations.",
parameters: {
filePath: zod_1.z.string(),
rules: zod_1.z.array(zod_1.z.object({
column: zod_1.z.string(),
type: zod_1.z.enum(["required", "allowedValues", "numericRange", "regex", "unique"]),
allowedValues: zod_1.z.array(zod_1.z.string()).optional(),
min: zod_1.z.number().optional(), max: zod_1.z.number().optional(),
pattern: zod_1.z.string().optional().describe("Regex pattern"),
})),
sheet: zod_1.z.string().optional(),
},
implementation: async ({ filePath, rules, sheet }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const ws = (0, workbook_1.getSheet)(wb, sheet);
if (typeof ws === "string")
return `Error: ${ws}`;
const rows = (0, workbook_1.worksheetToJson)(ws);
const violations = [];
const seenValues = {};
rows.forEach((row, i) => {
for (const rule of rules) {
const val = row[rule.column];
const v = String(val ?? "");
const rowNum = i + 2; // 1-based + header
switch (rule.type) {
case "required":
if (val === null || val === undefined || v === "")
violations.push({ row: rowNum, column: rule.column, rule: "required", value: val });
break;
case "allowedValues":
if (v && rule.allowedValues && !rule.allowedValues.includes(v))
violations.push({ row: rowNum, column: rule.column, rule: `allowedValues: ${rule.allowedValues.join(",")}`, value: val });
break;
case "numericRange":
if (v) {
const n = Number(val);
if (isNaN(n) || (rule.min !== undefined && n < rule.min) || (rule.max !== undefined && n > rule.max))
violations.push({ row: rowNum, column: rule.column, rule: `numericRange [${rule.min}–${rule.max}]`, value: val });
}
break;
case "regex":
if (v && rule.pattern && !new RegExp(rule.pattern).test(v))
violations.push({ row: rowNum, column: rule.column, rule: `regex: ${rule.pattern}`, value: val });
break;
case "unique":
if (!seenValues[rule.column])
seenValues[rule.column] = new Set();
if (v && seenValues[rule.column].has(v))
violations.push({ row: rowNum, column: rule.column, rule: "unique", value: val });
seenValues[rule.column].add(v);
break;
}
}
});
return JSON.stringify({ file: filePath, sheet: ws.name, rows_checked: rows.length,
violations_found: violations.length, violations: violations.slice(0, 100),
status: violations.length === 0 ? "PASS" : "FAIL" }, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 33. excel_query ──────────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_query",
description: "Run a SQL-like query on a sheet. Syntax: SELECT col1, col2 FROM sheetName WHERE col3 > 100 ORDER BY col1 DESC LIMIT 50. Use * to select all columns.",
parameters: {
filePath: zod_1.z.string(),
query: zod_1.z.string().describe("SQL-like query string"),
},
implementation: async ({ filePath, query }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const sel = query.match(/SELECT\s+(.*?)\s+FROM\s+["']?([^"'\s]+)["']?/i);
if (!sel)
return "Error: Query must start with SELECT ... FROM ...";
const cols = sel[1].trim() === "*" ? null : sel[1].split(",").map(s => s.trim());
const sheetName = sel[2];
const whereM = query.match(/WHERE\s+(.*?)(?:\s+ORDER\s+BY|\s+LIMIT|$)/i);
const orderM = query.match(/ORDER\s+BY\s+(.*?)(?:\s+LIMIT|$)/i);
const limitM = query.match(/LIMIT\s+(\d+)/i);
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const ws = (0, workbook_1.getSheet)(wb, sheetName);
if (typeof ws === "string")
return `Error: ${ws}`;
let rows = (0, workbook_1.worksheetToJson)(ws);
// WHERE
if (whereM) {
const cond = whereM[1].trim();
rows = rows.filter(row => {
try {
// Safe eval: replace column names with row values
const expr = cond.replace(/(\w+)/g, (m) => row[m] !== undefined ? JSON.stringify(row[m]) : m);
// eslint-disable-next-line no-new-func
return new Function(`return (${expr})`)();
}
catch {
return false;
}
});
}
// ORDER BY
if (orderM) {
const parts = orderM[1].split(",").map(s => {
const t = s.trim().split(/\s+/);
return { col: t[0], dir: (t[1]?.toUpperCase() === "DESC" ? -1 : 1) };
});
rows.sort((a, b) => {
for (const { col, dir } of parts) {
const av = a[col];
const bv = b[col];
if (av === bv)
continue;
if (typeof av === "number" && typeof bv === "number")
return (av - bv) * dir;
return String(av ?? "").localeCompare(String(bv ?? "")) * dir;
}
return 0;
});
}
// LIMIT
if (limitM)
rows = rows.slice(0, parseInt(limitM[1]));
// SELECT columns
const result = cols ? rows.map(r => Object.fromEntries(cols.map(c => [c, r[c]]))) : rows;
return JSON.stringify({ query, sheet: ws.name, total_matched: rows.length,
columns: result[0] ? Object.keys(result[0]) : [], data: result }, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 34. excel_split_sheet ────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_split_sheet",
description: "Split a sheet into multiple files (one per unique value of a column). E.g. split a sales sheet by Region → one file per region.",
parameters: {
filePath: zod_1.z.string(),
splitColumn: zod_1.z.string().describe("Column whose unique values become separate files"),
outputDir: zod_1.z.string().describe("Directory to write output files into"),
sheet: zod_1.z.string().optional(),
},
implementation: async ({ filePath, splitColumn, outputDir, sheet }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const ws = (0, workbook_1.getSheet)(wb, sheet);
if (typeof ws === "string")
return `Error: ${ws}`;
const rows = (0, workbook_1.worksheetToJson)(ws);
const groups = new Map();
for (const row of rows) {
const key = String(row[splitColumn] ?? "blank");
if (!groups.has(key))
groups.set(key, []);
groups.get(key).push(row);
}
(0, config_1.ensureDir)(path.join(outputDir, "placeholder"));
const files = [];
for (const [key, groupRows] of groups) {
const safe = key.replace(/[^a-zA-Z0-9_\-]/g, "_");
const outPath = path.join(outputDir, `${safe}.xlsx`);
const outWb = new exceljs_1.default.Workbook();
const outWs = outWb.addWorksheet(key.slice(0, 31));
(0, workbook_1.jsonToWorksheet)(outWs, groupRows);
await (0, workbook_1.saveWorkbook)(outWb, outPath);
files.push({ value: key, file: outPath, rows: groupRows.length });
}
return JSON.stringify({ success: true, source: filePath, split_by: splitColumn,
files_created: files.length, files }, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
// ── 35. excel_info ───────────────────────────────────────────────────────
(0, sdk_1.tool)({
name: "excel_info",
description: "Get complete metadata about a file: format, size, sheets, row/column counts, named ranges, whether VBA is present, last modified date.",
parameters: {
filePath: zod_1.z.string(),
},
implementation: async ({ filePath }) => {
const err = (0, config_1.checkReadable)(filePath);
if (err)
return `Error: ${err}`;
try {
const stat = fs.statSync(filePath);
const ext = path.extname(filePath).toLowerCase();
const wb = await (0, workbook_1.loadWorkbook)(filePath);
const sheets = wb.worksheets.map(ws => {
const rows = (0, workbook_1.worksheetToJson)(ws);
return { name: ws.name, rows: rows.length,
columns: rows[0] ? Object.keys(rows[0]).length : 0,
column_names: rows[0] ? Object.keys(rows[0]) : [],
has_formulas: ws.rowCount > 0,
};
});
// Check VBA (only for xlsm via SheetJS)
let hasVba = false;
if (ext === ".xlsm") {
try {
const sjWb = XLSX.readFile(filePath, { bookVBA: true });
hasVba = !!sjWb.vbaraw;
}
catch { }
}
return JSON.stringify({
file: filePath, format: ext, size_bytes: stat.size,
size_kb: +(stat.size / 1024).toFixed(1),
last_modified: stat.mtime.toISOString(),
sheet_count: sheets.length, sheets,
has_vba: hasVba,
total_rows: sheets.reduce((a, s) => a + s.rows, 0),
}, null, 2);
}
catch (e) {
return `Error: ${e}`;
}
},
}),
];
}
//# sourceMappingURL=analysis.js.map