/**
* Text Processing Tools — sed/awk equivalents for Node.js
* Provides safe, portable text transformation capabilities without shell dependencies.
*/
import type { Tool } from '@lmstudio/sdk';
import { tool } from '@lmstudio/sdk';
import { z } from 'zod';
import * as fs from 'fs/promises';
import type { PluginConfig } from '../config.js';
import { validatePath } from '../security.js';
import { getWorkingDir, resolvePath } from '../workingDir.js';
// ==================== Typed Params Interfaces ====================
/**
 * Arguments received by the `text_transform` tool implementation.
 */
interface TextTransformParams {
/** Path of the file to transform; checked with `validatePath` and resolved with `resolvePath` before use. */
file_name: string;
/** Regular-expression source text; compiled with `new RegExp(pattern, flags)`. */
pattern: string;
/**
 * Replacement string handed to `String.prototype.replace`, so `$1`, `$2`, …
 * refer to capture groups. When omitted the tool performs a deletion instead
 * of a substitution.
 */
replacement?: string;
/** RegExp flags; the implementation falls back to `'g'` when none is supplied. */
flags?: 'g' | 'i' | 'gi';
/**
 * Optional 1-based line range restricting where the transformation applies.
 * `start` falls back to 1 and `end` to the last line of the file.
 */
lines?: { start?: number; end?: number };
/** When true, the file is copied to `<file>.bak` before it is overwritten. */
backup?: boolean;
/** When true, a preview is returned and nothing is written to disk. */
dry_run?: boolean;
}
/**
 * Arguments received by the `text_extract` tool implementation.
 */
interface TextExtractParams {
/** Path of the file to read; checked with `validatePath` and resolved with `resolvePath` before use. */
file_name: string;
/** Optional regular-expression source; when given, only lines it matches are processed. */
pattern?: string;
/** 0-based indices of the fields to pull out of each split line; `[0]` is used when absent. */
fields?: number[];
/** String each line is split on; a tab is used when none is supplied. */
delimiter?: string;
/** Shape of the formatted output returned to the caller. */
output_format?: 'list' | 'json' | 'csv';
}
// ==================== Helper Functions ====================
/**
 * Convert any thrown value into the tool's uniform failure result.
 *
 * @param error - Whatever was caught; `Error` instances contribute their
 *   `message`, every other value is stringified with `String()`.
 * @returns A `{ success: false, error }` object carrying the message text.
 */
function handleError(error: unknown): { success: false; error: string } {
  if (error instanceof Error) {
    return { success: false, error: error.message };
  }
  return { success: false, error: String(error) };
}
/**
 * Report whether `pattern` contains a repeated group whose body already
 * contains a variable-length repetition — e.g. `(a+)+`, `(\w*)*`, `(x{2,})+`.
 * That shape is the classic trigger for catastrophic backtracking.
 *
 * The scan is a single left-to-right pass: escaped characters are skipped,
 * and everything inside a `[...]` character class is treated as literal, so
 * `[a-z]*` or `\(a+\)+` are not misread as quantified groups.
 */
function hasNestedQuantifier(pattern: string): boolean {
  // One entry per currently open group: does its body contain a repetition?
  const openGroups: boolean[] = [];
  let inCharClass = false;

  const isRepetitionAt = (index: number): boolean => {
    const ch = pattern.charAt(index);
    if (ch === '+' || ch === '*') return true;
    // `{n,}` / `{n,m}` repeat a variable number of times; a plain `{n}` does not.
    return ch === '{' && /^\{\d+,/.test(pattern.slice(index, index + 16));
  };

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern.charAt(i);
    if (ch === '\\') {
      i++; // skip the escaped character, whatever it is
      continue;
    }
    if (inCharClass) {
      if (ch === ']') inCharClass = false;
      continue;
    }
    if (ch === '[') {
      inCharClass = true;
      continue;
    }
    if (ch === '(') {
      openGroups.push(false);
      continue;
    }
    if (ch === ')') {
      const bodyRepeats = openGroups.pop() ?? false;
      if (bodyRepeats && isRepetitionAt(i + 1)) return true;
      // A repetition inside a sub-group is also inside the enclosing group.
      if (bodyRepeats && openGroups.length > 0) {
        openGroups[openGroups.length - 1] = true;
      }
      continue;
    }
    if (openGroups.length > 0 && isRepetitionAt(i)) {
      openGroups[openGroups.length - 1] = true;
    }
  }
  return false;
}

/**
 * Vet a user-supplied regex source before it is compiled and run against
 * file content.
 *
 * This is a heuristic, not a proof of safety: it rejects nested repetition
 * (see `hasNestedQuantifier`) and anything `RegExp` refuses to compile, but it
 * does not detect every slow pattern (e.g. overlapping alternations).
 *
 * @param pattern - Regex source text (without delimiters or flags).
 * @returns `true` when the pattern compiles and shows no nested repetition.
 */
function validateRegex(pattern: string): boolean {
  if (hasNestedQuantifier(pattern)) {
    return false;
  }
  try {
    // Compilation is the syntax check: it throws on an invalid pattern.
    new RegExp(pattern);
    return true;
  } catch {
    return false;
  }
}
/**
 * Read a file as UTF-8 text, refusing oversized or binary input.
 *
 * @param filePath - Path of the file to read.
 * @returns The file content decoded as UTF-8.
 * @throws Error when the file is larger than 10 MB (10,000,000 bytes), when a
 *   NUL byte appears in its first 8 KB (treated as binary), or when the
 *   underlying `stat`/`readFile` call fails.
 */
async function readFileWithLimit(filePath: string): Promise<string> {
  const maxBytes = 10_000_000;
  // Report the size in the same decimal megabytes the limit is expressed in,
  // so a rejected file is never described as smaller than the maximum.
  const tooLarge = (bytes: number): Error =>
    new Error(`File too large (${(bytes / 1_000_000).toFixed(2)}MB, max 10MB)`);

  const stats = await fs.stat(filePath);
  if (stats.size > maxBytes) {
    throw tooLarge(stats.size);
  }

  const buffer = await fs.readFile(filePath);
  // The file can grow between stat() and readFile(); enforce the cap on what
  // was actually read as well.
  if (buffer.length > maxBytes) {
    throw tooLarge(buffer.length);
  }

  // Binary detection: a NUL byte in the first 8KB (subarray clamps to the
  // buffer length for shorter files).
  if (buffer.subarray(0, 8192).includes(0)) {
    throw new Error('Binary file detected. This tool only supports text files.');
  }
  return buffer.toString('utf-8');
}
/**
 * Replace a file's content by writing a sibling temp file and renaming it
 * over the target, so readers never observe a partially written file.
 *
 * @param filePath - Destination path.
 * @param content - Text to write, encoded as UTF-8.
 * @throws Re-throws the write/rename error after removing the temp file, so a
 *   failed write does not leave `*.tmp.*` debris next to the target.
 */
async function writeFileAtomic(filePath: string, content: string): Promise<void> {
  // pid + timestamp + random suffix: two writes in the same millisecond (or
  // from two processes) must not share a temp file.
  const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  const tmpPath = `${filePath}.tmp.${uniqueSuffix}`;
  try {
    await fs.writeFile(tmpPath, content, 'utf-8');
    await fs.rename(tmpPath, filePath);
  } catch (error) {
    // Best-effort cleanup; the original failure is what the caller needs to
    // see, so a failing unlink (e.g. temp file never created) is ignored.
    await fs.unlink(tmpPath).catch(() => undefined);
    throw error;
  }
}
/**
 * Count how many substitutions `text.replace(regex, …)` would perform.
 *
 * With the `g` flag every match is replaced; without it only the first one is,
 * and `match()` would return capture groups alongside the match, so its
 * length cannot be used as a count.
 */
function countReplacements(text: string, regex: RegExp): number {
  if (regex.global) {
    return text.match(regex)?.length ?? 0;
  }
  return text.search(regex) === -1 ? 0 : 1;
}

/** Quote a single CSV field when it contains a comma, quote or line break (RFC 4180). */
function toCsvField(value: string): string {
  return /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
}

/**
 * Build the text-processing tools exposed by the plugin.
 *
 * - `text_transform`: regex substitution/deletion over a whole file or a line range.
 * - `text_extract`: field extraction from delimited lines, optionally filtered by a regex.
 * - `line_operations`: insert, delete or move lines by 1-based line number.
 *
 * Every implementation resolves to `{ success: true, data }` or
 * `{ success: false, error }`; errors are reported in the result, not thrown.
 *
 * @param _config - Plugin configuration (currently unused).
 * @returns The tool definitions, in the order listed above.
 */
export function registerTextProcessingTools(_config: PluginConfig): Tool[] {
  const tools: Tool[] = [];

  // text_transform tool (sed-equivalent)
  tools.push(tool({
    name: 'text_transform',
    description: 'Apply regex-based text transformations to files. Supports substitution, line ranges, and capture groups. Safer than shell sed — no command injection risk.',
    parameters: {
      file_name: z.string().describe('File path'),
      pattern: z.string().min(1).max(10_000).describe('Regex or literal pattern to match (required, non-empty, max 10KB)'),
      replacement: z.string().max(100_000).optional().describe('Replacement text (supports $1, $2 for capture groups, max 100KB)'),
      flags: z.enum(['g', 'i', 'gi']).default('g').describe('Flags: g=global, i=case-insensitive, gi=both'),
      lines: z.object({
        start: z.number().int().min(1).optional(),
        end: z.number().int().optional(),
      }).optional().describe('Line range to apply transformation (e.g., {start: 10, end: 20})'),
      backup: z.boolean().default(false).describe('Create .bak file before modifying'),
      dry_run: z.boolean().default(false).describe('Preview changes without writing to disk'),
    },
    implementation: async ({ file_name, pattern, replacement, flags, lines, backup, dry_run }: TextTransformParams) => {
      try {
        if (!validatePath(file_name, getWorkingDir())) {
          return { success: false, error: 'Invalid path: directory traversal detected' };
        }
        const fullPath = resolvePath(file_name);

        // Validate regex pattern to prevent DoS attacks
        if (!validateRegex(pattern)) {
          return { success: false, error: 'Invalid or potentially dangerous regex pattern' };
        }

        const content = await readFileWithLimit(fullPath);

        // Compiled once and reused; match/replace/search do not depend on
        // lastIndex left over from a previous call.
        const regex = new RegExp(pattern, flags ?? 'g');
        const hasRange = lines?.start !== undefined || lines?.end !== undefined;

        let changesApplied = 0;
        let transformedContent: string;
        let linesProcessed = 'all';

        if (hasRange) {
          // Apply transformation to specific line range (1-based, inclusive)
          const linesArray = content.split('\n');
          const startLine = Math.max(1, lines?.start ?? 1);
          const endLine = Math.min(lines?.end ?? linesArray.length, linesArray.length);
          linesProcessed = `${startLine}-${endLine}`;

          const kept: string[] = [];
          linesArray.forEach((line, index) => {
            const lineNumber = index + 1;
            const hits = lineNumber >= startLine && lineNumber <= endLine
              ? countReplacements(line, regex)
              : 0;
            if (hits === 0) {
              // Outside the range, or no match: the line is left untouched.
              kept.push(line);
            } else if (replacement !== undefined) {
              // Substitution, including an explicit empty-string replacement.
              changesApplied += hits;
              kept.push(line.replace(regex, replacement));
            } else {
              // No replacement in range mode: drop the matching line itself.
              changesApplied += 1;
            }
          });
          transformedContent = kept.join('\n');
        } else {
          // Whole-file mode: without a replacement the matched text is removed
          // (the surrounding line is kept).
          changesApplied = countReplacements(content, regex);
          transformedContent = content.replace(regex, replacement ?? '');
        }

        // Preview mode - don't write
        if (dry_run) {
          return { success: true, data: {
            dry_run: true,
            total_changes_previewed: changesApplied,
            preview_lines: transformedContent.split('\n').slice(0, 10),
            message: 'Dry run complete — no changes written'
          }};
        }

        // Create backup if requested (a failure here aborts before any write)
        let backup_created = false;
        if (backup) {
          await fs.copyFile(fullPath, `${fullPath}.bak`);
          backup_created = true;
        }

        // Write transformed content atomically
        try {
          await writeFileAtomic(fullPath, transformedContent);
        } catch (error) {
          if (backup_created) {
            // Best-effort restore; the write error below is what gets reported.
            try {
              await fs.copyFile(`${fullPath}.bak`, fullPath);
            } catch {
              // ignore: nothing more can be done if the restore fails too
            }
          }
          return handleError(error);
        }

        return { success: true, data: {
          transformed: true,
          total_changes_applied: changesApplied,
          backup_created,
          lines_processed: linesProcessed,
          pattern_used: pattern,
          replacement_used: replacement ?? '(deletion)'
        }};
      } catch (error) {
        return handleError(error);
      }
    },
  }));

  // text_extract tool (awk-equivalent)
  tools.push(tool({
    name: 'text_extract',
    description: 'Extract structured data from text files using pattern matching and field extraction. Like awk for parsing logs, CSVs, TSVs, or any delimited text.',
    parameters: {
      file_name: z.string().describe('File path'),
      pattern: z.string().optional().describe('Regex to filter lines (optional)'),
      fields: z.array(z.number().int()).default([0]).describe('Field indices to extract (0-based, e.g., [0, 2] for first and third columns)'),
      delimiter: z.string().default('\t').describe('Field separator character'),
      output_format: z.enum(['list', 'json', 'csv']).default('json').describe('Output format for extracted data'),
    },
    implementation: async ({ file_name, pattern, fields, delimiter, output_format }: TextExtractParams) => {
      try {
        if (!validatePath(file_name, getWorkingDir())) {
          return { success: false, error: 'Invalid path: directory traversal detected' };
        }
        const fullPath = resolvePath(file_name);

        // Same vetting as text_transform: the pattern is untrusted input.
        if (pattern && !validateRegex(pattern)) {
          return { success: false, error: 'Invalid or potentially dangerous regex pattern' };
        }
        // Compiled once (no g flag, so test() carries no state between lines).
        const lineFilter = pattern ? new RegExp(pattern) : undefined;

        const content = await readFileWithLimit(fullPath);

        // Filter lines by pattern if specified
        const allLines = content.split('\n');
        const filteredLines = lineFilter
          ? allLines.filter(line => lineFilter.test(line))
          : allLines;

        // One separator for both splitting and re-joining.
        const separator = delimiter || '\t';
        const fieldIndexes = fields ?? [0];

        // Extract fields from each line
        const rows: string[][] = [];
        let empty_lines_skipped = 0;
        for (const line of filteredLines) {
          if (!line.trim()) {
            empty_lines_skipped++; // blank lines are skipped and counted
            continue;
          }
          const parts = line.split(separator);
          const extracted = fieldIndexes.map(i => parts[i] ?? '');
          if (extracted.some(p => p.trim() !== '')) {
            rows.push(extracted);
          } else {
            empty_lines_skipped++;
          }
        }

        // Format output based on requested format
        const joinedRows = rows.map(row => row.join(separator));
        let formatted_output: string;
        switch (output_format) {
          case 'json':
            formatted_output = JSON.stringify(joinedRows, null, 2);
            break;
          case 'csv':
            // Real CSV: comma-separated, with quoting where required.
            formatted_output = rows.map(row => row.map(toCsvField).join(',')).join('\n');
            break;
          default: // 'list'
            formatted_output = joinedRows.join('\n');
        }

        return { success: true, data: {
          total_lines_read: allLines.length,
          lines_matched: filteredLines.length,
          fields_extracted: fieldIndexes.length,
          extracted_count: rows.length,
          empty_lines_skipped,
          output_format,
          preview: formatted_output.substring(0, 1000) + (formatted_output.length > 1000 ? '\n... (truncated)' : ''),
          full_output_available: formatted_output.length <= 1000
        }};
      } catch (error) {
        return handleError(error);
      }
    },
  }));

  // line_operations tool (awk/print equivalent for line manipulation)
  tools.push(tool({
    name: 'line_operations',
    description: 'Insert, delete, or reorder lines in a file. Like awk for line-level operations without shell dependencies.',
    parameters: {
      file_name: z.string().describe('File path'),
      operation: z.enum(['insert', 'delete', 'move']).default('insert').describe('Operation to perform (use "lines" range for delete)'),
      target_line: z.number().int().min(1).optional().describe('Target line number for insert/delete/move operations'),
      lines: z.object({
        start: z.number().int().min(1).optional(),
        end: z.number().int().optional(),
      }).optional().describe('Line range for delete operation (e.g., {start: 16, end: 17})'),
      content: z.string().max(1_000_000).optional().describe('For insert operation - text to insert (max 1MB)'),
      backup: z.boolean().optional().default(false).describe('Create .bak backup before modification. Default: false'),
      move_from: z.number().int().optional().describe('Source line for move operation'),
      move_to: z.number().int().optional().describe('Destination line for move operation')
    },
    implementation: async (params: {
      file_name: string;
      operation: 'insert' | 'delete' | 'move';
      target_line?: number;
      lines?: { start?: number; end?: number };
      content?: string;
      backup?: boolean;
      move_from?: number;
      move_to?: number;
    }) => {
      const { file_name, operation, target_line, lines, content, backup, move_from, move_to } = params;
      try {
        if (!validatePath(file_name, getWorkingDir())) {
          return { success: false, error: 'Invalid path: directory traversal detected' };
        }
        const fullPath = resolvePath(file_name);

        const file_content = await readFileWithLimit(fullPath);
        let linesArr = file_content.split('\n');
        let changes_made = 0;

        switch (operation) {
          case 'insert': {
            if (content == null) {
              return { success: false, error: 'Insert operation requires "content" parameter' };
            }
            // Without a target the content is appended after the last line.
            const insertIndex = (target_line ?? (linesArr.length + 1)) - 1;
            // Split so that multi-line content is counted line by line; the
            // joined file text is the same either way.
            const insertedLines = content.split('\n');
            linesArr = [
              ...linesArr.slice(0, insertIndex),
              ...insertedLines,
              ...linesArr.slice(insertIndex),
            ];
            changes_made = insertedLines.length;
            break;
          }
          case 'delete': {
            const hasBounds = lines?.start !== undefined || lines?.end !== undefined;
            if (target_line === undefined && !hasBounds) {
              return { success: false, error: 'Delete operation requires either "target_line" or "lines.range" parameter' };
            }
            // An open start begins at line 1, an open end runs to the last line.
            const deleteStart = target_line ?? lines?.start ?? 1;
            const deleteEnd = lines?.end ?? target_line ?? linesArr.length;
            // Validate range
            if (deleteStart < 1 || deleteEnd > linesArr.length || deleteStart > deleteEnd) {
              return { success: false, error: `Line range ${deleteStart}-${deleteEnd} out of bounds (1-${linesArr.length})` };
            }
            changes_made = deleteEnd - deleteStart + 1;
            linesArr.splice(deleteStart - 1, changes_made);
            break;
          }
          case 'move': {
            if (!move_from || !move_to) {
              return { success: false, error: 'Move operation requires "move_from" and "move_to" parameters' };
            }
            if (move_from < 1 || move_from > linesArr.length || move_to < 1 || move_to > linesArr.length) {
              return { success: false, error: `Line numbers out of range (1-${linesArr.length})` };
            }
            // After the line is removed, inserting at index move_to - 1 puts
            // it at line number move_to in the result, whichever direction it
            // moves. No extra adjustment is needed (one would make the last
            // line unreachable as a destination).
            const moved = linesArr.splice(move_from - 1, 1);
            linesArr.splice(move_to - 1, 0, ...moved);
            changes_made = 1;
            break;
          }
          default:
            return { success: false, error: `Unknown operation: ${String(operation)}` };
        }

        // Create backup if requested, before the file is touched
        let backupPath: string | null = null;
        if (backup) {
          backupPath = fullPath + '.bak';
          try {
            await fs.copyFile(fullPath, backupPath);
          } catch (e) {
            return { success: false, error: `Failed to create backup at ${backupPath}: ${e instanceof Error ? e.message : String(e)}` };
          }
        }

        // Atomic write
        await writeFileAtomic(fullPath, linesArr.join('\n'));

        return { success: true, data: {
          operations_performed: operation,
          changes_applied: changes_made,
          total_lines_after: linesArr.length,
          backup_created: backupPath,
          message: `${operation.charAt(0).toUpperCase() + operation.slice(1)} operation completed successfully`
        }};
      } catch (error) {
        return handleError(error);
      }
    },
  }));

  return tools;
}