neil/file-agent • LM Studio Hub

/**
 * MOST OF THIS CODE IS ATTRIBUTABLE TO CLINE
 * replace_in_file:
 * https://github.com/cline/cline/blob/06e0973/src/core/assistant-message/diff.ts
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/components/editing_files.ts#L7
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/replace_in_file.ts
 *
 * read_file:
 * https://github.com/cline/cline/blob/f0ad29a/src/core/prompts/system-prompt/tools/read_file.ts
 * https://github.com/cline/cline/blob/e08c656/src/core/task/ToolExecutor.ts
 *
 * write_file:
 * https://github.com/cline/cline/blob/5595d12/src/core/prompts/system-prompt/tools/write_to_file.ts
 * https://github.com/cline/cline/blob/2315fc6/src/core/task/tools/handlers/WriteToFileToolHandler.ts
 *
 * search_files:
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/search_files.ts
 * https://github.com/cline/cline/blob/2315fc6/src/core/task/tools/handlers/SearchFilesToolHandler.ts
 *
 * list_files:
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/list_files.ts
 * https://github.com/cline/cline/blob/971ac0f/src/core/task/tools/handlers/ListFilesToolHandler.ts
 *
 */

import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { configSchematics } from "./config";
import { existsSync, statSync } from "fs";
import { writeFile, readFile, mkdir } from "fs/promises";
import { dirname } from "path";
import { regexSearchFiles } from "./ripgrep";
import { listFiles, formatFilesList } from "./list-files";

// See details about defining tools in the documentation:
// https://lmstudio.ai/docs/typescript/agent/tools

// Canonical marker lines that delimit one SEARCH/REPLACE block in a diff:
// start of the SEARCH section, the separator between SEARCH and REPLACE,
// and the end of the REPLACE section.
const SEARCH_BLOCK_START = "------- SEARCH";
const SEARCH_BLOCK_END = "=======";
const REPLACE_BLOCK_END = "+++++++ REPLACE";

// First character of each marker style (current and legacy). Used to spot a
// trailing diff line that looks like the beginning of a marker.
const SEARCH_BLOCK_CHAR = "-";
const REPLACE_BLOCK_CHAR = "+";
const LEGACY_SEARCH_BLOCK_CHAR = "<";
const LEGACY_REPLACE_BLOCK_CHAR = ">";

// Flexible counterparts of the exact string constants above: three or more
// marker characters, then the keyword, then an optional trailing ">".
const SEARCH_BLOCK_START_REGEX = /^[-]{3,} SEARCH>?$/;
const LEGACY_SEARCH_BLOCK_START_REGEX = /^[<]{3,} SEARCH>?$/;

// The separator is a line made only of three or more "=" characters.
const SEARCH_BLOCK_END_REGEX = /^[=]{3,}$/;

const REPLACE_BLOCK_END_REGEX = /^[+]{3,} REPLACE>?$/;
const LEGACY_REPLACE_BLOCK_END_REGEX = /^[>]{3,} REPLACE>?$/;

// Helper functions to check if a line matches the flexible patterns
/**
 * Reports whether `line` opens a SEARCH section, in either the current
 * ("---") or the legacy ("<<<") marker style.
 */
function isSearchBlockStart(line: string): boolean {
  if (SEARCH_BLOCK_START_REGEX.test(line)) {
    return true;
  }
  return LEGACY_SEARCH_BLOCK_START_REGEX.test(line);
}

/**
 * Reports whether `line` is the separator that closes a SEARCH section and
 * opens the REPLACE section (a line of three or more "=" characters).
 */
function isSearchBlockEnd(line: string): boolean {
  const separatorMatch = line.match(SEARCH_BLOCK_END_REGEX);
  return separatorMatch !== null;
}

/**
 * Reports whether `line` closes a REPLACE section, in either the current
 * ("+++") or the legacy (">>>") marker style.
 */
function isReplaceBlockEnd(line: string): boolean {
  if (REPLACE_BLOCK_END_REGEX.test(line)) {
    return true;
  }
  return LEGACY_REPLACE_BLOCK_END_REGEX.test(line);
}

/**
 * Whitespace-tolerant fallback used when an exact substring search fails.
 *
 * Both inputs are split on "\n" and compared line by line after trimming
 * leading/trailing whitespace. Only candidate positions whose first line
 * begins at or after `startIndex` are considered, and the first one that
 * matches wins.
 *
 * @param originalContent - The full text being searched
 * @param searchContent - The block to locate; one trailing "\n" is ignored
 * @param startIndex - Character index in `originalContent` to search from
 * @returns `[start, end]` character offsets of the matched lines, or `false`.
 *   `end` counts one newline after every matched line, so it is one past the
 *   end of `originalContent` when the match ends on a final line that has no
 *   trailing newline.
 */
function lineTrimmedFallbackMatch(
  originalContent: string,
  searchContent: string,
  startIndex: number
): [number, number] | false {
  const fileLines = originalContent.split("\n");
  const wantedLines = searchContent.split("\n");

  // A trailing "\n" in the search text yields a final empty element; drop it.
  if (wantedLines[wantedLines.length - 1] === "") {
    wantedLines.pop();
  }

  // lineOffsets[k] is the character index where line k begins. The extra
  // last entry is the offset just past the final line plus one newline.
  const lineOffsets: number[] = [0];
  for (const text of fileLines) {
    lineOffsets.push(lineOffsets[lineOffsets.length - 1] + text.length + 1);
  }

  // First line that starts at or after startIndex.
  let firstCandidate = 0;
  while (
    firstCandidate < fileLines.length &&
    lineOffsets[firstCandidate] < startIndex
  ) {
    firstCandidate++;
  }

  const wantedTrimmed = wantedLines.map((text) => text.trim());
  const lastCandidate = fileLines.length - wantedLines.length;

  for (let row = firstCandidate; row <= lastCandidate; row++) {
    const allLinesAgree = wantedTrimmed.every(
      (expected, offset) => fileLines[row + offset].trim() === expected
    );
    if (allLinesAgree) {
      return [lineOffsets[row], lineOffsets[row + wantedLines.length]];
    }
  }

  return false;
}

/**
 * Attempts to match blocks of code by using the first and last lines as anchors.
 * This is a third-tier fallback strategy that helps match blocks where we can identify
 * the correct location by matching the beginning and end, even if the exact content
 * differs slightly.
 *
 * The matching strategy:
 * 1. Only attempts to match blocks of 3 or more lines to avoid false positives.
 *    The count is taken AFTER dropping the empty element produced by a trailing
 *    "\n", so a two-line block that ends in a newline is rejected as well.
 * 2. Extracts from the search content:
 *    - First line as the "start anchor"
 *    - Last line as the "end anchor"
 * 3. For each line of the original content that starts at or after `startIndex`:
 *    - Checks if that line matches the start anchor
 *    - If it does, jumps ahead by the search block size
 *    - Checks if that line matches the end anchor
 *    - All comparisons are done after trimming whitespace
 *
 * The lines between the two anchors are not compared at all; only the number
 * of lines has to agree.
 *
 * @param originalContent - The full content of the original file
 * @param searchContent - The content we're trying to find in the original file
 * @param startIndex - The character index in originalContent where to start searching
 * @returns A tuple of [startIndex, endIndex] if a match is found, false otherwise.
 *   endIndex counts one newline after every matched line.
 */
function blockAnchorFallbackMatch(
  originalContent: string,
  searchContent: string,
  startIndex: number
): [number, number] | false {
  const originalLines = originalContent.split("\n");
  const searchLines = searchContent.split("\n");

  // Trim the trailing empty line first, so the size guard below counts only
  // real lines of the search block.
  if (searchLines[searchLines.length - 1] === "") {
    searchLines.pop();
  }

  // Only use this approach for blocks of 3+ lines
  if (searchLines.length < 3) {
    return false;
  }

  const firstLineSearch = searchLines[0].trim();
  const lastLineSearch = searchLines[searchLines.length - 1].trim();
  const searchBlockSize = searchLines.length;

  // Find the first line that begins at or after startIndex
  let startLineNum = 0;
  let currentIndex = 0;
  while (currentIndex < startIndex && startLineNum < originalLines.length) {
    currentIndex += originalLines[startLineNum].length + 1; // +1 for \n
    startLineNum++;
  }

  // Look for matching start and end anchors
  for (let i = startLineNum; i <= originalLines.length - searchBlockSize; i++) {
    // Check if first line matches
    if (originalLines[i].trim() !== firstLineSearch) {
      continue;
    }

    // Check if last line matches at the expected position
    if (originalLines[i + searchBlockSize - 1].trim() !== lastLineSearch) {
      continue;
    }

    // Calculate exact character positions
    let matchStartIndex = 0;
    for (let k = 0; k < i; k++) {
      matchStartIndex += originalLines[k].length + 1;
    }

    let matchEndIndex = matchStartIndex;
    for (let k = 0; k < searchBlockSize; k++) {
      matchEndIndex += originalLines[i + k].length + 1;
    }

    return [matchStartIndex, matchEndIndex];
  }

  return false;
}

/**
 * Bit flags describing which section of a SEARCH/REPLACE block is currently
 * being consumed. `Idle` is the absence of any flag.
 */
enum ProcessingState {
  Idle = 0b00,
  StateSearch = 0b01,
  StateReplace = 0b10,
}

/**
 * Applies a diff made of SEARCH/REPLACE blocks to `originalContent`, one diff
 * line at a time, accumulating the rewritten file in `result`.
 *
 * Usage: feed every diff line to `processLine`, then call `getResult`.
 * Blocks must appear in the same order as their targets in the file, because
 * every search resumes from `lastProcessedIndex`.
 */
class NewFileContentConstructor {
  // Text of the file the diff is applied to.
  private originalContent: string;
  // When true, getResult appends the unprocessed tail and rejects an unfinished block.
  private isFinal: boolean;
  // Bitwise combination of ProcessingState flags.
  private state: number;
  // Lines seen outside any SEARCH/REPLACE section, kept for marker recovery.
  private pendingNonStandardLines: string[];
  // New file content built so far.
  private result: string;
  // Character index in originalContent up to which content has been consumed.
  private lastProcessedIndex: number;
  // Text of the SEARCH section currently being collected (each line + "\n").
  private currentSearchContent: string;
  // Start/end character offsets of the current match in originalContent; -1 when unset.
  private searchMatchIndex: number;
  private searchEndIndex: number;

  /**
   * @param originalContent - Current content of the file being edited
   * @param isFinal - Whether the diff passed in is complete
   */
  constructor(originalContent: string, isFinal: boolean) {
    this.originalContent = originalContent;
    this.isFinal = isFinal;
    this.pendingNonStandardLines = [];
    this.result = "";
    this.lastProcessedIndex = 0;
    this.state = ProcessingState.Idle;
    this.currentSearchContent = "";
    this.searchMatchIndex = -1;
    this.searchEndIndex = -1;
  }

  /** Returns to Idle and clears the per-block search state. */
  private resetForNextBlock() {
    // Reset for next block
    this.state = ProcessingState.Idle;
    this.currentSearchContent = "";
    this.searchMatchIndex = -1;
    this.searchEndIndex = -1;
  }

  /**
   * Scans pendingNonStandardLines backwards from index `lineLimit - 1` to 0.
   * @returns The index of the first line (from the end) matching `regx`, or -1
   */
  private findLastMatchingLineIndex(regx: RegExp, lineLimit: number) {
    for (let i = lineLimit; i > 0; ) {
      i--;
      if (this.pendingNonStandardLines[i].match(regx)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Adds `newState` to the current state flags.
   * @throws Error unless the transition is Idle → StateSearch or
   *   StateSearch → StateReplace
   */
  private updateProcessingState(newState: ProcessingState) {
    const isValidTransition =
      (this.state === ProcessingState.Idle &&
        newState === ProcessingState.StateSearch) ||
      (this.state === ProcessingState.StateSearch &&
        newState === ProcessingState.StateReplace);

    if (!isValidTransition) {
      throw new Error(
        `Invalid state transition.\n` +
          "Valid transitions are:\n" +
          "- Idle → StateSearch\n" +
          "- StateSearch → StateReplace"
      );
    }

    // Flags accumulate: after the separator both StateSearch and StateReplace are set.
    this.state |= newState;
  }

  /** True when every bit of `state` is set in the current state. */
  private isStateActive(state: ProcessingState): boolean {
    return (this.state & state) === state;
  }

  private activateReplaceState() {
    this.updateProcessingState(ProcessingState.StateReplace);
  }

  private activateSearchState() {
    this.updateProcessingState(ProcessingState.StateSearch);
    this.currentSearchContent = "";
  }

  private isSearchingActive(): boolean {
    return this.isStateActive(ProcessingState.StateSearch);
  }

  private isReplacingActive(): boolean {
    return this.isStateActive(ProcessingState.StateReplace);
  }

  /**
   * The comparison reduces to `pendingNonStandardLineLimit > 0`, i.e. at
   * least one pending line lies below the limit.
   */
  private hasPendingNonStandardLines(
    pendingNonStandardLineLimit: number
  ): boolean {
    return (
      this.pendingNonStandardLines.length - pendingNonStandardLineLimit <
      this.pendingNonStandardLines.length
    );
  }

  /**
   * Consumes one line of the diff.
   * @throws Error on an invalid marker sequence or when a SEARCH section
   *   cannot be located in the original content
   */
  public processLine(line: string) {
    this.internalProcessLine(line, true, this.pendingNonStandardLines.length);
  }

  /**
   * Returns the content built so far.
   * @throws Error when `isFinal` is set and a SEARCH/REPLACE block is still open
   */
  public getResult() {
    // If this is the final chunk, append any remaining original content
    if (this.isFinal && this.lastProcessedIndex < this.originalContent.length) {
      this.result += this.originalContent.slice(this.lastProcessedIndex);
    }
    if (this.isFinal && this.state !== ProcessingState.Idle) {
      throw new Error(
        "File processing incomplete - SEARCH/REPLACE operations still active during finalization"
      );
    }
    return this.result;
  }

  /**
   * Dispatches one line according to whether it is a marker or content.
   *
   * @param line - The diff line
   * @param canWritependingNonStandardLines - True for lines coming straight
   *   from the diff; false while re-feeding lines during marker recovery, in
   *   which case the pending buffer is neither appended to nor cleared
   * @param pendingNonStandardLineLimit - Number of leading pending lines that
   *   recovery is allowed to look at
   * @returns How many trailing blank pending lines were trimmed
   */
  private internalProcessLine(
    line: string,
    canWritependingNonStandardLines: boolean,
    pendingNonStandardLineLimit: number
  ): number {
    let removeLineCount = 0;
    if (isSearchBlockStart(line)) {
      removeLineCount = this.trimPendingNonStandardTrailingEmptyLines(
        pendingNonStandardLineLimit
      );
      if (removeLineCount > 0) {
        pendingNonStandardLineLimit =
          pendingNonStandardLineLimit - removeLineCount;
      }
      if (this.hasPendingNonStandardLines(pendingNonStandardLineLimit)) {
        this.tryFixSearchReplaceBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      this.activateSearchState();
    } else if (isSearchBlockEnd(line)) {
      // Validate non-standard content
      if (!this.isSearchingActive()) {
        this.tryFixSearchBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      this.activateReplaceState();
      this.beforeReplace();
    } else if (isReplaceBlockEnd(line)) {
      if (!this.isReplacingActive()) {
        this.tryFixReplaceBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      // Everything up to the end of the matched region is now consumed.
      this.lastProcessedIndex = this.searchEndIndex;
      this.resetForNextBlock();
    } else {
      // Accumulate content for search or replace
      // (currentReplaceContent is not being used for anything right now since we directly append to result.)
      // (We artificially add a linebreak since we split on \n at the beginning. In order to not include a trailing linebreak in the final search/result blocks we need to remove it before using them. This allows for partial line matches to be correctly identified.)
      // NOTE: search/replace blocks must be arranged in the order they appear in the file due to how we build the content using lastProcessedIndex. We also cannot strip the trailing newline since for non-partial lines it would remove the linebreak from the original content. (If we remove end linebreak from search, then we'd also have to remove it from replace but we can't know if it's a partial line or not since the model may be using the line break to indicate the end of the block rather than as part of the search content.) We require the model to output full lines in order for our fallbacks to work as well.
      if (this.isReplacingActive()) {
        // Output replacement lines immediately if we know the insertion point
        if (this.searchMatchIndex !== -1) {
          this.result += line + "\n";
        }
      } else if (this.isSearchingActive()) {
        this.currentSearchContent += line + "\n";
      } else if (canWritependingNonStandardLines) {
        // Handle non-standard content: remember lines found outside any block
        this.pendingNonStandardLines.push(line);
      }
    }
    return removeLineCount;
  }

  /**
   * Runs when the separator is reached: locates the collected SEARCH text in
   * the original content and copies everything before the match to `result`.
   *
   * Matching order: exact substring, then line-trimmed match, then block
   * anchor match. An empty SEARCH section matches the whole file (or nothing,
   * for an empty file).
   *
   * @throws Error when nothing matches, or when the match starts before
   *   `lastProcessedIndex`
   */
  private beforeReplace() {
    // Remove trailing linebreak for adding the === marker
    // if (currentSearchContent.endsWith("\r\n")) {
    // 	currentSearchContent = currentSearchContent.slice(0, -2)
    // } else if (currentSearchContent.endsWith("\n")) {
    // 	currentSearchContent = currentSearchContent.slice(0, -1)
    // }

    if (!this.currentSearchContent) {
      // Empty search block
      if (this.originalContent.length === 0) {
        // New file scenario: nothing to match, just start inserting
        this.searchMatchIndex = 0;
        this.searchEndIndex = 0;
      } else {
        // Complete file replacement scenario: treat the entire file as matched
        this.searchMatchIndex = 0;
        this.searchEndIndex = this.originalContent.length;
      }
    } else {
      // Add check for inefficient full-file search
      // if (currentSearchContent.trim() === originalContent.trim()) {
      // 	throw new Error(
      // 		"The SEARCH block contains the entire file content. Please either:\n" +
      // 			"1. Use an empty SEARCH block to replace the entire file, or\n" +
      // 			"2. Make focused changes to specific parts of the file that need modification.",
      // 	)
      // }
      // Exact search match scenario
      const exactIndex = this.originalContent.indexOf(
        this.currentSearchContent,
        this.lastProcessedIndex
      );
      if (exactIndex !== -1) {
        this.searchMatchIndex = exactIndex;
        this.searchEndIndex = exactIndex + this.currentSearchContent.length;
      } else {
        // Attempt fallback line-trimmed matching
        const lineMatch = lineTrimmedFallbackMatch(
          this.originalContent,
          this.currentSearchContent,
          this.lastProcessedIndex
        );
        if (lineMatch) {
          [this.searchMatchIndex, this.searchEndIndex] = lineMatch;
        } else {
          // Try block anchor fallback for larger blocks
          const blockMatch = blockAnchorFallbackMatch(
            this.originalContent,
            this.currentSearchContent,
            this.lastProcessedIndex
          );
          if (blockMatch) {
            [this.searchMatchIndex, this.searchEndIndex] = blockMatch;
          } else {
            throw new Error(
              `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...does not match anything in the file.`
            );
          }
        }
      }
    }
    if (this.searchMatchIndex < this.lastProcessedIndex) {
      throw new Error(
        `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...matched an incorrect content in the file.`
      );
    }
    // Output everything up to the match location
    this.result += this.originalContent.slice(
      this.lastProcessedIndex,
      this.searchMatchIndex
    );
  }

  /**
   * Recovery for a separator seen while no SEARCH section is open: looks in
   * the pending lines for a SEARCH tag without the optional trailing ">",
   * and re-feeds the lines from that tag onwards as a SEARCH section.
   *
   * @param lineLimit - Number of leading pending lines to consider; a
   *   negative value means all of them
   * @returns Total number of pending lines trimmed while re-feeding
   * @throws Error when there are no pending lines or no SEARCH tag is found
   */
  private tryFixSearchBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      throw new Error(
        "Invalid SEARCH/REPLACE block structure - no lines available to process"
      );
    }
    const searchTagRegexp = /^([-]{3,}|[<]{3,}) SEARCH$/;
    const searchTagIndex = this.findLastMatchingLineIndex(
      searchTagRegexp,
      lineLimit
    );
    if (searchTagIndex !== -1) {
      const fixLines = this.pendingNonStandardLines.slice(
        searchTagIndex,
        lineLimit
      );
      // Normalise the tag to the canonical marker before re-processing.
      fixLines[0] = SEARCH_BLOCK_START;
      for (const line of fixLines) {
        removeLineCount += this.internalProcessLine(
          line,
          false,
          searchTagIndex
        );
      }
    } else {
      // NOTE(review): searchTagIndex is -1 on this path, so the message
      // always reports "line 0".
      throw new Error(
        `Invalid REPLACE marker detected - could not find matching SEARCH block starting from line ${
          searchTagIndex + 1
        }`
      );
    }
    return removeLineCount;
  }

  /**
   * Recovery for a REPLACE end marker seen while no REPLACE section is open:
   * looks in the pending lines for a "===" separator and re-feeds the lines
   * from that separator onwards.
   *
   * @param lineLimit - Number of leading pending lines to consider; a
   *   negative value means all of them
   * @returns Total number of pending lines trimmed while re-feeding
   * @throws Error when there are no pending lines or no separator is found
   */
  private tryFixReplaceBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      // Same message as tryFixSearchBlock, so the caller never receives an
      // empty error string.
      throw new Error(
        "Invalid SEARCH/REPLACE block structure - no lines available to process"
      );
    }
    const replaceBeginTagRegexp = /^[=]{3,}$/;
    const replaceBeginTagIndex = this.findLastMatchingLineIndex(
      replaceBeginTagRegexp,
      lineLimit
    );
    if (replaceBeginTagIndex !== -1) {
      // // Validate non-standard content
      // if (!this.isSearchingActive()) {
      // 	removeLineCount += this.tryFixSearchBlock(replaceBeginTagIndex)
      // }
      const fixLines = this.pendingNonStandardLines.slice(
        replaceBeginTagIndex - removeLineCount,
        lineLimit - removeLineCount
      );
      fixLines[0] = SEARCH_BLOCK_END;
      for (const line of fixLines) {
        removeLineCount += this.internalProcessLine(
          line,
          false,
          replaceBeginTagIndex - removeLineCount
        );
      }
    } else {
      // NOTE(review): replaceBeginTagIndex is -1 on this path, so the message
      // always reports "line 0".
      throw new Error(
        `Malformed REPLACE block - missing valid separator after line ${
          replaceBeginTagIndex + 1
        }`
      );
    }
    return removeLineCount;
  }

  /**
   * Recovery for a SEARCH start marker seen while pending lines exist:
   * requires the last considered pending line to be a REPLACE tag without
   * the optional trailing ">", and re-feeds it as the canonical end marker.
   *
   * @param lineLimit - Number of leading pending lines to consider; a
   *   negative value means all of them
   * @returns Total number of pending lines trimmed while re-feeding
   * @throws Error when there are no pending lines or the last considered
   *   pending line is not a REPLACE tag
   */
  private tryFixSearchReplaceBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      // Same message as tryFixSearchBlock, so the caller never receives an
      // empty error string.
      throw new Error(
        "Invalid SEARCH/REPLACE block structure - no lines available to process"
      );
    }

    const replaceEndTagRegexp = /^([+]{3,}|[>]{3,}) REPLACE$/;
    const replaceEndTagIndex = this.findLastMatchingLineIndex(
      replaceEndTagRegexp,
      lineLimit
    );
    const likeReplaceEndTag = replaceEndTagIndex === lineLimit - 1;
    if (likeReplaceEndTag) {
      // // Validate non-standard content
      // if (!this.isReplacingActive()) {
      // 	removeLineCount += this.tryFixReplaceBlock(replaceEndTagIndex)
      // }
      const fixLines = this.pendingNonStandardLines.slice(
        replaceEndTagIndex - removeLineCount,
        lineLimit - removeLineCount
      );
      fixLines[fixLines.length - 1] = REPLACE_BLOCK_END;
      for (const line of fixLines) {
        removeLineCount += this.internalProcessLine(
          line,
          false,
          replaceEndTagIndex - removeLineCount
        );
      }
    } else {
      throw new Error(
        "Malformed SEARCH/REPLACE block structure: Missing valid closing REPLACE marker"
      );
    }
    return removeLineCount;
  }

  /**
   * Removes trailing empty lines from the pendingNonStandardLines array
   * @param lineLimit - The index to start checking from (exclusive).
   *                    Removes empty lines from lineLimit-1 backwards.
   * @returns The number of empty lines removed
   */
  private trimPendingNonStandardTrailingEmptyLines(lineLimit: number): number {
    let removedCount = 0;
    let i = Math.min(lineLimit, this.pendingNonStandardLines.length) - 1;

    // NOTE(review): pop() always removes the LAST array element, which is the
    // inspected line only when lineLimit >= the array length.
    while (i >= 0 && this.pendingNonStandardLines[i].trim() === "") {
      this.pendingNonStandardLines.pop();
      removedCount++;
      i--;
    }

    return removedCount;
  }
}

/**
 * Applies a SEARCH/REPLACE diff to `originalContent` and returns the new
 * file content.
 *
 * The diff is split on "\n" and fed line by line to a
 * NewFileContentConstructor. Before processing, a final line that begins with
 * a marker character but is not a complete marker is dropped, since it may be
 * a marker that was cut off mid-way.
 *
 * @param diffContent - One or more SEARCH/REPLACE blocks
 * @param originalContent - Current content of the file
 * @param isFinal - Whether `diffContent` is the complete diff; when true the
 *   unmodified tail of the file is appended and an unfinished block is an error
 * @returns The rewritten file content
 * @throws Error (as a rejected promise) when a SEARCH section cannot be
 *   matched, the marker sequence is invalid, or a block is left open at the end
 */
export async function constructNewFileContentV2(
  diffContent: string,
  originalContent: string,
  isFinal: boolean
): Promise<string> {
  const newFileContentConstructor = new NewFileContentConstructor(
    originalContent,
    isFinal
  );

  const lines = diffContent.split("\n");

  // If the last line looks like a partial marker but isn't recognized,
  // remove it because it might be incomplete.
  const lastLine = lines[lines.length - 1] ?? "";
  const startsLikeMarker = [
    SEARCH_BLOCK_CHAR,
    LEGACY_SEARCH_BLOCK_CHAR,
    "=",
    REPLACE_BLOCK_CHAR,
    LEGACY_REPLACE_BLOCK_CHAR,
  ].some((markerChar) => lastLine.startsWith(markerChar));

  // Use the same flexible matchers that processLine relies on. Comparing
  // against the exact canonical strings would discard a valid closing marker
  // such as ">>>>>>> REPLACE" when the diff has no trailing newline.
  const isCompleteMarker =
    isSearchBlockStart(lastLine) ||
    isSearchBlockEnd(lastLine) ||
    isReplaceBlockEnd(lastLine);

  if (startsLikeMarker && !isCompleteMarker) {
    lines.pop();
  }

  for (const line of lines) {
    newFileContentConstructor.processLine(line);
  }

  return newFileContentConstructor.getResult();
}

/**
 * Fixes incorrectly escaped HTML entities in AI model outputs.
 *
 * Decodes `&gt;`, `&lt;`, `&quot;`, `&amp;` and `&apos;` in a single pass, so
 * the output of one replacement is never decoded a second time (for example
 * `&amp;apos;` becomes `&apos;`, not `'`).
 *
 * @param text String potentially containing incorrectly escaped HTML entities from AI models
 * @returns String with HTML entities converted back to normal characters
 */
export function fixModelHtmlEscaping(text: string): string {
  const decodedByEntity: Record<string, string> = {
    "&gt;": ">",
    "&lt;": "<",
    "&quot;": '"',
    "&amp;": "&",
    "&apos;": "'",
  };
  return text.replace(
    /&(?:gt|lt|quot|amp|apos);/g,
    // The pattern only matches keys of the table; the fallback keeps the
    // callback total for the type checker.
    (entity) => decodedByEntity[entity] ?? entity
  );
}

/**
 * Strips every U+FFFD (Unicode replacement character) from a string.
 * @param text String that may contain replacement characters
 * @returns The same text with all U+FFFD characters removed
 */
export function removeInvalidChars(text: string): string {
  const piecesBetweenInvalidChars = text.split("\uFFFD");
  return piecesBetweenInvalidChars.join("");
}

/**
 * Builds the file tools exposed to the model: `replace_in_file`,
 * `write_file`, `read_file`, `search_files` and `list_files`.
 *
 * Every tool implementation catches its own failures and returns a string
 * starting with "Error: " instead of throwing.
 *
 * @param ctl - Controller supplied by the LM Studio SDK
 * @returns The list of tool definitions
 */
export async function toolsProvider(ctl: ToolsProviderController) {
  // Not read by any tool below; kept so the plugin configuration is still
  // resolved exactly as before.
  const config = ctl.getPluginConfig(configSchematics);

  // Formats a caught value for the model, without assuming it is an Error.
  const describeError = (error: unknown): string =>
    `Error: ${error instanceof Error ? error.message : String(error)}`;

  const tools: Tool[] = [];

  const replaceInFilePathDescription = `The absolute path of the file to modify`;
  const replaceInFileDiffDescription = `One or more SEARCH/REPLACE blocks following this exact format:
  \`\`\`
  ------- SEARCH
  [exact content to find]
  =======
  [new content to replace with]
  +++++++ REPLACE
  \`\`\`
  Critical rules:
  1. SEARCH content must match the associated file section to find EXACTLY:
     * Match character-for-character including whitespace, indentation, line endings
     * Include all comments, docstrings, etc.
  2. SEARCH/REPLACE blocks will ONLY replace the first match occurrence.
     * Including multiple unique SEARCH/REPLACE blocks if you need to make multiple changes.
     * Include *just* enough lines in each SEARCH section to uniquely match each set of lines that need to change.
     * When using multiple SEARCH/REPLACE blocks, list them in the order they appear in the file.
  3. Keep SEARCH/REPLACE blocks concise:
     * Break large SEARCH/REPLACE blocks into a series of smaller blocks that each change a small portion of the file.
     * Include just the changing lines, and a few surrounding lines if needed for uniqueness.
     * Do not include long runs of unchanging lines in SEARCH/REPLACE blocks.
     * Each line must be complete. Never truncate lines mid-way through as this can cause matching failures.
  4. Special operations:
     * To move code: Use two SEARCH/REPLACE blocks (one to delete from original + one to insert at new location)
     * To delete code: Use empty REPLACE section`;
  const replaceInFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "replace_in_file",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to replace sections of content in an existing file using SEARCH/REPLACE blocks that define exact changes to specific parts of the file. This tool should be used when you need to make targeted changes to specific parts of a file.`,
    parameters: {
      path: z.string().describe(replaceInFilePathDescription),
      diff: z.string().describe(replaceInFileDiffDescription),
    },
    implementation: async ({ path, diff }) => {
      try {
        const filePath = path;

        // This tool only edits existing files; creating files is write_file's job.
        if (!existsSync(filePath)) {
          return `Error: could not find file`;
        }
        const originalContent = await readFile(filePath, "utf-8");

        // The diff argument is always complete here, hence isFinal = true.
        const newContent = await constructNewFileContentV2(
          diff,
          originalContent,
          true
        );
        await writeFile(filePath, newContent, "utf-8");

        return "Diff applied successfully.";
      } catch (error) {
        return describeError(error);
      }
    },
  });
  tools.push(replaceInFileTool);

  const writeFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "write_file",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.`,
    parameters: {
      path: z.string().describe("The absolute path of the file to write to"),
      content: z
        .string()
        .describe(
          "The content to write to the file. ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified."
        ),
    },
    implementation: async ({ path, content }) => {
      try {
        const filePath = path;

        // Create directory structure if it doesn't exist. dirname understands
        // the platform's separators, so backslash paths on Windows work too.
        const dirPath = dirname(filePath);
        if (dirPath) {
          await mkdir(dirPath, { recursive: true });
        }

        let newContent = content;
        if (newContent.startsWith("```")) {
          // this handles cases where it includes language specifiers like ```python ```js
          newContent = newContent.split("\n").slice(1).join("\n").trim();
        }
        if (newContent.endsWith("```")) {
          newContent = newContent.split("\n").slice(0, -1).join("\n").trim();
        }

        newContent = fixModelHtmlEscaping(newContent);
        newContent = removeInvalidChars(newContent);

        // Write content to file
        await writeFile(filePath, newContent, "utf-8");

        return "File written successfully.";
      } catch (error) {
        return describeError(error);
      }
    },
  });
  tools.push(writeFileTool);

  const readFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "read_file",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file you do not know the contents of, for example to analyze code, review text files, or extract information from configuration files. The file is read as UTF-8 text and returned as a raw string, so it may not be suitable for binary files (including PDF and DOCX). Do NOT use this tool to list the contents of a directory. Only use this tool on files.`,
    parameters: {
      path: z.string().describe("The absolute path of the file to read"),
    },
    implementation: async ({ path }) => {
      // TODO: check if num tokens in file is larger than ctx length
      try {
        const filePath = path;

        if (!existsSync(filePath)) {
          return `Error: could not find file`;
        }
        const text = await readFile(filePath, "utf-8");
        return {
          text: text,
        };
      } catch (error) {
        return describeError(error);
      }
    },
  });
  tools.push(readFileTool);

  const searchFilesTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "search_files",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.`,
    parameters: {
      path: z
        .string()
        .describe("The absolute path of the directory to search in"),
      regex: z
        .string()
        .describe(
          "The regular expression pattern to search for. Uses Rust regex syntax."
        ),
      file_pattern: z
        .string()
        .optional()
        .describe(
          "Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*)."
        ),
    },
    implementation: async ({ path, regex, file_pattern }) => {
      try {
        // Validate path exists
        if (!existsSync(path)) {
          return `Error: Directory '${path}' does not exist`;
        }

        // Validate path is a directory
        const stats = statSync(path);
        if (!stats.isDirectory()) {
          return `Error: '${path}' is not a directory`;
        }

        // Get current working directory and perform search
        const cwd = process.cwd();
        const results = await regexSearchFiles(cwd, path, regex, file_pattern);
        return results;
      } catch (error) {
        return describeError(error);
      }
    },
  });
  tools.push(searchFilesTool);

  const listFilesTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "list_files",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.`,
    parameters: {
      path: z
        .string()
        .describe("The absolute path of the directory to list contents for"),
      recursive: z
        .boolean()
        .optional()
        .describe(
          "Whether to list files recursively. Use true for recursive listing, false or omit for top-level only. THIS IS CASE SENSITIVE, FOR EXAMPLE NEVER PUT True or TRUE only put true"
        ),
    },
    implementation: async ({ path, recursive }) => {
      try {
        // Validate path exists
        if (!existsSync(path)) {
          return `Error: Directory '${path}' does not exist`;
        }

        // Validate path is a directory
        const stats = statSync(path);
        if (!stats.isDirectory()) {
          return `Error: '${path}' is not a directory`;
        }

        // Set default recursive to false if not provided, and use reasonable limit
        const isRecursive = recursive ?? false;
        const limit = 1000;

        // Call listFiles function
        const [filePaths, wasLimited] = await listFiles(
          path,
          isRecursive,
          limit
        );

        // Use formatFilesList function to format the output
        return formatFilesList(path, filePaths, wasLimited);
      } catch (error) {
        return describeError(error);
      }
    },
  });
  tools.push(listFilesTool);
  return tools;
}

src / toolsProvider.ts

/**
 * MOST OF THIS CODE IS ATTRIBUTABLE TO CLINE
 * replace_in_file:
 * https://github.com/cline/cline/blob/06e0973/src/core/assistant-message/diff.ts
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/components/editing_files.ts#L7
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/replace_in_file.ts
 *
 * read_file:
 * https://github.com/cline/cline/blob/f0ad29a/src/core/prompts/system-prompt/tools/read_file.ts
 * https://github.com/cline/cline/blob/e08c656/src/core/task/ToolExecutor.ts
 *
 * write_file:
 * https://github.com/cline/cline/blob/5595d12/src/core/prompts/system-prompt/tools/write_to_file.ts
 * https://github.com/cline/cline/blob/2315fc6/src/core/task/tools/handlers/WriteToFileToolHandler.ts
 *
 * search_files:
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/search_files.ts
 * https://github.com/cline/cline/blob/2315fc6/src/core/task/tools/handlers/SearchFilesToolHandler.ts
 *
 * list_files:
 * https://github.com/cline/cline/blob/0ba4508/src/core/prompts/system-prompt/tools/list_files.ts
 * https://github.com/cline/cline/blob/971ac0f/src/core/task/tools/handlers/ListFilesToolHandler.ts
 *
 */

import { existsSync, statSync } from "fs";
import { writeFile, readFile, mkdir } from "fs/promises";
import { dirname } from "path";

import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";

import { configSchematics } from "./config";
import { regexSearchFiles } from "./ripgrep";
import { listFiles, formatFilesList } from "./list-files";

// See details about defining tools in the documentation:
// https://lmstudio.ai/docs/typescript/agent/tools

const SEARCH_BLOCK_START = "------- SEARCH";
const SEARCH_BLOCK_END = "=======";
const REPLACE_BLOCK_END = "+++++++ REPLACE";

const SEARCH_BLOCK_CHAR = "-";
const REPLACE_BLOCK_CHAR = "+";
const LEGACY_SEARCH_BLOCK_CHAR = "<";
const LEGACY_REPLACE_BLOCK_CHAR = ">";

// Replace the exact string constants with flexible regex patterns
const SEARCH_BLOCK_START_REGEX = /^[-]{3,} SEARCH>?$/;
const LEGACY_SEARCH_BLOCK_START_REGEX = /^[<]{3,} SEARCH>?$/;

const SEARCH_BLOCK_END_REGEX = /^[=]{3,}$/;

const REPLACE_BLOCK_END_REGEX = /^[+]{3,} REPLACE>?$/;
const LEGACY_REPLACE_BLOCK_END_REGEX = /^[>]{3,} REPLACE>?$/;

// Helper functions to check if a line matches the flexible patterns
/**
 * Tells whether `line` is a SEARCH opening marker, in either the current or
 * the legacy pattern.
 */
function isSearchBlockStart(line: string): boolean {
  if (SEARCH_BLOCK_START_REGEX.test(line)) {
    return true;
  }
  return LEGACY_SEARCH_BLOCK_START_REGEX.test(line);
}

/**
 * Tells whether `line` is the separator that closes the SEARCH section and
 * opens the REPLACE section.
 */
function isSearchBlockEnd(line: string): boolean {
  const isSeparator = SEARCH_BLOCK_END_REGEX.test(line);
  return isSeparator;
}

/**
 * Tells whether `line` is a REPLACE closing marker, in either the current or
 * the legacy pattern.
 */
function isReplaceBlockEnd(line: string): boolean {
  if (REPLACE_BLOCK_END_REGEX.test(line)) {
    return true;
  }
  return LEGACY_REPLACE_BLOCK_END_REGEX.test(line);
}

/**
 * Line-trimmed fallback match: looks for a run of consecutive lines in
 * `originalContent` that equal the lines of `searchContent` once leading and
 * trailing whitespace is stripped from every line on both sides.
 *
 * Only lines that begin at or after `startIndex` are considered as the first
 * line of a match.
 *
 * @param originalContent - Full text to search in.
 * @param searchContent - Text to find; a single trailing newline is ignored.
 * @param startIndex - Character offset in `originalContent` to start from.
 * @returns `[matchStart, matchEnd]` character offsets (end exclusive, including
 *          the newline after the last matched line when there is one, and never
 *          larger than `originalContent.length`), or `false` when nothing
 *          matches or `searchContent` holds no lines.
 */
function lineTrimmedFallbackMatch(
  originalContent: string,
  searchContent: string,
  startIndex: number
): [number, number] | false {
  const originalLines = originalContent.split("\n");
  const searchLines = searchContent.split("\n");

  // A trailing "\n" in searchContent produces a final empty element; drop it.
  if (searchLines[searchLines.length - 1] === "") {
    searchLines.pop();
  }

  // With no lines left every position would "match" vacuously.
  if (searchLines.length === 0) {
    return false;
  }

  const wantedLines = searchLines.map((searchLine) => searchLine.trim());

  // Advance to the first line that starts at or after startIndex, keeping
  // lineOffset equal to the character offset at which that line begins.
  let startLineNum = 0;
  let lineOffset = 0;
  while (lineOffset < startIndex && startLineNum < originalLines.length) {
    lineOffset += originalLines[startLineNum].length + 1; // +1 for \n
    startLineNum++;
  }

  const lastCandidate = originalLines.length - wantedLines.length;
  for (let i = startLineNum; i <= lastCandidate; i++) {
    const matches = wantedLines.every(
      (wanted, j) => originalLines[i + j].trim() === wanted
    );

    if (matches) {
      let matchEndIndex = lineOffset;
      for (let k = 0; k < wantedLines.length; k++) {
        matchEndIndex += originalLines[i + k].length + 1; // +1 for \n
      }
      // The final line of the file may have no "\n" to account for; clamp so
      // the end offset never points past the content.
      return [lineOffset, Math.min(matchEndIndex, originalContent.length)];
    }

    lineOffset += originalLines[i].length + 1; // +1 for \n
  }

  return false;
}

/**
 * Block-anchor fallback match: locates a block in `originalContent` using only
 * the first and last lines of `searchContent` as anchors, so a block can still
 * be found when its interior differs slightly.
 *
 * How it works:
 * 1. A single trailing empty line (from a terminating newline) is dropped from
 *    the search lines.
 * 2. Blocks with fewer than 3 remaining lines are rejected to avoid false
 *    positives.
 * 3. For every candidate line at or after `startIndex`, the trimmed line must
 *    equal the trimmed first search line, and the trimmed line found
 *    `blockSize - 1` lines further down must equal the trimmed last search
 *    line. Interior lines are not compared.
 *
 * @param originalContent - The full content of the original file.
 * @param searchContent - The content to locate in the original file.
 * @param startIndex - Character offset in `originalContent` to start from.
 * @returns `[matchStart, matchEnd]` character offsets (end exclusive, never
 *          larger than `originalContent.length`), or `false` when no match is
 *          found.
 */
function blockAnchorFallbackMatch(
  originalContent: string,
  searchContent: string,
  startIndex: number
): [number, number] | false {
  const originalLines = originalContent.split("\n");
  const searchLines = searchContent.split("\n");

  // Drop the empty element produced by a trailing "\n" BEFORE measuring the
  // block, otherwise a 2-line block followed by a newline counts as 3 lines.
  if (searchLines[searchLines.length - 1] === "") {
    searchLines.pop();
  }

  // Only use this approach for blocks of 3+ real lines.
  if (searchLines.length < 3) {
    return false;
  }

  const firstLineSearch = searchLines[0].trim();
  const lastLineSearch = searchLines[searchLines.length - 1].trim();
  const searchBlockSize = searchLines.length;

  // Advance to the first line that starts at or after startIndex, keeping
  // lineOffset equal to the character offset at which that line begins.
  let startLineNum = 0;
  let lineOffset = 0;
  while (lineOffset < startIndex && startLineNum < originalLines.length) {
    lineOffset += originalLines[startLineNum].length + 1; // +1 for \n
    startLineNum++;
  }

  const lastCandidate = originalLines.length - searchBlockSize;
  for (let i = startLineNum; i <= lastCandidate; i++) {
    const anchorsMatch =
      originalLines[i].trim() === firstLineSearch &&
      originalLines[i + searchBlockSize - 1].trim() === lastLineSearch;

    if (anchorsMatch) {
      let matchEndIndex = lineOffset;
      for (let k = 0; k < searchBlockSize; k++) {
        matchEndIndex += originalLines[i + k].length + 1; // +1 for \n
      }
      // The final line of the file may have no "\n" to account for; clamp so
      // the end offset never points past the content.
      return [lineOffset, Math.min(matchEndIndex, originalContent.length)];
    }

    lineOffset += originalLines[i].length + 1; // +1 for \n
  }

  return false;
}

/**
 * Bit flags describing which section of a SEARCH/REPLACE block is being read.
 * `Idle` is the absence of any flag; the other members each occupy one bit so
 * they can be OR-ed together.
 */
enum ProcessingState {
  Idle = 0b00,
  StateSearch = 0b01,
  StateReplace = 0b10,
}

/**
 * Builds new file content by applying SEARCH/REPLACE diff lines, fed one at a
 * time through `processLine`, to `originalContent`.
 *
 * Each block moves through Idle → StateSearch → StateReplace and back to Idle:
 * - SEARCH start marker: begin collecting the text to find.
 * - `=======` separator: locate that text (exact match first, then the
 *   line-trimmed and block-anchor fallbacks), copy the untouched original text
 *   that precedes the match into the result, and start emitting replacement
 *   lines.
 * - REPLACE end marker: skip the matched original text and return to Idle.
 *
 * Blocks must appear in the same order as their targets in the file, because
 * matching always starts from the end of the previous match.
 */
class NewFileContentConstructor {
  /** Unmodified content the diff is applied against. */
  private originalContent: string;
  /** Whether the diff is complete; enables the tail copy and state check in getResult(). */
  private isFinal: boolean;
  /** Bitwise OR of the ProcessingState flags currently active. */
  private state: number;
  /** Lines received while idle that were not recognised as markers. */
  private pendingNonStandardLines: string[];
  /** New content assembled so far. */
  private result: string;
  /** Offset in originalContent up to which content has already been consumed. */
  private lastProcessedIndex: number;
  /** Text accumulated for the SEARCH section of the current block. */
  private currentSearchContent: string;
  /** Start offset of the current block's match in originalContent, or -1. */
  private searchMatchIndex: number;
  /** End offset (exclusive) of the current block's match, or -1. */
  private searchEndIndex: number;

  constructor(originalContent: string, isFinal: boolean) {
    this.originalContent = originalContent;
    this.isFinal = isFinal;
    this.pendingNonStandardLines = [];
    this.result = "";
    this.lastProcessedIndex = 0;
    this.state = ProcessingState.Idle;
    this.currentSearchContent = "";
    this.searchMatchIndex = -1;
    this.searchEndIndex = -1;
  }

  /** Clears all per-block state and returns to Idle. */
  private resetForNextBlock() {
    this.state = ProcessingState.Idle;
    this.currentSearchContent = "";
    this.searchMatchIndex = -1;
    this.searchEndIndex = -1;
  }

  /**
   * Scans pendingNonStandardLines backwards, starting just below `lineLimit`,
   * and returns the index of the first line matching `regx`, or -1.
   */
  private findLastMatchingLineIndex(regx: RegExp, lineLimit: number) {
    // Clamp so a limit larger than the buffer cannot index past its end.
    const upperBound = Math.min(lineLimit, this.pendingNonStandardLines.length);
    for (let i = upperBound - 1; i >= 0; i--) {
      if (regx.test(this.pendingNonStandardLines[i])) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Adds `newState` to the active flags.
   * @throws Error when the transition is not Idle → StateSearch or
   *         StateSearch → StateReplace.
   */
  private updateProcessingState(newState: ProcessingState) {
    const isValidTransition =
      (this.state === ProcessingState.Idle &&
        newState === ProcessingState.StateSearch) ||
      (this.state === ProcessingState.StateSearch &&
        newState === ProcessingState.StateReplace);

    if (!isValidTransition) {
      throw new Error(
        `Invalid state transition.\n` +
          "Valid transitions are:\n" +
          "- Idle → StateSearch\n" +
          "- StateSearch → StateReplace"
      );
    }

    // Flags accumulate: while replacing, the search flag stays set as well.
    this.state |= newState;
  }

  /** True when every bit of `state` is set in the current state. */
  private isStateActive(state: ProcessingState): boolean {
    return (this.state & state) === state;
  }

  private activateReplaceState() {
    this.updateProcessingState(ProcessingState.StateReplace);
  }

  private activateSearchState() {
    this.updateProcessingState(ProcessingState.StateSearch);
    this.currentSearchContent = "";
  }

  private isSearchingActive(): boolean {
    return this.isStateActive(ProcessingState.StateSearch);
  }

  private isReplacingActive(): boolean {
    return this.isStateActive(ProcessingState.StateReplace);
  }

  /** True when the given limit leaves at least one pending line to inspect. */
  private hasPendingNonStandardLines(
    pendingNonStandardLineLimit: number
  ): boolean {
    return pendingNonStandardLineLimit > 0;
  }

  /** Feeds one line of the diff (without its trailing newline) into the state machine. */
  public processLine(line: string) {
    this.internalProcessLine(line, true, this.pendingNonStandardLines.length);
  }

  /**
   * Returns the content built so far. For a final diff the unconsumed tail of
   * the original content is appended. Calling this repeatedly is safe: the
   * accumulated result is not modified.
   *
   * @throws Error when the diff is final but a block is still open.
   */
  public getResult() {
    if (!this.isFinal) {
      return this.result;
    }
    if (this.state !== ProcessingState.Idle) {
      throw new Error(
        "File processing incomplete - SEARCH/REPLACE operations still active during finalization"
      );
    }
    if (this.lastProcessedIndex < this.originalContent.length) {
      return this.result + this.originalContent.slice(this.lastProcessedIndex);
    }
    return this.result;
  }

  /**
   * Core state-machine step.
   *
   * @param line - The diff line to process.
   * @param canWritependingNonStandardLines - Whether this call may append to or
   *        clear pendingNonStandardLines; false when replaying buffered lines.
   * @param pendingNonStandardLineLimit - Number of leading pending lines that
   *        are in scope for repair attempts.
   * @returns The number of trailing empty pending lines removed by this step.
   */
  private internalProcessLine(
    line: string,
    canWritependingNonStandardLines: boolean,
    pendingNonStandardLineLimit: number
  ): number {
    let removeLineCount = 0;
    if (isSearchBlockStart(line)) {
      removeLineCount = this.trimPendingNonStandardTrailingEmptyLines(
        pendingNonStandardLineLimit
      );
      if (removeLineCount > 0) {
        pendingNonStandardLineLimit =
          pendingNonStandardLineLimit - removeLineCount;
      }
      if (this.hasPendingNonStandardLines(pendingNonStandardLineLimit)) {
        this.tryFixSearchReplaceBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      this.activateSearchState();
    } else if (isSearchBlockEnd(line)) {
      // Validate non-standard content: a separator arrived without an open
      // SEARCH section, so try to recover one from the buffered lines.
      if (!this.isSearchingActive()) {
        this.tryFixSearchBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      this.activateReplaceState();
      this.beforeReplace();
    } else if (isReplaceBlockEnd(line)) {
      if (!this.isReplacingActive()) {
        this.tryFixReplaceBlock(pendingNonStandardLineLimit);
        if (canWritependingNonStandardLines) {
          this.pendingNonStandardLines.length = 0;
        }
      }
      // Skip over the matched original text; it has been replaced.
      this.lastProcessedIndex = this.searchEndIndex;
      this.resetForNextBlock();
    } else {
      // Plain content line. A "\n" is re-added because the diff was split on
      // "\n"; it is deliberately kept on search content too, since stripping
      // it would also drop the line break from the original content for
      // full-line matches. Models are therefore required to emit full lines.
      if (this.isReplacingActive()) {
        // Emit replacement lines immediately once the insertion point is known.
        if (this.searchMatchIndex !== -1) {
          this.result += line + "\n";
        }
      } else if (this.isSearchingActive()) {
        this.currentSearchContent += line + "\n";
      } else if (canWritependingNonStandardLines) {
        // Handle non-standard content: buffer it for a later repair attempt.
        this.pendingNonStandardLines.push(line);
      }
    }
    return removeLineCount;
  }

  /**
   * Runs when the `=======` separator is reached: resolves where the collected
   * SEARCH text sits in the original content and copies everything between
   * the previous match and this one into the result.
   *
   * @throws Error when the SEARCH text cannot be found, or when the resolved
   *         match starts before content that was already consumed.
   */
  private beforeReplace() {
    if (!this.currentSearchContent) {
      // Empty SEARCH section: for an empty original this is the "new file"
      // case (nothing to match); otherwise the whole file is treated as
      // matched and will be replaced.
      this.searchMatchIndex = 0;
      this.searchEndIndex = this.originalContent.length;
    } else {
      // 1) Exact match, searching from the end of the previous block.
      const exactIndex = this.originalContent.indexOf(
        this.currentSearchContent,
        this.lastProcessedIndex
      );
      if (exactIndex !== -1) {
        this.searchMatchIndex = exactIndex;
        this.searchEndIndex = exactIndex + this.currentSearchContent.length;
      } else {
        // 2) Whitespace-insensitive line match, then 3) first/last-line anchors.
        const fallbackMatch =
          lineTrimmedFallbackMatch(
            this.originalContent,
            this.currentSearchContent,
            this.lastProcessedIndex
          ) ||
          blockAnchorFallbackMatch(
            this.originalContent,
            this.currentSearchContent,
            this.lastProcessedIndex
          );
        if (!fallbackMatch) {
          throw new Error(
            `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...does not match anything in the file.`
          );
        }
        [this.searchMatchIndex, this.searchEndIndex] = fallbackMatch;
      }
    }
    if (this.searchMatchIndex < this.lastProcessedIndex) {
      throw new Error(
        `The SEARCH block:\n${this.currentSearchContent.trimEnd()}\n...matched an incorrect content in the file.`
      );
    }
    // Output everything up to the match location.
    this.result += this.originalContent.slice(
      this.lastProcessedIndex,
      this.searchMatchIndex
    );
  }

  /**
   * Repair attempt for a separator seen without an open SEARCH section: finds
   * the last buffered line that looks like a SEARCH marker below `lineLimit`
   * and replays the lines from there with a canonical start marker.
   *
   * @returns The number of pending lines removed while replaying.
   * @throws Error when there are no lines to inspect or no marker is found.
   */
  private tryFixSearchBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      throw new Error(
        "Invalid SEARCH/REPLACE block structure - no lines available to process"
      );
    }
    const searchTagRegexp = /^([-]{3,}|[<]{3,}) SEARCH$/;
    const searchTagIndex = this.findLastMatchingLineIndex(
      searchTagRegexp,
      lineLimit
    );
    if (searchTagIndex === -1) {
      throw new Error(
        `Invalid REPLACE marker detected - could not find matching SEARCH block starting from line ${
          searchTagIndex + 1
        }`
      );
    }
    const fixLines = this.pendingNonStandardLines.slice(
      searchTagIndex,
      lineLimit
    );
    fixLines[0] = SEARCH_BLOCK_START;
    for (const line of fixLines) {
      removeLineCount += this.internalProcessLine(line, false, searchTagIndex);
    }
    return removeLineCount;
  }

  /**
   * Repair attempt for a REPLACE end marker seen without an active REPLACE
   * section: finds the last buffered separator-like line below `lineLimit`
   * and replays the lines from there with a canonical separator.
   *
   * @returns The number of pending lines removed while replaying.
   * @throws Error when there are no lines to inspect or no separator is found.
   */
  private tryFixReplaceBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      throw new Error(
        "Malformed SEARCH/REPLACE block - REPLACE marker found without a preceding ======= separator"
      );
    }
    const replaceBeginTagRegexp = /^[=]{3,}$/;
    const replaceBeginTagIndex = this.findLastMatchingLineIndex(
      replaceBeginTagRegexp,
      lineLimit
    );
    if (replaceBeginTagIndex === -1) {
      throw new Error(
        `Malformed REPLACE block - missing valid separator after line ${
          replaceBeginTagIndex + 1
        }`
      );
    }
    const fixLines = this.pendingNonStandardLines.slice(
      replaceBeginTagIndex - removeLineCount,
      lineLimit - removeLineCount
    );
    fixLines[0] = SEARCH_BLOCK_END;
    for (const line of fixLines) {
      // The limit shrinks as replayed lines report removals.
      removeLineCount += this.internalProcessLine(
        line,
        false,
        replaceBeginTagIndex - removeLineCount
      );
    }
    return removeLineCount;
  }

  /**
   * Repair attempt for a SEARCH start marker seen while lines are still
   * buffered: succeeds only when the last in-scope buffered line looks like a
   * REPLACE end marker, which is then replayed in canonical form.
   *
   * @returns The number of pending lines removed while replaying.
   * @throws Error when there are no lines to inspect or the last in-scope line
   *         is not a REPLACE end marker.
   */
  private tryFixSearchReplaceBlock(lineLimit: number): number {
    let removeLineCount = 0;
    if (lineLimit < 0) {
      lineLimit = this.pendingNonStandardLines.length;
    }
    if (!lineLimit) {
      throw new Error(
        "Invalid SEARCH/REPLACE block structure - no lines available to process"
      );
    }

    const replaceEndTagRegexp = /^([+]{3,}|[>]{3,}) REPLACE$/;
    const replaceEndTagIndex = this.findLastMatchingLineIndex(
      replaceEndTagRegexp,
      lineLimit
    );
    const likeReplaceEndTag = replaceEndTagIndex === lineLimit - 1;
    if (!likeReplaceEndTag) {
      throw new Error(
        "Malformed SEARCH/REPLACE block structure: Missing valid closing REPLACE marker"
      );
    }
    const fixLines = this.pendingNonStandardLines.slice(
      replaceEndTagIndex - removeLineCount,
      lineLimit - removeLineCount
    );
    fixLines[fixLines.length - 1] = REPLACE_BLOCK_END;
    for (const line of fixLines) {
      // The limit shrinks as replayed lines report removals.
      removeLineCount += this.internalProcessLine(
        line,
        false,
        replaceEndTagIndex - removeLineCount
      );
    }
    return removeLineCount;
  }

  /**
   * Removes trailing empty lines from the pendingNonStandardLines array.
   * @param lineLimit - The index to start checking from (exclusive).
   *                    Checks for empty lines from lineLimit-1 backwards.
   * @returns The number of empty lines removed
   */
  private trimPendingNonStandardTrailingEmptyLines(lineLimit: number): number {
    let removedCount = 0;
    let i = Math.min(lineLimit, this.pendingNonStandardLines.length) - 1;

    // NOTE(review): the line inspected is at index i, but pop() always removes
    // the array's last element. The two coincide only when lineLimit covers
    // the whole array - confirm this is intended for smaller limits.
    while (i >= 0 && this.pendingNonStandardLines[i].trim() === "") {
      this.pendingNonStandardLines.pop();
      removedCount++;
      i--;
    }

    return removedCount;
  }
}

/**
 * Applies a SEARCH/REPLACE diff to `originalContent` and returns the resulting
 * file content.
 *
 * The diff is split on "\n" and fed line by line to a
 * `NewFileContentConstructor`. If the last line starts with a marker character
 * but is not a complete marker, it is dropped first, since it may be a marker
 * that was cut off mid-way.
 *
 * @param diffContent - One or more SEARCH/REPLACE blocks.
 * @param originalContent - Current content of the file being edited.
 * @param isFinal - Whether `diffContent` is the complete diff; passed through
 *                  to the constructor.
 * @returns The new file content.
 * @throws Error propagated from the constructor, e.g. when a SEARCH block does
 *         not match or the diff is final but a block is left open.
 */
export async function constructNewFileContentV2(
  diffContent: string,
  originalContent: string,
  isFinal: boolean
): Promise<string> {
  const newFileContentConstructor = new NewFileContentConstructor(
    originalContent,
    isFinal
  );

  // split() always yields at least one element, so lastLine is defined.
  const lines = diffContent.split("\n");
  const lastLine = lines[lines.length - 1];

  const markerChars = [
    SEARCH_BLOCK_CHAR,
    LEGACY_SEARCH_BLOCK_CHAR,
    "=",
    REPLACE_BLOCK_CHAR,
    LEGACY_REPLACE_BLOCK_CHAR,
  ];
  const looksLikeMarker = markerChars.some((markerChar) =>
    lastLine.startsWith(markerChar)
  );
  // Use the same tolerant recognisers as the line processor, so that legacy or
  // variable-length markers (e.g. ">>>>>>> REPLACE") on the last line are kept
  // instead of being discarded as "partial".
  const isCompleteMarker =
    isSearchBlockStart(lastLine) ||
    isSearchBlockEnd(lastLine) ||
    isReplaceBlockEnd(lastLine);

  if (looksLikeMarker && !isCompleteMarker) {
    lines.pop();
  }

  for (const line of lines) {
    newFileContentConstructor.processLine(line);
  }

  return newFileContentConstructor.getResult();
}

/**
 * Fixes incorrectly escaped HTML entities in AI model outputs.
 *
 * Decodes `&gt;`, `&lt;`, `&quot;`, `&amp;` and `&apos;` in a single pass, so
 * each entity is decoded exactly once: `&amp;apos;` becomes `&apos;`, not `'`.
 *
 * @param text String potentially containing incorrectly escaped HTML entities from AI models
 * @returns String with those HTML entities converted back to normal characters
 */
export function fixModelHtmlEscaping(text: string): string {
  const decodedByName: Record<string, string> = {
    gt: ">",
    lt: "<",
    quot: '"',
    amp: "&",
    apos: "'",
  };
  return text.replace(
    /&(gt|lt|quot|amp|apos);/g,
    (entity: string, name: string) => decodedByName[name] ?? entity
  );
}

/**
 * Strips every U+FFFD (replacement character) from a string.
 * @param text String that may contain U+FFFD characters
 * @returns The same string with all U+FFFD characters removed
 */
export function removeInvalidChars(text: string): string {
  const pieces = text.split("\uFFFD");
  return pieces.join("");
}

/** Turns any caught value into a printable message (non-Error values are stringified). */
function describeToolError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Checks that `path` exists and is a directory.
 * @returns The error text to hand back to the model, or undefined when valid.
 */
function findDirectoryProblem(path: string): string | undefined {
  if (!existsSync(path)) {
    return `Error: Directory '${path}' does not exist`;
  }
  if (!statSync(path).isDirectory()) {
    return `Error: '${path}' is not a directory`;
  }
  return undefined;
}

/**
 * Builds the file tools exposed to the model: `replace_in_file`, `write_file`,
 * `read_file`, `search_files` and `list_files`.
 *
 * Every tool implementation catches its own failures and returns them as an
 * "Error: ..." string instead of throwing.
 *
 * @param ctl - Controller supplied by the SDK; used to read the plugin config.
 * @returns The list of tools, in the order listed above.
 */
export async function toolsProvider(
  ctl: ToolsProviderController
): Promise<Tool[]> {
  // Not used by any tool yet; the lookup is kept as in the original code.
  const config = ctl.getPluginConfig(configSchematics);

  const tools: Tool[] = [];

  const replaceInFilePathDescription = `The absolute path of the file to modify`;
  const replaceInFileDiffDescription = `One or more SEARCH/REPLACE blocks following this exact format:
  \`\`\`
  ------- SEARCH
  [exact content to find]
  =======
  [new content to replace with]
  +++++++ REPLACE
  \`\`\`
  Critical rules:
  1. SEARCH content must match the associated file section to find EXACTLY:
     * Match character-for-character including whitespace, indentation, line endings
     * Include all comments, docstrings, etc.
  2. SEARCH/REPLACE blocks will ONLY replace the first match occurrence.
     * Including multiple unique SEARCH/REPLACE blocks if you need to make multiple changes.
     * Include *just* enough lines in each SEARCH section to uniquely match each set of lines that need to change.
     * When using multiple SEARCH/REPLACE blocks, list them in the order they appear in the file.
  3. Keep SEARCH/REPLACE blocks concise:
     * Break large SEARCH/REPLACE blocks into a series of smaller blocks that each change a small portion of the file.
     * Include just the changing lines, and a few surrounding lines if needed for uniqueness.
     * Do not include long runs of unchanging lines in SEARCH/REPLACE blocks.
     * Each line must be complete. Never truncate lines mid-way through as this can cause matching failures.
  4. Special operations:
     * To move code: Use two SEARCH/REPLACE blocks (one to delete from original + one to insert at new location)
     * To delete code: Use empty REPLACE section`;
  const replaceInFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "replace_in_file",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to replace sections of content in an existing file using SEARCH/REPLACE blocks that define exact changes to specific parts of the file. This tool should be used when you need to make targeted changes to specific parts of a file.`,
    parameters: {
      path: z.string().describe(replaceInFilePathDescription),
      diff: z.string().describe(replaceInFileDiffDescription),
    },
    implementation: async ({ path, diff }) => {
      try {
        const filePath = path;

        // This tool only edits existing files.
        if (!existsSync(filePath)) {
          return `Error: could not find file`;
        }
        const originalContent = await readFile(filePath, "utf-8");

        const newContent = await constructNewFileContentV2(
          diff,
          originalContent,
          true
        );
        await writeFile(filePath, newContent, "utf-8");

        return "Diff applied successfully.";
      } catch (error) {
        return `Error: ${describeToolError(error)}`;
      }
    },
  });
  tools.push(replaceInFileTool);

  const writeFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "write_file",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.`,
    parameters: {
      path: z.string().describe("The absolute path of the file to write to"),
      content: z
        .string()
        .describe(
          "The content to write to the file. ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified."
        ),
    },
    implementation: async ({ path, content }) => {
      try {
        const filePath = path;

        // Create the parent directory if it doesn't exist. dirname() handles
        // the platform's separators, so Windows "C:\dir\file" paths work too;
        // "." means the path has no directory component.
        const dirPath = dirname(filePath);
        if (dirPath !== "." && !existsSync(dirPath)) {
          await mkdir(dirPath, { recursive: true });
        }

        let newContent = content;
        if (newContent.startsWith("```")) {
          // Drop an opening code-fence line; this also handles fences with
          // language specifiers like ```python or ```js
          newContent = newContent.split("\n").slice(1).join("\n").trim();
        }
        if (newContent.endsWith("```")) {
          // Drop a closing code-fence line.
          newContent = newContent.split("\n").slice(0, -1).join("\n").trim();
        }

        newContent = fixModelHtmlEscaping(newContent);
        newContent = removeInvalidChars(newContent);

        // Write content to file
        await writeFile(filePath, newContent, "utf-8");

        return "File written successfully.";
      } catch (error) {
        return `Error: ${describeToolError(error)}`;
      }
    },
  });
  tools.push(writeFileTool);

  const readFileTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "read_file",
    // Your description here, more details will help the model to understand when to use the tool
    // NOTE(review): the description mentions PDF/DOCX text extraction, but the
    // implementation below only reads the file as UTF-8 text - confirm.
    description: `Request to read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file you do not know the contents of, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string. Do NOT use this tool to list the contents of a directory. Only use this tool on files.`,
    parameters: {
      path: z.string().describe("The absolute path of the file to read"),
    },
    implementation: async ({ path }) => {
      // TODO: check if num tokens in file is larger than ctx length
      try {
        const filePath = path;

        if (!existsSync(filePath)) {
          return `Error: could not find file`;
        }
        const text = await readFile(filePath, "utf-8");
        return { text };
      } catch (error) {
        return `Error: ${describeToolError(error)}`;
      }
    },
  });
  tools.push(readFileTool);

  const searchFilesTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "search_files",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.`,
    parameters: {
      path: z
        .string()
        .describe("The absolute path of the directory to search in"),
      regex: z
        .string()
        .describe(
          "The regular expression pattern to search for. Uses Rust regex syntax."
        ),
      file_pattern: z
        .string()
        .optional()
        .describe(
          "Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*)."
        ),
    },
    implementation: async ({ path, regex, file_pattern }) => {
      try {
        // Validate path exists and is a directory
        const problem = findDirectoryProblem(path);
        if (problem !== undefined) {
          return problem;
        }

        // Get current working directory and perform search
        const cwd = process.cwd();
        return await regexSearchFiles(cwd, path, regex, file_pattern);
      } catch (error) {
        return `Error: ${describeToolError(error)}`;
      }
    },
  });
  tools.push(searchFilesTool);

  const listFilesTool = tool({
    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
    name: "list_files",
    // Your description here, more details will help the model to understand when to use the tool
    description: `Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.`,
    parameters: {
      path: z
        .string()
        .describe("The absolute path of the directory to list contents for"),
      recursive: z
        .boolean()
        .optional()
        .describe(
          "Whether to list files recursively. Use true for recursive listing, false or omit for top-level only. THIS IS CASE SENSITIVE, FOR EXAMPLE NEVER PUT True or TRUE only put true"
        ),
    },
    implementation: async ({ path, recursive }) => {
      try {
        // Validate path exists and is a directory
        const problem = findDirectoryProblem(path);
        if (problem !== undefined) {
          return problem;
        }

        // Default recursive to false if not provided, and cap the listing size
        const isRecursive = recursive ?? false;
        const limit = 1000;

        const [filePaths, wasLimited] = await listFiles(
          path,
          isRecursive,
          limit
        );

        // Use formatFilesList function to format the output
        return formatFilesList(path, filePaths, wasLimited);
      } catch (error) {
        return `Error: ${describeToolError(error)}`;
      }
    },
  });
  tools.push(listFilesTool);
  return tools;
}