// Project Files
// src/toolsProvider.ts
import { rawFunctionTool, type ToolsProviderController } from "@lmstudio/sdk";
import * as fs from "fs";
import * as path from "path";
/**
 * One indexed fragment of a document — the unit of BM25 retrieval.
 */
interface Chunk {
  // Sanitized source name + running index (see chunkText); unique per corpus load.
  id: string;
  // Relative file name (with subdirectory prefix) the fragment came from.
  source: string;
  // Raw fragment text as stored and returned to the model.
  text: string;
  // Precomputed search tokens for this fragment (see tokenize).
  tokens: string[];
}
/**
 * A loaded file plus its derived fragments.
 */
interface Document {
  // File name relative to the loaded directory.
  name: string;
  // Absolute-ish path (load directory joined with name).
  path: string;
  // Full raw file content.
  content: string;
  // Fragments produced from `content` by chunkText.
  chunks: Chunk[];
}
/**
 * Words excluded from the BM25 index: common English function words plus
 * API/schema vocabulary ("string", "request", …) that would otherwise dominate
 * matches in code/docs corpora.
 *
 * NOTE: entries of length <= 2 ("a", "is", "to", …) are already filtered out by
 * tokenize()'s length check; they are kept here so STOP_WORDS stays correct on
 * its own. The original list contained duplicate entries ("all", "than",
 * "very", "are", "been", "them") — harmless in a Set, removed for hygiene.
 */
const STOP_WORDS = new Set([
  // articles / pronouns / short function words
  "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
  "her", "was", "one", "our", "out", "has", "have", "been", "from",
  "this", "that", "with", "will", "each", "make", "like", "just",
  "over", "such", "more", "than", "them", "very", "when", "what",
  "which", "their", "there", "about",
  // schema / API vocabulary
  "string", "number", "type",
  "object", "array", "boolean", "required", "default", "optional",
  "example", "response", "request", "body", "header", "model",
  "token", "tokens",
  // remaining function words
  "using", "should", "would", "could", "also",
  "other", "some", "time", "only", "into", "after", "before",
  "between", "under", "again", "further", "then", "once", "here",
  "where", "why", "how", "any", "both", "few", "most",
  "own", "same", "so", "too", "s", "t", "don",
  "now", "a", "an", "is", "am", "be", "being",
  "do", "does", "did", "doing", "to", "of", "in", "on", "at",
  "by", "as", "if", "or", "no", "nor", "it", "its", "he", "she",
  "they", "we", "i", "me", "my", "your", "his", "him",
]);
// In-memory RAG state shared by every tool below; rebuilt by rag_load_documents
// and wiped by rag_clear.
let documents: Document[] = [];
// Flat list of all fragments across all loaded documents — the search corpus.
let chunks: Chunk[] = [];
// Lazily computed IDF table (see computeIDF); set to null whenever the corpus changes.
let idfCache: Map<string, number> | null = null;
/**
 * Normalizes text into search tokens: lowercases, replaces everything except
 * Latin/Cyrillic letters, digits and whitespace with spaces, then keeps words
 * longer than two characters that are not stop words.
 */
function tokenize(text: string): string[] {
  const normalized = text.toLowerCase().replace(/[^a-zа-яё0-9\s]/gi, " ");
  const tokens: string[] = [];
  for (const word of normalized.split(/\s+/)) {
    if (word.length > 2 && !STOP_WORDS.has(word)) {
      tokens.push(word);
    }
  }
  return tokens;
}
/**
 * Splits `text` into overlapping word-window fragments.
 *
 * BUG FIX: the original advanced with `start = end - overlap`, which can never
 * reach `words.length` for any overlap > 0 — once `end` clamps to the end of
 * the text, `start` is pinned at `words.length - overlap` and the loop spins
 * (and pushes duplicate tail chunks) forever. With the default overlap of 100
 * this hung on the very first document load. We now advance by a fixed
 * positive step and stop after the window that reaches the end of the text.
 *
 * @param text      Raw document content.
 * @param source    Document name; sanitized into the fragment id prefix.
 * @param chunkSize Window size in words.
 * @param overlap   Words shared between consecutive windows.
 * @returns Fragments whose trimmed text exceeds 50 characters (shorter
 *          windows are skipped, matching the original behavior).
 */
function chunkText(text: string, source: string, chunkSize: number, overlap: number): Chunk[] {
  const result: Chunk[] = [];
  const words = text.split(/\s+/);
  // Guarantee forward progress even if a caller passes overlap >= chunkSize.
  const step = Math.max(1, chunkSize - overlap);
  const idPrefix = source.replace(/[^a-z0-9]/gi, "_");
  let chunkIndex = 0;
  for (let start = 0; start < words.length; start += step) {
    const end = Math.min(start + chunkSize, words.length);
    const fragment = words.slice(start, end).join(" ");
    // Skip near-empty windows (e.g. whitespace-only tails).
    if (fragment.trim().length > 50) {
      result.push({
        id: idPrefix + "_" + chunkIndex,
        source: source,
        text: fragment,
        tokens: tokenize(fragment),
      });
      chunkIndex++;
    }
    // Last window already covered the end of the text — stop instead of
    // re-emitting overlapping tails.
    if (end >= words.length) break;
  }
  return result;
}
function computeIDF(): Map<string, number> {
if (idfCache !== null) return idfCache;
const docFreq = new Map<string, number>();
const N = chunks.length;
for (const chunk of chunks) {
const seen = new Set<string>();
for (const token of chunk.tokens) {
if (!seen.has(token)) {
docFreq.set(token, (docFreq.get(token) || 0) + 1);
seen.add(token);
}
}
}
idfCache = new Map();
for (const termEntry of docFreq.entries()) {
const term = termEntry[0];
const freq = termEntry[1];
idfCache!.set(term, Math.log((N + 1) / (freq + 0.5)));
}
return idfCache!;
}
/**
 * BM25 relevance of one fragment for the given query tokens (k1 = 1.5, b = 0.75).
 *
 * PERF FIX: the original recomputed the corpus-wide average fragment length
 * with a full reduce over `chunks` on EVERY call — making searchChunks O(n²)
 * in the number of fragments — and counted each query term's frequency with a
 * nested scan over the fragment's tokens. The average is now accepted as an
 * optional parameter (computed once by the caller; falls back to the old
 * per-call computation for backward compatibility), and term frequencies are
 * counted in a single pass.
 *
 * @param queryTokens Tokenized query (see tokenize).
 * @param chunk       Fragment to score.
 * @param avgDocLen   Optional precomputed average fragment length in tokens.
 */
function bm25Score(queryTokens: string[], chunk: Chunk, avgDocLen?: number): number {
  const idf = computeIDF();
  const docLen = chunk.tokens.length;
  const avg = avgDocLen ?? (chunks.length > 0
    ? chunks.reduce((sum, c) => sum + c.tokens.length, 0) / chunks.length
    : 1);
  const k1 = 1.5;
  const b = 0.75;
  // One pass over the fragment instead of one scan per query token.
  const termFreq = new Map<string, number>();
  for (const token of chunk.tokens) {
    termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
  }
  let score = 0;
  for (const qt of queryTokens) {
    const freq = termFreq.get(qt) ?? 0;
    // Unseen terms get idf 0 and contribute nothing.
    const termIDF = idf.get(qt) ?? 0;
    const tf = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * (docLen / avg)));
    score += termIDF * tf;
  }
  return score;
}
/**
 * Scores every fragment in the corpus against `query` and returns the top-N
 * positive-scoring fragments, best first.
 */
function searchChunks(query: string, topN: number): { chunk: Chunk; score: number }[] {
  const queryTokens = tokenize(query);
  // Hoisted out of the scoring loop: computing this per fragment was O(n²).
  const avgDocLen = chunks.length > 0
    ? chunks.reduce((sum, c) => sum + c.tokens.length, 0) / chunks.length
    : 1;
  const scored: { chunk: Chunk; score: number }[] = [];
  for (const chunk of chunks) {
    const score = bm25Score(queryTokens, chunk, avgDocLen);
    if (score > 0) scored.push({ chunk: chunk, score: score });
  }
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, topN);
}
/**
 * Reads a file as UTF-8, returning an empty string instead of throwing on any
 * error (missing file, permissions, etc.).
 */
function readFileSyncSafe(filePath: string): string {
  let content = "";
  try {
    content = fs.readFileSync(filePath, "utf-8");
  } catch {
    // Unreadable files are treated as empty.
  }
  return content;
}
// File types that get indexed; hoisted to module scope — the original rebuilt
// this array for every single directory entry inside the loop.
const SUPPORTED_EXTS = new Set([
  ".txt", ".md", ".json", ".log", ".csv", ".js", ".ts", ".py", ".jsx", ".tsx",
  ".html", ".css", ".yaml", ".yml", ".toml", ".xml",
]);
// Directories never descended into.
const SKIPPED_DIRS = new Set(["node_modules", ".git", "dist", ".lmstudio"]);
/**
 * Recursively collects readable, non-empty supported files under `dirPath`.
 * Returns names relative to `dirPath` (subdirectory prefixes joined in).
 * Unreadable directories yield []; unstat-able entries are skipped.
 */
function loadDirectorySync(dirPath: string): { name: string; content: string }[] {
  const results: { name: string; content: string }[] = [];
  let entries: string[];
  try {
    entries = fs.readdirSync(dirPath);
  } catch {
    return [];
  }
  for (const entry of entries) {
    const fullPath = path.join(dirPath, entry);
    let entryStat: fs.Stats;
    try {
      entryStat = fs.statSync(fullPath);
    } catch {
      continue;
    }
    if (entryStat.isDirectory()) {
      if (SKIPPED_DIRS.has(entry)) continue;
      // Recurse, prefixing child names with this directory.
      for (const sub of loadDirectorySync(fullPath)) {
        results.push({ name: path.join(entry, sub.name), content: sub.content });
      }
    } else if (SUPPORTED_EXTS.has(path.extname(entry).toLowerCase())) {
      // readFileSyncSafe never throws (returns "" on error), so no extra
      // try/catch is needed here — the original's wrapper was dead code.
      const content = readFileSyncSafe(fullPath);
      if (content.trim().length > 0) {
        results.push({ name: entry, content: content });
      }
    }
  }
  return results;
}
/**
 * Renders search hits as markdown: a header line, then one "## Fragment N"
 * section per hit with source, two-decimal relevance, and the fragment text.
 * Returns a "no results" hint when the list is empty.
 */
function formatSearchResults(results: { chunk: Chunk; score: number }[], query: string): string {
  if (results.length === 0) {
    return "No relevant documents found for \"" + query + "\".\n\nTry loading more documents or rephrasing your query.";
  }
  const parts: string[] = [
    "Found " + results.length + " relevant document fragment(s) for \"" + query + "\":\n\n",
  ];
  results.forEach((hit, index) => {
    parts.push("## Fragment " + (index + 1) + " (Source: " + hit.chunk.source + ", Relevance: " + hit.score.toFixed(2) + ")\n\n");
    parts.push(hit.chunk.text);
    parts.push("\n\n---\n\n");
  });
  return parts.join("");
}
/**
 * Renders the currently loaded corpus as a markdown bullet list (file name,
 * fragment count, character count), or a usage hint when nothing is loaded.
 */
function formatDocumentList(): string {
  if (documents.length === 0) {
    return "No documents loaded. Use `rag_load_documents` to load files from a directory.";
  }
  const lines: string[] = [
    "## Loaded Documents (" + documents.length + " files, " + chunks.length + " fragments)\n\n",
  ];
  for (const doc of documents) {
    lines.push("- **" + doc.name + "** (" + doc.chunks.length + " fragments, " + doc.content.length + " chars)\n");
  }
  return lines.join("");
}
/**
 * LM Studio tools provider exposing a minimal in-memory RAG pipeline:
 * - rag_load_documents: (re)build the corpus from a directory on disk
 * - rag_search: BM25 search over the loaded fragments
 * - rag_list_documents: show what is loaded
 * - rag_clear: wipe the corpus
 * All state lives in the module-level `documents` / `chunks` / `idfCache`.
 *
 * @param ctl SDK controller (currently unused).
 * @returns The four tool definitions.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<any[]> {
  const tools: any[] = [];
  tools.push(rawFunctionTool({
    name: "rag_load_documents",
    description: "Load documents from a directory into the RAG knowledge base. Supported types: .txt, .md, .json, .log, .csv, .js, .ts, .py, .jsx, .tsx, .html, .css, .yaml, .yml, .toml, .xml. Parameters: directory (required), chunk_size (default 500), overlap (default 100).",
    parametersJsonSchema: {
      type: "object",
      properties: {
        directory: { type: "string", description: "Path to directory with documents" },
        chunk_size: { type: "number", description: "Words per fragment (default 500)" },
        overlap: { type: "number", description: "Overlap between fragments in words (default 100)" }
      },
      required: ["directory"]
    },
    implementation: async function(params: Record<string, unknown>) {
      try {
        const directory = String(params.directory || "").trim();
        // NOTE(review): `Number(x) || default` treats 0 and NaN as "unset",
        // so chunk_size/overlap of 0 silently fall back to the defaults.
        const chunkSize = Number(params.chunk_size) || 500;
        const overlap = Number(params.overlap) || 100;
        if (!directory) return "Please provide a directory path";
        if (!fs.existsSync(directory)) return "Directory not found: " + directory;
        const files = loadDirectorySync(directory);
        if (files.length === 0) {
          return "No supported files found in \"" + directory + "\". Supported types: .txt, .md, .json, .log, .csv, .js, .ts, .py, .jsx, .tsx, .html, .css, .yaml, .yml, .toml, .xml";
        }
        // Each load fully replaces the previous corpus and invalidates the IDF cache.
        documents = [];
        chunks = [];
        idfCache = null;
        let totalChars = 0;
        let totalChunks = 0;
        for (let fi = 0; fi < files.length; fi++) {
          const file = files[fi];
          const docChunks = chunkText(file.content, file.name, chunkSize, overlap);
          documents.push({
            name: file.name,
            path: path.join(directory, file.name),
            content: file.content,
            chunks: docChunks,
          });
          // Append this document's fragments to the flat search corpus.
          for (let ci = 0; ci < docChunks.length; ci++) {
            chunks.push(docChunks[ci]);
          }
          totalChars += file.content.length;
          totalChunks += docChunks.length;
        }
        return "Loaded " + documents.length + " document(s) into RAG knowledge base.\n\nFiles: " + documents.map(function(d) { return d.name; }).join(", ") + "\nTotal fragments: " + totalChunks + "\nTotal characters: " + totalChars.toLocaleString() + "\nChunk size: " + chunkSize + " words, overlap: " + overlap + " words\n\nUse rag_search to search these documents.";
      } catch (e: any) {
        // Tool results are strings; surface the error message to the model.
        return "Error loading documents: " + (e && e.message ? e.message : String(e));
      }
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_search",
    description: "Search the loaded RAG knowledge base for relevant information. Use after rag_load_documents. Parameters: query (required), top_n (default 5).",
    parametersJsonSchema: {
      type: "object",
      properties: {
        query: { type: "string", description: "What to search for in the documents" },
        top_n: { type: "number", description: "Number of results (default 5)" }
      },
      required: ["query"]
    },
    implementation: async function(params: Record<string, unknown>) {
      try {
        const query = String(params.query || "").trim();
        const topN = Number(params.top_n) || 5;
        if (!query) return "Please provide a search query";
        if (chunks.length === 0) return "No documents loaded. Use rag_load_documents first to load files.";
        const results = searchChunks(query, topN);
        return formatSearchResults(results, query);
      } catch (e: any) {
        return "Error searching documents: " + (e && e.message ? e.message : String(e));
      }
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_list_documents",
    description: "List all documents currently loaded in the RAG knowledge base.",
    parametersJsonSchema: {
      type: "object",
      properties: {},
      required: []
    },
    implementation: async function() {
      return formatDocumentList();
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_clear",
    description: "Clear all documents from the RAG knowledge base.",
    parametersJsonSchema: {
      type: "object",
      properties: {},
      required: []
    },
    implementation: async function() {
      const count = documents.length;
      documents = [];
      chunks = [];
      idfCache = null;
      return "Cleared " + count + " document(s) from RAG knowledge base.";
    },
  }));
  return tools;
}