// Project Files
// src/toolsProvider.ts
import { rawFunctionTool, type ToolsProviderController } from "@lmstudio/sdk";
import * as fs from "fs";
import * as path from "path";
/**
 * One indexed fragment of a document — the unit of BM25 retrieval.
 */
interface Chunk {
  // Sanitized source name + running index (see chunkText); unique per corpus load.
  id: string;
  // Relative file name (with subdirectory prefix) the fragment came from.
  source: string;
  // Raw fragment text as stored and returned to the model.
  text: string;
  // Precomputed search tokens for this fragment (see tokenize).
  tokens: string[];
}
/**
 * A loaded file plus its derived fragments.
 */
interface Document {
  // File name relative to the loaded directory.
  name: string;
  // Absolute-ish path (load directory joined with name).
  path: string;
  // Full raw file content.
  content: string;
  // Fragments produced from `content` by chunkText.
  chunks: Chunk[];
}
/**
 * Words excluded from the BM25 index: common English function words plus
 * API/schema vocabulary ("string", "request", …) that would otherwise dominate
 * matches in code/docs corpora.
 *
 * NOTE: entries of length <= 2 ("a", "is", "to", …) are already filtered out by
 * tokenize()'s length check; they are kept here so STOP_WORDS stays correct on
 * its own. The original list contained duplicate entries ("all", "than",
 * "very", "are", "been", "them") — harmless in a Set, removed for hygiene.
 */
const STOP_WORDS = new Set([
  // articles / pronouns / short function words
  "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
  "her", "was", "one", "our", "out", "has", "have", "been", "from",
  "this", "that", "with", "will", "each", "make", "like", "just",
  "over", "such", "more", "than", "them", "very", "when", "what",
  "which", "their", "there", "about",
  // schema / API vocabulary
  "string", "number", "type",
  "object", "array", "boolean", "required", "default", "optional",
  "example", "response", "request", "body", "header", "model",
  "token", "tokens",
  // remaining function words
  "using", "should", "would", "could", "also",
  "other", "some", "time", "only", "into", "after", "before",
  "between", "under", "again", "further", "then", "once", "here",
  "where", "why", "how", "any", "both", "few", "most",
  "own", "same", "so", "too", "s", "t", "don",
  "now", "a", "an", "is", "am", "be", "being",
  "do", "does", "did", "doing", "to", "of", "in", "on", "at",
  "by", "as", "if", "or", "no", "nor", "it", "its", "he", "she",
  "they", "we", "i", "me", "my", "your", "his", "him",
]);
// In-memory RAG state shared by every tool below; rebuilt by rag_load_documents
// and wiped by rag_clear.
let documents: Document[] = [];
// Flat list of all fragments across all loaded documents — the search corpus.
let chunks: Chunk[] = [];
// Lazily computed IDF table (see computeIDF); set to null whenever the corpus changes.
let idfCache: Map<string, number> | null = null;
/**
 * Normalizes text into search tokens: lowercases, replaces everything except
 * Latin/Cyrillic letters, digits and whitespace with spaces, then keeps words
 * longer than two characters that are not stop words.
 */
function tokenize(text: string): string[] {
  const normalized = text.toLowerCase().replace(/[^a-zа-яё0-9\s]/gi, " ");
  const tokens: string[] = [];
  for (const word of normalized.split(/\s+/)) {
    if (word.length > 2 && !STOP_WORDS.has(word)) {
      tokens.push(word);
    }
  }
  return tokens;
}
/**
 * Splits `text` into overlapping word-window fragments.
 *
 * BUG FIX: the original advanced with `start = end - overlap`, which can never
 * reach `words.length` for any overlap > 0 — once `end` clamps to the end of
 * the text, `start` is pinned at `words.length - overlap` and the loop spins
 * (and pushes duplicate tail chunks) forever. With the default overlap of 100
 * this hung on the very first document load. We now advance by a fixed
 * positive step and stop after the window that reaches the end of the text.
 *
 * @param text      Raw document content.
 * @param source    Document name; sanitized into the fragment id prefix.
 * @param chunkSize Window size in words.
 * @param overlap   Words shared between consecutive windows.
 * @returns Fragments whose trimmed text exceeds 50 characters (shorter
 *          windows are skipped, matching the original behavior).
 */
function chunkText(text: string, source: string, chunkSize: number, overlap: number): Chunk[] {
  const result: Chunk[] = [];
  const words = text.split(/\s+/);
  // Guarantee forward progress even if a caller passes overlap >= chunkSize.
  const step = Math.max(1, chunkSize - overlap);
  const idPrefix = source.replace(/[^a-z0-9]/gi, "_");
  let chunkIndex = 0;
  for (let start = 0; start < words.length; start += step) {
    const end = Math.min(start + chunkSize, words.length);
    const fragment = words.slice(start, end).join(" ");
    // Skip near-empty windows (e.g. whitespace-only tails).
    if (fragment.trim().length > 50) {
      result.push({
        id: idPrefix + "_" + chunkIndex,
        source: source,
        text: fragment,
        tokens: tokenize(fragment),
      });
      chunkIndex++;
    }
    // Last window already covered the end of the text — stop instead of
    // re-emitting overlapping tails.
    if (end >= words.length) break;
  }
  return result;
}
function computeIDF(): Map<string, number> {
if (idfCache !== null) return idfCache;
const docFreq = new Map<string, number>();
const N = chunks.length;
for (const chunk of chunks) {
const seen = new Set<string>();
for (const token of chunk.tokens) {
if (!seen.has(token)) {
docFreq.set(token, (docFreq.get(token) || 0) + 1);
seen.add(token);
}
}
}
idfCache = new Map();
for (const termEntry of docFreq.entries()) {
const term = termEntry[0];
const freq = termEntry[1];
idfCache!.set(term, Math.log((N + 1) / (freq + 0.5)));
}
return idfCache!;
}
/**
 * BM25 relevance of one fragment for the given query tokens (k1 = 1.5, b = 0.75).
 *
 * PERF FIX: the original recomputed the corpus-wide average fragment length
 * with a full reduce over `chunks` on EVERY call — making searchChunks O(n²)
 * in the number of fragments — and counted each query term's frequency with a
 * nested scan over the fragment's tokens. The average is now accepted as an
 * optional parameter (computed once by the caller; falls back to the old
 * per-call computation for backward compatibility), and term frequencies are
 * counted in a single pass.
 *
 * @param queryTokens Tokenized query (see tokenize).
 * @param chunk       Fragment to score.
 * @param avgDocLen   Optional precomputed average fragment length in tokens.
 */
function bm25Score(queryTokens: string[], chunk: Chunk, avgDocLen?: number): number {
  const idf = computeIDF();
  const docLen = chunk.tokens.length;
  const avg = avgDocLen ?? (chunks.length > 0
    ? chunks.reduce((sum, c) => sum + c.tokens.length, 0) / chunks.length
    : 1);
  const k1 = 1.5;
  const b = 0.75;
  // One pass over the fragment instead of one scan per query token.
  const termFreq = new Map<string, number>();
  for (const token of chunk.tokens) {
    termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
  }
  let score = 0;
  for (const qt of queryTokens) {
    const freq = termFreq.get(qt) ?? 0;
    // Unseen terms get idf 0 and contribute nothing.
    const termIDF = idf.get(qt) ?? 0;
    const tf = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * (docLen / avg)));
    score += termIDF * tf;
  }
  return score;
}
/**
 * Scores every fragment in the corpus against `query` and returns the top-N
 * positive-scoring fragments, best first.
 */
function searchChunks(query: string, topN: number): { chunk: Chunk; score: number }[] {
  const queryTokens = tokenize(query);
  // Hoisted out of the scoring loop: computing this per fragment was O(n²).
  const avgDocLen = chunks.length > 0
    ? chunks.reduce((sum, c) => sum + c.tokens.length, 0) / chunks.length
    : 1;
  const scored: { chunk: Chunk; score: number }[] = [];
  for (const chunk of chunks) {
    const score = bm25Score(queryTokens, chunk, avgDocLen);
    if (score > 0) scored.push({ chunk: chunk, score: score });
  }
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, topN);
}
/**
 * Reads a file as UTF-8, returning an empty string instead of throwing on any
 * error (missing file, permissions, etc.).
 */
function readFileSyncSafe(filePath: string): string {
  let content = "";
  try {
    content = fs.readFileSync(filePath, "utf-8");
  } catch {
    // Unreadable files are treated as empty.
  }
  return content;
}
// File types that get indexed; hoisted to module scope — the original rebuilt
// this array for every single directory entry inside the loop.
const SUPPORTED_EXTS = new Set([
  ".txt", ".md", ".json", ".log", ".csv", ".js", ".ts", ".py", ".jsx", ".tsx",
  ".html", ".css", ".yaml", ".yml", ".toml", ".xml",
]);
// Directories never descended into.
const SKIPPED_DIRS = new Set(["node_modules", ".git", "dist", ".lmstudio"]);
/**
 * Recursively collects readable, non-empty supported files under `dirPath`.
 * Returns names relative to `dirPath` (subdirectory prefixes joined in).
 * Unreadable directories yield []; unstat-able entries are skipped.
 */
function loadDirectorySync(dirPath: string): { name: string; content: string }[] {
  const results: { name: string; content: string }[] = [];
  let entries: string[];
  try {
    entries = fs.readdirSync(dirPath);
  } catch {
    return [];
  }
  for (const entry of entries) {
    const fullPath = path.join(dirPath, entry);
    let entryStat: fs.Stats;
    try {
      entryStat = fs.statSync(fullPath);
    } catch {
      continue;
    }
    if (entryStat.isDirectory()) {
      if (SKIPPED_DIRS.has(entry)) continue;
      // Recurse, prefixing child names with this directory.
      for (const sub of loadDirectorySync(fullPath)) {
        results.push({ name: path.join(entry, sub.name), content: sub.content });
      }
    } else if (SUPPORTED_EXTS.has(path.extname(entry).toLowerCase())) {
      // readFileSyncSafe never throws (returns "" on error), so no extra
      // try/catch is needed here — the original's wrapper was dead code.
      const content = readFileSyncSafe(fullPath);
      if (content.trim().length > 0) {
        results.push({ name: entry, content: content });
      }
    }
  }
  return results;
}
/**
 * Renders search hits as markdown: a header line, then one "## Fragment N"
 * section per hit with source, two-decimal relevance, and the fragment text.
 * Returns a "no results" hint when the list is empty.
 */
function formatSearchResults(results: { chunk: Chunk; score: number }[], query: string): string {
  if (results.length === 0) {
    return "No relevant documents found for \"" + query + "\".\n\nTry loading more documents or rephrasing your query.";
  }
  const parts: string[] = [
    "Found " + results.length + " relevant document fragment(s) for \"" + query + "\":\n\n",
  ];
  results.forEach((hit, index) => {
    parts.push("## Fragment " + (index + 1) + " (Source: " + hit.chunk.source + ", Relevance: " + hit.score.toFixed(2) + ")\n\n");
    parts.push(hit.chunk.text);
    parts.push("\n\n---\n\n");
  });
  return parts.join("");
}
/**
 * Renders the currently loaded corpus as a markdown bullet list (file name,
 * fragment count, character count), or a usage hint when nothing is loaded.
 */
function formatDocumentList(): string {
  if (documents.length === 0) {
    return "No documents loaded. Use `rag_load_documents` to load files from a directory.";
  }
  const lines: string[] = [
    "## Loaded Documents (" + documents.length + " files, " + chunks.length + " fragments)\n\n",
  ];
  for (const doc of documents) {
    lines.push("- **" + doc.name + "** (" + doc.chunks.length + " fragments, " + doc.content.length + " chars)\n");
  }
  return lines.join("");
}
/**
 * LM Studio tools provider exposing a minimal in-memory RAG pipeline:
 * - rag_load_documents: (re)build the corpus from a directory on disk
 * - rag_search: BM25 search over the loaded fragments
 * - rag_list_documents: show what is loaded
 * - rag_clear: wipe the corpus
 * All state lives in the module-level `documents` / `chunks` / `idfCache`.
 *
 * @param ctl SDK controller (currently unused).
 * @returns The four tool definitions.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<any[]> {
  const tools: any[] = [];
  tools.push(rawFunctionTool({
    name: "rag_load_documents",
    description: "Load documents from a directory into the RAG knowledge base. Supported types: .txt, .md, .json, .log, .csv, .js, .ts, .py, .jsx, .tsx, .html, .css, .yaml, .yml, .toml, .xml. Parameters: directory (required), chunk_size (default 500), overlap (default 100).",
    parametersJsonSchema: {
      type: "object",
      properties: {
        directory: { type: "string", description: "Path to directory with documents" },
        chunk_size: { type: "number", description: "Words per fragment (default 500)" },
        overlap: { type: "number", description: "Overlap between fragments in words (default 100)" }
      },
      required: ["directory"]
    },
    implementation: async function(params: Record<string, unknown>) {
      try {
        const directory = String(params.directory || "").trim();
        // NOTE(review): `Number(x) || default` treats 0 and NaN as "unset",
        // so chunk_size/overlap of 0 silently fall back to the defaults.
        const chunkSize = Number(params.chunk_size) || 500;
        const overlap = Number(params.overlap) || 100;
        if (!directory) return "Please provide a directory path";
        if (!fs.existsSync(directory)) return "Directory not found: " + directory;
        const files = loadDirectorySync(directory);
        if (files.length === 0) {
          return "No supported files found in \"" + directory + "\". Supported types: .txt, .md, .json, .log, .csv, .js, .ts, .py, .jsx, .tsx, .html, .css, .yaml, .yml, .toml, .xml";
        }
        // Each load fully replaces the previous corpus and invalidates the IDF cache.
        documents = [];
        chunks = [];
        idfCache = null;
        let totalChars = 0;
        let totalChunks = 0;
        for (let fi = 0; fi < files.length; fi++) {
          const file = files[fi];
          const docChunks = chunkText(file.content, file.name, chunkSize, overlap);
          documents.push({
            name: file.name,
            path: path.join(directory, file.name),
            content: file.content,
            chunks: docChunks,
          });
          // Append this document's fragments to the flat search corpus.
          for (let ci = 0; ci < docChunks.length; ci++) {
            chunks.push(docChunks[ci]);
          }
          totalChars += file.content.length;
          totalChunks += docChunks.length;
        }
        return "Loaded " + documents.length + " document(s) into RAG knowledge base.\n\nFiles: " + documents.map(function(d) { return d.name; }).join(", ") + "\nTotal fragments: " + totalChunks + "\nTotal characters: " + totalChars.toLocaleString() + "\nChunk size: " + chunkSize + " words, overlap: " + overlap + " words\n\nUse rag_search to search these documents.";
      } catch (e: any) {
        // Tool results are strings; surface the error message to the model.
        return "Error loading documents: " + (e && e.message ? e.message : String(e));
      }
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_search",
    description: "Search the loaded RAG knowledge base for relevant information. Use after rag_load_documents. Parameters: query (required), top_n (default 5).",
    parametersJsonSchema: {
      type: "object",
      properties: {
        query: { type: "string", description: "What to search for in the documents" },
        top_n: { type: "number", description: "Number of results (default 5)" }
      },
      required: ["query"]
    },
    implementation: async function(params: Record<string, unknown>) {
      try {
        const query = String(params.query || "").trim();
        const topN = Number(params.top_n) || 5;
        if (!query) return "Please provide a search query";
        if (chunks.length === 0) return "No documents loaded. Use rag_load_documents first to load files.";
        const results = searchChunks(query, topN);
        return formatSearchResults(results, query);
      } catch (e: any) {
        return "Error searching documents: " + (e && e.message ? e.message : String(e));
      }
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_list_documents",
    description: "List all documents currently loaded in the RAG knowledge base.",
    parametersJsonSchema: {
      type: "object",
      properties: {},
      required: []
    },
    implementation: async function() {
      return formatDocumentList();
    },
  }));
  tools.push(rawFunctionTool({
    name: "rag_clear",
    description: "Clear all documents from the RAG knowledge base.",
    parametersJsonSchema: {
      type: "object",
      properties: {},
      required: []
    },
    implementation: async function() {
      const count = documents.length;
      documents = [];
      chunks = [];
      idfCache = null;
      return "Cleared " + count + " document(s) from RAG knowledge base.";
    },
  }));
  return tools;
}