// src/toolsProvider.ts
import { text, tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { readFile } from "fs/promises";
import { homedir } from "os";
import { join } from "path";
import { RoleManager } from "./roleManager";
import { VectorStore } from "./vectorStore";
import { getProfile, listProfiles } from "./profiles";
/**
 * Builds the tool set exposed by this plugin: profile management, role
 * assignment (planner / executor / embedder), planner→executor pipelines,
 * and a small RAG layer (index / search / list / remove) backed by a local
 * vector store under ~/.multi-role.
 *
 * @param ctl - Controller handed in by the LM Studio SDK; provides the client
 *              and (lazily) the working directory.
 * @returns The array of registered tools.
 */
export async function toolsProvider(ctl: ToolsProviderController) {
  // Fixed storage path — never call getWorkingDirectory() at load time
  const storageDir = join(homedir(), ".multi-role");
  const roles = new RoleManager(ctl.client, storageDir);
  const store = new VectorStore(storageDir);
  await roles.loadConfig();
  await store.load();

  // Lazy working directory — only resolved when a tool actually needs it.
  // Falls back to the storage dir if the controller cannot provide one.
  const getWorkDir = (): string => {
    try { return ctl.getWorkingDirectory(); } catch { return storageDir; }
  };

  // Narrows an unknown catch value to a readable message (strict-mode safe).
  const errMsg = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const tools: Tool[] = [];

  // ─── PROFILES ───────────────────────────────────────────────────────────────
  tools.push(tool({
    name: "list_profiles",
    description: text`
      Lists all available pre-built profiles with their recommended model assignments.
      Profiles are optimized combinations of models for specific use cases:
      - "reasoning": deep analysis and complex multi-step reasoning
      - "coding": software development and code generation
      - "fast": maximum speed for quick tasks
      - "research": document analysis with semantic search (RAG)
      Use load_profile to activate a profile.
    `,
    parameters: {},
    implementation: async () => {
      return { profiles: listProfiles() };
    },
  }));

  tools.push(tool({
    name: "load_profile",
    description: text`
      Activates a pre-built profile by assigning the recommended models to each role.
      Available profiles: "reasoning", "coding", "fast", "research"
      This automatically calls set_role for each role in the profile and configures swap mode.
      After loading, you can still override individual roles with set_role if needed.
      The profile configuration is saved to disk and persists across sessions.
    `,
    parameters: {
      profile: z.enum(["reasoning", "coding", "fast", "research"]).describe("Name of the profile to load"),
    },
    implementation: async ({ profile }) => {
      const p = getProfile(profile);
      if (!p) return { success: false, error: `Profile "${profile}" not found.` };
      roles.setSwapMode(p.swapMode);
      const assigned: Record<string, string> = {};
      for (const [role, cfg] of Object.entries(p.roles)) {
        if (cfg) {
          // Object.entries widens keys to string; profile roles are exactly
          // the three known role names, so narrow to the literal union.
          roles.setRole(role as "planner" | "executor" | "embedder", cfg.modelId);
          assigned[role] = cfg.modelId;
        }
      }
      await roles.saveConfig();
      return {
        success: true,
        profile: p.name,
        description: p.description,
        useCase: p.useCase,
        assigned,
        swapMode: p.swapMode,
        notes: p.notes,
      };
    },
  }));

  // ─── ROLE MANAGEMENT ────────────────────────────────────────────────────────
  tools.push(tool({
    name: "set_role",
    description: text`
      Assigns a model loaded in LM Studio to a role.
      Roles available:
      - "planner": powerful model for breaking down complex tasks into steps
      - "executor": fast model for executing individual steps and generating output
      - "embedder": embedding model for indexing and searching documents
      The modelId must match exactly the model identifier shown in LM Studio.
      Optionally override the default system prompt for that role.
      Call this before using ask_planner, ask_executor, or search_documents.
    `,
    parameters: {
      role: z.enum(["planner", "executor", "embedder"]).describe("Role to assign the model to"),
      modelId: z.string().describe("Exact model identifier as shown in LM Studio (e.g. 'qwen2.5-32b-instruct')"),
      systemPrompt: z.string().optional().describe("Custom system prompt for this role. Leave empty to use the default."),
    },
    implementation: async ({ role, modelId, systemPrompt }) => {
      roles.setRole(role, modelId, systemPrompt);
      await roles.saveConfig();
      return { success: true, role, modelId, message: `Role "${role}" assigned to model "${modelId}"` };
    },
  }));

  tools.push(tool({
    name: "get_roles",
    description: text`
      Returns the current role assignments: which model is assigned to planner, executor, and embedder.
      Also shows whether swap mode is enabled.
      Use this to check the current configuration before running tasks.
    `,
    parameters: {},
    implementation: async () => {
      const allRoles = roles.getAllRoles();
      return {
        swapMode: roles.isSwapMode(),
        swapModeDescription: roles.isSwapMode()
          ? "Enabled — models are loaded/unloaded one at a time (saves VRAM, slower)"
          : "Disabled — models stay loaded in parallel (faster, uses more VRAM)",
        roles: {
          planner: allRoles.planner ?? null,
          executor: allRoles.executor ?? null,
          embedder: allRoles.embedder ?? null,
        },
      };
    },
  }));

  tools.push(tool({
    name: "set_swap_mode",
    description: text`
      Enables or disables swap mode.
      Swap mode ON (default): unloads the current role model before loading the next one.
      Use when VRAM is limited or models are large (13B+).
      Swap mode OFF: keeps all role models loaded simultaneously.
      Use when you have enough VRAM and want faster responses.
    `,
    parameters: {
      enabled: z.boolean().describe("true to enable swap mode, false to disable it"),
    },
    implementation: async ({ enabled }) => {
      roles.setSwapMode(enabled);
      await roles.saveConfig();
      return { swapMode: enabled, message: `Swap mode ${enabled ? "enabled" : "disabled"}` };
    },
  }));

  // ─── PLANNER ────────────────────────────────────────────────────────────────
  tools.push(tool({
    name: "ask_planner",
    description: text`
      Sends a complex task to the Planner model and returns a detailed step-by-step plan.
      Use this for tasks that require strategic thinking, architecture decisions,
      or breaking down complex problems before execution.
      The planner model should be the most capable model available.
      After getting the plan, use ask_executor for each step.
      Requires the "planner" role to be set via set_role first.
    `,
    parameters: {
      task: z.string().describe("The complex task or goal to plan. Be as descriptive as possible."),
      context: z.string().optional().describe("Any additional context, constraints, or requirements for the task."),
    },
    implementation: async ({ task, context }) => {
      try {
        const fullTask = context ? `${task}\n\nConstraints/Context:\n${context}` : task;
        const plan = await roles.callLLM("planner", fullTask);
        return { success: true, role: "planner", task, plan };
      } catch (err: unknown) {
        return { success: false, error: errMsg(err) };
      }
    },
  }));

  // ─── EXECUTOR ───────────────────────────────────────────────────────────────
  tools.push(tool({
    name: "ask_executor",
    description: text`
      Sends a single concrete step or task to the Executor model for direct execution.
      Use this for well-defined tasks: writing code, generating text, transforming data,
      answering specific questions, or any task that doesn't need further planning.
      The executor model should be fast and efficient.
      Requires the "executor" role to be set via set_role first.
    `,
    parameters: {
      task: z.string().describe("The specific step or task to execute. Should be concrete and actionable."),
      context: z.string().optional().describe("Previous output or context the executor needs to complete this step."),
    },
    implementation: async ({ task, context }) => {
      try {
        const result = await roles.callLLM("executor", task, context);
        return { success: true, role: "executor", task, result };
      } catch (err: unknown) {
        return { success: false, error: errMsg(err) };
      }
    },
  }));

  tools.push(tool({
    name: "plan_and_execute",
    description: text`
      Full pipeline: sends the task to the Planner first, then automatically passes
      each step to the Executor and returns all results.
      Use this for complex tasks where you want the full planner→executor flow in one call.
      Requires both "planner" and "executor" roles to be set.
    `,
    parameters: {
      task: z.string().describe("The high-level task to plan and execute end to end."),
      context: z.string().optional().describe("Additional context or constraints for the task."),
    },
    implementation: async ({ task, context }) => {
      try {
        const fullTask = context ? `${task}\n\nConstraints/Context:\n${context}` : task;
        const plan = await roles.callLLM("planner", fullTask);
        // Extract numbered steps ("1. foo" / "2) bar") from the plan text.
        const stepLines = plan
          .split("\n")
          .filter(line => /^\s*\d+[\.\)]\s+/.test(line))
          .map(line => line.replace(/^\s*\d+[\.\)]\s+/, "").trim())
          .filter(s => s.length > 0);
        if (stepLines.length === 0) {
          // Unstructured plan: hand the whole thing to the executor once.
          const result = await roles.callLLM("executor", plan);
          return { success: true, plan, steps: [], finalResult: result };
        }
        const results: Array<{ step: number; task: string; result: string }> = [];
        let previousOutput = "";
        // Steps run sequentially: each one receives the previous step's output.
        for (let i = 0; i < stepLines.length; i++) {
          const stepResult = await roles.callLLM("executor", stepLines[i], previousOutput || undefined);
          results.push({ step: i + 1, task: stepLines[i], result: stepResult });
          previousOutput = stepResult;
        }
        return { success: true, plan, steps: results };
      } catch (err: unknown) {
        return { success: false, error: errMsg(err) };
      }
    },
  }));

  // ─── EMBEDDER / DOCUMENT SEARCH ─────────────────────────────────────────────
  tools.push(tool({
    name: "index_document",
    description: text`
      Indexes a text or file into the vector store so it can be searched later with search_documents.
      Accepts either raw text or a file path (relative to the working directory).
      Large documents are automatically split into chunks for better search precision.
      Requires the "embedder" role to be set via set_role first.
    `,
    parameters: {
      source: z.string().describe("A label for this document (e.g. 'API docs', 'readme.md', 'meeting notes')"),
      text: z.string().optional().describe("Raw text content to index"),
      filePath: z.string().optional().describe("Relative path to a text file to read and index"),
      chunkSize: z.number().optional().describe("Characters per chunk (default 800). Smaller = more precise search."),
    },
    implementation: async ({ source, text: rawText, filePath, chunkSize }) => {
      try {
        let content = rawText ?? "";
        if (filePath) {
          const workingDir = getWorkDir();
          const abs = join(workingDir, filePath);
          content = await readFile(abs, "utf-8");
        }
        if (!content.trim()) {
          return { success: false, error: "No content provided. Use 'text' or 'filePath'." };
        }
        const size = chunkSize ?? 800;
        // Reject non-positive sizes: with the old unguarded stride a
        // chunkSize of 0 made the loop below spin forever.
        if (size <= 0) {
          return { success: false, error: "chunkSize must be a positive number." };
        }
        const overlap = Math.floor(size * 0.15);
        // Clamp the stride to at least 1 so the loop always advances.
        const stride = Math.max(1, size - overlap);
        const chunks: string[] = [];
        for (let i = 0; i < content.length; i += stride) {
          const chunk = content.slice(i, i + size).trim();
          // Drop fragments too short to embed meaningfully.
          if (chunk.length > 50) chunks.push(chunk);
        }
        const ids: string[] = [];
        for (const chunk of chunks) {
          const embedding = await roles.callEmbedder(chunk);
          const id = await store.add(chunk, source, embedding);
          ids.push(id);
        }
        // Persist after mutating, matching remove_document's save-on-change.
        // (Harmless if store.add already persists internally — TODO confirm.)
        if (ids.length > 0) await store.save();
        return { success: true, source, chunks: chunks.length, ids, totalChars: content.length };
      } catch (err: unknown) {
        return { success: false, error: errMsg(err) };
      }
    },
  }));

  tools.push(tool({
    name: "search_documents",
    description: text`
      Searches the indexed documents by semantic similarity to the query.
      Returns the most relevant chunks and optionally generates an answer using the Executor model.
      Use this when the user asks questions about documents, notes, codebases, or any indexed content.
      Requires the "embedder" role to be set. Optionally uses "executor" to generate a final answer.
    `,
    parameters: {
      query: z.string().describe("The question or topic to search for in the indexed documents"),
      topK: z.number().optional().describe("Number of relevant chunks to retrieve (default 3)"),
      generateAnswer: z.boolean().optional().describe("If true, uses the Executor model to generate a synthesized answer from the results (default true)"),
    },
    implementation: async ({ query, topK, generateAnswer }) => {
      try {
        if (store.count() === 0) {
          return { success: false, error: "No documents indexed yet. Use index_document first." };
        }
        const queryEmbedding = await roles.callEmbedder(query);
        const results = store.search(queryEmbedding, topK ?? 3);
        if (results.length === 0) {
          return { success: true, query, results: [], answer: null };
        }
        // generateAnswer defaults to true; only an explicit false disables it.
        const shouldGenerate = generateAnswer !== false;
        let answer: string | null = null;
        if (shouldGenerate && roles.getRole("executor")) {
          const context = results.map((r, i) => `[${i + 1}] (${r.source}, score: ${r.score.toFixed(3)})\n${r.text}`).join("\n\n");
          answer = await roles.callLLM("executor", query, context);
        }
        return {
          success: true,
          query,
          results: results.map(r => ({ id: r.id, source: r.source, score: r.score.toFixed(3), excerpt: r.text.slice(0, 200) + (r.text.length > 200 ? "..." : "") })),
          answer,
        };
      } catch (err: unknown) {
        return { success: false, error: errMsg(err) };
      }
    },
  }));

  tools.push(tool({
    name: "list_indexed_documents",
    description: text`
      Lists all documents currently indexed in the vector store.
      Shows document ID, source label, size, and when it was added.
    `,
    parameters: {},
    implementation: async () => {
      return { count: store.count(), documents: store.list() };
    },
  }));

  tools.push(tool({
    name: "remove_document",
    description: text`
      Removes a document chunk from the vector store by its ID.
      Use list_indexed_documents to find the ID of the document to remove.
    `,
    parameters: {
      id: z.string().describe("The document chunk ID to remove"),
    },
    implementation: async ({ id }) => {
      const removed = store.remove(id);
      if (removed) await store.save();
      return { success: removed, id, message: removed ? `Document ${id} removed` : `Document ${id} not found` };
    },
  }));

  return tools;
}