Project Files
src
index.ts
profiles.ts
roleManager.ts
toolsProvider.ts
vectorStore.ts
manifest.json
package-lock.json
package.json
tsconfig.json
src / profiles.ts
import { type Role } from "./roleManager";

/** Model assignment for a single role inside a profile. */
interface ProfileRole {
  /** Identifier of the model assigned to the role (e.g. "qwen/qwen3-coder-30b"). */
  modelId: string;
  /** Human-readable summary of the model and why it was picked for the role. */
  description: string;
}

/** A named preset that maps roles to models for one kind of workload. */
interface Profile {
  /** Profile name; in PROFILES it is identical to the lowercase registry key. */
  name: string;
  /** One-line summary of what the profile is for. */
  description: string;
  /** Longer description of the tasks the profile is suited to. */
  useCase: string;
  /** Role-to-model assignments; Partial because a profile need not fill every Role. */
  roles: Partial<Record<Role, ProfileRole>>;
  /**
   * Swap-mode flag. Per the `notes` texts in PROFILES, `true` corresponds to loading
   * the models sequentially and `false` to keeping them loaded simultaneously.
   * NOTE(review): the flag is only stored here; how it is enforced lives elsewhere — confirm.
   */
  swapMode: boolean;
  /** Free-form remarks explaining the configuration choices. */
  notes: string;
}

// Profiles built from user's downloaded models (MODELOS.TXT, May 2026)
// System: 16GB VRAM + 32GB RAM
// Models excluded: mimo-v2.5 (310B, too large), deepseek-v4-flash-fp4-fp8-ssd (SSD-only, no VRAM fit)
// Manual set_role candidates: deepseek-r1-distill-llama-70b, hermes-4.3-36b, google/gemma-4-31b,
//   deepseek-coder-v2-lite-instruct, deepseek-r1-distill-qwen-14b, plutus-meta-llama-3.1-8b-instruct-bnb
/**
 * Registry of built-in profiles, keyed by lowercase profile name.
 *
 * Every entry's `name` equals its key, and every entry assigns a `planner`
 * and an `executor` model. No entry assigns any other role.
 */
export const PROFILES: Record<string, Profile> = {

  // Reasoning-distilled planner + executor pair; swap mode enabled.
  reasoning: {
    name: "reasoning",
    description: "Deep reasoning and complex analysis",
    useCase: "Complex multi-step reasoning, scientific analysis, logic chains, strategic planning",
    swapMode: true,
    roles: {
      planner: {
        modelId: "qwen3.6-35b-a3b-kimi-k2.6-reasoning-distilled",
        description: "Qwen3.6-35B-A3B MoE (3B active) + Kimi K2.6 reasoning distillation. Best reasoning distillation locally available. Fast inference with frontier-level chain-of-thought.",
      },
      executor: {
        modelId: "deepseek-r1-0528-qwen3-8b",
        description: "DeepSeek-R1-0528 distilled into Qwen3-8B. Fast (25-40 tok/s) with extended chain-of-thought. Matches Qwen3-235B on AIME 2024.",
      },
    },
    notes: "Swap mode ON — both models are large enough that sequential loading is safer. Planner loads first, reasons through the problem, then executor carries out each step.",
  },

  // Two Qwen3-Coder models for software development; swap mode enabled.
  coding: {
    name: "coding",
    description: "Software development and code generation",
    useCase: "App development, code review, architecture design, debugging, Flutter/Python/JS/TS projects",
    swapMode: true,
    roles: {
      planner: {
        modelId: "qwen/qwen3-coder-next",
        description: "Qwen3-Coder-Next — latest generation Qwen3 coding model. Handles repository-level reasoning, architecture decisions, and multi-file planning.",
      },
      executor: {
        modelId: "qwen/qwen3-coder-30b",
        description: "Qwen3-Coder-30B dense. Specialized code generation across all major languages. Executes individual coding steps produced by the planner.",
      },
    },
    notes: "Swap mode ON. Both are dedicated coding models from the same Qwen3-Coder family — planner (next-gen) designs, executor (30B) writes. Strong SWE-bench performance.",
  },

  // Two small models for quick tasks; the only profile with swap mode disabled.
  fast: {
    name: "fast",
    description: "Maximum speed for quick tasks",
    useCase: "Quick answers, simple code generation, text editing, short tasks where speed matters more than depth",
    swapMode: false,
    roles: {
      planner: {
        modelId: "google/gemma-4-e4b",
        description: "Gemma 4 Efficient 4B. Google's compact multimodal model. Fits entirely in VRAM alongside the executor. Strong instruction following at 60-90 tok/s.",
      },
      executor: {
        modelId: "mistralai/ministral-3-3b",
        description: "Ministral 3B. Mistral's smallest instruct model. Ultra-fast (80-120 tok/s). Excellent for direct short-form output: edits, rewrites, quick answers.",
      },
    },
    notes: "Swap mode OFF — Gemma-4-E4B (4B) + Ministral-3B (3B) stay loaded simultaneously in 16GB VRAM. Combined ~7B parameters, near-instant responses.",
  },

  // Document analysis / RAG; swap mode enabled. No embedding model is assigned here —
  // the notes direct the user to configure one separately.
  research: {
    name: "research",
    description: "Document analysis with semantic search",
    useCase: "Searching indexed documents, answering questions from codebases, analyzing reports, RAG workflows",
    swapMode: true,
    roles: {
      planner: {
        modelId: "nemotron-3-super-64b-a12b-math-reap",
        description: "Nvidia Nemotron-3 Super 64B MoE (12B active parameters) with math+reasoning post-training. Exceptional at analytical decomposition and structured document queries.",
      },
      executor: {
        modelId: "qwen/qwen3.6-35b-a3b",
        description: "Qwen3.6-35B-A3B MoE hybrid (3B active). Fast synthesis of retrieved document chunks into coherent, well-structured answers.",
      },
    },
    notes: "Swap mode ON. Embedder role must be set separately with a dedicated embedding model (e.g. nomic-embed-text-v1.5 or bge-m3). Use index_document + search_documents tools for RAG workflow.",
  },
};

/**
 * Looks up a built-in profile by name, ignoring letter case.
 *
 * Only keys defined directly on `PROFILES` can match. Names that collide with
 * members inherited from `Object.prototype` (such as "constructor" or
 * "__proto__") yield `undefined` rather than leaking a non-Profile value
 * through the declared return type.
 *
 * @param name - Profile name in any letter case.
 * @returns The matching profile, or `undefined` when no profile has that name.
 */
export function getProfile(name: string): Profile | undefined {
  const key = name.toLowerCase();
  // Guard against prototype-chain hits: PROFILES is a plain object literal, so a
  // bare index lookup would also resolve inherited properties.
  if (!Object.prototype.hasOwnProperty.call(PROFILES, key)) {
    return undefined;
  }
  return PROFILES[key];
}

/**
 * Builds a flat summary of every profile in `PROFILES`, in registry order.
 *
 * Each summary carries the profile's name, description, use case and swap-mode
 * flag, plus the model ids of its planner and executor roles.
 *
 * @returns One summary per profile; `planner` / `executor` are `null` when the
 *          profile does not assign that role.
 */
export function listProfiles(): Array<{ name: string; description: string; useCase: string; planner: string | null; executor: string | null; swapMode: boolean }> {
  const summaries: ReturnType<typeof listProfiles> = [];
  for (const profile of Object.values(PROFILES)) {
    const { name, description, useCase, roles, swapMode } = profile;
    const plannerRole = roles.planner;
    const executorRole = roles.executor;
    summaries.push({
      name,
      description,
      useCase,
      planner: plannerRole?.modelId ?? null,
      executor: executorRole?.modelId ?? null,
      swapMode,
    });
  }
  return summaries;
}
multi-role