// src/profiles.ts
import { type Role } from "./roleManager";
/** Model assignment for a single role inside a profile. */
interface ProfileRole {
/** Identifier of the model that fills this role. */
modelId: string;
/** Human-readable explanation of the model and why it was chosen for the role. */
description: string;
}
/** A named preset that assigns models to roles for one kind of workload. */
interface Profile {
/** Profile name; in the built-in table it matches the profile's lookup key. */
name: string;
/** Short summary of what the profile is for. */
description: string;
/** Longer list of the tasks the profile is intended for. */
useCase: string;
/** Per-role model assignments; any role may be left unassigned. */
roles: Partial<Record<Role, ProfileRole>>;
/**
 * Swap-mode flag. Judging by the built-in profiles' notes, `true` means the
 * models are loaded sequentially and `false` means they stay loaded together
 * (NOTE(review): confirm against the code that consumes this flag).
 */
swapMode: boolean;
/** Free-form operational notes about the profile. */
notes: string;
}
// Profiles built from user's downloaded models (MODELOS.TXT, May 2026)
// System: 16GB VRAM + 32GB RAM
// Models excluded: mimo-v2.5 (310B, too large), deepseek-v4-flash-fp4-fp8-ssd (SSD-only, no VRAM fit)
// Manual set_role candidates: deepseek-r1-distill-llama-70b, hermes-4.3-36b, google/gemma-4-31b,
// deepseek-coder-v2-lite-instruct, deepseek-r1-distill-qwen-14b, plutus-meta-llama-3.1-8b-instruct-bnb
/**
 * Built-in profiles, keyed by lowercase profile name.
 *
 * Every entry assigns a planner and an executor model. No entry assigns any
 * other role (the research notes say the embedder must be set separately).
 */
export const PROFILES: Record<string, Profile> = {
// Deep reasoning: reasoning-distilled planner with a smaller chain-of-thought executor.
reasoning: {
name: "reasoning",
description: "Deep reasoning and complex analysis",
useCase: "Complex multi-step reasoning, scientific analysis, logic chains, strategic planning",
swapMode: true,
roles: {
planner: {
modelId: "qwen3.6-35b-a3b-kimi-k2.6-reasoning-distilled",
description: "Qwen3.6-35B-A3B MoE (3B active) + Kimi K2.6 reasoning distillation. Best reasoning distillation locally available. Fast inference with frontier-level chain-of-thought.",
},
executor: {
modelId: "deepseek-r1-0528-qwen3-8b",
description: "DeepSeek-R1-0528 distilled into Qwen3-8B. Fast (25-40 tok/s) with extended chain-of-thought. Matches Qwen3-235B on AIME 2024.",
},
},
notes: "Swap mode ON — both models are large enough that sequential loading is safer. Planner loads first, reasons through the problem, then executor carries out each step.",
},
// Software development: both roles use dedicated coding models.
coding: {
name: "coding",
description: "Software development and code generation",
useCase: "App development, code review, architecture design, debugging, Flutter/Python/JS/TS projects",
swapMode: true,
roles: {
planner: {
modelId: "qwen/qwen3-coder-next",
description: "Qwen3-Coder-Next — latest generation Qwen3 coding model. Handles repository-level reasoning, architecture decisions, and multi-file planning.",
},
executor: {
modelId: "qwen/qwen3-coder-30b",
description: "Qwen3-Coder-30B dense. Specialized code generation across all major languages. Executes individual coding steps produced by the planner.",
},
},
notes: "Swap mode ON. Both are dedicated coding models from the same Qwen3-Coder family — planner (next-gen) designs, executor (30B) writes. Strong SWE-bench performance.",
},
// Speed-first: the only profile with swap mode off.
fast: {
name: "fast",
description: "Maximum speed for quick tasks",
useCase: "Quick answers, simple code generation, text editing, short tasks where speed matters more than depth",
swapMode: false,
roles: {
planner: {
modelId: "google/gemma-4-e4b",
description: "Gemma 4 Efficient 4B. Google's compact multimodal model. Fits entirely in VRAM alongside the executor. Strong instruction following at 60-90 tok/s.",
},
executor: {
modelId: "mistralai/ministral-3-3b",
description: "Ministral 3B. Mistral's smallest instruct model. Ultra-fast (80-120 tok/s). Excellent for direct short-form output: edits, rewrites, quick answers.",
},
},
notes: "Swap mode OFF — Gemma-4-E4B (4B) + Ministral-3B (3B) stay loaded simultaneously in 16GB VRAM. Combined ~7B parameters, near-instant responses.",
},
// Document analysis / RAG: no embedder is assigned here (see the notes string).
research: {
name: "research",
description: "Document analysis with semantic search",
useCase: "Searching indexed documents, answering questions from codebases, analyzing reports, RAG workflows",
swapMode: true,
roles: {
planner: {
modelId: "nemotron-3-super-64b-a12b-math-reap",
description: "Nvidia Nemotron-3 Super 64B MoE (12B active parameters) with math+reasoning post-training. Exceptional at analytical decomposition and structured document queries.",
},
executor: {
modelId: "qwen/qwen3.6-35b-a3b",
description: "Qwen3.6-35B-A3B MoE hybrid (3B active). Fast synthesis of retrieved document chunks into coherent, well-structured answers.",
},
},
notes: "Swap mode ON. Embedder role must be set separately with a dedicated embedding model (e.g. nomic-embed-text-v1.5 or bge-m3). Use index_document + search_documents tools for RAG workflow.",
},
};
/**
 * Looks up a built-in profile by name, ignoring letter case.
 *
 * @param name - Profile name; it is lowercased before the lookup.
 * @returns The matching profile, or `undefined` when no profile is
 *   registered under that name.
 */
export function getProfile(name: string): Profile | undefined {
  const key = name.toLowerCase();
  // Only own keys count. A plain `PROFILES[key]` would also resolve members
  // inherited from Object.prototype (e.g. "constructor", "__proto__") and
  // return a non-Profile value instead of undefined.
  return Object.prototype.hasOwnProperty.call(PROFILES, key) ? PROFILES[key] : undefined;
}
/**
 * Produces a flat summary of every built-in profile, in table order.
 *
 * @returns One entry per profile with its name, description, use case,
 *   swap-mode flag, and the planner/executor model ids (`null` for a role
 *   the profile does not assign).
 */
export function listProfiles(): Array<{ name: string; description: string; useCase: string; planner: string | null; executor: string | null; swapMode: boolean }> {
  const summaries: ReturnType<typeof listProfiles> = [];
  for (const profile of Object.values(PROFILES)) {
    const { name, description, useCase, roles, swapMode } = profile;
    const planner = roles.planner?.modelId ?? null;
    const executor = roles.executor?.modelId ?? null;
    // Property order matches the declared return shape.
    summaries.push({ name, description, useCase, planner, executor, swapMode });
  }
  return summaries;
}