// Project Files
// src/helpers/embeddingCapabilityPrimer.ts
/**
* Embedding Capability Primer
*
* Adapted from draw-things-chat's visionCapabilityPrimer.ts
*
* Problem: Semantic search requires an embedding model to be loaded in LM Studio.
* Users may not have one loaded, or may want to use a local model.
*
* Solution:
* 1. Check if configured embedding model is already loaded (fast API check)
* 2. If not, check if model is installed locally
* 3. Offer to load it, or guide user with helpful instructions
* 4. Support graceful degradation (keyword-only search as fallback)
*
* Key difference from visionCapabilityPrimer:
* - Vision primer loads a SMALL model CPU-only (priming UI capabilities)
* - Embedding primer may load a LARGER model with GPU (for actual work)
* - Embedding models are typically smaller, but still need proper loading
*/
import { exec } from "child_process";
import { promisify } from "util";
import path from "path";
import os from "os";
const execAsync = promisify(exec);
/** Input to `checkEmbeddingCapability`. */
export interface EmbeddingPrimerConfig {
  /** Embedding model identifier (from config; partial names are matched fuzzily against installed/loaded models) */
  modelId: string;
  /** LM Studio base URL (the `/v1/models` endpoint is appended for API checks) */
  baseUrl: string;
  /** Whether to attempt auto-load via the `lms` CLI (default: false, just check and guide) */
  autoLoad?: boolean;
  /** GPU mode for auto-load: "off", "max", or 0-1 for partial offload */
  gpuMode?: "off" | "max" | number;
  /** TTL in seconds for auto-loaded model, passed as `--ttl` (default: no TTL) */
  ttlSeconds?: number;
}
/** Outcome of `checkEmbeddingCapability`. */
export interface EmbeddingPrimerResult {
  /** Is the embedding model ready to use? */
  ready: boolean;
  /**
   * Model identifier to use. NOTE: this may differ from the configured id —
   * when a similar or alternative embedding model is already loaded, that
   * model's id is returned here instead.
   */
  modelId: string;
  /** Is the model loaded via API? */
  isLoaded: boolean;
  /** Is the model installed locally? (only checked if not loaded) */
  isInstalled?: boolean;
  /** Was the model auto-loaded by us? */
  wasAutoLoaded?: boolean;
  /** List of OTHER embedding models currently loaded (alternatives) */
  availableEmbeddingModels?: string[];
  /** User-facing message (info, warning, or error), markdown-formatted */
  userMessage?: string;
  /** Message severity */
  messageSeverity?: "info" | "warning" | "error";
  /** Internal error (not intended for display) */
  error?: string;
  /** Should semantic search be disabled (fall back to keyword-only)? */
  disableSemantic?: boolean;
}
/**
* Find the lms CLI path (same as visionCapabilityPrimer)
* All exec calls have timeouts to prevent blocking.
*/
/**
 * Locate the `lms` command-line tool on this machine.
 *
 * Probes a short list of well-known install locations first, then falls
 * back to a PATH lookup via `which`. Every probe is bounded by a timeout
 * so a hung binary can never block the caller for long.
 *
 * @returns Absolute path to the `lms` executable, or `null` when every
 *          probe fails.
 */
export async function findLmsCli(): Promise<string | null> {
  // 3s per probe keeps the worst case short even when all probes fail.
  const PROBE_TIMEOUT_MS = 3000;

  const knownLocations = [
    path.join(os.homedir(), ".lmstudio", "bin", "lms"),
    "/usr/local/bin/lms",
    "/opt/homebrew/bin/lms",
  ];

  for (const location of knownLocations) {
    // A candidate counts as found when `lms -h` runs without error.
    const works = await execAsync(`"${location}" -h`, { timeout: PROBE_TIMEOUT_MS })
      .then(() => true)
      .catch(() => false); // missing binary or timeout — keep probing
    if (works) {
      return location;
    }
  }

  // Last resort: ask the shell where `lms` lives (POSIX `which`).
  try {
    const { stdout } = await execAsync("which lms", { timeout: PROBE_TIMEOUT_MS });
    const resolved = stdout.trim();
    return resolved ? resolved : null;
  } catch {
    return null; // not on PATH, or `which` itself timed out
  }
}
/**
* Check if a model is installed locally using `lms ls --json`
*/
/**
 * Check whether a model matching `modelId` is installed locally by
 * parsing the output of `lms ls --json`.
 *
 * Matching is intentionally fuzzy: a partial, case-insensitive match on
 * either the model key or display name counts (e.g. "e5-large" matches a
 * longer key containing it). Keys containing "embed" also match against
 * the id with any "text-embedding-" prefix stripped.
 *
 * @param lmsCli  Absolute path to the `lms` executable.
 * @param modelId Configured model identifier (may be a partial name).
 * @returns `installed` plus the concrete `modelKey` when a match is found;
 *          `{ installed: false }` on no match or any CLI/parse failure.
 */
async function isModelInstalled(
  lmsCli: string,
  modelId: string
): Promise<{ installed: boolean; modelKey?: string }> {
  // FIX: guard against an empty search string — `includes("")` is always
  // true, so an empty modelId would spuriously match the first installed
  // model. Treat it as "not installed" without spawning the CLI.
  const normalizedSearch = modelId.trim().toLowerCase();
  if (!normalizedSearch) {
    return { installed: false };
  }
  try {
    const { stdout } = await execAsync(`"${lmsCli}" ls --json`, { timeout: 10000 });
    const models = JSON.parse(stdout);
    if (!Array.isArray(models)) {
      return { installed: false };
    }
    // Strip the OpenAI-style prefix once, outside the search closure.
    const strippedSearch = normalizedSearch.replace("text-embedding-", "");
    const found = models.find((m: any) => {
      const key = String(m?.modelKey || "").toLowerCase();
      const displayName = String(m?.displayName || "").toLowerCase();
      return (
        key.includes(normalizedSearch) ||
        displayName.includes(normalizedSearch) ||
        // Parenthesized for clarity (same precedence as before: && binds
        // tighter than ||).
        (key.includes("embed") && displayName.includes(strippedSearch))
      );
    });
    return found
      ? { installed: true, modelKey: found.modelKey }
      : { installed: false };
  } catch (e) {
    console.warn("[EmbeddingPrimer] Failed to check installed models:", (e as Error)?.message);
    return { installed: false };
  }
}
/**
* Get list of currently loaded models from LM Studio API
*
* FAST: 2s timeout to avoid blocking when LM Studio is not running.
* Returns empty arrays on any failure (graceful degradation).
*/
/**
 * Query LM Studio's OpenAI-compatible API for the models it currently
 * has loaded, split into embedding models vs. everything.
 *
 * FAST: the request aborts after 2s so a stopped LM Studio never blocks
 * the caller. Any failure degrades to empty lists; `reachable` is false
 * only when the server could not be contacted at all.
 */
async function getLoadedModels(baseUrl: string): Promise<{
  embeddingModels: string[];
  allModels: string[];
  reachable: boolean;
}> {
  try {
    const res = await fetch(`${baseUrl}/v1/models`, {
      method: "GET",
      // Fail fast when LM Studio is not running.
      signal: AbortSignal.timeout(2000),
    });
    if (!res.ok) {
      // Server answered, but with an error status — it IS reachable.
      return { embeddingModels: [], allModels: [], reachable: true };
    }
    const payload = await res.json();
    const entries: Array<{ id: string; type?: string }> = payload.data || [];
    const allModels: string[] = [];
    const embeddingModels: string[] = [];
    for (const entry of entries) {
      allModels.push(entry.id);
      // Heuristic: trust an explicit "embedding" type, otherwise sniff
      // for "embed" in the model id.
      if (entry.type === "embedding" || entry.id.toLowerCase().includes("embed")) {
        embeddingModels.push(entry.id);
      }
    }
    return { embeddingModels, allModels, reachable: true };
  } catch (e) {
    // Connection refused, timeout, etc. — LM Studio probably not running.
    console.warn("[EmbeddingPrimer] LM Studio API not reachable:", (e as Error)?.message);
    return { embeddingModels: [], allModels: [], reachable: false };
  }
}
/**
* Load an embedding model via lms CLI
*/
/**
 * Load an embedding model through the `lms` CLI.
 *
 * @param lmsCli   Absolute path to the `lms` executable.
 * @param modelKey Model key as reported by `lms ls`. Quoted into the
 *                 command — keys originate from external JSON and may
 *                 contain spaces or other shell-significant characters.
 * @param options  gpuMode: "off" | "max" | fraction 0-1 for partial
 *                 offload; ttlSeconds: auto-unload timeout; identifier:
 *                 alias to register the loaded model under.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`
 *          with stderr (or the thrown message) attached.
 */
async function loadModelViaCli(
  lmsCli: string,
  modelKey: string,
  options: { gpuMode?: "off" | "max" | number; ttlSeconds?: number; identifier?: string }
): Promise<{ ok: boolean; error?: string }> {
  const { gpuMode = "max", ttlSeconds, identifier } = options;
  const gpuArg =
    gpuMode === "off"
      ? "--gpu off"
      : gpuMode === "max"
        ? "--gpu max"
        : `--gpu ${gpuMode}`;
  const parts = [
    `"${lmsCli}"`,
    "load",
    // FIX: quote the model key — it was the only externally-sourced
    // argument interpolated unquoted, so a key with spaces or shell
    // metacharacters would break (or subvert) the command.
    `"${modelKey}"`,
    gpuArg,
  ];
  if (ttlSeconds) {
    parts.push(`--ttl ${ttlSeconds}`);
  }
  if (identifier) {
    parts.push(`--identifier "${identifier}"`);
  }
  const cmd = parts.join(" ");
  console.log("[EmbeddingPrimer] Running:", cmd);
  try {
    await execAsync(cmd, { timeout: 120_000 }); // 2 min timeout for large models
    return { ok: true };
  } catch (e: any) {
    return { ok: false, error: e?.stderr || e?.message || String(e) };
  }
}
/**
* Check embedding model availability and provide guidance
*
* This is the main entry point - call this when semantic search is enabled.
*
* PERFORMANCE: Designed to fail fast when LM Studio is not running.
* - API check: 2s timeout
* - CLI checks: Only if API is reachable but no model loaded
* - Total worst case: ~5s (not 27s!)
*/
export async function checkEmbeddingCapability(
  config: EmbeddingPrimerConfig
): Promise<EmbeddingPrimerResult> {
  const { modelId, baseUrl, autoLoad = false, gpuMode = "max", ttlSeconds } = config;
  console.log(`[EmbeddingPrimer] Checking capability for model: ${modelId}`);
  // Step 1: Check if the configured model (or any embedding model) is loaded
  // This is FAST (2s timeout) - will fail quickly if LM Studio not running
  // (allModels is destructured but not used below — kept for future use.)
  const { embeddingModels, allModels, reachable } = await getLoadedModels(baseUrl);
  // If LM Studio API is not reachable, skip all CLI checks (they'd be pointless)
  // and return immediately with graceful degradation
  if (!reachable) {
    console.log(`[EmbeddingPrimer] LM Studio not reachable, skipping CLI checks`);
    return {
      ready: false,
      modelId,
      isLoaded: false,
      disableSemantic: true,
      userMessage: `**Semantic Search Unavailable:**\n\nCannot reach LM Studio API at \`${baseUrl}\`.\n\n**Fallback:** Using keyword-only search.\n\n**To enable semantic search:**\n1. Start LM Studio\n2. Load an embedding model (e.g., \`nomic-embed-text\`, \`e5-large\`)`,
      messageSeverity: "warning",
      error: "LM Studio API not reachable",
    };
  }
  // Check if our specific model is loaded (exact, case-insensitive match)
  const normalizedModelId = modelId.toLowerCase();
  const isExactModelLoaded = embeddingModels.some(
    (m) => m.toLowerCase() === normalizedModelId
  );
  if (isExactModelLoaded) {
    console.log(`[EmbeddingPrimer] ✓ Model loaded: ${modelId}`);
    return {
      ready: true,
      modelId,
      isLoaded: true,
      availableEmbeddingModels: embeddingModels,
    };
  }
  // Check if a SIMILAR embedding model is loaded (user might have loaded a variant)
  // NOTE(review): substring match after stripping "text-embedding-" — for very
  // short configured ids this could match a loosely-related model; acceptable
  // here since any loaded embedding model is considered usable below anyway.
  const similarModel = embeddingModels.find((m) =>
    m.toLowerCase().includes(normalizedModelId.replace("text-embedding-", ""))
  );
  if (similarModel) {
    console.log(`[EmbeddingPrimer] ✓ Similar model loaded: ${similarModel} (configured: ${modelId})`);
    return {
      ready: true,
      modelId: similarModel, // Return the actually loaded model
      isLoaded: true,
      availableEmbeddingModels: embeddingModels,
      userMessage: `Using loaded embedding model: ${similarModel}\n(Configured: ${modelId})`,
      messageSeverity: "info",
    };
  }
  // No matching embedding model loaded
  console.log(`[EmbeddingPrimer] Model not loaded: ${modelId}`);
  // If OTHER embedding models are loaded, offer to use one of them
  if (embeddingModels.length > 0) {
    return {
      ready: true, // Can still use the loaded model
      modelId: embeddingModels[0], // Use first available (substitutes configured id)
      isLoaded: true,
      availableEmbeddingModels: embeddingModels,
      userMessage: `**Semantic Search:**\n\nConfigured model \`${modelId}\` is not loaded.\n\nUsing available model: \`${embeddingModels[0]}\`\n\nTo use your configured model, load it in LM Studio.`,
      messageSeverity: "info",
    };
  }
  // No embedding models loaded at all, but LM Studio IS running
  // Step 2: Check if model is installed locally (slower, but user can benefit from guidance)
  const lmsCli = await findLmsCli();
  if (!lmsCli) {
    // Without the CLI we can neither inspect installed models nor auto-load.
    return {
      ready: false,
      modelId,
      isLoaded: false,
      disableSemantic: true,
      userMessage: `**Semantic Search Unavailable:**\n\nNo embedding model is loaded and \`lms\` CLI not found.\n\n**Fallback:** Using keyword-only search.\n\n**To enable semantic search:**\n1. Load an embedding model in LM Studio\n2. Common models: \`nomic-embed-text\`, \`e5-large\`, \`bge-large\``,
      messageSeverity: "warning",
    };
  }
  const { installed, modelKey } = await isModelInstalled(lmsCli, modelId);
  if (!installed) {
    // Not installed: guide the user to download one; nothing to auto-load.
    return {
      ready: false,
      modelId,
      isLoaded: false,
      isInstalled: false,
      disableSemantic: true,
      userMessage: `**Semantic Search Unavailable:**\n\nNo embedding model loaded, and \`${modelId}\` is not installed locally.\n\n**Fallback:** Using keyword-only search.\n\n**To enable semantic search:**\n1. Open LM Studio → Discover → Search for embedding models\n2. Download one (e.g., \`nomic-embed-text\`, \`e5-large\`)\n3. Load it in the "Developer" tab or via \`lms load\``,
      messageSeverity: "warning",
    };
  }
  // Model is installed but not loaded
  console.log(`[EmbeddingPrimer] Model installed but not loaded: ${modelKey}`);
  if (autoLoad && modelKey) {
    // Attempt to auto-load
    console.log(`[EmbeddingPrimer] Attempting auto-load: ${modelKey}`);
    const loadResult = await loadModelViaCli(lmsCli, modelKey, {
      gpuMode,
      ttlSeconds,
      identifier: "semantic-search-embedding",
    });
    if (loadResult.ok) {
      // Verify it's now loaded.
      // NOTE(review): this accepts ANY embedding model appearing in the
      // re-check, not specifically `modelKey` — TODO confirm that is the
      // intended post-load verification.
      const { embeddingModels: newEmbeddings } = await getLoadedModels(baseUrl);
      if (newEmbeddings.length > 0) {
        return {
          ready: true,
          modelId: newEmbeddings[0],
          isLoaded: true,
          isInstalled: true,
          wasAutoLoaded: true,
          availableEmbeddingModels: newEmbeddings,
          userMessage: `**Semantic Search Enabled:**\n\nAuto-loaded embedding model: \`${newEmbeddings[0]}\``,
          messageSeverity: "info",
        };
      }
    } else {
      // Auto-load failed: log and fall through to the manual-load guidance.
      console.warn(`[EmbeddingPrimer] Auto-load failed: ${loadResult.error}`);
    }
  }
  // Model installed but not loaded, no auto-load (or auto-load failed)
  return {
    ready: false,
    modelId,
    isLoaded: false,
    isInstalled: true,
    disableSemantic: true,
    userMessage: `**Semantic Search Unavailable:**\n\nEmbedding model \`${modelKey || modelId}\` is installed but not loaded.\n\n**Fallback:** Using keyword-only search.\n\n**To enable:**\n\`\`\`bash\nlms load ${modelKey || modelId}\n\`\`\`\n\nOr load it via LM Studio → Developer tab.`,
    messageSeverity: "warning",
  };
}
/**
* Quick availability check (for tool execution)
* Returns true if ANY embedding model is loaded
*/
/**
 * Quick availability probe (for tool execution).
 *
 * @returns true when at least one embedding model is currently loaded.
 */
export async function isEmbeddingAvailable(baseUrl: string): Promise<boolean> {
  const loaded = await getLoadedModels(baseUrl);
  return loaded.embeddingModels.length > 0;
}
/**
* Get the first available embedding model
*/
/**
 * Pick the first currently-loaded embedding model, if any.
 *
 * @returns The model id, or null when no embedding model is loaded.
 */
export async function getAvailableEmbeddingModel(baseUrl: string): Promise<string | null> {
  const { embeddingModels: loaded } = await getLoadedModels(baseUrl);
  const first = loaded[0];
  // Truthiness check mirrors the original `|| null` behavior exactly.
  return first ? first : null;
}