// src/helpers/embeddingCapabilityPrimer.ts
/**
* Embedding Capability Primer
* Adapted from draw-things-index/src/helpers/embeddingCapabilityPrimer.ts
*
* Lazy-checks whether an embedding model is available in LM Studio before
* attempting semantic search. NOTE: no caching is performed in this module;
* callers are expected to cache the result (e.g. for ~30 s) to avoid
* hammering the API on every tool call.
*/
import { exec } from "node:child_process";
import { promisify } from "node:util";
import path from "node:path";
import os from "node:os";
// Promise-returning wrapper around child_process.exec so CLI calls can be awaited.
const execAsync = promisify(exec);
/** Input configuration for {@link checkEmbeddingCapability}. */
export interface EmbeddingPrimerConfig {
  /** Embedding model identifier to look for (matched case-insensitively). */
  modelId: string;
  /** Base URL of the LM Studio server, without the `/v1` suffix. */
  baseUrl: string;
  /** When true, attempt to load the model via the `lms` CLI if it is installed but not loaded. Defaults to false. */
  autoLoad?: boolean;
  /** GPU offload setting forwarded to `lms load --gpu`; defaults to "max" when auto-loading. */
  gpuMode?: "off" | "max" | number;
  /** Idle time-to-live in seconds forwarded to `lms load --ttl`; omitted when unset. */
  ttlSeconds?: number;
}
/** Outcome of an embedding capability check. */
export interface EmbeddingPrimerResult {
  /** True when semantic search can proceed with `modelId`. */
  ready: boolean;
  /** The model to use — may differ from the configured id when a substitute was selected. */
  modelId: string;
  /** Whether an embedding model is currently loaded in LM Studio. */
  isLoaded: boolean;
  /** Whether the configured model is installed locally (only set when the CLI check ran). */
  isInstalled?: boolean;
  /** True when this call loaded the model via the `lms` CLI. */
  wasAutoLoaded?: boolean;
  /** Ids of embedding models the API reported as loaded. */
  availableEmbeddingModels?: string[];
  /** Human-readable status message (markdown) suitable for surfacing to the user. */
  userMessage?: string;
  /** Severity of `userMessage`. */
  messageSeverity?: "info" | "warning" | "error";
  /** Machine-readable error description, when something failed. */
  error?: string;
  /** True when callers should fall back to keyword-only search. */
  disableSemantic?: boolean;
}
// ─── helpers ────────────────────────────────────────────────────────────────
/**
 * Locate the LM Studio `lms` CLI binary.
 *
 * Probes well-known install locations first (each verified by running
 * `lms -h` with a short timeout), then falls back to the platform's
 * PATH lookup tool — `which` on POSIX, `where` on Windows (the original
 * always ran `which`, which does not exist on Windows).
 *
 * @returns Absolute path to a working `lms` binary, or null if none found.
 */
export async function findLmsCli(): Promise<string | null> {
  const TIMEOUT = 3_000;
  const candidates = [
    path.join(os.homedir(), ".lmstudio", "bin", "lms"),
    path.join(os.homedir(), ".lmstudio", "bin", "lms.exe"),
    "/usr/local/bin/lms",
    "/opt/homebrew/bin/lms",
  ];
  for (const c of candidates) {
    try {
      // Running `-h` both checks existence and that the binary executes.
      await execAsync(`"${c}" -h`, { timeout: TIMEOUT });
      return c;
    } catch { /* try next */ }
  }
  try {
    const locator = process.platform === "win32" ? "where" : "which";
    const { stdout } = await execAsync(`${locator} lms`, { timeout: TIMEOUT });
    // `where` may print several matches, one per line; take the first non-empty one.
    const first = stdout
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    if (first) return first;
  } catch { /* not in PATH */ }
  return null;
}
/**
 * Check whether `modelId` matches any locally installed model reported by
 * `lms ls --json`. Matching is case-insensitive and fuzzy: it accepts a
 * substring hit on the model key or display name, or an embedding-keyed
 * model whose name contains the id minus a "text-embedding-" prefix.
 * Any CLI or parse failure is treated as "not installed".
 */
async function isModelInstalled(
  lmsCli: string,
  modelId: string
): Promise<{ installed: boolean; modelKey?: string }> {
  try {
    const { stdout } = await execAsync(`"${lmsCli}" ls --json`, { timeout: 10_000 });
    const parsed: unknown = JSON.parse(stdout);
    if (!Array.isArray(parsed)) return { installed: false };
    const query = modelId.toLowerCase();
    const shortQuery = query.replace("text-embedding-", "");
    for (const entry of parsed) {
      const key = String((entry as any)?.modelKey ?? "").toLowerCase();
      const name = String((entry as any)?.displayName ?? "").toLowerCase();
      const hit =
        key.includes(query) ||
        name.includes(query) ||
        (key.includes("embed") && name.includes(shortQuery));
      if (hit) return { installed: true, modelKey: (entry as any).modelKey };
    }
    return { installed: false };
  } catch {
    return { installed: false };
  }
}
/**
 * Query `${baseUrl}/v1/models` and split the loaded models into embedding
 * models vs all models. A model counts as an embedding model when the API
 * reports `type === "embedding"` or its id contains "embed".
 * `reachable: false` means the HTTP request itself failed (connection
 * refused, or the 2 s timeout fired); a non-OK status still counts as
 * reachable but yields empty lists.
 */
async function getLoadedModels(baseUrl: string): Promise<{
  embeddingModels: string[];
  allModels: string[];
  reachable: boolean;
}> {
  try {
    const res = await fetch(`${baseUrl}/v1/models`, {
      method: "GET",
      signal: AbortSignal.timeout(2_000),
    });
    if (!res.ok) {
      return { embeddingModels: [], allModels: [], reachable: true };
    }
    const body = (await res.json()) as { data?: Array<{ id: string; type?: string }> };
    const allModels: string[] = [];
    const embeddingModels: string[] = [];
    for (const entry of body.data ?? []) {
      allModels.push(entry.id);
      if (entry.type === "embedding" || entry.id.toLowerCase().includes("embed")) {
        embeddingModels.push(entry.id);
      }
    }
    return { embeddingModels, allModels, reachable: true };
  } catch {
    return { embeddingModels: [], allModels: [], reachable: false };
  }
}
/**
 * Load a model through the `lms` CLI with the requested GPU offload mode,
 * optional idle TTL, and a fixed instance identifier.
 *
 * Fix: `modelKey` is now quoted in the command line — keys containing
 * spaces or shell-special characters previously broke the invocation even
 * though the CLI path and identifier were quoted.
 *
 * NOTE(review): this still builds a shell string; `modelKey` originates
 * from `lms ls` output, but switching to execFile with an argv array would
 * rule out shell injection entirely.
 *
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`
 *          with stderr (or the exception message) attached.
 */
async function loadModelViaCli(
  lmsCli: string,
  modelKey: string,
  opts: { gpuMode?: "off" | "max" | number; ttlSeconds?: number }
): Promise<{ ok: boolean; error?: string }> {
  const gpu =
    opts.gpuMode === "off" ? "--gpu off" :
    opts.gpuMode === "max" ? "--gpu max" :
    `--gpu ${opts.gpuMode ?? "max"}`;
  const parts = [`"${lmsCli}"`, "load", `"${modelKey}"`, gpu];
  if (opts.ttlSeconds) parts.push(`--ttl ${opts.ttlSeconds}`);
  parts.push(`--identifier "playbook-embedding"`);
  try {
    // Model loads can be slow (weights pushed to VRAM); allow up to 2 minutes.
    await execAsync(parts.join(" "), { timeout: 120_000 });
    return { ok: true };
  } catch (e: any) {
    return { ok: false, error: e?.stderr ?? e?.message ?? String(e) };
  }
}
// ─── main export ────────────────────────────────────────────────────────────
/**
 * Determine whether semantic (embedding-based) search can be used.
 *
 * Resolution order:
 *   1. LM Studio API unreachable → not ready; disable semantic search.
 *   2. Configured model loaded (exact, case-insensitive) → ready.
 *   3. A similarly named embedding model is loaded → ready, use that one.
 *   4. Any other embedding model is loaded → ready, use the first one.
 *   5. Nothing loaded → consult the `lms` CLI: if the model is installed
 *      and `autoLoad` is set, load it and re-check; otherwise return
 *      instructions for enabling semantic search.
 *
 * Never throws — every failure path resolves to `ready: false` with a
 * markdown `userMessage` explaining the fallback to keyword-only search.
 */
export async function checkEmbeddingCapability(
  config: EmbeddingPrimerConfig
): Promise<EmbeddingPrimerResult> {
  const { modelId, baseUrl, autoLoad = false, gpuMode = "max", ttlSeconds } = config;
  const { embeddingModels, reachable } = await getLoadedModels(baseUrl);
  if (!reachable) {
    return {
      ready: false, modelId, isLoaded: false, disableSemantic: true,
      userMessage:
        `**Semantic Search Unavailable:**\n\nCannot reach LM Studio API at \`${baseUrl}\`.\n\n` +
        `**Fallback:** Using keyword-only search.\n\n` +
        `**To enable semantic search:**\n1. Start LM Studio\n2. Load an embedding model`,
      messageSeverity: "warning",
      error: "LM Studio API not reachable",
    };
  }
  const norm = modelId.toLowerCase();
  // Exact match: the configured model is already loaded.
  if (embeddingModels.some((m) => m.toLowerCase() === norm)) {
    return { ready: true, modelId, isLoaded: true, availableEmbeddingModels: embeddingModels };
  }
  // Similar match (e.g. user loaded a variant); the prefix is stripped so
  // "text-embedding-foo" can match a loaded model id containing "foo".
  const similar = embeddingModels.find((m) =>
    m.toLowerCase().includes(norm.replace("text-embedding-", ""))
  );
  if (similar) {
    // Note: the returned modelId is the loaded variant, not the configured id.
    return {
      ready: true, modelId: similar, isLoaded: true,
      availableEmbeddingModels: embeddingModels,
      userMessage: `Using loaded embedding model: ${similar}\n(Configured: ${modelId})`,
      messageSeverity: "info",
    };
  }
  // A different embedding model is loaded — use it rather than failing.
  if (embeddingModels.length > 0) {
    return {
      ready: true, modelId: embeddingModels[0], isLoaded: true,
      availableEmbeddingModels: embeddingModels,
      userMessage:
        `**Semantic Search:** Configured model \`${modelId}\` is not loaded.\n` +
        `Using available model: \`${embeddingModels[0]}\``,
      messageSeverity: "info",
    };
  }
  // Nothing loaded — see whether the `lms` CLI can tell us more (or load it).
  const lmsCli = await findLmsCli();
  if (!lmsCli) {
    return {
      ready: false, modelId, isLoaded: false, disableSemantic: true,
      userMessage:
        `**Semantic Search Unavailable:**\n\nNo embedding model is loaded and \`lms\` CLI not found.\n\n` +
        `**Fallback:** Using keyword-only search.\n\n` +
        `**To enable:** Load an embedding model in LM Studio.`,
      messageSeverity: "warning",
    };
  }
  const { installed, modelKey } = await isModelInstalled(lmsCli, modelId);
  if (!installed) {
    return {
      ready: false, modelId, isLoaded: false, isInstalled: false, disableSemantic: true,
      userMessage:
        `**Semantic Search Unavailable:**\n\nModel \`${modelId}\` is not installed locally.\n\n` +
        `**Fallback:** Using keyword-only search.\n\n` +
        `**To enable:** Download an embedding model in LM Studio → Discover.`,
      messageSeverity: "warning",
    };
  }
  // Installed but not loaded: optionally auto-load, then re-query the API
  // to confirm the load actually took effect.
  if (autoLoad && modelKey) {
    const res = await loadModelViaCli(lmsCli, modelKey, { gpuMode, ttlSeconds });
    if (res.ok) {
      const { embeddingModels: fresh } = await getLoadedModels(baseUrl);
      if (fresh.length > 0) {
        return {
          ready: true, modelId: fresh[0], isLoaded: true, isInstalled: true,
          wasAutoLoaded: true, availableEmbeddingModels: fresh,
          userMessage: `**Semantic Search Enabled:** Auto-loaded \`${fresh[0]}\``,
          messageSeverity: "info",
        };
      }
    }
  }
  // Installed, not loaded, and auto-load was off or did not stick.
  return {
    ready: false, modelId, isLoaded: false, isInstalled: true, disableSemantic: true,
    userMessage:
      `**Semantic Search Unavailable:**\n\nModel \`${modelKey ?? modelId}\` is installed but not loaded.\n\n` +
      `**Fallback:** Using keyword-only search.\n\n` +
      `**To enable:**\n\`\`\`bash\nlms load ${modelKey ?? modelId}\n\`\`\``,
    messageSeverity: "warning",
  };
}
/**
 * Lightweight probe: true when at least one embedding model is currently
 * loaded on the LM Studio server at `baseUrl` (false if unreachable).
 */
export async function isEmbeddingAvailable(baseUrl: string): Promise<boolean> {
  const status = await getLoadedModels(baseUrl);
  return status.embeddingModels.length > 0;
}