// Project Files
// src / helpers / visionCapabilityPrimer.ts
import fs from "fs";
import path from "path";
import { exec } from "child_process";
import { promisify } from "util";
// Promise-returning wrapper around child_process.exec; used below to run `lms ps --json` with `await`.
const execAsync = promisify(exec);
/**
 * Model key of the core (fallback) vision primer.
 *
 * Local minimal model used only to open LM Studio's attachment UI when the
 * productive Qwen3-VL backend is remote or not configured as an LM Studio key.
 */
const CORE_VISION_PRIMER_MODEL_KEY = "qwen/qwen3-vl-4b";
/**
 * Resolve the core vision primer model key at call time.
 *
 * @returns The value of CORE_VISION_PRIMER_MODEL_KEY (currently a fixed constant).
 */
function resolveCoreVisionPrimerModelKey(): string {
return CORE_VISION_PRIMER_MODEL_KEY;
}
/**
 * Instance identifier under which the vision primer model is loaded. It is
 * handed to the core primer helpers and compared (case-insensitively) against
 * the `identifier` field of `lms ps --json` entries.
 */
export const VISION_PRIMER_IDENTIFIER = "vision-capability-priming";
/**
 * Report whether a base URL points at the local machine (loopback host).
 *
 * Recognized hosts are `localhost`, `127.0.0.1` and the IPv6 loopback `::1`
 * (written as `[::1]` inside a URL).
 *
 * @param baseUrl URL to inspect; `null`, `undefined`, empty and unparseable
 *   values are all reported as not local.
 * @returns `true` only when the URL parses and its host is a loopback host.
 */
function isLocalLmApiBaseUrl(baseUrl: string | undefined | null): boolean {
  const raw = String(baseUrl || "").trim();
  if (!raw) return false;

  let hostname: string;
  try {
    hostname = new URL(raw).hostname.toLowerCase();
  } catch {
    // Not a parseable absolute URL.
    return false;
  }

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]"),
  // so strip them before comparing against the bare loopback address.
  const host =
    hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : hostname;

  return host === "localhost" || host === "127.0.0.1" || host === "::1";
}
/** Settings from which resolveUserDocsVisionPrimerPolicy derives a primer policy. */
export interface UserDocsVisionPrimerPolicyInput {
/** Configured Qwen3-VL model reference; the resolver classifies it as empty, a filesystem path, or a model key. */
qwen3VlModelPath: string;
/** Base URL that the resolver tests for a local (loopback) host. */
embeddingBaseUrl: string;
/** NOTE(review): not read by the policy resolver in this file — confirm whether it is still needed. */
embeddingApiKey?: string;
}
/** Outcome of resolveUserDocsVisionPrimerPolicy: which model to prime with and how to load it. */
export interface UserDocsVisionPrimerPolicy {
/** Model key to load as the attachment primer. */
primerModelKey: string;
/** Present only when a specific model is required; runUserDocsVisionPrimer replaces an already-loaded primer only in that case. */
exactModelKey?: string;
/** Context length placed into the primer load configuration. */
contextLength: number;
/** GPU mode placed into the primer load configuration. */
gpuMode: "off" | "max";
/** GPU offload ratio forwarded to injectLastUsedModelIntoNewestChat. */
gpuOffloadRatio?: number;
/** Human-readable explanation of why this policy was chosen (written to the debug log). */
reason: string;
}
/**
 * Heuristically decide whether a configured model reference is a filesystem
 * path rather than a model key.
 *
 * A value counts as a path when it
 *  - starts with `~`, `./`, `../`, `.\` or `..\`,
 *  - is absolute under the host platform's rules or under Windows rules
 *    (so `C:\models\x.gguf` is recognized on any host), or
 *  - names something that exists on disk (resolved against the current
 *    working directory when relative).
 *
 * @param value Configured model reference; may be `null`/`undefined`.
 * @returns `true` when the value looks like a filesystem path; `false` for
 *   empty input or anything that looks like a plain model key.
 */
export function isFilesystemModelPath(value: string | undefined | null): boolean {
  const raw = String(value || "").trim();
  if (!raw) return false;

  // Home-relative and dot-relative prefixes in both POSIX and Windows spellings.
  const relativePathPrefixes = ["~", "./", "../", ".\\", "..\\"];
  if (relativePathPrefixes.some((prefix) => raw.startsWith(prefix))) return true;

  // Check Windows rules as well, so drive-letter paths are caught regardless of host OS.
  if (path.isAbsolute(raw) || path.win32.isAbsolute(raw)) return true;

  try {
    return fs.existsSync(raw);
  } catch {
    return false;
  }
}
/**
 * Choose the attachment-primer policy for user-docs.
 *
 * Falls back to the core primer model (context 512, GPU off) when
 *  - `qwen3VlModelPath` is empty or classified as a filesystem path by
 *    isFilesystemModelPath, or
 *  - `embeddingBaseUrl` is not recognized as local by isLocalLmApiBaseUrl.
 * Otherwise `qwen3VlModelPath` is used as an exact model key (context 8192,
 * GPU max).
 *
 * @param input Raw configuration values; both strings are trimmed first.
 * @returns The selected policy, including a `reason` describing the choice.
 */
export function resolveUserDocsVisionPrimerPolicy(
  input: UserDocsVisionPrimerPolicyInput
): UserDocsVisionPrimerPolicy {
  const modelRef = String(input.qwen3VlModelPath || "").trim();
  const apiBaseUrl = String(input.embeddingBaseUrl || "").trim();
  console.debug("[UserDocsVisionPrimer] Resolving policy...", {
    qwen3VlModelPath: modelRef,
    embeddingBaseUrl: apiBaseUrl,
  });

  // Determine whether (and why) the core fallback applies; the checks run in
  // the same order as the two fallback branches they replace.
  let fallbackReason: string | null = null;
  if (modelRef.length === 0 || isFilesystemModelPath(modelRef)) {
    fallbackReason =
      "qwen3VlModelPath is empty or a filesystem path. Filesystem paths are not supported for user-docs Vision API inference; using Core attachment primer behavior.";
  } else if (!isLocalLmApiBaseUrl(apiBaseUrl)) {
    fallbackReason = "embeddingBaseUrl is remote; using Core local attachment primer behavior.";
  }

  if (fallbackReason !== null) {
    const corePolicy: UserDocsVisionPrimerPolicy = {
      primerModelKey: resolveCoreVisionPrimerModelKey(),
      contextLength: 512,
      gpuMode: "off",
      gpuOffloadRatio: 0,
      reason: fallbackReason,
    };
    console.debug("[UserDocsVisionPrimer] Policy → 4B:", corePolicy.reason);
    return corePolicy;
  }

  // Local API plus a plain model key: require exactly that model.
  const customPolicy: UserDocsVisionPrimerPolicy = {
    primerModelKey: modelRef,
    exactModelKey: modelRef,
    contextLength: 8192,
    gpuMode: "max",
    gpuOffloadRatio: 1,
    reason:
      "embeddingBaseUrl is local and qwen3VlModelPath is a model key; using qwen3VlModelPath as the CLI-loaded attachment primer.",
  };
  console.debug("[UserDocsVisionPrimer] Policy → custom:", customPolicy.primerModelKey);
  return customPolicy;
}
/**
 * Check, via `lms ps --json`, whether the instance registered under
 * `identifier` is running `expectedModelKey`.
 *
 * Returns true only when an instance with that identifier is found and one of
 * its `path` / `modelKey` / `model` / `loadedModelKey` fields contains the
 * expected key (or is contained in it), compared case-insensitively.
 * Returns false if: the identifier or expected key is empty, the wrong model
 * is detected, no model-key field is available, or any error occurs.
 * Callers treat false as "reload needed".
 *
 * @param lmsCli Path of the `lms` executable; interpolated into a shell command.
 * @param identifier Instance identifier to look up.
 * @param expectedModelKey Model key the instance is expected to run.
 * @param timeoutMs Timeout for the CLI call, in milliseconds.
 * @returns Whether the expected model could be confirmed. Never rejects.
 */
export async function isIdentifierLoadedWithModel(
  lmsCli: string,
  identifier: string,
  expectedModelKey: string,
  timeoutMs = 5000
): Promise<boolean> {
  const normalizedId = identifier.toLowerCase().trim();
  const normalizedExpected = expectedModelKey.toLowerCase().trim();
  console.debug("[UserDocsVisionPrimer] isIdentifierLoadedWithModel check...", {
    identifier,
    expectedModelKey,
  });

  // An empty key is a substring of every candidate, and an empty identifier
  // equals the normalized identifier of any entry that has none. Neither can
  // confirm anything, so report "not confirmed" instead of a false positive.
  if (!normalizedId || !normalizedExpected) {
    console.debug("[UserDocsVisionPrimer] Empty identifier or expected model key; cannot confirm loaded model");
    return false;
  }

  try {
    const { stdout } = await execAsync(`"${lmsCli}" ps --json`, { timeout: timeoutMs });

    let parsed: unknown;
    try {
      parsed = JSON.parse(stdout);
    } catch {
      console.debug("[UserDocsVisionPrimer] Failed to parse lms ps output");
      return false;
    }
    if (!Array.isArray(parsed)) {
      console.debug("[UserDocsVisionPrimer] lms ps output is not an array");
      return false;
    }

    const inst = parsed.find(
      (i: any) => String(i?.identifier || "").toLowerCase().trim() === normalizedId
    );
    if (!inst) {
      console.debug("[UserDocsVisionPrimer] No instance found with identifier:", identifier);
      return false;
    }

    // Collect every field the CLI might use for the model path/key.
    const candidates = [inst?.path, inst?.modelKey, inst?.model, inst?.loadedModelKey]
      .map((v: unknown) => String(v || "").toLowerCase().trim())
      .filter((v: string) => v.length > 0);
    console.debug("[UserDocsVisionPrimer] Instance found, candidates:", candidates);
    if (candidates.length === 0) {
      // No model-key field in lms ps output — cannot confirm → treat as wrong, trigger reload.
      console.debug("[UserDocsVisionPrimer] lms ps instance has no model-key field; assuming wrong model");
      return false;
    }

    // Substring match in both directions: a reported path may embed the key,
    // and a key may carry a suffix the reported value lacks.
    const matchResult = candidates.some(
      (c: string) => c.includes(normalizedExpected) || normalizedExpected.includes(c)
    );
    console.debug("[UserDocsVisionPrimer] Match result:", matchResult, {
      expected: normalizedExpected,
      candidates,
    });
    return matchResult;
  } catch (e: unknown) {
    const detail = e instanceof Error && e.message ? e.message : e;
    console.warn("[UserDocsVisionPrimer] isIdentifierLoadedWithModel error:", detail);
    return false;
  }
}
// ---------------------------------------------------------------------------
// Imports from core-bundle (for runUserDocsVisionPrimer)
// ---------------------------------------------------------------------------
import {
checkVisionPrimerStatus,
loadVisionPrimerModel,
unloadVisionPrimer,
injectLastUsedModelIntoNewestChat,
getPluginMeta,
} from "../core-bundle.mjs";
/** Configuration for runUserDocsVisionPrimer */
export interface UserDocsVisionPrimerRunConfig {
/** Base URL forwarded to the core primer helpers (status check, load, unload). */
baseUrl: string;
/** API key forwarded to the core primer helpers; an empty string is sent as `undefined` when unloading. */
apiKey: string;
/** Resolved policy (model key, context length, GPU settings) to apply. */
primerPolicy: UserDocsVisionPrimerPolicy;
/** Plugin identifier used for the lastUsedModel injection; defaults to getPluginMeta().pluginIdentifier. */
pluginId?: string;
}
/** Result of the full user-docs vision primer run */
export interface UserDocsVisionPrimerRunResult {
/** True when the primer was already loaded or a background load was started; false when the quick status check reported an error. */
ok: boolean;
/** Set when the primer identifier was already loaded at check time and no replacement was started. */
alreadyLoaded?: boolean;
/** NOTE(review): not set by any code visible in this file — presumably supplied by load results; confirm. */
loadFailed?: boolean;
/** Copied from the quick status check in the user-facing-error branch. */
notInstalled?: boolean;
/** Set when the quick status check reported an infrastructure error. */
infrastructureError?: boolean;
/** NOTE(review): stored on the global primer result in the user-facing-error branch, but not on the object returned by runUserDocsVisionPrimer. */
userFacingError?: string;
/** Error detail copied from the quick status check. */
error?: string;
}
// ---------------------------------------------------------------------------
// runUserDocsVisionPrimer — complete primer orchestration for user-docs
// ---------------------------------------------------------------------------
/** Context length at or below which a loaded primer instance counts as "minimal mode". */
const MINIMAL_PRIMER_CONTEXT_LENGTH = 512;

/**
 * Report whether the primer instance listed by `lms ps --json` was loaded in
 * minimal mode, i.e. with a numeric `contextLength` of at most
 * MINIMAL_PRIMER_CONTEXT_LENGTH (used as a proxy for gpuMode="off").
 *
 * Best-effort: CLI or parse failures are logged at debug level and reported as
 * `false`, as is an instance without a numeric `contextLength`.
 */
async function isPrimerLoadedInMinimalMode(lmsCli: string, timeoutMs = 5000): Promise<boolean> {
  try {
    const { stdout } = await execAsync(`"${lmsCli}" ps --json`, { timeout: timeoutMs });
    const parsed: unknown = JSON.parse(stdout);
    if (!Array.isArray(parsed)) return false;
    const inst = parsed.find(
      (i: any) => String(i?.identifier || "").toLowerCase().trim() === VISION_PRIMER_IDENTIFIER.toLowerCase()
    );
    if (inst && typeof inst.contextLength === "number" && inst.contextLength <= MINIMAL_PRIMER_CONTEXT_LENGTH) {
      console.debug("[UserDocsVisionPrimer] Minimal mode detected on loaded instance:", {
        contextLength: inst.contextLength,
      });
      return true;
    }
  } catch (e: any) {
    console.debug(
      "[UserDocsVisionPrimer] Could not inspect contextLength for minimal mode check:",
      e?.message || e
    );
  }
  return false;
}

/**
 * Run the full user-docs vision primer workflow.
 * Encapsulates: quick status check → load/replace/inject logic for an
 * already-resolved policy. Call this from main() instead of inlining primer logic.
 *
 * Behavior by quick-check outcome:
 *  - already loaded: if the policy requires an exact model and the loaded
 *    instance runs a different model, or the right model in minimal mode, the
 *    primer is unloaded and reloaded in the background. Otherwise the
 *    lastUsedModel entry is injected immediately.
 *  - needs load: a background load is started; injection happens once it succeeds.
 *  - user-facing / infrastructure error: a failure result is returned.
 *
 * Side effects: writes `globalThis.__dtc_visionPrimerPromise` (background load
 * promise) and `globalThis.__dtc_visionPrimerResult` (latest known result).
 *
 * @param config Base URL, API key, resolved policy and optional plugin id.
 * @returns `{ ok: true }` as soon as a background load has been started (the
 *   load outcome is only available through the globals above), or the
 *   immediate result for the other branches.
 */
export async function runUserDocsVisionPrimer(
  config: UserDocsVisionPrimerRunConfig
): Promise<UserDocsVisionPrimerRunResult> {
  const { baseUrl, apiKey, primerPolicy } = config;
  const pluginId = config.pluginId ?? getPluginMeta().pluginIdentifier;
  console.debug("[UserDocsVisionPrimer] === Starting Primer Run ===");
  console.debug("[UserDocsVisionPrimer] Primer config:", {
    modelKey: primerPolicy.primerModelKey,
    identifier: VISION_PRIMER_IDENTIFIER,
  });
  const primerConfig = {
    modelKey: primerPolicy.primerModelKey,
    baseUrl,
    apiKey,
    contextLength: primerPolicy.contextLength,
    gpuMode: primerPolicy.gpuMode,
    ttlSeconds: 7200,
    identifier: VISION_PRIMER_IDENTIFIER,
  };

  // Every branch injects the same payload; build it in one place.
  const injectLastUsedModel = () =>
    injectLastUsedModelIntoNewestChat({
      modelKey: primerConfig.modelKey,
      identifier: primerConfig.identifier,
      contextLength: primerConfig.contextLength,
      gpuOffloadRatio: primerPolicy.gpuOffloadRatio,
      pluginId,
    });

  // Step 1: Quick status check (awaited — fast)
  const quickCheck = await checkVisionPrimerStatus(primerConfig);
  console.debug("[UserDocsVisionPrimer] Quick check result:", {
    alreadyLoaded: quickCheck.alreadyLoaded,
    needsLoad: quickCheck.needsLoad,
    installed: quickCheck.installed,
    infrastructureError: quickCheck.infrastructureError,
    userFacingError: !!quickCheck.userFacingError,
  });

  // Step 2: Branch based on status
  if (quickCheck.alreadyLoaded) {
    console.debug("[UserDocsVisionPrimer] Branch: alreadyLoaded — checking if correct model");
    const hasExactModelRequirement = primerPolicy.exactModelKey != null;
    console.debug("[UserDocsVisionPrimer] Policy has exact model requirement:", hasExactModelRequirement, {
      primerModelKey: primerPolicy.primerModelKey,
      coreKey: resolveCoreVisionPrimerModelKey(),
    });

    const lmsCli = quickCheck.lmsCli;

    // Verify the loaded model against the policy. Without a CLI path nothing
    // can be verified, so skip the call instead of spawning a bogus command.
    const keyMatch =
      lmsCli != null
        ? await isIdentifierLoadedWithModel(lmsCli, VISION_PRIMER_IDENTIFIER, primerPolicy.primerModelKey)
        : false;

    // The right model may still be loaded in minimal mode (contextLength ≤ 512,
    // used as a proxy for gpuMode="off"). This only matters when the key
    // matches (a mismatch already forces a replace) and the policy itself asks
    // for more than the minimal context.
    const minimalModeDetected =
      hasExactModelRequirement &&
      keyMatch &&
      lmsCli != null &&
      primerPolicy.contextLength > MINIMAL_PRIMER_CONTEXT_LENGTH &&
      (await isPrimerLoadedInMinimalMode(lmsCli));

    // Replacement happens only for policies with an exact model requirement;
    // for the core fallback policy whatever is loaded is accepted.
    const needsReplace =
      hasExactModelRequirement && lmsCli != null && (!keyMatch || minimalModeDetected);
    console.debug("[UserDocsVisionPrimer] needsReplace:", needsReplace, { keyMatch, minimalModeDetected });

    if (needsReplace && lmsCli != null) {
      // Unload + reload with verification (fire-and-forget)
      console.debug(
        keyMatch
          ? `[UserDocsVisionPrimer] MINIMAL MODE DETECTED — unloading to reload '${primerPolicy.primerModelKey}' with policy settings`
          : `[UserDocsVisionPrimer] WRONG MODEL DETECTED — unloading to replace with '${primerPolicy.primerModelKey}'`
      );
      await unloadVisionPrimer(VISION_PRIMER_IDENTIFIER, {
        baseUrl,
        apiKey: apiKey || undefined,
      });
      const reloadPromise = loadVisionPrimerModel(lmsCli, primerConfig);
      (globalThis as any).__dtc_visionPrimerPromise = reloadPromise;
      reloadPromise
        .then(async (loadResult) => {
          (globalThis as any).__dtc_visionPrimerResult = loadResult;
          if (loadResult.ok) {
            console.debug(
              `[UserDocsVisionPrimer] Replaced primer: ${loadResult.size} in ${loadResult.loadTimeSec}s`
            );
            // Verify the correct model is now loaded
            const verified = await isIdentifierLoadedWithModel(
              lmsCli,
              VISION_PRIMER_IDENTIFIER,
              primerPolicy.primerModelKey
            );
            console.debug("[UserDocsVisionPrimer] Post-replace verification:", {
              ok: verified,
              model: primerPolicy.primerModelKey,
            });
            if (!verified) {
              // Injection still proceeds (as before); surface the mismatch so it is not lost in debug output.
              console.warn(
                "[UserDocsVisionPrimer] Post-replace verification could not confirm model:",
                primerPolicy.primerModelKey
              );
            }
            await injectLastUsedModel();
          } else {
            console.warn("[UserDocsVisionPrimer] Replace-load failed:", loadResult.error);
          }
        })
        .catch((err: any) => {
          console.warn("[UserDocsVisionPrimer] Unexpected replace-load error:", err?.message || err);
        });
      return { ok: true }; // fire-and-forget started successfully
    }

    // No replacement needed: either the exact model is already loaded, or the
    // policy has no exact requirement (fallback to core 4B attachment mode).
    console.debug(
      hasExactModelRequirement
        ? "[UserDocsVisionPrimer] Branch: correct exact model already loaded, injecting lastUsedModel"
        : "[UserDocsVisionPrimer] Branch: no exact model requirement, injecting lastUsedModel with current state"
    );
    await injectLastUsedModel();
    (globalThis as any).__dtc_visionPrimerResult = { ok: true, alreadyLoaded: true };
    return { ok: true, alreadyLoaded: true };
  } else if (quickCheck.needsLoad) {
    // Model installed but not loaded — fire-and-forget load
    console.debug(
      "[UserDocsVisionPrimer] Branch: needsLoad — starting fire-and-forget load for",
      primerConfig.modelKey
    );
    const loadPromise = loadVisionPrimerModel(quickCheck.lmsCli, primerConfig);
    (globalThis as any).__dtc_visionPrimerPromise = loadPromise;
    loadPromise
      .then(async (loadResult) => {
        (globalThis as any).__dtc_visionPrimerResult = loadResult;
        if (loadResult.ok) {
          console.debug(
            `[UserDocsVisionPrimer] Model loaded: ${loadResult.size} in ${loadResult.loadTimeSec}s`
          );
          await injectLastUsedModel();
        } else {
          console.warn("[UserDocsVisionPrimer] Load failed:", loadResult.error);
        }
      })
      .catch((err: any) => {
        console.warn("[UserDocsVisionPrimer] Unexpected load error:", err?.message || err);
      });
    return { ok: true }; // fire-and-forget started successfully
  } else if (quickCheck.userFacingError) {
    console.warn("[UserDocsVisionPrimer] Branch: userFacingError —", quickCheck.error);
    (globalThis as any).__dtc_visionPrimerResult = {
      ok: false,
      notInstalled: quickCheck.notInstalled,
      userFacingError: quickCheck.userFacingError,
      error: quickCheck.error,
    };
    // NOTE(review): the returned object omits `userFacingError`, unlike the
    // global result stored above — confirm whether that is intentional.
    return {
      ok: false,
      notInstalled: quickCheck.notInstalled,
      error: quickCheck.error,
    };
  } else if (quickCheck.infrastructureError) {
    console.warn("[UserDocsVisionPrimer] Branch: infrastructureError (silent):", quickCheck.error);
    return {
      ok: false,
      infrastructureError: true,
      error: quickCheck.error,
    };
  }

  // Fallback — should not reach here
  console.debug("[UserDocsVisionPrimer] No branch matched, returning neutral result");
  return { ok: true };
}
// ---------------------------------------------------------------------------
// Lazy-init wrapper (called from orchestrator generate(), like ensureAgentModelLoaded)
// ---------------------------------------------------------------------------
/** Configuration for ensureUserDocsVisionPrimer */
export interface EnsureUserDocsVisionPrimerConfig {
/** SDK-provided base URL; passed as the fallback when resolving the `baseUrl` global config field. */
baseUrl: string;
/** SDK-provided API key; passed as the fallback when resolving the `apiKey` global config field. */
apiKey: string;
/** SDK-provided value; passed as the fallback when resolving the `embeddingBaseUrl` global config field. */
embeddingBaseUrl: string;
/** SDK-provided value; passed as the fallback when resolving the `qwen3VlModelPath` global config field. */
qwen3VlModelPath: string;
}
import { resolveGlobalConfigField } from "./globalConfigReader.js";
/** Module-level guard: set to true by the first ensureUserDocsVisionPrimer call. */
let userDocsVisionPrimerInitialized = false;
/**
 * Lazy-init entry point for the user-docs vision primer.
 *
 * The first call marks the primer as initialized, resolves the four settings
 * through resolveGlobalConfigField (passing the SDK-provided values as
 * fallbacks), derives the policy and runs the primer workflow.
 *
 * Every later call skips the workflow and reports, in this order: the result
 * stored on `globalThis.__dtc_visionPrimerResult`, else the awaited
 * `globalThis.__dtc_visionPrimerPromise`, else `{ ok: true }`.
 *
 * @param config SDK-provided values used as fallbacks for the resolved settings.
 * @returns The primer run result (first call) or the last known result (later calls).
 */
export async function ensureUserDocsVisionPrimer(
  config: EnsureUserDocsVisionPrimerConfig
): Promise<UserDocsVisionPrimerRunResult> {
  if (!userDocsVisionPrimerInitialized) {
    userDocsVisionPrimerInitialized = true;

    // Resolve fresh values from JSON persistence file (ground truth), using SDK params as fallback.
    const resolved = {
      qwen3VlModelPath: resolveGlobalConfigField("qwen3VlModelPath", config.qwen3VlModelPath),
      embeddingBaseUrl: resolveGlobalConfigField("embeddingBaseUrl", config.embeddingBaseUrl),
      baseUrl: resolveGlobalConfigField("baseUrl", config.baseUrl),
      apiKey: resolveGlobalConfigField("apiKey", config.apiKey),
    };
    // Secrets and endpoints are logged only as set/empty markers.
    console.debug("[UserDocsVisionPrimer] Resolved values:", {
      qwen3VlModelPath: resolved.qwen3VlModelPath,
      embeddingBaseUrl: resolved.embeddingBaseUrl,
      baseUrl: resolved.baseUrl ? "[set]" : "[empty]",
      apiKey: resolved.apiKey ? "[set]" : "[empty]",
    });

    const policy = resolveUserDocsVisionPrimerPolicy({
      qwen3VlModelPath: String(resolved.qwen3VlModelPath || ""),
      embeddingBaseUrl: String(resolved.embeddingBaseUrl || ""),
    });
    return await runUserDocsVisionPrimer({
      baseUrl: String(resolved.baseUrl || config.baseUrl),
      apiKey: String(resolved.apiKey || config.apiKey),
      primerPolicy: policy,
    });
  }

  console.debug("[UserDocsVisionPrimer] Already initialized, skipping");
  const storedResult = (globalThis as any).__dtc_visionPrimerResult;
  if (storedResult) {
    return storedResult as UserDocsVisionPrimerRunResult;
  }
  const inFlightLoad = (globalThis as any).__dtc_visionPrimerPromise;
  return inFlightLoad ? ((await inFlightLoad) as UserDocsVisionPrimerRunResult) : { ok: true };
}