// Forked from vadimfedenko/analyze-images
// src/toolsProvider.ts
import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { readdir, stat } from "fs/promises";
import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "path";
import { z } from "zod";
import { globalConfigSchematics } from "./config";
/** Lower-case file extensions (leading dot included) that are treated as images. */
const IMAGE_EXTENSIONS = new Set<string>([
  ".jpg", ".jpeg", ".png", ".webp", ".gif",
  ".bmp", ".tiff", ".tif", ".avif",
]);

/** Answer length, in tokens, that the analysis prompt asks the vision model to aim for. */
const ANALYSIS_TOKENS_SOFT_TARGET = 512;

/** Result count used by list_local_images when the caller omits maxResults. */
const MAX_LIST_RESULTS_DEFAULT = 50;

/** Ceiling applied to any caller-supplied maxResults in list_local_images. */
const MAX_LIST_RESULTS_HARD_CAP = 500;
/** One image file discovered while scanning the workspace directory. */
type FoundImage = {
  /** Full filesystem path of the file. */
  absolutePath: string;
  /** Path relative to the scan root, written with forward slashes. */
  relativePath: string;
  /** Last-modified time as an ISO 8601 string. */
  modifiedAt: string;
  /** File size in bytes. */
  sizeBytes: number;
};
/** Plugin settings after defaults have been applied and string values trimmed. */
type ResolvedSettings = {
  /** Directory that both tools operate on; empty string when not configured. */
  workspaceDirectory: string;
  /** Key of the vision model to load; empty string when not configured. */
  visionModelKey: string;
  /** Token limit passed to the model when generating the analysis. */
  maxAnalysisTokens: number;
};
/**
 * Builds the two tools this plugin exposes:
 *
 * - `list_local_images` lists image files under the configured workspace directory.
 * - `analyze_local_image` sends one of those images, plus a prompt and context,
 *   to the configured vision model and returns the model's answer.
 *
 * Settings are re-read on every tool call. Validation failures are returned to
 * the caller as `Error: ...` strings rather than thrown.
 *
 * @param ctl Controller used to read plugin settings and to reach the LM Studio client.
 * @returns The list tool followed by the analyze tool.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  const listLocalImagesTool = tool({
    name: "list_local_images",
    description: [
      "List image files available in the configured workspace directory. ",
      "Supported extensions: jpg, jpeg, png, webp, gif, bmp, tiff, tif, avif. ",
      "Returns relative paths, sizes, and modified timestamps. ",
      "Call this BEFORE analyze_local_image to discover valid image names.",
    ].join(""),
    parameters: {
      recursive: z
        .boolean()
        .optional()
        .describe("Recurse into subdirectories. Defaults to true."),
      maxResults: z
        .number()
        .int()
        .positive()
        .optional()
        .describe(
          `Maximum number of images to return. Defaults to ${MAX_LIST_RESULTS_DEFAULT}, hard-capped at ${MAX_LIST_RESULTS_HARD_CAP}.`,
        ),
    },
    implementation: async ({ recursive, maxResults }, { status, warn }) => {
      const { workspaceDirectory } = readSettings(ctl);
      const workspaceProblem = await assertWorkspaceUsable(workspaceDirectory);
      if (workspaceProblem) return workspaceProblem;

      // Apply defaults, then clamp the requested count to the hard cap.
      const descend = recursive ?? true;
      const limit = Math.min(maxResults ?? MAX_LIST_RESULTS_DEFAULT, MAX_LIST_RESULTS_HARD_CAP);

      status(`Scanning images in ${workspaceDirectory} (recursive=${descend})...`);
      const images = await collectImages(workspaceDirectory, descend, limit, warn);
      if (images.length === 0) {
        return `No images found in ${workspaceDirectory}.`;
      }

      // Absolute paths are deliberately left out of the tool result.
      const listing = images.map(({ relativePath, sizeBytes, modifiedAt }) => ({
        name: basename(relativePath),
        relativePath,
        sizeBytes,
        modifiedAt,
      }));
      return { workspaceDirectory, count: images.length, images: listing };
    },
  });

  const analyzeLocalImageTool = tool({
    name: "analyze_local_image",
    description: [
      "Analyze one local image from the configured workspace directory using a vision-language model. ",
      "First call list_local_images to discover available file names, then pass the chosen name here.",
    ].join(""),
    parameters: {
      imageName: z
        .string()
        .describe(
          "Image file name or relative path from list_local_images, e.g. '1774334299591-9-thumb.webp'.",
        ),
      prompt: z
        .string()
        .describe("Required analysis task/question. Keep it clear and specific."),
      context: z
        .string()
        .describe(
          "Required known context for this image and task (source, intent, constraints, known facts, prior findings).",
        ),
    },
    implementation: async ({ imageName, prompt, context }, { status, warn, signal }) => {
      const { workspaceDirectory, visionModelKey, maxAnalysisTokens } = readSettings(ctl);

      const workspaceProblem = await assertWorkspaceUsable(workspaceDirectory);
      if (workspaceProblem) return workspaceProblem;
      if (!visionModelKey) {
        return "Error: visionModelKey is not configured. Set it in plugin Global Settings.";
      }

      const requestedName = sanitizeRelativeInput(imageName);
      if (!requestedName) {
        return "Error: imageName is empty or invalid.";
      }

      const target = await resolveImagePathByName(workspaceDirectory, requestedName, warn);
      if (!target) {
        return `Error: image not found in ${workspaceDirectory}: ${requestedName}`;
      }

      status(`Loading vision model '${visionModelKey}'...`);
      let model;
      try {
        model = await ctl.client.llm.model(visionModelKey);
      } catch (error: any) {
        return `Error: failed to load vision model '${visionModelKey}': ${error?.message || String(error)}`;
      }

      // The `vision` flag lives on the model info, not on the handle. The info
      // accessor is probed at runtime because its name differs between SDK
      // versions. When no info can be read the check is skipped and
      // model.respond() is left to surface any real error.
      const modelInfo = await readModelInfo(model, warn);
      if (modelInfo && !modelInfo.vision) {
        return `Error: model '${visionModelKey}' does not support vision. Pick a VLM in plugin settings.`;
      }

      status("Preparing image for multimodal model...");
      const imageHandle = await ctl.client.files.prepareImage(target.absolutePath);

      const analysisPrompt = [
        "You are a vision assistant. Analyze the provided image and give a concise final answer. ",
        "Do not provide hidden reasoning or step-by-step chain-of-thought. ",
        "If uncertain, state uncertainty briefly. ",
        `Target up to ${ANALYSIS_TOKENS_SOFT_TARGET} tokens in the final answer.\n\n`,
        `Known context:\n${context.trim()}\n\n`,
        `User request:\n${prompt.trim()}`,
      ].join("");

      status("Running multimodal analysis...");
      // Newer SDKs name the limit `maxPredictedTokens`; on an older SDK rename
      // the key to `maxTokens`. The cast keeps either spelling compiling.
      const responseOptions = { maxPredictedTokens: maxAnalysisTokens, signal } as any;
      const result = await model.respond(
        [{ role: "user", content: analysisPrompt, images: [imageHandle] }],
        responseOptions,
      );
      return result.content;
    },
  });

  return [listLocalImagesTool, analyzeLocalImageTool];
}
/**
 * Reads the plugin's global configuration and fills in defaults.
 *
 * Missing string settings become `""` and are trimmed; a missing
 * `maxAnalysisTokens` falls back to 2048.
 *
 * @param ctl Controller that provides access to the global plugin config.
 * @returns The settings in resolved form.
 */
function readSettings(ctl: ToolsProviderController): ResolvedSettings {
  const config = ctl.getGlobalPluginConfig(globalConfigSchematics);
  const workspaceDirectory = (config.get("workspaceDirectory") ?? "").trim();
  const visionModelKey = (config.get("visionModelKey") ?? "").trim();
  const maxAnalysisTokens = config.get("maxAnalysisTokens") ?? 2048;
  return { workspaceDirectory, visionModelKey, maxAnalysisTokens };
}
/**
 * Fetches model info from a model handle, tolerating differently named accessors.
 *
 * `getInfo()` is preferred; `getModelInfo()` is used when only that exists.
 * Failures are never thrown: they are reported through `warn` and yield `null`.
 *
 * @param model Model handle to probe.
 * @param warn Sink for non-fatal diagnostics.
 * @returns Whatever the accessor resolves to, or `null` when no accessor exists or it fails.
 */
async function readModelInfo(
  model: any,
  warn: (text: string) => void,
): Promise<{ vision?: boolean; modelKey?: string } | null> {
  try {
    // Pick the first accessor that is actually callable on this handle.
    const accessor =
      typeof model?.getInfo === "function"
        ? "getInfo"
        : typeof model?.getModelInfo === "function"
          ? "getModelInfo"
          : null;

    if (accessor === null) {
      warn("Model handle exposes neither getInfo() nor getModelInfo(); skipping vision capability check.");
      return null;
    }
    // Called as a member so the accessor keeps `model` as its `this`.
    return await model[accessor]();
  } catch (error: any) {
    warn(`Failed to read model info: ${error?.message || String(error)}`);
    return null;
  }
}
/**
 * Checks that the configured workspace directory can be used.
 *
 * @param workspaceDirectory Configured workspace path (already trimmed).
 * @returns `null` when the path is a non-empty absolute path to an existing
 *   directory; otherwise a user-facing `Error: ...` message naming the problem.
 */
async function assertWorkspaceUsable(workspaceDirectory: string): Promise<string | null> {
  if (!workspaceDirectory) {
    return "Error: workspaceDirectory is not configured. Set it in plugin Global Settings.";
  }
  if (!isAbsolute(workspaceDirectory)) {
    return `Error: workspaceDirectory must be an absolute path. Got: '${workspaceDirectory}'.`;
  }

  let info;
  try {
    info = await stat(workspaceDirectory);
  } catch {
    // Any stat failure is reported as the directory not existing.
    return `Error: workspace directory does not exist: ${workspaceDirectory}`;
  }

  return info.isDirectory()
    ? null
    : `Error: workspace path is not a directory: ${workspaceDirectory}`;
}
/**
 * Walks a directory breadth-first and gathers metadata for image files.
 *
 * Directories that cannot be read and files that cannot be stat'ed are
 * reported through `warn` and skipped. The walk stops as soon as `maxResults`
 * images have been collected.
 *
 * @param directoryPath Directory to start from; relative paths in the result are relative to it.
 * @param recursive Whether subdirectories are visited.
 * @param maxResults Maximum number of images to return.
 * @param warn Sink for non-fatal diagnostics.
 * @returns The images found, in discovery order.
 */
async function collectImages(
  directoryPath: string,
  recursive: boolean,
  maxResults: number,
  warn: (text: string) => void,
): Promise<FoundImage[]> {
  type DirectoryEntry = { name: string; isFile: () => boolean; isDirectory: () => boolean };

  const found: FoundImage[] = [];
  // Directories are appended as they are discovered; `cursor` marks the next one to visit.
  const pending: string[] = [directoryPath];

  for (let cursor = 0; cursor < pending.length && found.length < maxResults; cursor++) {
    const current = pending[cursor] as string;

    let entries: DirectoryEntry[];
    try {
      entries = await readdir(current, { withFileTypes: true });
    } catch (error: any) {
      warn(`Cannot read directory '${current}': ${error?.message || String(error)}`);
      continue;
    }

    for (const entry of entries) {
      if (found.length >= maxResults) break;

      const absolutePath = join(current, entry.name);
      if (entry.isDirectory()) {
        if (recursive) pending.push(absolutePath);
        continue;
      }
      // Only regular files with a recognised image extension are reported.
      if (!(entry.isFile() && isImagePath(entry.name))) continue;

      try {
        const metadata = await stat(absolutePath);
        const relativePath = normalize(relative(directoryPath, absolutePath)).replace(/\\/g, "/");
        found.push({
          absolutePath,
          relativePath,
          sizeBytes: metadata.size,
          modifiedAt: metadata.mtime.toISOString(),
        });
      } catch (error: any) {
        warn(`Cannot stat file '${absolutePath}': ${error?.message || String(error)}`);
      }
    }
  }

  return found;
}
/**
 * Tells whether a file name or path ends in a supported image extension.
 * The comparison ignores case.
 *
 * @param value File name or path to inspect.
 * @returns `true` when the extension is listed in `IMAGE_EXTENSIONS`.
 */
function isImagePath(value: string): boolean {
  const extension = extname(value).toLowerCase();
  return IMAGE_EXTENSIONS.has(extension);
}
/**
 * Validates a caller-supplied image name and turns it into a safe relative path.
 *
 * The input is trimmed, normalized, converted to forward slashes and stripped
 * of a leading `./`. It is rejected when it is empty, absolute, or still
 * contains a `..` segment after normalization, because such a path would
 * resolve outside the workspace directory (or onto the directory itself).
 *
 * @param input Raw image name or relative path.
 * @returns The cleaned relative path, or `null` when the input is unusable.
 */
function sanitizeRelativeInput(input?: string): string | null {
  const trimmed = input?.trim();
  if (!trimmed) return null;
  if (isAbsolute(trimmed)) return null;

  const normalized = normalize(trimmed).replace(/\\/g, "/").replace(/^\.\/+/, "");
  if (!normalized) return null;

  // Check whole segments so a bare ".." or a trailing "/.." is caught as well
  // as "../x" and "a/../x"; names that merely contain dots ("..cat.png") pass.
  if (normalized.split("/").includes("..")) return null;

  return normalized;
}
/**
 * Locates an image inside the workspace from a name or relative path.
 *
 * The name is first tried as a path directly under the workspace directory.
 * If that is not an image file, the workspace is scanned recursively (up to
 * 1000 images) and the first file whose base name matches, ignoring case, is
 * used. In both cases a `-thumb.webp` hit is swapped for its full-size
 * sibling when one exists.
 *
 * @param workspaceDirectory Absolute workspace directory.
 * @param imageName Sanitized image name or relative path.
 * @param warn Sink for non-fatal diagnostics raised during the scan.
 * @returns The resolved absolute and relative paths, or `null` when nothing matches.
 */
async function resolveImagePathByName(
  workspaceDirectory: string,
  imageName: string,
  warn: (text: string) => void,
): Promise<{ absolutePath: string; relativePath: string } | null> {
  // Step 1: treat the name as a path relative to the workspace root.
  const directPath = join(workspaceDirectory, imageName);
  const directInfo = await stat(directPath).catch(() => null);
  if (directInfo?.isFile() && isImagePath(imageName)) {
    return await preferFullImageIfThumb(directPath, imageName);
  }

  // Step 2: fall back to a case-insensitive base-name search over the whole workspace.
  const wantedName = basename(imageName).toLowerCase();
  const candidates = await collectImages(workspaceDirectory, true, 1000, warn);
  for (const candidate of candidates) {
    if (basename(candidate.relativePath).toLowerCase() === wantedName) {
      return await preferFullImageIfThumb(candidate.absolutePath, candidate.relativePath);
    }
  }
  return null;
}
/**
 * Swaps a thumbnail for its full-size sibling when one exists.
 *
 * A file named `<stem>-thumb.webp` (case-insensitive) counts as a thumbnail.
 * The same directory is probed for `<stem>` with each known image extension,
 * in the listed order, and the first existing image file wins. Any other
 * input, or a thumbnail without a sibling, is returned unchanged.
 *
 * @param absolutePath Absolute path of the image that was found.
 * @param relativePath The same image's path relative to the workspace.
 * @returns Paths of the full-size image if found, otherwise the input paths.
 */
async function preferFullImageIfThumb(
  absolutePath: string,
  relativePath: string,
): Promise<{ absolutePath: string; relativePath: string }> {
  const stem = /^(.*)-thumb\.webp$/i.exec(basename(relativePath))?.[1];
  if (stem === undefined) {
    return { absolutePath, relativePath };
  }

  const absoluteDir = dirname(absolutePath);
  const relativeDir = dirname(relativePath).replace(/\\/g, "/");

  // Probe order decides which sibling is preferred when several exist.
  const probeOrder = [".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif", ".avif"];
  for (const extension of probeOrder) {
    const siblingName = `${stem}${extension}`;
    const siblingAbsolute = join(absoluteDir, siblingName);

    const siblingInfo = await stat(siblingAbsolute).catch(() => null);
    // The image check also rejects extension-only names produced by an empty stem.
    if (!siblingInfo?.isFile() || !isImagePath(siblingName)) continue;

    return {
      absolutePath: siblingAbsolute,
      relativePath: relativeDir === "." ? siblingName : `${relativeDir}/${siblingName}`,
    };
  }

  return { absolutePath, relativePath };
}