// Forked from vadimfedenko/analyze-images
// src/toolsProvider.ts
import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { readdir, stat } from "fs/promises";
import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "path";
import { z } from "zod";
import { configSchematics } from "./config";
/**
 * Lower-case file extensions (leading dot included) that are treated as images.
 * Lookups must lower-case the extension first; the set itself is case-sensitive.
 */
const IMAGE_EXTENSIONS = new Set<string>([
  // Common web raster formats
  ".jpg", ".jpeg", ".png", ".webp", ".gif",
  // Older raster formats
  ".bmp", ".tiff", ".tif",
  // Newer codecs
  ".avif", ".heic", ".heif", ".jxl",
  // Vector and icon formats
  ".svg", ".ico",
]);
/** Metadata recorded for each image file discovered by the directory scan (`collectImages`). */
type FoundImage = {
/** Path relative to the scan root, normalized and written with forward slashes. */
relativePath: string;
/** Path built by joining the directory being scanned with the entry name. */
absolutePath: string;
/** File size in bytes, taken from the file's `stat` result. */
sizeBytes: number;
/** Last-modified time (`mtime`) rendered with `Date.prototype.toISOString()`. */
modifiedAt: string;
};
/**
 * Builds the three tools this plugin exposes:
 *
 * - "List Local Images": enumerates image files under the working directory.
 * - "Analyze Local Image": sends one image plus a prompt to a vision model.
 * - "Analyze Multiple Images": sends several images in a single model call.
 *
 * Plugin configuration (`maxTokens`, `softTargetTokens`, `responseLanguage`) is read
 * once, when this function runs, and captured by the tool implementations.
 *
 * Tool implementations report failures by returning an `"Error: ..."` string rather
 * than throwing, so the text is handed back to the caller as the tool result.
 *
 * @param ctl Controller supplied by the host; used for configuration, the working
 *            directory, and access to the model/file client.
 * @returns The tools, in the order list → analyze one → analyze many.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  const pluginConfig = ctl.getPluginConfig(configSchematics);
  const maxTokens = (pluginConfig.get("maxTokens") as number | undefined) ?? 2048;
  const softTargetTokens = (pluginConfig.get("softTargetTokens") as number | undefined) ?? 512;
  const responseLanguage = (pluginConfig.get("responseLanguage") as string | undefined)?.trim() ?? "";
  const langInstruction = responseLanguage ? `\n\nRespond in ${responseLanguage}.` : "";

  type LoadedModel = Awaited<ReturnType<typeof ctl.client.llm.model>>;
  type PreparedImage = Awaited<ReturnType<typeof ctl.client.files.prepareImage>>;

  /** Renders a caught value for an error message: its `message` if present, else its string form. */
  const describeError = (error: unknown): string =>
    `${(error as { message?: unknown } | null | undefined)?.message ?? String(error)}`;

  /**
   * Requests a model from the client (no identifier is passed) and checks its `vision` flag.
   * Yields either the model or the error text the tool should return.
   */
  const loadVisionModel = async (): Promise<{ model: LoadedModel } | { failure: string }> => {
    let model: LoadedModel;
    try {
      model = await ctl.client.llm.model();
    } catch (error: unknown) {
      return {
        failure: `Error: no model is currently loaded. Load a vision model in LM Studio and retry. (${describeError(error)})`,
      };
    }
    if (model.vision) {
      return { model };
    }
    return {
      failure:
        `Error: the currently loaded model "${model.identifier ?? "unknown"}" does not support vision. ` +
        "Unload it and load a multimodal vision model (e.g. LLaVA, Qwen2-VL, InternVL, Gemma3) and retry.",
    };
  };

  /**
   * Assembles the instruction text sent with the image(s).
   * `opening` is the first sentence; `imageListing` is an optional section placed
   * before the context (pass "" to omit it). Context and prompt are trimmed here.
   */
  const buildAnalysisPrompt = (opening: string, imageListing: string, context: string, prompt: string): string =>
    [
      opening,
      `Do not provide hidden reasoning or step-by-step chain-of-thought. `,
      `If uncertain, state uncertainty briefly. `,
      `Target up to ${softTargetTokens} tokens in the final answer.\n\n`,
      imageListing,
      `Known context:\n${context.trim()}\n\n`,
      `User request:\n${prompt.trim()}`,
      langInstruction,
    ].join("");

  /** Sends one user message carrying the prompt and images; returns the reply content or an error string. */
  const runAnalysis = async (model: LoadedModel, analysisPrompt: string, images: PreparedImage[]) => {
    let result: Awaited<ReturnType<typeof model.respond>>;
    try {
      result = await model.respond(
        [
          {
            role: "user",
            content: analysisPrompt,
            images,
          },
        ],
        { maxTokens },
      );
    } catch (error: unknown) {
      return `Error: model inference failed: ${describeError(error)}`;
    }
    return result.content;
  };

  const listLocalImagesTool = tool({
    name: "List Local Images",
    description:
      "Lists image files available in the working directory. " +
      "Call this first to get valid filenames before calling Analyze Local Image.",
    parameters: {
      recursive: z
        .boolean()
        .optional()
        .default(false)
        .describe("If true, also lists images in subdirectories."),
      maxResults: z
        .number()
        .int()
        .min(1)
        .max(500)
        .optional()
        .default(100)
        .describe("Maximum number of images to return."),
    },
    implementation: async ({ recursive, maxResults }, { status, warn }) => {
      status("Scanning working directory for images...");
      const images = await collectImages(ctl.getWorkingDirectory(), recursive ?? false, maxResults ?? 100, warn);

      // An empty scan yields count 0 and an empty list; only the hint differs.
      const nothingFound = images.length === 0;
      return {
        count: images.length,
        images: images.map((img) => ({
          name: img.relativePath,
          sizeBytes: img.sizeBytes,
          modifiedAt: img.modifiedAt,
        })),
        hint: nothingFound
          ? "No images found. Download images first using Visit Website or Download Images."
          : "Pass the 'name' value to Analyze Local Image as 'imageName'.",
      };
    },
  });

  const analyzeLocalImageTool = tool({
    name: "Analyze Local Image",
    description:
      "Allows you to analyze one local image from the working directory.",
    parameters: {
      imageName: z
        .string()
        .describe("Image file name from List Local Images, e.g. '1774334299591-9-thumb.webp'."),
      prompt: z
        .string()
        .describe("Required analysis task/question. Keep it clear and specific."),
      context: z
        .string()
        .describe("Required known context for this image and task (source, intent, constraints, known facts, prior findings)."),
    },
    implementation: async ({ imageName, prompt, context }, { status, warn }) => {
      const workingDirectory = ctl.getWorkingDirectory();

      const safeImageName = sanitizeRelativeInput(imageName);
      if (!safeImageName) {
        return "Error: imageName is empty or invalid.";
      }

      const resolvedImagePath = await resolveImagePathByName(workingDirectory, safeImageName, warn);
      if (!resolvedImagePath) {
        return `Error: image not found: ${safeImageName}`;
      }

      status("Preparing image for multimodal model...");
      const loaded = await loadVisionModel();
      if ("failure" in loaded) {
        return loaded.failure;
      }

      let preparedImage: PreparedImage;
      try {
        preparedImage = await ctl.client.files.prepareImage(resolvedImagePath.absolutePath);
      } catch (error: unknown) {
        return `Error: failed to prepare image "${safeImageName}" for the model: ${describeError(error)}`;
      }

      const analysisPrompt = buildAnalysisPrompt(
        `You are a vision assistant. Analyze the provided image and give a concise final answer. `,
        "",
        context,
        prompt,
      );

      status("Running multimodal analysis...");
      return runAnalysis(loaded.model, analysisPrompt, [preparedImage]);
    },
  });

  const analyzeMultipleImagesTool = tool({
    name: "Analyze Multiple Images",
    description:
      "Analyzes 2-8 local images in a single vision model call. " +
      "Use for comparison, sequence analysis, or multi-image questions.",
    parameters: {
      imageNames: z
        .array(z.string())
        .min(2)
        .max(8)
        .describe("List of image file names from List Local Images."),
      prompt: z
        .string()
        .describe("Required analysis task/question covering all images."),
      context: z
        .string()
        .describe("Required known context (source, intent, constraints, prior findings)."),
    },
    implementation: async ({ imageNames, prompt, context }, { status, warn }) => {
      const workingDirectory = ctl.getWorkingDirectory();

      status("Resolving image paths...");
      // Names that are invalid or cannot be found are reported through `warn` and
      // skipped; the call only fails outright when nothing at all resolves.
      const resolvedPaths: Array<{ absolutePath: string; relativePath: string }> = [];
      for (const name of imageNames) {
        const safe = sanitizeRelativeInput(name);
        if (!safe) {
          warn(`Skipping invalid image name: ${name}`);
          continue;
        }
        const resolved = await resolveImagePathByName(workingDirectory, safe, warn);
        if (resolved) {
          resolvedPaths.push(resolved);
        } else {
          warn(`Image not found, skipping: ${name}`);
        }
      }
      if (resolvedPaths.length === 0) {
        return "Error: none of the provided image names could be resolved.";
      }

      const loaded = await loadVisionModel();
      if ("failure" in loaded) {
        return loaded.failure;
      }

      status(`Preparing ${resolvedPaths.length} images for multimodal model...`);
      let preparedImages: PreparedImage[];
      try {
        preparedImages = await Promise.all(
          resolvedPaths.map((entry) => ctl.client.files.prepareImage(entry.absolutePath)),
        );
      } catch (error: unknown) {
        return `Error: failed to prepare images for the model: ${describeError(error)}`;
      }

      // Numbered listing so the model can refer to images by position.
      const numberedNames = resolvedPaths
        .map((entry, index) => `[${index + 1}] ${entry.relativePath}`)
        .join(", ");
      const analysisPrompt = buildAnalysisPrompt(
        `You are a vision assistant. Analyze all provided images and give a concise final answer. `,
        `Images provided: ${numberedNames}\n\n`,
        context,
        prompt,
      );

      status("Running multimodal analysis...");
      return runAnalysis(loaded.model, analysisPrompt, preparedImages);
    },
  });

  return [listLocalImagesTool, analyzeLocalImageTool, analyzeMultipleImagesTool];
}
/**
 * Scans a directory for image files, breadth-first, and returns their metadata.
 *
 * Directories that cannot be read and files that cannot be stat'ed are reported
 * through `warn` and skipped; neither aborts the scan.
 *
 * @param directoryPath Directory to start from; relative paths in the result are relative to it.
 * @param recursive     When true, subdirectories are queued and scanned as well.
 * @param maxResults    The scan stops as soon as this many images have been collected.
 * @param warn          Sink for non-fatal problems encountered during the scan.
 * @returns Images in discovery order, at most `maxResults` of them.
 */
async function collectImages(
  directoryPath: string,
  recursive: boolean,
  maxResults: number,
  warn: (text: string) => void,
): Promise<FoundImage[]> {
  const results: FoundImage[] = [];
  // FIFO list of directories; the cursor walks it while subdirectories are appended.
  const pending: string[] = [directoryPath];

  const reasonFor = (error: unknown): string =>
    `${(error as { message?: unknown } | null | undefined)?.message || String(error)}`;

  for (let cursor = 0; cursor < pending.length && results.length < maxResults; cursor += 1) {
    const folder = pending[cursor] as string;

    const listing = await readdir(folder, { withFileTypes: true }).catch((error: unknown) => {
      warn(`Cannot read directory '${folder}': ${reasonFor(error)}`);
      return null;
    });
    if (listing === null) {
      continue;
    }

    for (const entry of listing) {
      if (results.length >= maxResults) {
        break;
      }

      const entryPath = join(folder, entry.name);

      if (entry.isDirectory()) {
        if (recursive) {
          pending.push(entryPath);
        }
        continue;
      }

      // Only regular files with an image extension are of interest.
      if (!entry.isFile() || !isImagePath(entry.name)) {
        continue;
      }

      try {
        const fileInfo = await stat(entryPath);
        results.push({
          absolutePath: entryPath,
          relativePath: normalize(relative(directoryPath, entryPath)).replace(/\\/g, "/"),
          sizeBytes: fileInfo.size,
          modifiedAt: fileInfo.mtime.toISOString(),
        });
      } catch (error: unknown) {
        warn(`Cannot stat file '${entryPath}': ${reasonFor(error)}`);
      }
    }
  }

  return results;
}
/**
 * Reports whether a file name or path ends in one of the known image extensions.
 * The comparison ignores the case of the extension.
 */
function isImagePath(value: string): boolean {
  const extension = extname(value).toLowerCase();
  return IMAGE_EXTENSIONS.has(extension);
}
/**
 * Validates a caller-supplied image name and turns it into a clean, forward-slash
 * relative path that stays inside the directory it will later be joined to.
 *
 * Backslashes are converted to forward slashes *before* normalization, so that
 * Windows-style input (`a\..\b.png`) collapses the same way on every host platform.
 *
 * @param input Raw file name or relative path; may be missing.
 * @returns The normalized relative path, or `null` when the input is empty,
 *          absolute / root-relative, refers to the directory itself (`.`), or
 *          contains a `..` segment after normalization.
 */
function sanitizeRelativeInput(input?: string): string | null {
  const trimmed = input?.trim();
  if (!trimmed) return null;
  if (isAbsolute(trimmed)) return null;

  const unified = trimmed.replace(/\\/g, "/");
  // `isAbsolute` only recognises the host platform's separators; after unifying
  // them a leading slash is still a root-relative path and must be refused.
  if (unified.startsWith("/")) return null;

  // `normalize` may reintroduce backslashes on Windows, hence the second replace.
  const normalized = normalize(unified).replace(/\\/g, "/").replace(/^\.\/+/, "");
  if (!normalized || normalized === ".") return null;

  // Any surviving `..` segment (including a bare "..") would climb out of the base directory.
  if (normalized.split("/").includes("..")) return null;

  return normalized;
}
/**
 * Locates an image inside the working directory from a (sanitized) relative name.
 *
 * Lookup order:
 *   1. The exact path `<workingDirectory>/<imageName>`, if it is a regular file
 *      with an image extension.
 *   2. Otherwise, the first image found by a recursive scan (capped at 1000 images)
 *      whose base name equals the requested base name, ignoring case.
 *
 * Whichever file is found is then passed through `preferFullImageIfThumb`.
 *
 * @param workingDirectory Directory the name is resolved against.
 * @param imageName        Relative name or path of the wanted image.
 * @param warn             Sink for non-fatal problems raised by the fallback scan.
 * @returns The resolved absolute/relative path pair, or `null` when nothing matches.
 */
async function resolveImagePathByName(
  workingDirectory: string,
  imageName: string,
  warn: (text: string) => void,
): Promise<{ absolutePath: string; relativePath: string } | null> {
  // Fast path: the name points straight at an image file.
  const exactPath = join(workingDirectory, imageName);
  const exactStats = await stat(exactPath).catch(() => null);
  if (exactStats !== null && exactStats.isFile() && isImagePath(imageName)) {
    return preferFullImageIfThumb(exactPath, imageName);
  }

  // Slow path: look for the same file name anywhere below the working directory.
  const everyImage = await collectImages(workingDirectory, true, 1000, warn);
  const wantedName = basename(imageName).toLowerCase();
  for (const candidate of everyImage) {
    if (basename(candidate.relativePath).toLowerCase() === wantedName) {
      return preferFullImageIfThumb(candidate.absolutePath, candidate.relativePath);
    }
  }

  return null;
}
/**
 * Swaps a `*-thumb.webp` file for a same-named image next to it, when one exists.
 *
 * For a file called `<stem>-thumb.webp`, every `<stem><ext>` in the same directory
 * is probed for each known image extension. Among those that exist as regular
 * files the largest (by byte size) wins; on equal size the extension probed first
 * wins. Any other file name, or a thumbnail with no such sibling, is returned as is.
 *
 * @param absolutePath Location of the file on disk.
 * @param relativePath The same file expressed relative to the working directory.
 * @returns The path pair of the preferred file.
 */
async function preferFullImageIfThumb(
  absolutePath: string,
  relativePath: string,
): Promise<{ absolutePath: string; relativePath: string }> {
  const unchanged = { absolutePath, relativePath };

  // Group 1 is the file name with the "-thumb.webp" suffix removed.
  const stem = /^(.*)-thumb\.webp$/i.exec(basename(relativePath))?.[1];
  if (stem === undefined) {
    return unchanged;
  }

  const siblingDir = dirname(absolutePath);
  const siblingDirRelative = dirname(relativePath).replace(/\\/g, "/");

  // Probe order doubles as the tie-break order for equally sized siblings.
  const extensionsToTry = [
    ".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff",
    ".tif", ".avif", ".heic", ".heif", ".jxl", ".svg", ".ico",
  ];

  let largest: { absolutePath: string; relativePath: string; sizeBytes: number } | null = null;
  for (const extension of extensionsToTry) {
    const siblingName = `${stem}${extension}`;
    const siblingAbsolutePath = join(siblingDir, siblingName);

    const siblingStats = await stat(siblingAbsolutePath).catch(() => null);
    if (!siblingStats?.isFile() || !isImagePath(siblingName)) {
      continue;
    }

    // Strictly-greater keeps the earliest candidate when sizes are equal.
    if (largest !== null && siblingStats.size <= largest.sizeBytes) {
      continue;
    }

    largest = {
      absolutePath: siblingAbsolutePath,
      relativePath: siblingDirRelative === "." ? siblingName : `${siblingDirRelative}/${siblingName}`,
      sizeBytes: siblingStats.size,
    };
  }

  if (largest === null) {
    return unchanged;
  }
  return { absolutePath: largest.absolutePath, relativePath: largest.relativePath };
}