// Project Files
// src/vlm/analyzer.ts
/**
* VLM (Vision Language Model) Analyzer
* Analyzes images without embedded metadata using LM Studio VLM API
*/
import { readFile } from 'fs/promises';
import path from 'path';
import type { ParsedDocument } from '../types';
/**
 * VLM analysis result
 *
 * Produced by VLMAnalyzer.analyzeImage().
 */
export interface VLMAnalysisResult {
// Natural-language description of the image returned by the model.
description: string;
// Optional tag list; not populated by VLMAnalyzer in this file — reserved for callers.
suggestedTags?: string[];
// Optional detected art style; not populated by VLMAnalyzer in this file — reserved for callers.
detectedStyle?: string;
// ISO-8601 timestamp of when the analysis completed (new Date().toISOString()).
analyzedAt: string;
}
/**
 * VLM Analyzer configuration
 */
export interface VLMConfig {
// Root URL of the OpenAI-compatible server (e.g. LM Studio).
// A single trailing slash is stripped by the VLMAnalyzer constructor.
baseUrl: string;
// Model identifier sent in the chat-completions request body.
model: string;
// Cap on response tokens; VLMAnalyzer defaults this to 300 when omitted.
maxTokens?: number;
}
// Default instruction sent with every image when no customPrompt is passed to
// VLMAnalyzer.analyzeImage(); worded to yield search-friendly descriptions.
const DEFAULT_VLM_PROMPT = `Describe this AI-generated image in detail. Include:
- Main subject and composition
- Art style (photorealistic, anime, digital art, painting, etc.)
- Mood and atmosphere
- Colors and lighting
- Notable details or elements
Be concise but thorough. Format as a natural description that could be used for searching.`;
export class VLMAnalyzer {
  /** Extension → MIME type used when building the base64 data: URL. */
  private static readonly MIME_TYPES: Record<string, string> = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.webp': 'image/webp',
    '.gif': 'image/gif',
  };

  private readonly baseUrl: string;
  private readonly model: string;
  private readonly maxTokens: number;

  constructor(config: VLMConfig) {
    // Strip one trailing slash so URL concatenation below never yields "//v1/...".
    this.baseUrl = config.baseUrl.replace(/\/$/, '');
    this.model = config.model;
    // ?? rather than ||: an explicitly configured value is respected instead of
    // being silently replaced by the 300-token default on falsy input.
    this.maxTokens = config.maxTokens ?? 300;
  }

  /**
   * Analyze an image and return a description.
   *
   * Reads the file from disk, inlines it as a base64 data: URL, and posts an
   * OpenAI-compatible chat-completions request to the configured endpoint.
   *
   * @param imagePath - Path to the image file on disk.
   * @param customPrompt - Replaces DEFAULT_VLM_PROMPT when provided
   *   (an empty string deliberately falls back to the default via ||).
   * @returns Description plus the timestamp of the analysis.
   * @throws If the file cannot be read or the API responds non-2xx.
   */
  async analyzeImage(imagePath: string, customPrompt?: string): Promise<VLMAnalysisResult> {
    const imageBuffer = await readFile(imagePath);
    const base64 = imageBuffer.toString('base64');
    const mimeType = this.getMimeType(imagePath);

    const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.model,
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: customPrompt || DEFAULT_VLM_PROMPT,
              },
              {
                type: 'image_url',
                image_url: {
                  url: `data:${mimeType};base64,${base64}`,
                },
              },
            ],
          },
        ],
        max_tokens: this.maxTokens,
        temperature: 0.3, // Lower for more consistent descriptions
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`VLM API error: ${response.status} - ${error}`);
    }

    // NOTE(review): response shape is asserted, not validated — assumes the
    // endpoint is OpenAI-compatible; consider schema validation upstream.
    const data = await response.json() as {
      choices: Array<{ message: { content: string } }>;
    };

    return {
      description: data.choices[0]?.message?.content ?? '',
      analyzedAt: new Date().toISOString(),
    };
  }

  /**
   * Parse an image using VLM and return a ParsedDocument.
   *
   * Never throws: on failure it logs and returns a document carrying the
   * error message, so batch pipelines can continue past bad images.
   */
  async parse(imagePath: string): Promise<ParsedDocument> {
    try {
      const result = await this.analyzeImage(imagePath);
      return {
        content: `VLM Analysis: ${result.description}\nImage: ${imagePath}`,
        metadata: {
          format: 'vlm-analyzed',
          path: imagePath,
          vlmDescription: result.description,
          analyzedAt: result.analyzedAt,
          source: 'vlm',
        },
      };
    } catch (error) {
      // Log with context but degrade gracefully rather than failing the caller.
      console.error(`VLM analysis failed for ${imagePath}:`, error);
      return {
        content: `Image: ${imagePath}\n(VLM analysis failed)`,
        metadata: {
          format: 'image',
          path: imagePath,
          hasMetadata: false,
          vlmError: error instanceof Error ? error.message : 'Unknown error',
        },
      };
    }
  }

  /**
   * Check if VLM is available (GET /v1/models with a 5-second timeout).
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/v1/models`, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      return response.ok;
    } catch {
      // Network error or timeout — treat the endpoint as unavailable.
      return false;
    }
  }

  /**
   * Get MIME type from file extension; unknown extensions default to PNG,
   * matching the original switch's default branch.
   */
  private getMimeType(filePath: string): string {
    const ext = path.extname(filePath).toLowerCase();
    return VLMAnalyzer.MIME_TYPES[ext] ?? 'image/png';
  }
}
/**
 * Batch VLM analyzer for processing multiple images.
 */
export class VLMBatchAnalyzer {
  private readonly analyzer: VLMAnalyzer;
  private readonly concurrency: number;

  /**
   * @param concurrency - Maximum images analyzed in parallel. Usually 1 for
   *   local LM Studio; non-integer or sub-1 values are clamped to 1.
   */
  constructor(config: VLMConfig, concurrency = 1) {
    this.analyzer = new VLMAnalyzer(config);
    this.concurrency = Math.max(1, Math.floor(concurrency));
  }

  /**
   * Analyze multiple images with progress reporting.
   *
   * Fix: this.concurrency was previously stored but never used — processing
   * was always sequential. A small worker pool now honors it; at the default
   * concurrency of 1 the behavior is identical to before. Per-image failures
   * are recorded as Error values rather than aborting the whole batch.
   *
   * @param imagePaths - Image files to analyze.
   * @param onProgress - Invoked as each image is picked up (1-based index).
   * @returns Map from path to its result, or to the Error that occurred.
   */
  async analyzeImages(
    imagePaths: string[],
    onProgress?: (current: number, total: number, path: string) => void
  ): Promise<Map<string, VLMAnalysisResult | Error>> {
    const results = new Map<string, VLMAnalysisResult | Error>();
    let nextIndex = 0;

    // Each worker claims the next unprocessed index. The claim (read +
    // increment) is synchronous — no await in between — so two workers can
    // never take the same image.
    const worker = async (): Promise<void> => {
      while (nextIndex < imagePaths.length) {
        const i = nextIndex++;
        const imagePath = imagePaths[i];
        onProgress?.(i + 1, imagePaths.length, imagePath);
        try {
          const result = await this.analyzer.analyzeImage(imagePath);
          results.set(imagePath, result);
        } catch (error) {
          results.set(imagePath, error instanceof Error ? error : new Error(String(error)));
        }
      }
    };

    // Never spawn more workers than there are images (min 1 so an empty
    // input still resolves cleanly through one no-op worker).
    const poolSize = Math.min(this.concurrency, Math.max(imagePaths.length, 1));
    await Promise.all(Array.from({ length: poolSize }, () => worker()));
    return results;
  }

  /**
   * Check if VLM is available (delegates to the underlying analyzer).
   */
  async isAvailable(): Promise<boolean> {
    return this.analyzer.isAvailable();
  }
}