// Project Files
// src/indexer.ts
/**
* Indexer - Shared indexing logic for preprocessor and tools
*/
import { existsSync, readFileSync } from "fs";
import { glob } from "glob";
import path from "path";
import { homedir } from "os";
import type { ToolsProviderController, PromptPreprocessorController } from "@lmstudio/sdk";
import { configSchematics } from "./config";
import { JsonlParser } from "./documents/parsers/jsonlParser";
import { PngMetadataParser } from "./documents/parsers/pngMetadataParser";
import { parseDrawThingsProjects } from "./documents/parsers/projectFileParser";
import { watchJsonlLogs, watchImages, watchProjects } from "./documents/fileWatcher";
import { EmbeddingClient, EmbeddingStore } from "./embeddings";
import type { GenerationMetadata, IndexedGeneration } from "./types";
// ═══════════════════════════════════════════════════════════════
// Cache & Persistent Store
// ═══════════════════════════════════════════════════════════════
/** Result of the last completed indexing run; null before the first run and after the cache is invalidated. */
let cachedGenerations: IndexedGeneration[] | null = null;
/** Model name used for the embeddings in the cache; null when the last run generated no embeddings (semantic search disabled or model unavailable). */
let cachedEmbeddingModel: string | null = null;
/** True while indexGenerations is scanning; concurrent callers poll this flag instead of starting a second scan. */
let isIndexing = false;
/** Lazily created embedding client; replaced when the configured model name differs from the client's model. */
let embeddingClient: EmbeddingClient | null = null;
/** Lazily created embedding store, initialised the first time an indexing run has semantic search enabled. */
let embeddingStore: EmbeddingStore | null = null;
// ═══════════════════════════════════════════════════════════════
// Indexing
// ═══════════════════════════════════════════════════════════════
/** Either controller type; the indexer only calls getGlobalPluginConfig() on it. */
type ConfigController = ToolsProviderController | PromptPreprocessorController;
/**
 * Progress callback for status updates.
 * Receives a short human-readable message; if it returns a promise, the indexer
 * awaits it before continuing.
 */
export type IndexProgressCallback = (message: string) => void | Promise<void>;
/** Per-source bookkeeping gathered during a scan and printed in the consolidated summary. */
interface SourceStats {
  name: string;
  enabled: boolean;
  path: string;
  exists: boolean;
  filesFound: number;
  generationsExtracted: number;
}

/** Display label recorded on image-based source info. */
type ImageTypeLabel = 'ComfyUI' | 'Draw Things';

/** Maps the metadata parser's raw type tag to its display label; unrecognised tags yield undefined. */
function toImageTypeLabel(rawType: unknown): ImageTypeLabel | undefined {
  if (rawType === 'comfyui') {
    return 'ComfyUI';
  }
  if (rawType === 'drawthings') {
    return 'Draw Things';
  }
  return undefined;
}

/** Creates a zeroed stats record for one source. */
function createSourceStats(name: string, enabled: boolean, sourcePath: string): SourceStats {
  return {
    name,
    enabled,
    path: sourcePath,
    exists: false,
    filesFound: 0,
    generationsExtracted: 0,
  };
}

/**
 * Reads embedded metadata from each file and appends one generation per file that has any.
 * Files whose metadata cannot be read are skipped on purpose (best effort).
 * Progress is reported every 50 files using `progressNoun` in the message.
 */
async function collectImageGenerations(
  files: readonly string[],
  progressNoun: string,
  buildSourceInfo: (file: string, imageType: ImageTypeLabel | undefined) => SourceInfo | Promise<SourceInfo>,
  generations: GenerationMetadata[],
  stats: SourceStats,
  onProgress?: IndexProgressCallback
): Promise<void> {
  let scanned = 0;
  for (const file of files) {
    scanned++;
    if (scanned % 50 === 0) {
      await onProgress?.(`Scanning ${progressNoun} ${scanned}/${files.length}...`);
    }
    try {
      const metadata = await PngMetadataParser.extractMetadata(file);
      if (metadata) {
        const sourceInfo = await buildSourceInfo(file, toImageTypeLabel(metadata.rawType));
        generations.push(parsedImageToMetadata(metadata, file, sourceInfo));
        stats.generationsExtracted++;
      }
    } catch {
      // Skip files without metadata
    }
  }
}

/** Indexes the plugin's JSONL audit logs found under `logsDir`. */
async function indexJsonlLogs(
  enabled: boolean,
  logsDir: string,
  generations: GenerationMetadata[],
  onProgress?: IndexProgressCallback
): Promise<SourceStats> {
  const stats = createSourceStats("JSONL Logs", enabled, logsDir);
  if (!enabled) {
    return stats;
  }

  await onProgress?.("Scanning JSONL logs...");
  stats.exists = existsSync(logsDir);
  if (!stats.exists) {
    return stats;
  }

  watchJsonlLogs(logsDir);
  // Only index the Draw Things audit log we support, not other JSONL files like vision-promotion.jsonl
  const jsonlFiles = await findFiles(logsDir, "**/generate-image-plugin.audit.jsonl");
  stats.filesFound = jsonlFiles.length;

  for (let i = 0; i < jsonlFiles.length; i++) {
    const file = jsonlFiles[i];
    await onProgress?.(`Parsing JSONL ${i + 1}/${jsonlFiles.length}...`);
    try {
      const parsed = await JsonlParser.parseAsGenerations(file);
      for (const gen of parsed) {
        generations.push(parsedGenerationToMetadata(gen));
        stats.generationsExtracted++;
      }
    } catch (e) {
      console.warn(`[Index] Failed to parse JSONL: ${file}`, e);
    }
  }
  return stats;
}

/** Indexes images (with embedded metadata) from every configured content directory that exists. */
async function indexImageDirectories(
  contentDirectories: readonly string[],
  generations: GenerationMetadata[],
  onProgress?: IndexProgressCallback
): Promise<SourceStats> {
  const stats = createSourceStats(
    "Image Directories",
    contentDirectories.length > 0,
    contentDirectories.join(", ")
  );
  if (!stats.enabled) {
    return stats;
  }

  await onProgress?.("Scanning image directories...");
  const allImageFiles: string[] = [];
  for (const dir of contentDirectories) {
    if (existsSync(dir)) {
      stats.exists = true;
      watchImages(dir);
      const imageFiles = await findFiles(dir, "**/*.{png,jpg,jpeg,webp}");
      // Element-wise push: spreading a very large array into push() can exceed the engine's argument limit.
      for (const imageFile of imageFiles) {
        allImageFiles.push(imageFile);
      }
    }
  }
  stats.filesFound = allImageFiles.length;
  await onProgress?.(`Found ${allImageFiles.length} images to scan`);

  await collectImageGenerations(
    allImageFiles,
    "images",
    (file, imageType) => {
      const sourceInfo: SourceInfo = imageType
        ? { type: 'saved_image', filePath: file, imageType }
        : { type: 'saved_image', filePath: file };
      return sourceInfo;
    },
    generations,
    stats,
    onProgress
  );
  return stats;
}

/** Indexes PNG chat attachments stored under ~/.lmstudio/user-files. */
async function indexChatAttachments(
  enabled: boolean,
  generations: GenerationMetadata[],
  onProgress?: IndexProgressCallback
): Promise<SourceStats> {
  const userFilesDir = path.join(homedir(), ".lmstudio", "user-files");
  const stats = createSourceStats("Chat Attachments", enabled, userFilesDir);
  if (!enabled) {
    return stats;
  }

  await onProgress?.("Scanning chat attachments...");
  stats.exists = existsSync(userFilesDir);
  if (!stats.exists) {
    return stats;
  }

  watchImages(userFilesDir);
  const pngFiles = await findFiles(userFilesDir, "**/*.png");
  stats.filesFound = pngFiles.length;
  await onProgress?.(`Found ${pngFiles.length} PNG attachments to scan`);

  await collectImageGenerations(
    pngFiles,
    "attachments",
    async (file, imageType): Promise<SourceInfo> => {
      // Fall back to the on-disk name when the sidecar metadata has no original name.
      const originalName = await getOriginalName(file) || path.basename(file);
      const sourceInfo: SourceInfo = imageType
        ? { type: 'attachment', originalName, imageType }
        : { type: 'attachment', originalName };
      return sourceInfo;
    },
    generations,
    stats,
    onProgress
  );
  return stats;
}

/** Indexes Draw Things project databases (.sqlite3) found under `projectsDir`. */
async function indexDrawThingsProjects(
  enabled: boolean,
  projectsDir: string,
  generations: GenerationMetadata[],
  onProgress?: IndexProgressCallback
): Promise<SourceStats> {
  const stats = createSourceStats("Draw Things Projects", enabled, projectsDir);
  if (!enabled) {
    return stats;
  }

  await onProgress?.("Scanning Draw Things projects...");
  stats.exists = existsSync(projectsDir);
  if (!stats.exists) {
    return stats;
  }

  watchProjects(projectsDir);
  const projectFiles = await findFiles(projectsDir, "**/*.sqlite3");
  stats.filesFound = projectFiles.length;
  if (projectFiles.length > 0) {
    await onProgress?.("Parsing project files...");
    const projectGenerations = await parseDrawThingsProjects(projectFiles);
    // Element-wise push: spreading a very large array into push() can exceed the engine's argument limit.
    for (const projectGeneration of projectGenerations) {
      generations.push(projectGeneration);
    }
    stats.generationsExtracted = projectGenerations.length;
  }
  return stats;
}

/** Classifies a source for the summary: DISABLED, NOT FOUND, OK (produced generations) or EMPTY. */
function describeSourceStatus(stats: SourceStats): "DISABLED" | "NOT FOUND" | "OK" | "EMPTY" {
  if (!stats.enabled) {
    return "DISABLED";
  }
  if (!stats.exists) {
    return "NOT FOUND";
  }
  return stats.generationsExtracted > 0 ? "OK" : "EMPTY";
}

/** Prints the consolidated per-source summary to the console. */
function logSourceSummary(sourceStats: readonly SourceStats[], totalGenerations: number): void {
  console.log("\n[Index] SOURCE SUMMARY:");
  for (const s of sourceStats) {
    const status = describeSourceStatus(s);
    const statusIcon = status === "OK" ? "✓" : status === "DISABLED" ? "○" : "✗";
    console.log(`[Index] ${statusIcon} ${s.name}: ${status}`);
    console.log(`[Index] Path: ${s.path}`);
    if (s.enabled) {
      console.log(`[Index] Exists: ${s.exists ? "yes" : "NO"}`);
      if (s.exists) {
        console.log(`[Index] Files found: ${s.filesFound}`);
        console.log(`[Index] Generations extracted: ${s.generationsExtracted}`);
      }
    }
  }
  const productiveSources = sourceStats.filter(s => s.enabled && s.exists && s.generationsExtracted > 0).length;
  console.log(`[Index] TOTAL: ${totalGenerations} generations from ${productiveSources} sources\n`);
}

/**
 * Index all configured sources.
 * Works with both ToolsProviderController and PromptPreprocessorController.
 *
 * Sources are scanned in a fixed order (JSONL logs, image directories, chat
 * attachments, Draw Things projects). When semantic search is enabled and the
 * embedding model is reachable, prompt embeddings are attached to the results.
 * The result is cached in module state until invalidated.
 *
 * @param ctl Controller used to read the global plugin configuration.
 * @param forceReindex When true, ignore an existing cache and rescan. If another
 *   scan is already running, this call still waits for that scan and returns its result.
 * @param onProgress Optional status callback; awaited on every update.
 * @returns The indexed generations. A caller that waited on another scan gets
 *   an empty array if that scan left no cache (for example because it failed).
 */
export async function indexGenerations(
  ctl: ConfigController,
  forceReindex = false,
  onProgress?: IndexProgressCallback
): Promise<IndexedGeneration[]> {
  const config = ctl.getGlobalPluginConfig(configSchematics);

  // Check if embedding model changed (requires re-embedding)
  const currentEmbeddingModel = config.get("embeddingModel");
  const embeddingModelChanged = cachedEmbeddingModel !== null && cachedEmbeddingModel !== currentEmbeddingModel;
  if (embeddingModelChanged) {
    console.log(`[Index] Embedding model changed: ${cachedEmbeddingModel} → ${currentEmbeddingModel}`);
    cachedGenerations = null; // Force re-indexing with new embeddings
  }

  // Return cached data if available (invalidated by FileWatchers)
  if (cachedGenerations && !forceReindex) {
    await onProgress?.(`Using cached index (${cachedGenerations.length} generations)`);
    return cachedGenerations;
  }

  // Another call is already scanning: wait for it rather than scanning twice.
  if (isIndexing) {
    await onProgress?.("Waiting for indexing to complete...");
    while (isIndexing) {
      await new Promise(r => setTimeout(r, 100));
    }
    return cachedGenerations || [];
  }

  isIndexing = true;
  const generations: GenerationMetadata[] = [];
  const sourceStats: SourceStats[] = [];

  try {
    sourceStats.push(await indexJsonlLogs(
      config.get("searchJsonlLogs"),
      config.get("jsonlLogsDirectory"),
      generations,
      onProgress
    ));
    sourceStats.push(await indexImageDirectories(
      config.get("contentDirectories"),
      generations,
      onProgress
    ));
    sourceStats.push(await indexChatAttachments(
      config.get("searchChatAttachments"),
      generations,
      onProgress
    ));
    sourceStats.push(await indexDrawThingsProjects(
      config.get("searchDrawThingsProjects"),
      config.get("drawThingsProjectsDirectory"),
      generations,
      onProgress
    ));

    logSourceSummary(sourceStats, generations.length);
    await onProgress?.(`Indexed ${generations.length} generations`);

    // ─────────────────────────────────────────────────────────────
    // Generate Embeddings (if semantic weight > 0 and model configured)
    // ─────────────────────────────────────────────────────────────
    const semanticWeight = config.get("semanticWeight");
    const embeddingModel = config.get("embeddingModel");
    const lmStudioUrl = config.get("lmStudioBaseUrl");
    const semanticEnabled = semanticWeight > 0 && !!embeddingModel;

    let indexedGenerations: IndexedGeneration[] = generations;

    if (semanticEnabled) {
      await onProgress?.("Initializing embedding system...");

      // Publish the store to module state only after init() succeeds. Otherwise a
      // failed init would leave a half-initialised store that later runs reuse
      // without ever retrying init().
      let store = embeddingStore;
      if (!store) {
        store = new EmbeddingStore();
        await store.init();
        embeddingStore = store;
      }

      // Re-create the client when none exists yet or the configured model changed.
      let client = embeddingClient;
      if (!client || client.getModelName() !== embeddingModel) {
        client = new EmbeddingClient({
          baseUrl: lmStudioUrl,
          model: embeddingModel,
        });
        embeddingClient = client;
      }

      // Check if embedding API is available
      const isAvailable = await client.isAvailable();
      if (isAvailable) {
        await onProgress?.("Loading/generating embeddings...");
        indexedGenerations = await generateEmbeddings(generations, client, store, onProgress);
        cachedEmbeddingModel = embeddingModel;
        console.log(`[Index] Embeddings ready for ${indexedGenerations.filter(g => g.promptEmbedding).length}/${generations.length} prompts`);
      } else {
        console.warn(`[Index] Embedding model "${embeddingModel}" not available - semantic search disabled`);
        await onProgress?.(`⚠️ Embedding model not loaded - keyword search only`);
        cachedEmbeddingModel = null;
      }
    } else {
      console.log("[Index] Semantic search disabled in config");
      cachedEmbeddingModel = null;
    }

    cachedGenerations = indexedGenerations;
    return indexedGenerations;
  } finally {
    // Always release the flag so waiting callers are not blocked forever after a failure.
    isIndexing = false;
  }
}
/**
 * Generate embeddings for all generations.
 * Uses batch processing and the persistent store as a cache.
 *
 * Each distinct non-empty prompt is looked up in the store first; only prompts
 * without a stored vector are sent to the embedding client, in batches. Newly
 * computed vectors are written back to the store. A failed batch is logged and
 * its prompts are left without an embedding, so the run still completes.
 *
 * @param generations Generations to annotate; the input objects are not mutated.
 * @param client Embedding client; its model name keys the cache.
 * @param store Persistent embedding cache.
 * @param onProgress Optional status callback; awaited on every update.
 * @returns One entry per input generation, in input order, with
 *   `promptEmbedding` and `embeddingModel` set when a vector is available.
 */
async function generateEmbeddings(
  generations: GenerationMetadata[],
  client: EmbeddingClient,
  store: EmbeddingStore,
  onProgress?: IndexProgressCallback
): Promise<IndexedGeneration[]> {
  const BATCH_SIZE = 20; // Embed 20 prompts at a time
  const modelName = client.getModelName();

  // Load existing embeddings from store
  await onProgress?.("Loading cached embeddings...");
  const cachedEmbeddings = store.getAllEmbeddings(modelName);
  console.log(`[Index] Found ${cachedEmbeddings.size} cached embeddings for model ${modelName}`);

  // Deduplicate prompts and split them into "already cached" and "needs embedding".
  const embeddingByPrompt = new Map<string, number[]>();
  const seenPrompts = new Set<string>();
  const promptsToEmbed: string[] = [];
  for (const gen of generations) {
    if (!gen.prompt || seenPrompts.has(gen.prompt)) {
      continue;
    }
    seenPrompts.add(gen.prompt);
    const cached = cachedEmbeddings.get(gen.prompt);
    if (cached) {
      embeddingByPrompt.set(gen.prompt, cached);
    } else {
      promptsToEmbed.push(gen.prompt);
    }
  }

  const cachedCount = seenPrompts.size - promptsToEmbed.length;
  console.log(`[Index] ${cachedCount} prompts from cache, ${promptsToEmbed.length} need embedding`);
  if (promptsToEmbed.length === 0) {
    await onProgress?.(`Using ${cachedCount} cached embeddings`);
  }

  // Batch embed only new prompts (using embedPassages for E5 models)
  const newEmbeddings: Array<{ prompt: string; embedding: number[] }> = [];
  const totalBatches = Math.ceil(promptsToEmbed.length / BATCH_SIZE);
  for (let i = 0; i < promptsToEmbed.length; i += BATCH_SIZE) {
    const batch = promptsToEmbed.slice(i, i + BATCH_SIZE);
    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
    await onProgress?.(`Embedding prompts ${i + 1}-${Math.min(i + BATCH_SIZE, promptsToEmbed.length)}/${promptsToEmbed.length}...`);
    try {
      // embedPassages() adds "passage: " prefix for E5 models
      const embeddings = await client.embedPassages(batch);

      // A response of the wrong length cannot be aligned with the prompts, so the
      // whole batch is treated as failed instead of persisting mismatched or
      // undefined vectors.
      if (!Array.isArray(embeddings) || embeddings.length !== batch.length) {
        const received = Array.isArray(embeddings) ? String(embeddings.length) : typeof embeddings;
        throw new Error(`Expected ${batch.length} embeddings, received ${received}`);
      }

      // Store embeddings in memory and collect for DB
      for (let j = 0; j < batch.length; j++) {
        const prompt = batch[j];
        const embedding = embeddings[j];
        // An empty or invalid vector is not cached, so the prompt is retried on the next run.
        if (!Array.isArray(embedding) || embedding.length === 0) {
          continue;
        }
        embeddingByPrompt.set(prompt, embedding);
        newEmbeddings.push({ prompt, embedding });
      }
      console.log(`[Index] Embedded batch ${batchNum}/${totalBatches}`);
    } catch (error) {
      console.error(`[Index] Embedding batch ${batchNum} failed:`, error);
      // Continue without embeddings for this batch
    }
  }

  // Persist new embeddings to store
  if (newEmbeddings.length > 0) {
    await onProgress?.(`Saving ${newEmbeddings.length} new embeddings to disk...`);
    store.setEmbeddings(newEmbeddings, modelName);
    console.log(`[Index] Persisted ${newEmbeddings.length} new embeddings to store`);
  }

  // Attach embeddings to generations
  const indexed: IndexedGeneration[] = [];
  for (const gen of generations) {
    const embedding = gen.prompt ? embeddingByPrompt.get(gen.prompt) : undefined;
    const hasEmbedding = !!embedding?.length;
    indexed.push({
      ...gen,
      promptEmbedding: hasEmbedding ? embedding : undefined,
      embeddingModel: hasEmbedding ? modelName : undefined,
    });
  }
  return indexed;
}
/**
 * Number of generations currently held in the in-memory cache (for status display).
 * Reports 0 when nothing is cached.
 */
export function getCachedCount(): number {
  if (!cachedGenerations) {
    return 0;
  }
  return cachedGenerations.length;
}
/**
 * Drop the in-memory index so the next indexGenerations() call rescans
 * (called by FileWatchers).
 */
export function invalidateCache(): void {
  const notice = "[Index] Cache invalidated";
  cachedGenerations = null;
  console.log(notice);
}
// ═══════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════
/**
 * Find files below `dir` that match a glob `pattern`.
 *
 * The directory is passed as glob's `cwd` instead of being joined into the
 * pattern. Joining would put backslashes into the pattern on Windows, where
 * glob treats them as escape characters, and would make glob interpret magic
 * characters in the directory name (such as `[`, `{` or `(`) as pattern syntax.
 *
 * @param dir Directory to search in.
 * @param pattern Glob pattern relative to `dir`, written with forward slashes.
 * @returns Absolute paths of matching files (directories excluded); an empty
 *   array if the search fails.
 */
async function findFiles(dir: string, pattern: string): Promise<string[]> {
  try {
    return await glob(pattern, {
      cwd: dir,
      nodir: true,
      absolute: true,
    });
  } catch (error) {
    // Best effort: an unreadable source must not abort indexing, but the failure should be visible.
    console.warn(`[Index] Failed to scan ${dir} for ${pattern}`, error);
    return [];
  }
}
import type { ParsedGeneration } from "./documents/parsers/jsonlParser";
import type { ParsedImageMetadata } from "./documents/parsers/pngMetadataParser";
import type { SourceInfo } from "./types";
/**
 * Convert one parsed audit-log entry into the shared GenerationMetadata shape.
 *
 * For each field the first truthy candidate wins: values reported in the
 * entry's output take precedence over the original user request. Dimensions
 * prefer the backend-returned size, then the post-processed size, then the
 * requested size.
 */
function parsedGenerationToMetadata(gen: ParsedGeneration): GenerationMetadata {
  const { entry } = gen;
  const produced = entry.output;
  const requested = entry.user_request;

  const resolvedWidth =
    produced?.backend_returned?.width || produced?.post_processed?.width || requested?.width;
  const resolvedHeight =
    produced?.backend_returned?.height || produced?.post_processed?.height || requested?.height;

  const metadata: GenerationMetadata = {
    timestamp: entry.timestamp,
    prompt: produced?.prompt_used || requested?.prompt || '',
    model: produced?.model_used || requested?.model || 'unknown',
    loras: produced?.loras_used,
    width: resolvedWidth,
    height: resolvedHeight,
    inferenceTimeMs: produced?.inference_time_ms,
    imagePaths: gen.imagePaths,
    httpPreviewUrls: gen.httpPreviewUrls,
    sourceInfo: { type: 'generate_image_variant', chatId: entry.chat_id },
  };
  return metadata;
}
/**
 * Convert metadata extracted from an image file into the shared
 * GenerationMetadata shape.
 *
 * A missing prompt becomes an empty string and a missing model becomes
 * 'unknown'; all other fields are copied through unchanged. The image itself
 * is recorded as the only entry in `imagePaths`.
 */
function parsedImageToMetadata(
  meta: ParsedImageMetadata,
  filePath: string,
  sourceInfo: SourceInfo
): GenerationMetadata {
  const {
    prompt,
    negativePrompt,
    model,
    loras,
    sampler,
    steps,
    cfgScale,
    seed,
    width,
    height,
  } = meta;

  const result: GenerationMetadata = {
    prompt: prompt || '',
    negativePrompt,
    model: model || 'unknown',
    loras,
    sampler,
    steps,
    cfgScale,
    seed,
    width,
    height,
    imagePaths: [filePath],
    sourceInfo,
  };
  return result;
}
/**
 * Get the original filename from an LM Studio attachment's sidecar metadata.
 * Format: `<file>.metadata.json` contains `{ originalName: "..." }`.
 *
 * Best effort: a missing, unreadable or malformed sidecar yields undefined
 * rather than an error. The parsed value is only returned when it really is a
 * string, so the declared return type holds even for unexpected JSON content.
 *
 * @param pngPath Path of the attachment file.
 * @returns The recorded original name, or undefined when none is available.
 */
async function getOriginalName(pngPath: string): Promise<string | undefined> {
  const metadataPath = `${pngPath}.metadata.json`;
  try {
    if (!existsSync(metadataPath)) {
      return undefined;
    }
    const parsed = JSON.parse(readFileSync(metadataPath, 'utf-8')) as { originalName?: unknown } | null;
    const originalName = parsed?.originalName;
    return typeof originalName === 'string' ? originalName : undefined;
  } catch {
    // Unreadable or malformed sidecar: the caller falls back to the on-disk name.
    return undefined;
  }
}