// Project Files
// src/embeddings/embeddingClient.ts
/**
* LM Studio Embeddings Client
*
* Connects to LM Studio's embedding API for semantic search.
* Adapted from rag-vc for draw-things-index use case.
*
* Key differences from rag-vc:
* - Optimized for short prompts (not document chunks)
* - Returns number[] instead of Float32Array for simpler JSON handling
* - Includes model discovery from LM Studio
*/
/** Connection settings for the LM Studio embedding endpoint. */
export interface EmbeddingConfig {
/** LM Studio base URL (default: http://127.0.0.1:1234) */
baseUrl: string;
/** Embedding model identifier (e.g., "text-embedding-nomic-embed-text-v1.5") */
model: string;
/** Request timeout in ms (default: 30000) */
timeout?: number;
}
/**
 * Shape of LM Studio's OpenAI-compatible `/v1/embeddings` response body.
 */
export interface EmbeddingResponse {
object: 'list';
/** One entry per input text; `index` maps the entry back to its input position. */
data: Array<{
object: 'embedding';
embedding: number[];
index: number;
}>;
/** Model that actually produced the embeddings. */
model: string;
/** Token accounting reported by the server. */
usage: {
prompt_tokens: number;
total_tokens: number;
};
}
/** One entry in LM Studio's `/v1/models` listing. */
export interface LoadedModel {
id: string;
/** Model category (e.g. "embedding"); used to filter for embedding models. */
type: string;
object: string;
}
/**
 * Client for generating embeddings via LM Studio's OpenAI-compatible API.
 *
 * Supports E5-Instruct models, which require query:/passage: prefixes
 * for optimal cross-language retrieval performance. Prefix handling is
 * decided once in the constructor from the configured model name.
 */
export class EmbeddingClient {
  private config: Required<EmbeddingConfig>;
  /** Embedding vector length, cached after the first successful request. */
  private dimension: number | null = null;
  /** Whether this model uses E5-style query:/passage: prefixes */
  private readonly usesE5Prefixes: boolean;

  constructor(config: EmbeddingConfig) {
    this.config = {
      ...config,
      // Use `??` rather than a spread-with-default (`{ timeout: 30000, ...config }`):
      // the spread form lets an explicitly-passed `timeout: undefined` clobber the
      // default, which would make setTimeout() fire the abort immediately.
      timeout: config.timeout ?? 30000,
    };
    // E5-Instruct models need query:/passage: prefixes for best results
    this.usesE5Prefixes = config.model.toLowerCase().includes('e5');
    if (this.usesE5Prefixes) {
      console.log(`[Embeddings] Using E5 model with query:/passage: prefixes`);
    }
  }

  /**
   * Generate embeddings for multiple texts (batch).
   * More efficient than calling embedSingle multiple times.
   *
   * @param texts - Raw texts to embed (no prefixes added here).
   * @returns One embedding vector per input, in input order.
   * @throws On HTTP errors or when the request exceeds the configured timeout.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.config.timeout);
    try {
      const response = await fetch(`${this.config.baseUrl}/v1/embeddings`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: this.config.model,
          input: texts,
        }),
        signal: controller.signal,
      });
      if (!response.ok) {
        const error = await response.text();
        throw new Error(`LM Studio embedding API error: ${response.status} - ${error}`);
      }
      const data: EmbeddingResponse = await response.json();
      // Cache dimension on first successful call
      if (this.dimension === null && data.data.length > 0) {
        this.dimension = data.data[0].embedding.length;
        console.log(`[Embeddings] Dimension detected: ${this.dimension}`);
      }
      // The server may return entries out of order; sort by index so the
      // result aligns with the input array.
      return data.data
        .sort((a, b) => a.index - b.index)
        .map(item => item.embedding);
    } catch (error) {
      // Translate our own abort into a descriptive timeout error.
      if (error instanceof Error && error.name === 'AbortError') {
        throw new Error(`Embedding request timed out after ${this.config.timeout}ms`);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Generate embedding for a single text.
   *
   * @throws If the API unexpectedly returns no embedding (keeps the
   *         `Promise<number[]>` contract honest instead of yielding undefined).
   */
  async embedSingle(text: string): Promise<number[]> {
    const [embedding] = await this.embed([text]);
    if (embedding === undefined) {
      throw new Error('LM Studio embedding API returned no embedding for input');
    }
    return embedding;
  }

  /**
   * Embed a search query (adds "query: " prefix for E5 models)
   * Use this when embedding user search terms.
   */
  async embedQuery(query: string): Promise<number[]> {
    const text = this.usesE5Prefixes ? `query: ${query}` : query;
    return this.embedSingle(text);
  }

  /**
   * Embed multiple search queries (adds "query: " prefix for E5 models)
   */
  async embedQueries(queries: string[]): Promise<number[][]> {
    const texts = this.usesE5Prefixes
      ? queries.map(q => `query: ${q}`)
      : queries;
    return this.embed(texts);
  }

  /**
   * Embed passages/documents (adds "passage: " prefix for E5 models)
   * Use this when embedding prompts for indexing.
   */
  async embedPassage(passage: string): Promise<number[]> {
    const text = this.usesE5Prefixes ? `passage: ${passage}` : passage;
    return this.embedSingle(text);
  }

  /**
   * Embed multiple passages/documents (adds "passage: " prefix for E5 models)
   * Use this when batch-embedding prompts for indexing.
   */
  async embedPassages(passages: string[]): Promise<number[][]> {
    const texts = this.usesE5Prefixes
      ? passages.map(p => `passage: ${p}`)
      : passages;
    return this.embed(texts);
  }

  /**
   * Check if this model uses E5-style prefixes
   */
  needsE5Prefixes(): boolean {
    return this.usesE5Prefixes;
  }

  /**
   * Get the dimension of embeddings from this model.
   * Uses cached value if available, otherwise makes a test request.
   */
  async getDimension(): Promise<number> {
    if (this.dimension !== null) {
      return this.dimension;
    }
    // Derive the dimension from the probe vector directly instead of relying
    // on embed()'s cache plus a non-null assertion.
    const probe = await this.embedSingle('test');
    this.dimension = probe.length;
    return this.dimension;
  }

  /**
   * Check if LM Studio embedding API is available AND the configured model
   * is currently loaded. Returns false (never throws) on any network error.
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.config.baseUrl}/v1/models`, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      if (!response.ok) return false;
      // Minimal narrowing of the untyped JSON body; tolerate a missing list.
      const data = (await response.json()) as { data?: LoadedModel[] };
      const models = data.data ?? [];
      // Check if our configured model is loaded
      return models.some(m => m.id === this.config.model);
    } catch {
      return false;
    }
  }

  /**
   * Get list of loaded embedding models from LM Studio.
   * Returns an empty list (never throws) on any network error.
   */
  static async getLoadedEmbeddingModels(baseUrl: string): Promise<string[]> {
    try {
      const response = await fetch(`${baseUrl}/v1/models`, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      if (!response.ok) return [];
      // Minimal narrowing of the untyped JSON body; tolerate a missing list.
      const data = (await response.json()) as { data?: LoadedModel[] };
      const models = data.data ?? [];
      // Filter for embedding models (heuristic: contains "embed" in name)
      return models
        .filter(m => m.type === 'embedding' || m.id.toLowerCase().includes('embed'))
        .map(m => m.id);
    } catch {
      return [];
    }
  }

  /**
   * Get the configured model name
   */
  getModelName(): string {
    return this.config.model;
  }
}
/**
 * Default LM Studio connection settings: local server on port 1234 with a
 * 30-second request timeout. Callers merge these with their own overrides.
 */
export const DEFAULT_EMBEDDING_CONFIG: Partial<EmbeddingConfig> = {
  timeout: 30000,
  baseUrl: 'http://127.0.0.1:1234',
};