/**
* Embedding I/O for semantic lore matching.
*
* This is the side-effecting counterpart to the pure `selectLore` (scan.ts): it
* turns lore text into vectors via an LM Studio embedding model, so a French
* conversation can trigger an English lore entry (and vice-versa) by meaning
* rather than surface form. The vectors are handed to `selectLore`, which stays
* pure (cosine similarity is just arithmetic).
*
* Two cost controls:
* - **Entry vectors are cached by content** in a process-scoped map, so the
* static lore is embedded once, not every turn. Only the recent-text query
* is embedded each turn (it changes every turn).
* - The whole thing degrades gracefully: if no embedding model is available
* (none configured/loaded, or the call fails), every vector is `null` and
* `selectLore` falls back to keyword-only matching.
*
* Folder/handle resolution mirrors the other loaders; the model identifier
* comes from the global `embeddingModel` config (empty = the default loaded
* embedding model).
*/
import type { LMStudioClient } from "@lmstudio/sdk";
import { resolveModel } from "../shared/embedding.js";
import { LoreEntry } from "./schema.js";
/**
 * Process-scoped cache of lore entry embedding vectors.
 *
 * Looked up by a string key that `embedLore` builds from the entry content.
 * Lore is static across turns, so after the first turn lookups rarely miss.
 */
const entryVectorCache: Map<string, number[]> = new Map();
/** Result of one `embedLore` call: the query vector, the entry vectors, and any failure note. */
export interface LoreEmbeddings {
  /** Vector for the recent text; `null` when none was produced. */
  queryEmbedding: number[] | null;
  /**
   * One slot per entry passed in, in the same order. A slot is `null` when
   * no vector is available for that entry.
   */
  entryEmbeddings: Array<number[] | null>;
  /**
   * Human-readable description of why the semantic path failed (model
   * unavailable, or an embedding call threw), intended for the debug log.
   * `null` when no failure was reported — note that vectors can still be
   * `null` in that case, e.g. when the recent text is blank.
   */
  error: string | null;
}
/**
 * Embed the recent text (query) and the lore entries' content.
 *
 * Entry vectors are served from the process-scoped cache when possible; only
 * cache misses and the query are sent to the model. The function never throws
 * for embedding failures: anything that could not be embedded is returned as
 * `null` (so `selectLore` falls back to keyword matching for it) and the
 * reason is reported in `error`.
 *
 * @param client - LM Studio client used to resolve the embedding model.
 * @param identifier - Embedding model identifier from config (empty = the
 *   default loaded embedding model).
 * @param entries - Lore entries whose `content` should be embedded.
 * @param recentText - Recent conversation text; blank text yields a `null`
 *   query embedding without calling the model.
 * @returns The query vector, per-entry vectors aligned by index with
 *   `entries`, and a failure description (or `null`).
 */
export async function embedLore(
  client: LMStudioClient,
  identifier: string,
  entries: LoreEntry[],
  recentText: string,
): Promise<LoreEmbeddings> {
  const { model, error } = await resolveModel(client, identifier);
  if (!model) {
    // No usable embedding model: every vector is null, keyword-only fallback.
    return { queryEmbedding: null, entryEmbeddings: entries.map(() => null), error };
  }

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  // Cache keys include the model identifier so that switching the configured
  // embedding model never reuses vectors produced by a different model (they
  // would not be comparable with the new model's query vector).
  // NOTE(review): an empty identifier means "default loaded model"; if that
  // default changes between calls the key does not — not detectable from here.
  const cacheKey = (content: string): string => `${identifier}\u0000${content}`;
  // Collect every failure instead of letting a later one overwrite an earlier one.
  const failures: string[] = [];

  // Entry vectors: start from whatever the cache already holds.
  const entryEmbeddings: (number[] | null)[] = entries.map(
    (entry) => entryVectorCache.get(cacheKey(entry.content)) ?? null,
  );

  // Group the cache misses by content so identical entries are embedded once.
  const missesByContent = new Map<string, number[]>();
  entries.forEach((entry, i) => {
    if (entryEmbeddings[i] != null) return;
    const indices = missesByContent.get(entry.content);
    if (indices) {
      indices.push(i);
    } else {
      missesByContent.set(entry.content, [i]);
    }
  });

  if (missesByContent.size > 0) {
    const contents = Array.from(missesByContent.keys());
    try {
      const results = await model.embed(contents);
      contents.forEach((content, k) => {
        // Guard against a result list shorter than the input list.
        const vector = results[k]?.embedding;
        if (!vector) return;
        entryVectorCache.set(cacheKey(content), vector);
        for (const i of missesByContent.get(content) ?? []) {
          entryEmbeddings[i] = vector;
        }
      });
      if (results.length < contents.length) {
        failures.push(
          `entry embedding returned ${results.length} of ${contents.length} vectors`,
        );
      }
    } catch (e) {
      // Leave the missed entries as null — they fall back to keyword matching.
      failures.push(`entry embedding failed: ${describe(e)}`);
    }
  }

  // Query vector: not cached (the recent text changes every turn).
  let queryEmbedding: number[] | null = null;
  const query = recentText.trim();
  if (query) {
    try {
      queryEmbedding = (await model.embed(query)).embedding;
    } catch (e) {
      failures.push(`query embedding failed: ${describe(e)}`);
    }
  }

  return {
    queryEmbedding,
    entryEmbeddings,
    error: failures.length > 0 ? failures.join("; ") : null,
  };
}