/**
* Embedding I/O for vector-RAG recall (Phase F).
*
* The side-effecting counterpart to the pure `selectMemories` (recall.ts): it
* turns each stored past message into a vector via an LM Studio embedding model,
* so a detail from far back can be recalled by meaning when the recent text is
* about it. The vectors are handed to `selectMemories`, which stays pure.
*
* Mirrors `world/embed.ts` / `characters/embed.ts` (kept separate so `memory`
* stays decoupled from `world`/`characters`, per CLAUDE.md), with the same cost
* controls:
* - **Chunk vectors are cached by text** in a process-scoped map, so a stored
* message is embedded once, not every turn. Across a plugin reload the store
* re-embeds once on the next turn (one batch call) — the same trade-off the
* lore/cast caches already accept.
* - The recent-text **query embedding can be supplied** by the caller (the
* lore/cast pass already embedded the same recent text), so a turn that runs
* several semantic passes embeds the query only once. Omit it to embed here.
* - Graceful degradation: no embedding model → every vector is `null` and
* `selectMemories` recalls nothing.
*/
import type { LMStudioClient } from "@lmstudio/sdk";
import { resolveModel } from "../shared/embedding.js";
/**
 * Process-scoped cache of chunk embeddings, looked up by a string key derived
 * from the chunk's text. Stored messages do not change between turns, so a
 * vector written here can be reused on later turns instead of re-embedding.
 */
const chunkVectorCache: Map<string, number[]> = new Map();
/** Result of one embedding pass over the stored chunks and the recent text. */
export interface MemoryEmbeddings {
  /** Vector for the recent text; `null` when none could be produced. */
  queryEmbedding: number[] | null;
  /**
   * One entry per chunk text passed in, at the same index; `null` where no
   * vector is available for that chunk.
   */
  chunkEmbeddings: Array<number[] | null>;
  /** Failure description for the debug log; `null` when nothing went wrong. */
  error: string | null;
}
/**
 * Embed the stored chunk texts, plus the recent text (query) unless the caller
 * already has it.
 *
 * Chunk vectors are served from the process-scoped cache when possible; only
 * cache misses and (optionally) the query are sent to the model. Cache entries
 * are scoped by `identifier`, so vectors produced under one embedding model are
 * never reused after the caller switches to another. Duplicate uncached texts
 * are embedded once and the result is shared across their indices.
 *
 * This function does not throw on embedding failures: a failed chunk batch
 * leaves the affected entries `null`, a failed query embedding yields a `null`
 * query, and the messages are reported through `error` (joined with "; " when
 * both fail).
 *
 * @param client LM Studio client handed to `resolveModel`.
 * @param identifier Embedding model identifier handed to `resolveModel`; also
 * used to scope the chunk-vector cache.
 * @param chunkTexts Stored chunk texts to embed; the result is index-aligned.
 * @param recentText Text to embed as the query when `queryEmbedding` is
 * `undefined`. It is trimmed first, and blank text is not embedded.
 * @param queryEmbedding Pass a precomputed recent-text vector to skip embedding
 * it again (the lore/cast pass usually already produced it). Pass `undefined`
 * to have this embed `recentText` itself; `null` is passed through as-is.
 * @returns The query vector, the per-chunk vectors and an error message (or
 * `null`). When no model resolves, every chunk vector is `null` and `error` is
 * whatever `resolveModel` reported.
 */
export async function embedMemories(
  client: LMStudioClient,
  identifier: string,
  chunkTexts: string[],
  recentText: string,
  queryEmbedding?: number[] | null,
): Promise<MemoryEmbeddings> {
  const { model, error } = await resolveModel(client, identifier);
  if (!model) {
    return {
      queryEmbedding: queryEmbedding ?? null,
      chunkEmbeddings: chunkTexts.map((): number[] | null => null),
      error,
    };
  }

  // Scope cache entries by model identifier: vectors from different embedding
  // models are not comparable (and may differ in dimension). NUL cannot occur
  // in a sensible identifier, so it is a safe separator.
  const cacheKey = (text: string): string => `${identifier}\u0000${text}`;
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  const errors: string[] = [];

  // Chunk vectors: reuse cached ones first.
  const chunkEmbeddings: (number[] | null)[] = chunkTexts.map(
    (text) => chunkVectorCache.get(cacheKey(text)) ?? null,
  );

  // Batch-embed the misses, each distinct text only once.
  const missTexts = [
    ...new Set(chunkTexts.filter((_, i) => chunkEmbeddings[i] === null)),
  ];
  if (missTexts.length > 0) {
    try {
      const results = await model.embed(missTexts);
      results.forEach((res, k) => {
        const text = missTexts[k];
        if (text !== undefined) {
          chunkVectorCache.set(cacheKey(text), res.embedding);
        }
      });
      // Fan the fresh vectors back out to every index that was a miss.
      chunkTexts.forEach((text, i) => {
        if (chunkEmbeddings[i] === null) {
          chunkEmbeddings[i] = chunkVectorCache.get(cacheKey(text)) ?? null;
        }
      });
    } catch (e) {
      errors.push(`memory embedding failed: ${describe(e)}`);
    }
  }

  // Query vector: reuse the caller's if given, else embed the recent text.
  let query: number[] | null = queryEmbedding ?? null;
  if (queryEmbedding === undefined) {
    const text = recentText.trim();
    if (text) {
      try {
        const res = await model.embed(text);
        query = res.embedding;
      } catch (e) {
        query = null;
        errors.push(`query embedding failed: ${describe(e)}`);
      }
    }
  }

  return {
    queryEmbedding: query,
    chunkEmbeddings,
    // Report every failure rather than letting the later one hide the earlier.
    error: errors.length > 0 ? errors.join("; ") : null,
  };
}