Project Files
.gitignore
.lmsignore
LICENSE
manifest.json
package-lock.json
package.json
README.md
roleplay.config.example.json
tsconfig.json
src / memory / recall.ts
/**
 * Vector-RAG recall — the pure core of Phase F (no LM Studio dependency).
 *
 * The Phase C rolling summary keeps continuity (the *fil*, i.e. the narrative
 * thread) but is bounded: each recompression wears fine old details away. This
 * module is the other half of the documented hybrid — summary = continuity,
 * retrieval = precise recall. It keeps the **verbatim text** of messages that
 * have scrolled out of the protected recent window in a per-save store, and
 * each turn surfaces the few past messages most semantically similar to the
 * recent text (the `# Relevant past events` block).
 *
 * Everything here is pure arithmetic/string work: the embeddings are produced by
 * `embed.ts` (I/O) and passed in, so this stays unit-testable. The cosine and
 * token-estimate helpers mirror what `world/scan.ts` and `characters/select.ts`
 * use, but are imported from `../shared/vector.js` rather than from those
 * modules — `memory` stays decoupled from `world` and `characters` (see
 * CLAUDE.md). The token estimate is the same cheap `chars/4` heuristic.
 */

import { MemoryChunk } from "../state/schema.js";
import { cosineSimilarity, estimateTokens } from "../shared/vector.js";

/**
 * Tuning knobs for `extendStore`: how much of the conversation tail stays out
 * of the archive, how large the archive may grow, and which turn to stamp on
 * the chunks archived by this call.
 */
export interface ExtendStoreOptions {
  /**
   * Most-recent messages NOT archived — they are still in the live window, so
   * archiving (and later recalling) them would duplicate context. `extendStore`
   * floors this value and clamps negatives to 0.
   */
  protectRecent: number;
  /**
   * Hard cap on stored chunks; the oldest are dropped past this. `extendStore`
   * floors this value and only applies the cap when the result is greater
   * than 0, so 0 or a negative number leaves the store uncapped.
   */
  maxStore: number;
  /** Turn stamped on newly-archived chunks (for recency/debug). */
  turn: number;
}

/**
 * Outcome of one `extendStore` call: the updated archive plus the bookkeeping
 * the caller needs to persist and log.
 */
export interface ExtendStoreResult {
  /** The archive after appending new chunks and applying the size cap. */
  store: MemoryChunk[];
  /**
   * New `storedMessageCount` marker (messages consumed into the store): the
   * conversation index at which the next call should resume archiving.
   */
  storedMessageCount: number;
  /** How many oldest chunks the cap dropped this call (for the debug log). */
  dropped: number;
}

/**
 * Archive the messages that have left the protected recent window. Pure: the
 * incoming `store` and `conversation` arrays are never mutated.
 *
 * Appends `conversation[storedMessageCount : len - protectRecent]` as chunks,
 * skipping any whose trimmed text is empty or already stored (dedup, so an
 * edit/regen of the tail can't double-store), then trims the store to the
 * most-recent `maxStore` chunks. The returned marker is `len - protectRecent`,
 * or the (clamped) incoming marker when that would move it backwards, so the
 * same messages are never re-considered, even when the cap drops old chunks.
 *
 * Numeric inputs are sanitised so a corrupt value can never leak a `NaN`
 * marker back to the caller (a `NaN` marker would make every later call return
 * `NaN` again and silently stop archiving):
 * - `protectRecent` is floored and clamped to >= 0; `NaN` protects the whole
 *   conversation, i.e. nothing is archived this call.
 * - `storedMessageCount` is floored and clamped to `[0, len]`; `NaN` restarts
 *   from 0 and relies on the dedup to avoid double-storing.
 * - `maxStore` is floored; the cap only applies when the result is > 0.
 *
 * @param store Chunks archived so far, oldest first.
 * @param storedMessageCount Marker returned by the previous call.
 * @param conversation Full message list, oldest first.
 * @param options Window size, store cap and turn stamp.
 * @returns The new store, the advanced marker and how many chunks were dropped.
 */
export function extendStore(
  store: MemoryChunk[],
  storedMessageCount: number,
  conversation: string[],
  options: ExtendStoreOptions,
): ExtendStoreResult {
  const total = conversation.length;

  // Sanitise first, then test for NaN: this also catches a missing/undefined
  // value that slipped past the type checker, because Math.floor yields NaN.
  const rawProtect = Math.max(0, Math.floor(options.protectRecent));
  const protectRecent = Number.isNaN(rawProtect) ? total : rawProtect;
  const maxStore = Math.max(0, Math.floor(options.maxStore));
  const rawStart = Math.max(0, Math.min(Math.floor(storedMessageCount), total));
  const start = Number.isNaN(rawStart) ? 0 : rawStart;
  // Never move the marker backwards when the conversation has shrunk.
  const end = Math.max(start, total - protectRecent);

  const seen = new Set(store.map((c) => c.text.trim()));
  const next = [...store];
  for (const raw of conversation.slice(start, end)) {
    const text = raw.trim();
    if (!text || seen.has(text)) continue;
    seen.add(text);
    next.push({ text, turn: options.turn });
  }

  // Keep only the newest `maxStore` chunks; a NaN or zero cap disables trimming.
  let dropped = 0;
  let trimmed = next;
  if (maxStore > 0 && next.length > maxStore) {
    dropped = next.length - maxStore;
    trimmed = next.slice(dropped);
  }
  return { store: trimmed, storedMessageCount: end, dropped };
}

/**
 * Inputs for `selectMemories`: the query vector, the per-chunk vectors and the
 * three filters (similarity floor, top-K cap, token budget) applied to them.
 */
export interface RecallOptions {
  /** Embedding of the recent text (the query). Null disables recall. */
  queryEmbedding: number[] | null;
  /**
   * Per-chunk embeddings, aligned by index to the `store` array. A null (or
   * missing) entry means the chunk has no vector and is skipped.
   */
  chunkEmbeddings: (number[] | null)[];
  /**
   * Noise floor: a chunk must reach at least this cosine similarity to count
   * (the comparison is inclusive, `score >= threshold`).
   */
  threshold: number;
  /**
   * Keep at most this many chunks — the closest. Non-finite means "no cap".
   * Finite values are floored and clamped to >= 0, so 0 recalls nothing.
   */
  topK: number;
  /**
   * Token budget for the whole recall block, measured with `estimateTokens`
   * on each chunk's text. Default: unlimited.
   */
  maxTokens?: number;
}

/**
 * Choose which archived messages to surface this turn. Pure and deterministic.
 *
 * Pipeline:
 * 1. Score each chunk that has an embedding against the query by cosine
 *    similarity and discard anything below `threshold`.
 * 2. Rank the survivors best-first and keep at most `topK` of them.
 * 3. Walk that ranking and admit each chunk whose estimated token cost still
 *    fits the remaining `maxTokens` budget; a chunk that does not fit is
 *    skipped, not treated as a stop, so a smaller lower-ranked one may fit.
 * 4. Return the admitted chunks in **chronological** (store) order, so the
 *    excerpt reads as a coherent slice of the past, not a relevance jumble.
 *
 * @param store Archived chunks, oldest first.
 * @param options Query/chunk embeddings plus the floor, cap and budget.
 * @returns The recalled chunks in store order; empty when recall is disabled
 *   (null query) or nothing qualifies.
 */
export function selectMemories(
  store: MemoryChunk[],
  options: RecallOptions,
): MemoryChunk[] {
  const query = options.queryEmbedding;
  if (!query || store.length === 0) return [];

  const budget = options.maxTokens ?? Infinity;
  const limit = Number.isFinite(options.topK)
    ? Math.max(0, Math.floor(options.topK))
    : Infinity;

  // Step 1: collect every chunk that clears the similarity floor.
  const candidates: { index: number; score: number }[] = [];
  for (let i = 0; i < store.length; i += 1) {
    const embedding = options.chunkEmbeddings[i];
    if (!embedding) continue;
    const similarity = cosineSimilarity(query, embedding);
    if (similarity >= options.threshold) {
      candidates.push({ index: i, score: similarity });
    }
  }

  // Step 2: best-first, then cap to the top-K.
  candidates.sort((left, right) => right.score - left.score);
  const shortlist = candidates.slice(0, limit);

  // Step 3: spend the token budget in ranking order.
  const chosen: number[] = [];
  let spent = 0;
  for (const candidate of shortlist) {
    const cost = estimateTokens(store[candidate.index].text);
    if (spent + cost > budget) continue; // too big — a later, cheaper hit may still fit
    chosen.push(candidate.index);
    spent += cost;
  }

  // Step 4: back to store (chronological) order.
  chosen.sort((a, b) => a - b);
  return chosen.map((i) => store[i]);
}

/**
 * Render the `# Relevant past events` block from the recalled chunks.
 *
 * Each chunk's text is trimmed and blank ones are discarded; the survivors are
 * emitted verbatim under a fixed two-line header, everything joined with
 * newlines. The block is kept apart from the `# Story so far` summary so the
 * model reads it as precise recall rather than continuity.
 *
 * @param chunks Recalled chunks, in the order they should appear.
 * @returns The rendered block, or null when no chunk has any text.
 */
export function recallBlock(chunks: MemoryChunk[]): string | null {
  const excerpts: string[] = [];
  for (const chunk of chunks) {
    const excerpt = chunk.text.trim();
    if (excerpt) excerpts.push(excerpt);
  }
  if (excerpts.length === 0) return null;

  const header = [
    "# Relevant past events",
    "(Earlier moments surfaced because they relate to the current scene.)",
  ];
  return header.concat(excerpts).join("\n");
}
roleplay-master

roleplay-master