Project Files
.gitignore
.lmsignore
LICENSE
manifest.json
package-lock.json
package.json
README.md
roleplay.config.example.json
tsconfig.json
src / world / scan.ts
/**
 * Relevance selection over lore entries — the differentiating feature.
 *
 * `selectLore` is **pure** (no I/O): given the candidate entries and the recent
 * conversation text, it returns the entries to inject this turn. An entry
 * activates when it is `constant`, OR a keyword appears in the window
 * (whole-word, case-insensitive), OR — when semantic vectors are supplied — its
 * embedding is similar enough to the recent text. The result is sorted by
 * `order` and greedily fit under a token budget.
 *
 * The semantic path is what lets a French conversation trigger an English lore
 * entry (and vice-versa): meaning, not surface form. Embeddings are computed by
 * the handler (I/O) in `embed.ts` and passed in here, so this stays pure —
 * cosine similarity is just arithmetic. With no vectors, behavior is identical
 * to keyword-only matching.
 *
 * The token estimate is a cheap `chars / 4` heuristic for the MVP (roadmap open
 * decision #1); the SDK tokenizer can replace `estimateTokens` later behind the
 * same signature. Advanced matching (secondary keys, AND/NOT, recursion) is
 * Phase E and deliberately not here.
 */

import { LoreEntry } from "./schema.js";
import { wholeWordMatch } from "../shared/text.js";
import { cosineSimilarity, estimateTokens } from "../shared/vector.js";

export interface SemanticOptions {
  /**
   * Query vector: the embedding of the recent conversation text. A `null`
   * value switches the semantic path off for this turn.
   */
  queryEmbedding: number[] | null;
  /**
   * One slot per lore entry, index-aligned with the `entries` array handed to
   * `selectLore`. A `null` slot means the entry has no vector and cannot be a
   * semantic candidate.
   */
  entryEmbeddings: Array<number[] | null>;
  /**
   * Minimum cosine similarity (inclusive) an entry needs to be considered a
   * semantic candidate. This is only a noise floor and is meant to stay low;
   * the real discrimination between candidates is done by `topK`.
   */
  threshold: number;
  /**
   * Upper bound on the number of *semantic-only* matches kept per turn; the
   * highest-scoring candidates win. Ranking rather than an absolute cutoff
   * keeps selection robust with embedding models (like bge-m3) that squeeze
   * all scores into a narrow band. Entries forced in by `constant` or a
   * keyword hit are not counted against this cap. A non-finite value (for
   * example `Infinity`) means "no cap".
   */
  topK: number;
}

export interface SelectLoreOptions {
  /**
   * Token ceiling for the entire `# World lore` block. When omitted the
   * budget is unlimited.
   */
  maxTokens?: number;
  /**
   * Enables semantic matching on top of keyword/constant matching. With a
   * query embedding present, an entry can also activate when the cosine
   * similarity between its embedding and the query reaches the threshold.
   */
  semantic?: SemanticOptions;
  /**
   * Dramatic-arc act gate (Phase I): the set of fact ids opened by the CURRENT
   * act (see `arc/actEligibleIds`).
   *
   * - An entry whose `reveal` is non-empty and NOT in this set is suppressed
   *   completely for the turn: neither `constant`, nor a keyword hit, nor a
   *   semantic match can surface it. A spoiler tagged `reveal: "white-room"`
   *   therefore stays out of the prompt until the arc opens it, mirroring the
   *   card-secret gate.
   * - An entry with no `reveal` is ambient and always eligible.
   * - `null` or absent means no act restriction (wildcard / default arc /
   *   feature off); selection then behaves exactly like keyword/semantic-only
   *   selection.
   */
  actEligibleIds?: null | Set<string>;
}

/**
 * Arc-gate check for a single lore entry: may it be considered this turn?
 *
 * The gate is open when any of the following holds:
 * - the entry is ambient (its `reveal` tag is empty),
 * - no act restriction applies (the eligible set is `null` or `undefined`),
 * - the entry's `reveal` id is a member of the act's opened set.
 *
 * Mirrors the card-secret gate in `knowledge/gate.ts`.
 *
 * @param reveal - The entry's reveal tag; an empty value marks it as ambient.
 * @param eligible - Fact ids opened by the current act, or `null`/`undefined`
 *   for "no restriction".
 * @returns `true` when the entry is allowed through the gate.
 */
function actEligible(reveal: string, eligible: Set<string> | null | undefined): boolean {
  // Short-circuit order matters: `has` is only reached with a real tag and a real set.
  return !reveal || eligible == null || eligible.has(reveal);
}

/**
 * Pick the lore entries to inject for the current turn.
 *
 * The selection runs in four phases:
 *
 * 1. **Scan.** Disabled entries and entries whose arc gate is closed are
 *    skipped outright. Of the rest, anything `constant` or with a key that
 *    `wholeWordMatch` finds in `recentText` is forced in. Everything else
 *    becomes a semantic candidate when a query embedding is available, the
 *    entry has a vector, and their cosine similarity is at/above the floor.
 * 2. **Cap.** Semantic candidates are ranked by similarity (best first) and
 *    only the closest `topK` are kept. Forced entries never count here.
 * 3. **Order.** The chosen entries are sorted by `order` ascending (lower =
 *    higher priority); ties keep authoring (array) order.
 * 4. **Budget.** Entries are appended in that order until the next one would
 *    push the running token estimate past `maxTokens`; that entry and all
 *    entries after it are dropped.
 *
 * @param entries - Candidate lore entries; `semantic.entryEmbeddings` is
 *   aligned to this array by index.
 * @param recentText - Recent conversation text that keywords are matched on.
 * @param options - Token budget, optional semantic matching and act gate.
 * @returns The selected entries in insertion (priority) order.
 */
export function selectLore(
  entries: LoreEntry[],
  recentText: string,
  options: SelectLoreOptions = {},
): LoreEntry[] {
  const { semantic, actEligibleIds } = options;
  const budget = options.maxTokens ?? Infinity;
  const haystack = recentText ?? "";

  // Indices of every entry that made the cut, forced or semantic.
  const picked = new Set<number>();
  // Semantic-only candidates; they still have to compete for a `topK` slot.
  const candidates: { index: number; score: number }[] = [];

  // Phase 1: scan. Classify each entry as skipped, forced, or semantic candidate.
  entries.forEach((entry, index) => {
    // Disabled entries and spoilers the current act has not opened are
    // invisible to every matching path.
    if (!entry.enabled || !actEligible(entry.reveal, actEligibleIds)) return;

    if (entry.constant || entry.keys.some((key) => wholeWordMatch(haystack, key))) {
      picked.add(index);
      return;
    }

    if (!semantic?.queryEmbedding) return;
    const vector = semantic.entryEmbeddings[index];
    if (!vector) return;

    const score = cosineSimilarity(semantic.queryEmbedding, vector);
    if (score >= semantic.threshold) candidates.push({ index, score });
  });

  // Phase 2: cap. A non-finite `topK` means every candidate is kept.
  let semanticCap = Infinity;
  if (semantic && Number.isFinite(semantic.topK)) {
    semanticCap = Math.max(0, Math.floor(semantic.topK));
  }
  candidates.sort((left, right) => right.score - left.score);
  for (const { index } of candidates.slice(0, semanticCap)) {
    picked.add(index);
  }

  // Phase 3: order. Collect in authoring order first so that the sort sees
  // the same starting sequence regardless of how entries were picked.
  const ranked: { entry: LoreEntry; index: number }[] = [];
  entries.forEach((entry, index) => {
    if (picked.has(index)) ranked.push({ entry, index });
  });
  ranked.sort((a, b) => a.entry.order - b.entry.order || a.index - b.index);

  // Phase 4: budget. Stop at the first entry that does not fit; nothing
  // after it is considered.
  const selected: LoreEntry[] = [];
  let spent = 0;
  for (const { entry } of ranked) {
    const next = spent + estimateTokens(entry.content);
    if (next > budget) break;
    selected.push(entry);
    spent = next;
  }
  return selected;
}
roleplay-master

roleplay-master