/**
 * First-mention introduction — the disclosure system (pure).
 *
 * The problem this solves: handed a piece of lore (a place, a faction) or an NPC
 * card, an LLM narrator names it as if the player already knows it — "you head
 * for the Ashen Court", "Maren waves you over" — with no grounding, the first
 * time it ever comes up. The player has never heard of it. The fix is the mirror
 * image of knowledge gating (`src/knowledge/`): that system withholds an NPC's
 * *secrets* from the prompt until earned; this one withholds nothing — every
 * selected element is already in the prompt — it simply asks the narrator to
 * GROUND a thing on its first appearance, and remembers which things have been
 * introduced so the ask stops afterwards.
 *
 * Reliability lives in two halves on the turn's two clocks (like the revelation
 * gate/digest):
 *   - PRE-narration (`planDisclosure`): from the elements selected for THIS turn
 *     and the playthrough's already-introduced set, decide which are first
 *     appearances → the assembler annotates only those, so a known element's
 *     block stays byte-identical. Only *detectable* elements (a lore entry with
 *     trigger words, an NPC with a name) are flagged: an element we cannot detect
 *     in the prose afterwards would otherwise carry the intro note forever.
 *   - POST-narration (`markDisclosed`): a pure keyword pass over the reply marks
 *     a flagged element introduced once its name actually appears in the prose —
 *     so "injected but not used" never burns the introduction, and a real first
 *     mention flips it exactly once. No model call: detection IS the name showing
 *     up, which is precisely the symptom (people/places named without a framing).
 *
 * Pure and unit-testable: no I/O, no `@lmstudio/sdk`, no dependency on the world
 * or character modules — it takes the minimal structural inputs it needs, exactly
 * like the relationship/knowledge gates take values rather than reach for state.
 */

import { escapeRegExp } from "../shared/text.js";

/**
 * One flagged element to look for in the narrator's reply.
 *
 * `planDisclosure` emits one target per first-appearance element and
 * `markDisclosed` consumes them: as soon as any entry of `terms` is mentioned in
 * the prose, `key` is folded into the introduced set.
 */
export interface DisclosureTarget {
  /** Disclosure key to record once the element is introduced (`lore:…` or `npc:…`). */
  key: string;
  /**
   * Surface forms whose appearance in the prose counts as the introduction: a
   * lore entry's trigger words, or an NPC's name followed by its aliases. A
   * single matching term is enough.
   */
  terms: string[];
}

/**
 * The pre-narration plan returned by `planDisclosure`: which selected elements
 * to annotate as first appearances, plus the watch list to hand to
 * `markDisclosed` once the reply is in.
 */
export interface DisclosurePlan {
  /** Lore disclosure keys to flag in the `# World lore` block (first appearances). */
  undisclosedLore: Set<string>;
  /** NPC disclosure keys to flag in the `# Characters in scene` block. */
  undisclosedNpcs: Set<string>;
  /**
   * Every flagged element (lore first, then NPCs, each in input order), for the
   * post-narration keyword marker.
   */
  targets: DisclosureTarget[];
}

/** Minimal shape of a lore entry the disclosure system reads (subset of `LoreEntry`). */
export interface LoreLike {
  /**
   * Explicit identifier. When present and non-blank after trimming,
   * `loreDisclosureKey` uses it verbatim; otherwise the key falls back to a
   * hash of `content`.
   */
  id?: string;
  /** Entry text; only read here to derive the fallback content hash. */
  content: string;
  /**
   * Trigger words, used as the detection terms in the prose. `planDisclosure`
   * trims them and drops blank ones; an entry left with none is never flagged.
   */
  keys?: string[];
}

/** Minimal shape of a character the disclosure system reads (subset of `CharacterCard`). */
export interface CharacterLike {
  /**
   * Display name. Trimmed and lowercased to build the `npc:` disclosure key, and
   * used (trimmed) as the first detection term. A blank name makes
   * `planDisclosure` skip the character.
   */
  name: string;
  /** Optional alternative names, added as extra detection terms after `name`. */
  aliases?: string[];
}

/**
 * 32-bit FNV-1a digest of `text`, rendered as an unsigned base-36 token.
 *
 * The hash walks the string's UTF-16 code units (not code points or bytes) and
 * uses nothing but integer arithmetic, so the module needs no `node:crypto` and
 * stays I/O-free. Its only job is to give a lore entry without an id a stable
 * key; a collision just means two entries share a disclosure key and get
 * introduced together, which is harmless.
 *
 * @param text String to digest.
 * @returns The digest in base 36 (lowercase digits and letters).
 */
export function stableHash(text: string): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let digest = FNV_OFFSET_BASIS;
  let position = 0;
  while (position < text.length) {
    // FNV-1a order: XOR the unit in first, then multiply (wrapping at 32 bits).
    digest = Math.imul(digest ^ text.charCodeAt(position), FNV_PRIME);
    position += 1;
  }

  // `>>> 0` reinterprets the signed 32-bit value as unsigned before formatting.
  const unsigned = digest >>> 0;
  return unsigned.toString(36);
}

/**
 * Build the disclosure key of a lore entry.
 *
 * The entry's explicit `id` (or SillyTavern `uid`) wins when it is non-blank
 * after trimming; otherwise the key is derived from a hash of the entry's
 * content. The `lore:` prefix keeps these keys from ever colliding with an NPC
 * key in the shared `revealed` set.
 *
 * @param entry Lore entry to key.
 * @returns `lore:<id>` or `lore:<content hash>`.
 */
export function loreDisclosureKey(entry: LoreLike): string {
  const declaredId = (entry.id ?? "").trim();
  // A blank id counts as "no id": fall back to the content hash.
  const suffix = declaredId.length > 0 ? declaredId : stableHash(entry.content);
  return "lore:" + suffix;
}

/**
 * Build the disclosure key of a character: the `npc:` namespace followed by the
 * display name, trimmed and lowercased so casing and stray whitespace never
 * produce two keys for the same NPC.
 *
 * @param name Character display name.
 * @returns `npc:<normalised name>`.
 */
export function npcDisclosureKey(name: string): string {
  const normalised = name.trim().toLowerCase();
  return "npc:" + normalised;
}

/**
 * Tell whether `term` occurs in `text` as a whole word.
 *
 * Matching is case-insensitive and Unicode-aware: the term must not be directly
 * preceded or followed by a letter or digit, so "Ash" does not fire on "ashen"
 * while accented names such as "Élodie" are still found. If the pattern cannot
 * be built (for instance on an engine without lookbehind support), the check
 * degrades to a plain case-insensitive substring test. Pure.
 *
 * @param text Prose to search.
 * @param term Surface form to look for; surrounding whitespace is ignored.
 * @returns `true` when the term is mentioned; `false` for an empty term or text.
 */
export function mentions(text: string, term: string): boolean {
  const needle = term.trim();
  if (needle.length === 0 || !text) {
    return false;
  }

  // Lookarounds that reject an adjacent Unicode letter or digit.
  const notAfterWordChar = "(?<![\\p{L}\\p{N}])";
  const notBeforeWordChar = "(?![\\p{L}\\p{N}])";

  try {
    const pattern = notAfterWordChar + escapeRegExp(needle) + notBeforeWordChar;
    return new RegExp(pattern, "iu").test(text);
  } catch {
    // Best-effort fallback: substring match, at the cost of word-boundary precision.
    const haystack = text.toLowerCase();
    return haystack.includes(needle.toLowerCase());
  }
}

/**
 * Pre-narration half of the disclosure system (pure).
 *
 * From the lore entries and NPCs SELECTED for this turn and the playthrough's
 * already-introduced keys, work out which elements are first appearances. Those
 * are the ones to annotate in the prompt, and they also form the watch list that
 * `markDisclosed` checks against the reply.
 *
 * - A lore entry with no usable trigger word is skipped outright: it could never
 *   be detected in the prose, so its intro note would never go away.
 * - An NPC is detectable through its name; one with a blank name is skipped.
 * - An element whose key is already in `revealed` is left out, so its prompt
 *   block stays byte-identical.
 *
 * @param lore Lore entries selected for the turn.
 * @param cast Characters selected for the turn.
 * @param revealed Disclosure keys already introduced in this playthrough.
 * @returns The keys to annotate (lore and NPC) and the targets to watch.
 */
export function planDisclosure(
  lore: LoreLike[],
  cast: CharacterLike[],
  revealed: string[],
): DisclosurePlan {
  const alreadyIntroduced = new Set(revealed);
  const plan: DisclosurePlan = {
    undisclosedLore: new Set<string>(),
    undisclosedNpcs: new Set<string>(),
    targets: [],
  };

  // Trim every surface form and drop the ones that end up empty.
  const usableTerms = (raw: string[]): string[] =>
    raw.map((form) => form.trim()).filter((form) => form.length > 0);

  for (const entry of lore) {
    const terms = usableTerms(entry.keys ?? []);
    // No trigger word means the entry is undetectable in the prose: never flag it.
    if (terms.length > 0) {
      const key = loreDisclosureKey(entry);
      if (!alreadyIntroduced.has(key)) {
        plan.undisclosedLore.add(key);
        plan.targets.push({ key, terms });
      }
    }
  }

  for (const npc of cast) {
    const displayName = (npc.name ?? "").trim();
    if (displayName.length === 0) continue;

    const key = npcDisclosureKey(displayName);
    if (alreadyIntroduced.has(key)) continue;

    // The name always comes first, followed by any non-blank aliases.
    const terms = usableTerms([displayName, ...(npc.aliases ?? [])]);
    plan.undisclosedNpcs.add(key);
    plan.targets.push({ key, terms });
  }

  return plan;
}

/**
 * Post-narration half of the disclosure system (pure).
 *
 * Scans the reply for each watched target and records as introduced every one
 * whose name (any of its terms) actually shows up. No model call is involved: a
 * first mention IS the name surfacing in the prose. A key that is already known
 * is skipped, so the pass is idempotent, and keys keep their insertion order:
 * previously revealed keys first, then new ones in target order.
 *
 * @param revealed Disclosure keys introduced before this turn.
 * @param targets Watch list produced by `planDisclosure`.
 * @param narration The narrator's reply for this turn.
 * @returns `revealed` — the grown, de-duplicated key list; `newlyDisclosed` — the
 *   keys introduced by this reply (for the debug log).
 */
export function markDisclosed(
  revealed: string[],
  targets: DisclosureTarget[],
  narration: string,
): { revealed: string[]; newlyDisclosed: string[] } {
  const prose = narration ?? "";
  const introduced = new Set(revealed);
  const newlyDisclosed: string[] = [];

  for (const candidate of targets) {
    const isFirstMention =
      !introduced.has(candidate.key) &&
      candidate.terms.some((surfaceForm) => mentions(prose, surfaceForm));
    if (!isFirstMention) continue;

    // Adding to the set right away also guards against a duplicate target key.
    introduced.add(candidate.key);
    newlyDisclosed.push(candidate.key);
  }

  return { revealed: Array.from(introduced), newlyDisclosed };
}
roleplay-master

roleplay-master