/**
 * The prediction loop handler — the plugin's core.
 *
 * Unlike a prompt preprocessor (which can only rewrite the user message), a
 * loop handler **drives generation itself**: it gets the full conversation,
 * builds exactly the chat it wants (real system prompt, its own context window),
 * then calls the user-selected model to produce the reply. This lifts the
 * preprocessor's limits — system prompt, prior-message control, windowing.
 *
 * Per-turn flow:
 *   1. Pull the full history (includes the latest user message).
 *   2. Plan the turn (parse /mj, expand a numbered choice, assemble the system
 *      prompt) — see director/plan.ts.
 *   3. Rebuild the chat: replace the last user message with the processed
 *      action, set the assembled system prompt.
 *   4. Generate with the user's selected model, streaming into a content block.
 *   5. Persist state (one-shot directives consumed).
 */

import {
  Chat,
  type LLM,
  type LLMContextOverflowPolicy,
  type LLMGeneratorHandle,
  type PredictionLoopHandlerController,
} from "@lmstudio/sdk";

/**
 * Type guard: does this token source take prediction config on `respond()`?
 *
 * `ctl.tokenSource()` yields either a real model (`LLM`) or another plugin
 * acting as a generator (`LLMGeneratorHandle`). Only a real model accepts
 * sampling/overflow config — a generator owns its own sampling. The SDK exports
 * both as **types only** (no runtime value), so there is nothing to `instanceof`
 * against; instead we probe for `getModelInfo`, which exists on `LLM` but not on
 * a generator handle.
 *
 * @param model The token source returned by the controller.
 * @returns `true` (narrowing to `LLM`) when `getModelInfo` is a function.
 */
function acceptsSamplingConfig(
  model: LLM | LLMGeneratorHandle,
): model is LLM {
  const probe: unknown = (model as { getModelInfo?: unknown }).getModelInfo;
  return typeof probe === "function";
}

/**
 * Token/timing stats the SDK fills on a prediction's `result()`. Read defensively
 * (everything optional) so a shape change degrades the debug line, never throws.
 * Every field is checked with `typeof … === "number"` before use, so a missing
 * or mistyped value is simply skipped.
 */
interface PredictionStats {
  /** Prompt-token count of the pass; rendered as `prompt N` in the debug line. */
  promptTokensCount?: number;
  /** Generated-token count of the pass; rendered as `output N`. */
  predictedTokensCount?: number;
  /** Generation speed; rendered with one decimal as `N.N tok/s`. */
  tokensPerSecond?: number;
  /** Time to first token, in seconds; rendered with two decimals as `TTFT N.NNs`. */
  timeToFirstTokenSec?: number;
}

/**
 * Read `.stats` from a prediction result without assuming its concrete type.
 *
 * @param result Whatever the prediction resolved to (may be null/undefined).
 * @returns The result's `stats` object, or an empty object when the result or
 *   its `stats` is null/undefined.
 */
function statsOf(result: unknown): PredictionStats {
  const holder = result as { stats?: PredictionStats } | null | undefined;
  const found = holder?.stats;
  if (found === undefined || found === null) {
    return {};
  }
  return found;
}

/**
 * The prompt-token count this pass consumed — the figure that fills the KV cache.
 *
 * @returns `stats.promptTokensCount` when it is a number, otherwise `0`.
 */
function promptTokensOf(stats: PredictionStats): number {
  const { promptTokensCount } = stats;
  if (typeof promptTokensCount !== "number") {
    return 0;
  }
  return promptTokensCount;
}

/**
 * Render a pass's token/timing stats as a single debug line, e.g.
 * `prompt 13842 · output 547 · 38.2 tok/s · TTFT 1.20s`.
 *
 * Each segment appears only when its field is a number, so a partial `stats`
 * still yields a useful line. Segment order is fixed: prompt, output, speed,
 * time-to-first-token.
 *
 * @returns The segments joined by ` · `, or `(no stats)` when none is present.
 */
function formatPredictionStats(stats: PredictionStats): string {
  const {
    promptTokensCount: promptTokens,
    predictedTokensCount: outputTokens,
    tokensPerSecond: speed,
    timeToFirstTokenSec: firstTokenDelay,
  } = stats;

  // An absent field contributes an empty string, which is filtered out below.
  // No real segment can be empty: each carries a fixed prefix or suffix.
  const segments = [
    typeof promptTokens === "number" ? `prompt ${promptTokens}` : "",
    typeof outputTokens === "number" ? `output ${outputTokens}` : "",
    typeof speed === "number" ? `${speed.toFixed(1)} tok/s` : "",
    typeof firstTokenDelay === "number" ? `TTFT ${firstTokenDelay.toFixed(2)}s` : "",
  ].filter((segment) => segment !== "");

  if (segments.length === 0) {
    return "(no stats)";
  }
  return segments.join(" · ");
}

/**
 * Sampling knobs shared by the grammar-forced structured passes (referee,
 * accountant, sheet generation). These want *faithful*, near-deterministic JSON,
 * not creative variety — so on top of their low temperature we neutralize every
 * sampler that could fight the grammar or nudge a correct token away:
 *   - `repeatPenalty: false` — a repetition penalty would penalize the repeated
 *     structural tokens (`{ " : ,`) and even a legitimately repeated digit,
 *     quietly corrupting numbers and shapes. This is the important one.
 *   - `topPSampling`/`minPSampling: false` — disable the probability filters so
 *     nothing trims the grammar-valid distribution; low temperature alone drives
 *     the (near-greedy) choice.
 * Without this, these knobs fall through to the user's LM Studio model defaults —
 * which commonly include a ~1.1 repetition penalty that distorts structured output.
 * (`topKSampling` is left untouched: it has no documented disable value and, with
 * the grammar already restricting valid tokens, a benign top-K does no harm.)
 *
 * Usage: spread this LAST into a pass's prediction config
 * (`{ temperature, maxTokens, structured, ...STRICT_SAMPLING }`) so these values
 * are the ones that apply.
 */
const STRICT_SAMPLING = {
  // Off: never penalize the repeated structural tokens / digits of valid JSON.
  repeatPenalty: false,
  // Off: no nucleus (top-P) trimming of the grammar-valid distribution.
  topPSampling: false,
  // Off: no min-P trimming either; low temperature alone drives the choice.
  minPSampling: false,
} as const;

/**
 * Propose a character's starting stats/resources from their card prose with one
 * **structured** generation (Phase G sheet generation).
 *
 * The model only *proposes*: its output is forced into a schema built from the
 * universe's declared keys (no invented stats), and every value is then clamped
 * to the rules' bounds by {@link clampOverrides} (no out-of-range cheating).
 * The call is non-streamed and low temperature; it runs once and the caller
 * caches the result.
 *
 * @param model The turn's token source; must be a real `LLM` for structured output.
 * @param rules The universe's rules definition (drives both schema and clamping).
 * @param card The character card whose prose is turned into a sheet.
 * @param opts Optional debug sink and opt-in transcript logger.
 * @returns The clamped overrides, or `null` — so the caller falls back to the
 *   flat defaults — when the token source is a generator handle or on any failure.
 */
async function generateSheet(
  model: LLM | LLMGeneratorHandle,
  rules: RulesDefinition,
  card: SheetGenCard,
  opts: { debug?: (msg: string) => void; transcript?: TranscriptLogger | null },
): Promise<SheetOverrides | null> {
  // Structured generation needs a real model; a generator handle takes no config.
  if (!acceptsSamplingConfig(model)) {
    return null;
  }

  try {
    const schema = buildSheetSchema(rules);
    const prompt = buildSheetGenPrompt(rules, card);
    const passLabel = `SHEET GENERATION (${card.name})`;
    const config = { temperature: 0.3, maxTokens: 300, structured: schema, ...STRICT_SAMPLING };

    const outcome = await model
      .respond(
        Chat.from([
          { role: "system", content: prompt.system },
          { role: "user", content: prompt.user },
        ]),
        config,
      )
      .result();

    // Debug-only: token/timing line for this pass.
    opts.debug?.(
      `[RolePlayMaster] tokens · ${passLabel}: ` + formatPredictionStats(statsOf(outcome)),
    );

    // Opt-in transcript: record exactly what the model saw and what it answered.
    await opts.transcript?.record({
      label: passLabel,
      kind: "auxiliary",
      purpose:
        `Derives ${card.name}'s starting stats/resources from their character ` +
        `card (the model proposes, the engine then clamps to the rules). One-shot, ` +
        `then cached. Does not narrate.`,
      note: "structured output (schema-constrained)",
      messages: [
        { role: "system", content: prompt.system },
        { role: "user", content: prompt.user },
      ],
      config,
      response: outcome.content,
    });

    // Prefer the SDK's already-parsed object; otherwise parse the raw content.
    const preParsed = (outcome as { parsed?: unknown }).parsed;
    const proposal = preParsed ?? JSON.parse(outcome.content);
    const clamped = clampOverrides(rules, proposal);

    opts.debug?.(
      [
        `[RolePlayMaster] sheet generated for "${card.name}": `,
        `stats ${JSON.stringify(clamped.stats)} `,
        `resources ${JSON.stringify(clamped.resources)} `,
        `(proposed ${JSON.stringify(proposal)})`,
      ].join(""),
    );
    return clamped;
  } catch (failure) {
    // Best-effort by design: any failure means "use the flat defaults".
    opts.debug?.(
      `[RolePlayMaster] sheet generation failed for "${card.name}" ` +
        `(falling back to defaults): ${String(failure)}`,
    );
    return null;
  }
}
import {
  bigFiveBlurb,
  CharacterCard,
  embedCast,
  loadCharacters,
  selectCast,
} from "./characters/index.js";
import {
  configSchematics,
  globalConfigSchematics,
  FALLBACK_UNIVERSE,
  NEW_STORY_SENTINEL,
  DEFAULT_STORY_SENTINEL,
  DEFAULT_EMBEDDING_SENTINEL,
} from "./config.js";
import { loadTuning } from "./tuning.js";
import {
  authorNote,
  consumeOnce,
  conversationForMemory,
  detectSelection,
  expandSelection,
  parseChoices,
  parseDirectives,
  planTurn,
  windowConversation,
  windowStart,
  type ConvoMessage,
} from "./director/index.js";
import {
  applyAdjudication,
  applyExtraction,
  buildAdjudicationPrompt,
  buildAdjudicationSchema,
  buildExtractionPrompt,
  buildExtractionSchema,
  buildSheetGenPrompt,
  buildSheetSchema,
  clampOverrides,
  initSheet,
  loadRules,
  luckReadout,
  readoutLexicon,
  resolveMove,
  riskPreview,
  rollDie,
  statusBlock,
  statusLine,
  tidyRoster,
  type Adjudication,
  type AdjudicationContext,
  type Resolution,
  type ResolvedEffect,
  type RulesDefinition,
  type SheetGenCard,
  type SheetOverrides,
} from "./rules/index.js";
import {
  buildStoryIntegrationPrompt,
  buildSummaryPrompt,
  countWords,
  embedMemories,
  extendStore,
  reconcile,
  shouldSummarize,
} from "./memory/index.js";
import {
  activePairs,
  advanceFamiliarity,
  applyPsycheExtraction,
  applyRelationshipExtraction,
  buildRelationshipPrompt,
  buildRelationshipSchema,
  dispositionWord,
  getRelationship,
  playerPairKey,
} from "./relationships/index.js";
import {
  addressedNpcs,
  applyStance,
  buildStancePrompt,
  buildStanceSchema,
  prunePsyche,
  reconcileStancesWithRoll,
  type Stance,
  type StanceNpc,
} from "./psyche/index.js";
import {
  applyRevelation,
  buildRevelationPrompt,
  buildRevelationSchema,
} from "./knowledge/index.js";
import { markDisclosed } from "./disclosure/index.js";
import {
  actEligibleIds,
  advanceActOnly,
  applyChronicle,
  applyConductor,
  buildChroniclePrompt,
  buildChronicleSchema,
  buildConductorPrompt,
  buildConductorSchema,
  DEFAULT_ARC,
  initPacing,
  loadArc,
  nextAct,
  resolveAct,
  type ConductorContext,
  type DormantActor,
  type LockedFact,
} from "./arc/index.js";
import {
  advanceClock,
  ELAPSED_BUCKETS,
  loadChronosModel,
  resolvePhase,
  resolveWeather,
  type ChronosModel,
  type ChronosSignal,
  type ElapsedBucket,
} from "./chronos/index.js";
import { buildTitlePrompt, cleanTitle } from "./naming/index.js";
import {
  TranscriptLogger,
  deleteTranscript,
  type TranscriptMessage,
} from "./logging/index.js";
import { buildSetupReport, buildTitleCard } from "./onboarding/index.js";
import { createState, loadState, saveState, type Directive, type PendingChoice } from "./state/index.js";
import {
  DEFAULT_NARRATION,
  embedLore,
  loadLore,
  loadWorldDef,
  loreFromCharacterBooks,
} from "./world/index.js";
import { cosineSimilarity } from "./shared/vector.js";

/**
 * Common tail of a non-streamed, schema-forced structured pass (referee,
 * accountant, relationship, revelation, conductor, chronicler). In order it:
 *   1. reports the pass's token stats through `hooks.recordTokens`;
 *   2. appends the exchange to the opt-in transcript (a no-op when it is null);
 *   3. returns the parsed object — the model's `parsed` if present, else the
 *      JSON-parsed raw content.
 * This keeps each pass down to "build prompt → respond → recordStructured →
 * apply". Steps 1–2 are debug-only side effects; the return value is the pass's
 * actual result.
 *
 * @param result The prediction result; `content` is its raw text.
 * @param exchange What was sent (label, purpose, prompts, config, optional note).
 * @param hooks The token recorder and the transcript logger (or null).
 * @returns The structured object produced by the model.
 * @throws SyntaxError when there is no `parsed` and `content` is not valid JSON.
 */
async function recordStructured(
  result: { content: string },
  exchange: {
    label: string;
    purpose: string;
    system: string;
    user: string;
    config: unknown;
    note?: string;
  },
  hooks: {
    recordTokens: (label: string, result: unknown) => void;
    transcript: TranscriptLogger | null;
  },
): Promise<unknown> {
  const { label, purpose, system, user, config, note } = exchange;

  hooks.recordTokens(label, result);

  await hooks.transcript?.record({
    label,
    kind: "auxiliary",
    purpose,
    note: note ?? "structured output (schema-constrained)",
    messages: [
      { role: "system", content: system },
      { role: "user", content: user },
    ],
    config,
    response: result.content,
  });

  // Prefer the SDK's already-parsed object; fall back to parsing the raw text.
  const preParsed = (result as { parsed?: unknown }).parsed;
  if (preParsed !== undefined && preParsed !== null) {
    return preParsed;
  }
  return JSON.parse(result.content);
}

/**
 * Text of the most recent message with the given role.
 *
 * @returns That message's text, or `null` when no message has the role.
 */
function lastTextByRole(chat: Chat, role: string): string | null {
  // Search a reversed copy so the first hit is the newest matching message.
  const newestFirst = chat.getMessagesArray().slice().reverse();
  const hit = newestFirst.find((message) => message.getRole() === role);
  return hit === undefined ? null : hit.getText();
}

/**
 * The text of the last `n` messages with this role, oldest-first, trimmed and
 * length-capped. Used to give the conductor the player's own recent actions —
 * the evidence its player-behaviour advance conditions are written about.
 *
 * Each text has its whitespace runs collapsed to single spaces and is capped at
 * 240 UTF-16 code units. Messages that are empty after trimming are skipped and
 * do not count toward `n`. The cap never splits a surrogate pair: if the cut
 * would land between the two halves of an astral character (e.g. an emoji), the
 * dangling high surrogate is dropped so no malformed text reaches the model.
 *
 * @param chat The conversation to scan (newest message last).
 * @param role The role to collect (e.g. `"user"`).
 * @param n Maximum number of texts to return; values ≤ 0 (or NaN) yield `[]`.
 * @returns Up to `n` cleaned texts, oldest first.
 */
function recentTextsByRole(chat: Chat, role: string, n: number): string[] {
  const maxChars = 240;
  const limit = Math.max(0, n);
  const out: string[] = [];
  const messages = chat.getMessagesArray();
  // Walk newest → oldest, stopping as soon as enough texts are collected.
  for (let i = messages.length - 1; i >= 0 && out.length < limit; i--) {
    if (messages[i].getRole() !== role) continue;
    const t = messages[i].getText().trim().replace(/\s+/g, " ");
    if (!t) continue;
    let capped = t.slice(0, maxChars);
    if (t.length > maxChars) {
      // Only trim when the cut really split a valid pair: the last kept unit is
      // a high surrogate AND the first dropped unit is its low surrogate.
      const lastKept = capped.charCodeAt(capped.length - 1);
      const firstDropped = t.charCodeAt(maxChars);
      const splitsPair =
        lastKept >= 0xd800 &&
        lastKept <= 0xdbff &&
        firstDropped >= 0xdc00 &&
        firstDropped <= 0xdfff;
      if (splitsPair) capped = capped.slice(0, -1);
    }
    out.push(capped);
  }
  return out.reverse();
}

/**
 * Flatten a Chat into the `{role, content}` pairs the transcript logger records,
 * preserving message order.
 */
function chatMessages(chat: Chat): TranscriptMessage[] {
  const flattened: TranscriptMessage[] = [];
  for (const message of chat.getMessagesArray()) {
    flattened.push({ role: message.getRole(), content: message.getText() });
  }
  return flattened;
}

/**
 * Join the text of the last `depth` messages, for lore keyword scanning.
 *
 * `depth` is floored and raised to at least 1, so the newest message is always
 * included when the chat is non-empty. Texts are joined with a newline.
 */
function recentMessagesText(chat: Chat, depth: number): string {
  const windowSize = Math.max(1, Math.floor(depth));
  // A negative start counts from the end and clamps to the whole array when
  // the window is larger than the history.
  const tail = chat.getMessagesArray().slice(-windowSize);
  const texts: string[] = [];
  for (const message of tail) {
    texts.push(message.getText());
  }
  return texts.join("\n");
}

/**
 * Remove the XML-ish tags some models hallucinate around our instructions — e.g.
 * `<ignore_options>…</ignore_options>` wrapping the "you may ignore these
 * options" line — along with stray harmony channel markers. Only the tags go;
 * the text they wrapped is kept.
 *
 * Applied to BOTH the displayed block and the stored reply: a tag left in
 * history is self-reinforcing — the model sees its own `<ignore_options>` next
 * turn and imitates it, so the leak compounds until the record is cleaned too.
 *
 * The rewrites run in order; the last two tidy the whitespace that removing a
 * tag leaves behind.
 */
function sanitizeNarration(text: string): string {
  const passes: Array<[RegExp, string]> = [
    // Attribute-less tags: <ignore_options>, </options>, <foo/>
    [/<\/?[a-z][a-z0-9_-]*\s*\/?>/gi, ""],
    // Harmony markers: <|channel> / <channel|>
    [/<\|?channel\|?>/gi, ""],
    // Trailing spaces left at a line end by a removed tag
    [/[ \t]+\n/g, "\n"],
    // Collapse runs of blank lines to a single blank line
    [/\n{3,}/g, "\n\n"],
  ];
  return passes.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  );
}

export async function predict(
  ctl: PredictionLoopHandlerController,
): Promise<void> {
  const status = ctl.createStatus({
    status: "loading",
    text: "Assembling role-play context…",
  });
  // The turn drives several hidden, NON-streamed LLM passes (referee, accountant,
  // revelation, pacing conductor, chapter close, naming) whose only visible sign
  // is this status line. Without an update each one looks like the plugin froze —
  // the status sits on the previous "done" while the model is in fact still
  // thinking. `working()` re-arms the spinner with what's running right now;
  // `ready()` clears it. Call `working()` before every silent pass so the player
  // always sees the model is busy, never a stalled UI.
  const working = (text: string): void => status.setState({ status: "loading", text });
  const ready = (text: string): void => status.setState({ status: "done", text });

  const config = ctl.getPluginConfig(configSchematics);
  const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics);

  // Advanced knobs not shown in the UI: defaults, optionally overridden by a
  // roleplay.config.json next to the plugin (read fresh each turn, no rebuild).
  const tuning = await loadTuning();

  const universe = config.get("activeUniverse") || FALLBACK_UNIVERSE;
  // A universe is the content base; the save slot picks which playthrough of it
  // to load/store, so several independent stories can share the same world. The
  // "Story" dropdown holds the chosen save (empty = default); selecting "＋ New
  // story" defers to the free-text `newStoryName` field instead.
  const pickedStory = config.get("storyName");
  const save = (
    pickedStory === NEW_STORY_SENTINEL
      ? config.get("newStoryName")
      : pickedStory === DEFAULT_STORY_SENTINEL
        ? ""
        : pickedStory
  ).trim();
  const storeOptions = {
    savesDir: tuning.savesDir,
    save,
  };
  const loadedState = await loadState(universe, storeOptions);

  // Full LLM transcript (opt-in): a per-save file recording every exchange with
  // the model this turn — what it receives and its raw reply. Best-effort; null
  // when the toggle is off, in which case every `transcript?.record(...)` is a
  // no-op. The displayed turn is the one we're about to play (state.turn + 1).
  const transcript = config.get("transcriptLogging")
    ? new TranscriptLogger(universe, loadedState.turn + 1, storeOptions)
    : null;

  // World identity lives in the universe package (universes/<u>/world.json),
  // loaded fresh each turn like lore/characters — NOT re-typed in per-chat
  // config. The file is the source of truth; fall back to the persisted state
  // (then the schema default) so an empty/missing file never blanks the world.
  const worldDef = await loadWorldDef(universe, { universesDir: tuning.universesDir });
  const state = {
    ...loadedState,
    world: {
      name: worldDef.name || loadedState.world.name,
      setting: worldDef.setting || loadedState.world.setting,
    },
  };
  // Psyche decay (Phase J follow-up): the fluctuating state layer is "current" —
  // drop any mood the social pass hasn't refreshed within the decay window (using
  // the turn we're about to play, `state.turn + 1`), BEFORE anything reads it
  // (the stance roster, the narrator's cast block) or the social pass folds new
  // moods in. So an off-scene NPC returns to their stable persona, not a stale
  // grudge; persists because the pruned `state.psyche` flows into the save.
  state.psyche = prunePsyche(state.psyche, state.turn + 1, tuning.psycheDecayTurns);
  const narrationStyle = worldDef.narration || DEFAULT_NARRATION;

  // Character cards (player persona + NPCs) live in the package, loaded fresh
  // each turn — never duplicated into the universe state. I/O stays out of planTurn.
  const characters = await loadCharacters(universe, {
    universesDir: tuning.universesDir,
  });

  // World lore also lives in the package, read fresh each turn (never in state):
  // universes/<u>/lore.json plus the character_book embedded in cards.
  const fileLore = await loadLore(universe, {
    universesDir: tuning.universesDir,
  });
  const cards = [characters.player, ...characters.cast].filter(
    (c): c is CharacterCard => c !== null,
  );
  const cardLore = loreFromCharacterBooks(cards.map((c) => c.character_book));
  const lore = [...fileLore, ...cardLore];

  // Structured mechanics (Phase G): the universe's optional rules.json. Null
  // when the file is absent/invalid or the player turned mechanics off — then
  // nothing below changes behaviour (the assembler omits every mechanics block).
  const rulesDef = config.get("mechanicsEnabled")
    ? await loadRules(universe, { universesDir: tuning.universesDir })
    : null;

  // Dramatic arc (Phase I): the universe's optional arc.json — the authored
  // act structure that gates secrets by act and paces the drama. When the
  // feature is on we fall back to the built-in generic arc if the universe ships
  // none (it still paces; it imposes no secret suppression). Null = feature off,
  // and every arc effect below is a no-op (byte-identical to before).
  const arc = config.get("dramaticArc")
    ? ((await loadArc(universe, { universesDir: tuning.universesDir })) ?? DEFAULT_ARC)
    : null;

  // World clock (the chronos subsystem): the universe's authored clock/climate
  // model from world.json (the built-in default when none is authored). Loaded
  // like the arc — content, never persisted. Null = the feature is off, and every
  // chronos effect below is a no-op (the `# Time & weather` block is omitted and
  // the clock never advances, so the turn is byte-identical).
  const chronosModel: ChronosModel | null = config.get("timeWeather")
    ? await loadChronosModel(universe, { universesDir: tuning.universesDir })
    : null;

  // 1. Full history (mutable copy), including the latest user message.
  const chat = await ctl.pullHistory();
  const lastUserText = lastTextByRole(chat, "user") ?? "";
  const prevAssistantText = lastTextByRole(chat, "assistant");
  const recentText = recentMessagesText(chat, tuning.loreScanDepth);

  // `/mj restart` — wipe THIS playthrough and END the turn before any generation.
  // We persist a brand-new state over the current save (rolling memory, long-term
  // recall, character sheets, dramatic arc/pacing, tone & `/mj` directives, the
  // turn counter — all reset) and return, so none of the costly passes below run
  // and the end-of-turn `saveState` (which would re-persist the OLD state) is
  // never reached. We also delete this save's raw transcript log so its old turns
  // don't linger under the fresh story. The visible conversation belongs to LM
  // Studio and cannot be cleared from a loop handler, so we tell the player to
  // open a new chat for a truly blank slate; otherwise the messages still on
  // screen re-seed memory as play continues from here.
  if (parseDirectives(lastUserText).restart) {
    await saveState(createState(universe), storeOptions);
    await deleteTranscript(universe, storeOptions);
    const where = save ? `the story "${save}"` : "the default story";
    ctl
      .createContentBlock()
      .appendText(
        `⚠️ **Playthrough reset.** Progress for ${where} in "${universe}" has been ` +
          `wiped — rolling memory, long-term recall, character sheets, the dramatic ` +
          `arc, your tone and directives, and the turn counter all start fresh.\n\n` +
          `The messages already on screen stay (LM Studio owns the chat window): ` +
          `**open a new chat** for a completely blank slate, or just keep playing — ` +
          `the story picks up clean from here.`,
      );
    ready("Playthrough reset");
    return;
  }

  // HARD STOP after game-over: the player's run already ended on a previous turn
  // (the death scene + `# Conclusion` were narrated then). A counted-life RPG must
  // make death final, so we do NOT generate fresh death vignettes turn after turn,
  // and — crucially — we skip every costly pass below AND the end-of-turn
  // `saveState`, so the dead state is left exactly as it was (no stray accountant
  // cost, no advancing turn counter). The only way forward is a new playthrough.
  // Checks the LOADED `state.gameOver`, so the turn the player actually dies (where
  // it flips true mid-turn) still narrates its ending — only later inputs hard-stop.
  if (state.gameOver) {
    const where = save ? `"${save}"` : "this story";
    ctl
      .createContentBlock()
      .appendText(
        `🪦 **The story has ended.** Your character has fallen, and in this world ` +
          `death is final — ${where} in "${universe}" is over.\n\n` +
          `To play again: **open a new chat**, or type \`/mj restart\` to wipe this ` +
          `playthrough and begin a fresh story in the same world.`,
      );
    ready("The story has ended");
    return;
  }

  // The token source (real model or another plugin's generator handle). Resolved
  // up front because sheet generation (below) and the onboarding check both need
  // it; generation reuses it too.
  const model = await ctl.tokenSource();

  // Resolve the player's chosen move (Phase G) BEFORE planning, so the updated
  // sheet and the roll outcome feed the system prompt. Dice are rolled HERE (the
  // impure boundary — the engine math is pure and dice-injected). The player
  // sheet is seeded from the rules exactly once. When the player picked a
  // numbered option carrying a move (declared in last turn's hidden trailer), we
  // roll 2d6+stat, apply the matching tier's deltas to the right targets, and
  // carry the new sheets / defeats / game-over forward.
  let sheet = state.sheet;
  let combatants = state.combatants;
  let npcSheets = state.npcSheets;
  // Annotated `boolean` (not the narrowed literal `false`): the hard-stop above
  // returns on `state.gameOver === true`, so control-flow analysis narrows
  // `state.gameOver` to `false` here — without the annotation `gameOver` would be
  // typed `false` and the later `gameOver = resolution.gameOver` (a boolean) fails.
  let gameOver: boolean = state.gameOver;
  let resolution: Resolution | null = null;
  // Whether the player's input selected a numbered option offered last turn (a
  // pre-vetted pick, plain or with a move). When false the action is free-form,
  // which is the only case the referee (Phase G3) adjudicates below.
  let pickResolved = false;
  if (rulesDef) {
    if (!sheet.initialized) {
      // Seed the player sheet once. With sheet generation on and a player card,
      // derive starting values from the card prose (model proposes → engine
      // clamps); otherwise fall back to the rules' flat defaults.
      let overrides: SheetOverrides | undefined;
      if (config.get("generateSheets") && characters.player) {
        working("Rolling up your character sheet…");
        const gen = await generateSheet(model, rulesDef, characters.player, {
          debug: config.get("debugLogging") ? (m) => ctl.debug(m) : undefined,
          transcript,
        });
        overrides = gen ?? undefined;
      }
      sheet = initSheet(rulesDef, overrides);
    }
    // Heal any roster the model inflated in earlier turns (dedupe by label +
    // cap) before resolving / rendering, so an existing save self-cleans.
    combatants = tidyRoster(combatants, rulesDef, tuning.maxCombatants);
    if (!gameOver) {
      const selection = detectSelection(lastUserText);
      const chosen =
        selection !== null
          ? state.pendingChoices.find((c) => c.index === selection)
          : undefined;
      // A matched option (plain OR with a move) is a pre-vetted pick — the
      // referee must not double-adjudicate it (decision 1).
      pickResolved = chosen !== undefined;
      if (chosen?.move) {
        const rng = () => Math.random();
        const dice = { d1: rollDie(6, rng), d2: rollDie(6, rng) };
        resolution = resolveMove(
          chosen.move,
          { player: sheet, combatants },
          dice,
          rng,
          rulesDef,
          { partial: tuning.pbtaPartial, full: tuning.pbtaFull },
        );
        sheet = resolution.actors.player;
        combatants = resolution.actors.combatants;
        gameOver = resolution.gameOver;
      }
    }
  }

  // 0b. ADJUDICATE FREE-FORM (Phase G3 — the referee): on a typed (free-form)
  //     action only, rule on whether the player's character may attempt it
  //     BEFORE the scene is narrated, so free text gets the same mechanical
  //     vetting a numbered pick already does. Runs only when mechanics are on
  //     (rules.json), adjudication is enabled, the run isn't over, the player
  //     didn't pick an offered option, and we have a real `LLM` token source
  //     (structured output, like the accountant / sheet generation). A
  //     `resisted`/`claim` verdict becomes the `# Adjudication` block; `roll`
  //     and dice are wired in Lot 2; `allowed` interferes with nothing. Failure
  //     is swallowed — the turn still narrates (the next # Status re-anchors).
  let adjudication: Adjudication | null = null;
  // The referee's consequence forecast: which tracked resources this free-form
  // action puts in play (Phase G3 step 2), forwarded to the narrator as the
  // `# Consequences` block so the world's effect isn't silently forgotten.
  let adjudicationAffects: string[] = [];
  // On a free-form `roll`, the referee's framing of the attempt — appended to
  // the `# Action resolution` block so the dice outcome reads with its stakes.
  let adjudicationNote = "";
  // The character the roll was contested against (the referee's raw `opposedBy`,
  // e.g. "aria") — kept even when the engine drops it for lack of a spawned
  // combatant, so the social pass can reconcile its stance with the dice (a
  // full-success contest must not be vetoed by that character's stance).
  let rollOpposedBy = "";
  // Whether this turn's free-form roll carries PERIL (Phase G3): the engine put
  // real vital damage on its miss/partial tiers because the action courts injury
  // or death. Used by the will-gate below: a perilous physical contest must STAND
  // against a hostile NPC's will — the danger to the player's own body is not the
  // NPC's to refuse away (without this, charging a hostile foe would be voided and
  // the player spared the very risk they took).
  let rollHasPeril = false;

  // Per-turn token accounting for the debug log. Each model pass (referee, main
  // narration, accountant, title, summary) reports its prompt/output token count
  // via `recordTokens`; at the end of the turn we print the largest prompt — the
  // figure that must fit the context window — so the user can size it from real
  // data instead of guessing. Gated on `debugLogging`: zero cost when off.
  const passPromptTokens: { label: string; prompt: number }[] = [];
  const recordTokens = (label: string, result: unknown): void => {
    if (!config.get("debugLogging")) return;
    const stats = statsOf(result);
    passPromptTokens.push({ label, prompt: promptTokensOf(stats) });
    ctl.debug(`[RolePlayMaster] tokens · ${label}: ${formatPredictionStats(stats)}`);
  };

  if (
    rulesDef &&
    config.get("adjudicationEnabled") &&
    acceptsSamplingConfig(model) &&
    !gameOver &&
    !pickResolved
  ) {
    // Strip /mj directives so the referee judges the action, not a steer.
    const declaredAction = parseDirectives(lastUserText).cleanedText.trim();
    if (declaredAction) {
      try {
        const schema = buildAdjudicationSchema(rulesDef);
        const playerCard = characters.player;
        const cardProse = playerCard
          ? [
              `Name: ${playerCard.name}`,
              playerCard.description.trim(),
              playerCard.personality.trim()
                ? `Personality: ${playerCard.personality.trim()}`
                : "",
            ]
              .filter(Boolean)
              .join("\n")
          : "(no character card)";
        // The referee's SOURCES OF TRUTH (Phase G3.2): the same canon the
        // narrator sees, so a `claim` ruling is grounded in everything that
        // counts as established — never in the player's own words.
        //   - the written lore (the full canon, not the narrator's budgeted
        //     selection: the referee verifies existence, so it wants all of it);
        //   - the GM's PRIOR narration recalled from the per-save store, filtered
        //     to "Narrator:" lines so the player's past assertions can never be
        //     mistaken for canon, and bounded to the most recent so the gate
        //     stays cheap on the critical path.
        const worldLore = lore
          .map((e) => e.content.trim())
          .filter(Boolean)
          .join("\n\n");
        let pastEvents = "";
        const narrated = state.memory.store
          .map((c) => c.text.trim())
          .filter((t) => /^Narrator:/i.test(t));
        for (let i = narrated.length - 1; i >= 0; i--) {
          const next = pastEvents ? `${narrated[i]}\n\n${pastEvents}` : narrated[i];
          if (next.length > 4000) break; // recency-first budget for the gate
          pastEvents = next;
        }
        // Assemble what the referee is shown. Blank pieces are filtered out before
        // joining, so an unset world name / setting / style leaves no empty line.
        const refereeToneLines = [
          state.world.name && state.world.name !== "Untitled"
            ? `Name: ${state.world.name}`
            : "",
          state.world.setting.trim(),
          narrationStyle.trim() ? `Narration style: ${narrationStyle.trim()}` : "",
        ].filter(Boolean);
        const refereeCardParts = [
          cardProse,
          statusBlock(sheet, combatants, rulesDef) ?? "",
        ].filter(Boolean);
        // Durable context beyond the immediate scene (Phase G3.1) — two-tier
        // memory: the whole-story summary, then the current chapter's summary.
        const refereeSummaryParts = [
          state.memory.storySummary,
          state.memory.summary,
        ].filter((part) => part.trim());
        const ctx: AdjudicationContext = {
          worldTone: refereeToneLines.join("\n"),
          worldLore,
          playerCharacter: refereeCardParts.join("\n\n"),
          storySummary: refereeSummaryParts.join("\n\n"),
          pastEvents,
          currentScene: prevAssistantText ?? "",
          declaredAction,
        };

        // One structured call. `STRICT_SAMPLING` is spread last, so any key it
        // defines overrides the ones listed before it.
        const { system, user } = buildAdjudicationPrompt(rulesDef, ctx);
        const adjConfig = {
          temperature: tuning.adjudicationTemperature,
          maxTokens: Math.max(1, Math.floor(tuning.adjudicationMaxTokens)),
          structured: schema,
          ...STRICT_SAMPLING,
        };
        const adjChat = Chat.from([
          { role: "system", content: system },
          { role: "user", content: user },
        ]);
        working("Refereeing your action…");
        const adjResult = await model.respond(adjChat, adjConfig).result();

        // Record the exchange (label, purpose, prompt, config) and get the parsed
        // verdict back.
        const adjRecord = {
          label: "ADJUDICATION",
          purpose:
            "An impartial referee rules on whether the player's free-form action " +
            "is something their character can attempt in this world — BEFORE the " +
            "narration is written. Its [user] block is the world tone, the player " +
            "card + sheet, the recent scene, and the declared action; by design " +
            "NOT the game's prompt. Low temperature, structured output; does not " +
            "narrate. Closes the 'I do the impossible → it works' exploit.",
          system,
          user,
          config: adjConfig,
        };
        const parsed = await recordStructured(adjResult, adjRecord, { recordTokens, transcript });

        // Turn the parsed verdict into this turn's adjudication outcome.
        const dispatch = applyAdjudication(parsed, ctx, rulesDef);
        adjudication = dispatch.adjudication;
        adjudicationAffects = dispatch.affects;
        // A `roll` verdict goes through the SAME engine as a numbered risky pick:
        // two six-sided dice are rolled now (dice are the impure boundary), the
        // move is resolved against the current sheet and combatants, and the
        // resolution is carried into the prompt as the `# Action resolution`
        // block. The tier's deltas are empty here — the accountant captures the
        // fallout from the narration.
        const refereeMove = dispatch.move;
        if (refereeMove) {
          const rng = () => Math.random();
          const d1 = rollDie(6, rng);
          const d2 = rollDie(6, rng);
          const thresholds = { partial: tuning.pbtaPartial, full: tuning.pbtaFull };
          resolution = resolveMove(
            refereeMove,
            { player: sheet, combatants },
            { d1, d2 },
            rng,
            rulesDef,
            thresholds,
          );
          // Adopt the post-roll actors and the run-over flag.
          sheet = resolution.actors.player;
          combatants = resolution.actors.combatants;
          gameOver = resolution.gameOver;
          // Who the contest was against — kept even when the engine cleared the
          // opposition (no spawned combatant for a social foe), so the stance pass
          // can reconcile against it and a won contest isn't vetoed.
          rollOpposedBy = refereeMove.opposedBy ?? "";
          // Any peril damage is already baked into the move's miss/partial tiers;
          // flag it so the will-gate keeps a physical contest standing against a
          // hostile NPC.
          rollHasPeril = dispatch.peril !== "none";
          // The referee's framing of the attempt, surfaced to the narrator.
          adjudicationNote = dispatch.reason;
        }
        // Debug trace of the ruling: an explicit adjudication wins, else a roll
        // summary (stat/difficulty, optional peril, resulting tier), else "allowed".
        if (config.get("debugLogging")) {
          let verdict: string;
          if (adjudication) {
            verdict = `${adjudication.kind} — "${adjudication.reason}"`;
          } else if (dispatch.move) {
            const perilTag = dispatch.peril !== "none" ? `, peril:${dispatch.peril}` : "";
            const gameOverTag = resolution?.gameOver ? " [GAME OVER]" : "";
            verdict =
              `roll (${dispatch.move.stat || "—"}/${dispatch.move.difficulty || "—"}` +
              `${perilTag}) → ${resolution?.tier}` +
              gameOverTag;
          } else {
            verdict = "allowed";
          }
          const affectsTag =
            adjudicationAffects.length > 0 ? ` [affects: ${adjudicationAffects.join(", ")}]` : "";
          ctl.debug(
            `[RolePlayMaster] adjudication: ` +
              verdict +
              affectsTag +
              `\n--- action ---\n${declaredAction}\n--- parsed ---\n${JSON.stringify(parsed)}`,
          );
        }
      } catch (err) {
        if (config.get("debugLogging")) {
          ctl.debug(
            `[RolePlayMaster] adjudication failed (action narrated as-is): ${String(err)}`,
          );
        }
      }
    }
  }

  // Snapshot of the conversation for long-term memory: only the user and
  // assistant turns, reduced to plain `{ role, content }` pairs. It is kept
  // structured so `conversationForMemory` can clean it (strip the game master's
  // option menus, expand the player's bare picks) before the summary/store see
  // it; the just-generated reply is appended once it exists.
  const memoryMessages: ConvoMessage[] = [];
  for (const message of chat.getMessagesArray()) {
    const role = message.getRole();
    if (role === "user" || role === "assistant") {
      memoryMessages.push({ role, content: message.getText() });
    }
  }

  // 1b. Semantic matching — the switches. The recent text, the lore entries and
  //     the NPC cards are embedded so meaning (not surface form) can trigger
  //     them — e.g. a French line activating an English entry, or an NPC pulled
  //     into the scene by topic. Fully optional: without an embedding model the
  //     turn degrades to keyword/name-only matching.
  const npcActivation = tuning.npcActivation;
  const scenePresenceOn = tuning.scenePresence;
  const semanticEnabled = config.get("semanticMatching");
  // Each pass needs the toggle AND something to match against.
  const loreSemanticOn = semanticEnabled && lore.length > 0;
  const castSemanticOn = semanticEnabled && npcActivation && characters.cast.length > 0;
  // Vector-RAG recall (Phase F) has its own switch, independent of lore/NPC
  // semantic matching; it only has work to do once the store is non-empty.
  const ragEnabled = config.get("ragEnabled");
  const ragOn = ragEnabled && state.memory.store.length > 0;
  // The dropdown's "Default" entry is stored as a sentinel (the SDK rejects an
  // empty option value); translate it back to "", which embed.ts treats as "use
  // the default loaded embedding model".
  const embeddingModelRaw = globalConfig.get("embeddingModel");
  const embeddingModel =
    embeddingModelRaw !== DEFAULT_EMBEDDING_SENTINEL ? embeddingModelRaw : "";

  // Results of the embedding passes. They keep these "nothing embedded" defaults
  // for every pass that is switched off.
  let queryEmbedding: number[] | null = null;
  let entryEmbeddings: (number[] | null)[] = [];
  let castEmbeddings: (number[] | null)[] = [];
  let memoryEmbeddings: (number[] | null)[] = [];
  // The FIRST error reported by any pass (later ones never overwrite it).
  let embedError: string | null = null;
  // Set once any pass has produced (or tried to produce) the recent-text query
  // vector, so later passes are handed it instead of embedding the text again.
  let queryEmbedded = false;

  // Lore pass: always the first to run, so it always embeds the query itself.
  if (loreSemanticOn) {
    const loreVectors = await embedLore(ctl.client, embeddingModel, lore, recentText);
    queryEmbedding = loreVectors.queryEmbedding;
    entryEmbeddings = loreVectors.entryEmbeddings;
    embedError = loreVectors.error;
    queryEmbedded = true;
  }

  // Cast pass: reuses the query vector when a prior pass ran; otherwise it is
  // given `undefined` and embedCast embeds the recent text itself.
  if (castSemanticOn) {
    const sharedQuery = queryEmbedded ? queryEmbedding : undefined;
    const castVectors = await embedCast(
      ctl.client,
      embeddingModel,
      characters.cast,
      recentText,
      sharedQuery,
    );
    castEmbeddings = castVectors.castEmbeddings;
    if (!queryEmbedded) {
      queryEmbedding = castVectors.queryEmbedding;
      queryEmbedded = true;
    }
    if (!embedError) embedError = castVectors.error;
  }

  // Recall pass over the per-save store, with the same query-sharing rule.
  if (ragOn) {
    const sharedQuery = queryEmbedded ? queryEmbedding : undefined;
    const storeTexts = state.memory.store.map((chunk) => chunk.text);
    const memoryVectors = await embedMemories(
      ctl.client,
      embeddingModel,
      storeTexts,
      recentText,
      sharedQuery,
    );
    memoryEmbeddings = memoryVectors.chunkEmbeddings;
    if (!queryEmbedded) {
      queryEmbedding = memoryVectors.queryEmbedding;
      queryEmbedded = true;
    }
    if (!embedError) embedError = memoryVectors.error;
  }
  const semanticOn = loreSemanticOn || castSemanticOn || ragOn;

  // `convo`: the conversation as plain turns — a per-message shallow copy of
  // `memoryMessages`, so the last user message can later be swapped for the
  // processed action without touching `memoryMessages`, which long-term memory
  // needs verbatim.
  // `windowTexts`: the raw texts that will REMAIN in the sent window after
  // pruning (see director/window.ts). It is computed before planning so RAG
  // recall can exclude anything still in the window — recall is for events that
  // have scrolled out, not a duplicate of what the model already sees.
  const convo: ConvoMessage[] = Array.from(memoryMessages, (turn) => ({ ...turn }));
  const windowFirstKept = windowStart(convo.length, {
    summarizedCount: state.memory.summarizedMessageCount,
    bridge: tuning.contextBridge,
    prune: tuning.pruneSummarized,
  });
  const windowTexts = convo.slice(windowFirstKept).map((turn) => turn.content);

  // 1c. READ THE ROOM (Phase J — the social referee). Before the scene is
  //     narrated, one batched structured call rules on how each engaged NPC
  //     WILLS to respond to the player's action, so the agreeable model cannot
  //     voice everyone as endlessly compliant. The call is made once over the
  //     whole engaged cast (`stances[]`), not once per NPC. The eligibility
  //     conditions and the trigger gate are computed below. Independent of
  //     `rules.json` — volition is not a mechanics feature. A failure is
  //     swallowed, so the scene still narrates.
  let stances: Stance[] = [];
  // This turn's active cast. When NPC activation is off, or there is no cast, it
  // is simply the full cast; otherwise it is `selectCast` over the recent text,
  // with the semantic inputs attached only when a query vector exists. Computed
  // here so the referee's roster is meant to match what planTurn selects below.
  let socialCast = characters.cast;
  if (npcActivation && characters.cast.length > 0) {
    const castSemantic = !queryEmbedding
      ? undefined
      : {
          queryEmbedding,
          castEmbeddings,
          threshold: tuning.npcSemanticThreshold,
          topK: tuning.npcSemanticTopK,
        };
    socialCast = selectCast(characters.cast, recentText, { semantic: castSemantic });
  }
  // The action the social pass judges: the user's text with directives stripped.
  // A bare numbered pick is replaced by the matching option's text from the
  // previous narration, so the referee judges the real action ("negotiate the
  // price"), not "2". If the pick cannot be expanded the cleaned text stays.
  let socialAction = parseDirectives(lastUserText).cleanedText.trim();
  const socialSelection = detectSelection(socialAction);
  if (socialSelection !== null && prevAssistantText) {
    const pickedOption = expandSelection(socialSelection, parseChoices(prevAssistantText));
    socialAction = pickedOption ? pickedOption : socialAction;
  }
  // True when dice actually resolved the action this turn (a risky pick, or a
  // free-form `roll`) — the case where the dice can promise a success the NPC's
  // will would refuse; the will-gate further down handles it.
  const rolledThisTurn = resolution !== null && !resolution.noRoll;
  // The pass runs only when ALL of these hold.
  const stanceEligible =
    config.get("volitionEnabled") &&
    // Structured output needs a real `LLM`, not a generator handle.
    acceptsSamplingConfig(model) &&
    !gameOver &&
    socialAction !== "" &&
    // Free-form actions always qualify. A numbered pick qualifies ONLY when it
    // rolled — a plain/cost pick changes nothing by dice, so it needs no will
    // check (keeps the extra call rare).
    (socialSelection === null || rolledThisTurn);
  // Trigger gate: of the active cast, keep only those the action engages (on
  // stage in the prior narration, or named in the action). Empty → skip.
  const addressedCast = stanceEligible
    ? addressedNpcs(socialCast, prevAssistantText ?? "", socialAction)
    : [];
  if (stanceEligible && addressedCast.length > 0) {
    try {
      // Pre-render each engaged card into plain strings (keeps `psyche/`
      // decoupled from `characters`/`state`): the persona profile, the standing
      // toward the player, and the current mood (empty when no psyche entry
      // exists for the NPC).
      const npcs: StanceNpc[] = addressedCast.map((card) => {
        const personality = card.personality.trim();
        const traits = bigFiveBlurb(card.bigFive);
        const desires = card.desires.trim();
        const needs = card.needs.trim();
        const boundaries = card.boundaries.trim();
        // Only the non-empty parts make it into the profile, in this order.
        const profile = [
          personality,
          traits ? `Traits: ${traits}` : "",
          desires ? `Wants: ${desires}` : "",
          needs ? `Needs: ${needs}` : "",
          boundaries ? `Will not: ${boundaries}` : "",
        ]
          .filter(Boolean)
          .join(". ");

        const rel = getRelationship(state.relationships, playerPairKey(card.name));
        const standing = `${rel.familiarity}; ${dispositionWord(rel.disposition)}`;

        // Psyche entries are keyed by the trimmed, lower-cased NPC name.
        const psyche = state.psyche[card.name.trim().toLowerCase()];
        let mood = "";
        if (psyche) {
          mood = [
            psyche.mood,
            psyche.intent,
            psyche.goalFocus ? `pursuing: ${psyche.goalFocus}` : "",
          ]
            .filter((part) => part.trim())
            .join("; ");
        }
        return { name: card.name, profile, standing, mood };
      });
      const npcNames = npcs.map((npc) => npc.name);

      // Prompt + config for the single batched structured call.
      const schema = buildStanceSchema(npcNames);
      const toneLines = [
        state.world.name && state.world.name !== "Untitled" ? `Name: ${state.world.name}` : "",
        state.world.setting.trim(),
        narrationStyle.trim() ? `Narration style: ${narrationStyle.trim()}` : "",
      ];
      const worldTone = toneLines.filter(Boolean).join("\n");
      const { system, user } = buildStancePrompt({
        worldTone,
        scene: prevAssistantText ?? "",
        declaredAction: socialAction,
        npcs,
      });
      const stanceConfig = {
        temperature: tuning.volitionTemperature,
        maxTokens: Math.max(1, Math.floor(tuning.volitionMaxTokens)),
        structured: schema,
        ...STRICT_SAMPLING,
      };
      const stanceChat = Chat.from([
        { role: "system", content: system },
        { role: "user", content: user },
      ]);
      working("Reading the room…");
      const stanceRes = await model.respond(stanceChat, stanceConfig).result();

      const stanceRecord = {
        label: "STANCE",
        purpose:
          "The social referee (Phase J): rules on how each present NPC is " +
          "disposed to respond to the player's action — comply / hesitant / " +
          "negotiate / deflect / refuse / hostile / withdraw — AND whether they " +
          "make an unprompted move of their own this turn (initiative: none / " +
          "interject / pursue / leave), from their own personality (Big Five), " +
          "standing, and mood, BEFORE the scene is narrated. Binds the narrator " +
          "so an NPC can refuse or act on their own. Does not narrate.",
        system,
        user,
        config: stanceConfig,
      };
      const parsed = await recordStructured(stanceRes, stanceRecord, { recordTokens, transcript });
      stances = applyStance(parsed, npcNames);

      if (config.get("debugLogging")) {
        // Summarise who resists (any verdict but "comply") and who takes the
        // initiative (anything but "none").
        const resisting = stances.filter((s) => s.verdict !== "comply");
        const acting = stances.filter((s) => s.initiative !== "none");
        const resistingList =
          resisting.length > 0
            ? ` — ${resisting.map((s) => `${s.npc}:${s.verdict}`).join(", ")}`
            : "";
        const actingList =
          acting.length > 0
            ? ` — ${acting.map((s) => `${s.npc}:${s.initiative}`).join(", ")}`
            : "";
        ctl.debug(
          `[RolePlayMaster] stance: ${resisting.length}/${stances.length} resisting` +
            resistingList +
            ` | initiative: ${acting.length}/${stances.length} acting` +
            actingList +
            `\n--- action ---\n${socialAction}\n--- parsed ---\n${JSON.stringify(parsed)}`,
        );
      }
    } catch (err) {
      // Best-effort pass: on any failure `stances` keeps its prior value and the
      // scene is narrated as-is; the error is only visible in the debug log.
      if (config.get("debugLogging")) {
        ctl.debug(`[RolePlayMaster] stance pass failed (scene narrated as-is): ${String(err)}`);
      }
    }
  } else if (stanceEligible && socialCast.length > 0 && config.get("debugLogging")) {
    // Skipped by the trigger gate: there is an active cast, but none of it is on
    // stage or named in the action — nothing for a stance to bind.
    const activatedNames = socialCast.map((c) => c.name).join(", ");
    ctl.debug(
      `[RolePlayMaster] stance skipped — no engaged NPC (activated but off-stage): ` +
        activatedNames,
    );
  }

  // 1d. RECONCILE the social referee with the dice (Phase J ⊕ G3). The dice
  //     decide WHETHER an action lands, but they are rolled without consulting
  //     the NPC's WILL — so the engine could promise a success the character
  //     would never grant, and the narrator would then contradict the dice. The
  //     two are bound here. Runs only when a real roll happened AND the stance
  //     pass produced stances.
  if (resolution && !resolution.noRoll && stances.length > 0) {
    // The NPC the roll contests: the engaged NPC whose name or alias equals the
    // referee's `opposedBy` (trimmed, case-insensitive); failing that, the sole
    // engaged NPC of a one-on-one scene. With several engaged NPCs and no match
    // there is no single target ("") and the roll is left to stand.
    const opposedKey = rollOpposedBy.trim().toLowerCase();
    const namedTarget = opposedKey
      ? addressedCast.find((card) =>
          [card.name, ...(card.aliases ?? [])].some(
            (label) => label.trim().toLowerCase() === opposedKey,
          ),
        )?.name
      : undefined;
    const target = namedTarget || (addressedCast.length === 1 ? addressedCast[0].name : "");
    const targetKey = target.trim().toLowerCase();
    const targetStance = targetKey
      ? stances.find((s) => s.npc.trim().toLowerCase() === targetKey)
      : undefined;

    // WILL-GATE. A hard NO from the contested NPC (refuse / hostile / withdraw)
    // means the dice should never have been offered: the roll is discarded and
    // the binding `# Character stance` block narrates the refusal — an honest
    // failure rather than a success the narrator would contradict. The risky
    // move carried no deltas, so dropping the resolution leaves the sheet
    // untouched.
    //
    // EXCEPTION — a PERILOUS roll stands (Phase G3). When the action courts
    // injury or death, failure hurts the PLAYER, and a hostile foe makes it more
    // dangerous, not safe; discarding the roll would spare the player the very
    // risk they chose. Its peril damage is already applied, and the stance still
    // colours how the foe meets them.
    const hardNoVerdicts = ["refuse", "hostile", "withdraw"];
    if (targetStance && !rollHasPeril && hardNoVerdicts.includes(targetStance.verdict)) {
      if (config.get("debugLogging")) {
        ctl.debug(
          `[RolePlayMaster] will-gate: ${target} ${targetStance.verdict} — discarding ` +
            `the roll (${resolution.tier}); narrating the refusal instead of a false success`,
        );
      }
      resolution = null;
      // Nothing moves on a refusal, so clear the consequence forecast — the
      // `# Consequences` block must not tell the narrator a price was paid.
      adjudicationAffects = [];
    } else {
      // Winnable contest: the roll stands and the stances are reconciled with its
      // tier. On a FULL success the contested NPC yielded, so their binding
      // stance is dropped; a partial keeps it (the `wants` becomes the cost) and
      // a miss keeps it (the refusal is the failure).
      const stanceCountBefore = stances.length;
      stances = reconcileStancesWithRoll(stances, {
        rolled: true,
        tier: resolution.tier,
        contested: targetKey ? [targetKey] : [],
      });
      if (config.get("debugLogging") && stances.length !== stanceCountBefore) {
        ctl.debug(
          `[RolePlayMaster] stance reconciled with dice (${resolution.tier} vs ${target}): ` +
            `dropped ${stanceCountBefore - stances.length} contested stance(s)`,
        );
      }
    }
  }

  // 2. Plan the turn (pure): keyword + semantic lore selection. The planner's
  //    settings are gathered first, then handed over with this turn's inputs.
  const plannerConfig = {
    narrationStyle,
    enableChoices: config.get("enableChoices"),
    choiceCount: tuning.choiceCount,
    responseLanguage: globalConfig.get("responseLanguage"),
    // Lore selection.
    loreBudgetTokens: tuning.loreBudgetTokens,
    loreSemanticThreshold: tuning.loreSemanticThreshold,
    loreSemanticTopK: tuning.loreSemanticTopK,
    // Cast selection.
    npcActivation,
    scenePresence: tuning.scenePresence,
    relationPullMinDisposition: tuning.relationPullMinDisposition,
    npcSemanticThreshold: tuning.npcSemanticThreshold,
    npcSemanticTopK: tuning.npcSemanticTopK,
    // Vector-RAG recall.
    ragEnabled,
    ragThreshold: tuning.ragThreshold,
    ragTopK: tuning.ragTopK,
    ragBudgetTokens: tuning.ragBudgetTokens,
    // Feature toggles.
    relationshipMemory: config.get("relationshipMemory"),
    volition: config.get("volitionEnabled"),
    knowledgeGating: config.get("knowledgeGating"),
    dramaticArc: config.get("dramaticArc"),
    timeWeather: config.get("timeWeather"),
    firstMentionIntros: config.get("firstMentionIntros"),
    // Narration length: compact baseline + inverse-intensity word target.
    narrationLength: tuning.narrationLength,
    narrationCalmWords: tuning.narrationCalmWords,
    narrationIntenseWords: tuning.narrationIntenseWords,
    narrationBaselineIntensity: tuning.narrationBaselineIntensity,
    narrationCombatIntensity: tuning.narrationCombatIntensity,
  };
  const plan = planTurn({
    state,
    lastUserText,
    prevAssistantText,
    characters,
    lore,
    recentText,
    // Embedding results from the semantic passes (null / empty when a pass was
    // off or produced nothing).
    recentTextEmbedding: queryEmbedding,
    loreEmbeddings: entryEmbeddings,
    castEmbeddings,
    // Vector-RAG recall (Phase F): the store and its vectors. `windowMessages`
    // lets recall skip events still verbatim in the sent window.
    memoryStore: state.memory.store,
    memoryEmbeddings,
    windowMessages: windowTexts,
    // Structured mechanics (Phase G): the dice were already rolled above; the
    // planner only forwards these to the assembler.
    rulesDef,
    // Dramatic arc (Phase I): the current act; the conductor that advances it
    // runs post-narration.
    arc,
    // World clock: the authored model for the `# Time & weather` block; null
    // when the feature is off.
    chronosModel,
    sheet,
    combatants,
    resolution,
    resolutionNote: adjudicationNote,
    adjudication,
    affects: adjudicationAffects,
    // Character volition (Phase J): the per-NPC stances ruled above, rendered
    // into the binding `# Character stance` block.
    stances,
    gameOver,
    config: plannerConfig,
  });

  // 3. Build the chat we send, from the plain-turns `convo`: the processed
  //    action takes the place of the last user message (or is appended when the
  //    conversation is empty or does not end on a user turn), then the result is
  //    compacted (see director/window.ts) — bare numbered picks are expanded
  //    into explicit actions and the stretch already covered by `# Story so far`
  //    is pruned. The pulled `chat` itself is never modified here.
  if (convo.length === 0 || convo[convo.length - 1].role !== "user") {
    convo.push({ role: "user", content: plan.playerAction });
  } else {
    convo[convo.length - 1] = { role: "user", content: plan.playerAction };
  }
  const windowed = windowConversation(convo, {
    summarizedCount: state.memory.summarizedMessageCount,
    bridge: tuning.contextBridge,
    prune: tuning.pruneSummarized,
    expandPicks: tuning.expandPicks,
    pickPrefix: "I choose: ",
  });

  // 3b. Assemble the outgoing message list: our system prompt + the compacted
  //     turns. Author's-note (Phase D, Lot 3) is spliced `authorNoteDepth`
  //     messages before the end, so a condensed steering reminder sits close to
  //     generation even when the top direction block has scrolled far away.
  //     Gated: a depth below 1 = disabled; re-injected only every
  //     `authorNoteInterval`-th turn. Building a fresh array (then one
  //     `Chat.from`) means the splice is a plain index op — no pop/re-append
  //     gymnastics.
  const outgoingMessages: {
    role: "system" | "user" | "assistant";
    content: string;
  }[] = [{ role: "system", content: plan.systemPrompt }, ...windowed.messages];
  const turnNo = state.turn + 1;
  const noteInterval = Math.max(1, Math.floor(tuning.authorNoteInterval));
  let noteInjected = "";
  // The gate is `>= 1`, not `> 0`: the depth is floored below, so a fractional
  // value under 1 would floor to 0 and splice the note at the very END — after
  // the player's latest message — even though depth 0 means "disabled".
  if (tuning.authorNoteDepth >= 1 && turnNo % noteInterval === 0) {
    const note = authorNote(plan.director);
    if (note) {
      // Clamp to the number of conversation messages so the note can never land
      // in front of the leading system prompt.
      const depth = Math.min(
        Math.floor(tuning.authorNoteDepth),
        windowed.messages.length,
      );
      outgoingMessages.splice(outgoingMessages.length - depth, 0, {
        role: "system",
        content: note,
      });
      noteInjected = note;
    }
  }
  const outgoing = Chat.from(outgoingMessages);

  // Debug dump of the whole turn: what was selected, what is sent, and why.
  if (config.get("debugLogging")) {
    // Per-entry similarity, to tune loreSemanticThreshold from real numbers.
    // Either a line ending in "\n" or the empty string.
    let semLine = "";
    if (queryEmbedding) {
      const queryVec = queryEmbedding;
      const scores = lore.map((entry, index) => {
        const entryVec = entryEmbeddings[index];
        const label = entry.comment || entry.keys.join("/") || "(constant)";
        const sim = entryVec ? cosineSimilarity(queryVec, entryVec).toFixed(3) : "n/a";
        return `${label}=${sim}`;
      });
      semLine =
        `--- semantic similarities (floor ${tuning.loreSemanticThreshold}, ` +
        `top-K ${tuning.loreSemanticTopK}): ${scores.join(", ")}\n`;
    } else if (semanticOn) {
      // Semantic was requested but produced no vectors — say why.
      semLine =
        `--- semantic INACTIVE (fell back to keyword-only): ` +
        `${embedError ?? "no embedding produced"}\n`;
    }

    const castNames = plan.cast.map((c) => c.name).join(", ") || "(none)";
    const offSceneNames = plan.mentioned.map((c) => c.name).join(", ") || "(none)";
    const ragLabel = ragEnabled ? (ragOn ? "on" : "on, empty store") : "off";

    // One entry per output line; the author-note line exists only on turns where
    // a note was actually injected.
    const headerLines = [
      `[RolePlayMaster] universe "${universe}" turn ${state.turn + 1}`,
      `--- lore: ${plan.lore.length}/${lore.length} entries selected ` +
        `(budget ${tuning.loreBudgetTokens} tokens, ` +
        `scan depth ${tuning.loreScanDepth}, ` +
        `semantic ${semanticOn ? "on" : "off"})`,
      `--- cast: ${plan.cast.length}/${characters.cast.length} on stage ` +
        `(activation ${npcActivation ? "on" : "off"}, presence ` +
        `${tuning.scenePresence ? "on" : "off"}): ${castNames}`,
      `--- off-scene: ${plan.mentioned.length} referenced not voiced: ${offSceneNames}`,
      `--- recall: ${plan.recalled.length}/${state.memory.store.length} past ` +
        `events (RAG ${ragLabel}, ` +
        `floor ${tuning.ragThreshold}, top-K ${tuning.ragTopK})`,
      `--- context window: ${convo.length} turns → ${windowed.messages.length} sent ` +
        `(pruned ${windowed.pruned} summarized [marker ${state.memory.summarizedMessageCount}, ` +
        `bridge ${tuning.contextBridge}], expanded ${windowed.expanded} pick(s))`,
    ];
    if (noteInjected) {
      headerLines.push(
        `--- author-note (turn ${turnNo}, interval ${noteInterval}): ${noteInjected}`,
      );
    }
    ctl.debug(
      headerLines.join("\n") +
        "\n" +
        semLine +
        `--- system prompt ---\n${plan.systemPrompt}\n` +
        `--- user ---\n${plan.playerAction}`,
    );
  }

  // Status notice. When a semantic pass was on but an embedding error was
  // reported, say so in the notice itself — otherwise the only sign would be the
  // (off by default) debug log, and the user's enabled toggle would silently do
  // nothing. Non-blocking either way.
  ready(
    semanticOn && embedError
      ? "Context ready — semantic matching off (no embedding model; using keywords)"
      : "Role-play context ready",
  );

  // (`model` was resolved up front, before the Phase G sheet seeding above.)

  // Onboarding, shown once per story. It is keyed on the persisted `onboarded`
  // flag rather than the turn counter, so existing stories and new save slots
  // all get it on their next turn. Two optional blocks are emitted before the
  // narration: a readiness report (config sanity + how-to-play, only when the
  // `setupCheck` option is on) and a title card. LM Studio can't validate
  // plugin-specific setup, so this is where an empty universe or a missing
  // embedding model gets surfaced.
  let onboarded = state.onboarded;
  if (!state.onboarded) {
    if (config.get("setupCheck")) {
      // If a semantic pass ran, a non-null query vector means a model answered.
      // If none ran, nothing was embedded, so probe whether one is loaded; a
      // failing probe counts as "not available".
      let embeddingAvailable = queryEmbedding !== null;
      if (!semanticOn) {
        try {
          const loadedEmbedders = await ctl.client.embedding.listLoaded();
          embeddingAvailable = loadedEmbedders.length > 0;
        } catch {
          embeddingAvailable = false;
        }
      }
      // The world counts as "set" with a real (non-blank, non-"Untitled") name
      // or any setting text.
      const worldIsNamed =
        state.world.name.trim() !== "" && state.world.name !== "Untitled";
      const report = buildSetupReport({
        universe,
        storyName: storeOptions.save,
        hasPlayer: characters.player !== null,
        playerName: characters.player?.name ?? "",
        npcCount: characters.cast.length,
        loreCount: lore.length,
        worldName: state.world.name,
        worldSet: worldIsNamed || state.world.setting.trim() !== "",
        semanticRequested: semanticEnabled,
        embeddingAvailable,
        realModelTokenSource: acceptsSamplingConfig(model),
        mechanicsActive: rulesDef !== null,
      });
      // `includeInContext: false`: the player sees the report, but it never
      // re-enters the history — so it can't leak into the sent window or pollute
      // the rolling summary / RAG store.
      const reportBlock = ctl.createContentBlock({ includeInContext: false });
      reportBlock.appendText(report.text);
    }

    // The curtain rises: a framed world title + premise, placed between the
    // onboarding text and the narration. Also `includeInContext: false`, so it
    // stays cosmetic. Skipped when the builder returns nothing.
    const titleCard = buildTitleCard({
      worldName: state.world.name,
      setting: state.world.setting,
      universe,
    });
    if (titleCard) {
      const titleBlock = ctl.createContentBlock({ includeInContext: false });
      titleBlock.appendText(titleCard);
    }

    onboarded = true; // shown once; don't repeat for this story
  }

  // 4. Generate the narration with a plain streaming `respond()`. The dice were
  //    already rolled before planning (the resolve-pick step above) and the
  //    verdict injected into the prompt as `# Action resolution`, so the model
  //    only writes the scene — it never touches a number. Reasoning fragments
  //    (<think>, harmony <|channel|>) are dropped from the narrative and kept
  //    only for the transcript.
  //
  //    The plugin owns the sampling parameters, so good roleplay generation
  //    doesn't depend on the user's LM Studio preset. A real LLM token source has
  //    them applied; a generator handle owns its own sampling and we don't.
  const sampling = {
    temperature: config.get("temperature"),
    contextOverflowPolicy: tuning.contextOverflowPolicy as LLMContextOverflowPolicy,
    // Response cap in whole tokens; `false` = no cap. The comparison is made on
    // the FLOORED value: a fractional setting below 1 would otherwise pass a
    // `> 0` test and then floor to a cap of 0 tokens, instead of being treated
    // like every other non-positive ("disabled") value.
    maxTokens:
      Math.floor(tuning.maxResponseTokens) > 0
        ? Math.floor(tuning.maxResponseTokens)
        : (false as const),
    // The plugin owns the narration's anti-repetition + min-p sampling too, so good
    // prose doesn't hinge on the user's LM Studio preset: a light min-p floor for
    // coherence and a mild repeat penalty to break phrase-looping over long RP.
    // The OPPOSITE of the strict mechanics passes (which disable both) — here they
    // are ON by default and power-user tunable; a disable value (min-p <= 0,
    // repeat penalty <= 1) maps to `false`.
    minPSampling: tuning.narrationMinP > 0 ? tuning.narrationMinP : (false as const),
    repeatPenalty:
      tuning.narrationRepeatPenalty > 1 ? tuning.narrationRepeatPenalty : (false as const),
  };

  // Phase G — the "swing of fate" readout, shown BEFORE the narration (cause
  // then effect, tabletop-style): the dice were already rolled above (resolve-pick
  // / referee), so the player sees the verdict for the action just attempted, then
  // reads how it plays out. QUALITATIVE + numberless in the play language (keeps
  // the surprise of HOW it resolves), code-rendered, includeInContext:false so it
  // is never echoed back to the model. Null on a no-roll turn → nothing shown. The
  // status line and the next-options risk preview stay AFTER the narration (they
  // read post-accountant values), so this readout is the only reading that moves up.
  if (rulesDef && config.get("showDiceReadout")) {
    const readout = luckReadout(
      resolution,
      { partial: tuning.pbtaPartial, full: tuning.pbtaFull },
      readoutLexicon(globalConfig.get("responseLanguage")),
    );
    if (readout) ctl.createContentBlock({ includeInContext: false }).appendText(readout);
  }

  const block = ctl.createContentBlock();
  let fullText = "";
  // Reasoning/chain-of-thought is dropped from the narrative but kept for the
  // transcript (cheap; only meaningful when logging).
  let reasoningText = "";
  // Sink for every player-visible fragment: accumulate it into `fullText` (the
  // raw reply, sanitized once streaming ends) and push it to the narration
  // block right away.
  const onVisible = (visibleChunk: string): void => {
    fullText = fullText + visibleChunk;
    block.appendText(visibleChunk);
  };

  // Next turn's pre-declared option moves and this reply's applied effects. Both
  // start from the resolve-pick step (the player's chosen-move resolution rolled
  // at the top of the turn) and are then rewritten by Pass 2 below: the
  // accountant reads the narration and emits the emergent effects + the tagged
  // options for next turn. `turnEffects` feeds the rendered change cue.
  let pendingChoices: PendingChoice[] = [];
  let turnEffects: ResolvedEffect[] = resolution ? [...resolution.effects] : [];

  // Re-arm the spinner before generating: the narration is streamed, but the
  // first token can lag behind a large prompt — without this the status would
  // still read "context ready" through that wait.
  working("Narrating…");
  const prediction = acceptsSamplingConfig(model)
    ? model.respond(outgoing, sampling)
    : model.respond(outgoing);
  for await (const fragment of prediction) {
    ctl.guardAbort();
    if (fragment.reasoningType && fragment.reasoningType !== "none") {
      reasoningText += fragment.content;
      continue;
    }
    onVisible(fragment.content);
  }
  const mainResult = await prediction.result();
  recordTokens("MAIN GENERATION", mainResult);

  // Strip hallucinated tags (e.g. <ignore_options>) from the prose. We streamed
  // the raw fragments live for responsiveness; now rewrite the block with the
  // cleaned text so the tags never sit in the final reply — and use the same
  // cleaned text for memory/naming, so they don't re-enter history and get imitated.
  const cleanedText = sanitizeNarration(fullText);
  if (cleanedText !== fullText) block.replaceText(cleanedText.replace(/\s+$/, ""));

  // Memory + naming digest the VISIBLE prose only.
  const replyText = cleanedText.replace(/\s+$/, "");

  // Transcript: the main exchange — the assembled prompt + the reply.
  await transcript?.record({
    label: "MAIN GENERATION",
    kind: "main",
    purpose:
      "The actual role-play turn — generates the narration the player sees. The " +
      "[system] message is the full assembled prompt (world, lore, characters, " +
      "status, memory, all instructions); every [user]/[assistant] message is the " +
      "real conversation; the last [user] is the player's action this turn.",
    note: acceptsSamplingConfig(model)
      ? undefined
      : "generator handle — plugin sampling config not applied",
    messages: chatMessages(outgoing),
    config: acceptsSamplingConfig(model) ? sampling : undefined,
    reasoning: reasoningText,
    response: fullText,
  });

  // ACCOUNT (Pass 2 — the accountant): read the narration just shown and emit one
  // schema-forced structured object — the emergent resource changes and any new
  // foe — then apply it to the ledger. Only this pass writes hard numbers, so
  // there is a single writer and no double counting (Pass 1 never touched a
  // value). Gated on a real `LLM` token source (needed for `respond({ structured
  // })`, exactly like sheet generation); skipped gracefully otherwise — the turn
  // still narrated, the ledger simply doesn't move (the self-correcting loop
  // tolerates it). A universe without `rules.json` never reaches this.
  //
  // Also skipped once `gameOver` is set this turn: the player has fallen, the
  // ledger is closed, so there is nothing more to bill — and a stray post-death
  // cost (the bug the hard-stop addresses) is avoided at the source.
  if (rulesDef && acceptsSamplingConfig(model) && !gameOver) {
    try {
      const statusText = statusBlock(sheet, combatants, rulesDef) ?? "";
      const schema = buildExtractionSchema(rulesDef);
      // Feed the player's action too: it is sometimes more precise than the prose
      // (player says "3 coins", narrator writes "a few coins"), and the accountant
      // is told to trust the player's number so the ledger stays exact.
      // The social referee's negotiation hooks (Phase J): for each NPC that would
      // only be moved in exchange for something, the price — so if the scene shows
      // the player paying it, the accountant lands the cost on the ledger.
      const bargains = stances
        .filter((s) => (s.verdict === "negotiate" || s.verdict === "hesitant") && s.wants.trim())
        .map((s) => `${s.npc} is moved only by: ${s.wants.trim()}`);
      const { system, user } = buildExtractionPrompt(
        rulesDef,
        statusText,
        plan.playerAction,
        replyText,
        // The referee's forecast (Phase G3.1): a checklist so the accountant
        // doesn't miss a flagged change the narration depicts but states tersely.
        adjudicationAffects,
        bargains,
        // What the engine already charged/credited this turn (the resolved pick's
        // certain cost, or the roll's deltas) — so the accountant doesn't re-bill a
        // payment the narration depicts but the engine has already applied. At this
        // point `turnEffects` is exactly `resolution.effects` (not yet extended).
        turnEffects,
      );
      const extractConfig = { temperature: 0.2, maxTokens: 400, structured: schema, ...STRICT_SAMPLING };
      const extractChat = Chat.from([
        { role: "system", content: system },
        { role: "user", content: user },
      ]);
      working("Tallying the consequences…");
      const extraction = model.respond(extractChat, extractConfig);
      const extractResult = await extraction.result();
      const parsed = await recordStructured(
        extractResult,
        {
          label: "MECHANICS EXTRACTION",
          purpose:
            "Reads the narration just shown to the player and converts it into ledger " +
            "changes (emergent resource effects + new enemies). The single writer of " +
            "hard numbers — its [user] block is the current # Status plus the narration " +
            "to analyse, by design NOT the game's prompt. Low temperature, structured " +
            "output; does not narrate.",
          system,
          user,
          config: extractConfig,
        },
        { recordTokens, transcript },
      );
      const applied = applyExtraction(
        parsed,
        { sheet, combatants, npcSheets, turnEffects, gameOver, maxCombatants: tuning.maxCombatants },
        rulesDef,
        () => Math.random(),
      );
      sheet = applied.sheet;
      combatants = applied.combatants;
      npcSheets = applied.npcSheets;
      turnEffects = applied.turnEffects;
      gameOver = applied.gameOver;
      pendingChoices = applied.pendingChoices;
      if (config.get("debugLogging")) {
        ctl.debug(
          `[RolePlayMaster] extraction: ` +
            `effects ${turnEffects.length}, combatants ${Object.keys(combatants).length}, ` +
            `options ${pendingChoices.length}, gameOver ${gameOver}\n` +
            `--- parsed ---\n${JSON.stringify(parsed)}`,
        );
      }
    } catch (err) {
      if (config.get("debugLogging")) {
        ctl.debug(
          `[RolePlayMaster] extraction failed (ledger unchanged this turn): ${String(err)}`,
        );
      }
    }
  }

  if (rulesDef) {
    if (config.get("debugLogging")) {
      const r = resolution;
      ctl.debug(
        `[RolePlayMaster] mechanics: ` +
          (r
            ? `roll ${r.noRoll ? "none(certain)" : `${r.d1}+${r.d2}${r.stat ? `+${r.modifier}(${r.stat})` : ""}${r.difficultyModifier ? `${r.difficultyModifier > 0 ? "+" : ""}${r.difficultyModifier}(diff)` : ""}${r.opposingModifier ? `${r.opposingModifier > 0 ? "+" : ""}${r.opposingModifier}(vs ${r.opposingLabel || r.opposedBy})` : ""}=${r.total}`} → ${r.tier}, ` +
              `effects ${r.effects.length}, defeated ${r.defeated.join(",") || "none"}` +
              (r.affordable ? "" : " (UNAFFORDABLE — rejected)")
            : "no roll this turn") +
          `; combatants ${Object.keys(combatants).length}; ` +
          `pendingChoices ${pendingChoices.length}; gameOver ${gameOver}`,
      );
    }

    // Status line (Phase G): shown AFTER the narration, in its own content block
    // marked `includeInContext: false` — displayed to the player but NEVER fed
    // back to the model (otherwise it imitates it). Reflects this turn's
    // resolution and any enemy introduced this turn, with the net change folded
    // inline next to each value it moved (e.g. "🪙 10 (-3)"), so the player sees
    // both the value and what moved it even when the prose omits it.
    const thresholds = { partial: tuning.pbtaPartial, full: tuning.pbtaFull };
    // The player-facing "system" layer (Phase G — opt-in): a QUALITATIVE,
    // numberless reading in the play language. The swing of fate for the roll just
    // resolved is shown BEFORE the narration (see above, cause then effect); here
    // we render the post-narration readings — the current status line, and (looking
    // ahead) the taste of the gamble for the risky options just offered. The latter
    // need post-accountant values (the moved-up readout does not), so they stay
    // here. Code-rendered, includeInContext:false — never echoed to the model.
    const lex = readoutLexicon(globalConfig.get("responseLanguage"));
    if (config.get("showStatusLine")) {
      const text = statusLine(
        sheet,
        combatants,
        rulesDef,
        characters.player?.name ?? "",
        turnEffects,
      );
      if (text) ctl.createContentBlock({ includeInContext: false }).appendText(text);
    }
    if (config.get("showDiceReadout")) {
      const preview = riskPreview(pendingChoices, { player: sheet, combatants }, thresholds, lex);
      if (preview) ctl.createContentBlock({ includeInContext: false }).appendText(preview);
    }
  }

  // 5. Name the conversation once. A prediction loop handler owns generation, so
  //    LM Studio no longer auto-names the chat from the first reply — it would
  //    stay "New Chat". We suggest a name only while the chat still needs one
  //    (`needsNaming()` respects a name the user set by hand). Prefer the explicit
  //    save name; else ask the model for a short title from the opening, falling
  //    back to the world/universe so it is never blank. Best-effort + cosmetic:
  //    any failure here must never break the turn.
  if (tuning.autoNameChat) {
    try {
      if (await ctl.needsNaming()) {
        const maxWords = tuning.autoNameMaxWords;
        let title = cleanTitle(storeOptions.save, maxWords, 60);
        if (!title) {
          const opening = [lastUserText, replyText]
            .map((s) => s.trim())
            .filter(Boolean)
            .join("\n\n")
            .slice(0, 1200);
          if (opening) {
            const { system, user } = buildTitlePrompt(opening, maxWords);
            const titleChat = Chat.from([
              { role: "system", content: system },
              { role: "user", content: user },
            ]);
            const titleConfig = { temperature: 0.4, maxTokens: Math.max(16, maxWords * 6) };
            working("Naming this story…");
            const titlePrediction = acceptsSamplingConfig(model)
              ? model.respond(titleChat, titleConfig)
              : model.respond(titleChat);
            let rawTitle = "";
            let titleReasoning = "";
            for await (const fragment of titlePrediction) {
              ctl.guardAbort();
              if (fragment.reasoningType && fragment.reasoningType !== "none") {
                titleReasoning += fragment.content;
                continue;
              }
              rawTitle += fragment.content;
            }
            const titleResult = await titlePrediction.result();
            recordTokens("CHAT TITLE", titleResult);
            await transcript?.record({
              label: "CHAT TITLE",
              kind: "auxiliary",
              purpose:
                "Suggests a short chat title from the opening exchange (the loop " +
                "handler owns generation, so LM Studio won't auto-name the chat). " +
                "Cosmetic; its [user] block is the opening text, not the game's " +
                "prompt. Does not narrate.",
              messages: [
                { role: "system", content: system },
                { role: "user", content: user },
              ],
              config: acceptsSamplingConfig(model) ? titleConfig : undefined,
              reasoning: titleReasoning,
              response: rawTitle,
            });
            title = cleanTitle(rawTitle, maxWords, 60);
          }
        }
        if (!title) title = cleanTitle(state.world.name || universe, maxWords, 60);
        if (title) {
          await ctl.suggestName(title);
          if (config.get("debugLogging")) {
            ctl.debug(`[RolePlayMaster] suggested chat name: "${title}"`);
          }
        }
      }
    } catch {
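      // Naming is cosmetic — swallow any error so a turn never fails over it.
      // NOTE(review): this also catches whatever `ctl.guardAbort()` throws in the
      // title loop above, so an abort raised there is swallowed too — confirm
      // that is intended.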
    }
  }

  // 6. Update long-term memory (Phase C summary + Phase F RAG store) — AFTER
  //    streaming the reply, so the player never blocks on it, and folding in the
  //    turn that just finished. Both artifacts are stored for the NEXT turn; this
  //    turn already injected the previously-stored ones (via planTurn).
  let memory = state.memory;
  // Relationship memory (the relationship pass): standing + per-pair history.
  // Declared out here so it persists whether or not the end-of-turn block runs.
  let relationships = state.relationships;
  let relationshipsDigestedCount = state.relationshipsDigestedCount;
  // Character psyche (Phase J — the fluctuating state layer): written by the same
  // social pass as `relationships` (+0 model calls), read by the social referee
  // and narrator. Declared out here so it persists whether or not the pass runs.
  let psyche = state.psyche;
  // Knowledge gating (the revelation system): what the player has learned so far.
  // Declared out here so it persists whether or not the digest pass runs below.
  let knowledge = state.knowledge;
  // First-mention introduction (the disclosure system): which lore/NPCs the
  // narration has already introduced. Declared out here so it persists whether or
  // not the pure marker below runs.
  let disclosure = state.disclosure;
  const relationshipMemory = config.get("relationshipMemory");
  if (config.get("summaryEnabled") || ragEnabled || relationshipMemory) {
    // The full conversation as memory sees it: prior turns + the new reply,
    // cleaned (narration only — no option menus; player picks expanded to the
    // action actually taken) so the summary and RAG store record what happened.
    const conversation = conversationForMemory(
      [...memoryMessages, { role: "assistant", content: replyText }],
      "I choose: ",
    );

    // Edit robustness: if history shrank below a marker (a message was edited /
    // regenerated / deleted), the summary and/or the store may describe events
    // that no longer exist — invalidate whichever is now ahead before updating.
    memory = reconcile(memory, conversation.length);
    // Edit-robustness for the relationship marker too (mirrors `reconcile`): if
    // history shrank below it, fold again from a safe point. The per-pair
    // summaries may then re-absorb a little (acceptable at MVP — never a fact lost).
    if (relationshipsDigestedCount > conversation.length) {
      relationshipsDigestedCount = conversation.length;
    }

    // End-of-turn model passes, run in PARALLEL (Promise.all below): the rolling
    // summary (Phase C) and the relationship pass. They share one trigger cadence
    // and both sit off the player's critical path (the reply already streamed),
    // and they write independent state slices (`memory.summary` vs
    // `relationships`) so there is no cross-dependency. (A shared backend may
    // still serialize the two `respond()` calls; the orchestration is correct
    // regardless, and never worse than sequential.)
    const endOfTurnPasses: Promise<void>[] = [];

    // 6a. Rolling summary (Phase C).
    if (config.get("summaryEnabled")) {
      const decision = shouldSummarize(memory, conversation, {
        intervalMessages: tuning.summaryIntervalMessages,
        intervalWords: tuning.summaryIntervalWords,
        protectTail: tuning.summaryProtectTail,
      });

      if (decision.should) {
        working("Updating memory…");
        endOfTurnPasses.push(
          (async () => {
        // Bridge: the last already-summarized message(s) just before the delta —
        // the narration that prompted the first new action — shown as read-only
        // context so the summary input never starts mid-exchange with a bare pick.
        const summaryStart = Math.max(
          0,
          Math.min(memory.summarizedMessageCount, conversation.length),
        );
        const bridgeN = Math.max(0, Math.floor(tuning.summaryBridge));
        const summaryBridge =
          bridgeN > 0
            ? conversation.slice(Math.max(0, summaryStart - bridgeN), summaryStart)
            : [];
        const { system, user } = buildSummaryPrompt(
          memory.summary,
          decision.delta,
          summaryBridge,
          tuning.summaryTargetWords,
        );

        // A throwaway chat for the summarization call: it is NOT streamed to the
        // player. Low temperature (faithful, not creative); reasoning fragments
        // are dropped like in the main generation. Reuses the player's model
        // (`tokenSource`) per the MVP. No maxTokens cap — the length target is a
        // SOFT prompt instruction (`summaryTargetWords`), so the summary is never
        // truncated mid-sentence; the model recompresses toward the target itself.
        const summaryChat = Chat.from([
          { role: "system", content: system },
          { role: "user", content: user },
        ]);
        const summaryConfig = {
          temperature: 0.3,
          maxTokens: false as const,
        };
        const summaryPrediction = acceptsSamplingConfig(model)
          ? model.respond(summaryChat, summaryConfig)
          : model.respond(summaryChat);

        let summaryText = "";
        let summaryReasoning = "";
        for await (const fragment of summaryPrediction) {
          ctl.guardAbort();
          if (fragment.reasoningType && fragment.reasoningType !== "none") {
            summaryReasoning += fragment.content;
            continue;
          }
          summaryText += fragment.content;
        }
        const summaryResult = await summaryPrediction.result();
        recordTokens("MEMORY SUMMARY", summaryResult);
        await transcript?.record({
          label: "MEMORY SUMMARY",
          kind: "auxiliary",
          purpose:
            "Compresses the story so far into the rolling long-term-memory summary " +
            "(reinjected next turn as '# Story so far'). Its [user] block is the " +
            "conversation to summarize — by design, NOT the game's prompt. Runs at " +
            "low temperature and does not narrate.",
          messages: [
            { role: "system", content: system },
            { role: "user", content: user },
          ],
          config: acceptsSamplingConfig(model) ? summaryConfig : undefined,
          reasoning: summaryReasoning,
          response: summaryText,
        });
        summaryText = summaryText.trim();

        // Only commit a non-empty summary; a blank result leaves memory untouched.
        // Preserve the RAG store fields (spread) — only summary fields change here.
        if (summaryText) {
          memory = {
            ...memory,
            summary: summaryText,
            summarizedMessageCount: decision.newCount,
          };
          ready("Memory updated");
          if (config.get("debugLogging")) {
            ctl.debug(
              `[RolePlayMaster] summary triggered: delta ${decision.delta.length} msgs / ` +
                `${decision.deltaWords} words → ${countWords(summaryText)}-word summary\n` +
                `--- summary ---\n${summaryText}`,
            );
          }
        }
          })(),
        );
      }
    }

    // 6d. Relationship pass — the disposition drift + per-pair shared-history
    //     summary, in PARALLEL with the rolling summary. Structured output, so it
    //     needs a real `LLM` (familiarity itself is bumped in code below, with no
    //     model — that path keeps working on a generator handle). Gated on a
    //     player persona + at least one active NPC, and uses its OWN digest marker
    //     so it folds only events new since its last run.
    if (
      relationshipMemory &&
      acceptsSamplingConfig(model) &&
      characters.player &&
      plan.cast.length > 0
    ) {
      const relDecision = shouldSummarize(
        { ...memory, summarizedMessageCount: relationshipsDigestedCount },
        conversation,
        {
          intervalMessages: tuning.summaryIntervalMessages,
          intervalWords: tuning.summaryIntervalWords,
          protectTail: tuning.summaryProtectTail,
        },
      );
      if (relDecision.should) {
        working("Updating memory…");
        const playerName = characters.player.name;
        const pairs = activePairs(plan.cast.map((c) => c.name));
        const bridgeN = Math.max(0, Math.floor(tuning.summaryBridge));
        const relStart = Math.max(
          0,
          Math.min(relationshipsDigestedCount, conversation.length),
        );
        const relBridge =
          bridgeN > 0
            ? conversation.slice(Math.max(0, relStart - bridgeN), relStart)
            : [];
        endOfTurnPasses.push(
          (async () => {
            try {
              const schema = buildRelationshipSchema(pairs);
              const promptPairs = pairs.map((p) => ({
                npcName: p.npcName,
                rel: getRelationship(relationships, p.key),
              }));
              const { system, user } = buildRelationshipPrompt(
                playerName,
                promptPairs,
                relDecision.delta,
                relBridge,
              );
              const relConfig = {
                temperature: tuning.relationshipTemperature,
                maxTokens: Math.max(1, Math.floor(tuning.relationshipMaxTokens)),
                structured: schema,
                ...STRICT_SAMPLING,
              };
              const relChat = Chat.from([
                { role: "system", content: system },
                { role: "user", content: user },
              ]);
              const relResult = await model.respond(relChat, relConfig).result();
              const parsed = await recordStructured(
                relResult,
                {
                  label: "RELATIONSHIPS",
                  purpose:
                    "Updates how the in-scene characters regard the player and the " +
                    "shared history between them (reinjected next turn as the " +
                    "'# Relationships' block). Its [user] block is each pair's current " +
                    "standing plus the new narration to digest — by design NOT the " +
                    "game's prompt. Low temperature, structured output; does not narrate.",
                  system,
                  user,
                  config: relConfig,
                },
                { recordTokens, transcript },
              );
              const applied = applyRelationshipExtraction(
                parsed,
                relationships,
                pairs,
                state.turn + 1,
                {
                  dispositionMax: tuning.relationshipDispositionMax,
                  deltaCap: tuning.relationshipDeltaCap,
                  // Trust scar (low-water mark): cap recovery so a grave breach
                  // leaves lasting damage. 0 in tuning disables it.
                  scarFactor: tuning.relationshipScarFactor,
                },
              );
              relationships = applied.relationships;
              relationshipsDigestedCount = relDecision.newCount;
              // Phase J: fold the psyche (mood / intent / goalFocus) out of the
              // SAME parsed object — no extra model call. Writes into State.psyche,
              // read next turn by the social referee and the narrator's cast block.
              const psycheApplied = applyPsycheExtraction(
                parsed,
                psyche,
                pairs,
                state.turn + 1,
              );
              psyche = psycheApplied.psyche;
              if (config.get("debugLogging")) {
                ctl.debug(
                  `[RolePlayMaster] relationships: updated ` +
                    `${applied.updated.join(", ") || "none"} ` +
                    `· psyche: ${psycheApplied.updated.join(", ") || "none"} ` +
                    `(delta ${relDecision.delta.length} msgs)\n` +
                    `--- parsed ---\n${JSON.stringify(parsed)}`,
                );
              }
            } catch (err) {
              if (config.get("debugLogging")) {
                ctl.debug(
                  `[RolePlayMaster] relationship pass failed (standing unchanged): ${String(err)}`,
                );
              }
            }
          })(),
        );
      }
    }

    if (endOfTurnPasses.length > 0) {
      await Promise.all(endOfTurnPasses);
      ready("Memory updated");
    }

    // 6b. Vector-RAG store (Phase F): archive the messages that have left the
    //     protected recent window so their verbatim detail survives for recall.
    //     No model call — pure text bookkeeping (the vectors are produced lazily
    //     at recall time and cached). Independent of the summary trigger.
    if (ragEnabled) {
      const ext = extendStore(memory.store, memory.storedMessageCount, conversation, {
        protectRecent: tuning.ragProtectRecent,
        maxStore: tuning.ragMaxStore,
        turn: state.turn + 1,
      });
      const added = ext.storedMessageCount - memory.storedMessageCount;
      memory = { ...memory, store: ext.store, storedMessageCount: ext.storedMessageCount };
      if (config.get("debugLogging") && (added > 0 || ext.dropped > 0)) {
        ctl.debug(
          `[RolePlayMaster] RAG store: archived ${added} message(s), ` +
            `store ${memory.store.length}/${tuning.ragMaxStore}` +
            (ext.dropped > 0 ? ` (dropped ${ext.dropped} oldest)` : ""),
        );
      }
    }
  }

  // 6e. Familiarity in CODE (no model): every turn an NPC shares the scene with
  //     the player, promote a never-met stranger to an acquaintance — so the next
  //     turn the narrator stops treating them as unknown (the visible bug). Runs
  //     even without a real `LLM` and when the relationship pass didn't fire; only
  //     ever raises the floor (it never lowers an existing `known`/`close`), and
  //     applies on top of the pass's result above.
  if (relationshipMemory && characters.player && plan.cast.length > 0) {
    const adv = advanceFamiliarity(
      relationships,
      plan.cast.map((c) => c.name),
      state.turn + 1,
    );
    relationships = adv.relationships;
    if (config.get("debugLogging") && adv.promoted.length > 0) {
      ctl.debug(
        `[RolePlayMaster] familiarity: ${adv.promoted.join(", ")} → acquaintance`,
      );
    }
  }

  // 6f. Revelation digest pass (knowledge gating): when a guarded secret was
  //     unlocked this turn, read the scene just narrated and record which one the
  //     character actually disclosed — growing the player's known-facts set, which
  //     keeps that fact available and opens any secret gated on it next turn. A
  //     cheap structured pass (its output is just a list of ids), off the player's
  //     critical path; runs only when a secret was in play AND we have a real LLM
  //     token source. Failure is swallowed (knowledge simply doesn't grow).
  if (
    config.get("knowledgeGating") &&
    acceptsSamplingConfig(model) &&
    plan.revelationCandidates.length > 0
  ) {
    try {
      const candidateIds = plan.revelationCandidates.map((c) => c.id);
      const schema = buildRevelationSchema(candidateIds);
      const { system, user } = buildRevelationPrompt(
        characters.player?.name ?? "the player",
        plan.revelationCandidates,
        plan.playerAction,
        replyText,
      );
      const revConfig = {
        temperature: 0.2,
        maxTokens: 200,
        structured: schema,
        ...STRICT_SAMPLING,
      };
      const revChat = Chat.from([
        { role: "system", content: system },
        { role: "user", content: user },
      ]);
      working("Noting what was revealed…");
      const revResult = await model.respond(revChat, revConfig).result();
      const parsed = await recordStructured(
        revResult,
        {
          label: "REVELATION",
          purpose:
            "Reads the scene just narrated and records which guarded secret the " +
            "character actually disclosed to the player this turn (knowledge gating). " +
            "Its [user] block is the facts that were in play plus the action + " +
            "narration to judge — by design NOT the game's prompt. What it confirms " +
            "becomes a known fact, keeping it available and opening any secret gated " +
            "on it. Low temperature, structured output; does not narrate.",
          system,
          user,
          config: revConfig,
        },
        { recordTokens, transcript },
      );
      const applied = applyRevelation(parsed, knowledge.playerKnownFacts, candidateIds);
      knowledge = { ...knowledge, playerKnownFacts: applied.playerKnownFacts };
      if (config.get("debugLogging")) {
        ctl.debug(
          `[RolePlayMaster] revelation: learned ` +
            `${applied.learned.join(", ") || "nothing"} ` +
            `(candidates ${candidateIds.join(", ")}; ` +
            `known ${knowledge.playerKnownFacts.length})\n` +
            `--- parsed ---\n${JSON.stringify(parsed)}`,
        );
      }
    } catch (err) {
      if (config.get("debugLogging")) {
        ctl.debug(
          `[RolePlayMaster] revelation pass failed (knowledge unchanged): ${String(err)}`,
        );
      }
    }
  }

  // 6f-bis. First-mention introduction (the disclosure system): a pure, no-model
  //     pass — mark every in-play lore element / NPC whose name actually appears
  //     in the reply as now introduced, so the narrator does not re-introduce it
  //     next turn (and an element flagged but NOT named stays flagged, so its
  //     introduction is never burned). Cheap enough to never need the model, so
  //     no token-source guard — it runs whenever the feature is on and something
  //     undisclosed was in play this turn.
  if (config.get("firstMentionIntros") && plan.disclosureTargets.length > 0) {
    const marked = markDisclosed(disclosure.revealed, plan.disclosureTargets, replyText);
    disclosure = { ...disclosure, revealed: marked.revealed };
    if (config.get("debugLogging") && marked.newlyDisclosed.length > 0) {
      ctl.debug(
        `[RolePlayMaster] disclosure: introduced ${marked.newlyDisclosed.join(", ")} ` +
          `(now ${disclosure.revealed.length} known)`,
      );
    }
  }

  // 6g. Pacing orchestrator (Phase I — the conductor): AFTER the rolling summary,
  //     so it reads the freshest compressed view of the whole story. A structured
  //     pass that does NOT narrate — it reads where the story is in the authored
  //     arc plus the live state and decides tension, a beat (hold/escalate/
  //     introduce/resolve), whether to advance the act, and a few SOFT directive
  //     nudges for the next turn. Gated to a *beat*, not every turn (the cadence
  //     gate below), and to a real LLM token source; failure is swallowed (pacing
  //     simply doesn't move). Its directives are installed in the persist step,
  //     replacing the previous beat's so they never accumulate.
  let pacing = state.pacing;
  // World clock (the chronos subsystem): the live clock + the diegetic signal read
  // off this turn's scene. `chronosSignal` is filled by the conductor pass below
  // (+0 model calls — it piggybacks on a pass already running) when the arc is on;
  // when no such pass runs, it stays null and the clock advances by its gentle
  // code drive alone. The clock itself is advanced (and persisted) after pacing.
  let chronos = state.chronos;
  let chronosSignal: ChronosSignal | null = null;
  // Scene presence (the scene-presence subsystem): carried forward between beats.
  // The conductor's piggyback (below) rewrites it from the scene it just read when
  // scene-presence is on; otherwise it persists unchanged (inert when the feature
  // is off). Read next turn by the planner to tier the cast (present vs off-scene).
  let scene = state.scene;
  let arcDirectives: Directive[] = [];
  let conductorRan = false;
  // Whether the act advanced this beat — the trigger for the chronicler (the
  // session-end hook): a closed act is a closed chapter.
  let conductorAdvanced = false;
  if (arc) {
    pacing = initPacing(arc, pacing, turnNo);
    const act = resolveAct(arc, pacing);
    if (act) {
      const spent = turnNo - pacing.actStartedTurn;
      const canAdvance = spent >= act.minTurns;
      const forceAdvance = act.maxTurns > 0 && spent >= act.maxTurns;
      const isFinalAct = nextAct(arc, act.id) === null;
      const minBeatGap = Math.max(1, Math.floor(tuning.conductorMinBeatGap));
      const dueForBeat = turnNo - pacing.lastBeatTurn >= minBeatGap;
      const canRun = acceptsSamplingConfig(model) && (dueForBeat || forceAdvance);
      if (canRun) {
        try {
          // The conductor's grounded inputs: who is off-stage (so a nudge wakes a
          // real character), and which secrets the arc hasn't opened yet (so a
          // complication it steers toward already exists in the world).
          const activeNames = new Set(plan.cast.map((c) => c.name.toLowerCase()));
          const dormantCast: DormantActor[] = characters.cast
            .filter((c) => !activeNames.has(c.name.toLowerCase()))
            .map((c) => ({
              name: c.name,
              note: c.description.trim().replace(/\s+/g, " ").slice(0, 140),
            }));
          const eligible = actEligibleIds(arc, act.id); // null = every secret open
          const knownSet = new Set(knowledge.playerKnownFacts);
          const lockedFacts: LockedFact[] = [];
          if (eligible) {
            for (const c of cards) {
              for (const s of c.secrets ?? []) {
                if (eligible.has(s.id) || knownSet.has(s.id)) continue;
                if (lockedFacts.some((l) => l.id === s.id && l.npc === c.name)) continue;
                lockedFacts.push({ id: s.id, npc: c.name, surface: (s.surface ?? "").trim() });
              }
            }
          }
          const ctx: ConductorContext = {
            act: { title: act.title, goal: act.goal, mood: act.mood, advanceWhen: act.advanceWhen },
            spent,
            minTurns: act.minTurns,
            maxTurns: act.maxTurns,
            canAdvance,
            forceAdvance,
            isFinalAct,
            tension: pacing.tension,
            openThreads: pacing.openThreads,
            // Two-tier memory: the whole-story summary plus the current chapter,
            // so the conductor paces against the full picture.
            storySummary: [memory.storySummary, memory.summary]
              .filter((s) => s.trim())
              .join("\n\n"),
            recentScene: replyText,
            // The player's own recent moves — the conductor judges `advanceWhen`
            // (a player-behaviour condition) against these, not the narrated scene.
            recentPlayerActions: recentTextsByRole(chat, "user", 3),
            dormantCast,
            introduceable: act.introduces,
            lockedFacts,
            // World-clock piggyback (+0 model calls): when time/weather is on, the
            // conductor ALSO reports the scene's elapsed time + current weather.
            // The chronos vocabularies are passed as plain strings so `arc` stays
            // decoupled from `chronos`; the readings are the live (pre-advance) state.
            chronos: chronosModel
              ? {
                  elapsedBuckets: [...ELAPSED_BUCKETS],
                  palette: chronosModel.weather,
                  currentPhase: resolvePhase(chronosModel, chronos),
                  currentWeather: resolveWeather(chronosModel, chronos),
                }
              : undefined,
            // Scene-presence piggyback (+0 model calls): when scene-presence is on,
            // the conductor ALSO reports where the scene is and who from the roster
            // is physically on stage — written into state.scene for next turn.
            scene: scenePresenceOn
              ? {
                  castNames: characters.cast.map((c) => c.name),
                  currentLocation: scene.location,
                }
              : undefined,
          };
          const schema = buildConductorSchema(
            chronosModel
              ? { elapsedBuckets: [...ELAPSED_BUCKETS], palette: chronosModel.weather }
              : undefined,
            scenePresenceOn ? { castNames: characters.cast.map((c) => c.name) } : undefined,
          );
          const { system, user } = buildConductorPrompt(ctx);
          const condConfig = {
            temperature: tuning.conductorTemperature,
            maxTokens: Math.max(1, Math.floor(tuning.conductorMaxTokens)),
            structured: schema,
            ...STRICT_SAMPLING,
          };
          const condChat = Chat.from([
            { role: "system", content: system },
            { role: "user", content: user },
          ]);
          working("Pacing the story…");
          const condResult = await model.respond(condChat, condConfig).result();
          const parsed = await recordStructured(
            condResult,
            {
              label: "ARC CONDUCTOR",
              purpose:
                "The showrunner/pacing pass — reads where the story is in the authored " +
                "arc plus the freshest summary and the scene just played, then decides " +
                "the dramatic tension, a beat (hold/escalate/introduce/resolve), whether " +
                "to advance the act, and a few SOFT direction nudges for the next turn. " +
                "Its [user] block is the act + live state + the off-stage cast and " +
                "not-yet-opened threads — by design NOT the game's prompt. Runs on a " +
                "beat (not every turn), structured output; does not narrate.",
              system,
              user,
              config: condConfig,
            },
            { recordTokens, transcript },
          );
          const applied = applyConductor(parsed, {
            arc,
            pacing,
            turn: turnNo,
            canAdvance,
            forceAdvance,
            maxDirectives: Math.max(0, Math.floor(tuning.conductorMaxDirectives)),
            maxThreads: Math.max(0, Math.floor(tuning.conductorMaxThreads)),
          });
          pacing = applied.pacing;
          arcDirectives = applied.directives;
          conductorRan = true;
          conductorAdvanced = applied.advanced;
          // World-clock piggyback: turn the conductor's raw readings into a chronos
          // signal that overrides the code drive when the scene actually moved time
          // or the weather. `none`/current readings leave both undefined → the clock
          // drifts on its own. `elapsed` is schema-constrained to a valid bucket;
          // advanceClock tolerates an unexpected value (it maps to 0 steps).
          if (chronosModel && (applied.timeElapsed || applied.weather)) {
            const elapsed =
              applied.timeElapsed && applied.timeElapsed !== "none"
                ? (applied.timeElapsed as ElapsedBucket)
                : undefined;
            const weather = applied.weather || undefined;
            if (elapsed || weather) chronosSignal = { elapsed, weather };
          }
          // Scene-presence piggyback: fold the conductor's reading into state.scene
          // for next turn. The model's `present` is the authoritative roster (an
          // empty list is a valid "nobody here" — trust it); a blank `location`
          // keeps the last known place (the scene did not move).
          if (scenePresenceOn) {
            scene = {
              location: applied.location || scene.location,
              present: applied.present,
              updatedTurn: turnNo,
            };
          }
          if (config.get("debugLogging")) {
            ctl.debug(
              `[RolePlayMaster] conductor: beat ${applied.beat}, tension ` +
                `${pacing.tension.toFixed(2)}, act "${pacing.actId}"` +
                (applied.advanced ? " (ADVANCED)" : ` (spent ${spent}/${act.minTurns}` +
                  (act.maxTurns > 0 ? `–${act.maxTurns}` : "") + ")") +
                `, nudges ${arcDirectives.length}, threads ${pacing.openThreads.length}` +
                (scenePresenceOn
                  ? `\n--- scene: location "${scene.location || "?"}", present [${applied.present.join(", ") || "—"}]`
                  : "") +
                `\n--- parsed ---\n${JSON.stringify(parsed)}`,
            );
          }
        } catch (err) {
          if (config.get("debugLogging")) {
            ctl.debug(
              `[RolePlayMaster] conductor pass failed (pacing unchanged): ${String(err)}`,
            );
          }
        }
      } else if (forceAdvance) {
        // No conductor available (a generator-handle token source) but the act has
        // hit its ceiling — advance in code so the arc never stalls. Also clears any
        // stale arc directives via the persist step (conductorRan flips true).
        pacing = advanceActOnly(arc, pacing, turnNo);
        conductorRan = true;
      }

      // 6g-bis. Chapter close (the conductor's session-end hook): the act just
      //     advanced. Two cheap steps, off the player's critical path, each in
      //     its own try/catch so a failure never undoes the conductor's pacing:
      //       1) two-tier memory — fold the just-ended chapter's summary into the
      //          whole-story summary, then reset the chapter summary for the new
      //          act (its substance now lives in the story summary);
      //       2) chronicler — apply any CONSERVATIVE, bounded sheet evolution the
      //          chapter earned (the model proposes within ±cap; the engine
      //          clamps to the rules' bounds).
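      //     Both need a real LLM token source; a code-only ceiling advance is meant
      //     to still reset the chapter summary so the next act starts clean.
      //     NOTE(review): the code-only branch above (`else if (forceAdvance)`) sets
      //     `conductorRan` but never `conductorAdvanced`, so this whole block —
      //     including the reset — is currently skipped on that path; confirm, and
      //     set the flag there if the reset is intended.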
      if (conductorAdvanced) {
        // The chapter summary as it stands at the close — captured before the
        // reset, used by BOTH the integration and the sheet-evolution pass.
        const closingChapterSummary = memory.summary;

        // 1) Integrate the chapter into the whole-story summary (plain prose,
        //    like the rolling summary). Skipped when there is nothing to fold.
        if (
          config.get("summaryEnabled") &&
          acceptsSamplingConfig(model) &&
          closingChapterSummary.trim()
        ) {
          try {
            const { system, user } = buildStoryIntegrationPrompt(
              memory.storySummary,
              closingChapterSummary,
              replyText,
              act.title,
              tuning.storyTargetWords,
            );
            const integrationConfig = {
              temperature: tuning.chroniclerTemperature,
              maxTokens: false as const,
            };
            const integrationChat = Chat.from([
              { role: "system", content: system },
              { role: "user", content: user },
            ]);
            working("Closing the chapter…");
            const integrationPrediction = model.respond(integrationChat, integrationConfig);
            let integrationText = "";
            let integrationReasoning = "";
            for await (const fragment of integrationPrediction) {
              ctl.guardAbort();
              if (fragment.reasoningType && fragment.reasoningType !== "none") {
                integrationReasoning += fragment.content;
                continue;
              }
              integrationText += fragment.content;
            }
            const integrationResult = await integrationPrediction.result();
            recordTokens("STORY INTEGRATION", integrationResult);
            await transcript?.record({
              label: "STORY INTEGRATION",
              kind: "auxiliary",
              purpose:
                "The chapter-close memory pass (two-tier memory) — fires when the " +
                "conductor advances the act. Folds the just-ended chapter's summary " +
                "into the whole-story summary (reinjected next turn as '# Story so " +
                "far'); the chapter summary is then reset for the new act. Its [user] " +
                "block is the story summary + the closing chapter + its final scene — " +
                "by design NOT the game's prompt. Low temperature; does not narrate.",
              messages: [
                { role: "system", content: system },
                { role: "user", content: user },
              ],
              config: integrationConfig,
              reasoning: integrationReasoning,
              response: integrationText,
            });
            integrationText = integrationText.trim();
            if (integrationText) {
              memory = { ...memory, storySummary: integrationText };
              if (config.get("debugLogging")) {
                ctl.debug(
                  `[RolePlayMaster] chapter close: "${act.title}" folded into story ` +
                    `summary (${countWords(integrationText)} words)`,
                );
              }
            }
          } catch (err) {
            if (config.get("debugLogging")) {
              ctl.debug(
                `[RolePlayMaster] story integration failed (story summary unchanged): ${String(err)}`,
              );
            }
          }
        }

        // Reset the chapter summary for the new act. `summarizedMessageCount` is
        // deliberately NOT advanced, so the old chapter's un-summarized tail rolls
        // into the next chapter's summary rather than being lost.
        memory = { ...memory, summary: "" };

        // 2) Chronicler — conservative, bounded sheet evolution earned this
        //    chapter. The schema is null when there is nothing to evolve (no
        //    rules, or only vital pools) → the pass is skipped entirely.
        const chronSchema = buildChronicleSchema(rulesDef ?? null);
        if (chronSchema && rulesDef && acceptsSamplingConfig(model)) {
          try {
            const { system, user } = buildChroniclePrompt(
              {
                act: { title: act.title, goal: act.goal, mood: act.mood },
                chapterSummary: closingChapterSummary,
                recentScene: replyText,
                sheet,
              },
              rulesDef,
              Math.max(0, Math.floor(tuning.chronicleDeltaCap)),
            );
            const chronConfig = {
              temperature: tuning.chroniclerTemperature,
              maxTokens: Math.max(1, Math.floor(tuning.chroniclerMaxTokens)),
              structured: chronSchema,
              ...STRICT_SAMPLING,
            };
            working("Closing the chapter…");
            const chronResult = await model
              .respond(
                Chat.from([
                  { role: "system", content: system },
                  { role: "user", content: user },
                ]),
                chronConfig,
              )
              .result();
            const chronParsed = await recordStructured(
              chronResult,
              {
                label: "ARC CHRONICLER",
                purpose:
                  "The chapter-close sheet-evolution pass — fires when the conductor " +
                  "advances the act. Reads the closing act + the chapter summary + its " +
                  "final scene + the player's evolvable keys, then may propose " +
                  "CONSERVATIVE, bounded changes to the player's stats/resources " +
                  "(clamped by the engine to the rules' bounds). By design NOT the " +
                  "game's prompt. Structured output; does not narrate.",
                system,
                user,
                config: chronConfig,
              },
              { recordTokens, transcript },
            );
            const chron = applyChronicle(chronParsed, {
              rules: rulesDef,
              sheet,
              deltaCap: Math.max(0, Math.floor(tuning.chronicleDeltaCap)),
            });
            sheet = chron.sheet;
            if (config.get("debugLogging")) {
              ctl.debug(
                `[RolePlayMaster] chronicler: chapter "${act.title}" — sheet changes ` +
                  `${chron.applied.length}` +
                  (chron.applied.length
                    ? `: ${chron.applied.map((a) => `${a.key} ${a.from}→${a.to}`).join(", ")}`
                    : ""),
              );
            }
          } catch (err) {
            if (config.get("debugLogging")) {
              ctl.debug(
                `[RolePlayMaster] chronicler pass failed (sheet unchanged): ${String(err)}`,
              );
            }
          }
        }
      }
    }
  }

  // 6h. World clock (the chronos subsystem): advance time + weather for NEXT turn.
  //     Runs whenever the feature is on, independent of the arc — the gentle code
  //     drive (time creeps a phase every few turns; weather drifts along the
  //     palette) always applies; the conductor's diegetic signal (filled above when
  //     the arc ran) OVERRIDES it, so the clock jumps when the story actually
  //     passed time or turned the weather. Pure math, dice injected — no model call.
  if (chronosModel) {
    const before = chronos;
    chronos = advanceClock(chronosModel, chronos, {
      turn: turnNo,
      turnsPerPhase: tuning.chronosTurnsPerPhase,
      weatherHold: tuning.chronosWeatherHold,
      rng: () => Math.random(),
      signal: chronosSignal,
    });
    if (config.get("debugLogging")) {
      ctl.debug(
        `[RolePlayMaster] chronos: ${resolvePhase(chronosModel, before)} → ${chronos.phase} ` +
          `(day ${chronos.dayCount + 1})` +
          (chronosModel.weather.length > 0
            ? `, weather ${resolveWeather(chronosModel, before)} → ${chronos.weather}`
            : "") +
          (chronosSignal
            ? ` [signal elapsed=${chronosSignal.elapsed ?? "—"} weather=${chronosSignal.weather ?? "—"}]`
            : " [code drive]"),
      );
    }
  }

  // 6b. Record the resolved numbered pick (Phase D, Lot 2): append it to the
  //     choice history, capped to the most-recent `choiceHistoryMax`. Store-only
  //     for now — nothing reads it yet (Phase F/G groundwork). Dropping the
  //     oldest entries is logged so the cap is never a silent truncation.
  let choiceHistory = state.choiceHistory;
  if (plan.resolvedChoice) {
    const appended = [
      ...choiceHistory,
      {
        turn: state.turn + 1,
        index: plan.resolvedChoice.index,
        text: plan.resolvedChoice.text,
      },
    ];
    const cap = Math.max(0, Math.floor(tuning.choiceHistoryMax));
    const dropped = Math.max(0, appended.length - cap);
    choiceHistory = dropped > 0 ? appended.slice(dropped) : appended;
    if (config.get("debugLogging")) {
      ctl.debug(
        `[RolePlayMaster] choice recorded: turn ${state.turn + 1}, ` +
          `option ${plan.resolvedChoice.index} → "${plan.resolvedChoice.text}"; ` +
          `history ${choiceHistory.length}/${cap}` +
          (dropped > 0 ? ` (dropped ${dropped} oldest)` : ""),
      );
    }
  }

  // End-of-turn token summary: the largest prompt across this turn's passes is
  // the figure that must fit the context window. Print it (with the per-pass
  // split) so the context length can be sized from real data, not guesswork.
  if (config.get("debugLogging") && passPromptTokens.length > 0) {
    const peak = passPromptTokens.reduce((a, b) => (b.prompt > a.prompt ? b : a));
    const split = passPromptTokens.map((p) => `${p.label} ${p.prompt}`).join(", ");
    ctl.debug(
      `[RolePlayMaster] turn ${state.turn + 1} token peak: prompt ${peak.prompt} ` +
        `(${peak.label}) — per pass: ${split}`,
    );
  }

  // 7. Persist: advance the turn, consume one-shot directives, store memory.
  //    Director steering: consume this turn's one-shot `/mj!` directives, then —
  //    only on a turn the conductor ran — REPLACE the previous beat's arc nudges
  //    (the `arc-` prefixed directives) with the fresh ones, so they never pile
  //    up and the latest beat is authoritative. Player `/mj` directives are never
  //    touched. On a turn the conductor didn't run, persistent arc nudges carry
  //    over to fill the cadence gap.
  const consumedDirector = consumeOnce(plan.director);
  const directorToSave = conductorRan
    ? {
        ...consumedDirector,
        directives: [
          ...consumedDirector.directives.filter((d) => !d.id.startsWith("arc-")),
          ...arcDirectives,
        ],
      }
    : consumedDirector;
  await saveState(
    {
      ...state,
      turn: state.turn + 1,
      onboarded,
      director: directorToSave,
      // Dramatic-arc pacing (Phase I): the act we're in + the conductor's running
      // tension / open threads, advanced this turn if the conductor said so.
      pacing,
      // World clock (the chronos subsystem): the advanced time/weather for next
      // turn. Unchanged default when the feature is off.
      chronos,
      // Scene presence (the scene-presence subsystem): where the scene is + who is
      // on stage, as the conductor last read it. Carried forward when no beat ran;
      // inert default when the feature is off.
      scene,
      // Two-tier memory: `memory` carries both the current chapter summary and
      // the whole-story summary, updated at a chapter close in section 6g-bis.
      memory,
      choiceHistory,
      // Structured mechanics (Phase G): the post-resolution sheets, the
      // game-over flag, and next turn's tagged choices. When the universe has no
      // rules these carry their unchanged defaults.
      sheet,
      combatants,
      npcSheets,
      gameOver,
      pendingChoices,
      // Relationship memory (the relationship pass): evolving per-pair standing +
      // shared-history summaries, and the pass's own digest marker.
      relationships,
      relationshipsDigestedCount,
      // Character psyche (Phase J): the fluctuating per-NPC mood / intent /
      // goalFocus, written by the same social pass as `relationships`.
      psyche,
      // Knowledge gating (the revelation system): the player's known-facts set,
      // grown by the digest pass above.
      knowledge,
      // First-mention introduction (the disclosure system): the lore/NPCs the
      // narration has introduced so far, grown by the pure marker above.
      disclosure,
    },
    storeOptions,
  );

  // Clear the spinner: a hidden pass (accountant, conductor, chapter close…) may
  // have left the status on "loading" with its own label. Land on a final "done"
  // so the UI shows the turn finished rather than a perpetual in-progress state.
  ready("Turn complete");
}
roleplay-master

roleplay-master