/**
 * Advanced tuning knobs — defaults + an optional on-disk override file.
 *
 * The LM Studio settings panel only exposes the essentials (see `config.ts`).
 * Every other knob lives here, fixed to a validated default — but a power user
 * ("les barbus") can override any of them, with no rebuild, by dropping a
 * `roleplay.config.json` next to the plugin (alongside `characters/`, `lore/`,
 * `universes/`). It is read fresh each turn; missing file or invalid values
 * fall back to the defaults (defensive, like the state/lore loaders).
 *
 * **Single source of truth:** one Zod schema declares each knob's validation
 * bounds AND its default (via `.catch(default)`, which supplies the default for
 * both a missing key and an invalid value). The `Tuning` type is `z.infer`red
 * from it and `TUNING_DEFAULTS` is `parse({})` — so a new knob is added in
 * exactly one place and the three can never drift apart. Full per-field docs
 * live in `docs/configuration.md`.
 *
 * This module intentionally has NO `@lmstudio/sdk` dependency, so the pure
 * `resolveTuning` can be unit-tested in `scripts/smoke.mjs`.
 */

import { readFile } from "node:fs/promises";
import { resolve } from "node:path";
import { z } from "zod";

/**
 * Per-field schema = validation bounds + default in one place. `.catch(default)`
 * returns the default for BOTH a missing key and an invalid value, so a single
 * bad value is ignored (falls back to its default) rather than discarding the
 * whole file; unknown keys are stripped.
 */
const TuningSchema = z.object({
  /** Numbered options offered each turn. */
  choiceCount: z.number().int().min(2).catch(3),
  /** Recent messages scanned for lore/NPC triggers. */
  loreScanDepth: z.number().int().positive().catch(4),
  /** Token ceiling for the injected `# World lore` block. */
  loreBudgetTokens: z.number().int().positive().catch(512),
  /** Cosine floor for a semantic lore candidate (0–1). */
  loreSemanticThreshold: z.number().min(0).max(1).catch(0.45),
  /** Max meaning-only lore matches per turn. */
  loreSemanticTopK: z.number().int().nonnegative().catch(3),
  /** Activate NPCs by relevance (vs. always inject the whole cast). */
  npcActivation: z.boolean().catch(true),
  /**
   * Scene-presence tiering: only NPCs actually PRESENT get a full card and may be
   * voiced; the rest (known but off-stage) are listed in a thin off-scene block
   * (name only — or `name — link` when an on-stage character's authored relation
   * pulls them in), referenceable but not voiced. Also drops the never-empty-scene
   * whole-cast fallback. Requires `npcActivation`; false = the pre-tiering
   * behaviour (whole activated set as full cards).
   *
   * Presence is the MODEL's reading, not a keyword proxy: the conductor reports
   * WHERE the scene is and WHO is on stage at each beat (+0 calls — it piggybacks
   * a pass already running), persisted in `state.scene` and freshened every turn
   * by the player's action (named NPCs) and `locations` binding (a place's
   * regulars — the keeper in her tavern). Works best with the dramatic arc on (the
   * conductor's home); with the arc off, presence falls back to always-active +
   * the addressed NPC only.
   */
  scenePresence: z.boolean().catch(true),
  /**
   * Minimum |disposition| for an authored NPC↔NPC `relations` bond to pull the
   * other character into the off-scene block while this one is on stage (a neutral
   * acquaintance is background, not surfaced). Only applies when `scenePresence`
   * is on.
   *
   * The 0–100 bound validated here is fixed; it does not track a customised
   * `relationshipDispositionMax`.
   */
  relationPullMinDisposition: z.number().min(0).max(100).catch(15),
  /** Cosine floor for a semantic NPC activation (0–1). */
  npcSemanticThreshold: z.number().min(0).max(1).catch(0.45),
  /** Max meaning-only NPC activations per turn. */
  npcSemanticTopK: z.number().int().nonnegative().catch(3),
  /** Summarize after this many new (unprotected) messages… */
  summaryIntervalMessages: z.number().int().positive().catch(10),
  /** …or this many new words, whichever first. */
  summaryIntervalWords: z.number().int().positive().catch(600),
  /**
   * Soft word target the rolling CHAPTER summary (`# Chapter so far`) is
   * recompressed toward. It only holds the current chapter (reset at each
   * chapter close), so it stays small. 0 = no ceiling (unbounded).
   */
  summaryTargetWords: z.number().int().nonnegative().catch(300),
  /**
   * Soft word target the whole-STORY summary (`# Story so far`) is recompressed
   * toward at each chapter close. The permanent backbone reinjected every turn,
   * so held at a higher altitude (standing facts, not scene detail). 0 = none.
   */
  storyTargetWords: z.number().int().nonnegative().catch(500),
  /** Most-recent messages never folded into the summary. */
  summaryProtectTail: z.number().int().nonnegative().catch(2),
  /**
   * When summarizing, prepend this many already-summarized messages as read-only
   * context, so the first new action (often a bare pick) is preceded by the
   * narration that prompted it instead of starting mid-exchange. 0 = no bridge.
   */
  summaryBridge: z.number().int().nonnegative().catch(1),
  /** Max past messages recalled from the RAG store per turn (Phase F). */
  ragTopK: z.number().int().nonnegative().catch(3),
  /** Cosine floor for a recalled memory (0–1) (Phase F). */
  ragThreshold: z.number().min(0).max(1).catch(0.45),
  /** Token ceiling for the injected `# Relevant past events` block (Phase F). */
  ragBudgetTokens: z.number().int().positive().catch(512),
  /** Hard cap on stored chunks; the oldest are dropped past this (Phase F). */
  ragMaxStore: z.number().int().nonnegative().catch(400),
  /** Most-recent messages never archived into the RAG store (Phase F). */
  ragProtectRecent: z.number().int().nonnegative().catch(2),
  /** Auto-name a new chat (the plugin drives the loop, so LM Studio won't). */
  autoNameChat: z.boolean().catch(true),
  /** Word cap for a generated chat title. */
  autoNameMaxWords: z.number().int().min(2).catch(6),
  /** Hard cap on reply length (0 = no limit). */
  maxResponseTokens: z.number().int().nonnegative().catch(0),
  /** Context-overflow policy applied on respond(). */
  contextOverflowPolicy: z
    .enum(["rollingWindow", "truncateMiddle", "stopAtLimit"])
    .catch("rollingWindow"),
  /**
   * Min-p sampling floor for the NARRATION (0 = disabled). A light floor (~0.05)
   * trims the long tail of unlikely tokens for more coherent prose without
   * killing variety. The plugin owns this so it doesn't depend on the LM Studio
   * preset; unlike the strict mechanics passes, the narrator keeps it ON.
   */
  narrationMinP: z.number().min(0).max(1).catch(0.05),
  /**
   * Repetition penalty for the NARRATION (1 = disabled). A mild value (~1.1)
   * breaks the phrase-/cliché-looping LLMs fall into over long sessions. Keep it
   * gentle: too high degrades character names and natural repetition.
   *
   * Accepted range is 1–2. Unlike the `0 = disabled` knobs in this file, a value
   * below 1 (including 0) is INVALID here and falls back to the 1.1 default —
   * i.e. the penalty stays ON. Write exactly `1` to switch it off.
   */
  narrationRepeatPenalty: z.number().min(1).max(2).catch(1.1),
  /** Universes root holding `<universe>/` content packages (empty = `universes/`). */
  universesDir: z.string().catch(""),
  /** Saves root holding `<universe>[__<save>].json` play state (empty = `saves/`). */
  savesDir: z.string().catch(""),
  /**
   * PbtA partial-success threshold: a 2d6+stat total ≥ this is a partial (Phase G).
   *
   * NOTE(review): any integer is accepted, and the schema does not enforce
   * `pbtaPartial <= pbtaFull` — confirm the consumer tolerates an inverted pair.
   */
  pbtaPartial: z.number().int().catch(7),
  /**
   * PbtA full-success threshold: a 2d6+stat total ≥ this is a full hit (Phase G).
   *
   * NOTE(review): validated independently of `pbtaPartial` (no ordering check).
   */
  pbtaFull: z.number().int().catch(10),
  /** Safety cap on simultaneous combatants; the roster is tidied to this (Phase G). */
  maxCombatants: z.number().int().nonnegative().catch(8),
  /** Sampling temperature for the free-form adjudication (referee) pass (Phase G3). */
  adjudicationTemperature: z.number().min(0).max(2).catch(0.2),
  /** Token cap for the referee's structured verdict — it is tiny (Phase G3). */
  adjudicationMaxTokens: z.number().int().positive().catch(200),
  /** Sampling temperature for the relationship pass (low — faithful, not creative). */
  relationshipTemperature: z.number().min(0).max(2).catch(0.3),
  /** Token cap for the relationship pass's structured output (a few per-pair summaries). */
  relationshipMaxTokens: z.number().int().positive().catch(600),
  /** Symmetric clamp for a stored disposition value (e.g. 100 → [-100, 100]). */
  relationshipDispositionMax: z.number().int().positive().catch(100),
  /** Max absolute disposition move per relationship-pass window (anti-swing). */
  relationshipDeltaCap: z.number().int().nonnegative().catch(25),
  /**
   * Trust scar (the low-water mark): how many points of recovery ceiling are lost
   * per point of the worst disposition a pair ever reached. `1` is point-for-point
   * — a relationship that once sank to −70 can never climb back above +30, so a
   * grave betrayal is forgivable only part-way, never erased (the realistic, near-
   * permanent damage). `0` disables the scar — disposition recovers fully, the
   * pre-scar behaviour.
   */
  relationshipScarFactor: z.number().min(0).catch(1),
  /** Sampling temperature for the social referee / stance pass (Phase J) — low, an impartial reading. */
  volitionTemperature: z.number().min(0).max(2).catch(0.3),
  /** Token cap for the stance pass's structured output (a short verdict per present NPC). */
  volitionMaxTokens: z.number().int().positive().catch(350),
  /**
   * Psyche decay window (Phase J): turns after which a character's mood/intent that
   * the social pass has not refreshed is dropped, so an off-scene NPC returns to
   * their stable persona instead of a stale mood. `0` disables decay (moods persist
   * verbatim). The psyche is the "current" state layer, so this stays short.
   */
  psycheDecayTurns: z.number().int().nonnegative().catch(6),
  /**
   * Narration-length steering: a compact baseline plus a soft per-turn word target
   * that scales INVERSELY with the scene's dramatic intensity — a charged moment
   * reads short and clipped, a calm one may breathe (the `# Narration length`
   * block). false = no block (the old, unsteered verbose behaviour).
   */
  narrationLength: z.boolean().catch(true),
  /**
   * Soft word target at MINIMUM intensity (the long, unhurried end). The
   * direction of the variation is data: `calmWords > intenseWords` gives
   * "intense ⇒ shorter"; swap them to invert. 0 also disables the block.
   */
  narrationCalmWords: z.number().int().nonnegative().catch(220),
  /** Soft word target at MAXIMUM intensity (the short, clipped end). */
  narrationIntenseWords: z.number().int().nonnegative().catch(60),
  /**
   * Intensity (0–1) used when there is no live tension signal — i.e. the dramatic
   * arc is off, so `pacing.tension` never moves. Keeps the target near a sensible
   * compacted centre instead of pinning it to the calm extreme.
   */
  narrationBaselineIntensity: z.number().min(0).max(1).catch(0.4),
  /**
   * Intensity FLOOR applied while combat is live (a fight reads short and urgent),
   * independent of the arc. 0 = ignore combat (length follows tension only).
   */
  narrationCombatIntensity: z.number().min(0).max(1).catch(0.7),
  /** Most-recent resolved numbered picks kept in `state.choiceHistory` (Phase D). */
  choiceHistoryMax: z.number().int().nonnegative().catch(50),
  /**
   * World clock (the chronos subsystem): the code-drive cadence of time. Time
   * creeps forward one day-phase every Nth turn (so a default six-phase day spans
   * ~`6 * turnsPerPhase` turns of loitering). A diegetic signal read off the scene
   * always overrides this. 0 = time never advances on its own (only the signal
   * moves it).
   */
  chronosTurnsPerPhase: z.number().int().nonnegative().catch(2),
  /**
   * World clock: after the weather has held this many turns with no diegetic
   * change, drift it to a neighbouring condition in the setting's palette (gradual
   * calm↔severe movement, never a jarring snap). 0 = weather only ever changes
   * when the narration says so.
   */
  chronosWeatherHold: z.number().int().nonnegative().catch(4),
  /**
   * Dramatic arc (Phase I): minimum turns between two conductor beats — the
   * cadence gate, so pacing is judged on a beat, not every turn. Higher = the
   * conductor weighs in less often (cheaper, calmer).
   */
  conductorMinBeatGap: z.number().int().positive().catch(3),
  /** Sampling temperature for the conductor pass (a measured directorial call). */
  conductorTemperature: z.number().min(0).max(2).catch(0.4),
  /** Token cap for the conductor's structured output. */
  conductorMaxTokens: z.number().int().positive().catch(400),
  /** Max soft directive nudges the conductor may install per beat. */
  conductorMaxDirectives: z.number().int().nonnegative().catch(2),
  /** Max open threads the conductor retains across beats. */
  conductorMaxThreads: z.number().int().nonnegative().catch(12),
  /**
   * Chapter close (the conductor's session-end hook): the temperature shared by
   * the story-summary integration (plain prose) and the chronicler sheet-evolution
   * pass — low, so both are faithful, not creative.
   */
  chroniclerTemperature: z.number().min(0).max(2).catch(0.3),
  /** Token cap for the chronicler's structured sheet-evolution output (a few changes). */
  chroniclerMaxTokens: z.number().int().positive().catch(400),
  /** Hard symmetric bound on each per-chapter sheet delta (anti power-creep). */
  chronicleDeltaCap: z.number().int().nonnegative().catch(3),
  /**
   * Messages before the end of the chat at which a condensed steering reminder
   * (author's-note) is injected (Phase D). 0 = disabled (byte-identical chat).
   */
  authorNoteDepth: z.number().int().nonnegative().catch(0),
  /** Re-inject the author's-note every Nth turn (Phase D). 1 = every turn. */
  authorNoteInterval: z.number().int().positive().catch(1),
  /**
   * Drop from the outgoing chat the messages already folded into `# Story so
   * far`; the summary (and RAG recall) already carry their substance, so the
   * verbatim copies are duplication. The whole un-summarized tail is always
   * kept. false = send the full history (pre-optimization behaviour).
   */
  pruneSummarized: z.boolean().catch(true),
  /**
   * When pruning, keep this many of the most-recent summarized messages as a
   * bridge into the un-summarized tail, so narration still flows. 0 = a hard cut
   * at the summary boundary.
   */
  contextBridge: z.number().int().nonnegative().catch(1),
  /**
   * Rewrite a bare numbered pick ("2") in the history into the explicit
   * `I choose: <option text>` of the option it selected, so the player's choice
   * stays legible once its option list scrolls out of the window. Free-text
   * actions are left untouched. false = leave bare numbers as typed.
   */
  expandPicks: z.boolean().catch(true),
});

/**
 * The resolved set of advanced knobs the handler reads each turn.
 *
 * Inferred from `TuningSchema`, so it always mirrors the schema's fields; there
 * is no hand-written interface to keep in sync.
 */
export type Tuning = z.infer<typeof TuningSchema>;

/**
 * Validated defaults. Documented in `docs/configuration.md`.
 *
 * Built once at module load by parsing an empty object, so every field resolves
 * to its `.catch(...)` fallback. `resolveTuning` and `loadTuning` return a
 * shallow copy of this object on their fallback paths, not the object itself.
 */
export const TUNING_DEFAULTS: Tuning = TuningSchema.parse({});

/**
 * File name read from the plugin working directory (`loadTuning` resolves it
 * against `process.cwd()`).
 */
export const TUNING_CONFIG_FILE = "roleplay.config.json";

/**
 * Turn an arbitrary, already-parsed value into a complete knob set. Pure — no
 * I/O, no mutation of the input.
 *
 * - Input the schema rejects outright (anything that is not an object): a fresh
 *   copy of the defaults.
 * - Object input: each valid field overrides its default, each invalid or
 *   missing field falls back to its own default, and unknown keys are dropped.
 *
 * @param raw Untrusted value, typically the result of `JSON.parse`.
 * @returns A fully populated `Tuning`.
 */
export function resolveTuning(raw: unknown): Tuning {
  const outcome = TuningSchema.safeParse(raw);
  if (!outcome.success) {
    // Hand back a copy so callers can never mutate the shared defaults.
    return { ...TUNING_DEFAULTS };
  }
  return outcome.data;
}

/**
 * Read `roleplay.config.json` from the plugin working directory and merge it
 * over the defaults. I/O; never throws.
 *
 * A missing or unreadable file, or malformed JSON, yields a fresh copy of the
 * defaults. A leading UTF-8 byte-order mark is stripped before parsing: some
 * editors (notably on Windows) prepend one, `readFile(..., "utf8")` keeps it,
 * and `JSON.parse` rejects it — which would otherwise silently discard an
 * entirely valid override file.
 *
 * @returns The resolved knob set (always a complete `Tuning`).
 */
export async function loadTuning(): Promise<Tuning> {
  const file = resolve(process.cwd(), TUNING_CONFIG_FILE);
  try {
    const text = await readFile(file, "utf8");
    // U+FEFF survives the utf8 decode as the first code unit; drop it so a
    // BOM-prefixed file parses like any other.
    const json = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
    return resolveTuning(JSON.parse(json));
  } catch {
    // Deliberate best-effort: the file is optional and re-read every turn, so
    // any read/parse failure degrades to the defaults instead of surfacing.
    return { ...TUNING_DEFAULTS };
  }
}
roleplay-master

roleplay-master