// src/predictionLoop.ts
import { type LLM, type PredictionLoopHandlerController, tool } from "@lmstudio/sdk";
import { z } from "zod";
import { configSchematics } from "./config.js";
import { describeError, fetchImageMarkdown } from "./pixlstash.js";

/**
 * Hybrid generator: the model decides *when* to illustrate via a single
 * tool call; the plugin runs the PixlStash search and renders the picture in
 * its own assistant content block. The model never has to echo any markdown,
 * so this works with roleplay fine-tunes that won't repeat tool results.
 *
 * @param ctl - Prediction-loop controller; this function uses it for the token
 *   source, the chat history, the plugin config, the abort signal and for
 *   creating the assistant content blocks.
 * @returns Resolves once `model.act` has finished (or was aborted through
 *   `stop`) and any text still buffered by the noise stripper was flushed.
 * @throws Rethrows whatever `model.act` threw, unless `stop` was aborted (user
 *   stop or image cap reached), in which case the error is swallowed.
 */
export async function predictionLoop(ctl: PredictionLoopHandlerController): Promise<void> {
  // `tokenSource()` returns the user's selected model pre-configured with the
  // session prediction config (system prompt, sampler, etc.). That avoids the
  // system-prompt-loss you'd get from a raw `client.llm.model()`.
  const tokenSource = await ctl.tokenSource();
  // The pipeline assumes an LLM; if the user has picked a generator plugin as
  // their token source we'd surface a runtime error from `.act` below.
  const model = tokenSource as LLM;
  const history = await ctl.pullHistory();
  const cfg = ctl.getPluginConfig(configSchematics);
  const maxImages = cfg.get("maxImagesPerResponse");

  // Our own abort controller so the tool can stop the .act() loop when the
  // per-response image cap is hit (otherwise the model treats the refusal as
  // a tool result and merrily keeps generating). User-aborts via
  // `ctl.abortSignal` also flow through this controller.
  const stop = new AbortController();
  if (ctl.abortSignal.aborted) stop.abort();
  else ctl.abortSignal.addEventListener("abort", () => stop.abort(), { once: true });

  // Never repeat the same picture twice in this chat: seed the "used" set from
  // every prior message (our injected files are referenced as
  // `pixlstash-<id>.<ext>`); fetchImageMarkdown adds each newly-shown id too.
  const usedIds = collectUsedIds(history);

  // One streaming text block for the assistant's prose. We open a fresh one
  // after each image so blocks render in the natural order: prose, image, prose.
  // `textBlock` is reassigned inside the tool implementation below; the
  // stripper reads it through the getter, so it always writes to the current one.
  let textBlock = ctl.createContentBlock({ roleOverride: "assistant", includeInContext: true });

  // Some tool-call fine-tunes (Sapphira/Llama 3.3 …) emit `[TOOL_RESULT]` or
  // similar markers at the start of a streaming round. Strip leading variants
  // off each new text block before they reach the user.
  const stream = makeLeadingNoiseStripper(() => textBlock);

  // Number of images rendered so far in this response; compared to `maxImages`.
  let imagesShown = 0;
  const searchTool = tool({
    name: "show_pixlstash_image",
    description:
      "Search the user's PixlStash image library and display a matching picture " +
      "inline in your reply. Call this whenever a visual would help illustrate a " +
      "scene, character, outfit, or object you're describing. The plugin renders " +
      "the picture automatically — you do NOT need to copy or echo any markdown " +
      "into your prose. Just call this tool with a vivid query and keep narrating.",
    parameters: {
      query: z
        .string()
        .min(1)
        .describe(
          "A short, vivid visual description of what to illustrate — e.g. " +
            "'a nervous young woman in a yellow top at a doorway at night', " +
            "'a dimly lit living room with two people on a couch'.",
        ),
    },
    implementation: async ({ query }, ctx) => {
      // A configured cap of 0 (or less) never trips this check, i.e. it means
      // "no per-response limit".
      if (maxImages > 0 && imagesShown >= maxImages) {
        // End the assistant turn here — otherwise the model treats this
        // refusal as a normal tool result and keeps generating more prose.
        stop.abort();
        return `Image quota reached (${maxImages} per response). Assistant turn ends.`;
      }
      ctx.status("Searching PixlStash…");
      try {
        const markdown = await fetchImageMarkdown(ctl, query, ctx.signal, usedIds);
        if (!markdown) {
          // NOTE(review): on this path (and the error path below) the stripper
          // is not flushed/re-armed, so a marker at the start of the model's
          // next round would only be stripped if nothing was emitted yet —
          // confirm this is intended.
          return "No matching image was found (or all matches have already been shown in this chat).";
        }
        // Render the image as its own assistant block. `includeInContext: false`
        // keeps it visible to the user but out of the history sent to the model
        // on later turns — so it can't be parroted back as text.
        const imageBlock = ctl.createContentBlock({
          roleOverride: "assistant",
          includeInContext: false,
        });
        imageBlock.appendText(markdown);
        imagesShown++;

        // Flush any buffered text from before the image, then open a fresh text
        // block for the continuation so it lands AFTER the picture.
        // The flush must happen before `textBlock` is reassigned so the
        // buffered prose goes to the block that precedes the image.
        stream.flush();
        textBlock = ctl.createContentBlock({
          roleOverride: "assistant",
          includeInContext: true,
        });
        stream.reset();
        return "Image displayed.";
      } catch (err) {
        // Failures are reported to the model as the tool result (and surfaced
        // as a warning) instead of being thrown out of the tool.
        const detail = describeError(err);
        ctx.warn(`PixlStash error: ${detail}`);
        return `Error: ${detail}`;
      }
    },
  });

  try {
    await model.act(history, [searchTool], {
      signal: stop.signal, // user-aborts + our cap-driven aborts both flow here
      onPredictionFragment: (fragment) => stream.emit(fragment.content),
    });
  } catch (err) {
    // Cap-reached and user-stop are intentional aborts, not failures.
    if (!stop.signal.aborted) throw err;
  }
  // Emit whatever the stripper is still holding back (short final fragments).
  stream.flush();
}

// ---------------------------------------------------------------- internals --

/**
 * Gather the numeric ids of every `pixlstash-<id>.<ext>` reference found in
 * the text of the chat's messages, so pictures that were already shown are
 * not picked again. Ids are returned in first-seen order.
 * Exported for testing.
 *
 * @param history - Anything exposing `getMessagesArray()` whose items expose
 *   `getText()`.
 * @returns The set of ids referenced anywhere in the history (empty if none).
 */
export function collectUsedIds(history: {
  getMessagesArray(): Array<{ getText(): string }>;
}): Set<number> {
  const referenced = history
    .getMessagesArray()
    .flatMap((entry) =>
      Array.from(entry.getText().matchAll(/pixlstash-(\d+)\.\w+/g), (hit) => Number(hit[1])),
    );
  return new Set<number>(referenced);
}

/**
 * Matches a tool-result marker at the very start of a text block, including
 * any whitespace before and after it. Recognized forms (case-insensitive):
 * bracketed `[tool_result]` / `[/tool_result]` and angle forms such as
 * `<tool_result>` / `<|tool_result|>`, where the separator between "tool" and
 * "result" may be `_`, a space, or absent.
 */
export const LEADING_NOISE =
  /^\s*(?:\[\s*\/?\s*tool[_ ]?result\s*\]|<\|?\s*\/?\s*tool[_ ]?result\s*\|?>)[\s\n]*/i;

/**
 * Per-block leading-noise stripper: buffers the start of a text block until
 * it holds ~32 chars of content (leading whitespace not counted) or a
 * newline / sentence boundary appears after the leading whitespace, strips a
 * leading tool-result marker if present, then streams the rest verbatim.
 *
 * Leading whitespace is deliberately ignored when deciding: `LEADING_NOISE`
 * tolerates whitespace before the marker, so a fragment such as `"\n"` that
 * arrives ahead of `[TOOL_RESULT]` must not end the buffering phase early.
 * When no marker is found the buffered text, whitespace included, is emitted
 * unchanged.
 *
 * Exported for testing.
 *
 * @param getBlock - Called on every write to obtain the block to append to,
 *   so the caller can swap the target block between writes.
 * @returns `emit(text)` feeds streamed text through the stripper; `flush()`
 *   emits whatever is still buffered; `reset()` discards the buffer and arms
 *   the stripper again (call it after opening a fresh text block).
 */
export function makeLeadingNoiseStripper(getBlock: () => { appendText(text: string): void }): {
  emit: (text: string) => void;
  flush: () => void;
  reset: () => void;
} {
  let buf = "";
  let done = false;

  // Strip the marker (if any) from the buffered prefix, forward the remainder
  // and switch to pass-through mode.
  const flushBuf = () => {
    const cleaned = buf.replace(LEADING_NOISE, "");
    if (cleaned) getBlock().appendText(cleaned);
    buf = "";
    done = true;
  };

  return {
    emit(text: string) {
      if (done) {
        getBlock().appendText(text);
        return;
      }
      buf += text;
      // Decide once we have enough to recognize (or rule out) the marker, or
      // when a newline / sentence boundary makes the prefix unambiguous.
      // Both checks look past leading whitespace: a bare leading "\n" says
      // nothing about whether a marker follows.
      const content = buf.trimStart();
      if (content.length >= 32 || /[\n.!?]/.test(content)) flushBuf();
    },
    flush() {
      if (!done && buf) flushBuf();
    },
    reset() {
      buf = "";
      done = false;
    },
  };
}
// pixlstash-lmstudio