Project Files
src / predictionLoop.ts
import { type LLM, type PredictionLoopHandlerController, tool } from "@lmstudio/sdk";
import { z } from "zod";
import { configSchematics } from "./config.js";
import { describeError, fetchImageMarkdown } from "./pixlstash.js";
/**
 * Prediction loop for the hybrid illustrator.
 *
 * The selected model narrates as usual and is handed one tool,
 * `show_pixlstash_image`. When the model calls it, the plugin performs the
 * PixlStash lookup itself and prints the resulting markdown into a dedicated
 * assistant content block, so the model never needs to repeat tool output in
 * its prose (which roleplay fine-tunes often refuse to do).
 *
 * @param ctl - Controller for the current prediction: supplies the token
 *   source, chat history, plugin config, abort signal and content blocks.
 * @returns Resolves after the `.act()` loop has ended and any still-buffered
 *   prose has been flushed to the current text block.
 * @throws Re-throws whatever `.act()` throws, unless this turn's abort signal
 *   is set (user stop or image cap), in which case the error is swallowed.
 */
export async function predictionLoop(ctl: PredictionLoopHandlerController): Promise<void> {
  // `tokenSource()` hands back the user's chosen model already carrying the
  // session prediction config (system prompt, sampler, ...), which a raw
  // `client.llm.model()` would lose. The rest of the pipeline assumes an LLM;
  // a generator plugin picked as token source would fail at `.act` below.
  const model = (await ctl.tokenSource()) as LLM;
  const chat = await ctl.pullHistory();
  const imageCap = ctl.getPluginConfig(configSchematics).get("maxImagesPerResponse");

  // Turn-scoped abort controller: the tool trips it once the per-response
  // image cap is hit (a plain refusal would be read as a normal tool result
  // and the model would keep generating). User aborts are forwarded into it.
  const turnAbort = new AbortController();
  const endTurn = (): void => turnAbort.abort();
  if (!ctl.abortSignal.aborted) {
    ctl.abortSignal.addEventListener("abort", endTurn, { once: true });
  } else {
    endTurn();
  }

  // Ids of pictures already present in this chat (our injected files are
  // referenced as `pixlstash-<id>.<ext>`); fetchImageMarkdown is handed the
  // same set so newly shown ids are tracked as well.
  const seenIds = collectUsedIds(chat);

  // Factory for the streaming prose block. A new one is opened after every
  // picture so blocks appear in reading order: prose, image, prose.
  const openProseBlock = () =>
    ctl.createContentBlock({ roleOverride: "assistant", includeInContext: true });
  let proseBlock = openProseBlock();

  // Some tool-call fine-tunes (Sapphira/Llama 3.3 ...) start a streaming round
  // with `[TOOL_RESULT]`-style markers; the filter removes them from the head
  // of each new prose block before the user sees them.
  const noiseFilter = makeLeadingNoiseStripper(() => proseBlock);
  let shownCount = 0;

  const toolDescription =
    "Search the user's PixlStash image library and display a matching picture " +
    "inline in your reply. Call this whenever a visual would help illustrate a " +
    "scene, character, outfit, or object you're describing. The plugin renders " +
    "the picture automatically — you do NOT need to copy or echo any markdown " +
    "into your prose. Just call this tool with a vivid query and keep narrating.";

  const illustrateTool = tool({
    name: "show_pixlstash_image",
    description: toolDescription,
    parameters: {
      query: z
        .string()
        .min(1)
        .describe(
          "A short, vivid visual description of what to illustrate — e.g. " +
            "'a nervous young woman in a yellow top at a doorway at night', " +
            "'a dimly lit living room with two people on a couch'.",
        ),
    },
    implementation: async ({ query }, ctx) => {
      const capReached = imageCap > 0 && shownCount >= imageCap;
      if (capReached) {
        // Abort the whole turn: returning a refusal alone would let the model
        // carry on writing prose as if this were an ordinary tool result.
        turnAbort.abort();
        return `Image quota reached (${imageCap} per response). Assistant turn ends.`;
      }

      ctx.status("Searching PixlStash…");
      try {
        const markdown = await fetchImageMarkdown(ctl, query, ctx.signal, seenIds);
        if (markdown) {
          // The picture goes into its own assistant block. With
          // `includeInContext: false` the user still sees it, but it is kept
          // out of the history sent to the model on later turns, so it cannot
          // be parroted back as text.
          const pictureBlock = ctl.createContentBlock({
            roleOverride: "assistant",
            includeInContext: false,
          });
          pictureBlock.appendText(markdown);
          shownCount += 1;

          // Push out prose buffered before the picture, then continue in a new
          // prose block so the follow-up text lands AFTER the image.
          noiseFilter.flush();
          proseBlock = openProseBlock();
          noiseFilter.reset();
          return "Image displayed.";
        }
        return "No matching image was found (or all matches have already been shown in this chat).";
      } catch (err) {
        const detail = describeError(err);
        ctx.warn(`PixlStash error: ${detail}`);
        return `Error: ${detail}`;
      }
    },
  });

  try {
    await model.act(chat, [illustrateTool], {
      // Carries both user aborts and our own cap-triggered abort.
      signal: turnAbort.signal,
      onPredictionFragment: ({ content }) => noiseFilter.emit(content),
    });
  } catch (err) {
    // Reaching the cap or a user stop is an intentional abort, not a failure;
    // anything else is propagated to the caller.
    const intentional = turnAbort.signal.aborted;
    if (!intentional) throw err;
  }

  noiseFilter.flush();
}
// ---------------------------------------------------------------- internals --
/**
 * Gathers the PixlStash image ids already referenced in a chat.
 *
 * The text of every message is searched for `pixlstash-<id>.<ext>` file
 * references, and each numeric `<id>` found is added to the result, so callers
 * can avoid showing the same picture twice. Exported for testing.
 *
 * @param history - Object exposing `getMessagesArray()`, whose entries expose
 *   `getText()`.
 * @returns The distinct ids found, as numbers; empty when nothing matches.
 */
export function collectUsedIds(history: {
  getMessagesArray(): Array<{ getText(): string }>;
}): Set<number> {
  const reference = /pixlstash-(\d+)\.\w+/g;
  const idsPerMessage = history
    .getMessagesArray()
    .flatMap((entry) =>
      Array.from(entry.getText().matchAll(reference), (hit) => Number(hit[1])),
    );
  return new Set<number>(idsPerMessage);
}
/**
 * Case-insensitive pattern for ONE tool-result marker at the very start of a
 * text block, together with any whitespace before and after it.
 *
 * Accepted shapes are bracketed (`[TOOL_RESULT]`, `[/tool result]`) and angled
 * (`<tool_result>`, `<|tool_result|>`); `tool` and `result` may be joined by
 * `_`, a single space, or nothing, and an optional `/` may precede the name.
 * Text that does not start with such a marker is not matched at all, so its
 * leading whitespace is left intact.
 */
export const LEADING_NOISE =
  /^\s*(?:\[\s*\/?\s*tool[_ ]?result\s*\]|<\|?\s*\/?\s*tool[_ ]?result\s*\|?>)[\s\n]*/i;
/**
 * Per-block leading-noise stripper.
 *
 * Buffers the start of a text block until there is enough material to decide
 * whether it opens with a tool-result marker, removes the marker if present
 * (see `LEADING_NOISE`), and from then on streams text through verbatim.
 * Exported for testing.
 *
 * The decision is taken once the buffer, *ignoring leading whitespace*, is at
 * least 32 characters long or contains a newline / sentence terminator
 * (`.`, `!`, `?`). Leading whitespace is ignored because `LEADING_NOISE`
 * itself tolerates whitespace before the marker: a round that starts with a
 * lone `"\n"` fragment must keep buffering, otherwise the marker arriving in
 * the next fragment would slip through unstripped.
 *
 * @param getBlock - Called at write time to obtain the block to append to, so
 *   the caller can swap the target block between writes.
 * @returns An object with:
 *   - `emit(text)`: feed the next streamed fragment;
 *   - `flush()`: write out whatever is still buffered (marker stripped);
 *   - `reset()`: drop the buffer and re-arm stripping for a fresh block.
 */
export function makeLeadingNoiseStripper(getBlock: () => { appendText(text: string): void }): {
  emit: (text: string) => void;
  flush: () => void;
  reset: () => void;
} {
  let buf = "";
  let done = false;

  // Strip the marker (if any) from the buffered prefix, write the remainder,
  // and switch to pass-through mode.
  const flushBuf = () => {
    const cleaned = buf.replace(LEADING_NOISE, "");
    if (cleaned) getBlock().appendText(cleaned);
    buf = "";
    done = true;
  };

  return {
    emit(text: string) {
      if (done) {
        getBlock().appendText(text);
        return;
      }
      buf += text;
      // Judge only the part after leading whitespace: whitespace in front of
      // a marker is part of the noise and must not trigger an early decision.
      const content = buf.trimStart();
      // Decide once we have enough to recognize (or rule out) the marker, or
      // when a newline / sentence boundary makes the prefix unambiguous.
      if (content.length >= 32 || /[\n.!?]/.test(content)) flushBuf();
    },
    flush() {
      if (!done && buf) flushBuf();
    },
    reset() {
      buf = "";
      done = false;
    },
  };
}