// Project Files
// src / orchestrator.ts
import {
type Chat,
type GeneratorController,
type InferParsedConfig,
} from "@lmstudio/sdk";
import OpenAI from "openai";
import {
type ChatCompletionMessageParam,
type ChatCompletionMessageToolCall,
type ChatCompletionTool,
type ChatCompletionToolMessageParam,
} from "openai/resources/index.js";
import {
configSchematics,
globalConfigSchematics,
detectCapabilities,
getAttachmentWindowSize,
getEverythingElseWindowSize,
getSelfPluginIdentifier,
isPictureToolAllowlisted,
normalizeToolPluginId,
findLMStudioHome,
resolveActiveLMStudioChatId,
appendPictures,
readState,
writeStateAtomic,
importAttachmentBatch,
buildPromotionItems,
type PromotionItem,
recordGeneratedVariants,
getPromotableAttachmentNs,
shouldPromoteAttachments,
getPromotableVariantVs,
getPromotableImageIs,
getPromotableEverythingElse,
shouldPromoteVariantsIdempotent,
shouldPromoteEverythingElseIdempotent,
toOpenAIPromptParts,
setActiveChatContext,
trackInjection,
stripInjections,
ensureLogsDir,
getLogsDir,
getProjectRoot,
findAllAttachmentsLegacy as findAllAttachmentsFromConversation,
findLastAttachmentLegacy as findLastAttachmentFromConversation,
findAllVariantsLegacy as findAllVariantsFromConversation,
findAllPictures,
findAllImages,
reconcileMedia,
getStateArrayKey,
ensureAgentModelLoaded,
updateLastUsedModelForAgentModel,
} from "./core-bundle.mjs";
import {
harvestToolResult,
type HarvestContext,
type ToolCallInfo,
} from "./services/toolResultHarvester.js";
import fs from "fs";
import path from "path";
import crypto from "crypto";
import os from "os";
import { extractSequenceFrames } from "./helpers/sequenceExtractor.js";
import {
appendVisionPromotionJsonl,
type VisionPromotionJsonlEntry,
type VisionPromotionMode,
} from "./helpers/visionPromotionLog";
/**
 * Normalizes a URL that was lifted out of free text.
 *
 * Steps, in order: trim whitespace, drop trailing punctuation (`)`, `]`, `,`,
 * `.`, `;`), unwrap one pair of surrounding double quotes, unwrap one pair of
 * surrounding single quotes, and trim again.
 *
 * @param u Raw URL candidate; falsy values are treated as the empty string.
 * @returns The cleaned URL string (possibly empty).
 */
function canonicalizeExternalUrl(u: string): string {
  const rewriteSteps: Array<[RegExp, string]> = [
    [/[)\],.;]+$/g, ""],
    [/^"(.+)"$/g, "$1"],
    [/^'(.+)'$/g, "$1"],
  ];
  const seed = String(u || "").trim();
  const unwrapped = rewriteSteps.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    seed
  );
  return unwrapped.trim();
}
/**
 * Returns a truncated hex-encoded SHA-256 digest of `input`.
 *
 * @param input Text to hash.
 * @param chars Desired digest length; clamped to the range 4..64.
 * @returns The leading `chars` hex characters of the digest.
 */
function shortHexSha256(input: string, chars = 12): string {
  const upperBounded = Math.min(64, chars);
  const length = Math.max(4, upperBounded);
  const hasher = crypto.createHash("sha256");
  hasher.update(input);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, length);
}
/** Loosely typed conversation snapshot: an optional list of messages whose shape is not constrained here. */
type ConversationSnapshot = { messages?: unknown[] };
/* -------------------------------------------------------------------------- */
/* In-Memory Deduplication Cache for Image Markdown Injection */
/* -------------------------------------------------------------------------- */
/**
* Prevents race-condition double-injection when multiple calls read the same
* file-based state before either has written back. Entries expire after TTL_MS.
*/
/** How long (in milliseconds) a key counts as "recently injected". */
const INJECTION_DEDUP_TTL_MS = 10_000; // 10 seconds
/** In-memory registry: injection key → timestamp (ms since epoch) of the last accepted injection. */
const recentlyInjected = new Map<string, number>(); // key → timestamp
/**
 * Claims an injection slot for `key`.
 *
 * @param key Identifier of the injection to deduplicate.
 * @returns `true` when the caller should proceed (no live entry existed, and
 *   one has now been recorded); `false` when the same key was recorded less
 *   than `INJECTION_DEDUP_TTL_MS` ago.
 */
function markAsInjected(key: string): boolean {
  const timestamp = Date.now();
  const ageOf = (recordedAt: number): number => timestamp - recordedAt;

  // Expired entries are only swept once the map has grown past 100 keys.
  if (recentlyInjected.size > 100) {
    recentlyInjected.forEach((recordedAt, storedKey) => {
      if (ageOf(recordedAt) > INJECTION_DEDUP_TTL_MS) {
        recentlyInjected.delete(storedKey);
      }
    });
  }

  const lastSeen = recentlyInjected.get(key);
  const stillFresh =
    lastSeen !== undefined && ageOf(lastSeen) < INJECTION_DEDUP_TTL_MS;
  if (stillFresh) {
    // Duplicate within the TTL window: tell the caller to skip.
    return false;
  }

  recentlyInjected.set(key, timestamp);
  return true;
}
/**
 * Derives a BCP 47-style locale tag for log timestamps from the environment.
 *
 * The first non-empty value of `LOG_LOCALE`, `LC_ALL`, `LC_TIME`, `LANG` is
 * used. POSIX locale strings have the form
 * `language[_territory][.codeset][@modifier]`; the codeset and modifier are
 * dropped and `_` is converted to `-` (e.g. `de_DE.UTF-8` → `de-DE`).
 *
 * The special POSIX locales `C` and `POSIX` are not valid language tags and
 * would make `Intl`/`toLocaleString` throw, so they yield `undefined`
 * (meaning "use the runtime default locale").
 *
 * @returns The locale tag, or `undefined` when none is configured or usable.
 */
function resolvePreferredLocale(): string | undefined {
  const raw =
    process.env.LOG_LOCALE ||
    process.env.LC_ALL ||
    process.env.LC_TIME ||
    process.env.LANG;
  if (!raw) return undefined;

  // Keep only the language[_territory] portion.
  const base = (String(raw).split(/[.@]/)[0] ?? "").trim();
  if (!base) return undefined;

  // "C" / "POSIX" mean "no localisation"; they are not language tags.
  if (/^(?:c|posix)$/i.test(base)) return undefined;

  return base.replace(/_/g, "-");
}
/**
 * Formats the current time for log lines.
 *
 * Uses `Date#toLocaleString` with the locale from `resolvePreferredLocale()`
 * (24-hour clock, two-digit fields, short time-zone name). If formatting
 * throws, falls back to a fixed `DD/MM/YYYY HH:MM:SS` rendering in local time.
 *
 * @returns The formatted timestamp.
 */
function localTimestamp(): string {
  const formatOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    hour12: false,
    timeZoneName: "short",
  };
  const locale = resolvePreferredLocale();
  try {
    return new Date().toLocaleString(locale, formatOptions);
  } catch {
    // Locale-independent fallback when the locale tag is rejected.
    const now = new Date();
    const two = (value: number): string => String(value).padStart(2, "0");
    const datePart = [
      two(now.getDate()),
      two(now.getMonth() + 1),
      now.getFullYear(),
    ].join("/");
    const timePart = [
      two(now.getHours()),
      two(now.getMinutes()),
      two(now.getSeconds()),
    ].join(":");
    return `${datePart} ${timePart}`;
  }
}
/**
 * Appends one timestamped line to `generate-image-plugin.log` in the logs
 * directory. Best-effort: any failure is reported via `console.error` and
 * never propagated to the caller.
 *
 * @param line Message text (without timestamp or trailing newline).
 */
function appendPromotionToPluginLog(line: string): void {
  try {
    ensureLogsDir();
    const target = path.join(getLogsDir(), "generate-image-plugin.log");
    const entry = `${localTimestamp()} - ${line}\n`;
    fs.appendFileSync(target, entry);
  } catch (e) {
    const reason = (e as Error).message;
    console.error(
      "[VisionPromotion] Failed to append to generate-image-plugin.log:",
      reason
    );
  }
}
/**
 * Records a vision-promotion failure in two places: a human-readable line in
 * the plugin log and a structured `type: "error"` entry in the promotion
 * JSONL log. Each sink is attempted independently and failures are ignored.
 *
 * @param params Identifiers and details of the failure (chat, request,
 *   timestamp, promotion mode, failing stage, error message).
 */
function logVisionPromotionFailure(params: {
  chatId: string;
  requestId: string;
  promotedAt: string;
  mode: VisionPromotionMode;
  stage: string;
  message: string;
}): void {
  // Runs one sink and swallows whatever it throws (best-effort logging).
  const attempt = (sink: () => void): void => {
    try {
      sink();
    } catch {
      // best-effort
    }
  };

  attempt(() =>
    appendPromotionToPluginLog(
      `VIP error: chatId=${params.chatId} requestId=${params.requestId} mode=${params.mode} stage=${params.stage} error=${params.message}`
    )
  );

  attempt(() =>
    appendVisionPromotionJsonl([
      {
        type: "error",
        chatId: params.chatId,
        requestId: params.requestId,
        promotedAt: params.promotedAt,
        mode: params.mode,
        stage: params.stage,
        message: params.message,
      },
    ])
  );
}
/**
 * One media item listed in the vision-context canary file.
 *
 * Entries are built from chat media state records (attachments, variants,
 * images); at most one of the `a` / `v` / `i` indices is expected per entry.
 */
type VisionContextCanaryEntry = {
  /** Whether the item came from a user attachment or from generated media. */
  source: "attachment" | "generated";
  // Mirror chat_media_state.json for quick diff
  filename?: string; // relative original (e.g., attachment-image-...png or generated-image-...png)
  preview?: string; // relative preview (e.g., preview-...jpg)
  origin?: string; // LM Studio fileIdentifier
  originAbs?: string; // attachment provenance absolute path (SSOT)
  originalName?: string; // real original filename from metadata (e.g., "Katze.png")
  a?: number; // attachment index [aN]
  v?: number; // variant index [vN]
  i?: number; // image index [iN]
  createdAt: string; // from state when available
  /** True iff this entry was most recently injected as pixels (base64 preview bytes). */
  pixelPromoted?: boolean;
  // What actually went as pixels: chat-local preview path
  // NOTE(review): buildRollingWindowCanaryFromState fills this from originAbs or
  // chatWd + filename rather than from `preview` — confirm which is intended.
  localPath: string; // absolute path to preview bytes used for base64
  mimeType: string;
  // Compatibility with other projects' canary schema
  fileName: string; // basename(localPath)
  fileUri: string; // Mode 2/base64: localPath pointer; Mode 3/files: https uri
};
/**
 * Payload written to `vision_context.canary.json`: the media entries currently
 * inside the rolling windows plus optional diagnostics metadata.
 */
type VisionContextCanary = {
  /** Attachment entries inside the attachment window. */
  activeAttachments: VisionContextCanaryEntry[];
  /** Generated entries (variants and images) inside the shared window. */
  activeGenerated: VisionContextCanaryEntry[];
  // Optional metadata for debugging/monitoring
  meta?: {
    chatId: string;
    requestId: string;
    promotedAt: string;
    mode: "idempotent" | "persistent";
    /** Indices currently inside the rolling windows. */
    rollingWindow?: {
      attachmentNs: number[];
      // Populated with the same values as attachmentNs by buildRollingWindowCanaryFromState.
      attachmentAs?: number[];
      variantVs: number[];
      imageIs: number[];
    };
    /** Timestamp of the last pixel promotion, copied from state when present. */
    lastPixelPromotedAt?: string;
    /** Indices that were part of the last pixel promotion, copied from state. */
    lastPixelPromoted?: {
      attachmentNs: number[];
      attachmentAs?: number[];
      variantVs: number[];
      imageIs: number[];
    };
    /** Counts derived from the rolling-window selection. */
    expectedCounts?: {
      attachments: number;
      variants: number;
      images: number;
      total: number;
    };
    /** Counts of entries actually emitted into the canary. */
    actualCounts?: {
      attachments: number;
      generated: number;
      total: number;
    };
  };
};
/**
 * Builds the vision-context canary payload from the chat media state.
 *
 * Two independent rolling windows are computed:
 *  - attachments, ordered by their `a` index, limited to
 *    `getAttachmentWindowSize()` newest entries;
 *  - "everything else": variants (excluding `kind === "tool_result"`) and
 *    images that have a `preview`, ordered by `createdAt`, then kind
 *    (variants first), then index, limited to
 *    `getEverythingElseWindowSize()` newest entries.
 *
 * A window size of zero (or a negative size) selects nothing. Previously the
 * selection used `slice(-size)` directly, and `slice(-0)` returns the whole
 * array, so a disabled window reported every item as active.
 *
 * @param params.chatWd Chat working directory; its basename is used as chat id
 *   and relative filenames are resolved against it.
 * @param params.state Parsed chat media state (loosely typed; missing or
 *   malformed arrays are treated as empty).
 * @param params.requestId Request identifier copied into `meta`.
 * @param params.promotionMode Promotion mode copied into `meta.mode`.
 * @returns The canary payload with active entries and diagnostics metadata.
 */
function buildRollingWindowCanaryFromState(params: {
  chatWd: string;
  state: any;
  requestId: string;
  promotionMode: "idempotent" | "persistent";
}): VisionContextCanary {
  const { chatWd, state, requestId, promotionMode } = params;

  /** Last `size` items; empty for size <= 0. Non-finite sizes keep everything. */
  function takeLast<T>(items: T[], size: number): T[] {
    const n = Number.isFinite(size) ? Math.trunc(size) : size;
    if (n <= 0) return [];
    return items.slice(-n);
  }
  /** `value` when it is an array, otherwise an empty array. */
  const asArray = (value: unknown): any[] =>
    Array.isArray(value) ? value : [];
  /** `value` when it is a string, otherwise `undefined`. */
  const optionalString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;
  /** Copy of `value` containing only finite numbers (empty for non-arrays). */
  const finiteNumbers = (value: unknown): number[] =>
    asArray(value).filter(
      (x: unknown): x is number => typeof x === "number" && Number.isFinite(x)
    );

  // Window sizes come from the registry.
  const maxAttachments = getAttachmentWindowSize();
  const maxEverythingElse = getEverythingElseWindowSize();
  const promotedAt = localTimestamp();

  let chatId = "(unknown)";
  try {
    if (chatWd) chatId = path.basename(chatWd);
  } catch {
    // keep "(unknown)"
  }

  const attachments = asArray(state?.attachments);
  const variants = asArray(state?.variants);
  const images = asArray(state?.images);

  // ── Window 1: attachments (separate window) ────────────────────────────
  const rollingAttachments = takeLast(
    [...attachments].sort((a: any, b: any) => (a?.a ?? 0) - (b?.a ?? 0)),
    maxAttachments
  );
  const rollingAttachmentNs = rollingAttachments
    .map((a: any) => (typeof a?.a === "number" ? a.a : 0))
    .filter((n: number) => n > 0);

  // ── Window 2: everything else (unified pool of variants + images) ──────
  type PoolItem = {
    kind: "variant" | "image";
    index: number;
    createdAt: string;
    record: any;
  };
  const pool: PoolItem[] = [];
  for (const v of variants) {
    // Tool results and records without a preview never enter the pool.
    if ((v as any)?.kind === "tool_result") continue;
    if (!v?.preview) continue;
    pool.push({
      kind: "variant",
      index: typeof v.v === "number" ? v.v : 0,
      createdAt: typeof v.createdAt === "string" ? v.createdAt : "",
      record: v,
    });
  }
  for (const img of images) {
    if (!img?.preview) continue;
    pool.push({
      kind: "image",
      index: typeof img.i === "number" ? img.i : 0,
      createdAt: typeof img.createdAt === "string" ? img.createdAt : "",
      record: img,
    });
  }
  // FIFO order: creation time, then variants before images, then index.
  pool.sort((a, b) => {
    const cmp = a.createdAt.localeCompare(b.createdAt);
    if (cmp !== 0) return cmp;
    if (a.kind !== b.kind) return a.kind === "variant" ? -1 : 1;
    return a.index - b.index;
  });
  const windowItems = takeLast(pool, maxEverythingElse);
  const indicesOfKind = (kind: PoolItem["kind"]): number[] =>
    windowItems
      .filter((x) => x.kind === kind)
      .map((x) => x.index)
      .filter((n) => n > 0);
  const rollingVariantVs = indicesOfKind("variant");
  const rollingImageIs = indicesOfKind("image");

  // ── Last pixel promotion bookkeeping (copied from state) ───────────────
  const lastPixelPromotedAt =
    typeof state?.lastPixelPromotedAt === "string"
      ? String(state.lastPixelPromotedAt)
      : undefined;
  const lastPixelPromotedAttachmentAs = finiteNumbers(
    state?.lastPixelPromotedAttachmentAs
  );
  const lastPixelPromotedVariantVs = finiteNumbers(
    state?.lastPixelPromotedVariantVs
  );
  const lastPixelPromotedImageIs = finiteNumbers(
    state?.lastPixelPromotedImageIs
  );
  const lastAttSet = new Set<number>(lastPixelPromotedAttachmentAs);
  const lastVarSet = new Set<number>(lastPixelPromotedVariantVs);
  const lastImgSet = new Set<number>(lastPixelPromotedImageIs);

  // ── Active attachment entries ──────────────────────────────────────────
  const activeAttachments: VisionContextCanaryEntry[] = [];
  for (const a of rollingAttachments as any[]) {
    if (!a) continue;
    const originAbs =
      typeof a.originAbs === "string" ? String(a.originAbs) : "";
    // Filename preference: state filename, then origin, then basename of originAbs.
    let filename = "(unknown)";
    if (typeof a.filename === "string" && a.filename) {
      filename = String(a.filename);
    } else if (typeof a.origin === "string" && a.origin) {
      filename = String(a.origin);
    } else if (originAbs) {
      filename = path.basename(originAbs);
    }
    const n = typeof a.a === "number" ? a.a : undefined;
    const localPath = originAbs || path.join(chatWd, filename);
    activeAttachments.push({
      source: "attachment",
      localPath,
      filename,
      preview: optionalString(a.preview),
      origin: optionalString(a.origin),
      originAbs: originAbs || undefined,
      originalName: optionalString(a.originalName),
      a: n,
      fileName: path.basename(localPath),
      fileUri: localPath,
      mimeType: mimeFromPath(localPath),
      createdAt: typeof a.createdAt === "string" ? a.createdAt : promotedAt,
      pixelPromoted: typeof n === "number" ? lastAttSet.has(n) : false,
    });
  }

  // ── Active generated entries (variants and images in the window) ───────
  const activeGenerated: VisionContextCanaryEntry[] = [];
  for (const item of windowItems) {
    const rec = item.record;
    if (!rec) continue;
    const filename =
      typeof rec.filename === "string" && rec.filename
        ? String(rec.filename)
        : "(unknown)";
    const localPath = path.join(chatWd, filename);
    // Split into head/tail so the index key (v or i) keeps its position in
    // the serialized JSON between `preview` and `fileName`.
    const head = {
      source: "generated" as const,
      localPath,
      filename,
      preview: optionalString(rec.preview),
    };
    const tail = {
      fileName: path.basename(localPath),
      fileUri: localPath,
      mimeType: mimeFromPath(localPath),
      createdAt: typeof rec.createdAt === "string" ? rec.createdAt : promotedAt,
    };
    if (item.kind === "variant") {
      const vv = typeof rec.v === "number" ? rec.v : undefined;
      activeGenerated.push({
        ...head,
        v: vv,
        ...tail,
        pixelPromoted: typeof vv === "number" ? lastVarSet.has(vv) : false,
      });
    } else {
      const ii = typeof rec.i === "number" ? rec.i : undefined;
      activeGenerated.push({
        ...head,
        i: ii,
        ...tail,
        pixelPromoted: typeof ii === "number" ? lastImgSet.has(ii) : false,
      });
    }
  }

  return {
    activeAttachments,
    activeGenerated,
    meta: {
      chatId,
      requestId,
      promotedAt,
      mode: promotionMode,
      rollingWindow: {
        attachmentNs: rollingAttachmentNs,
        attachmentAs: rollingAttachmentNs,
        variantVs: rollingVariantVs,
        imageIs: rollingImageIs,
      },
      lastPixelPromotedAt,
      lastPixelPromoted: {
        attachmentNs: lastPixelPromotedAttachmentAs,
        attachmentAs: lastPixelPromotedAttachmentAs,
        variantVs: lastPixelPromotedVariantVs,
        imageIs: lastPixelPromotedImageIs,
      },
      expectedCounts: {
        attachments: rollingAttachments.length,
        variants: rollingVariantVs.length,
        images: rollingImageIs.length,
        total: rollingAttachments.length + windowItems.length,
      },
      actualCounts: {
        attachments: activeAttachments.length,
        generated: activeGenerated.length,
        total: activeAttachments.length + activeGenerated.length,
      },
    },
  };
}
/**
 * Guesses an image MIME type from a file path's extension.
 *
 * @param p File path or name.
 * @returns `image/png` for `.png`, `image/webp` for `.webp`, and
 *   `image/jpeg` for everything else (including when the extension cannot be
 *   determined).
 */
function mimeFromPath(p: string): string {
  const fallback = "image/jpeg";
  try {
    switch (path.extname(p).toLowerCase()) {
      case ".png":
        return "image/png";
      case ".webp":
        return "image/webp";
      default:
        return fallback;
    }
  } catch {
    return fallback;
  }
}
/**
 * Writes `vision_context.canary.json` into the chat working directory.
 *
 * The payload is written to a temporary sibling file and then renamed over the
 * destination. The temporary name is unique per call (pid + random suffix) so
 * concurrent writers cannot clobber each other's temp file, and the temp file
 * is removed again if writing or renaming fails.
 *
 * Best-effort: failures are logged with `console.error` and never thrown.
 *
 * @param chatWd Chat working directory that receives the canary file.
 * @param context Canary payload; serialized as pretty-printed JSON.
 */
async function writeVisionContextCanary(
  chatWd: string,
  context: VisionContextCanary
): Promise<void> {
  let tmp: string | undefined;
  try {
    const dst = path.join(chatWd, "vision_context.canary.json");
    const unique = `${process.pid}.${crypto.randomBytes(4).toString("hex")}`;
    tmp = `${dst}.${unique}.tmp`;
    const pretty = JSON.stringify(context, null, 2);
    await fs.promises.writeFile(tmp, pretty, "utf-8");
    await fs.promises.rename(tmp, dst);
  } catch (e) {
    if (tmp) {
      // Do not leave a stray temp file behind; ignore "already gone".
      await fs.promises.unlink(tmp).catch(() => undefined);
    }
    console.error(
      "[VisionPromotion] Failed to write vision_context.canary.json:",
      e instanceof Error ? e.message : String(e)
    );
  }
}
// NOTE: vision-promotion.jsonl I/O + schema lives in ./helpers/visionPromotionLog
/* -------------------------------------------------------------------------- */
/* Types */
/* -------------------------------------------------------------------------- */
/**
 * Accumulator for a single tool call.
 * NOTE(review): presumably filled incrementally from streamed deltas — the
 * consumer is outside this chunk; confirm there.
 */
type ToolCallState = {
  /** Tool call identifier. */
  id: string;
  /** Tool (function) name, or null while not yet known. */
  name: string | null;
  /** Position of the tool call. */
  index: number;
  /** Tool call arguments as a string (presumably JSON text — confirm at the producer). */
  arguments: string;
};
/* -------------------------------------------------------------------------- */
/* Build helpers */
/* -------------------------------------------------------------------------- */
/** Per-request runtime switches and tuning values passed to the build helpers. */
type RuntimeOptions = {
  /** Debug flag for chunk-level logging (consumer not visible in this chunk). */
  debugChunks: boolean;
  /** Enables promotion / tool-result rewrite diagnostics (read by toOpenAIMessages). */
  debugPromotion: boolean;
  /** Preview size setting (presumably the maximum edge length in pixels — confirm at the consumer). */
  previewMaxDim: number;
  /** Preview quality setting (range/units not visible here — confirm at the consumer). */
  previewQuality: number;
  /** Selects persistent vision promotion (presumably vs. idempotent — confirm at the consumer). */
  visionPromotionPersistent: boolean;
};
/**
 * Creates an OpenAI SDK client pointed at the local LM Studio API.
 *
 * @param globalConfig Parsed global plugin configuration; `baseUrl` and
 *   `apiKey` are read from it.
 * @returns A client using the configured values, falling back to
 *   `http://127.0.0.1:1234/v1` and the dummy key `lm-studio` when a value is
 *   empty or missing.
 */
function createOpenAI(
  globalConfig: InferParsedConfig<typeof globalConfigSchematics>
) {
  const configuredUrl = globalConfig.get("baseUrl");
  const baseURL = configuredUrl ? configuredUrl : "http://127.0.0.1:1234/v1";
  const configuredKey = globalConfig.get("apiKey");
  // The local API does not validate the key; a placeholder is sufficient.
  const apiKey = configuredKey ? configuredKey : "lm-studio";
  return new OpenAI({ apiKey, baseURL });
}
/* -------------------------------------------------------------------------- */
/* Tool name sanitization */
/* -------------------------------------------------------------------------- */
/**
 * Small deterministic, non-cryptographic string hash (multiply-by-33 then XOR
 * with each UTF-16 code unit, seeded with 5381) used for name disambiguation.
 *
 * @param input Text to hash.
 * @returns The unsigned 32-bit hash rendered in base 36, at most 8 characters.
 */
function shortHash(input: string): string {
  let state = 5381;
  let pos = 0;
  while (pos < input.length) {
    // Math.imul(state, 33) is the 32-bit equivalent of (state << 5) + state.
    state = Math.imul(state, 33) ^ input.charCodeAt(pos);
    pos += 1;
  }
  const unsigned = state >>> 0;
  return unsigned.toString(36).slice(0, 8);
}
/**
 * Makes a tool name safe by replacing every character outside
 * `[a-zA-Z0-9_-]` with `_` and collapsing runs of underscores into one.
 *
 * @param name Original tool name.
 * @returns The sanitized name.
 */
function sanitizeNameBase(name: string): string {
  // A run of invalid characters and/or underscores always ends up as exactly
  // one underscore, so both steps can be done in a single pass.
  return name.replace(/[^a-zA-Z0-9-]+/g, "_");
}
/**
 * Limits a name to at most 64 characters.
 *
 * @param name Name to clamp.
 * @returns `name` unchanged when it is 64 characters or shorter, otherwise its
 *   first 64 characters.
 */
function clampName64(name: string): string {
  const limit = 64;
  if (name.length > limit) {
    return name.slice(0, limit);
  }
  return name;
}
/**
 * Splits a string of the form `{...json object...} remainder` into the leading
 * object text and the remainder.
 *
 * The scan only balances braces and honours double-quoted strings (including
 * backslash escapes); it does not validate the JSON itself.
 *
 * @param s Input text; non-strings are coerced (nullish becomes "").
 * @returns `{ json, rest, hasJsonPrefix: true }` with `rest` left-trimmed when
 *   a balanced leading object is found; otherwise `json` is empty, `rest` is
 *   the untouched input and `hasJsonPrefix` is false.
 */
function splitLeadingJsonObjectPrefix(
  s: string
): { json: string; rest: string; hasJsonPrefix: boolean } {
  const raw = typeof s === "string" ? s : String(s ?? "");
  const body = raw.trimStart();
  if (body[0] !== "{") {
    return { json: "", rest: raw, hasJsonPrefix: false };
  }

  let nesting = 0;
  let quoted = false;
  let closeAt = -1;
  for (let pos = 0; pos < body.length && closeAt < 0; pos++) {
    const c = body[pos];
    if (quoted) {
      if (c === "\\") {
        // Skip the escaped character entirely.
        pos++;
      } else if (c === '"') {
        quoted = false;
      }
    } else if (c === '"') {
      quoted = true;
    } else if (c === "{") {
      nesting++;
    } else if (c === "}" && --nesting === 0) {
      closeAt = pos;
    }
  }

  if (closeAt < 0) {
    // Unbalanced: treat the whole input as having no JSON prefix.
    return { json: "", rest: raw, hasJsonPrefix: false };
  }
  return {
    json: body.slice(0, closeAt + 1),
    rest: body.slice(closeAt + 1).trimStart(),
    hasJsonPrefix: true,
  };
}
/**
 * Tells whether `s` is JSON object text whose `schema` field equals
 * `dtc.model-mapping-snapshot.v1`.
 *
 * @param s Candidate JSON text.
 * @returns `true` only for parseable object text carrying that schema marker.
 */
function isModelMappingSnapshotJson(s: string): boolean {
  if (typeof s !== "string") return false;
  const text = s.trim();
  const looksLikeObject = text.startsWith("{") && text.endsWith("}");
  if (!looksLikeObject) return false;

  let parsed: any;
  try {
    parsed = JSON.parse(text);
  } catch {
    return false;
  }
  return parsed?.schema === "dtc.model-mapping-snapshot.v1";
}
/**
 * Produces the tool-call arguments that are shown to the model.
 *
 * Only affects model input; execution-time tool arguments must never be
 * altered by this function. For the `index_image` tool, a `query` string may
 * start with an inline JSON snapshot (`{...snapshot...} <userQuery>`); when
 * that prefix is the known model-mapping snapshot it is stripped to save
 * tokens. Everything else is returned untouched.
 *
 * @param toolName Name of the tool being called (matched case-insensitively).
 * @param args Original tool arguments.
 * @returns Either `args` itself or a shallow copy with a trimmed `query`.
 */
function sanitizeToolCallArgsForModel(toolName: string, args: unknown): unknown {
  if (!args || typeof args !== "object") return args;

  const normalizedTool = String(toolName || "").trim().toLowerCase();
  if (normalizedTool !== "index_image") return args;

  const record: any = args as any;
  const query = record?.query;
  if (typeof query !== "string") return args;

  const split = splitLeadingJsonObjectPrefix(query);
  // Strip only when the prefix is the known snapshot payload.
  const isSnapshotPrefix =
    split.hasJsonPrefix && isModelMappingSnapshotJson(split.json);
  return isSnapshotPrefix ? { ...record, query: split.rest } : args;
}
/** Bidirectional lookup between original tool names and their sanitized ("safe") forms. */
type NameMaps = {
  toSafe: Map<string, string>; // original -> safe
  toOriginal: Map<string, string>; // safe -> original
};
/**
 * Convert internal chat history to the format expected by OpenAI.
 *
 * Per role:
 *  - system: forwarded as plain text.
 *  - user: attachment wrappers are parsed out of the text; when any are found
 *    the message becomes a multi-part content array, otherwise plain text.
 *  - assistant: tool call requests are mapped to OpenAI tool calls (with safe
 *    names and model-sanitized arguments); the text is passed through
 *    `stripInjections` when a working directory is given.
 *  - tool: one OpenAI tool message per tool call result; image parts may be
 *    rewritten to review text when a rewrite plan exists for the call id.
 *
 * @param history Chat history to convert.
 * @param nameMaps Original ↔ safe tool name maps.
 * @param options Runtime options; only `debugPromotion` is read here.
 * @param workingDir Optional chat working directory used as the key for
 *   stripping injected markdown from assistant text.
 * @param toolResultRewrites Optional rewrite plan for tool results.
 * @returns The converted message list, in history order.
 */
function toOpenAIMessages(
  history: Chat,
  nameMaps: NameMaps,
  options: RuntimeOptions,
  workingDir?: string,
  toolResultRewrites?: {
    /** tool_call_id -> ordered labels (e.g., ["i1","i2"]) */
    imageReviewByToolCallId?: Record<string, string[]>;
  }
): ChatCompletionMessageParam[] {
  const messages: ChatCompletionMessageParam[] = [];
  const reviewHint =
    "Carefully examine the result and comment on how well it matches your prompt. Do not assume it does.";
  /**
   * Replaces image parts in a tool result with short text parts naming the
   * generated item, using `labels` in order. Handles `{ content: [...] }`
   * objects, bare part arrays, draw-things index result payloads, and JSON
   * strings (which are parsed and returned as objects/arrays, not re-stringified).
   */
  function rewriteToolResultImagesToReviewText(
    rawContent: unknown,
    labels: string[]
  ): unknown {
    // Text part that stands in for one image; carries the review hint alongside.
    const makeTextPart = (label: string) => ({
      type: "text",
      text: `${label} successfully generated.`,
      $hint: reviewHint,
    });
    // Swaps every `type === "image"` part for a text part; other parts pass through.
    const rewriteParts = (
      parts: unknown[]
    ): { changed: boolean; parts: unknown[] } => {
      let idx = 0;
      let changed = false;
      const out: unknown[] = [];
      for (const p of parts) {
        if (p && typeof p === "object" && (p as any).type === "image") {
          // Fall back to a positional iN label when the plan has too few labels.
          const label = labels[idx] ?? `i${idx + 1}`;
          out.push(makeTextPart(label));
          idx++;
          changed = true;
          continue;
        }
        out.push(p);
      }
      return { changed, parts: out };
    };
    // For draw-things index results: attaches an `index` array of labels (or
    // null for empty slots) to each image record. Returns the same reference
    // when the payload is not such a result.
    const rewriteDrawThingsIndexResults = (payload: any): any => {
      if (!payload || typeof payload !== "object") return payload;
      if (payload.type !== "draw-things-index-results") return payload;
      if (!Array.isArray(payload.images)) return payload;
      // Label counter runs across all image records, not per record.
      let idx = 0;
      const imagesOut = payload.images.map((img: any) => {
        if (!img || typeof img !== "object") return img;
        const paths: any[] = Array.isArray(img.imagePaths) ? img.imagePaths : [];
        const previews: any[] = Array.isArray(img.httpPreviewUrls)
          ? img.httpPreviewUrls
          : [];
        const n = Math.max(paths.length, previews.length);
        if (n <= 0) return img;
        const index: Array<string | null> = [];
        for (let j = 0; j < n; j++) {
          // A slot counts when it has a non-blank path or preview URL.
          const has =
            (typeof paths[j] === "string" && paths[j].trim()) ||
            (typeof previews[j] === "string" && previews[j].trim());
          if (!has) {
            index.push(null);
            continue;
          }
          const label = labels[idx] ?? `p${idx + 1}`;
          index.push(label);
          idx++;
        }
        // Only additive change: attach stable notations that can be used as review_image targets.
        return { ...img, index };
      });
      return { ...payload, images: imagesOut };
    };
    // Common case: { content: [...] }
    if (
      rawContent &&
      typeof rawContent === "object" &&
      !Array.isArray(rawContent)
    ) {
      const obj: any = rawContent as any;
      // Draw-Things index tool returns structured JSON; add pN notations directly.
      const rewrittenIndex = rewriteDrawThingsIndexResults(obj);
      // A new reference means the index rewrite applied; return it as-is.
      if (rewrittenIndex !== obj) return rewrittenIndex;
      if (Array.isArray(obj.content)) {
        const { changed, parts } = rewriteParts(obj.content);
        if (!changed) return rawContent;
        return { ...obj, content: parts };
      }
    }
    // Alternate: tool content already is an array of parts
    if (Array.isArray(rawContent)) {
      const { changed, parts } = rewriteParts(rawContent);
      return changed ? parts : rawContent;
    }
    // Best-effort: JSON string tool result
    if (typeof rawContent === "string") {
      const s = rawContent.trim();
      if (s.startsWith("{") || s.startsWith("[")) {
        try {
          const parsed = JSON.parse(s);
          const rewritten = rewriteToolResultImagesToReviewText(parsed, labels);
          // Keep as object/array for the model (do NOT stringify back)
          return rewritten;
        } catch {
          // Not valid JSON after all: leave the string untouched.
          return rawContent;
        }
      }
    }
    return rawContent;
  }
  /**
   * Counts `type === "image"` parts in a tool result (array, `{ content }`
   * object, or JSON string). Used for debug logging only; returns 0 on any error.
   */
  function countToolResultImageParts(rawContent: unknown): number {
    try {
      const countInParts = (parts: unknown[]): number => {
        let n = 0;
        for (const p of parts) {
          if (p && typeof p === "object" && (p as any).type === "image") n++;
        }
        return n;
      };
      if (Array.isArray(rawContent)) return countInParts(rawContent);
      if (rawContent && typeof rawContent === "object") {
        const obj: any = rawContent as any;
        if (Array.isArray(obj.content)) return countInParts(obj.content);
      }
      if (typeof rawContent === "string") {
        // Cheap heuristic: only for logging. Avoid JSON.parse overhead unless likely.
        if (
          rawContent.includes('"type":"image"') ||
          rawContent.includes('"type": "image"')
        ) {
          try {
            const parsed = JSON.parse(rawContent);
            return countToolResultImageParts(parsed);
          } catch {
            return 0;
          }
        }
      }
      return 0;
    } catch {
      return 0;
    }
  }
  for (const message of history) {
    switch (message.getRole()) {
      case "system":
        messages.push({ role: "system", content: message.getText() });
        break;
      case "user": {
        // Attachment wrappers embedded in the text become separate content parts.
        const parsed = parseAttachmentWrappers(message.getText());
        if (parsed.parts.length > 0) {
          const contentParts: any[] = [];
          // The remaining text (wrappers removed) goes first, if non-blank.
          if (parsed.text.trim().length > 0) {
            contentParts.push({ type: "text", text: parsed.text });
          }
          for (const p of parsed.parts) {
            if (!p || typeof p !== "object") continue;
            if (p.kind === "image" && p.url) {
              contentParts.push({
                type: "image_url",
                image_url: { url: p.url },
              });
            } else if (p.kind === "text" && p.text) {
              // Render text files inline as separate text parts
              contentParts.push({ type: "text", text: p.text });
            } else if (p.kind === "text_link" && p.url) {
              // Provide a textual reference to the file link
              contentParts.push({
                type: "text",
                text: `Attached file: ${p.url}`,
              });
            }
          }
          messages.push({ role: "user", content: contentParts } as any);
        } else {
          messages.push({ role: "user", content: message.getText() });
        }
        break;
      }
      case "assistant": {
        const toolCalls: ChatCompletionMessageToolCall[] = message
          .getToolCallRequests()
          .map((toolCall) => ({
            id: toolCall.id ?? "",
            type: "function",
            function: {
              // Map original tool name to sanitized safe name
              name:
                nameMaps.toSafe.get(toolCall.name) ??
                clampName64(sanitizeNameBase(toolCall.name)),
              arguments: JSON.stringify(
                sanitizeToolCallArgsForModel(toolCall.name, toolCall.arguments ?? {})
              ),
            },
          }));
        // Strip injected markdown (variants, images, tables) before sending to model.
        // Uses in-memory registry keyed by workingDir to find exact strings to remove.
        const rawText = message.getText();
        const cleanedText = workingDir ? stripInjections(workingDir, rawText) : rawText;
        messages.push({
          role: "assistant",
          content: cleanedText,
          // tool_calls is only attached when there is at least one call.
          ...(toolCalls.length ? { tool_calls: toolCalls } : {}),
        });
        break;
      }
      case "tool": {
        const results = message.getToolCallResults();
        for (let i = 0; i < results.length; i++) {
          const toolCallResult = results[i];
          const toolCallId = toolCallResult.toolCallId ?? "";
          const rawContent = toolCallResult.content;
          // Looks up the rewrite labels for this result, trying several
          // possible id field names on the result object in order.
          const resolveRewriteLabels = (): string[] | undefined => {
            const map = toolResultRewrites?.imageReviewByToolCallId;
            if (!map) return undefined;
            // Turns a candidate id into a non-empty string key, or undefined.
            const normalize = (v: unknown): string | undefined => {
              if (typeof v === "string") {
                const s = v.trim();
                return s ? s : undefined;
              }
              if (typeof v === "number" && Number.isFinite(v)) {
                return String(v);
              }
              return undefined;
            };
            const r: any = toolCallResult as any;
            const candidatesRaw: unknown[] = [
              r?.toolCallId,
              r?.tool_call_id,
              r?.toolCallRequestId,
              r?.toolCallRequestID,
              r?.callId,
              r?.id,
            ];
            // De-duplicate while preserving the priority order above.
            const seen = new Set<string>();
            const candidates: string[] = [];
            for (const c of candidatesRaw) {
              const k = normalize(c);
              if (!k || seen.has(k)) continue;
              seen.add(k);
              candidates.push(k);
            }
            for (const k of candidates) {
              const labels = map[k];
              if (Array.isArray(labels) && labels.length > 0) return labels;
            }
            return undefined;
          };
          const labels = resolveRewriteLabels();
          // Without a plan for this call the content is forwarded unchanged.
          const content =
            Array.isArray(labels) && labels.length > 0
              ? rewriteToolResultImagesToReviewText(rawContent, labels)
              : rawContent;
          if (options.debugPromotion) {
            const before = countToolResultImageParts(rawContent);
            const after = countToolResultImageParts(content);
            const hasPlan = Array.isArray(labels) && labels.length > 0;
            // Only log when there was something to rewrite or a plan existed.
            if (before > 0 || hasPlan) {
              console.info(
                `[ToolResultRewrite] tool_call_id="${
                  toolCallId || "(empty)"
                }" ` +
                  `plan=${hasPlan ? `labels(${labels.length})` : "none"} ` +
                  `imagePartsBefore=${before} imagePartsAfter=${after}`
              );
              if (before > 0 && !hasPlan) {
                console.info(
                  `[ToolResultRewrite] Skip reason: no rewrite plan (likely tool_call_id join mismatch).`
                );
              }
            }
          }
          // 1) Emit the tool message
          messages.push({
            role: "tool",
            tool_call_id: toolCallId,
            content,
          } as ChatCompletionToolMessageParam);
        }
        break;
      }
    }
  }
  return messages;
}
/**
 * Consolidate consecutive messages with the same role to satisfy
 * strict alternating role requirements of some model templates.
 * E.g., two consecutive "user" messages are merged into one.
 * System and tool messages are left untouched (they have special semantics).
 *
 * Merging rules:
 *  - Content of both messages is normalized to part arrays (strings become a
 *    single text part, anything else contributes no parts) and joined with a
 *    `"\n---\n"` text separator part.
 *  - `tool_calls` of both messages are concatenated, so tool calls carried by
 *    the second assistant message are not lost (later tool messages refer to
 *    them by id).
 *  - Input message objects are never mutated; a merged message is a new object.
 *
 * @param messages Messages in conversation order.
 * @returns The same array when it has fewer than two messages, otherwise a new
 *   array with consecutive user/assistant messages merged.
 */
function consolidateConsecutiveRoles(
  messages: ChatCompletionMessageParam[]
): ChatCompletionMessageParam[] {
  if (messages.length < 2) return messages;

  // Normalizes message content to an array of content parts.
  const toParts = (content: unknown): unknown[] => {
    if (Array.isArray(content)) return content;
    if (typeof content === "string") return [{ type: "text", text: content }];
    return [];
  };
  const toolCallsOf = (message: any): unknown[] =>
    Array.isArray(message?.tool_calls) ? message.tool_calls : [];

  const result: ChatCompletionMessageParam[] = [];
  for (const msg of messages) {
    const current: any = msg;
    const prev: any = result.length > 0 ? result[result.length - 1] : undefined;
    const role = current.role;

    // Only merge user-user or assistant-assistant (not system, not tool)
    const mergeable =
      prev !== undefined &&
      prev.role === role &&
      (role === "user" || role === "assistant");
    if (!mergeable) {
      result.push(msg);
      continue;
    }

    const merged: any = {
      ...prev,
      content: [
        ...toParts(prev.content),
        { type: "text", text: "\n---\n" },
        ...toParts(current.content),
      ],
    };
    const toolCalls = [...toolCallsOf(prev), ...toolCallsOf(current)];
    if (toolCalls.length > 0) merged.tool_calls = toolCalls;

    // Replace instead of mutating: `prev` may be the caller's own object.
    result[result.length - 1] = merged;
  }
  return result;
}
/**
 * Builds a compact single-line summary of outgoing messages for debug logs.
 *
 * The summary starts with `messages=<count>` and then lists, per message, its
 * index and role plus either the number of `image_url` / `text` parts (array
 * content), the string length (string content), or the content's `typeof`.
 *
 * @param messages Messages to summarize.
 * @returns Segments joined by `" | "`, or a fixed failure marker if
 *   summarizing throws.
 */
function summarizeOpenAIMessagesForDebug(
  messages: ChatCompletionMessageParam[]
): string {
  const describe = (entry: any, position: number): string => {
    const role = entry?.role ?? "?";
    const content = entry?.content;
    if (typeof content === "string") {
      return `#${position} role=${role} content=string len=${content.length}`;
    }
    if (!Array.isArray(content)) {
      return `#${position} role=${role} content=${typeof content}`;
    }
    let imageUrlCount = 0;
    let textCount = 0;
    for (const part of content) {
      if (!part || typeof part !== "object") continue;
      const isImageUrl =
        part.type === "image_url" &&
        part.image_url &&
        typeof part.image_url.url === "string";
      if (isImageUrl) imageUrlCount++;
      if (part.type === "text" && typeof part.text === "string") textCount++;
    }
    return `#${position} role=${role} parts=image_url:${imageUrlCount} text:${textCount}`;
  };

  try {
    const header = `messages=${messages.length}`;
    const perMessage = Array.from({ length: messages.length }, (_, i) =>
      describe(messages[i] as any, i)
    );
    return [header, ...perMessage].join(" | ");
  } catch {
    return "(failed to summarize messages)";
  }
}
/**
 * Builds a compact single-line description of a content-part array for debug
 * logs without dumping large payloads.
 *
 * Text parts are whitespace-collapsed and cut to 96 characters (plus `…`);
 * `image_url` parts show either the MIME type and base64 length of a data URL
 * or the plain URL length; other parts show only their type.
 *
 * @param parts Content value to describe (expected to be an array of parts).
 * @returns Part descriptions joined by `" | "`, a note for non-array input,
 *   or a fixed failure marker if describing throws.
 */
function summarizeContentPartsForDebug(parts: unknown): string {
  const describeText = (part: any): string => {
    const rawText = typeof part.text === "string" ? part.text : "";
    const collapsed = rawText.replace(/\s+/g, " ").trim();
    const preview =
      collapsed.length > 96 ? `${collapsed.slice(0, 96)}…` : collapsed;
    return `text(${preview.length}):"${preview}"`;
  };
  const describeImageUrl = (part: any): string => {
    const url = part.image_url?.url;
    if (typeof url !== "string") return "image_url:(missing url)";
    const dataUrl = /^data:([^;]+);base64,(.*)$/s.exec(url);
    if (!dataUrl) return `image_url(urlLen=${url.length})`;
    const mime = dataUrl[1];
    const b64Len = dataUrl[2]?.length ?? 0;
    return `image_url(mime=${mime} b64Len=${b64Len})`;
  };
  const describePart = (part: any): string => {
    if (!part || typeof part !== "object") return String(part);
    switch (part.type) {
      case "text":
        return describeText(part);
      case "image_url":
        return describeImageUrl(part);
      default:
        return `part(type=${String(part.type)})`;
    }
  };

  try {
    if (!Array.isArray(parts)) return `(non-array content: ${typeof parts})`;
    return Array.from(parts as any[], (part) => describePart(part)).join(" | ");
  } catch {
    return "(failed to summarize content parts)";
  }
}
/**
 * Counts `image_url` content parts (with a string URL) across all messages.
 * Messages whose content is not an array contribute nothing.
 *
 * @param messages Outgoing messages.
 * @returns Total number of image URL parts.
 */
function countOutgoingImageUrlParts(
  messages: ChatCompletionMessageParam[]
): number {
  const isImageUrlPart = (part: any): boolean =>
    Boolean(
      part &&
        typeof part === "object" &&
        part.type === "image_url" &&
        part.image_url &&
        typeof part.image_url.url === "string"
    );

  let total = 0;
  for (const message of messages as any[]) {
    const parts = message?.content;
    if (Array.isArray(parts)) {
      total += parts.filter(isImageUrlPart).length;
    }
  }
  return total;
}
/* -------------------------------------------------------------------------- */
/* Attachment wrapper parsing (input) */
/* -------------------------------------------------------------------------- */
/**
 * Extracts `[[LMSTUDIO_ATTACHMENT: {...}]]` wrappers from message text.
 *
 * Each wrapper's JSON payload is parsed; payloads of kind `image` (with
 * `url`), `text` (with `text`) or `text_link` (with `url`) become parts.
 * Every wrapper whose JSON parses is removed from the returned text, whether
 * or not it produced a part; wrappers with malformed JSON are left in place.
 *
 * @param text Raw message text.
 * @returns The text with parsed wrappers removed, and the extracted parts in
 *   order of appearance.
 */
function parseAttachmentWrappers(text: string): {
  text: string;
  parts: Array<{
    kind: "image" | "text" | "text_link";
    url?: string;
    text?: string;
  }>;
} {
  type WrapperPart = {
    kind: "image" | "text" | "text_link";
    url?: string;
    text?: string;
  };
  const parts: WrapperPart[] = [];
  if (!text) return { text, parts };

  // Maps a parsed payload to a part, or undefined for unsupported payloads.
  const toPart = (payload: any): WrapperPart | undefined => {
    if (!payload) return undefined;
    const hasUrl = typeof payload.url === "string";
    switch (payload.kind) {
      case "image":
        return hasUrl ? { kind: "image", url: payload.url } : undefined;
      case "text":
        return typeof payload.text === "string"
          ? { kind: "text", text: payload.text }
          : undefined;
      case "text_link":
        return hasUrl ? { kind: "text_link", url: payload.url } : undefined;
      default:
        return undefined;
    }
  };

  const wrapperPattern = /\[\[LMSTUDIO_ATTACHMENT:\s*(\{[\s\S]*?\})\s*\]\]/g;
  let remaining = text;
  for (
    let hit = wrapperPattern.exec(text);
    hit !== null;
    hit = wrapperPattern.exec(text)
  ) {
    let payload: any;
    try {
      payload = JSON.parse(hit[1]);
    } catch {
      // ignore malformed wrappers
      continue;
    }
    const part = toPart(payload);
    if (part) parts.push(part);
    remaining = remaining.replace(hit[0], "");
  }
  return { text: remaining, parts };
}
/* -------------------------------------------------------------------------- */
/* Conversation attachment fallback utilities */
/* -------------------------------------------------------------------------- */
/**
 * Tells whether a path ends in a recognised image extension
 * (png, jpg, jpeg, gif, webp, bmp, svg; case-insensitive).
 *
 * @param p Path or file name to test.
 * @returns `true` when the extension matches.
 */
function isImagePath(p: string): boolean {
  const imageExtensionPattern = /\.(png|jpe?g|gif|webp|bmp|svg)$/i;
  return imageExtensionPattern.test(p);
}
/**
 * Converts a `file://` URI into a filesystem path via `fileUriToPath`; any
 * other value (including empty input) is returned unchanged.
 *
 * @param s Path or `file://` URI.
 * @returns The filesystem path, or `s` as given.
 */
function normalizeMaybeFileUri(s: string): string {
  const isFileUri = Boolean(s) && /^file:\/\//i.test(s);
  return isFileUri ? fileUriToPath(s) : s;
}
/** Resolves to true when `p` is visible on disk (F_OK access check), false otherwise. */
async function pathExists(p: string): Promise<boolean> {
  let found = true;
  try {
    await fs.promises.access(p, fs.constants.F_OK);
  } catch {
    found = false;
  }
  return found;
}
/**
 * Creates `chat_media_state.json` in `chatWd` (via writeStateAtomic) when it is
 * not already present. Failures are never propagated; they are only logged
 * when `debug` is set.
 */
async function ensureChatMediaStateFileExists(
  chatWd: string,
  state: any,
  debug: boolean
): Promise<void> {
  try {
    const stateFile = path.join(chatWd, "chat_media_state.json");

    let alreadyThere = true;
    try {
      await fs.promises.access(stateFile, fs.constants.F_OK);
    } catch {
      alreadyThere = false;
    }
    if (alreadyThere) return;

    await writeStateAtomic(chatWd, state);
    if (debug) {
      console.info(
        `[Orchestrator] Created missing chat_media_state.json in ${chatWd}`
      );
    }
  } catch (e) {
    if (!debug) return;
    console.warn(
      `[Orchestrator] Failed to ensure chat_media_state.json exists: ${String(
        (e as Error)?.message ?? e
      )}`
    );
  }
}
/**
 * Resolves an LM Studio file identifier (e.g., "abc123.png") to the full path
 * in the user-files directory (<LM Studio home>/user-files/abc123.png).
 *
 * Only bare file names are accepted. Anything that already looks like a path
 * (leading "/", "./", "../", or containing a path separator) and the special
 * directory entries "." / ".." yield null.
 *
 * @param identifier Bare file name to look up.
 * @returns The joined path when either the file itself or its
 *   "<file>.metadata.json" sidecar exists; otherwise null.
 */
function resolveLMStudioIdentifier(identifier: string): string | null {
  if (!identifier || typeof identifier !== "string") return null;
  // Skip if it's already an absolute or relative path
  if (
    identifier.startsWith("/") ||
    identifier.startsWith("./") ||
    identifier.startsWith("../")
  )
    return null;
  // Skip if it doesn't look like a file identifier (should be simple filename)
  if (identifier.includes("/") || identifier.includes("\\")) return null;
  // "." and ".." contain no separator, but would resolve to the user-files
  // directory itself or its parent: both exist and neither is a file.
  if (identifier === "." || identifier === "..") return null;

  const lmHome = findLMStudioHome() || path.join(os.homedir(), ".lmstudio");
  const userFilesDir = path.join(lmHome, "user-files");
  const fullPath = path.join(userFilesDir, identifier);
  // Check if file exists (or metadata file exists)
  try {
    if (fs.existsSync(fullPath)) {
      console.info(
        "[Orchestrator] Resolved LM Studio identifier:",
        identifier,
        "→",
        fullPath
      );
      return fullPath;
    }
    if (fs.existsSync(fullPath + ".metadata.json")) {
      console.info(
        "[Orchestrator] Resolved LM Studio identifier (via metadata):",
        identifier,
        "→",
        fullPath
      );
      return fullPath;
    }
  } catch {
    // Best-effort lookup: any filesystem error is treated as "not found".
  }
  return null;
}
/**
 * Walks `obj` depth-first and appends every image file candidate it finds to `out`.
 *
 * Two sources are recognised:
 *  - LM Studio file identifiers (`fileIdentifier` / `identifier`), resolved
 *    through resolveLMStudioIdentifier and kept only if the result is an image path;
 *  - local paths or file:// URIs in common path-like fields, kept only if they
 *    are image paths starting with "/", "./" or "../".
 *
 * An item of type "file" / "image" / "attachment" (with fileType "image" or no
 * fileType) that carries an identifier is handled on its own and not recursed into.
 */
function collectImageFileCandidatesFromObject(obj: any, out: string[]) {
  if (!obj || typeof obj !== "object") return;
  const node = obj as any;

  // Resolve an LM Studio identifier (e.g. "1766010597736 - 699.jpg") and keep it if it is an image.
  const addIdentifier = (value: unknown): void => {
    if (typeof value !== "string" || value.trim() === "") return;
    const fullPath = resolveLMStudioIdentifier(value);
    if (fullPath && isImagePath(fullPath)) out.push(fullPath);
  };

  // Keep a local path / file URI when it is an image with an explicit path prefix.
  const addLocalPath = (value: unknown): void => {
    if (typeof value !== "string") return;
    const candidate = normalizeMaybeFileUri(value);
    if (!isImagePath(candidate)) return;
    const hasPathPrefix = ["/", "./", "../"].some((prefix) =>
      candidate.startsWith(prefix)
    );
    if (hasPathPrefix) out.push(candidate);
  };

  // Canonical conversation.json content item:
  // { type: "file", fileIdentifier: "...", fileType: "image" }
  const itemType = node.type;
  const itemFileType = node.fileType;
  const isFileLikeItem =
    itemType === "file" || itemType === "image" || itemType === "attachment";

  // fileType may be missing in some formats, so an absent value is accepted too.
  if (isFileLikeItem && (itemFileType === "image" || !itemFileType)) {
    // fileIdentifier wins over identifier.
    const fileId = node.fileIdentifier || node.identifier;
    if (typeof fileId === "string" && fileId.trim()) {
      addIdentifier(fileId);
      // Found what we need; do not descend into this item.
      return;
    }
  }

  // Legacy/fallback fields that may carry file paths or URIs.
  const pathLikeValues = [
    node.path,
    node.filePath,
    node.absolutePath,
    node.uri,
    node.url,
    node.preview,
    node.original,
  ];
  for (const value of pathLikeValues) addLocalPath(value);

  // Top-level identifier fields, only for items not already treated as file-like above.
  if (!isFileLikeItem) {
    const identifierValues = [node.identifier, node.fileIdentifier];
    for (const value of identifierValues) addIdentifier(value);
  }

  // Descend into nested arrays and objects.
  for (const child of Object.values(obj)) {
    if (Array.isArray(child)) {
      for (const item of child) collectImageFileCandidatesFromObject(item, out);
    } else if (child && typeof child === "object") {
      collectImageFileCandidatesFromObject(child, out);
    }
  }
}
// Legacy conversation scanners are imported from ./core-bundle.mjs under local aliases
// (they originate from the unified media scanner):
// - findAllAttachmentsFromConversation → findAllAttachmentsLegacy
// - findLastAttachmentFromConversation → findLastAttachmentLegacy
// - findAllVariantsFromConversation → findAllVariantsLegacy
/**
 * Loads and parses the conversation snapshot JSON belonging to the chat whose
 * working directory is `chatWd`.
 *
 * Candidate locations are tried in order: the LM Studio conversations
 * directory first, then several file names inside `chatWd`. The first
 * candidate that exists and parses wins.
 *
 * A file can be transiently invalid while it is being written, so JSON syntax
 * errors are retried a few times with a short, linearly growing delay before
 * moving on to the next candidate.
 *
 * @param chatWd Chat working directory; its basename is used as the chat id.
 * @param debug When true, reads and parse failures are logged.
 * @returns The parsed snapshot, or null when `chatWd` is empty or no candidate
 *   could be read and parsed.
 */
async function readConversationSnapshot(
  chatWd: string | undefined,
  debug: boolean
): Promise<ConversationSnapshot | null> {
  if (!chatWd) return null;
  const chatId = path.basename(chatWd);
  const lmHome = findLMStudioHome() || path.join(os.homedir(), ".lmstudio");
  const conversationsDir = path.join(lmHome, "conversations");
  const candidates = [
    path.join(conversationsDir, `${chatId}.conversation.json`),
    path.join(chatWd, `${chatId}.conversation.json`),
    path.join(chatWd, ".conversation.json"),
    path.join(chatWd, "conversation.json"),
  ];
  const sleep = (ms: number) =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));
  // `~/.lmstudio/conversations/<chatId>.conversation.json` is our SSOT.
  // It can be transiently invalid while LM Studio is mid-write (e.g. "Unexpected end of JSON input").
  // Retry briefly so promotion still happens in the same UI/tool turn.
  const maxAttempts = 6;
  const baseDelayMs = 20;
  for (const p of candidates) {
    if (!(await pathExists(p))) continue;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        if (debug && attempt === 1)
          console.info("[conversationSnapshot] Reading:", p);
        const raw = await fs.promises.readFile(p, "utf-8");
        return JSON.parse(raw) as ConversationSnapshot;
      } catch (e) {
        const msg = (e instanceof Error && e.message) || String(e);
        // JSON.parse reports every truncated/malformed document as a
        // SyntaxError, but the message wording differs between engine
        // versions (e.g. "Unterminated string in JSON at position N").
        // Treat any SyntaxError as a possible mid-write state; the message
        // checks are kept for errors that are not SyntaxError instances.
        const shouldRetry =
          e instanceof SyntaxError ||
          /Unexpected end of JSON input/i.test(msg) ||
          /Unexpected end of file/i.test(msg);
        if (debug) {
          console.warn(
            "[conversationSnapshot] Failed parsing:",
            p,
            `attempt=${attempt}/${maxAttempts}`,
            "error:",
            msg,
            shouldRetry ? "(will retry)" : "(no retry)"
          );
        }
        if (!shouldRetry || attempt === maxAttempts) break;
        await sleep(baseDelayMs * attempt);
      }
    }
  }
  return null;
}
/**
 * Returns the basenames of generated originals mentioned anywhere in a
 * conversation snapshot.
 *
 * The snapshot is serialized to JSON and scanned as plain text, so names are
 * found regardless of how they are embedded (file:// URL, http(s) URL,
 * absolute path or bare filename). Returns [] for a missing snapshot or when
 * serialization/scanning throws.
 */
function extractGeneratedImageFileUrisFromConversation(
  snapshot: ConversationSnapshot | null
): string[] {
  let names: string[] = [];
  if (snapshot) {
    try {
      names = extractGeneratedOriginalNames(JSON.stringify(snapshot));
    } catch {
      names = [];
    }
  }
  return names;
}
/**
 * Finds the greatest timestamp token among `generated-image-<ts>-v<N>.<ext>`
 * file references.
 *
 * Tokens are compared as plain strings (code-unit order). References that do
 * not match the naming scheme, or that throw while being normalized, are
 * skipped. Returns null when no reference matches.
 */
function latestGeneratedTsFromFileUris(fileUris: string[]): string | null {
  const generatedName = /^generated-image-(.+)-v(\d+)\.\w+$/i;
  let latest: string | null = null;
  for (const ref of fileUris) {
    try {
      const fileName = path.basename(refToPathOrName(ref));
      const hit = generatedName.exec(fileName);
      const ts = hit ? hit[1] : "";
      if (ts && (latest === null || ts > latest)) {
        latest = ts;
      }
    } catch {
      // skip references that cannot be normalized
    }
  }
  return latest;
}
/**
 * Scans the chat history from newest to oldest for a tool result that belongs
 * to a `generate_image` tool call and returns the generated original
 * filenames found in it.
 *
 * A result qualifies only when its tool-call id matches a `generate_image`
 * request in an earlier assistant message. The first qualifying result that
 * yields at least one filename wins. Any exception results in [].
 */
function extractGeneratedImageFileUrisFromLatestGenerateImageToolResult(
  history: Chat
): string[] {
  try {
    const messages = Array.from(history);

    // True when some assistant message before `beforeIndex` requested
    // generate_image under the given tool-call id.
    const isGenerateImageCall = (
      callId: string,
      beforeIndex: number
    ): boolean => {
      for (let k = beforeIndex - 1; k >= 0; k--) {
        const earlier = messages[k];
        if (!earlier || earlier.getRole() !== "assistant") continue;
        const requests = earlier.getToolCallRequests?.() ?? [];
        for (const request of requests) {
          if (
            (request as any).id === callId &&
            (request as any).name === "generate_image"
          ) {
            return true;
          }
        }
      }
      return false;
    };

    for (let pos = messages.length - 1; pos >= 0; pos--) {
      const message = messages[pos];
      if (!message || message.getRole() !== "tool") continue;

      for (const result of message.getToolCallResults?.() ?? []) {
        const callId =
          (result as any).toolCallId ?? (result as any).tool_call_id ?? null;
        // Results without a usable id cannot be tied to a request.
        if (typeof callId !== "string" || !callId.length) continue;
        if (!isGenerateImageCall(callId, pos)) continue;

        // Tool content is often a JSON-encoded array of {type:"text", text:"..."} items,
        // and embedded URLs are frequently http(s). Match by filename so downstream
        // logic can resolve the files in chatWd.
        const payload = (result as any).content;
        const payloadText =
          typeof payload === "string" ? payload : JSON.stringify(payload);
        const names = extractGeneratedOriginalNames(payloadText);
        if (names.length) return names;
      }
    }
  } catch {
    // ignore
  }
  return [];
}
// Removed legacy: content-addressed copy and pending-parts injection helpers
/* -------------------------------------------------------------------------- */
/* Parse image metadata from tool text */
/* -------------------------------------------------------------------------- */
/**
 * Image metadata recovered from tool output by extractImageMetaFromRaw.
 * Every field is optional: a field is only set when the source object carries
 * a value of the matching primitive type under the same key.
 */
type ImageMeta = {
width?: number;
height?: number;
steps?: number;
backend?: string;
mode_effective?: string;
variants_used?: number;
// Presumably milliseconds, going by the name — the unit is not verifiable here.
inference_time_ms?: number;
// Copied from `files.original` of the source object.
original?: string;
// String entries of `files.previews` of the source object.
previews?: string[];
};
/**
 * Extracts a representative image metadata object from raw tool content.
 *
 * When `text` is valid JSON: a top-level object is inspected directly; for a
 * top-level array each item is inspected, preceded by the JSON embedded in
 * its string `.text` field, if any. When `text` is not JSON, a trailing
 * `{"width": ...}` fragment is tried as a fallback.
 *
 * @returns The last metadata object collected, or null if none was found.
 */
function extractImageMetaFromRaw(text: string): ImageMeta | null {
  // Scalar fields copied verbatim when the value has the expected primitive type.
  const scalarFields: Array<[string, "number" | "string"]> = [
    ["width", "number"],
    ["height", "number"],
    ["steps", "number"],
    ["backend", "string"],
    ["mode_effective", "string"],
    ["variants_used", "number"],
    ["inference_time_ms", "number"],
  ];
  const found: ImageMeta[] = [];

  const collect = (candidate: any): void => {
    if (!candidate || typeof candidate !== "object") return;
    const meta: Record<string, unknown> = {};
    for (const [key, expectedType] of scalarFields) {
      if (typeof candidate[key] === expectedType) meta[key] = candidate[key];
    }
    const files = (candidate as any).files;
    if (files && typeof files === "object") {
      if (typeof files.original === "string") meta.original = files.original;
      if (Array.isArray(files.previews)) {
        meta.previews = files.previews.filter(
          (entry: any) => typeof entry === "string"
        );
      }
    }
    if (Object.keys(meta).length > 0) found.push(meta as ImageMeta);
  };

  try {
    // 1) Whole text is JSON
    const root = JSON.parse(text);
    if (Array.isArray(root)) {
      root.forEach((entry) => {
        const embedded =
          entry && typeof entry === "object" ? (entry as any).text : undefined;
        if (typeof embedded === "string") {
          // Some items carry JSON inside .text
          try {
            collect(JSON.parse(embedded));
          } catch {
            /* ignore */
          }
        }
        collect(entry);
      });
    } else if (root && typeof root === "object") {
      collect(root);
    }
  } catch {
    // 2) Not JSON: look for a JSON-like block starting at "width" and ending a line.
    const trailingBlock = /\{\s*"width"\s*:\s*\d+[\s\S]*?\}\s*$/m.exec(text);
    if (trailingBlock) {
      try {
        collect(JSON.parse(trailingBlock[0]));
      } catch {
        /* ignore */
      }
    }
  }

  // Prefer the last metadata object (often most detailed)
  return found.length === 0 ? null : found[found.length - 1];
}
/* -------------------------------------------------------------------------- */
/* file:// absolute image paths handling */
/* -------------------------------------------------------------------------- */
/**
 * Strips shell escaping from a path (e.g. one pasted via terminal drag&drop):
 * every backslash followed by a character is replaced by that character.
 */
function unescapeShellPath(p: string): string {
  if (!p) return p;
  return p.replace(/\\(.)/g, (_whole, escaped: string) => escaped);
}
/**
 * Returns the distinct `file://…` image URIs found in `text`, in order of
 * first appearance.
 */
function extractFileUriImagePaths(text: string): string[] {
  const fileUriPattern =
    /file:\/\/[\w\-./:%?#@!$&'()*+,;=]+\.(?:png|jpe?g|gif|webp|bmp|svg)/gi;
  // A Set keeps insertion order, which gives order-preserving deduplication.
  const distinct = new Set<string>();
  for (
    let hit = fileUriPattern.exec(text);
    hit !== null;
    hit = fileUriPattern.exec(text)
  ) {
    distinct.add(hit[0]);
  }
  return Array.from(distinct);
}
/**
 * Normalizes a file reference: `file://` URIs are converted with
 * fileUriToPath, everything else (including empty input) is returned as is.
 */
function refToPathOrName(ref: string): string {
  if (ref && /^file:\/\//i.test(ref)) {
    return fileUriToPath(ref);
  }
  return ref;
}
/**
 * Collects the distinct `generated-image-<ts>Z-v<N>.(png|mov)` filenames that
 * occur in `text`, in order of first appearance.
 *
 * Matching is by filename only, so it does not matter whether the name shows
 * up inside a file:// URL, an http(s) URL, an absolute path or on its own.
 */
function extractGeneratedOriginalNames(text: string): string[] {
  const generatedName = /generated-image-[A-Za-z0-9T-]+Z-v\d+\.(png|mov)/gi;
  const distinct = new Set<string>();
  for (
    let hit = generatedName.exec(text);
    hit !== null;
    hit = generatedName.exec(text)
  ) {
    // Normalize to basename-only before deduplicating.
    distinct.add(path.basename(hit[0]));
  }
  return Array.from(distinct);
}
/**
 * Converts a `file://` URI into a filesystem path.
 *
 * The scheme is stripped (case-insensitively), a leading "/" is ensured and
 * percent-encoding is decoded. If the path contains a malformed escape (for
 * example a literal "%" in a file name), the well-formed escape runs are still
 * decoded and the rest is left untouched instead of throwing URIError.
 *
 * @param uri A `file://` URI; input without the scheme is treated as a path.
 * @returns The decoded path, always starting with "/".
 */
function fileUriToPath(uri: string): string {
  // Strip file:// and decode percent-encoding
  const withoutScheme = uri.replace(/^file:\/\//i, "");
  // On macOS, paths start with /Users/... even after removing scheme
  const p = withoutScheme.startsWith("/") ? withoutScheme : "/" + withoutScheme;
  try {
    return decodeURIComponent(p);
  } catch {
    // Malformed input: decode each contiguous run of %XX escapes on its own
    // (multi-byte UTF-8 sequences are always contiguous) and keep any run
    // that is not valid as written.
    return p.replace(/(?:%[0-9a-f]{2})+/gi, (run) => {
      try {
        return decodeURIComponent(run);
      } catch {
        return run;
      }
    });
  }
}
// Removed legacy: toDataUrlFromFileUri
/* -------------------------------------------------------------------------- */
/* Promote chat-local images (./image-*.jpg) to data: URLs */
/* -------------------------------------------------------------------------- */
/**
 * Maps a path's extension (case-insensitive) to an image MIME type.
 * Returns null for extensions that are not known image types.
 */
function guessMimeFromExt(p: string): string | null {
  const mimeByExtension = new Map<string, string>([
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
    [".bmp", "image/bmp"],
    [".svg", "image/svg+xml"],
  ]);
  const extension = path.extname(p).toLowerCase();
  return mimeByExtension.get(extension) ?? null;
}
// Removed legacy: toDataUrlFromLocal
/**
 * Converts the controller's LM Studio tool definitions into OpenAI
 * function-tool descriptors.
 *
 * Each tool name is sanitized and clamped (sanitizeNameBase / clampName64).
 * If the resulting name is already taken by a different tool, a short hash
 * suffix of the original name is appended to disambiguate.
 *
 * @returns `tools` (undefined when there are none) plus the bidirectional
 *   name maps between original and safe names.
 */
function toOpenAITools(ctl: GeneratorController): {
  tools?: ChatCompletionTool[];
  nameMaps: NameMaps;
} {
  const toSafe = new Map<string, string>();
  const toOriginal = new Map<string, string>();
  const descriptors: ChatCompletionTool[] = [];

  for (const definition of ctl.getToolDefinitions()) {
    const fn = definition.function;
    const originalName = fn.name;
    const sanitized = sanitizeNameBase(originalName);
    let safeName = clampName64(sanitized);

    // Another tool already owns this safe name: append a hash suffix.
    const takenByOther =
      toOriginal.has(safeName) && toOriginal.get(safeName) !== originalName;
    if (takenByOther) {
      const hashSuffix = `_${shortHash(originalName)}`;
      const availableLength = 64 - hashSuffix.length;
      safeName = clampName64(
        sanitized.slice(0, Math.max(1, availableLength)) + hashSuffix
      );
    }

    toSafe.set(originalName, safeName);
    toOriginal.set(safeName, originalName);
    descriptors.push({
      type: "function",
      function: {
        name: safeName,
        description: fn.description,
        parameters: fn.parameters ?? { type: "object", properties: {} },
      },
    });
  }

  return {
    tools: descriptors.length ? descriptors : undefined,
    nameMaps: { toSafe, toOriginal },
  };
}
/* -------------------------------------------------------------------------- */
/* Stream-handling utils */
/* -------------------------------------------------------------------------- */
/**
 * Registers an abort handler on the controller that logs the abort and
 * cancels the underlying stream through its AbortController.
 */
function wireAbort(
  ctl: GeneratorController,
  stream: { controller: AbortController }
) {
  const abortStream = (): void => {
    console.info("Generation aborted by user.");
    stream.controller.abort();
  };
  ctl.onAborted(abortStream);
}
/**
 * Consumes a streaming chat-completion response and forwards it to the
 * generator controller.
 *
 * For every chunk the first choice's delta is processed:
 *  - `reasoning_content` is emitted as reasoning fragments;
 *  - `content` is emitted as regular text; when reasoning-section parsing is
 *    enabled, text between the configured start/end markers (matched
 *    case-insensitively, also across chunk boundaries) is emitted as
 *    reasoning instead;
 *  - `tool_calls` deltas are accumulated per index and reported through the
 *    controller's tool-call callbacks; they are finalized when a chunk carries
 *    `finish_reason === "tool_calls"`.
 *
 * Text is buffered and flushed once a buffer reaches 512 characters or 25 ms
 * have passed since the last content flush, and unconditionally before a tool
 * call starts and at the end of the stream.
 *
 * @param stream Async iterable of completion chunks.
 * @param ctl Controller receiving fragments and tool-call events.
 * @param nameMaps Used to map safe tool names back to their original names.
 * @param options Only `debugChunks` is read here (logs every raw chunk).
 * @param reasoningSectionParsing Marker configuration for inline reasoning.
 * @param toolCallRequestTweaks Optional request-time transform for `index_image`.
 */
async function consumeStream(
  stream: AsyncIterable<any>,
  ctl: GeneratorController,
  nameMaps: NameMaps,
  options: RuntimeOptions,
  reasoningSectionParsing: {
    enabled: boolean;
    startString: string;
    endString: string;
  },
  toolCallRequestTweaks?: {
    /**
     * If true, prepend an inline snapshot JSON object to the free-text `query` when calling draw-things-index `index_image`.
     * This enables model display rewrite + `model_use_hints` from the producer.
     */
    enrichDrawThingsIndex?: boolean;
    /** Optional snapshot payload (inline JSON object). */
    drawThingsIndexSnapshotPayload?: string;
  }
) {
  // Track multiple concurrent tool-calls by their index
  const currentByIndex = new Map<number, ToolCallState>();
  let contentBuffer = "";
  let lastFlush = Date.now();
  let inReasoning = false;
  let reasoningBuffer = "";
  // True when reasoning was delivered via delta.reasoning_content (out-of-band),
  // i.e. NOT via inline <think>…</think> markers inside delta.content.
  // Used to close the reasoning block before the first regular content chunk.
  let outOfBandReasoning = false;
  // Pending buffer for potential partial marker matches across chunk boundaries
  // This accumulates text that MIGHT be the start of a marker but we don't know yet
  let pendingMarkerBuffer = "";

  // Case-insensitive indexOf helper - returns { index, matchLength } or { index: -1 }
  // matchLength is the length of the actual match in the original text (same as marker length)
  function indexOfCaseInsensitive(
    haystack: string,
    needle: string
  ): { index: number; matchLength: number } {
    const idx = haystack.toLowerCase().indexOf(needle.toLowerCase());
    return { index: idx, matchLength: needle.length };
  }

  // Check if text ends with a potential partial marker prefix.
  // Returns the length of the LONGEST suffix of `text` that is a proper prefix
  // of `marker`, or 0 if there is none.
  // e.g., text="...[THI" marker="[THINK]" → returns 4 for "[THI"
  // The longest candidate must win: for a marker such as "[[THINK]]" and text
  // ending in "[[", holding back only "[" would emit the other "[" as content
  // and the marker could never be recognised once the next chunk arrives.
  function potentialPartialMarkerSuffixLen(
    text: string,
    marker: string
  ): number {
    const textLower = text.toLowerCase();
    const markerLower = marker.toLowerCase();
    const longest = Math.min(marker.length - 1, text.length);
    for (let i = longest; i >= 1; i--) {
      if (textLower.slice(-i) === markerLower.slice(0, i)) {
        return i;
      }
    }
    return 0;
  }

  function flushContent() {
    if (contentBuffer.length) {
      ctl.fragmentGenerated(contentBuffer);
      contentBuffer = "";
      lastFlush = Date.now();
    }
  }

  function flushReasoning() {
    if (reasoningBuffer.length) {
      ctl.fragmentGenerated(reasoningBuffer, { reasoningType: "reasoning" });
      reasoningBuffer = "";
    }
  }

  function closeReasoningBlock() {
    flushReasoning();
    if (inReasoning) {
      // Close the reasoning block by sending empty fragment without reasoningType
      ctl.fragmentGenerated("");
      inReasoning = false;
    }
  }

  function flushToolCallAtIndex(idx: number) {
    const st = currentByIndex.get(idx);
    if (!st || st.name === null) return;
    let args: any = {};
    try {
      args = JSON.parse(st.arguments || "{}");
    } catch {
      // Unparsable argument text is reported as an empty argument object.
      args = {};
    }
    // Contract: opt-in enriched results for draw-things-index via query prefix.
    // IMPORTANT: This is a request-time transform only. It does not change validation,
    // and it does not accept filenames as user inputs.
    if (
      st.name === "index_image" &&
      toolCallRequestTweaks?.enrichDrawThingsIndex
    ) {
      const qRaw = (args as any)?.query;
      const q = typeof qRaw === "string" ? qRaw : String(qRaw ?? "");
      const snapshot =
        typeof toolCallRequestTweaks.drawThingsIndexSnapshotPayload === "string" &&
        toolCallRequestTweaks.drawThingsIndexSnapshotPayload.trim()
          ? toolCallRequestTweaks.drawThingsIndexSnapshotPayload.trim()
          : "";
      // New wire format: `{...snapshot...} <userQuery>` (no opt-in token).
      // If the query already begins with `{`, assume the caller provided a payload.
      if (snapshot && !q.trimStart().startsWith("{")) {
        (args as any).query = `${snapshot} ${q}`.trim();
      }
    }
    ctl.toolCallGenerationEnded({
      type: "function",
      name: st.name,
      arguments: args,
      id: st.id,
    });
    currentByIndex.delete(idx);
  }

  for await (const chunk of stream) {
    if (options.debugChunks) {
      console.info("Received chunk:", JSON.stringify(chunk));
    }
    const delta = chunk.choices?.[0]?.delta as
      | {
          content?: string;
          reasoning_content?: string; // LM Studio / reasoning-capable backends extract <think> here
          tool_calls?: Array<{
            index: number;
            id?: string;
            function?: { name?: string; arguments?: string };
          }>;
        }
      | undefined;
    if (!delta) continue;

    /* Reasoning content delivered out-of-band (e.g. LM Studio strips <think> from content) */
    if (delta.reasoning_content) {
      inReasoning = true;
      outOfBandReasoning = true;
      reasoningBuffer += delta.reasoning_content;
      const now = Date.now();
      if (reasoningBuffer.length >= 512 || now - lastFlush >= 25) {
        flushReasoning();
      }
    }

    /* Text streaming */
    if (delta.content) {
      // If reasoning was delivered out-of-band, close that block before regular content.
      if (outOfBandReasoning && inReasoning) {
        closeReasoningBlock();
        outOfBandReasoning = false;
      }
      // Prepend any pending partial marker from previous chunk
      let text = pendingMarkerBuffer + delta.content;
      pendingMarkerBuffer = "";
      const parsingEnabled =
        !!reasoningSectionParsing?.enabled &&
        typeof reasoningSectionParsing.startString === "string" &&
        typeof reasoningSectionParsing.endString === "string" &&
        reasoningSectionParsing.startString.length > 0 &&
        reasoningSectionParsing.endString.length > 0;
      const startMarker = reasoningSectionParsing.startString;
      const endMarker = reasoningSectionParsing.endString;
      const maxMarkerLen = Math.max(startMarker.length, endMarker.length);

      // Process text for model-specific reasoning tags (default: <think>...</think>)
      // Matching is case-insensitive to handle model output variations like [/THINK] vs [/Think]
      while (text.length > 0) {
        if (!parsingEnabled) {
          contentBuffer += text;
          break;
        }
        // Determine which marker we're looking for
        const currentMarker = inReasoning ? endMarker : startMarker;
        const { index: markerIdx, matchLength: matchLen } =
          indexOfCaseInsensitive(text, currentMarker);
        if (markerIdx !== -1) {
          // Found complete marker
          if (!inReasoning) {
            // Found start marker
            // Add everything before start marker to content buffer
            if (markerIdx > 0) {
              contentBuffer += text.slice(0, markerIdx);
            }
            // Flush content before entering reasoning mode
            flushContent();
            // Enter reasoning mode
            inReasoning = true;
            text = text.slice(markerIdx + matchLen); // Skip start marker
          } else {
            // Found end marker
            // Add everything before end marker to reasoning buffer
            if (markerIdx > 0) {
              reasoningBuffer += text.slice(0, markerIdx);
            }
            // Close reasoning block
            closeReasoningBlock();
            // Continue with text after end marker
            text = text.slice(markerIdx + matchLen); // Skip end marker
          }
        } else {
          // No complete marker found in current text
          // Check if text ends with a potential partial marker
          const partialLen = potentialPartialMarkerSuffixLen(
            text,
            currentMarker
          );
          if (partialLen > 0 && text.length <= maxMarkerLen) {
            // The entire remaining text could be a partial marker
            // Hold it in pending buffer until next chunk arrives
            pendingMarkerBuffer = text;
            text = "";
          } else if (partialLen > 0) {
            // Text ends with potential partial marker
            // Emit safe portion, hold back the potential partial
            const safeText = text.slice(0, -partialLen);
            pendingMarkerBuffer = text.slice(-partialLen);
            if (inReasoning) {
              reasoningBuffer += safeText;
            } else {
              contentBuffer += safeText;
            }
            text = "";
          } else {
            // No partial match at end, emit all
            if (inReasoning) {
              reasoningBuffer += text;
            } else {
              contentBuffer += text;
            }
            text = "";
          }
          break;
        }
      }

      // Flush buffers periodically
      const now = Date.now();
      if (contentBuffer.length >= 512 || now - lastFlush >= 25) {
        flushContent();
      }
      if (reasoningBuffer.length >= 512 || now - lastFlush >= 25) {
        flushReasoning();
      }
    }

    /* Tool-call streaming */
    for (const toolCall of delta.tool_calls ?? []) {
      const idx = toolCall.index;
      let st = currentByIndex.get(idx) || null;
      if (toolCall.id !== undefined) {
        // Flush any pending partial marker as content before tool-call
        // (if we held back text hoping for a marker, emit it now as-is)
        if (pendingMarkerBuffer.length) {
          if (inReasoning) {
            reasoningBuffer += pendingMarkerBuffer;
          } else {
            contentBuffer += pendingMarkerBuffer;
          }
          pendingMarkerBuffer = "";
        }
        // Ensure any pending text is flushed before starting a tool-call
        flushContent();
        flushReasoning();
        // Start or reset the state for this index
        st = { id: toolCall.id, name: null, index: idx, arguments: "" };
        currentByIndex.set(idx, st);
        ctl.toolCallGenerationStarted();
      }
      if (toolCall.function?.name) {
        // Map back from safe name to original name for LM Studio
        const originalName =
          nameMaps.toOriginal.get(toolCall.function.name) ??
          toolCall.function.name;
        if (!st) {
          // In case name arrives before id (be defensive)
          st = { id: "", name: originalName, index: idx, arguments: "" };
          currentByIndex.set(idx, st);
          ctl.toolCallGenerationStarted();
        } else {
          st.name = originalName;
        }
        ctl.toolCallGenerationNameReceived(originalName);
      }
      if (toolCall.function?.arguments) {
        if (!st) {
          // Defensive: create state if missing
          st = { id: "", name: null, index: idx, arguments: "" };
          currentByIndex.set(idx, st);
          ctl.toolCallGenerationStarted();
        }
        st.arguments += toolCall.function.arguments;
        ctl.toolCallGenerationArgumentFragmentGenerated(
          toolCall.function.arguments
        );
      }
    }

    /* Finalize tool call */
    if (chunk.choices?.[0]?.finish_reason === "tool_calls") {
      // Flush all open tool-calls
      for (const idx of Array.from(currentByIndex.keys())) {
        flushToolCallAtIndex(idx);
      }
    }
  }

  // Flush any pending partial marker buffer (emit as regular text if stream ends mid-marker)
  if (pendingMarkerBuffer.length) {
    if (inReasoning) {
      reasoningBuffer += pendingMarkerBuffer;
    } else {
      contentBuffer += pendingMarkerBuffer;
    }
    pendingMarkerBuffer = "";
  }
  // Flush any remaining text content
  flushContent();
  // Close any open reasoning block
  closeReasoningBlock();
  console.info("Generation completed.");
}
/* -------------------------------------------------------------------------- */
/* API */
/* -------------------------------------------------------------------------- */
/**
* Injects markdown for generated images directly into chat after tool calls.
* Only when PREVIEW_IN_CHAT=false, this ensures images are displayed even if
* agent models ignore the $hint in tool responses.
*/
async function injectImageMarkdownAfterToolCall(
ctl: GeneratorController,
history: Chat,
globalConfig: InferParsedConfig<typeof globalConfigSchematics>,
workingDir: string | undefined,
options: Pick<RuntimeOptions, "previewMaxDim" | "previewQuality">,
debug: boolean
) {
// NOTE: We still inject *tool-result* image tables even when PREVIEW_IN_CHAT=true,
// because the main problem is LM Studio not rendering tool-result images reliably.
// For generate_image inline injection, we keep the old behavior to avoid duplicates.
const previewInChat = globalConfig.get("PREVIEW_IN_CHAT");
if (debug) {
const envRaw = process.env.PREVIEW_IN_CHAT;
const envParsed = /^(1|true|yes|on)$/i.test(String(envRaw || "").trim());
const envKnown = envRaw != null && String(envRaw).trim() !== "";
console.info(
`[PREVIEW_IN_CHAT] globalConfig=${previewInChat} env=${
envKnown ? `${envRaw} (→${envParsed})` : "(unset)"
}`
);
if (envKnown && envParsed !== previewInChat) {
console.info(
`[PREVIEW_IN_CHAT] MISMATCH: Orchestrator config and tool env differ; tool output may follow env, injection follows globalConfig.`
);
}
}
// Check if last message is a tool result
const messages = Array.from(history);
const lastMsg = messages[messages.length - 1];
if (!lastMsg || lastMsg.getRole() !== "tool") {
return { imageReviewByToolCallId: {} as Record<string, string[]> };
}
// Extract generated originals from tool results.
// NOTE: tool outputs often contain http(s) links (when the external server is healthy),
// not file:// URLs. Also, the tool response intentionally strips the `files` JSON.
// Therefore, we extract by filename and inject markdown that points to the chat WD.
const basenamesRaw: string[] = [];
// Best-effort mapping: tool results reference tool_call_id, the preceding assistant message has tool call requests.
const toolRequestById = new Map<
string,
{ name: string; pluginIdentifier?: string }
>();
const nearestToolRequests: Array<{
name: string;
pluginIdentifier?: string;
}> = [];
function normalizeJoinKey(v: unknown): string | undefined {
if (typeof v === "string") {
const s = v.trim();
return s ? s : undefined;
}
if (typeof v === "number" && Number.isFinite(v)) {
return String(v);
}
return undefined;
}
function addToolRequestJoinKey(
key: unknown,
value: { name: string; pluginIdentifier?: string }
): void {
const k = normalizeJoinKey(key);
if (!k) return;
toolRequestById.set(k, value);
}
function getToolResultJoinKey(r: unknown): string | undefined {
if (!r || typeof r !== "object") return undefined;
const rr = r as any;
return (
normalizeJoinKey(rr.toolCallId) ??
normalizeJoinKey(rr.tool_call_id) ??
normalizeJoinKey(rr.toolCallRequestId) ??
normalizeJoinKey(rr.toolCallRequestID) ??
normalizeJoinKey(rr.callId)
);
}
function getToolResultPrimaryToolCallId(r: unknown): string | undefined {
if (!r || typeof r !== "object") return undefined;
const rr = r as any;
// Prefer the SDK-facing toolCallId because that's what toOpenAIMessages will see later.
return normalizeJoinKey(rr.toolCallId);
}
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (!msg || msg.getRole() !== "assistant") continue;
const reqs = msg.getToolCallRequests();
if (!reqs || reqs.length === 0) continue;
for (const r of reqs) {
// Debug: Log what SDK actually provides
if (debug) {
console.info(
`[ToolResultHarvest] SDK toolCallRequest keys: ${Object.keys(r as any).join(", ")}`
);
console.info(
`[ToolResultHarvest] SDK toolCallRequest: ${JSON.stringify(r, null, 2).slice(0, 500)}`
);
}
const name = (r as any)?.name;
const pluginIdentifier = (r as any)?.pluginIdentifier;
if (typeof name !== "string" || !name.trim()) continue;
const entry = {
name,
pluginIdentifier:
typeof pluginIdentifier === "string" ? pluginIdentifier : undefined,
};
nearestToolRequests.push(entry);
// Join keys: SDK/SSOT variants across versions
addToolRequestJoinKey((r as any)?.id, entry);
addToolRequestJoinKey((r as any)?.toolCallRequestId, entry);
addToolRequestJoinKey((r as any)?.toolCallRequestID, entry);
addToolRequestJoinKey((r as any)?.callId, entry);
}
break; // nearest preceding assistant message
}
// Collect tool results for unified harvesting
const toolResultsToHarvest: Array<{
content: unknown;
info: ToolCallInfo;
}> = [];
function escapeHtml(s: string): string {
return String(s)
.replace(/&/g, "&")
.replace(/</g, "<")
.replace(/>/g, ">")
.replace(/\"/g, """)
.replace(/'/g, "'");
}
for (const r of lastMsg.getToolCallResults()) {
const raw = r.content;
const textRaw = typeof raw === "string" ? raw : JSON.stringify(raw);
// Only collect generate_image/generate_video basenames for per-image injection (Block 2).
// Index results (draw-things-index-results) are fully handled by Block 1 (harvest table).
if (!textRaw.includes('draw-things-index-results')) {
basenamesRaw.push(...extractGeneratedOriginalNames(textRaw));
}
// Tool-result harvesting via unified toolParams system
// Skip if no workingDir (can't materialize files)
if (workingDir) {
const joinKey = getToolResultJoinKey(r);
const primaryToolCallId = getToolResultPrimaryToolCallId(r);
const req = joinKey ? toolRequestById.get(joinKey) : undefined;
// Deterministic fallbacks when IDs are missing/unstable:
// - If there is exactly one tool request in the preceding assistant message, use it.
// - If counts match, map by index order.
let effectiveReq = req;
if (!effectiveReq) {
if (nearestToolRequests.length === 1) {
effectiveReq = nearestToolRequests[0];
} else {
const results = lastMsg.getToolCallResults();
if (
Array.isArray(results) &&
results.length === nearestToolRequests.length
) {
const idx = results.indexOf(r as any);
if (idx >= 0 && idx < nearestToolRequests.length) {
effectiveReq = nearestToolRequests[idx];
}
}
}
if (debug) {
console.info(
`[ToolResultHarvest] Join miss for tool result (joinKey=${
joinKey ?? "none"
}); falling back=${effectiveReq ? "yes" : "no"}`
);
}
}
const toolName = effectiveReq?.name;
const pluginId = normalizeToolPluginId(
effectiveReq?.pluginIdentifier,
typeof toolName === "string" ? toolName : ""
);
if (debug) {
console.info(
`[ToolResultHarvest] Tool: toolName="${
toolName ?? "none"
}" pluginIdentifier="${
effectiveReq?.pluginIdentifier ?? "none"
}" → normalized pluginId="${pluginId ?? "none"}"`
);
}
if (typeof toolName === "string" && toolName.trim()) {
// Collect tool info for batch harvesting
toolResultsToHarvest.push({
content: raw,
info: {
toolCallId: primaryToolCallId ?? joinKey,
toolName,
pluginId:
typeof pluginId === "string" && pluginId.trim()
? pluginId
: undefined,
},
});
} else if (debug) {
console.info(
`[ToolResultHarvest] Skipping tool result harvest: missing toolName (joinKey=${
joinKey ?? "none"
})`
);
}
}
}
// Harvest tool results using unified toolParams system
const harvestResults: Array<{
toolName: string;
toolCallId?: string;
markdown?: string;
mediaType: "picture" | "image" | "variant" | "attachment";
stateChanged: boolean;
}> = [];
const imageReviewByToolCallId: Record<string, string[]> = {};
if (workingDir && toolResultsToHarvest.length > 0) {
const state = await readState(workingDir);
const toggles: Record<string, boolean> = {
PREVIEW_IN_CHAT: previewInChat,
};
for (const { content, info } of toolResultsToHarvest) {
try {
const result = await harvestToolResult(
content,
info,
{
chatWd: workingDir,
state,
toggles,
debug,
},
{
maxDim: 256, // Smaller previews for tool-result images
quality: options.previewQuality,
}
);
if (result) {
harvestResults.push({
toolName: info.toolName,
toolCallId: info.toolCallId,
markdown: result.markdown,
mediaType: result.mediaType,
stateChanged: result.stateChanged,
});
// Always rewrite tool-result image parts to review labels for the model.
// User-facing image display is handled separately via injected markdown tables
// (when PREVIEW_IN_CHAT=false) or via LM Studio's native tool-result rendering
// (when PREVIEW_IN_CHAT=true).
if (
(result.mediaType === "image" || result.mediaType === "picture") &&
typeof info.toolCallId === "string" &&
info.toolCallId.trim()
) {
const labels = result.candidates.map((c, i) =>
result.injection.labelGenerator(c, i)
);
if (labels.length > 0) {
imageReviewByToolCallId[info.toolCallId] = labels;
if (debug) {
console.info(
`[ToolResultRewritePlan] tool_call_id="${
info.toolCallId
}" labels=${labels.join(", ")}`
);
}
}
}
}
} catch (e) {
if (debug) {
console.warn(
`[ToolResultHarvest] Error harvesting ${info.toolName}:`,
(e as Error).message
);
}
}
}
// Save state if any harvest changed it
if (harvestResults.some((r) => r.stateChanged)) {
await writeStateAtomic(workingDir, state);
}
}
// Global deduplication across all tool results to prevent double injection
// when the same filename appears in multiple results (e.g., retries, parallel calls)
const seenBasenames = new Set<string>();
const basenames: string[] = [];
for (const bn of basenamesRaw) {
if (!seenBasenames.has(bn)) {
seenBasenames.add(bn);
basenames.push(bn);
}
}
// If we have neither generated-image basenames nor harvested tool results, nothing to do.
if (basenames.length === 0 && harvestResults.length === 0) {
return { imageReviewByToolCallId };
}
if (debug && basenames.length) {
console.info(
`[InjectMarkdown][LegacyGenerateImageOriginals] Found`,
basenames.length,
`generated-image originals in tool-result (raw=${basenamesRaw.length}, deduped=${basenames.length}):`,
basenames
);
}
// Inject markdown (only once per image). Images are expected to already be in
// the working directory (primary storage).
if (workingDir) {
try {
await fs.promises.mkdir(workingDir, { recursive: true });
// Load state to check for already-injected images
const state = await readState(workingDir);
if (!state.injectedMarkdown) {
state.injectedMarkdown = [];
}
const injectedSet = new Set(state.injectedMarkdown);
// Persist exact injected markdown so we can strip it even after plugin reload.
// Keep it bounded to avoid unbounded growth in chat_media_state.json.
const MAX_PERSISTED_INJECTIONS = 250;
const injectedContent: string[] = Array.isArray((state as any).injectedContent)
? ((state as any).injectedContent as string[]).filter(
(x) => typeof x === "string" && x.trim()
)
: [];
/**
 * Remember one exact injected markdown snippet in `state.injectedContent`.
 *
 * Non-string or whitespace-only input is ignored, as is a snippet that is already
 * stored. When the list grows past MAX_PERSISTED_INJECTIONS, the oldest entries
 * (front of the array) are dropped so the persisted list stays bounded.
 */
const persistInjectedMarkdown = (md: string): void => {
  const snippet = typeof md === "string" ? md : "";
  const isBlank = snippet.trim().length === 0;
  if (isBlank || injectedContent.indexOf(snippet) !== -1) {
    return;
  }
  injectedContent.push(snippet);
  // Number of entries beyond the cap; trim that many from the front (oldest first).
  const overflow = injectedContent.length - MAX_PERSISTED_INJECTIONS;
  if (overflow > 0) {
    injectedContent.splice(0, overflow);
  }
  // Re-attach the list so it is written out with the rest of the state object.
  (state as any).injectedContent = injectedContent;
};
let injectedCount = 0;
let stateChanged = false;
// 1) Inject markdown tables from harvested tool results (via unified toolParams system)
for (const hr of harvestResults) {
if (!hr.markdown) continue;
const key = `__tool_result_table__:${hr.toolName}:${
hr.toolCallId || "noid"
}`;
if (injectedSet.has(key)) continue;
// Track injection for later stripping, then output unchanged markdown
trackInjection(workingDir, hr.markdown, hr.mediaType);
persistInjectedMarkdown(hr.markdown);
ctl.fragmentGenerated(hr.markdown + "\n");
injectedSet.add(key);
stateChanged = true;
if (debug) {
const mdPreview =
hr.markdown.length > 220
? hr.markdown.slice(0, 220) + "…"
: hr.markdown;
console.info(
`[InjectMarkdown][Harvest] tool="${hr.toolName}" tool_call_id="${
hr.toolCallId || "(none)"
}" markdown="${mdPreview.replace(/\n/g, "\\n")}"`
);
}
}
if (debug) {
console.info(
`[ImageMarkdownInjection] Chat ${path.basename(workingDir)}: ${
basenames.length
} images discovered, ${injectedSet.size} already injected`
);
}
// 2) Existing per-image injection for generate_image outputs (only when PREVIEW_IN_CHAT=false)
if (!previewInChat) {
for (let i = 0; i < basenames.length; i++) {
const originalBaseName = basenames[i];
const originalPath = path.join(workingDir, originalBaseName);
// Prefer preview image for display (smaller, always relative to chat WD).
// Fall back to original if preview is missing.
let displayBaseName = originalBaseName;
try {
const m = /^generated-image-(.+)-v(\d+)\.(png|mov)$/i.exec(
originalBaseName
);
const ts = m ? m[1] : null;
const vNum = m ? parseInt(m[2], 10) : NaN;
if (ts && Number.isFinite(vNum)) {
const candidates = [
`preview-generated-image-${ts}-v${vNum}.jpg`,
`preview-generated-image-${ts}-v${vNum}.webp`,
];
for (const cand of candidates) {
const pAbs = path.join(workingDir, cand);
const ok = await fs.promises
.access(pAbs, fs.constants.F_OK)
.then(() => true)
.catch(() => false);
if (ok) {
displayBaseName = cand;
break;
}
}
}
} catch {
// ignore and fall back to original
}
const displayPath = path.join(workingDir, displayBaseName);
// Skip if already injected in this chat
if (injectedSet.has(displayBaseName)) {
if (debug) {
console.info(
`[ImageMarkdownInjection] Skipping ${displayBaseName} (already injected)`
);
}
continue;
}
// Check if file exists in working directory
const alreadyExists = await fs.promises
.access(displayPath, fs.constants.F_OK)
.then(() => true)
.catch(() => false);
// No copying: if file isn't present, skip injection for this variant.
if (!alreadyExists) {
if (debug) {
console.info(
`[ImageMarkdownInjection] Skipping ${displayBaseName} (missing in workingDir; no copy)`
);
}
continue;
}
if (debug) {
console.info(
`[ImageMarkdownInjection] ${displayBaseName} exists, injecting markdown`
);
}
// In-memory deduplication: prevents race-condition double-injection
// when file-based state hasn't been written yet by a parallel call.
// Key includes workingDir to scope per-chat.
const dedupKey = `${workingDir}:${displayBaseName}`;
if (!markAsInjected(dedupKey)) {
if (debug) {
console.info(
`[ImageMarkdownInjection] Skipping ${displayBaseName} (in-memory dedup, recently injected)`
);
}
continue;
}
// Extract variant number from filename (e.g., "generated-image-...-v2.png" → 2)
// Fall back to loop index + 1 if extraction fails
const vMatch = /^generated-image-.+-v(\d+)\.(png|mov)$/i.exec(
originalBaseName
);
const variantNum = vMatch ? parseInt(vMatch[1], 10) : i + 1;
// IMPORTANT: must be a relative path into the chat working directory.
const markdown = ``;
if (debug) {
console.info(
`[InjectMarkdown][LegacyGenerateImageInline] markdown="${markdown}"`
);
}
// Track injection for later stripping, then output unchanged markdown
trackInjection(workingDir, markdown, "variant");
persistInjectedMarkdown(markdown);
ctl.fragmentGenerated(markdown + "\n");
injectedSet.add(displayBaseName);
stateChanged = true;
injectedCount++;
}
}
// Save state if we injected anything new
if (stateChanged) {
state.injectedMarkdown = Array.from(injectedSet);
await writeStateAtomic(workingDir, state);
if (debug) {
console.info(
`[ImageMarkdownInjection] Updated state with ${state.injectedMarkdown.length} total injected images`
);
}
}
if (debug && injectedCount === 0 && basenames.length > 0) {
console.info(
`[ImageMarkdownInjection] All ${basenames.length} images already injected (skipped)`
);
}
} catch (e) {
console.error("[ImageMarkdownInjection] Error:", (e as Error).message);
}
}
return { imageReviewByToolCallId };
}
export async function generate(ctl: GeneratorController, history: Chat) {
// Generate unique request ID for tracking
const requestId = Math.random().toString(36).substring(2, 9);
// DEBUG: Log that generator was called
console.info(
`[Orchestrator][${requestId}] generate() called, history length:`,
Array.from(history).length
);
const config = ctl.getPluginConfig(configSchematics);
const modelConfig = config.get("model");
const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics);
// Use configured model, or fallback to default
const model = modelConfig || "qwen/qwen3-vl-30b";
const baseUrl = globalConfig.get("baseUrl") || "http://127.0.0.1:1234/v1";
const apiKey = globalConfig.get("apiKey") || "";
// Ensure agent model is loaded before proceeding
// This prevents silent fallback to wrong models (e.g., vision-capability-priming)
const agentLoadResult = await ensureAgentModelLoaded({
modelKey: model,
baseUrl,
apiKey,
});
if (!agentLoadResult.ok) {
const errMsg = `[Orchestrator][${requestId}] Agent model not available: ${agentLoadResult.error}`;
console.error(errMsg);
ctl.fragmentGenerated(
agentLoadResult.error || "Failed to load agent model."
);
return;
}
// Check for Vision Primer user-facing errors (from main() startup)
// The primer runs non-blocking, so we need to await the promise here.
// Also check the direct result for cases where no load was attempted (e.g., not installed).
// Show once per session, then clear to avoid repeating.
const visionPrimerPromise = (globalThis as any).__dtc_visionPrimerPromise;
const visionPrimerDirectResult = (globalThis as any).__dtc_visionPrimerResult;
let visionPrimerResult: any = null;
if (visionPrimerPromise) {
try {
// Wait for primer to complete (should be fast if already done)
visionPrimerResult = await Promise.race([
visionPrimerPromise,
new Promise<null>((resolve) => setTimeout(() => resolve(null), 5000)), // 5s timeout
]);
} catch (e) {
console.warn(`[Orchestrator][${requestId}] Vision Primer await failed:`, e);
}
} else if (visionPrimerDirectResult) {
// No promise (e.g., model not installed - no load attempted)
visionPrimerResult = visionPrimerDirectResult;
}
if (visionPrimerResult?.userFacingError) {
console.warn(
`[Orchestrator][${requestId}] Vision Primer error to display:`,
visionPrimerResult.error
);
ctl.fragmentGenerated(visionPrimerResult.userFacingError + "\n\n---\n\n");
}
// Clear to show only once (regardless of outcome)
if (visionPrimerPromise || visionPrimerDirectResult) {
delete (globalThis as any).__dtc_visionPrimerPromise;
delete (globalThis as any).__dtc_visionPrimerResult;
}
if (agentLoadResult.alreadyLoaded) {
console.info(
`[Orchestrator][${requestId}] Agent model already loaded: ${model}`
);
} else {
console.info(
`[Orchestrator][${requestId}] Agent model loaded: ${model} (${
agentLoadResult.size ?? "?"
} in ${agentLoadResult.loadTimeSec ?? "?"}s)`
);
}
// Update lastUsedModel in conversation.json to reflect the actual agent model.
// This fixes the UI issue where context % is shown based on vision-capability-priming (4096 ctx).
// Fire-and-forget to not block the response.
updateLastUsedModelForAgentModel({
modelKey: model,
instanceId: agentLoadResult.loadedInstanceId,
contextLength: agentLoadResult.loadedInstanceContextLength,
}).then((updateResult) => {
if (updateResult.ok && !updateResult.alreadyCorrect) {
console.debug(
`[Orchestrator][${requestId}] lastUsedModel updated to agent model in chat ${updateResult.chatId}`
);
} else if (!updateResult.ok) {
console.debug(
`[Orchestrator][${requestId}] lastUsedModel update skipped: ${updateResult.error}`
);
}
}).catch((e) => {
console.warn(`[Orchestrator][${requestId}] lastUsedModel update failed:`, e?.message || e);
});
// Expose unload context so tool calls can free VRAM during long renders.
// generate_image.ts consumes this and guards by mode + baseUrl locality.
(globalThis as any).__dtc_agentModelUnloadCtx = {
modelKey: agentLoadResult.loadedInstanceId ?? model,
baseUrl,
apiKey,
};
// Detect model capabilities
const capabilities = detectCapabilities(model);
console.info(`[Orchestrator][${requestId}] Model capabilities:`, {
model,
supportsThinking: capabilities.supportsThinking,
supportsTools: capabilities.supportsTools,
supportsVision: capabilities.supportsVision,
});
/* 1. Resolve runtime options (ENV can override) */
// Friendly hint: warn once per process if context length is likely too small
// for Vision Promotion reliability. We do not set context length; we only inform.
const MIN_RECOMMENDED_CONTEXT = 16_384;
// Render a number with en-US digit grouping (e.g. 16384 → "16,384") for user-facing hints.
const fmtInt = (n: number) => {
  return n.toLocaleString("en-US");
};
const warnKey = `${agentLoadResult.loadedInstanceId ?? model}:$${
agentLoadResult.loadedInstanceContextLength ?? "?"
}`;
// Module-level cache to avoid spamming.
(globalThis as any).__dtc_warnedCtx ??= new Set<string>();
const warnedCtx: Set<string> = (globalThis as any).__dtc_warnedCtx;
if (
typeof agentLoadResult.loadedInstanceContextLength === "number" &&
agentLoadResult.loadedInstanceContextLength > 0 &&
agentLoadResult.loadedInstanceContextLength < MIN_RECOMMENDED_CONTEXT &&
!warnedCtx.has(warnKey)
) {
warnedCtx.add(warnKey);
const instanceId = agentLoadResult.loadedInstanceId ?? model;
const maxCtx =
typeof agentLoadResult.maxContextLength === "number" &&
agentLoadResult.maxContextLength > 0
? fmtInt(agentLoadResult.maxContextLength)
: "(unknown)";
ctl.fragmentGenerated(
`Your model configuration might not be complete yet. The current context length for ${instanceId} is ${fmtInt(
agentLoadResult.loadedInstanceContextLength
)} tokens. That's a bit tight when it comes to Vision Promotion. This model supports up to ${maxCtx} tokens. Recommended: at least 32,768 tokens.`
);
}
const options: RuntimeOptions = {
debugChunks: config.get("debugChunks"),
debugPromotion: config.get("debugPromotion"),
// Fixed values per project policy
previewMaxDim: 1024,
previewQuality: 85,
visionPromotionPersistent: config.get("visionPromotionPersistent"),
};
/* 2. Attachment detection + pending promotion marker */
appendPromotionToPluginLog(`[${requestId}] Orchestrator phase 2 start`);
let workingDir =
typeof (ctl as any).getWorkingDirectory === "function"
? (ctl as any).getWorkingDirectory()
: undefined;
appendPromotionToPluginLog(
`[${requestId}] workingDir from ctl: ${workingDir ?? "(undefined)"}`
);
// Fallback: if LM Studio doesn't provide a working directory, use heuristic
if (!workingDir) {
try {
const resolved = await resolveActiveLMStudioChatId();
appendPromotionToPluginLog(
`[${requestId}] heuristic result: ok=${resolved.ok} ${
resolved.ok
? `chatId=${resolved.chatId} confidence=${resolved.confidence}`
: `reason=${resolved.reason}`
}`
);
if (resolved.ok) {
const lmHome = findLMStudioHome();
workingDir = path.join(lmHome, "working-directories", resolved.chatId);
// Ensure directory exists
await fs.promises.mkdir(workingDir, { recursive: true });
console.info(
`[Orchestrator][${requestId}] workingDir resolved via heuristic:`,
workingDir,
`(confidence=${resolved.confidence})`
);
}
} catch (e) {
console.warn(
`[Orchestrator][${requestId}] Failed to resolve workingDir via heuristic:`,
(e as Error).message
);
appendPromotionToPluginLog(
`[${requestId}] heuristic failed: ${(e as Error).message}`
);
}
}
console.info(
`[Orchestrator][${requestId}] workingDir =`,
workingDir ?? "(undefined)"
);
if (options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] workingDir =`,
workingDir ?? "(undefined)"
);
}
// Make tool calls deterministic: tools run *after* generation, so we keep the last
// generator-derived chat context available for the tool handler (no global "newest file" guessing).
try {
if (typeof workingDir === "string" && workingDir.trim().length > 0) {
const chatId = path.basename(workingDir);
if (/^\d+$/.test(chatId)) {
setActiveChatContext({ chatId, workingDir, requestId });
}
}
} catch {
// never block generation
}
// Abort recovery: if the user aborts generation and then hits "Regenerate Response",
// the model often loses visual context while idempotent vision-promotion would not repeat.
// We persist a one-shot state flag so the NEXT turn can force pixel promotion.
try {
let abortMarkerWritten = false;
ctl.onAborted(() => {
if (abortMarkerWritten) return;
abortMarkerWritten = true;
if (!workingDir) return;
const chatWd = workingDir;
// Best-effort: never throw from abort handler.
(async () => {
try {
const state = await readState(chatWd);
state.forcePixelPromotionNextTurn = true;
state.forcePixelPromotionSetAt = localTimestamp();
state.forcePixelPromotionReason = "abort";
await writeStateAtomic(chatWd, state);
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP abort marker set: chatId=${chatId} requestId=${requestId} at=${state.forcePixelPromotionSetAt}`
);
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
appendPromotionToPluginLog(
`VIP abort marker failed: requestId=${requestId} error=${msg}`
);
}
})();
});
} catch {
// never block generation
}
const historyMsgs = Array.from(history);
const lastHistoryMsg = historyMsgs[historyMsgs.length - 1];
const isToolResultTurn =
!!lastHistoryMsg && lastHistoryMsg.getRole() === "tool";
const turnKind = isToolResultTurn ? "tool-result" : "non-tool";
// For tool-result turns, SSOT is typically lagging while the tool executes. The freshest truth is the
// tool result itself (matched by tool_call_id to generate_image). For non-tool turns, SSOT is the truth.
const toolGeneratedUris = isToolResultTurn
? extractGeneratedImageFileUrisFromLatestGenerateImageToolResult(history)
: [];
const toolLatestTs = latestGeneratedTsFromFileUris(toolGeneratedUris);
// SSOT: derive current generation context from conversation snapshot (only when not in a tool-result turn)
const debug = options.debugPromotion;
const conversationSnapshot = !isToolResultTurn
? await readConversationSnapshot(workingDir, debug)
: null;
const conversationGeneratedUris = !isToolResultTurn
? extractGeneratedImageFileUrisFromConversation(conversationSnapshot)
: [];
const conversationLatestTs = !isToolResultTurn
? latestGeneratedTsFromFileUris(conversationGeneratedUris)
: null;
const effectiveGeneratedUris = toolGeneratedUris.length
? toolGeneratedUris
: conversationGeneratedUris;
const effectiveLatestTs = toolLatestTs || conversationLatestTs;
if (options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] Turn kind=${turnKind}; ` +
`toolLatestTs=${toolLatestTs ?? "(none)"} ` +
`toolUris=${toolGeneratedUris.length}; ` +
`ssotLatestTs=${conversationLatestTs ?? "(none)"} ` +
`ssotUris=${conversationGeneratedUris.length}; ` +
`effectiveLatestTs=${effectiveLatestTs ?? "(none)"} ` +
`effectiveUris=${effectiveGeneratedUris.length}`
);
}
try {
const chatWd = workingDir;
if (chatWd) {
// Load state
let state = await readState(chatWd);
await ensureChatMediaStateFileExists(chatWd, state, debug);
// Snapshot: align state to history (no file deletions)
try {
// Determine if history currently presents an attachment.
// IMPORTANT: during tool-result turns SSOT can lag / be mid-write.
// Never clear attachment state based on SSOT in tool-result turns.
let hasAttachment = false;
if (isToolResultTurn) {
hasAttachment = Array.isArray(state.attachments)
? state.attachments.length > 0
: false;
} else {
const found = await findLastAttachmentFromConversation(chatWd, debug);
if (found && found.length) hasAttachment = true;
if (!hasAttachment) {
// Fallback: wrappers in last user turn
let lastUserText = "";
for (const m of history)
if (m.getRole() === "user") lastUserText = m.getText();
const parsed = parseAttachmentWrappers(lastUserText || "");
// Check for local paths OR LM Studio identifiers
hasAttachment = parsed.parts.some((p) => {
const url = (p as any).url as string | undefined;
const identifier = (p as any).identifier as string | undefined;
// Local path check
if (
url &&
(/^file:\/\//i.test(url) ||
url.startsWith("/") ||
url.startsWith("./") ||
url.startsWith("../"))
)
return true;
// LM Studio identifier check
if (identifier && resolveLMStudioIdentifier(identifier))
return true;
// URL as identifier fallback
if (
url &&
!url.includes("/") &&
!url.includes("\\") &&
resolveLMStudioIdentifier(url)
)
return true;
return false;
});
}
}
// VARIANT STATE MANAGEMENT:
// Keep ALL variants in state for text-promotion (unlimited) and reference lookups.
// Only clear variants if SSOT is readable AND explicitly shows no generated images.
// The rolling window (visual promotion limit of 3) is applied at PROMOTION time,
// not at state storage time.
const ssotReadable = !isToolResultTurn && conversationSnapshot !== null;
const ssotHasGeneratedImages = effectiveGeneratedUris.length > 0;
let changed = false;
// NOTE: Attachments are NOT cleared when the latest user turn has no attachments.
// Attachments persist until explicitly replaced by new attachments in the import phase.
// This ensures i2i workflow works across many text-only turns.
// SAME LOGIC FOR VARIANTS: only clear if SSOT explicitly shows no generated images.
if (state.variants.length) {
if (ssotReadable && !ssotHasGeneratedImages) {
// SSOT is readable and shows no generated images → user deleted all turns with images
state.variants = [];
state.lastVariantsTs = undefined;
// NOTE: nextVariantV is intentionally NOT reset — stable versioning: vN is never reused.
changed = true;
if (debug) {
console.info(
"[Variant State] SSOT shows no generated images → clearing all variants"
);
}
}
// NOTE: NO CAPPING HERE! All variants stay in state for:
// - Text promotion (unlimited)
// - Reference lookups (canvas: v1, sourceVariant: 1, etc.)
// Visual promotion rolling window (3) is applied in buildPromotionItems()
}
if (changed) {
// Variants changed (filtered/cleared) → reset promotion state so model sees current state
state.lastPromotedTs = undefined;
state.lastPromotedAttachmentAs = undefined;
state.lastPromotedAttachmentA = undefined;
state.lastPromotedVariantVs = undefined;
await writeStateAtomic(chatWd, state);
if (debug)
console.info(
"Variants changed → promotion state reset, state snapshot aligned to history."
);
}
} catch (e) {
console.error("Snapshot alignment error:", (e as Error).message);
}
// Batch-import ALL attachments from SSOT (conversation.json) into chat_media_state.json
// This builds the complete i2i inventory across ALL user turns.
try {
const { found, turnIdByAbs: turnIdByFoundAbs } =
await findAllAttachmentsFromConversation(chatWd, debug);
appendPromotionToPluginLog(
`[${requestId}] SSOT found ${found.length} total attachments: ${
found.map((f) => path.basename(f)).join(", ") || "(none)"
}`
);
if (debug)
console.info(
"[Attachment Batch Import] All attachments from SSOT:",
found
);
// Normalize paths (file:// URIs → absolute paths)
const sourcePaths: string[] = [];
const turnIdByOriginAbs: Record<string, number> = {};
for (const item of found) {
const abs = /^file:\/\//i.test(item)
? fileUriToPath(item)
: path.isAbsolute(item)
? item
: path.join(chatWd, item);
sourcePaths.push(abs);
const maybeTurnId = turnIdByFoundAbs[normalizeMaybeFileUri(item)];
if (typeof maybeTurnId === "number" && Number.isFinite(maybeTurnId)) {
// Use absolute resolved path as key for the importer.
turnIdByOriginAbs[path.resolve(abs)] = maybeTurnId;
}
}
if (debug)
console.info(
"[Attachment Batch Import] Normalized paths:",
sourcePaths
);
// Batch import (replaces entire state.attachments array with ALL SSOT data)
// IMPORTANT: SSOT can lag during tool-result turns. Never regress/overwrite attachments based on
// potentially stale SSOT while a tool result is being produced.
const hasCurrentAttachments =
Array.isArray(state.attachments) && state.attachments.length > 0;
const shouldReconcileNow = !isToolResultTurn || !hasCurrentAttachments;
if (shouldReconcileNow) {
// Also avoid clearing attachments from an empty/laggy SSOT during tool-result turns.
if (!isToolResultTurn || sourcePaths.length > 0) {
const importResult = await importAttachmentBatch(
chatWd,
state,
sourcePaths,
turnIdByOriginAbs,
{
maxDim: options.previewMaxDim,
quality: options.previewQuality,
},
sourcePaths.length, // Generate previews for ALL (needed for i2i tool)
debug
);
// If attachments were re-imported (inventory rebuilt), reset ATTACHMENT promotion state
// so the model sees the current attachments again.
// NOTE: Do NOT reset variant promotion state here - variants are independent!
if (importResult.changed) {
if (debug) {
console.info(
"[Attachment Batch Import] Inventory changed → resetting ATTACHMENT promotion state"
);
}
state.lastPromotedTs = undefined;
state.lastPromotedAttachmentAs = undefined;
state.lastPromotedAttachmentA = undefined;
// Deliberately NOT resetting lastPromotedVariantVs - variants have their own lifecycle
}
} else if (debug) {
console.info(
"[Attachment Batch Import] Skipped: tool-result turn with empty SSOT result (keep existing state)."
);
}
} else if (debug) {
console.info(
"[Attachment Batch Import] Skipped: tool-result turn (keep existing attachment state; SSOT may lag)."
);
}
} catch (e) {
console.error("Attachment batch import error:", (e as Error).message);
}
// ──────────────────────────────────────────────────────────────────────
// VARIANT BATCH IMPORT (analogous to attachment batch import above)
// Rebuild state.variants from ALL generated images in conversation.json
// ──────────────────────────────────────────────────────────────────────
try {
const { found: allVariantBasenames, turnIdByBasename } =
await findAllVariantsFromConversation(chatWd, debug);
if (debug) {
console.info(
`[Variant Batch Import] Found ${allVariantBasenames.length} variants in SSOT:`,
allVariantBasenames.join(", ")
);
}
if (allVariantBasenames.length > 0) {
// Resolve basenames to real candidate locations.
// Current: generated images live directly in chatWd (LM Studio chat working directory).
// Legacy: some older setups stored originals under <projectRoot>/images and previews under <projectRoot>/images/previews.
const projectImagesDir = path.join(getProjectRoot(), "images");
const projectPreviewsDir = path.join(projectImagesDir, "previews");
// Check which ones actually exist on disk.
// IMPORTANT: recordGeneratedVariants expects originals+previews to be available in chatWd.
// Therefore, when we find a legacy file under <projectRoot>/images, we copy it into chatWd as a best-effort compat path.
const existingPaths: string[] = [];
for (const bn of allVariantBasenames) {
const inChatWd = path.join(chatWd, bn);
if (await pathExists(inChatWd)) {
existingPaths.push(inChatWd);
continue;
}
const inProjectImages = path.join(projectImagesDir, bn);
if (await pathExists(inProjectImages)) {
try {
await fs.promises.copyFile(inProjectImages, inChatWd);
if (debug)
console.info(
`[Variant Batch Import] Copied legacy original into chatWd: ${inProjectImages} -> ${inChatWd}`
);
} catch (e) {
console.error(
`[Variant Batch Import] Failed to copy legacy original into chatWd: ${inProjectImages} -> ${inChatWd}: ${String(
e
)}`
);
}
// Also try to copy the matching preview if present in legacy location.
try {
const m =
/^generated-image-(.+)-v(\d+)\.(png|jpe?g|webp)$/i.exec(bn);
if (m) {
const ts = m[1];
const vNum = parseInt(m[2], 10);
const previewName = `preview-generated-image-${ts}-v${vNum}.jpg`;
const legacyPreviewAbs = path.join(
projectPreviewsDir,
previewName
);
const chatPreviewAbs = path.join(chatWd, previewName);
if (
(await pathExists(legacyPreviewAbs)) &&
!(await pathExists(chatPreviewAbs))
) {
await fs.promises.copyFile(
legacyPreviewAbs,
chatPreviewAbs
);
if (debug)
console.info(
`[Variant Batch Import] Copied legacy preview into chatWd: ${legacyPreviewAbs} -> ${chatPreviewAbs}`
);
}
}
} catch (e) {
console.error(
`[Variant Batch Import] Failed to copy legacy preview into chatWd for ${bn}: ${String(
e
)}`
);
}
if (await pathExists(inChatWd)) {
existingPaths.push(inChatWd);
} else if (debug) {
console.warn(
`[Variant Batch Import] Legacy original existed but is not present in chatWd after copy: ${inChatWd}`
);
}
continue;
}
if (debug) {
console.warn(
`[Variant Batch Import] Variant not found in candidates (chatWd or <projectRoot>/images): ${bn}`
);
}
}
if (existingPaths.length > 0) {
// Protect current state during tool-result turns (SSOT may lag)
const hasCurrentVariants =
Array.isArray(state.variants) && state.variants.length > 0;
const shouldReconcileNow = !isToolResultTurn || !hasCurrentVariants;
if (shouldReconcileNow) {
// Also avoid clearing variants from an empty/laggy SSOT during tool-result turns.
if (!isToolResultTurn || existingPaths.length > 0) {
// Convert to file:// URIs for recordGeneratedVariants
const variantUris = existingPaths.map(
(p) => `file://${encodeURI(p)}`
);
const turnIdByOriginAbs: Record<string, number> = {};
try {
for (const p of existingPaths) {
const bn = path.basename(p);
const tid = turnIdByBasename[bn];
if (typeof tid === "number" && Number.isFinite(tid)) {
turnIdByOriginAbs[path.resolve(p)] = tid;
}
}
} catch {
// best-effort
}
const variantImportResult = await recordGeneratedVariants(
chatWd,
state,
variantUris,
{
maxDim: options.previewMaxDim,
quality: options.previewQuality,
},
turnIdByOriginAbs,
debug
);
// If variants were re-imported (inventory rebuilt), reset VARIANT promotion state
// so the model sees the current variants again.
if (variantImportResult.changed) {
if (debug) {
console.info(
`[Variant Batch Import] Inventory changed → resetting VARIANT promotion state`
);
}
state.lastPromotedVariantVs = undefined;
// NOTE: Do NOT reset attachment promotion state here - attachments are independent!
}
if (debug) {
console.info(
`[Variant Batch Import] Imported ${
existingPaths.length
} variants into state (now ${
state.variants?.length ?? 0
} total, changed=${variantImportResult.changed})`
);
}
}
} else if (debug) {
console.info(
"[Variant Batch Import] Skipped: tool-result turn (keep existing variant state; SSOT may lag)."
);
}
}
}
} catch (e) {
console.error("Variant batch import error:", (e as Error).message);
}
// ──────────────────────────────────────────────────────────────────────
// PICTURE RECONCILE
// On non-tool-result turns: scan the SSOT for pictures and merge the scan
// into state.pictures through reconcileMedia() ("merge" strategy).
// Failures are logged and swallowed so the rest of the pipeline continues.
// ──────────────────────────────────────────────────────────────────────
try {
  if (debug) {
    console.info(
      `[${requestId}] PICTURE RECONCILE: isToolResultTurn=${isToolResultTurn}`
    );
  }
  if (!isToolResultTurn) {
    const pictureScan = await findAllPictures(chatWd, debug);
    const ssotPictures = pictureScan.candidates;
    if (debug) {
      console.info(
        `[${requestId}] PICTURE RECONCILE: SSOT scan returned ${ssotPictures.length} candidates`
      );
    }
    try {
      appendPromotionToPluginLog(
        `[${requestId}] SSOT picture candidates=${ssotPictures.length}`
      );
    } catch {
      // Mirroring into the plugin log is best-effort.
    }

    // Whatever is stored under state.pictures is only trusted if it is an array.
    let currentPictures: any[] = [];
    if (Array.isArray((state as any).pictures)) {
      currentPictures = (state as any).pictures as any[];
    }

    const reconciled = await reconcileMedia(
      chatWd,
      currentPictures,
      "picture",
      ssotPictures,
      { strategy: "merge", debug }
    );
    const outcome = reconciled.result;
    const mergedPictures = reconciled.newArray;
    if (debug) {
      console.info(
        `[${requestId}] PICTURE RECONCILE: result added=${outcome.added} removed=${outcome.removed} unchanged=${outcome.unchanged} stateChanged=${outcome.stateChanged}`
      );
      console.info(
        `[${requestId}] PICTURE RECONCILE: currentPictures=${currentPictures.length} -> newArray=${mergedPictures.length}`
      );
    }

    if (outcome.stateChanged) {
      (state as any).pictures = mergedPictures;
      // The next picture number follows the highest numeric `p` in the merged list.
      let highestP = 0;
      for (const rec of mergedPictures) {
        highestP = Math.max(highestP, typeof rec.p === "number" ? rec.p : 0);
      }
      state.counters.nextPictureP = highestP + 1;
      await writeStateAtomic(chatWd, state);
      try {
        appendPromotionToPluginLog(
          `[${requestId}] Pictures reconciled: now ${(state as any).pictures?.length ?? 0}`
        );
      } catch {
        // Mirroring into the plugin log is best-effort.
      }
    }
  }
} catch (e) {
  const reason = (e as Error).message;
  console.error("Picture reconcile error:", reason);
  try {
    appendPromotionToPluginLog(
      `[${requestId}] Picture reconcile error: ${reason}`
    );
  } catch {
    // Mirroring into the plugin log is best-effort.
  }
}
// ──────────────────────────────────────────────────────────────────────
// IMAGE RECONCILE
// Same flow as the picture reconcile, for state.images: scan the SSOT,
// merge through reconcileMedia() ("merge" strategy), and persist only when
// the reconcile reports a state change. Skipped on tool-result turns.
// ──────────────────────────────────────────────────────────────────────
try {
  if (!isToolResultTurn) {
    const imageScan = await findAllImages(chatWd, debug);
    const ssotImages = imageScan.candidates;

    // Whatever is stored under state.images is only trusted if it is an array.
    let currentImages: any[] = [];
    if (Array.isArray((state as any).images)) {
      currentImages = (state as any).images as any[];
    }

    const reconciled = await reconcileMedia(
      chatWd,
      currentImages,
      "image",
      ssotImages,
      { strategy: "merge", debug }
    );
    const mergedImages = reconciled.newArray;

    if (reconciled.result.stateChanged) {
      (state as any).images = mergedImages;
      // The next image number follows the highest numeric `i` in the merged list.
      let highestI = 0;
      for (const rec of mergedImages) {
        highestI = Math.max(highestI, typeof rec.i === "number" ? rec.i : 0);
      }
      state.counters.nextImageI = highestI + 1;
      await writeStateAtomic(chatWd, state);
    }
  }
} catch (e) {
  console.error("Image reconcile error:", (e as Error).message);
}
// Record the generated-image URIs collected for this turn as variants.
// `effectiveGeneratedUris` / `effectiveLatestTs` are computed earlier in this function;
// this step only logs them (debug) and hands them to recordGeneratedVariants().
if (debug) {
// The label reflects where the URI list came from on this turn.
const sourceLabel = isToolResultTurn ? "tool-result" : "SSOT";
console.info(
`[VisionPromotion][${requestId}] Generation scan (${sourceLabel}) found`,
effectiveGeneratedUris.length,
"generated image URIs:",
// Only the first five URIs are printed; the remainder is summarised as a count.
effectiveGeneratedUris.slice(0, 5).join(", "),
effectiveGeneratedUris.length > 5
? `... (+${effectiveGeneratedUris.length - 5} more)`
: ""
);
}
// Idempotency guard: a timestamp equal to state.lastVariantsTs means this
// generation run was already recorded, so nothing is done.
if (effectiveLatestTs && state.lastVariantsTs === effectiveLatestTs) {
if (debug)
console.info(
`[VisionPromotion][${requestId}] Variants already recorded for ts=${effectiveLatestTs} (skipped)`
);
} else if (effectiveGeneratedUris.length) {
if (debug)
console.info(
`[VisionPromotion][${requestId}] Calling recordGeneratedVariants with`,
effectiveGeneratedUris.length,
"URIs"
);
// Preview sizing comes from the request options (previewMaxDim / previewQuality).
await recordGeneratedVariants(
chatWd,
state,
effectiveGeneratedUris,
{ maxDim: options.previewMaxDim, quality: options.previewQuality },
undefined,
debug
);
// The marker is persisted only when a timestamp is known.
// NOTE(review): with URIs but no timestamp, state is not written here —
// presumably recordGeneratedVariants persists on its own; confirm.
if (effectiveLatestTs) {
state.lastVariantsTs = effectiveLatestTs;
await writeStateAtomic(chatWd, state);
}
}
}
} catch (e) {
console.error("Media promotion pipeline error:", (e as Error).message);
}
/* 2.5. Inject markdown for generated images (PREVIEW_IN_CHAT=false only) */
// This ensures images are displayed even if agent models ignore $hint in tool responses
const toolResultRewrites = await injectImageMarkdownAfterToolCall(
ctl,
history,
globalConfig,
workingDir,
options,
options.debugPromotion
);
// Re-register previously injected markdown from persisted state, so that content
// injected before a plugin reload is still known to the injection registry.
// Best-effort: any failure while reading state or registering is ignored.
if (workingDir) {
  try {
    const persistedState = await readState(workingDir);
    const persistedInjections = (persistedState as any).injectedContent;
    const entries: unknown[] = Array.isArray(persistedInjections)
      ? persistedInjections
      : [];
    for (const entry of entries) {
      // Only non-blank strings are meaningful registry entries.
      if (typeof entry !== "string") continue;
      if (!entry.trim()) continue;
      trackInjection(workingDir, entry, "persisted");
    }
  } catch {
    // best-effort
  }
}
/* 3. Setup client & payload */
const openai = createOpenAI(globalConfig);
const { tools, nameMaps } = toOpenAITools(ctl);
const messages = toOpenAIMessages(
history,
nameMaps,
options,
workingDir,
toolResultRewrites
);
// Track itemsKept for outcome verification (set inside try, used after consolidation)
let vipItemsKeptCount = 0;
// Count pre-existing image_url parts BEFORE VisionPromotion runs.
// These come from tool results, user attachments, etc. and must be excluded
// from the outcome verification which only checks VP-injected images.
const preExistingImageUrlParts = countOutgoingImageUrlParts(messages);
// Build promotion from state and append inline (only if allowed AND not suppressed)
try {
const chatWd = workingDir;
if (chatWd) {
const state = await readState(chatWd);
await ensureChatMediaStateFileExists(
chatWd,
state,
options.debugPromotion
);
if (options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] State read for promotion block:`,
`attachments=${state.attachments.length}`,
`variants=${state.variants.length}`,
`counters=${JSON.stringify(state.counters)}`
);
}
const promotionMode: "idempotent" | "persistent" =
options.visionPromotionPersistent ? "persistent" : "idempotent";
// Regenerate/delete recovery.
// A history that is shorter than the count remembered in state is treated as a
// one-shot signal (e.g. turns deleted, then "Regenerate Response") to re-inject
// pixel context even when idempotent rules see no new attachments.
const countHistoryMessages = (): number => {
  try {
    return Array.from(history).length;
  } catch {
    // An uncountable history is reported as 0, which disables the shrink check below.
    return 0;
  }
};
const historyMessageCount = countHistoryMessages();

// The remembered count is only usable when it is a finite number.
const storedHistoryCountIsUsable =
  typeof (state as any).lastHistoryMessageCount === "number" &&
  Number.isFinite((state as any).lastHistoryMessageCount);
const lastHistoryMessageCount = storedHistoryCountIsUsable
  ? (state as any).lastHistoryMessageCount
  : undefined;

const historyShrunk =
  typeof lastHistoryMessageCount === "number" &&
  historyMessageCount > 0 &&
  historyMessageCount < lastHistoryMessageCount;

if (historyShrunk && capabilities.supportsVision) {
  // Do not overwrite a force marker that is already pending.
  if (!state.forcePixelPromotionNextTurn) {
    state.forcePixelPromotionNextTurn = true;
    state.forcePixelPromotionSetAt = localTimestamp();
    state.forcePixelPromotionReason = "history-shrink";
  }
}
// One-shot forced promotion (abort recovery): if an abort happened recently, re-inject
// pixel context once even if items are not "new" by idempotent rules.
// The force marker lives in state (forcePixelPromotionNextTurn / SetAt / Reason);
// this section decides whether it is honoured on this request and logs the outcome.
const forceVipTtlMs = 10 * 60 * 1000; // 10 minutes
// Whether a marker was present at all, regardless of whether it is honoured.
const forceVipRequested = !!state.forcePixelPromotionNextTurn;
const forceVipReason =
typeof state.forcePixelPromotionReason === "string"
? state.forcePixelPromotionReason
: undefined;
// forceVip is true only when the model supports vision, the marker is set,
// and the marker is not older than forceVipTtlMs.
const forceVip = (() => {
if (!capabilities.supportsVision) return false;
if (!state.forcePixelPromotionNextTurn) return false;
const ts =
typeof state.forcePixelPromotionSetAt === "string"
? state.forcePixelPromotionSetAt
: "";
// A missing or unparseable timestamp cannot be aged, so the marker is honoured.
if (!ts) return true;
const ms = Date.parse(ts);
if (!Number.isFinite(ms)) return true;
const age = Date.now() - ms;
return !(Number.isFinite(age) && age > forceVipTtlMs);
})();
const forceVipStaleCleared = forceVipRequested && !forceVip;
// If the flag is stale, clear it so we don't keep forcing forever.
// NOTE(review): this branch also runs when the marker is set but the model lacks
// vision support, so the "stale" log line covers that case as well.
if (state.forcePixelPromotionNextTurn && !forceVip) {
state.forcePixelPromotionNextTurn = undefined;
state.forcePixelPromotionSetAt = undefined;
state.forcePixelPromotionReason = undefined;
try {
await writeStateAtomic(chatWd, state);
} catch {
// best-effort
}
try {
// The chat id used in log lines is the basename of the chat working directory.
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP force stale cleared: chatId=${chatId} requestId=${requestId} ttlMs=${forceVipTtlMs}`
);
} catch {
// best-effort
}
} else if (forceVip) {
// Marker honoured: log only. The marker is not cleared in this section.
try {
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP force enabled: chatId=${chatId} requestId=${requestId} reason=${
forceVipReason ?? "(none)"
} setAt=${
typeof state.forcePixelPromotionSetAt === "string"
? state.forcePixelPromotionSetAt
: "(unknown)"
}`
);
} catch {
// best-effort
}
}
// One-shot targeted review promotion (review_image): inject exact requested items once.
// The request lives in state.pendingReviewPromotion and may carry its own
// `requestedAt`, `ttlMs`, `reason` and `targets` ({ a, v, i, p } number lists).
const reviewVipTtlMsDefault = 5 * 60 * 1000; // 5 minutes
const reviewVipRequested =
  !!(state as any).pendingReviewPromotion &&
  typeof (state as any).pendingReviewPromotion === "object";
const reviewVipReason = (() => {
  try {
    const r = (state as any).pendingReviewPromotion;
    const s = typeof r?.reason === "string" ? String(r.reason).trim() : "";
    return s ? s : undefined;
  } catch {
    return undefined;
  }
})();
// TTL that actually governs the pending request: its own positive, finite `ttlMs`
// when present, otherwise the default. Computed once so that the staleness check
// and the "stale cleared" log line report the same value.
const reviewVipEffectiveTtlMs: number = (() => {
  const r = (state as any).pendingReviewPromotion;
  return typeof r?.ttlMs === "number" && Number.isFinite(r.ttlMs) && r.ttlMs > 0
    ? r.ttlMs
    : reviewVipTtlMsDefault;
})();
// reviewVip is true only when the model supports vision, a request object is
// pending, and the request is not older than its effective TTL.
const reviewVip = (() => {
  if (!capabilities.supportsVision) return false;
  const r = (state as any).pendingReviewPromotion;
  if (!r || typeof r !== "object") return false;
  const requestedAt =
    typeof r.requestedAt === "string" ? String(r.requestedAt) : "";
  // A missing or unparseable timestamp cannot be aged, so the request is honoured.
  if (!requestedAt) return true;
  const ms = Date.parse(requestedAt);
  if (!Number.isFinite(ms)) return true;
  const age = Date.now() - ms;
  return !(Number.isFinite(age) && age > reviewVipEffectiveTtlMs);
})();
const reviewVipStaleCleared = reviewVipRequested && !reviewVip;
if (reviewVipRequested && !reviewVip) {
  // Drop a request that will not be honoured so it cannot linger in state.
  (state as any).pendingReviewPromotion = undefined;
  try {
    await writeStateAtomic(chatWd, state);
  } catch {
    // best-effort
  }
  try {
    const chatId = (() => {
      try {
        return chatWd ? path.basename(chatWd) : "(unknown)";
      } catch {
        return "(unknown)";
      }
    })();
    // Report the TTL that was actually applied, not unconditionally the default.
    appendPromotionToPluginLog(
      `VIP review stale cleared: chatId=${chatId} requestId=${requestId} ttlMs=${reviewVipEffectiveTtlMs}`
    );
  } catch {
    // best-effort
  }
} else if (reviewVip) {
  try {
    const chatId = (() => {
      try {
        return chatWd ? path.basename(chatWd) : "(unknown)";
      } catch {
        return "(unknown)";
      }
    })();
    const r: any = (state as any).pendingReviewPromotion;
    const targets = r?.targets && typeof r.targets === "object" ? r.targets : {};
    // Format one target list for the log. Values are floored BEFORE the `> 0`
    // test so the logged ids match the ids the exact-item selection uses
    // (a fractional value below 1 floors to 0 and is not a valid id).
    const fmt = (k: string): string => {
      const a = Array.isArray((targets as any)[k])
        ? ((targets as any)[k] as any[])
            .filter((x) => typeof x === "number" && Number.isFinite(x))
            .map((x) => Math.floor(x))
            .filter((x) => x > 0)
        : [];
      return a.length ? `${k}=[${a.join(",")}]` : `${k}=[]`;
    };
    appendPromotionToPluginLog(
      `VIP review enabled: chatId=${chatId} requestId=${requestId} ` +
        `${fmt("a")} ${fmt("v")} ${fmt("i")} ${fmt("p")} ` +
        `reason=${reviewVipReason ?? "(none)"}`
    );
  } catch {
    // best-effort
  }
}
// Use new helper functions for cleaner attachment promotion logic
// NOTE: getPromotableAttachmentNs() now uses registry internally, no cap parameter needed
const promotableNs = getPromotableAttachmentNs(state);
const hasPromotableAttachments = promotableNs.length > 0;
let shouldPromoteAttachment =
promotionMode === "persistent"
? hasPromotableAttachments
: shouldPromoteAttachments(state);
if (forceVip && hasPromotableAttachments) {
shouldPromoteAttachment = true;
}
appendPromotionToPluginLog(
`[${requestId}] shouldPromoteAttachment: mode=${promotionMode} ` +
`promotableNs=[${promotableNs.join(",")}] ` +
`lastPromotedNs=[${(state.lastPromotedAttachmentAs || []).join(
","
)}] ` +
`result=${shouldPromoteAttachment}`
);
// Newest generation timestamp found among state.variants, or null when no
// variant filename follows the `generated-image-<ts>-v<N>.<ext>` pattern.
// "Newest" is the greatest timestamp under plain string comparison.
const latestTsFromStateVariants = (() => {
  const generatedNamePattern = /^generated-image-(.+)-v\d+\.(png|jpe?g|webp)$/i;
  try {
    let newest: string | null = null;
    for (const variant of state.variants || []) {
      const rawName = (variant as any)?.filename;
      const match = generatedNamePattern.exec(
        typeof rawName === "string" ? String(rawName) : ""
      );
      if (!match || !match[1]) continue;
      if (newest === null || match[1] > newest) newest = match[1];
    }
    return newest;
  } catch {
    // Any unexpected shape in state.variants is treated as "no timestamp".
    return null;
  }
})();
// Choose the generation timestamp and URI list that drive promotion on this turn:
//  - tool-result turn: the values harvested from the tool result;
//  - any other turn: a fresh read of the conversation snapshot (SSOT);
//  - neither yields a timestamp: the newest timestamp derived from state.variants.
let promoLatestTs: string | null = null;
let promoGeneratedUris: string[] = [];
let promoLatestTsSource: "tool" | "ssot" | "state" | "none" = "none";
if (!isToolResultTurn) {
  const ssotUris = extractGeneratedImageFileUrisFromConversation(
    await readConversationSnapshot(chatWd, debug)
  );
  promoLatestTs = latestGeneratedTsFromFileUris(ssotUris);
  promoGeneratedUris = ssotUris;
  if (promoLatestTs) promoLatestTsSource = "ssot";
} else {
  promoLatestTs = toolLatestTs;
  promoGeneratedUris = toolGeneratedUris;
  if (promoLatestTs) promoLatestTsSource = "tool";
}
// State fallback keeps promotion working when the primary source has no timestamp.
if (!promoLatestTs) {
  if (latestTsFromStateVariants) {
    promoLatestTs = latestTsFromStateVariants;
    promoLatestTsSource = "state";
    if (options.debugPromotion) {
      console.info(
        `[VisionPromotion][${requestId}] promoLatestTs fallback: source=state variants; stateVariantTs=${latestTsFromStateVariants}`
      );
    }
  }
}
// STALE PROMOTION DETECTION:
// If lastPromotedTs references a variant that no longer exists (user deleted turns),
// reset promotion state so attachments get re-injected.
// This handles the case where user deletes turns but attachments remain unchanged.
// "No longer exists" is detected as: a lastPromotedTs is stored but no promo timestamp
// could be resolved for this turn (promoLatestTs is empty).
if (state.lastPromotedTs && !promoLatestTs) {
// We have a lastPromotedTs but no current variants → context was deleted
if (debug || options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] Stale promotion detected: ` +
`lastPromotedTs=${state.lastPromotedTs} but no variants exist → resetting promotion state`
);
}
// Clear the "already promoted" bookkeeping and persist immediately.
// NOTE(review): lastPromotedImageIs is not reset here — confirm that is intended.
state.lastPromotedTs = undefined;
state.lastPromotedAttachmentAs = undefined;
state.lastPromotedAttachmentA = undefined;
state.lastPromotedVariantVs = undefined;
await writeStateAtomic(chatWd, state);
}
// VARIANT SHRINK DETECTION (Regenerate/Delete recovery):
// If attachments were previously promoted AND variants have shrunk (user deleted turns),
// force re-promotion of attachments on this turn. This handles "Regenerate Response"
// where the user deletes the assistant turn but attachments remain unchanged.
// NOTE(review): the block below only sets state.forcePixelPromotionNextTurn; the
// `forceVip` decision for this request was computed before this point, so the marker
// appears to take effect on the following request rather than "this turn" — confirm.
const lastVs = Array.isArray(state.lastPromotedVariantVs)
? state.lastPromotedVariantVs
: [];
const currentVariantCount = Array.isArray(state.variants)
? state.variants.length
: 0;
const hadPromotedAttachments =
Array.isArray(state.lastPromotedAttachmentAs) &&
state.lastPromotedAttachmentAs.length > 0;
// Shrink is judged by comparing the current variant count with the number of
// variant ids recorded at the last promotion.
const variantsShrunk =
lastVs.length > 0 && currentVariantCount < lastVs.length;
// variantsGone is the special case of variantsShrunk where nothing is left.
const variantsGone = lastVs.length > 0 && currentVariantCount === 0;
if (
hadPromotedAttachments &&
(variantsShrunk || variantsGone) &&
!state.forcePixelPromotionNextTurn
) {
// Variants were deleted but attachments still exist → force re-promotion
if (debug || options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] Variant shrink detected: ` +
`lastPromotedVariantVs=${lastVs.length} current=${currentVariantCount} ` +
`→ forcing attachment re-promotion`
);
}
state.forcePixelPromotionNextTurn = true;
state.forcePixelPromotionSetAt = localTimestamp();
state.forcePixelPromotionReason = "variant-shrink";
// Also reset the stale variant tracking
state.lastPromotedVariantVs = undefined;
await writeStateAtomic(chatWd, state);
try {
const chatId = path.basename(chatWd);
appendPromotionToPluginLog(
`VIP variant-shrink marker set: chatId=${chatId} requestId=${requestId} ` +
`lastVs=${lastVs.length} currentVariants=${currentVariantCount}`
);
} catch {
// best-effort
}
}
// Use new helper functions for cleaner variant promotion logic (analogous to attachments)
// NOTE: getPromotableVariantVs() now uses registry internally, no cap parameter needed
const promotableVs = getPromotableVariantVs(state);
const promotableIs = getPromotableImageIs(state);
const hasPromotableVariants = promotableVs.length > 0;
const hasPromotableImages = promotableIs.length > 0;
const hasPromotableEverythingElse =
hasPromotableVariants || hasPromotableImages;
let shouldPromoteVariants =
promotionMode === "persistent"
? hasPromotableEverythingElse
: shouldPromoteVariantsIdempotent(state);
if (forceVip && hasPromotableEverythingElse) {
shouldPromoteVariants = true;
}
if (options.debugPromotion) {
const decisionLine =
`Decision: turn=${turnKind} ` +
`mode=${promotionMode} ` +
`forceVip=${forceVip} ` +
`forceVipRequested=${forceVipRequested} ` +
`forceVipReason=${forceVipReason ?? "(none)"} ` +
`promoTs=${
promoLatestTs ?? "(none)"
} source=${promoLatestTsSource} ` +
`lastPromotedTs=${state.lastPromotedTs ?? "(none)"} ` +
`shouldPromoteVariants=${shouldPromoteVariants}; ` +
`promotableVs=[${promotableVs.join(",")}] ` +
`lastPromotedVs=[${(state.lastPromotedVariantVs || []).join(",")}] ` +
`promotableIs=[${promotableIs.join(",")}] ` +
`lastPromotedIs=[${(state.lastPromotedImageIs || []).join(",")}] ` +
`promotableNs=[${promotableNs.join(",")}] ` +
`lastPromotedNs=[${(state.lastPromotedAttachmentAs || []).join(
","
)}] ` +
`shouldPromoteAttachment=${shouldPromoteAttachment}; ` +
`promoUris=${promoGeneratedUris.length} stateVariants=${
(state.variants || []).length
} stateAttachments=${(state.attachments || []).length}`;
console.info(`[VisionPromotion][${requestId}] ${decisionLine}`);
// Mirror a concise decision line into the main plugin log so it's visible
// alongside generate_image backend logs.
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP ${decisionLine} chatId=${chatId} requestId=${requestId}`
);
}
// Promotion can be triggered by either a new generation run OR a new attachment.
// reviewVip (review_image) is a one-shot targeted request and must bypass idempotent early-exit.
if (!shouldPromoteVariants && !shouldPromoteAttachment && !reviewVip) {
// Always write/replace a canary file for the CURRENT rolling window.
// This is used as the primary debugging surface for whether the rolling window is stable.
try {
const canary = buildRollingWindowCanaryFromState({
chatWd,
state,
requestId,
promotionMode,
});
await writeVisionContextCanary(chatWd, canary);
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Failed to write rolling-window canary (skip path):`,
(e as Error).message
);
}
if (options.debugPromotion) {
const why = !promoLatestTs
? "no promoTs (tool/SSOT/state empty) AND attachment already promoted/missing"
: `promoTs=${promoLatestTs} already promoted AND attachment already promoted/missing`;
console.info(
`[VisionPromotion][${requestId}] Skip promotion: turn=${turnKind} source=${promoLatestTsSource} ` +
`promoTs=${promoLatestTs ?? "(none)"}; reason=${why}`
);
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP skip: chatId=${chatId} requestId=${requestId} turn=${turnKind} source=${promoLatestTsSource} ` +
`promoTs=${promoLatestTs ?? "(none)"} reason=${why}`
);
}
} else {
if (options.debugPromotion) {
const reasons: string[] = [];
if (shouldPromoteVariants) reasons.push("variants");
if (shouldPromoteAttachment) reasons.push("attachment");
console.info(
`[VisionPromotion][${requestId}] Promote: turn=${turnKind} willPromote=${
reasons.join("+") || "(none)"
} ` +
`promoTs=${
promoLatestTs ?? "(none)"
} source=${promoLatestTsSource}`
);
}
// If we are promoting variants for a given ts (idempotent mode), ensure state has variants recorded for that ts.
if (
promotionMode === "idempotent" &&
shouldPromoteVariants &&
promoLatestTs &&
promoGeneratedUris.length &&
state.lastVariantsTs !== promoLatestTs
) {
if (options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] Bootstrapping variants for ts=${promoLatestTs} from`,
promoGeneratedUris.length,
"URIs"
);
}
await recordGeneratedVariants(
chatWd,
state,
promoGeneratedUris,
{ maxDim: options.previewMaxDim, quality: options.previewQuality },
undefined,
debug
);
state.lastVariantsTs = promoLatestTs;
await writeStateAtomic(chatWd, state);
}
// Always promote a complete, UI-consistent attachment label list (no limit).
// Base64 preview injection is optional and only happens when visionPromotionPersistent=true.
// No fallbacks: attachment labels must use originalName.
// CRITICAL: Use stable `n` field, not array index
const attachmentLabels: string[] = [];
const variantLabels: string[] = [];
try {
const atts = Array.isArray(state.attachments)
? state.attachments
: [];
for (const a of atts) {
if (!a) continue;
const stableN = typeof a.a === "number" ? a.a : 0;
if (typeof a.originalName !== "string" || !a.originalName.trim()) {
throw new Error(
`[VisionPromotion][${requestId}] Attachment a${stableN} is missing originalName (filename=${String(
a.filename
)}, origin=${String(a.origin)}, originAbs=${String(
a.originAbs
)})`
);
}
attachmentLabels.push(
`Attachment [a${stableN}] ${String(a.originalName)}`
);
}
// Build variant labels for ALL variants (text promotion is unlimited)
const vars = Array.isArray(state.variants) ? state.variants : [];
for (const v of vars) {
if (!v) continue;
const stableV = typeof v.v === "number" ? v.v : 0;
variantLabels.push(`Variant [v${stableV}]`);
}
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
logVisionPromotionFailure({
chatId,
requestId,
promotedAt: localTimestamp(),
mode: promotionMode,
stage: "build_attachment_labels",
message: msg,
});
throw e;
}
// Persistent: always inject all items every turn
// Idempotent: inject only when new attachments or variants detected
// CRITICAL: In idempotent mode, only inject what's actually new:
// - If only variants are new → attachment pixels are disabled (onlyAttachmentNs: []), so old attachments don't distract
// - If only attachments are new → variant/image pixels are disabled (onlyVariantVs: [], onlyImageIs: [])
// - If both are new, or in persistent/forced/review mode → inject both
const shouldInjectPixels =
options.visionPromotionPersistent ||
forceVip ||
reviewVip ||
shouldPromoteAttachment ||
shouldPromoteVariants;
// Determine what to inject based on what changed
const injectAttachmentPixels =
options.visionPromotionPersistent ||
reviewVip ||
(forceVip ? hasPromotableAttachments : shouldPromoteAttachment);
const injectVariantPixels =
options.visionPromotionPersistent ||
reviewVip ||
(forceVip ? hasPromotableEverythingElse : shouldPromoteVariants);
const injectImagePixels = injectVariantPixels;
// Idempotent mode: compute which Ns/Vs are NEW (not yet promoted)
// Persistent mode: undefined = no filter, inject all
const lastPromotedNsSet = new Set(state.lastPromotedAttachmentAs || []);
const lastPromotedVsSet = new Set(state.lastPromotedVariantVs || []);
const lastPromotedIsSet = new Set(state.lastPromotedImageIs || []);
// New attachments = promotable Ns that are NOT in lastPromotedNs
const newAttachmentNs =
options.visionPromotionPersistent || forceVip
? undefined // persistent/forced: no filter
: promotableNs.filter((n) => !lastPromotedNsSet.has(n));
// New variants = promotable Vs that are NOT in lastPromotedVs
const newVariantVs =
options.visionPromotionPersistent || forceVip
? undefined // persistent/forced: no filter
: promotableVs.filter((v) => !lastPromotedVsSet.has(v));
// New images = promotable Is that are NOT in lastPromotedIs
const newImageIs =
options.visionPromotionPersistent || forceVip
? undefined // persistent/forced: no filter
: promotableIs.filter((i) => !lastPromotedIsSet.has(i));
// Assemble the pixel items for this turn.
//  - reviewVip: bypass the rolling windows and inject EXACTLY the requested
//    attachments / variants / images / pictures; any unresolved target throws.
//  - otherwise: delegate to buildPromotionItems (rolling windows), restricted by the
//    "new item" filters computed above.
let items: PromotionItem[] = [];
if (shouldInjectPixels) {
  if (reviewVip) {
    const pending: any = (state as any).pendingReviewPromotion;
    const targets =
      pending?.targets && typeof pending.targets === "object" ? pending.targets : {};

    // Requested ids for one target kind: finite numbers, floored, strictly positive.
    const requestedIds = (key: "a" | "v" | "i" | "p"): number[] => {
      const raw = (targets as any)[key];
      if (!Array.isArray(raw)) return [];
      const ids: number[] = [];
      for (const x of raw as any[]) {
        if (typeof x !== "number" || !Number.isFinite(x)) continue;
        const whole = Math.floor(x);
        if (whole > 0) ids.push(whole);
      }
      return ids;
    };
    // A string is usable when it is non-blank; the untrimmed value is returned.
    const nonBlankString = (value: unknown): string =>
      typeof value === "string" && String(value).trim() ? String(value) : "";

    const onlyA = requestedIds("a");
    const onlyV = requestedIds("v");
    const onlyI = requestedIds("i");
    const onlyP = requestedIds("p");

    // Attachments: exact. They carry an absolute origin plus a chat-relative preview.
    for (const aNum of onlyA) {
      const aRec: any = (state.attachments || []).find(
        (x: any) => typeof x?.a === "number" && x.a === aNum
      );
      if (!aRec) throw new Error(`reviewVip: attachment a${aNum} not found`);
      const abs = nonBlankString(aRec.originAbs);
      const previewRel = nonBlankString(aRec.preview);
      if (!abs) {
        throw new Error(
          `reviewVip: attachment a${aNum} missing originAbs (origin=${String(aRec.origin)})`
        );
      }
      if (!previewRel) {
        throw new Error(
          `reviewVip: attachment a${aNum} missing preview (originAbs=${String(aRec.originAbs)})`
        );
      }
      const originalName = nonBlankString(aRec.originalName) || "(unknown)";
      items.push({
        abs,
        previewAbs: path.join(chatWd, previewRel),
        label: `Attachment [a${aNum}] ${originalName}`,
      });
    }

    // Variants, images and pictures share one shape: a numeric id field plus
    // chat-relative `filename` and `preview` paths.
    const pushExact = (
      records: any[],
      idKey: "v" | "i" | "p",
      noun: string,
      title: string,
      wanted: number[]
    ): void => {
      for (const id of wanted) {
        const rec = records.find(
          (x: any) => typeof x?.[idKey] === "number" && x[idKey] === id
        );
        if (!rec) throw new Error(`reviewVip: ${noun} ${idKey}${id} not found`);
        const fn = typeof rec.filename === "string" ? String(rec.filename) : "";
        const pv = typeof rec.preview === "string" ? String(rec.preview) : "";
        if (!fn || !pv) {
          throw new Error(`reviewVip: ${noun} ${idKey}${id} missing filename/preview`);
        }
        items.push({
          abs: path.join(chatWd, fn),
          previewAbs: path.join(chatWd, pv),
          label: `${title} [${idKey}${id}]`,
        });
      }
    };
    pushExact(state.variants || [], "v", "variant", "Variant", onlyV);
    pushExact(state.images || [], "i", "image", "Image", onlyI);
    pushExact(state.pictures || [], "p", "picture", "Picture", onlyP);
  } else {
    items = buildPromotionItems(chatWd, state, {
      labels: true,
      // An `undefined` filter means "no restriction"; an empty list disables that kind.
      onlyAttachmentNs: injectAttachmentPixels ? newAttachmentNs : [],
      onlyVariantVs: injectVariantPixels ? newVariantVs : [],
      onlyImageIs: injectImagePixels ? newImageIs : [],
    });
  }
}
const itemsRequestedCount = items.length;
// Fail-safe: only inject items whose preview file exists (no silent failures).
// Preview paths that fail the check are remembered here so the failure report
// below can name the files that were actually dropped.
const droppedPreviewPaths: string[] = [];
try {
  const kept: typeof items = [];
  for (const it of items) {
    const okPreview = await fs.promises
      .access(it.previewAbs, fs.constants.F_OK)
      .then(() => true)
      .catch(() => false);
    if (!okPreview) {
      droppedPreviewPaths.push(String(it.previewAbs || ""));
      if (options.debugPromotion) {
        console.warn(
          `[VisionPromotion][${requestId}] Missing preview; skipping:`,
          it.previewAbs
        );
      }
      continue;
    }
    kept.push(it);
  }
  items = kept;
} catch (e) {
  // If the check itself fails, `items` is left unfiltered.
  console.error(
    `[VisionPromotion][${requestId}] Item preview existence check failed:`,
    (e as Error).message
  );
}
const itemsKeptCount = items.length;
vipItemsKeptCount = itemsKeptCount; // Hoist for outcome verification
const itemsDroppedMissingPreviewCount = Math.max(
  0,
  itemsRequestedCount - itemsKeptCount
);
// Missing preview is a real error condition for pixel-promotion.
// Make it explicit via logVisionPromotionFailure instead of debug logs only.
if (itemsDroppedMissingPreviewCount > 0) {
  try {
    const chatId = (() => {
      try {
        return chatWd ? path.basename(chatWd) : "(unknown)";
      } catch {
        return "(unknown)";
      }
    })();
    // Keep message compact to avoid log spam; include a few basenames.
    // The examples come from the DROPPED previews; by this point `items` holds
    // only the kept entries, so it must not be used as the source here.
    const droppedHint = (() => {
      try {
        const examples = droppedPreviewPaths
          .filter(Boolean)
          .slice(0, 5)
          .map((p: string) => path.basename(p));
        return examples.length ? ` examples=${examples.join(",")}` : "";
      } catch {
        return "";
      }
    })();
    logVisionPromotionFailure({
      chatId,
      requestId,
      promotedAt: localTimestamp(),
      mode: promotionMode,
      stage: "missing_preview",
      message:
        `Dropped ${itemsDroppedMissingPreviewCount}/${itemsRequestedCount} pixel items due to missing preview files.` +
        droppedHint,
    });
  } catch {
    // best-effort
  }
}
// Track what was *actually* injected as pixels for canary/debugging.
// Do NOT overwrite if we injected no image bytes this turn.
// Each kept item is classified by looking its source path up in state:
// attachments by absolute origin (or by `origin` equal to the file's basename),
// then images, then variants, both by filename. Items that match none of these
// (e.g. pictures) are not recorded.
let injectedAttachmentNsThisTurn: number[] = [];
let injectedVariantVsThisTurn: number[] = [];
let injectedImageIsThisTurn: number[] = [];
let pixelPromotedAtThisTurn: string | undefined = undefined;
try {
  const attachmentByOriginAbs = new Map<string, any>(
    (state.attachments || [])
      .filter((a: any) => a && typeof a.originAbs === "string")
      .map((a: any): [string, any] => [String(a.originAbs), a])
  );
  const attachmentByOrigin = new Map<string, any>(
    (state.attachments || [])
      .filter((a: any) => a && typeof a.origin === "string")
      .map((a: any): [string, any] => [String(a.origin), a])
  );
  // Null/undefined records are skipped so that one malformed entry cannot abort
  // the tracking of every item. (A picture lookup map used to be built here but
  // was never read; it has been dropped.)
  const variantByFilename = new Map<string, any>(
    (state.variants || [])
      .filter((v: any) => v)
      .map((v: any): [string, any] => [String(v.filename), v])
  );
  const imageByFilename = new Map<string, any>(
    (state.images || [])
      .filter((img: any) => img)
      .map((img: any): [string, any] => [String(img.filename), img])
  );
  for (const it of items) {
    const filename = path.basename(it.abs);
    const a =
      attachmentByOriginAbs.get(String(it.abs)) ||
      attachmentByOrigin.get(filename);
    if (a) {
      if (typeof a.a === "number") injectedAttachmentNsThisTurn.push(a.a);
      continue;
    }
    const img = imageByFilename.get(filename);
    if (img && typeof img.i === "number") {
      injectedImageIsThisTurn.push(img.i);
      continue;
    }
    const v = variantByFilename.get(filename);
    if (v && typeof v.v === "number") injectedVariantVsThisTurn.push(v.v);
  }
} catch {
  // best-effort
}
// Normalise each id list: positive ids only, de-duplicated, ascending.
injectedAttachmentNsThisTurn = Array.from(
  new Set(injectedAttachmentNsThisTurn.filter((x) => x > 0))
).sort((a, b) => a - b);
injectedVariantVsThisTurn = Array.from(
  new Set(injectedVariantVsThisTurn.filter((x) => x > 0))
).sort((a, b) => a - b);
injectedImageIsThisTurn = Array.from(
  new Set(injectedImageIsThisTurn.filter((x) => x > 0))
).sort((a, b) => a - b);
if (items.length > 0) {
  // Capture a single timestamp used both for the canary and persisted state.
  pixelPromotedAtThisTurn = localTimestamp();
  state.lastPixelPromotedAt = pixelPromotedAtThisTurn;
  state.lastPixelPromotedAttachmentAs = injectedAttachmentNsThisTurn;
  state.lastPixelPromotedVariantVs = injectedVariantVsThisTurn;
  state.lastPixelPromotedImageIs = injectedImageIsThisTurn;
}
// Consume one-shot review request after we attempted to build the items for it.
// This prevents repeated injections across turns.
const reviewVipConsumed = reviewVipRequested && reviewVip;
if (reviewVipConsumed) {
(state as any).pendingReviewPromotion = undefined;
try {
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP review consumed: chatId=${chatId} requestId=${requestId} items=${items.length}`
);
} catch {
// best-effort
}
}
// Drop text-only attachment labels that duplicate the label of an item being
// injected as pixels, so the same attachment is not announced twice.
// attachmentLabels is updated in place.
if (items.length > 0) {
  const pixelLabels = new Set<string>(
    items
      .map((it) => it.label)
      .filter((lbl): lbl is string => Boolean(lbl))
  );
  const remainingLabels = attachmentLabels.filter(
    (lbl) => !pixelLabels.has(lbl)
  );
  attachmentLabels.splice(0, attachmentLabels.length, ...remainingLabels);
}
// Debug-only diagnostics: how many attachments exist, how many have a usable
// preview, and how the injected items split between attachments and the rest.
if (options.debugPromotion) {
  const knownAttachments: any[] = Array.isArray(state.attachments)
    ? state.attachments
    : [];
  let previewableCount = 0;
  const knownOriginAbs = new Set<string>();
  const knownOrigin = new Set<string>();
  for (const att of knownAttachments) {
    // Previewable = preview is a string with non-whitespace content.
    if (typeof att?.preview === "string" && att.preview.trim()) {
      previewableCount++;
    }
    if (att && typeof att.originAbs === "string") {
      knownOriginAbs.add(att.originAbs);
    }
    if (att && typeof att.origin === "string") {
      knownOrigin.add(att.origin);
    }
  }
  // An item counts as an attachment when its absolute path or its basename
  // matches a recorded attachment; everything else is counted as a variant.
  let injectedAttachmentCount = 0;
  for (const promoted of items) {
    const baseName = path.basename(promoted.abs);
    if (knownOriginAbs.has(String(promoted.abs)) || knownOrigin.has(baseName)) {
      injectedAttachmentCount++;
    }
  }
  const injectedVariantCount = items.length - injectedAttachmentCount;
  const base64Limit = options.visionPromotionPersistent ? 2 : 0;
  console.info(
    `[VisionPromotion][${requestId}] Attachments: total=${knownAttachments.length} ` +
      `previewable=${previewableCount} (base64 limit=${base64Limit})`
  );
  console.info(
    `[VisionPromotion][${requestId}] Injected as pixels: attachments=${injectedAttachmentCount} variants=${injectedVariantCount} items=${items.length}`
  );
  const returnedLabels: string[] = [];
  for (const promoted of items) {
    returnedLabels.push(promoted.label || path.basename(promoted.previewAbs));
  }
  console.info(
    `[VisionPromotion][${requestId}] buildPromotionItems returned items:`,
    returnedLabels.join(", ")
  );
}
if (attachmentLabels.length || variantLabels.length || items.length) {
if (options.debugPromotion) {
try {
let totalBytes = 0;
const perItem: Array<{ label: string; bytes: number }> = [];
for (const it of items) {
try {
const st = await fs.promises.stat(it.previewAbs);
const b = typeof st.size === "number" ? st.size : 0;
totalBytes += b;
perItem.push({
label: it.label || path.basename(it.previewAbs),
bytes: b,
});
} catch (e) {
const msg = (e as Error).message;
perItem.push({
label: it.label || path.basename(it.previewAbs),
bytes: 0,
});
console.warn(
`[VisionPromotion][${requestId}] Failed to stat preview for size logging:`,
it.previewAbs,
msg
);
}
}
// base64 expands roughly 4/3.
const estBase64Bytes = Math.ceil(totalBytes / 3) * 4;
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP payload sizes: chatId=${chatId} requestId=${requestId} items=${items.length} ` +
`previews_bytes=${totalBytes} est_base64_bytes=${estBase64Bytes} ` +
`per_item=${perItem
.map((p) => `${p.label}:${p.bytes}`)
.join("|")}`
);
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Failed to compute payload sizes:`,
(e as Error).message
);
}
}
// Always log promotion status (not gated by debugPromotion)
try {
const chatId = chatWd ? path.basename(chatWd) : "(unknown)";
const itemLabelsDebug = items
.map((it) => it.label || "(no label)")
.join(", ");
appendPromotionToPluginLog(
`VIP build: chatId=${chatId} requestId=${requestId} ` +
`attachmentLabels=${attachmentLabels.length} variantLabels=${variantLabels.length} items=${items.length} ` +
`itemLabels=[${itemLabelsDebug}] ` +
`visionPromotionPersistent=${
options.visionPromotionPersistent
} forceVip=${forceVip} reason=${forceVipReason ?? "(none)"}`
);
} catch {}
// Assemble a synthetic user message and place it directly after the most
// recent tool result (or at the end when there is no tool message).
const contentParts: any[] = [];
// Text labels first: one text part for all attachment labels, then one for
// all variant labels. Neither list is capped here.
for (const labelGroup of [attachmentLabels, variantLabels]) {
  if (labelGroup.length) {
    contentParts.push({ type: "text", text: labelGroup.join("\n") });
  }
}
// Pixel parts are added only for the selected items; the closing text part
// tells the model whether image bytes accompany this turn.
if (items.length) {
  const pixelParts = await toOpenAIPromptParts(items);
  contentParts.push(...pixelParts, {
    type: "text",
    text:
      "Use the labeled images above as visual context for this response. " +
      "If you cannot see/access the images, explicitly say so and do not invent visual details.",
  });
} else {
  contentParts.push({
    type: "text",
    text:
      "The attachment list above is the current context for this response. " +
      "No image bytes were injected for this turn.",
  });
}
const promotedMessage: ChatCompletionMessageParam = {
  role: "user",
  content: contentParts,
} as any;
// Scan backwards for the last tool message; default to appending at the end.
let insertAt = messages.length;
for (let idx = messages.length; idx-- > 0; ) {
  if ((messages[idx] as any).role === "tool") {
    insertAt = idx + 1;
    break;
  }
}
messages.splice(insertAt, 0, promotedMessage);
// Write/replace a plugin-interoperable canary file so external monitors can
// see the CURRENT rolling window (stable list), plus which items were most recently
// promoted visually.
try {
const canary = buildRollingWindowCanaryFromState({
chatWd,
state,
requestId,
promotionMode,
});
await writeVisionContextCanary(chatWd, canary);
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Failed to assemble canary context:`,
(e as Error).message
);
}
// Always write a structured, separate log entry for:
// - all current attachment labels (text promotion), and
// - which variants actually went in as pixels (if enabled).
// This is intentionally not gated behind debugPromotion.
try {
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
const promotedAt = localTimestamp();
// Lookup tables used to classify each injected item as attachment, picture,
// image or variant.
//
// Builds a Map from `record[key]` to the record. Entries are skipped when the
// record is falsy or when `record[key]` is not a string, and a non-array input
// yields an empty map. This keeps one malformed entry in persisted state
// (e.g. a `null` inside state.variants) from throwing here, which would
// otherwise make the surrounding try/catch drop the whole structured log
// record for this turn. When several records share a key, the last one wins.
const indexRecordsByStringKey = (
  records: unknown,
  key: string
): Map<string, any> => {
  const index = new Map<string, any>();
  if (!Array.isArray(records)) return index;
  for (const rec of records) {
    if (rec && typeof rec[key] === "string") index.set(rec[key], rec);
  }
  return index;
};
const attachmentByOriginAbs = indexRecordsByStringKey(
  state.attachments,
  "originAbs"
);
const attachmentByOrigin = indexRecordsByStringKey(state.attachments, "origin");
const variantByFilename = indexRecordsByStringKey(state.variants, "filename");
const pictureByFilename = indexRecordsByStringKey(state.pictures, "filename");
const imageByFilename = indexRecordsByStringKey(state.images, "filename");
const injectedAttachmentsByOriginAbs = new Set<string>();
const injectedAttachmentsByOrigin = new Set<string>();
const injectedVariantsByFilename = new Set<string>();
const injectedPicturesByFilename = new Set<string>();
const injectedImagesByFilename = new Set<string>();
const injectedAttachments: NonNullable<
Extract<
VisionPromotionJsonlEntry,
{ type: "turn" }
>["injected"]["attachments"]
> = [];
const injectedPictures: NonNullable<
Extract<
VisionPromotionJsonlEntry,
{ type: "turn" }
>["injected"]["pictures"]
> = [];
const injectedVariants: NonNullable<
Extract<
VisionPromotionJsonlEntry,
{ type: "turn" }
>["injected"]["variants"]
> = [];
const injectedImages: NonNullable<
Extract<
VisionPromotionJsonlEntry,
{ type: "turn" }
>["injected"]["images"]
> = [];
for (const it of items) {
const filename = path.basename(it.abs);
const a =
attachmentByOriginAbs.get(String(it.abs)) ||
attachmentByOrigin.get(filename);
if (a) {
if (typeof a.originAbs === "string") {
injectedAttachmentsByOriginAbs.add(String(a.originAbs));
}
if (typeof a.origin === "string") {
injectedAttachmentsByOrigin.add(String(a.origin));
}
injectedAttachments.push({
a: typeof a.a === "number" ? a.a : undefined,
filename:
typeof a.filename === "string" && a.filename
? String(a.filename)
: filename,
origin: typeof a.origin === "string" ? a.origin : undefined,
originAbs:
typeof a.originAbs === "string" ? a.originAbs : undefined,
originalName:
typeof a.originalName === "string"
? a.originalName
: undefined,
preview:
typeof a.preview === "string" ? a.preview : undefined,
previewAbs: it.previewAbs,
});
continue;
}
// Canvas picture promotion (pN) is injected before variants; log separately.
const pic = pictureByFilename.get(filename);
if (pic && typeof pic.filename === "string") {
injectedPicturesByFilename.add(String(pic.filename));
injectedPictures.push({
filename: String(pic.filename),
originAbs: it.abs,
sourceTool:
typeof pic?.sourceTool === "string"
? String(pic.sourceTool)
: undefined,
sourceUrl:
typeof pic?.sourceUrl === "string"
? String(pic.sourceUrl)
: undefined,
preview:
typeof pic?.preview === "string"
? String(pic.preview)
: path.basename(it.previewAbs),
previewAbs: it.previewAbs,
p: typeof pic?.p === "number" ? pic.p : undefined,
});
continue;
}
const img = imageByFilename.get(filename);
if (img && typeof img.filename === "string") {
injectedImagesByFilename.add(String(img.filename));
injectedImages.push({
filename: String(img.filename),
originAbs: it.abs,
sourceTool:
typeof img?.sourceTool === "string"
? String(img.sourceTool)
: undefined,
preview:
typeof img?.preview === "string"
? String(img.preview)
: path.basename(it.previewAbs),
previewAbs: it.previewAbs,
i: typeof img?.i === "number" ? img.i : undefined,
});
continue;
}
const v = variantByFilename.get(filename);
if (v && typeof v.filename === "string") {
injectedVariantsByFilename.add(String(v.filename));
} else {
injectedVariantsByFilename.add(filename);
}
injectedVariants.push({
filename:
typeof v?.filename === "string"
? String(v.filename)
: filename,
originAbs: it.abs,
sourceTool:
typeof v?.sourceTool === "string"
? String(v.sourceTool)
: undefined,
sourceUrl:
typeof v?.sourceUrl === "string"
? String(v.sourceUrl)
: undefined,
preview:
typeof v?.preview === "string"
? String(v.preview)
: path.basename(it.previewAbs),
previewAbs: it.previewAbs,
v: typeof v?.v === "number" ? v.v : undefined,
});
}
const entries: VisionPromotionJsonlEntry[] = [];
// Meta entry for parsing/diagnostics: describes exactly what happened this turn.
const forceVipConsumed = forceVip && items.length > 0;
// NOTE: Images are part of unified "Everything Else" window with variants.
// For M1 we always set shouldPromoteImages = shouldPromoteVariants since they share the same window.
const shouldPromoteImages = shouldPromoteVariants;
// imageLabels is empty for M1 (no label array populated yet)
const imageLabels: string[] = [];
entries.push({
type: "turn",
chatId,
requestId,
promotedAt,
mode: promotionMode,
shouldInjectPixels,
triggers: {
visionPromotionPersistent: options.visionPromotionPersistent,
forceVip,
forceVipRequested,
forceVipReason,
forceVipStaleCleared,
forceVipConsumed,
reviewVip,
reviewVipRequested,
reviewVipReason,
reviewVipStaleCleared,
reviewVipConsumed,
shouldPromoteAttachment,
shouldPromoteVariants,
shouldPromoteImages,
},
counts: {
attachmentLabels: attachmentLabels.length,
variantLabels: variantLabels.length,
imageLabels: imageLabels.length,
itemsRequested: itemsRequestedCount,
itemsKept: itemsKeptCount,
itemsDroppedMissingPreview: itemsDroppedMissingPreviewCount,
injectedAttachments: injectedAttachments.length,
injectedPictures: injectedPictures.length,
injectedVariants: injectedVariants.length,
injectedImages: injectedImages.length,
},
injected: {
attachments: injectedAttachments,
pictures: injectedPictures,
variants: injectedVariants,
images: injectedImages,
},
});
// 1) Always log ALL current attachments (labels are always promoted).
try {
const atts = Array.isArray(state.attachments)
? state.attachments
: [];
for (const a of atts as any[]) {
if (!a) continue;
const pv =
typeof a.preview === "string" ? String(a.preview) : "";
const keyOriginAbs =
typeof a.originAbs === "string" && a.originAbs
? String(a.originAbs)
: undefined;
const keyOrigin =
typeof a.origin === "string" && a.origin
? String(a.origin)
: undefined;
const pixelPromoted =
(keyOriginAbs
? injectedAttachmentsByOriginAbs.has(keyOriginAbs)
: false) ||
(keyOrigin
? injectedAttachmentsByOrigin.has(keyOrigin)
: false);
entries.push({
type: "attachment",
filename:
typeof a.filename === "string" && a.filename
? String(a.filename)
: typeof a.origin === "string" && a.origin
? String(a.origin)
: "(unknown)",
origin: typeof a.origin === "string" ? a.origin : undefined,
originAbs:
typeof a.originAbs === "string" ? a.originAbs : undefined,
originalName:
typeof a.originalName === "string"
? a.originalName
: undefined,
preview: pv || undefined,
previewAbs: pv ? path.join(chatWd, pv) : undefined,
createdAt:
typeof a.createdAt === "string" ? a.createdAt : promotedAt,
a: typeof a.a === "number" ? a.a : undefined,
chatId,
requestId,
promotedAt,
mode: promotionMode,
pixelPromoted,
});
}
} catch {}
// 2) Always log ACTIVE rolling-window variants (not all historical variants),
// with pixelPromoted flags. Also log images in the same unified window.
try {
type PoolItem = {
kind: "variant" | "image";
index: number;
createdAt: string;
record: any;
};
const pool: PoolItem[] = [];
const vars = Array.isArray(state.variants) ? state.variants : [];
const imgs = Array.isArray(state.images) ? state.images : [];
for (const v of vars as any[]) {
if (!v) continue;
if ((v as any).kind === "tool_result") continue;
if (!v.preview) continue;
pool.push({
kind: "variant",
index: typeof v.v === "number" ? v.v : 0,
createdAt: typeof v.createdAt === "string" ? v.createdAt : "",
record: v,
});
}
for (const img of imgs as any[]) {
if (!img) continue;
if (!img.preview) continue;
pool.push({
kind: "image",
index: typeof img.i === "number" ? img.i : 0,
createdAt:
typeof img.createdAt === "string" ? img.createdAt : "",
record: img,
});
}
pool.sort((a, b) => {
const cmp = a.createdAt.localeCompare(b.createdAt);
if (cmp !== 0) return cmp;
if (a.kind !== b.kind) return a.kind === "variant" ? -1 : 1;
return a.index - b.index;
});
const cap = getEverythingElseWindowSize();
const windowItems = cap > 0 ? pool.slice(-cap) : [];
for (const item of windowItems) {
const rec = item.record;
const fn =
typeof rec?.filename === "string" && rec.filename
? String(rec.filename)
: "(unknown)";
const pv =
typeof rec?.preview === "string" ? String(rec.preview) : "";
if (item.kind === "variant") {
const pixelPromoted = injectedVariantsByFilename.has(fn);
entries.push({
type: "variant",
filename: fn,
originAbs: path.join(chatWd, fn),
preview: pv || "(missing-preview)",
previewAbs: pv
? path.join(chatWd, pv)
: "(missing-preview)",
createdAt:
typeof rec.createdAt === "string"
? rec.createdAt
: promotedAt,
v: typeof rec.v === "number" ? rec.v : undefined,
sourceTool:
typeof rec?.sourceTool === "string"
? String(rec.sourceTool)
: undefined,
sourceUrl:
typeof rec?.sourceUrl === "string"
? String(rec.sourceUrl)
: undefined,
chatId,
requestId,
promotedAt,
mode: promotionMode,
pixelPromoted,
});
} else {
const pixelPromoted = injectedImagesByFilename.has(fn);
entries.push({
type: "image",
filename: fn,
originAbs: path.join(chatWd, fn),
preview: pv || "(missing-preview)",
previewAbs: pv
? path.join(chatWd, pv)
: "(missing-preview)",
createdAt:
typeof rec.createdAt === "string"
? rec.createdAt
: promotedAt,
i: typeof rec.i === "number" ? rec.i : undefined,
sourceTool:
typeof rec?.sourceTool === "string"
? String(rec.sourceTool)
: undefined,
chatId,
requestId,
promotedAt,
mode: promotionMode,
pixelPromoted,
});
}
}
} catch {}
// 3) Log injected canvas picture(s) (if any) explicitly.
try {
for (const p of injectedPictures) {
const fn =
typeof p.filename === "string" ? p.filename : "(unknown)";
const pv = typeof p.preview === "string" ? p.preview : "";
entries.push({
type: "picture",
filename: fn,
originAbs:
typeof p.originAbs === "string"
? p.originAbs
: path.join(chatWd, fn),
preview: pv || "(missing-preview)",
previewAbs:
typeof p.previewAbs === "string"
? p.previewAbs
: pv
? path.join(chatWd, pv)
: "(missing-preview)",
createdAt: promotedAt,
p: typeof p.p === "number" ? p.p : undefined,
sourceTool:
typeof p.sourceTool === "string" ? p.sourceTool : undefined,
sourceUrl:
typeof p.sourceUrl === "string" ? p.sourceUrl : undefined,
chatId,
requestId,
promotedAt,
mode: promotionMode,
pixelPromoted: true,
});
}
} catch {}
appendVisionPromotionJsonl(entries);
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Failed to write structured promotion JSONL:`,
(e as Error).message
);
}
// ── Post-injection bookkeeping ──
// Everything below mutates `state` in memory; the single writeStateAtomic call
// at the end of this section persists all of it together.
// Mark promotion done.
if (promoLatestTs) {
state.lastPromotedTs = promoLatestTs;
}
// Abort recovery: clear the one-shot force flag only after we actually injected pixels.
// If we couldn't inject (e.g. missing preview files), keep it until TTL expires.
if (forceVip && items.length > 0) {
state.forcePixelPromotionNextTurn = undefined;
state.forcePixelPromotionSetAt = undefined;
state.forcePixelPromotionReason = undefined;
try {
// path.basename throws on a non-string argument, hence the inner guard that
// falls back to "(unknown)".
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP force consumed: chatId=${chatId} requestId=${requestId}`
);
} catch {
// best-effort
}
}
// Update lastPromoted* ONLY when we actually injected pixel bytes for that category.
// Otherwise, idempotent mode can get "stuck": we mark a window as promoted even though
// the model received no images (e.g., missing preview), and then we never try again.
//
// Important: we still store the FULL promotable window (e.g., [4,5,6]) rather than only
// the new items (e.g., [6]), because shouldPromote* compares the rolling window sets.
//
// Each of the three updates below is skipped when this turn consumed a review
// request (reviewVipConsumed).
if (
injectAttachmentPixels &&
!reviewVipConsumed &&
promotableNs.length > 0 &&
injectedAttachmentNsThisTurn.length > 0
) {
state.lastPromotedAttachmentAs = promotableNs;
// promotableNs is non-empty here (checked above), so Math.max gets at least one argument.
state.lastPromotedAttachmentA = Math.max(...promotableNs);
}
if (
injectVariantPixels &&
!reviewVipConsumed &&
promotableVs.length > 0 &&
injectedVariantVsThisTurn.length > 0
) {
state.lastPromotedVariantVs = promotableVs;
}
if (
injectImagePixels &&
!reviewVipConsumed &&
promotableIs.length > 0 &&
injectedImageIsThisTurn.length > 0
) {
state.lastPromotedImageIs = promotableIs;
}
// Persist the most recent *visual* (pixel/base64) promotion selection for canary/debugging.
if (items.length > 0) {
// Prefer the timestamp captured for this turn, then any value already in
// state, and only then take a fresh timestamp.
state.lastPixelPromotedAt =
pixelPromotedAtThisTurn ||
state.lastPixelPromotedAt ||
localTimestamp();
state.lastPixelPromotedAttachmentAs = injectedAttachmentNsThisTurn;
state.lastPixelPromotedVariantVs = injectedVariantVsThisTurn;
state.lastPixelPromotedImageIs = injectedImageIsThisTurn;
}
// Record current SDK history size for stable delete/regenerate detection.
state.lastHistoryMessageCount = historyMessageCount;
state.lastHistoryMessageCountAt = localTimestamp();
// Single persistence point for all state changes made above.
await writeStateAtomic(chatWd, state);
if (options.debugPromotion) {
console.info(
`[VisionPromotion][${requestId}] Injected pixel-first promotion message at index=${insertAt}; ` +
`mode=${promotionMode}; promotedTs=${
promoLatestTs || "(none)"
}; promotedAttachmentNs=[${promotableNs.join(
","
)}]; promotedVariantVs=[${promotableVs.join(",")}]`
);
try {
const summary = summarizeContentPartsForDebug(
(promotedMessage as any).content
);
console.info(
`[VisionPromotion][${requestId}] Injected VIP message parts (redacted): ${summary}`
);
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
appendPromotionToPluginLog(
`VIP injected parts (redacted): chatId=${chatId} requestId=${requestId} ${summary}`
);
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Failed to summarize injected VIP message parts:`,
(e as Error).message
);
}
// Also log successful promotions to the main plugin log for easier correlation
// with generate_image runs and backend logs.
const chatId = (() => {
try {
return chatWd ? path.basename(chatWd) : "(unknown)";
} catch {
return "(unknown)";
}
})();
const labels = items
.map((i) => i.label || path.basename(i.previewAbs))
.join(", ");
appendPromotionToPluginLog(
`VIP injected: chatId=${chatId} requestId=${requestId} turn=${turnKind} ` +
`mode=${promotionMode} promotedTs=${
promoLatestTs || "(none)"
} promotedAttachmentNs=[${promotableNs.join(",")}] ` +
`items=${items.length}${
labels ? ` (${labels})` : ""
} insertAt=${insertAt}`
);
}
}
}
}
} catch (e) {
console.error(
`[VisionPromotion][${requestId}] Promotion assembly error:`,
(e as Error).message
);
}
// ─── Sequence Review injection (review_sequence) ─────────────────────────────
// Independent of the VP subsystem; the request lives under its own state key
// (pendingSequenceReview).
// seqFramesInjectedCount records how many image_url parts this step adds so
// the later outcome verification can leave them out of the VP count check.
let seqFramesInjectedCount = 0;
if (workingDir) {
  try {
    const seqState = await readState(workingDir);
    const pending = (seqState as any).pendingSequenceReview;
    const usable =
      pending && typeof pending === "object" && capabilities.supportsVision;
    if (usable) {
      // TTL defaults to five minutes when the request carries no positive ttlMs.
      const ttlMs =
        typeof pending.ttlMs === "number" && pending.ttlMs > 0
          ? pending.ttlMs
          : 5 * 60 * 1000;
      const requestedAt =
        typeof pending.requestedAt === "string" ? pending.requestedAt : "";
      const ageMs = requestedAt ? Date.now() - Date.parse(requestedAt) : 0;
      // Only a finite age beyond the TTL expires the request; a missing or
      // unparseable timestamp leaves it valid.
      const expired = Number.isFinite(ageMs) && ageMs > ttlMs;
      if (!expired) {
        const movAbs =
          typeof pending.movAbs === "string" ? pending.movAbs : "";
        const variant =
          typeof pending.variant === "number" ? pending.variant : 0;
        const variantLabel =
          typeof pending.variantLabel === "string" && pending.variantLabel
            ? pending.variantLabel
            : `v${variant}`;
        const fps =
          typeof pending.fps === "number" && pending.fps > 0 ? pending.fps : 2;
        if (movAbs && variant > 0) {
          const frames = await extractSequenceFrames(movAbs, fps, {
            maxDim: options.previewMaxDim,
            quality: options.previewQuality,
          });
          if (frames.length > 0) {
            // Each frame contributes an image part followed by its caption.
            const frameParts: any[] = [];
            for (const { jpegBuf, frameIndex, timecodeStr } of frames) {
              frameParts.push(
                {
                  type: "image_url",
                  image_url: {
                    url: `data:image/jpeg;base64,${jpegBuf.toString("base64")}`,
                  },
                },
                {
                  type: "text",
                  text: `Variant ${variantLabel} \u2013 Frame ${frameIndex} \u2013 ${timecodeStr}`,
                }
              );
            }
            const seqMessage: ChatCompletionMessageParam = {
              role: "user",
              content: frameParts,
            } as any;
            // Insert right after the last tool message, or append when none exists.
            let cursor = messages.length - 1;
            while (cursor >= 0 && (messages[cursor] as any).role !== "tool") {
              cursor--;
            }
            const insertAt = cursor >= 0 ? cursor + 1 : messages.length;
            messages.splice(insertAt, 0, seqMessage);
            seqFramesInjectedCount = frames.length;
          }
        }
      }
      // Expired or handled: either way the one-shot request is consumed.
      (seqState as any).pendingSequenceReview = undefined;
      await writeStateAtomic(workingDir, seqState);
    }
  } catch (e) {
    console.error(
      `[SequenceReview][${requestId}] Injection error:`,
      (e as Error).message
    );
  }
}
// Debug-only: summarise the outgoing request and note in the plugin log
// whether it carries any image_url parts.
if (options.debugPromotion) {
  const outgoingSummary = summarizeOpenAIMessagesForDebug(messages);
  console.info(
    `[VisionPromotion][${requestId}] Outgoing request summary: ${outgoingSummary}`
  );
  try {
    const chatId = workingDir ? path.basename(workingDir) : "(unknown)";
    const imageUrlCount = countOutgoingImageUrlParts(messages);
    const logLine =
      imageUrlCount > 0
        ? `VIP request has pixels: chatId=${chatId} requestId=${requestId} image_url_parts=${imageUrlCount} messages=${messages.length}`
        : `VIP request has NO pixels: chatId=${chatId} requestId=${requestId} image_url_parts=0 messages=${messages.length}`;
    appendPromotionToPluginLog(logLine);
  } catch (e) {
    console.error(
      "[VisionPromotion] Failed to summarize outgoing pixel parts:",
      (e as Error).message
    );
  }
}
/* 4. Consolidate consecutive same-role messages to avoid Jinja template errors */
// Some model templates require strict user/assistant alternation.
// This can break when users delete messages from history.
const consolidatedMessages = consolidateConsecutiveRoles(messages);
/* 4.5. Outcome verification: measure actual image_url parts in final request */
// This MUST happen AFTER consolidation to catch any transformations that might drop parts.
// The result is appended to the structured JSONL log as an "outcome" entry; a
// mismatch is additionally reported on the console and in the plugin log.
// Failures in this section are logged and never block the request.
try {
  const imageUrlParts = countOutgoingImageUrlParts(consolidatedMessages);
  // Exclude pre-existing image_url parts (tool results, user attachments) and
  // sequence-review frames — neither is tracked by vipItemsKeptCount.
  // The difference is clamped at zero.
  const imageUrlPartsVpOnly = Math.max(
    0,
    imageUrlParts - preExistingImageUrlParts - seqFramesInjectedCount
  );
  const verified = imageUrlPartsVpOnly === vipItemsKeptCount;
  const error: "pixels_missing" | "pixels_excess" | null =
    imageUrlPartsVpOnly < vipItemsKeptCount
      ? "pixels_missing"
      : imageUrlPartsVpOnly > vipItemsKeptCount
      ? "pixels_excess"
      : null;
  const chatId = workingDir ? path.basename(workingDir) : "(unknown)";
  const promotedAt = localTimestamp();
  // Append outcome entry to JSONL for verification.
  // seqFramesInjected is only included when sequence frames were injected.
  appendVisionPromotionJsonl([
    {
      type: "outcome",
      chatId,
      requestId,
      promotedAt,
      mode: options.visionPromotionPersistent ? "persistent" : "idempotent",
      itemsKept: vipItemsKeptCount,
      ...(seqFramesInjectedCount > 0
        ? { seqFramesInjected: seqFramesInjectedCount }
        : {}),
      outcome: {
        imageUrlParts,
        preExistingImageUrlParts,
        ...(seqFramesInjectedCount > 0
          ? { seqFramesInjected: seqFramesInjectedCount }
          : {}),
        imageUrlPartsVpOnly,
        verified,
        error,
      },
    },
  ]);
  // Log verification result
  if (!verified) {
    console.warn(
      `[VisionPromotion][${requestId}] Outcome verification FAILED: ` +
        `itemsKept=${vipItemsKeptCount} imageUrlParts=${imageUrlParts} vpOnly=${imageUrlPartsVpOnly} preExisting=${preExistingImageUrlParts} seqFrames=${seqFramesInjectedCount} error=${error}`
    );
    // seqFrames is part of the vpOnly computation, so it is logged here as
    // well; without it the numbers on this line cannot be reconciled.
    appendPromotionToPluginLog(
      `VIP outcome MISMATCH: chatId=${chatId} requestId=${requestId} ` +
        `itemsKept=${vipItemsKeptCount} imageUrlParts=${imageUrlParts} vpOnly=${imageUrlPartsVpOnly} preExisting=${preExistingImageUrlParts} seqFrames=${seqFramesInjectedCount} error=${error}`
    );
  } else if (options.debugPromotion) {
    console.info(
      `[VisionPromotion][${requestId}] Outcome verification OK: ` +
        `itemsKept=${vipItemsKeptCount} imageUrlParts=${imageUrlParts}`
    );
  }
} catch (e) {
  console.error(
    "[VisionPromotion] Outcome verification failed:",
    (e as Error).message
  );
}
/* 5. Kick off streaming completion */
// Qwen3.5 requires explicit `enable_thinking: true` in chat_template_kwargs
// to activate reasoning via its Jinja template. Unlike Qwen3, it does not
// support the /think soft-switch. See: capabilities.ts → requiresEnableThinkingFlag
// chatTemplateKwargs stays undefined unless the model both supports thinking
// and is flagged as requiring the explicit switch.
const chatTemplateKwargs =
capabilities.supportsThinking &&
capabilities.thinking?.requiresEnableThinkingFlag
? { enable_thinking: true }
: undefined;
// The request uses the consolidated message list, not the raw `messages` array.
// chat_template_kwargs is not part of the typed request shape, hence the cast;
// the key is only added when chatTemplateKwargs is set.
const stream = await openai.chat.completions.create({
model: model,
messages: consolidatedMessages,
tools,
stream: true,
...(chatTemplateKwargs ? { chat_template_kwargs: chatTemplateKwargs } : {}),
} as any);
/* 6. Abort wiring & stream processing */
// NOTE(review): wireAbort presumably ties cancellation on `ctl` to the stream — confirm in its definition.
wireAbort(ctl, stream as any);
// Always parse the default <think>...</think> tags so free-text model IDs
// still get robust thinking support even when /api/v1/models does not expose
// a reasoning capability flag.
// Only override markers when the model is explicitly configured in
// src/capabilities.ts.
const reasoningSectionParsing =
capabilities.supportsThinking &&
capabilities.thinking?.reasoningSectionParsing?.enabled
? capabilities.thinking.reasoningSectionParsing
: {
enabled: true,
startString: "<think>",
endString: "</think>",
};
// The snapshot payload is only built when PREVIEW_IN_CHAT is falsy. If the
// dynamic import or the snapshot build throws, the payload stays undefined
// and streaming continues without it.
let drawThingsIndexSnapshotPayload: string | undefined;
if (!globalConfig.get("PREVIEW_IN_CHAT")) {
try {
const { buildDtcModelMappingSnapshot } = await import(
"./services/modelMappingSnapshot.js"
);
const snapshot = await buildDtcModelMappingSnapshot();
drawThingsIndexSnapshotPayload = JSON.stringify(snapshot);
} catch {
drawThingsIndexSnapshotPayload = undefined;
}
}
// PREVIEW_IN_CHAT is read a second time for enrichDrawThingsIndex below, so
// the flag can be true while the payload is undefined if the snapshot build failed.
await consumeStream(
stream as any,
ctl,
nameMaps,
options,
reasoningSectionParsing,
{
// Hard requirement:
// PREVIEW_IN_CHAT=false -> include snapshot payload (enables rewrite/hints)
// PREVIEW_IN_CHAT=true -> plain query only
enrichDrawThingsIndex: !globalConfig.get("PREVIEW_IN_CHAT"),
drawThingsIndexSnapshotPayload,
}
);
}