// Project Files
// src/core/tools.ts
/*
* Core tool handlers for LM Studio Plugin (transport-agnostic)
* This file extracts the logic from LM Studio Plugin tool handlers in src/index.ts
* with minimal changes to keep behavior identical.
*/
import path from "path";
import fs from "fs";
import os from "os";
import net from "net";
import { fileURLToPath, pathToFileURL } from "url";
import axios from "axios";
import FormData from "form-data";
import {
getSize as imgGetSize,
toPng as imgToPng,
resizeAndEncode as imgResizeAndEncode,
resizeInsideToPng as imgResizeInsideToPng,
resizeCoverToPng as imgResizeCoverToPng,
GenerateToolParamsShapeMinimal,
GenerateToolParamsSchemaMinimalStrict,
formatZodError,
ImageGenerationResult,
generateRuntimeDefaults,
engineConnectionDefaults,
getEngineConnectionDefaults,
drawthingsLimits,
drawthingsEditLimits,
previewDefaults,
checkDrawThingsGrpcFilesExist,
findLMStudioHome,
getLMStudioWorkingDir,
resolveActiveLMStudioChatId,
getActiveChatContext,
readState,
writeStateAtomic,
generatePreviewFromBuffer,
resolveImg2ImgSourceLMStudio,
buildAuditLogger,
getHealthyServerBaseUrl,
toHttpOriginalUrl,
toHttpPreviewUrl,
getLogsDir,
ensureLogsDir,
getSelfPluginIdentifier,
VARIANT_FULL_CONFIG,
getAudioSampleRateForModel,
} from "../core-bundle.mjs";
import { DrawThingsService } from "../services/drawThingsService.js";
import { ImageBackend, ProgressCallback } from "../services/imageBackend.js";
import { getModelFilename } from "../services/modelOverlays.js";
// Global debug toggle
const DEBUG_MODE: boolean = true;
// Resolve the log directory once at module load and record where it lives.
ensureLogsDir();
const logsDir: string = getLogsDir();
const logFile: string = path.join(logsDir, "generate-image-plugin.log");
try {
  fs.appendFileSync(logFile, `${localTimestamp()} - paths: logsDir=${logsDir}\n`);
} catch {
  // Best-effort startup breadcrumb; never fail module load over logging.
}
// LM Studio only – no client resolver switching
/**
 * Pick a locale tag for log timestamps from the environment.
 * Precedence: LOG_LOCALE, then LC_ALL, LC_TIME, LANG. Any ".encoding"
 * suffix is stripped and underscores become dashes (e.g. "de_DE.UTF-8"
 * -> "de-DE"). Returns undefined when nothing usable is set.
 */
function resolvePreferredLocale(): string | undefined {
  const raw =
    process.env.LOG_LOCALE ||
    process.env.LC_ALL ||
    process.env.LC_TIME ||
    process.env.LANG;
  if (!raw) return undefined;
  const normalized = String(raw).split(".")[0].replace(/_/g, "-");
  return normalized || undefined;
}
/**
 * Human-readable local timestamp for log lines.
 * Uses Intl formatting with the user's preferred locale when available,
 * falling back to a manual "dd/mm/yyyy hh:mm:ss" rendering when the
 * runtime rejects the locale/options.
 */
function localTimestamp(): string {
  const formatOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    hour12: false,
    timeZoneName: "short",
  };
  const loc = resolvePreferredLocale();
  try {
    return new Date().toLocaleString(loc as any, formatOptions as any);
  } catch {
    // Manual fallback: dd/mm/yyyy hh:mm:ss (no timezone suffix).
    const now = new Date();
    const pad2 = (n: number) => String(n).padStart(2, "0");
    const datePart = `${pad2(now.getDate())}/${pad2(
      now.getMonth() + 1
    )}/${now.getFullYear()}`;
    const timePart = `${pad2(now.getHours())}:${pad2(now.getMinutes())}:${pad2(
      now.getSeconds()
    )}`;
    return `${datePart} ${timePart}`;
  }
}
/**
 * Append a timestamped line to the plugin log file and echo it to stdout.
 * File-system work is best-effort: logging must never throw into callers.
 */
function log(message: string): void {
  const timestamp = localTimestamp();
  const line = `${timestamp} - ${message}\n`;
  try {
    // The logs dir normally exists (ensureLogsDir ran at module load);
    // re-create defensively in case it was removed at runtime.
    const dir = path.dirname(logFile);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    fs.appendFileSync(logFile, line);
  } catch {}
  console.log(line.trim());
}
/**
 * Append an error block (message + stack when available) to error.log,
 * mirroring it to stderr when DEBUG_MODE is on. Fully best-effort: any
 * failure while logging is swallowed.
 */
async function logError(error: unknown): Promise<void> {
  try {
    const errorLogFile = path.join(logsDir, "error.log");
    await fs.promises.mkdir(logsDir, { recursive: true }).catch(() => {});
    const timestamp = localTimestamp();
    // Preserve the stack for real Errors; stringify anything else.
    const details =
      error instanceof Error
        ? `${error.message}\n${error.stack}`
        : String(error);
    const block = `${timestamp} - ERROR:\n${details}\n\n`;
    await fs.promises.appendFile(errorLogFile, block);
    if (DEBUG_MODE) console.error(block);
  } catch {}
}
/**
 * Append a raw backend payload to error.log, optionally tagged with the
 * HTTP status it arrived with. Best-effort; never throws.
 */
async function appendErrorRaw(raw: string, status?: number): Promise<void> {
  try {
    const errorLogFile = path.join(logsDir, "error.log");
    await fs.promises.mkdir(logsDir, { recursive: true }).catch(() => {});
    const label =
      typeof status === "number"
        ? `BACKEND RAW (status ${status})`
        : "BACKEND RAW";
    const entry = `${localTimestamp()} - ${label}:\n${raw}\n\n`;
    await fs.promises.appendFile(errorLogFile, entry);
  } catch {}
}
// ISO timestamp with ":" and "." swapped for "-" so it is filesystem-safe.
function isoStamp(): string {
  const iso = new Date().toISOString();
  return iso.split(/[:.]/).join("-");
}
// Compact timestamp for filenames, e.g. 20251115T232635722Z
function isoStampCompact(): string {
  const now = new Date();
  const pad = (value: number, width: number) =>
    String(value).padStart(width, "0");
  const datePart = `${now.getUTCFullYear()}${pad(now.getUTCMonth() + 1, 2)}${pad(
    now.getUTCDate(),
    2
  )}`;
  const timePart = `${pad(now.getUTCHours(), 2)}${pad(
    now.getUTCMinutes(),
    2
  )}${pad(now.getUTCSeconds(), 2)}${pad(now.getUTCMilliseconds(), 3)}`;
  return `${datePart}T${timePart}Z`;
}
// Convert an absolute filesystem path into a percent-encoded file:// URL.
function encodeFileUrl(abs: string): string {
  return String(pathToFileURL(abs));
}
/**
 * Shallow-copy an object while dropping every key that starts with "_".
 * Prevents user injection of internal/legacy knobs (e.g. _dt_*, _i2i_*).
 * Non-objects (null/undefined) yield an empty object.
 */
function stripInternalToolKeys<
  T extends Record<string, any> | null | undefined
>(obj: T): Record<string, any> {
  if (!obj || typeof obj !== "object") return {};
  return Object.fromEntries(
    Object.entries(obj).filter(([key]) => !key.startsWith("_"))
  );
}
/**
 * Sniff a buffer's magic bytes for the formats the pipeline accepts:
 * PNG, JPEG, or WEBP (RIFF container). Buffers shorter than 12 bytes
 * are rejected outright.
 */
function isSupportedImageBuffer(buf: Buffer): boolean {
  if (!buf || buf.length < 12) return false;
  const matchesAt = (offset: number, bytes: number[]) =>
    bytes.every((b, i) => buf[offset + i] === b);
  // PNG: 8-byte signature
  if (matchesAt(0, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]))
    return true;
  // JPEG: SOI marker
  if (matchesAt(0, [0xff, 0xd8])) return true;
  // WEBP: "RIFF" at offset 0 and "WEBP" at offset 8
  if (
    matchesAt(0, [0x52, 0x49, 0x46, 0x46]) &&
    matchesAt(8, [0x57, 0x45, 0x42, 0x50])
  )
    return true;
  return false;
}
/**
 * True when the buffer starts with the 8-byte PNG signature.
 *
 * Fix: always return a real boolean. The previous implementation returned
 * the chained `&&` expression, which leaked the nullish operand (instead
 * of `false`) when called with null/undefined through an untyped path —
 * despite the declared `boolean` return type.
 */
function isPng(buf: Buffer): boolean {
  if (!buf || buf.length < 8) return false;
  return (
    buf[0] === 0x89 &&
    buf[1] === 0x50 &&
    buf[2] === 0x4e &&
    buf[3] === 0x47 &&
    buf[4] === 0x0d &&
    buf[5] === 0x0a &&
    buf[6] === 0x1a &&
    buf[7] === 0x0a
  );
}
// Pools an input-image reference can point at:
//  a# → chat attachment, v# → generated variant, p# → picture.
type SourcePool = "attachment" | "variant" | "picture";
/**
 * Lower-case/trim a source notation and expand the bare shorthands
 * "a"/"v"/"p" into their first index ("a1"/"v1"/"p1").
 */
function normalizeSourceNotation(s: string): string {
  const trimmed = String(s || "").trim().toLowerCase();
  switch (trimmed) {
    case "a":
      return "a1";
    case "v":
      return "v1";
    case "p":
      return "p1";
    default:
      return trimmed;
  }
}
/**
 * Parse notations like "a1", "v2", "p 3" into a pool plus 1-based index
 * (minimum 1). Returns null for anything that is not prefixed notation.
 */
function parsePrefixedNotation(
  s: string
): { pool: SourcePool; index: number } | null {
  const match = normalizeSourceNotation(s).match(/^([avp])\s*(\d+)$/);
  if (!match) return null;
  const poolByPrefix: Record<string, SourcePool> = {
    a: "attachment",
    v: "variant",
    p: "picture",
  };
  return {
    pool: poolByPrefix[match[1]],
    index: Math.max(1, parseInt(match[2], 10)),
  };
}
/**
 * Parse a bare digit notation ("2") into a 1-based index (minimum 1),
 * or null when the string is not purely digits.
 */
function parseDigitOnlyNotation(s: string): number | null {
  const match = String(s || "").trim().match(/^(\d+)$/);
  return match ? Math.max(1, parseInt(match[1], 10)) : null;
}
/**
 * Persist a generated image to disk as PNG (transcoding JPEG/WEBP when
 * needed) and return file metadata plus a file:// URL.
 *
 * - Buffers that are not a recognized image format are written verbatim
 *   with a ".bin" extension and an octet-stream mime type instead of
 *   being mislabeled as PNG.
 * - If PNG conversion or the write fails, the raw bytes are written to
 *   the .png path as a last resort.
 *
 * @param sourceBuffer image bytes from the backend
 * @param preferredDir required output directory (LM Studio chat working dir)
 * @param preferredFileName optional name; ".png" is appended when missing
 * @throws when no output directory was resolved
 */
async function saveOriginalPng(
  sourceBuffer: Buffer,
  preferredDir?: string,
  preferredFileName?: string
) {
  if (!preferredDir) {
    throw new Error(
      "No output directory resolved (LM Studio chat working directory missing)."
    );
  }
  const dir = path.resolve(preferredDir);
  await fs.promises.mkdir(dir, { recursive: true });
  // Normalize the target name: honor an explicit ".png", append the
  // extension otherwise, or fall back to a timestamped default.
  const name =
    preferredFileName && preferredFileName.endsWith(".png")
      ? preferredFileName
      : preferredFileName
      ? `${preferredFileName}.png`
      : `generated-image-${isoStampCompact()}.png`;
  const abs = path.join(dir, name);
  try {
    if (!isSupportedImageBuffer(sourceBuffer)) {
      // Unknown format: keep the bytes, but label them honestly as .bin.
      const binName = name.replace(/\.png$/i, ".bin");
      const binAbs = path.join(dir, binName);
      await fs.promises.writeFile(binAbs, sourceBuffer);
      const stat = await fs.promises.stat(binAbs);
      return {
        savedPath: binAbs,
        fileName: binName,
        size: stat.size,
        mimeType: "application/octet-stream" as const,
        fileUrl: encodeFileUrl(binAbs),
      };
    }
    if (isPng(sourceBuffer)) {
      await fs.promises.writeFile(abs, sourceBuffer);
    } else {
      // JPEG/WEBP input: transcode to PNG before saving.
      const png = await imgToPng(sourceBuffer);
      await fs.promises.writeFile(abs, png);
    }
  } catch {
    // Conversion or write failed; write the original bytes as-is.
    await fs.promises.writeFile(abs, sourceBuffer);
  }
  const stat = await fs.promises.stat(abs);
  return {
    savedPath: abs,
    fileName: name,
    size: stat.size,
    mimeType: "image/png" as const,
    fileUrl: encodeFileUrl(abs),
  };
}
// Legacy TinyPreviewOptions and buildAndSavePreview removed.
// Use generatePreviewFromBuffer() from media-promotion-core/image.js instead.
// Most recent preview produced by this module (path, served URL, mime and
// dimensions); null before the first preview exists.
let lastPreviewRef: {
  path: string;
  url: string;
  mimeType: "image/jpeg" | "image/webp";
  width: number;
  height: number;
} | null = null;
// Most recent full-resolution original; null before the first save.
let lastOriginalRef: { path: string; url: string } | null = null;
// Per-chat variant tracking (LM Studio) - stores v-value and path for proper lookup
const LAST_VARIANTS_BY_LM_CHAT: Record<
  string,
  Array<{ v: number; path: string }>
> = {};
// PHASE 4: Sticky mode removed - mode is now effectively required when sources exist
// Last attachment id consumed per LM Studio chat, so the same attachment
// is not silently reused across requests.
const LAST_CONSUMED_ATTACHMENT_ID_BY_LM_CHAT: Record<string, string> = {};
/**
 * Get current connection settings from process.env (set by toolsProvider).
 * Falls back to the hardcoded engine defaults when env vars are not set.
 */
function getCurrentConnectionSettings() {
  const parsePort = (value?: string) =>
    value ? parseInt(value, 10) : undefined;
  return getEngineConnectionDefaults({
    host: process.env.DRAW_THINGS_HOST,
    httpPort: parsePort(process.env.DRAW_THINGS_HTTP_PORT),
    grpcPort: parsePort(process.env.DRAW_THINGS_GRPC_PORT),
  });
}
// Backend service: Draw Things only
// HTTP-backed Draw Things client built from the static engine defaults.
// ensureBackendReady() may later swap the active backend to gRPC.
const drawthingsService: ImageBackend = new DrawThingsService(
  engineConnectionDefaults.http?.baseUrl || "http://127.0.0.1:7860",
  engineConnectionDefaults.sharedSecret || undefined
);
// Active backend used by the tool handlers (HTTP client by default).
let imageService: ImageBackend = drawthingsService;
/**
 * Probe the Draw Things backend and select a transport (gRPC preferred,
 * HTTP fallback) according to the configured connection settings.
 *
 * Side effects:
 * - May replace the module-level `imageService` with a gRPC client.
 * - Mirrors gRPC connection settings into DRAWTHINGS_* env vars for the
 *   downstream service.
 * - Records the chosen transport on globalThis.__DT_SELECTED_TRANSPORT__.
 * - At startup, logs a non-blocking model/LoRA asset preflight against
 *   the gRPC server to diagnose silent fallback behavior.
 */
async function ensureBackendReady(): Promise<void> {
  // Draw Things backend only - always proceed with connection check
  // Use config-aware connection settings (reads from process.env)
  const conn = getCurrentConnectionSettings();
  const httpBaseUrl =
    conn.http?.baseUrl ||
    `http://${conn.http?.host || "127.0.0.1"}:${conn.http?.port || 7860}`;
  const resolvedGrpc = (conn.grpc?.target || `127.0.0.1:7859`).replace(
    /^grpc:\/\//i,
    ""
  );
  // Split "host:port" on the LAST colon (tolerates colon-bearing hosts).
  const [host, portStr] = (() => {
    const lastColon = resolvedGrpc.lastIndexOf(":");
    if (lastColon > -1)
      return [
        resolvedGrpc.slice(0, lastColon),
        resolvedGrpc.slice(lastColon + 1),
      ];
    return [resolvedGrpc, String(7859)];
  })();
  const portNum = parseInt(portStr, 10);
  // Plain TCP reachability probe with a timeout; resolves false, never rejects.
  const tcpReachable = (h: string, p: number, timeoutMs: number) =>
    new Promise<boolean>((resolve) => {
      try {
        const socket = net.connect({ host: h, port: p });
        const onOk = () => {
          cleanup();
          resolve(true);
        };
        const onErr = () => {
          cleanup();
          resolve(false);
        };
        const timer = setTimeout(() => onErr(), timeoutMs);
        const cleanup = () => {
          try {
            clearTimeout(timer);
          } catch {}
          try {
            socket.destroy();
          } catch {}
        };
        socket.once("connect", onOk);
        socket.once("error", onErr);
      } catch {
        resolve(false);
      }
    });
  const transport = conn.transport || "auto";
  const wantGrpc = transport === "grpc" || transport === "auto";
  const wantHttp = transport === "http" || transport === "auto";
  const grpcOk = wantGrpc ? await tcpReachable(host, portNum, 1200) : false;
  // probe HTTP only when desired
  const httpProbeHost = (() => {
    try {
      const u = new URL(httpBaseUrl);
      return u.hostname || "127.0.0.1";
    } catch {
      return "127.0.0.1";
    }
  })();
  const httpProbePort = (() => {
    try {
      const u = new URL(httpBaseUrl);
      return Number(u.port) || 7860;
    } catch {
      return 7860;
    }
  })();
  const httpOk = wantHttp
    ? await tcpReachable(httpProbeHost, httpProbePort, 1200)
    : false;
  const httpDesc = (() => {
    try {
      const u = new URL(httpBaseUrl);
      return `${u.protocol}//${u.hostname}:${u.port || 80}`;
    } catch {
      return httpBaseUrl;
    }
  })();
  log(
    [
      "Attempting to connect to Draw Things API at:",
      ` grpc://${host}:${portNum} - ${grpcOk ? "OK" : "UNAVAILABLE"}`,
      ` ${httpDesc} - ${httpOk ? "OK" : "UNAVAILABLE"}`,
      "",
      "Starting service...",
      "",
    ].join("\n")
  );
  let usedTransport: "grpc" | "http" | null = null;
  if (grpcOk) {
    // Map defaults to expected gRPC envs for downstream service compatibility
    try {
      if (conn.grpc?.target)
        process.env.DRAWTHINGS_GRPC_TARGET = conn.grpc.target;
      if (conn.grpc?.service)
        process.env.DRAWTHINGS_GRPC_SERVICE = conn.grpc.service;
      if (conn.grpc?.compression)
        process.env.DRAWTHINGS_GRPC_COMPRESSION = conn.grpc.compression as any;
      if (conn.grpc?.acceptEncoding)
        process.env.DRAWTHINGS_GRPC_ACCEPT_ENCODING = conn.grpc
          .acceptEncoding as any;
      if (conn.grpc?.protoPath)
        process.env.DRAWTHINGS_GRPC_PROTO = conn.grpc.protoPath as any;
      if (conn.sharedSecret)
        process.env.DRAWTHINGS_SHARED_SECRET = conn.sharedSecret as any;
    } catch {}
    try {
      // Lazy-import the gRPC service so HTTP-only setups never load it.
      const mod = await import("../services/drawThingsGrpcService.js");
      const GrpcCtor = (mod as any)?.DrawThingsGrpcService;
      if (typeof GrpcCtor !== "function")
        throw new Error("DrawThingsGrpcService not exported");
      const grpcSvc: ImageBackend = new GrpcCtor(`${host}:${portNum}`);
      const ok = await grpcSvc.checkApiConnection();
      if (ok) {
        imageService = grpcSvc;
        usedTransport = "grpc";
        // Startup-only: log if SOLL models/LoRAs exist on the gRPC server.
        // Non-blocking by design; it helps diagnose silent fallback behavior.
        try {
          const client = (grpcSvc as any)?.client;
          if (client) {
            // basename() helper tolerant of non-string / empty input.
            const bn = (s: unknown) => {
              try {
                return path.basename(String(s || "").trim());
              } catch {
                return "";
              }
            };
            const {
              MODEL_PRESET_TO_CAPABILITY_KEY,
              selectAutoModel,
              checkModeSupport,
            } = await import("../core-bundle.mjs");
            const { getModelRequiredFiles } = await import(
              "../services/modelOverlays.js"
            );
            const { defaultParams: defaultT2I } = await import(
              "../core-bundle.mjs"
            );
            const { defaultParamsImg2Img: defaultI2I } = await import(
              "../core-bundle.mjs"
            );
            const { defaultParamsEdit: defaultEdit } = await import(
              "../core-bundle.mjs"
            );
            const requiredModels = new Set<string>();
            const requiredLoras = new Set<string>();
            const optionalLoras = new Set<string>();
            // Defaults (used when model preset is "auto" or when no overlay is applied)
            if (defaultT2I?.model) requiredModels.add(bn(defaultT2I.model));
            if (defaultI2I?.model) requiredModels.add(bn(defaultI2I.model));
            if (defaultEdit?.model) requiredModels.add(bn(defaultEdit.model));
            // Default LoRAs are treated as optional to avoid hard assumptions.
            for (const d of [defaultT2I, defaultI2I, defaultEdit]) {
              const ls = Array.isArray((d as any)?.loras)
                ? (d as any).loras
                : [];
              for (const l of ls) {
                const f = bn(l?.file);
                if (f) optionalLoras.add(f);
              }
            }
            const toolModes = ["text2image", "image2image", "edit", "text2video", "image2video"] as const;
            // Map tool-mode names to the overlay-mode identifiers used by modelOverlays.
            const toOverlayMode = (m: (typeof toolModes)[number]) =>
              m === "text2image"
                ? "txt2img"
                : m === "image2image"
                ? "img2img"
                : m === "text2video"
                ? "txt2vid"
                : m === "image2video"
                ? "img2vid"
                : "edit";
            const presetKeys = Object.keys(
              MODEL_PRESET_TO_CAPABILITY_KEY || {}
            ) as string[];
            // Overlay SOLL files (models + LoRAs)
            for (const preset of presetKeys) {
              for (const tm of toolModes) {
                const supported = checkModeSupport(preset, tm);
                if (!(supported as any)?.supported) continue;
                const files: string[] = getModelRequiredFiles(
                  preset,
                  toOverlayMode(tm)
                );
                for (const fRaw of files) {
                  const f = bn(fRaw);
                  if (!f) continue;
                  // Heuristic: filenames containing "lora" count as LoRAs.
                  if (/lora/i.test(f)) requiredLoras.add(f);
                  else requiredModels.add(f);
                }
              }
            }
            // Custom Configs: Not checked at warmup (requires config access via toolsProvider).
            // Hard-fail happens per-request in the gRPC backend if model/LoRA is missing.
            // Explicit log of auto resolution
            const autoMap = toolModes.map((m) => `${m}→${selectAutoModel(m)}`);
            log(`[startup] auto preset resolution: ${autoMap.join(", ")}`);
            const allToCheck = [
              ...Array.from(requiredModels),
              ...Array.from(requiredLoras),
              ...Array.from(optionalLoras),
            ];
            const sharedSecret = process.env.DRAWTHINGS_SHARED_SECRET;
            const ex = await checkDrawThingsGrpcFilesExist({
              client,
              sharedSecret: sharedSecret || undefined,
              files: allToCheck,
            });
            if (!ex.usedFilesExist) {
              log(
                "[startup] gRPC asset preflight skipped (FilesExist RPC unavailable or failed)."
              );
            } else {
              const missingSet = new Set(ex.missing);
              const missingModels = Array.from(requiredModels).filter((f) =>
                missingSet.has(f)
              );
              const missingReqLoras = Array.from(requiredLoras).filter((f) =>
                missingSet.has(f)
              );
              const missingOptLoras = Array.from(optionalLoras).filter((f) =>
                missingSet.has(f)
              );
              if (missingModels.length === 0 && missingReqLoras.length === 0) {
                log(
                  `[startup] gRPC asset preflight OK: required models=${requiredModels.size}, required LoRAs=${requiredLoras.size}`
                );
              } else {
                if (missingModels.length) {
                  log(
                    `[startup] gRPC asset preflight MISSING models: ${missingModels.join(
                      ", "
                    )}`
                  );
                }
                if (missingReqLoras.length) {
                  log(
                    `[startup] gRPC asset preflight MISSING required LoRAs: ${missingReqLoras.join(
                      ", "
                    )}`
                  );
                }
              }
              if (missingOptLoras.length) {
                log(
                  `[startup] gRPC asset preflight (optional) missing LoRAs: ${missingOptLoras.join(
                    ", "
                  )}`
                );
              }
            }
          }
        } catch (e: any) {
          log(
            `[startup] gRPC asset preflight warning: ${e?.message || String(e)}`
          );
        }
      } else {
        // fall back to HTTP if desired and reachable
        if (httpOk && (transport === "auto" || transport === "http")) {
          imageService = drawthingsService;
          imageService.setBaseUrl(httpBaseUrl);
          usedTransport = "http";
        } else {
          console.error(
            "Draw Things gRPC reachable but not ready; no HTTP fallback available."
          );
        }
      }
    } catch (e: any) {
      console.error(
        `Draw Things gRPC init failed: ${e?.message || String(e)}.`
      );
      // prefer HTTP fallback on init error
      if (httpOk && (transport === "auto" || transport === "http")) {
        imageService = drawthingsService;
        imageService.setBaseUrl(httpBaseUrl);
        usedTransport = "http";
      }
    }
  } else if (httpOk) {
    imageService = drawthingsService;
    imageService.setBaseUrl(httpBaseUrl);
    usedTransport = "http";
  } else {
    imageService = drawthingsService; // not connected yet
  }
  (globalThis as any).__DT_SELECTED_TRANSPORT__ = usedTransport;
  // Final connectivity confirmation with the chosen transport; best-effort.
  try {
    const isApiConnected = await imageService.checkApiConnection();
    const t = (globalThis as any).__DT_SELECTED_TRANSPORT__;
    const suffix = t === "grpc" ? " - gRPC" : t === "http" ? " - HTTP" : "";
    if (isApiConnected) {
      log(`Connected to Draw Things API${suffix}.`);
      if (t === "grpc") {
        const sec = (imageService as any)?.currentSecurity ?? (globalThis as any).__DT_GRPC_TLS_SELECTED__ ?? "unknown";
        log(`[gRPC] security: ${sec}`);
      }
    }
    else log(`Failed to connect to Draw Things API.`);
  } catch {}
}
/**
 * Startup warmup entrypoint.
 * Invoked from the Tools Provider during plugin initialization so the backend probe
 * (and gRPC model/LoRA preflight logging) happens before the first tool call.
 * Any error thrown by the probe propagates to the caller unhandled.
 */
export async function warmupBackendAtStartup(): Promise<void> {
  await ensureBackendReady();
}
// Utility: read last audit prompt and mode for context
/**
 * Read the newest record from generate-image-plugin.audit.jsonl and return
 * the prompt that was actually used plus the request mode. Records are
 * blank-line separated JSON blobs; the last parseable object wins.
 * Returns null when the file is missing, empty, or has no parseable record.
 */
async function getLastAuditPromptAndMode(): Promise<{
  prompt?: string;
  mode?: string;
} | null> {
  try {
    const auditPath = path.join(logsDir, "generate-image-plugin.audit.jsonl");
    const raw = await fs.promises.readFile(auditPath, "utf8").catch(() => "");
    if (!raw) return null;
    const records = raw
      .split(/\n\s*\n/g)
      .map((chunk) => chunk.trim())
      .filter((chunk) => chunk.length > 0)
      .reverse();
    for (const record of records) {
      try {
        const obj = JSON.parse(record);
        if (obj && typeof obj === "object") {
          // Read prompt from output.prompt_used (what was actually used)
          return {
            prompt:
              typeof obj.output?.prompt_used === "string"
                ? obj.output.prompt_used
                : undefined,
            mode: typeof obj.mode === "string" ? obj.mode : undefined,
          };
        }
      } catch {}
    }
    return null;
  } catch {
    return null;
  }
}
/**
 * Locate an LM Studio conversation file.
 * With a chatId: return the path of `<chatId>.conversation.json` when that
 * file exists, otherwise null. Without one: return the most recently
 * modified `.conversation.json` in the conversations directory, or null
 * when the directory is missing or holds no conversation files.
 */
async function getLMConversationFilePath(
  chatId?: string
): Promise<string | null> {
  try {
    const convDir = path.join(findLMStudioHome(), "conversations");
    if (!fs.existsSync(convDir)) return null;
    if (chatId) {
      const candidate = path.join(convDir, `${chatId}.conversation.json`);
      return (await fs.promises
        .stat(candidate)
        .then((s) => (s.isFile() ? candidate : null))
        .catch(() => null)) as string | null;
    }
    const entries = await fs.promises
      .readdir(convDir)
      .catch(() => [] as string[]);
    // Stat every conversation file in parallel; skip anything unreadable.
    const statted = await Promise.all(
      entries
        .filter((f) => f.endsWith(".conversation.json"))
        .map((f) => path.join(convDir, f))
        .map(async (p) => {
          try {
            const s = await fs.promises.stat(p);
            return s.isFile() ? { p, t: s.mtimeMs } : null;
          } catch {
            return null;
          }
        })
    );
    const candidates = statted.filter(Boolean) as { p: string; t: number }[];
    if (candidates.length === 0) return null;
    candidates.sort((a, b) => b.t - a.t);
    return candidates[0].p;
  } catch {
    return null;
  }
}
/**
 * Scan an LM Studio conversation file for the most recently mentioned
 * group of generated image variants (…-v1/-v2/-v3) and return the absolute
 * paths of the variant originals that still exist on disk.
 *
 * Two kinds of evidence are merged per base filename:
 *  - direct file:// links to the original "generated-image-*-vN.png"
 *  - preview links ("preview-generated-image-*-vN.jpg|webp"), rewritten
 *    back to their original PNG location
 * The group whose last mention appears furthest into the file wins.
 *
 * Returns null when there is no conversation file, no variant references,
 * or none of the referenced files exist anymore.
 */
async function getLastVariantGroupFromLMConversation(
  chatId?: string | null
): Promise<string[] | null> {
  try {
    const convPath = await getLMConversationFilePath(chatId || undefined);
    if (!convPath) return null;
    const text = await fs.promises.readFile(convPath, "utf8");
    // Per base filename: last byte offset seen, which v-numbers occurred,
    // and the original PNG path recorded for each v-number.
    type GroupInfo = {
      lastIndex: number;
      variants: Set<number>;
      originals: Map<number, string>;
    };
    const byBase: Map<string, GroupInfo> = new Map();
    // Pass 1: direct links to original variant PNGs.
    const reOrig =
      /file:\/\/[\S)"']+\/(images|working-directories\/\d+)\/(generated-image-[^\/]*)-v(\d)\.png/gi;
    let m: RegExpExecArray | null;
    while ((m = reOrig.exec(text)) != null) {
      try {
        const basePlus = m[2];
        const vNum = parseInt(m[3], 10);
        const urlStr = m[0].match(/file:\/\/[^^\s)"']+/i)?.[0];
        if (!urlStr) continue;
        let absPath: string | null = null;
        try {
          absPath = fileURLToPath(urlStr);
        } catch {
          absPath = null;
        }
        if (!absPath) continue;
        const g = byBase.get(basePlus) || {
          lastIndex: m.index,
          variants: new Set<number>(),
          originals: new Map<number, string>(),
        };
        g.lastIndex = Math.max(g.lastIndex, m.index);
        g.variants.add(vNum);
        g.originals.set(vNum, absPath);
        byBase.set(basePlus, g);
      } catch {}
    }
    // Pass 2: preview links. Rewrite each preview URL back to the original
    // PNG location (drop "previews/" dir, "preview-" prefix, swap extension)
    // before resolving it to a filesystem path.
    const rePrev =
      /file:\/\/[\S)"']+\/(images\/previews|working-directories\/\d+)\/(preview-generated-image-[^\/]*)-v(\d)\.(jpg|jpeg|webp)/gi;
    while ((m = rePrev.exec(text)) != null) {
      try {
        const nameWithPreview = m[2];
        const vNum = parseInt(m[3], 10);
        const urlStr = m[0]
          .replace(/\/images\/previews\//i, "/images/")
          .replace(
            /\/working-directories\/(\d+)\//i,
            "/working-directories/$1/"
          )
          .replace(/preview-/, "")
          .replace(/\.(jpg|jpeg|webp)$/i, ".png");
        let absPath: string | null = null;
        try {
          const urlOnly = urlStr.match(/file:\/\/[^^\s)"']+/i)?.[0];
          absPath = urlOnly ? fileURLToPath(urlOnly) : null;
        } catch {
          absPath = null;
        }
        if (!absPath) continue;
        const originalBase = nameWithPreview.replace(/^preview-/, "");
        const g = byBase.get(originalBase) || {
          lastIndex: m.index,
          variants: new Set<number>(),
          originals: new Map<number, string>(),
        };
        g.lastIndex = Math.max(g.lastIndex, m.index);
        g.variants.add(vNum);
        g.originals.set(vNum, absPath);
        byBase.set(originalBase, g);
      } catch {}
    }
    if (byBase.size === 0) return null;
    // Pick the group whose last mention is latest in the conversation.
    const best = Array.from(byBase.entries())
      .map(([base, g]) => ({ base, g }))
      .sort((a, b) => b.g.lastIndex - a.g.lastIndex)[0];
    if (!best) return null;
    // Keep only variants v1..v3 whose original file still exists on disk.
    const out: string[] = [];
    for (let v = 1; v <= 3; v++) {
      const p = best.g.originals.get(v);
      if (!p) continue;
      const exists = await fs.promises
        .stat(p)
        .then((s) => s.isFile())
        .catch(() => false);
      if (exists) out.push(p);
    }
    return out.length > 0 ? out : null;
  } catch {
    return null;
  }
}
// Decide which event is the most recent in the conversation file.
// Returns:
// - "variant" when the latest reference to a generated image is v1 (original or preview)
// - "attachment" when the latest user image attachment appears after the last variant
// - null when neither could be detected
async function getLastEventTypeFromLMConversation(
  chatId?: string | null
): Promise<"variant" | "attachment" | null> {
  try {
    const convPath = await getLMConversationFilePath(chatId || undefined);
    if (!convPath) return null;
    const text = await fs.promises.readFile(convPath, "utf8");
    // Byte offset of the last v1 reference. Only v1 is tracked: it marks
    // the start of a variant group; v2/v3 mentions are deliberately ignored.
    let lastVariantIdx = -1;
    let m: RegExpExecArray | null;
    const reOrig =
      /file:\/\/[\S)"']+\/(images|working-directories\/\d+)\/(generated-image-[^\/]*)-v(\d)\.png/gi;
    while ((m = reOrig.exec(text)) != null) {
      const vNum = parseInt(m[3], 10);
      if (vNum === 1) lastVariantIdx = Math.max(lastVariantIdx, m.index);
    }
    const rePrev =
      /file:\/\/[\S)"']+\/(images\/previews|working-directories\/\d+)\/(preview-generated-image-[^\/]*)-v(\d)\.(jpg|jpeg|webp)/gi;
    while ((m = rePrev.exec(text)) != null) {
      const vNum = parseInt(m[3], 10);
      if (vNum === 1) lastVariantIdx = Math.max(lastVariantIdx, m.index);
    }
    // Byte offset of the last user-uploaded image attachment in the JSON.
    let lastAttachmentIdx = -1;
    const reAtt =
      /"(fileIdentifier|identifier)"\s*:\s*"([^"\n]+\.(png|jpg|jpeg|webp|gif|bmp|tif|tiff|heic))"/gi;
    while ((m = reAtt.exec(text)) != null) {
      lastAttachmentIdx = Math.max(lastAttachmentIdx, m.index);
    }
    if (lastVariantIdx < 0 && lastAttachmentIdx < 0) return null;
    // Strict ">" means equal offsets would report "attachment".
    return lastVariantIdx > lastAttachmentIdx ? "variant" : "attachment";
  } catch {
    return null;
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// UNIFIED INPUT NORMALIZATION
// Single function for ALL input image preprocessing (i2i, edit, Canvas, Moodboard)
// ─────────────────────────────────────────────────────────────────────────────
// Why an input image was (or was not) transformed:
//  - clamped_to_requested_raw: resized to the caller-requested output dims
//  - normalized_to_constraints: adjusted for sum/alignment/min-dim limits
//  - converted_to_png: dimensions kept, only re-encoded as PNG
//  - unchanged: already a PNG within constraints
export type NormalizeInputReason =
  | "clamped_to_requested_raw"
  | "normalized_to_constraints"
  | "converted_to_png"
  | "unchanged";
// Result of normalizeInputBuffer(): the PNG buffer plus before/after
// metadata intended for audit logging.
export interface NormalizeInputResult {
  buf: Buffer;
  preprocess: {
    // Dimensions/format/bytes of the incoming buffer. `format` is only
    // set when the input was detected as PNG; other formats stay undefined.
    original: {
      width?: number;
      height?: number;
      format?: string;
      bytes?: number;
    };
    // Dimensions/format/bytes after normalization (always encoded as PNG).
    adjusted: {
      width?: number;
      height?: number;
      format?: string;
      bytes?: number;
    };
    reason: NormalizeInputReason;
  };
  // Longest side after a resize; undefined when dimensions were untouched.
  normalizedLongSide?: number;
}
/**
 * Unified input normalization for all image sources (Attachments, Variants, Pictures).
 * Applies in order:
 * 1. Adopt target aspect ratio (if user specified output dimensions)
 * 2. Sum constraint (w + h <= targetSum)
 * 3. Alignment (multiples of 64)
 * 4. Minimum dimension (256px)
 * 5. Convert to PNG
 *
 * @param buf raw image bytes
 * @param opts.requestedRawW / opts.requestedRawH caller-requested output
 *        dimensions; when BOTH are positive finite numbers the source is
 *        cover-resized to that exact aspect ratio
 * @param opts.logPrefix tag used in log lines (defaults to "[normalize]")
 * @returns the PNG buffer plus before/after metadata for auditing
 */
export async function normalizeInputBuffer(
  buf: Buffer,
  opts?: {
    requestedRawW?: number;
    requestedRawH?: number;
    logPrefix?: string;
  }
): Promise<NormalizeInputResult> {
  const prefix = opts?.logPrefix || "[normalize]";
  const size = await imgGetSize(buf);
  let w = size.width || 0;
  let h = size.height || 0;
  // Backend constraints: pixel alignment, minimum edge, max width+height sum.
  const align = drawthingsLimits.align;
  const minDim = drawthingsLimits.minDim;
  const targetSum = drawthingsLimits.targetSum;
  const origW = w;
  const origH = h;
  // Format detection here is PNG-or-unknown only (no JPEG/WEBP sniffing).
  const origFmt = isPng(buf) ? "png" : undefined;
  const origBytes = buf.byteLength;
  let reason: NormalizeInputReason = "unchanged";
  // 1. Adopt target aspect ratio when both requested dims are given.
  // The adjusted image must match the OUTPUT format (e.g. landscape)
  // rather than preserving the source image's aspect ratio.
  // resizeCoverToPng() then uniformly scales + centre-crops the original
  // to fill these dimensions without distortion.
  // Subsequent steps (sum clamp, 64-alignment, minDim) refine the size.
  const hasReqW =
    typeof opts?.requestedRawW === "number" &&
    Number.isFinite(opts.requestedRawW) &&
    opts.requestedRawW > 0;
  const hasReqH =
    typeof opts?.requestedRawH === "number" &&
    Number.isFinite(opts.requestedRawH) &&
    opts.requestedRawH > 0;
  if (hasReqW && hasReqH) {
    w = opts!.requestedRawW!;
    h = opts!.requestedRawH!;
    reason = "clamped_to_requested_raw";
    log(`${prefix} adopting target dimensions: ${origW}x${origH} → ${w}x${h}`);
  }
  // 2. Sum constraint: w + h <= targetSum
  const currentSum = w + h;
  if (currentSum > targetSum) {
    // Solve for a height that keeps the aspect ratio while w + h = targetSum,
    // then floor both to the alignment grid.
    const aspect = w / Math.max(1, h);
    let newH = targetSum / (aspect + 1);
    let newW = aspect * newH;
    newW = Math.max(align, Math.floor(newW / align) * align);
    newH = Math.max(align, Math.floor(newH / align) * align);
    // Reduce further if still over
    while (newW + newH > targetSum && (newW > align || newH > align)) {
      if (newW >= newH) newW = Math.max(align, newW - align);
      else newH = Math.max(align, newH - align);
    }
    w = newW;
    h = newH;
    if (reason === "unchanged") reason = "normalized_to_constraints";
  }
  // 3. Alignment: round to multiples of 64
  if (w % align !== 0 || h % align !== 0) {
    w = Math.max(align, Math.floor(w / align) * align);
    h = Math.max(align, Math.floor(h / align) * align);
    if (reason === "unchanged") reason = "normalized_to_constraints";
  }
  // 4. Minimum dimension: upscale only if needed to satisfy minDim
  if (w < minDim || h < minDim) {
    const scale = minDim / Math.min(w, h);
    w = Math.round(w * scale);
    h = Math.round(h * scale);
    // Re-align after upscale
    w = Math.max(align, Math.floor(w / align) * align);
    h = Math.max(align, Math.floor(h / align) * align);
    // Ensure sum constraint still met after minDim upscale
    while (w + h > targetSum && (w > minDim || h > minDim)) {
      if (w > h) w = Math.max(minDim, w - align);
      else h = Math.max(minDim, h - align);
    }
    if (reason === "unchanged") reason = "normalized_to_constraints";
  }
  // 5. Resize and/or convert to PNG
  let outBuf: Buffer;
  if (w !== origW || h !== origH) {
    outBuf = await imgResizeCoverToPng(buf, w, h);
    if (reason === "unchanged") reason = "normalized_to_constraints";
    log(
      `${prefix} dimension normalization: ${origW}x${origH} → ${w}x${h} (sum=${
        w + h
      })`
    );
  } else {
    // Dimensions untouched: pass PNGs through, re-encode everything else.
    if (isPng(buf)) {
      outBuf = buf;
    } else {
      outBuf = await imgToPng(buf);
      if (reason === "unchanged") reason = "converted_to_png";
    }
  }
  return {
    buf: outBuf,
    preprocess: {
      original: {
        width: origW,
        height: origH,
        format: origFmt,
        bytes: origBytes,
      },
      adjusted: {
        width: w,
        height: h,
        format: "png",
        bytes: outBuf.byteLength,
      },
      reason,
    },
    normalizedLongSide: w !== origW || h !== origH ? Math.max(w, h) : undefined,
  };
}
// Whitelisted top-level input keys for generate_image requests.
// NOTE(review): not referenced within this chunk — presumably consumed by a
// parameter sanitizer elsewhere in the file; confirm before removing.
const ALLOWED_GEN_INPUT_KEYS = [
  "prompt",
  "width",
  "height",
  "imageFormat",
  "quality",
  "variants",
  "canvas",
  "moodboard",
] as const;
export async function handleGenerateImage(
pluginParams: any,
onProgress?: ProgressCallback
) {
await ensureBackendReady().catch((e) => {
log(`[startup] ensureBackendReady failed: ${String(e)}`);
});
try {
const rawIncoming = pluginParams || {};
const parsed = GenerateToolParamsSchemaMinimalStrict.safeParse(rawIncoming);
if (!parsed.success) {
return {
content: [
{
type: "text",
text: `Invalid generate_image parameters: ${formatZodError(
parsed.error
)}`,
},
],
};
}
const input = parsed.data as any;
// ─────────────────────────────────────────────────────────────────────────
// HARD LIMIT CHECK: Reject requests exceeding maxWidth/maxHeight immediately.
// No silent clamping – explicit error with clear guidance.
// ─────────────────────────────────────────────────────────────────────────
{
const reqW = input.width;
const reqH = input.height;
const maxW = drawthingsLimits.maxWidth;
const maxH = drawthingsLimits.maxHeight;
if (typeof reqW === "number" && reqW > maxW) {
log(`[validation] REJECTED: width ${reqW} exceeds maxWidth ${maxW}`);
return {
content: [
{
type: "text",
text: `Invalid width: ${reqW}px exceeds maximum allowed width of ${maxW}px. Please use width ≤ ${maxW}.`,
},
],
isError: true as const,
};
}
if (typeof reqH === "number" && reqH > maxH) {
log(`[validation] REJECTED: height ${reqH} exceeds maxHeight ${maxH}`);
return {
content: [
{
type: "text",
text: `Invalid height: ${reqH}px exceeds maximum allowed height of ${maxH}px. Please use height ≤ ${maxH}.`,
},
],
isError: true as const,
};
}
}
// Preserve the user-requested mode for logging/audit.
// "edit" is a variant of image2image with different defaults and (future) multi-source support.
const requestedMode = input.mode as
| "text2image"
| "image2image"
| "edit"
| "text2video"
| "image2video"
| undefined;
const isEditMode = requestedMode === "edit";
log(`generate_image input: ${JSON.stringify(input)}`);
try {
void getHealthyServerBaseUrl();
} catch {}
// Validate model/mode compatibility early
const modelPreset = (input.model as string) || "auto";
const modeForValidation = requestedMode || "text2image"; // default mode if not specified
// Import capability check and custom configs (dynamic to avoid circular deps at module load)
const {
checkModeSupport,
checkModeSupportWithCustom,
selectAutoModel,
detectImageModelCapabilities,
getCapabilityKeyForPreset,
} = await import("../core-bundle.mjs");
const { getAvailableCustomCombinations, getCustomPreset } = await import(
"../services/customConfigsLoader.js"
);
// Use extended check that includes Custom Configs info
const modeCheck = checkModeSupportWithCustom(
modelPreset,
modeForValidation,
getAvailableCustomCombinations
);
if (!modeCheck.supported) {
log(`[validation] mode/model incompatible: ${modeCheck.reason}`);
return {
content: [
{
type: "text",
text: modeCheck.reason,
},
],
isError: true as const,
};
}
// NOTE: "model=auto" means: do not apply an overlay; backend uses mode-specific defaults.
// We still compute an "effectiveModelPreset" for capability logic (e.g. edit-mode limits),
// but logging should reflect the engine model that will actually be used.
const effectiveModelPreset =
modelPreset === "auto" ? selectAutoModel(modeForValidation) : modelPreset;
// Resolve actual .ckpt filename for logging
const modeForFilename =
modeForValidation === "edit"
? "edit"
: modeForValidation === "image2image"
? "img2img"
: modeForValidation === "text2video"
? "txt2vid"
: modeForValidation === "image2video"
? "img2vid"
: "txt2img";
// If the user did not pick a model (or explicitly picked "auto"), the backend will use
// the per-mode defaultParams*.model value (no overlay). Log that to avoid confusion.
let engineDefaultModel: string | null = null;
if (modelPreset === "auto") {
try {
if (modeForFilename === "txt2img") {
const { defaultParams } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParams as any)?.model === "string"
? (defaultParams as any).model
: null;
} else if (modeForFilename === "img2img") {
const { defaultParamsImg2Img } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParamsImg2Img as any)?.model === "string"
? (defaultParamsImg2Img as any).model
: null;
} else if (modeForFilename === "txt2vid") {
const { defaultParamsText2Video } = await import(
"../services/defaultParamsDrawThingsText2Video.js"
);
engineDefaultModel =
typeof (defaultParamsText2Video as any)?.model === "string"
? (defaultParamsText2Video as any).model
: null;
} else if (modeForFilename === "img2vid") {
const { defaultParamsImage2Video } = await import(
"../services/defaultParamsDrawThingsImage2Video.js"
);
engineDefaultModel =
typeof (defaultParamsImage2Video as any)?.model === "string"
? (defaultParamsImage2Video as any).model
: null;
} else {
const { defaultParamsEdit } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParamsEdit as any)?.model === "string"
? (defaultParamsEdit as any).model
: null;
}
} catch {}
}
// Custom-config fallback: when no overlay filename is known for this
// preset/mode combination, look up the Custom Preset's params.model and
// reduce it to a bare filename for logging. Otherwise stays null.
const customPresetModel: string | null = (() => {
  const overlayFilename = getModelFilename(effectiveModelPreset, modeForFilename);
  if (overlayFilename) return null;
  const preset = getCustomPreset(`${modeForValidation}.${effectiveModelPreset}`);
  const raw = preset?.params?.model;
  if (typeof raw !== "string") return null;
  const trimmed = raw.trim();
  return trimmed ? path.basename(trimmed) : null;
})();
const effectiveModelFilename =
(engineDefaultModel ? path.basename(engineDefaultModel) : null) ||
getModelFilename(effectiveModelPreset, modeForFilename) ||
customPresetModel ||
effectiveModelPreset;
log(
`[validation] model=${modelPreset} → engineModel=${effectiveModelFilename} mode=${modeForValidation}`
);
// Draw Things backend only
const svc: ImageBackend = imageService;
const resolvedName = "drawthings";
const requestedVariants = input.variants;
const usedVariants =
typeof requestedVariants === "number"
? Math.max(1, Math.min(4, Math.round(requestedVariants)))
: 1;
if (
typeof requestedVariants === "number" &&
requestedVariants !== usedVariants
) {
log(`variants: requested=${requestedVariants} used=${usedVariants}`);
}
log(`generate_image: using backend='${resolvedName}'`);
const mode =
(input.mode as "text2image" | "image2image" | "edit" | "text2video" | "image2video" | undefined) ||
"text2image";
const rawCanvas =
typeof input.canvas === "string" ? input.canvas : undefined;
const rawMoodboard = Array.isArray((input as any).moodboard)
? ((input as any).moodboard as unknown[])
: undefined;
const moodboardNotations: string[] = (rawMoodboard || [])
.filter((x) => typeof x === "string")
.map((x) => String(x));
let result: ImageGenerationResult | any;
let effectiveMode: "text2image" | "image2image" | "edit" | "text2video" | "image2video" = "text2image";
let sourceTag: string | null = null;
let sourceVariantUsed: number | undefined = undefined;
let sourceKind: "attachment" | "variant" | "picture" | undefined =
undefined;
let sourceOriginAbs: string | undefined = undefined;
let sourceOriginalName: string | undefined = undefined; // Real original filename (e.g., "Katze.png")
let hasFreshAttachment: boolean = false;
let isAttachmentSource: boolean = false;
// Track reference metadata for summary (used in edit mode multi-reference)
let usedReferenceMeta: Array<{
type: "attachment" | "variant" | "picture";
index: number;
isCanvas: boolean;
originPath?: string;
originalName?: string;
}> = [];
// Track per-reference preprocessing (normalization) metadata for audit
let usedReferencePreprocess: Array<{
type: "attachment" | "variant" | "picture";
index: number;
role: "canvas" | "moodboard";
originPath?: string;
originalName?: string;
preprocess: {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
}> = [];
let currentLmChatId: string | null = null;
let currentLmWorkingDir: string | null = null;
let lmResolverConfidence: "high" | "medium" | "low" | undefined = undefined;
let lmResolverReason: string | undefined = undefined;
let stickyScope: "none" | "lm_chat" = "none";
let sourceFileName: string | undefined = undefined;
let lmCrosscheckInfo:
| { referenced: boolean; group_base?: string }
| undefined = undefined;
let sourcePreprocess:
| undefined
| {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
let normalizedToLongSide: number | undefined = undefined;
// requestedRaw: what the user asked for (or the original source size when user omitted).
// requestedEffective: the internally used aligned/clamped size (multiples of 64).
// Backend may still run at a different internal size (e.g. i2i normalization); we track that separately.
let requestedRawW: number | undefined = undefined;
let requestedRawH: number | undefined = undefined;
let requestedEffectiveW: number | undefined = undefined;
let requestedEffectiveH: number | undefined = undefined;
if (resolvedName === "drawthings") {
let srcBuf: Buffer | undefined;
// NOTE: chatWdForContext is computed dynamically inside getLastVariantGroupForContext
// to ensure it uses the latest currentLmChatId/currentLmWorkingDir after resolution.
// Resolve the chat working directory lazily so callers always see the
// latest currentLmWorkingDir/currentLmChatId values at call time.
const getChatWdForContext = (): string | null => {
  if (currentLmWorkingDir) return currentLmWorkingDir;
  if (!currentLmChatId) return null;
  return getLMStudioWorkingDir(currentLmChatId);
};
// Convenience wrapper over getAllVariantsForContext: returns the variant
// file paths in v-order, or null when no variants are known.
const getLastVariantGroupForContext = async (): Promise<
  string[] | null
> => {
  const entries = await getAllVariantsForContext();
  if (entries && entries.length > 0) {
    const paths: string[] = [];
    for (const entry of entries) paths.push(entry.path);
    return paths;
  }
  return null;
};
// V2: Returns all variants with stable v-index for proper lookup
const getAllVariantsForContext = async (): Promise<Array<{
v: number;
path: string;
}> | null> => {
const chatWdForContext = getChatWdForContext();
// Primary: chat working directory state file (contains ALL variants, not just latest generation)
try {
if (chatWdForContext) {
const st: any = await readState(chatWdForContext);
if (st && Array.isArray(st.variants) && st.variants.length > 0) {
const variants = [...st.variants]
.filter((v: any) => v && typeof v.filename === "string")
.sort((a: any, b: any) => (a.v || 0) - (b.v || 0));
const result = variants.map((v: any) => ({
v: v.v || 1,
path: path.join(chatWdForContext, v.filename),
}));
if (result.length > 0) return result;
}
}
} catch {}
// Fallback: in-process memory (only contains latest generation, not all variants)
// NOTE: This is less reliable than state file but useful before state is written
try {
if (currentLmChatId) {
const mem = LAST_VARIANTS_BY_LM_CHAT[currentLmChatId];
if (Array.isArray(mem) && mem.length > 0) return mem;
}
} catch {}
// Fallback: legacy heuristic from SSOT conversation file (returns paths only, assume v=1..n)
try {
const group = await getLastVariantGroupFromLMConversation(
currentLmChatId || undefined
);
if (Array.isArray(group) && group.length > 0) {
return group.map((p, i) => ({ v: i + 1, path: p }));
}
} catch {}
return null;
};
// Try to resolve current LM Studio chat for scoping
try {
// Prefer the generator-provided context (deterministic within the current turn/tool-call loop)
const active = getActiveChatContext();
if (
active &&
typeof active.chatId === "string" &&
/^\d+$/.test(active.chatId)
) {
currentLmChatId = active.chatId;
currentLmWorkingDir = active.workingDir;
lmResolverConfidence = "high";
lmResolverReason = `active_context${
active.requestId ? `:${active.requestId}` : ""
}`;
}
} catch {}
try {
if (currentLmChatId) {
// already resolved via active context
log(
`[chatId] resolved via active_context: chatId=${currentLmChatId} workingDir=${
currentLmWorkingDir || "null"
}`
);
} else {
// Fallback: filesystem heuristic (picks newest *.conversation.json)
console.warn(
"[generate_image] No deterministic chat context available – falling back to filesystem heuristic (newest conversation file). " +
"This may happen if the tool is called outside a normal Generator turn or if context TTL (60s) expired."
);
const lm = await resolveActiveLMStudioChatId({
requireRecentMtimeSec: 600,
});
log(
`[chatId] fallback heuristic result: ok=${(lm as any)?.ok} chatId=${
(lm as any)?.chatId || "null"
} reason=${(lm as any)?.reason || "unknown"}`
);
if ((lm as any)?.ok) {
currentLmChatId = (lm as any).chatId;
lmResolverConfidence = (lm as any).confidence;
lmResolverReason = (lm as any).reason;
}
}
} catch {}
// ========================================================================
// PHASE 3: Global error rules (before source resolution)
// ========================================================================
// Count available sources from state
let stateAttachmentCount = 0;
let stateVariantCount = 0;
let statePictureCount = 0;
const chatWdForValidation = getChatWdForContext();
if (chatWdForValidation) {
try {
const st = await readState(chatWdForValidation);
stateAttachmentCount = Array.isArray(st.attachments)
? st.attachments.length
: 0;
stateVariantCount = Array.isArray(st.variants)
? st.variants.length
: 0;
statePictureCount = Array.isArray((st as any).pictures)
? (st as any).pictures.length
: 0;
} catch (e) {
log(`[phase3] failed to read state for source counts: ${String(e)}`);
}
}
const totalSourcesInState =
stateAttachmentCount + stateVariantCount + statePictureCount;
log(
`[phase3] sources in state: attachments=${stateAttachmentCount}, variants=${stateVariantCount}, pictures=${statePictureCount}, total=${totalSourcesInState}`
);
// (Legacy multi-source checks removed: new interface uses canvas+moodboard.)
// Canvas + moodboard resolution (new interface)
type SourceSel = { pool: SourcePool; index: number; notation: string };
// Snapshot the selectable source pools for the current chat context:
// attachments/pictures come from the persisted state file, variants from
// the variant resolver. Failures degrade to empty pools (logged).
const loadSourceState = async (): Promise<{
  chatWd: string | null;
  attachments: any[];
  pictures: any[];
  variants: Array<{ v: number; path: string }>;
}> => {
  const chatWd = getChatWdForContext();
  let attachments: any[] = [];
  let pictures: any[] = [];
  let variants: Array<{ v: number; path: string }> = [];
  if (chatWd) {
    try {
      const state: any = await readState(chatWd);
      if (Array.isArray(state?.attachments)) attachments = state.attachments;
      if (Array.isArray(state?.pictures)) pictures = state.pictures;
    } catch (e) {
      log(`[state] failed to read attachments/pictures: ${String(e)}`);
    }
  }
  try {
    const all = await getAllVariantsForContext();
    variants = all || [];
  } catch (e) {
    log(`[state] failed to enumerate variants: ${String(e)}`);
  }
  return { chatWd, attachments, pictures, variants };
};
// Translate a user-supplied source notation ("a1" / "v2" / "p3", or a bare
// digit) into a pool + index selection with a canonical notation string.
// A bare digit is only accepted when exactly one pool is non-empty;
// otherwise the choice is ambiguous and we throw with a prefix hint.
const resolveNotation = (
  notationRaw: string,
  ctx: {
    attachments: any[];
    pictures: any[];
    variants: Array<{ v: number; path: string }>;
  }
): SourceSel => {
  const letterFor = (pool: SourcePool): string =>
    pool === "attachment" ? "a" : pool === "variant" ? "v" : "p";
  const prefixed = parsePrefixedNotation(notationRaw);
  if (prefixed) {
    return {
      pool: prefixed.pool,
      index: prefixed.index,
      notation: `${letterFor(prefixed.pool)}${prefixed.index}`,
    };
  }
  const bareIndex = parseDigitOnlyNotation(notationRaw);
  if (bareIndex != null) {
    const nonEmptyPools: SourcePool[] = [];
    if (ctx.attachments.length > 0) nonEmptyPools.push("attachment");
    if (ctx.variants.length > 0) nonEmptyPools.push("variant");
    if (ctx.pictures.length > 0) nonEmptyPools.push("picture");
    if (nonEmptyPools.length === 0) {
      throw new Error("No sources available");
    }
    if (nonEmptyPools.length > 1) {
      const abbrev = nonEmptyPools.map(letterFor).join("/");
      throw new Error(`Ambiguous: use prefix (${abbrev})`);
    }
    const pool = nonEmptyPools[0];
    return {
      pool,
      index: bareIndex,
      notation: `${letterFor(pool)}${bareIndex}`,
    };
  }
  throw new Error(
    `Invalid source notation: ${String(notationRaw || "").trim()}`
  );
};
const { chatWd, attachments, pictures, variants } =
await loadSourceState();
const totalSources =
attachments.length + variants.length + pictures.length;
// Auto-pick the source when exactly one pool holds exactly one entry.
// The stable index comes from the entry's own a/v/p field (default 1).
// Throws when the situation is ambiguous (caller handles the message).
const autoSelectSingleSource = (): SourceSel => {
  const onlyAttachment =
    attachments.length === 1 &&
    variants.length === 0 &&
    pictures.length === 0;
  if (onlyAttachment) {
    const idx = typeof attachments[0]?.a === "number" ? attachments[0].a : 1;
    return { pool: "attachment", index: idx, notation: `a${idx}` };
  }
  const onlyVariant =
    variants.length === 1 &&
    attachments.length === 0 &&
    pictures.length === 0;
  if (onlyVariant) {
    const idx = typeof variants[0]?.v === "number" ? variants[0].v : 1;
    return { pool: "variant", index: idx, notation: `v${idx}` };
  }
  const onlyPicture =
    pictures.length === 1 &&
    attachments.length === 0 &&
    variants.length === 0;
  if (onlyPicture) {
    const idx = typeof pictures[0]?.p === "number" ? pictures[0].p : 1;
    return { pool: "picture", index: idx, notation: `p${idx}` };
  }
  throw new Error("Ambiguous source – specify canvas explicitly");
};
let resolvedCanvas: SourceSel | null = null;
if (mode === "text2image" || mode === "text2video") {
if (rawCanvas) {
log(`[info] canvas ignored for mode='${mode}': ${rawCanvas}`);
}
} else {
if (rawCanvas) {
resolvedCanvas = resolveNotation(rawCanvas, {
attachments,
pictures,
variants,
});
} else {
if (totalSources === 0) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
if (totalSources === 1) {
resolvedCanvas = autoSelectSingleSource();
log(`[canvas] auto-resolved to ${resolvedCanvas.notation}`);
} else {
return {
content: [
{
type: "text",
text: "Ambiguous source – specify canvas explicitly (e.g., canvas='a1' or canvas='v1' or canvas='p1').",
},
],
isError: true as const,
};
}
}
}
// Load the raw bytes for a resolved source selection, plus origin metadata:
// - originPath: absolute path the bytes came from (when known)
// - originalName: the real user-facing filename (attachments only)
// Throws with a user-presentable message when the selection cannot be found.
const loadBufferForSel = async (
  sel: SourceSel
): Promise<{
  buf: Buffer;
  originPath?: string;
  originalName?: string;
}> => {
  switch (sel.pool) {
    case "attachment": {
      const lm = await resolveImg2ImgSourceLMStudio({
        chatId: currentLmChatId || undefined,
        explicitAttachmentSource: true,
        attachmentIndex: sel.index,
      });
      const ok = (lm as any)?.ok && (lm as any).buffer;
      if (!ok) {
        throw new Error(`Attachment a${sel.index} not found.`);
      }
      const originPath = (lm as any).originalPath;
      const originalName = (lm as any).originalName;
      return {
        buf: (lm as any).buffer as Buffer,
        originPath: typeof originPath === "string" ? originPath : undefined,
        originalName:
          typeof originalName === "string" ? originalName : undefined,
      };
    }
    case "variant": {
      const match = variants.find((v) => v.v === sel.index);
      if (!match) {
        const available =
          variants.map((v) => `v${v.v}`).join(", ") || "(none)";
        throw new Error(
          `Variant v${sel.index} not found. Available: ${available}`
        );
      }
      return {
        buf: await fs.promises.readFile(match.path),
        originPath: match.path,
      };
    }
    default: {
      // picture pool: entries live as files inside the chat working dir
      const match = pictures.find(
        (p: any) => typeof p?.p === "number" && p.p === sel.index
      );
      if (!match) {
        const available =
          pictures
            .map((p: any) => (typeof p?.p === "number" ? `p${p.p}` : null))
            .filter(Boolean)
            .join(", ") || "(none)";
        throw new Error(
          `Picture p${sel.index} not found. Available: ${available}`
        );
      }
      if (!chatWd)
        throw new Error("No working directory resolved for pictures.");
      const abs = path.join(chatWd, String(match.filename || ""));
      const exists = await fs.promises
        .stat(abs)
        .then((s) => s.isFile())
        .catch(() => false);
      if (!exists) {
        throw new Error(`Picture file missing: ${abs}`);
      }
      return { buf: await fs.promises.readFile(abs), originPath: abs };
    }
  }
};
// Resolve canvas buffer for image2image/edit/image2video
if (mode === "image2image" || mode === "edit" || mode === "image2video") {
if (!resolvedCanvas) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
try {
const loaded = await loadBufferForSel(resolvedCanvas);
srcBuf = loaded.buf;
effectiveMode = mode;
sourceTag = `canvas:${resolvedCanvas.notation}`;
sourceKind = resolvedCanvas.pool;
sourceOriginAbs = loaded.originPath;
sourceOriginalName = loaded.originalName;
sourceFileName = loaded.originPath
? path.basename(loaded.originPath)
: undefined;
if (resolvedCanvas.pool === "variant") {
sourceVariantUsed = resolvedCanvas.index;
}
// Persist last canvas selection so the orchestrator can do smarter vision promotion.
try {
if (chatWd) {
const st: any = await readState(chatWd);
st.lastCanvasNotation = resolvedCanvas.notation;
st.lastCanvasAt = localTimestamp();
await writeStateAtomic(chatWd, st);
}
} catch (e) {
log(`[state] failed to persist lastCanvasNotation: ${String(e)}`);
}
} catch (e) {
return {
content: [{ type: "text", text: String((e as any)?.message || e) }],
isError: true as const,
};
}
}
// Resolve moodboard selections (edit mode, or image2image via gRPC)
// Note: HTTP does not support moodboard for image2image; only gRPC does.
const resolvedMoodboard: SourceSel[] = [];
const selectedTransport = (globalThis as any)
?.__DT_SELECTED_TRANSPORT__ as "grpc" | "http" | null | undefined;
const moodboardAllowedForI2I = selectedTransport === "grpc";
if (
(mode === "edit" || (mode === "image2image" && moodboardAllowedForI2I)) &&
moodboardNotations.length > 0
) {
const seen = new Set<string>();
if (resolvedCanvas) {
seen.add(`${resolvedCanvas.pool}:${resolvedCanvas.index}`);
}
for (const nRaw of moodboardNotations) {
const sel = resolveNotation(nRaw, {
attachments,
pictures,
variants,
});
const key = `${sel.pool}:${sel.index}`;
if (seen.has(key)) continue;
seen.add(key);
resolvedMoodboard.push(sel);
}
log(
`[${mode}] moodboard resolved: ${
resolvedMoodboard.map((s) => s.notation).join(", ") || "(none)"
}`
);
} else if (mode === "image2image" && moodboardNotations.length > 0 && !moodboardAllowedForI2I) {
// Warn user that moodboard is ignored for image2image via HTTP
log(
`[image2image] WARNING: moodboard ignored - requires gRPC transport. Using single canvas only.`
);
}
// Stash for the edit/image2image multi-ref block below
const resolvedCanvasSel = resolvedCanvas;
const resolvedMoodboardSel = resolvedMoodboard;
// Multi-reference mode: edit always, image2image only via gRPC
const isMultiReference =
(mode === "edit" || (mode === "image2image" && moodboardAllowedForI2I)) &&
resolvedMoodboardSel.length > 0;
if (mode === "text2image" || mode === "text2video") {
effectiveMode = mode;
log(`effective mode: ${effectiveMode}`);
// If the user provided non-aligned sizes, round to the effective (backend-safe) multiples of 64,
// but keep the raw values for audit + final postprocess resize.
let serviceInputForT2I: any = stripInternalToolKeys(input as any);
if (mode === "text2video") {
serviceInputForT2I._dt_video_mode = "txt2vid";
}
try {
if (resolvedName === "drawthings") {
const rawW = (input as any)?.width;
const rawH = (input as any)?.height;
const hasW = typeof rawW === "number" && Number.isFinite(rawW);
const hasH = typeof rawH === "number" && Number.isFinite(rawH);
if (hasW) requestedRawW = Math.max(1, Math.round(rawW));
if (hasH) requestedRawH = Math.max(1, Math.round(rawH));
if (hasW || hasH) {
const align = drawthingsLimits.align;
const maxW = drawthingsLimits.maxWidth;
const maxH = drawthingsLimits.maxHeight;
const floorTo = (v: number, step: number) =>
Math.floor(v / step) * step;
const clamp = (v: number, min: number, max: number) =>
Math.min(max, Math.max(min, v));
// requestedEffective must be backend-safe: aligned + never exceed render limits.
// Use floor alignment (never rounds up beyond user's raw request).
if (hasW)
requestedEffectiveW = clamp(
Math.max(align, floorTo(requestedRawW!, align)),
align,
maxW
);
if (hasH)
requestedEffectiveH = clamp(
Math.max(align, floorTo(requestedRawH!, align)),
align,
maxH
);
if (typeof requestedEffectiveW === "number")
serviceInputForT2I.width = requestedEffectiveW;
if (typeof requestedEffectiveH === "number")
serviceInputForT2I.height = requestedEffectiveH;
// Deterministic contract: when raw dims are known, provide them + upscaler decision.
if (
typeof requestedRawW === "number" &&
typeof requestedRawH === "number"
) {
(serviceInputForT2I as any)._dt_requested_raw_w = requestedRawW;
(serviceInputForT2I as any)._dt_requested_raw_h = requestedRawH;
(serviceInputForT2I as any)._dt_needs_upscaler =
requestedRawW > maxW || requestedRawH > maxH;
}
log(
`[t2i] requested: raw=${requestedRawW || "-"}x${
requestedRawH || "-"
} effective=${requestedEffectiveW || "-"}x${
requestedEffectiveH || "-"
}`
);
}
}
} catch (e) {
log(`[t2i] failed to compute effective size: ${String(e)}`);
}
result = await svc.generateImage(serviceInputForT2I, onProgress);
// Reconstruct render_target from service metadata if not already set
// (handles imageFormat/quality shorthand + custom config scenarios)
if (
(typeof requestedRawW !== "number" ||
typeof requestedRawH !== "number") &&
result?.metadata?.requested_dimensions
) {
const reqDims = result.metadata.requested_dimensions;
if (
typeof reqDims.width === "number" &&
typeof reqDims.height === "number"
) {
requestedRawW = reqDims.width;
requestedRawH = reqDims.height;
requestedEffectiveW = reqDims.width;
requestedEffectiveH = reqDims.height;
log(
`[t2i] reconstructed render_target from service: ${requestedRawW}x${requestedRawH}`
);
}
}
} else {
if (!srcBuf) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
// Resolve imageFormat → explicit width/height for i2i/edit/image2video
// so that normalizeInputBuffer and render_target use the correct aspect ratio.
{
const fmt = (input as any)?.imageFormat as string | undefined;
const hasW = typeof (input as any)?.width === "number" && Number.isFinite((input as any).width);
const hasH = typeof (input as any)?.height === "number" && Number.isFinite((input as any).height);
if (fmt && !hasW && !hasH) {
const formatDims: Record<string, { w: number; h: number }> = {
square: { w: 1024, h: 1024 },
landscape: { w: 1024, h: 768 },
portrait: { w: 768, h: 1024 },
"16:9": { w: 1024, h: 576 },
};
const dims = formatDims[fmt];
if (dims) {
(input as any).width = dims.w;
(input as any).height = dims.h;
log(`[i2i/edit] resolved imageFormat "${fmt}" → ${dims.w}x${dims.h}`);
}
}
}
// UNIFIED: Use normalizeInputBuffer for all i2i/edit input preprocessing
try {
if (resolvedName === "drawthings") {
const userOutW = (input as any)?.width;
const userOutH = (input as any)?.height;
const normalized = await normalizeInputBuffer(srcBuf, {
requestedRawW:
typeof userOutW === "number" &&
Number.isFinite(userOutW) &&
userOutW > 0
? userOutW
: undefined,
requestedRawH:
typeof userOutH === "number" &&
Number.isFinite(userOutH) &&
userOutH > 0
? userOutH
: undefined,
logPrefix: "[i2i]",
});
srcBuf = normalized.buf;
sourcePreprocess = normalized.preprocess;
if (typeof normalized.normalizedLongSide === "number") {
normalizedToLongSide = normalized.normalizedLongSide;
}
}
} catch (e) {
const errMsg = `i2i normalization error: ${String(e)}`;
log(errMsg);
return {
content: [
{
type: "text",
text: `Image2Image setup failed. Error: ${String(e)}`,
},
],
isError: true as const,
};
}
log(
`effective mode: ${mode === "image2video" ? "image2video" : "image2image"} (source=${sourceTag || "unknown"}${
sourceFileName ? ", file=" + sourceFileName : ""
})`
);
// Backend input must not accept internal knobs from user.
let serviceInputForI2I: any = stripInternalToolKeys(input as any);
// Only applies when we're actually running an i2i call.
if (mode === "image2video") {
serviceInputForI2I._dt_video_mode = "img2vid";
} else if (isEditMode) {
serviceInputForI2I._dt_i2i_profile = "edit";
} else {
serviceInputForI2I._dt_i2i_profile = "img2img";
}
// Derive requestedRaw + requestedEffective.
// - requestedRaw is the user request when provided; otherwise the ORIGINAL source dimensions.
// - requestedEffective is the aligned/clamped size we treat as the effective target.
// Backend internal processing size (i2i normalization) may still differ.
try {
if (resolvedName === "drawthings") {
const userW = (input as any)?.width;
const userH = (input as any)?.height;
const hasUserW =
typeof userW === "number" && Number.isFinite(userW);
const hasUserH =
typeof userH === "number" && Number.isFinite(userH);
{
const limits = isEditMode
? drawthingsEditLimits
: drawthingsLimits;
const align = limits.align;
const minDim = limits.minDim;
const maxW = limits.maxWidth;
const maxH = limits.maxHeight;
const clamp = (v: number, lo: number, hi: number) =>
Math.max(lo, Math.min(hi, v));
const roundTo = (v: number, step: number) =>
Math.round(v / step) * step;
const floorTo = (v: number, step: number) =>
Math.floor(v / step) * step;
const ceilTo = (v: number, step: number) =>
Math.ceil(v / step) * step;
// Choose an "egalized" output size: aligned to `align`, clamped to the
// backend's min/max, and as close as possible to the source aspect ratio.
// Candidate generation order matters: ties in the final sort are broken by
// insertion order (Array.sort is stable), so do not reorder the add() calls.
const chooseEgalized = (origW: number, origH: number) => {
const aspect = origW / Math.max(1, origH);
const minAligned = Math.ceil(minDim / align) * align;
const maxAlignedW = Math.floor(maxW / align) * align;
const maxAlignedH = Math.floor(maxH / align) * align;
const candidates: Array<{ w: number; h: number }> = [];
// Accept only candidates that are aligned and inside [minAligned, maxAligned*].
const add = (w: number, h: number) => {
if (!Number.isFinite(w) || !Number.isFinite(h)) return;
w = Math.round(w);
h = Math.round(h);
if (w <= 0 || h <= 0) return;
if (w % align !== 0 || h % align !== 0) return;
if (w < minAligned || h < minAligned) return;
if (w > maxAlignedW || h > maxAlignedH) return;
candidates.push({ w, h });
};
// 1) Near-original multiples for W/H
const wFloor = clamp(floorTo(origW, align), align, maxAlignedW);
const wCeil = clamp(ceilTo(origW, align), align, maxAlignedW);
const hFloor = clamp(floorTo(origH, align), align, maxAlignedH);
const hCeil = clamp(ceilTo(origH, align), align, maxAlignedH);
// Try deriving H from W candidates (preserve aspect as best we can)
for (const w0 of [wFloor, wCeil, minAligned]) {
const h0 = roundTo(w0 / aspect, align);
add(w0, h0);
}
// Try deriving W from H candidates
for (const h0 of [hFloor, hCeil, minAligned]) {
const w0 = roundTo(h0 * aspect, align);
add(w0, h0);
}
// Small neighborhood search around rounded H to capture exact-aspect pairs
// (e.g., 300x200 -> 384x256 preserves 1.5 exactly).
for (const hBase of [hFloor, hCeil, minAligned]) {
for (const dh of [-2, -1, 0, 1, 2]) {
const h0 = hBase + dh * align;
const w0 = roundTo(h0 * aspect, align);
add(w0, h0);
}
}
if (candidates.length === 0) {
// Hard fallback: clamp + align independently (aspect may drift)
const w0 = clamp(
roundTo(origW, align),
minAligned,
maxAlignedW
);
const h0 = clamp(
roundTo(origH, align),
minAligned,
maxAlignedH
);
return { w: w0, h: h0 };
}
// Pick candidate minimizing aspect error, then size delta
candidates.sort((a, b) => {
const ae = Math.abs(a.w / Math.max(1, a.h) - aspect);
const be = Math.abs(b.w / Math.max(1, b.h) - aspect);
if (ae !== be) return ae - be;
const ad = Math.abs(a.w - origW) + Math.abs(a.h - origH);
const bd = Math.abs(b.w - origW) + Math.abs(b.h - origH);
return ad - bd;
});
return candidates[0];
};
// Prefer explicit user-provided OUT size when both provided.
// Otherwise derive OUT size from ORIGINAL (pre-normalization) source.
const origW0 = sourcePreprocess?.original?.width;
const origH0 = sourcePreprocess?.original?.height;
const origAspect =
typeof origW0 === "number" &&
Number.isFinite(origW0) &&
typeof origH0 === "number" &&
Number.isFinite(origH0) &&
origH0 > 0
? origW0 / origH0
: undefined;
// Fall back to current (normalized) buffer size only if original is missing.
const fallbackSz =
typeof origW0 === "number" && typeof origH0 === "number"
? null
: await imgGetSize(srcBuf);
let baseW: number | undefined;
let baseH: number | undefined;
if (hasUserW && hasUserH) {
baseW = Math.round(userW);
baseH = Math.round(userH);
} else if (!hasUserW && !hasUserH) {
baseW =
typeof origW0 === "number" && Number.isFinite(origW0)
? origW0
: fallbackSz?.width;
baseH =
typeof origH0 === "number" && Number.isFinite(origH0)
? origH0
: fallbackSz?.height;
} else if (hasUserW && !hasUserH) {
baseW = Math.round(userW);
if (typeof origAspect === "number") {
baseH = Math.max(1, Math.round(baseW / origAspect));
}
} else if (!hasUserW && hasUserH) {
baseH = Math.round(userH);
if (typeof origAspect === "number") {
baseW = Math.max(1, Math.round(baseH * origAspect));
}
}
if (typeof baseW === "number" && typeof baseH === "number") {
// requestedRaw: user request when present; else original source size.
if (hasUserW) requestedRawW = Math.max(1, Math.round(userW));
if (hasUserH) requestedRawH = Math.max(1, Math.round(userH));
if (!hasUserW && !hasUserH) {
requestedRawW = Math.max(1, Math.round(baseW));
requestedRawH = Math.max(1, Math.round(baseH));
} else if (hasUserW && !hasUserH) {
requestedRawW = Math.max(1, Math.round(baseW));
requestedRawH = Math.max(1, Math.round(baseH));
} else if (!hasUserW && hasUserH) {
requestedRawW = Math.max(1, Math.round(baseW));
requestedRawH = Math.max(1, Math.round(baseH));
}
// requestedEffective: if user provided BOTH dims, round each independently to align.
// Otherwise, preserve aspect as closely as possible.
if (hasUserW && hasUserH) {
const minAligned = Math.ceil(minDim / align) * align;
const maxAlignedW = Math.floor(maxW / align) * align;
const maxAlignedH = Math.floor(maxH / align) * align;
const wEff = clamp(
roundTo(baseW, align),
minAligned,
maxAlignedW
);
const hEff = clamp(
roundTo(baseH, align),
minAligned,
maxAlignedH
);
requestedEffectiveW = wEff;
requestedEffectiveH = hEff;
} else {
const eg = chooseEgalized(baseW, baseH);
requestedEffectiveW = eg.w;
requestedEffectiveH = eg.h;
}
log(
`[i2i/edit] requested: raw=${requestedRawW || "-"}x${
requestedRawH || "-"
} effective=${requestedEffectiveW || "-"}x${
requestedEffectiveH || "-"
} (user provided: ${hasUserW ? "w" : "-"}${
hasUserH ? "h" : "-"
})`
);
// CRITICAL: Pass requestedEffective dimensions to the backend!
// The backend must generate at requested_effective size, not at
// the normalized source (adjusted) size.
if (
typeof requestedEffectiveW === "number" &&
typeof requestedEffectiveH === "number"
) {
serviceInputForI2I.width = requestedEffectiveW;
serviceInputForI2I.height = requestedEffectiveH;
log(
`[i2i/edit] set serviceInputForI2I dimensions to effective: ${requestedEffectiveW}x${requestedEffectiveH}`
);
}
// Deterministic contract: always provide raw dims + upscaler decision for Draw Things i2i/edit.
const limits = isEditMode
? drawthingsEditLimits
: drawthingsLimits;
const limitsMaxW = limits.maxWidth;
const limitsMaxH = limits.maxHeight;
if (
typeof requestedRawW === "number" &&
typeof requestedRawH === "number"
) {
(serviceInputForI2I as any)._dt_requested_raw_w =
requestedRawW;
(serviceInputForI2I as any)._dt_requested_raw_h =
requestedRawH;
(serviceInputForI2I as any)._dt_needs_upscaler =
requestedRawW > limitsMaxW || requestedRawH > limitsMaxH;
} else {
throw new Error(
"Invariant failed: requested_raw dims missing for drawthings i2i/edit"
);
}
}
}
}
} catch (e) {
log(`[i2i] failed to derive width/height from source: ${String(e)}`);
}
// SAFETY NET: Ensure serviceInputForI2I dimensions ALWAYS respect backend limits.
// This catches edge cases where the main calculation block was skipped or failed.
{
// Pick the dimension limits for the current i2i flavor (edit vs image2image).
const limits = isEditMode ? drawthingsEditLimits : drawthingsLimits;
const align = limits.align;
const minDim = limits.minDim;
const maxW = limits.maxWidth;
const maxH = limits.maxHeight;
// Align the min upward and the max downward so every sanitized value is
// both inside [minDim, max*] and a multiple of `align`.
const minAligned = Math.ceil(minDim / align) * align;
const maxAlignedW = Math.floor(maxW / align) * align;
const maxAlignedH = Math.floor(maxH / align) * align;
const clamp = (v: number, lo: number, hi: number) =>
Math.max(lo, Math.min(hi, v));
const roundTo = (v: number, step: number) =>
Math.round(v / step) * step;
const inW = serviceInputForI2I.width;
const inH = serviceInputForI2I.height;
// Width: round to alignment first, then clamp into the aligned range.
// Only write back (and log) when the value actually changed.
if (typeof inW === "number" && Number.isFinite(inW)) {
const sanitized = clamp(
roundTo(inW, align),
minAligned,
maxAlignedW
);
if (sanitized !== inW) {
log(`[i2i] SAFETY: sanitized width ${inW} → ${sanitized}`);
serviceInputForI2I.width = sanitized;
}
}
// Height: same treatment, independent of width (aspect ratio is not
// preserved here — this is a last-resort limit guard, not a resize).
if (typeof inH === "number" && Number.isFinite(inH)) {
const sanitized = clamp(
roundTo(inH, align),
minAligned,
maxAlignedH
);
if (sanitized !== inH) {
log(`[i2i] SAFETY: sanitized height ${inH} → ${sanitized}`);
serviceInputForI2I.height = sanitized;
}
}
}
// Edit mode requires gRPC backend (HTTP does not support edit mode at all)
// Three outcomes, keyed off the globally recorded transport selection:
//   1. no transport connected  -> generic backend error (avoid misleading text)
//   2. HTTP transport          -> actionable "not supported via HTTP" message
//   3. gRPC but method missing -> defensive generic backend error
if (isEditMode && typeof svc.generateImageEdit !== "function") {
const selectedTransport = (globalThis as any)
?.__DT_SELECTED_TRANSPORT__ as "grpc" | "http" | null | undefined;
// If no backend is connected at all, prefer the generic backend error.
// Otherwise the message is misleading (it implies HTTP is active).
if (!selectedTransport) {
log(
`[edit] ERROR: edit mode requested but no Draw Things backend is connected`
);
return {
content: [
{
type: "text",
text: "Failed to generate image: backend error",
},
],
isError: true as const,
};
}
if (selectedTransport === "http") {
log(
`[edit] ERROR: HTTP backend does not support edit mode (generateImageEdit not available)`
);
return {
content: [
{
type: "text",
text: `Edit mode is not supported via HTTP. Edit mode requires the Draw Things gRPC backend. Use mode='image2image' instead, or switch to gRPC.`,
},
],
isError: true as const,
};
}
// Defensive fallback: transport says gRPC but method is missing.
log(
`[edit] ERROR: gRPC transport selected but edit mode is unavailable (generateImageEdit missing)`
);
return {
content: [
{ type: "text", text: "Failed to generate image: backend error" },
],
isError: true as const,
};
}
// Multi-reference edit/image2image mode: collect additional buffers and call generateImageEdit
if (isMultiReference && typeof svc.generateImageEdit === "function") {
log(`[${mode}] resolving multi-reference sources...`);
// ─────────────────────────────────────────────────────────────────
// PHASE 2: No auto-fill. Only explicitly selected sources are used.
// ─────────────────────────────────────────────────────────────────
// Get model capabilities for limit checking
// Use edit or image2image limits based on mode
const capKey = getCapabilityKeyForPreset(effectiveModelPreset);
const imageCaps = capKey
? detectImageModelCapabilities(capKey)
: null;
const maxRefs =
mode === "edit"
? (imageCaps?.edit?.maxReferenceImages ?? 4)
: (imageCaps?.image2image?.maxReferenceImages ?? 1);
log(
`[${mode}] model=${effectiveModelPreset}, maxReferenceImages=${maxRefs}`
);
// Get available attachments/variants/pictures count (for existence validation)
const chatWd = getChatWdForContext();
let availableAttachmentCount = 0;
let availableVariantCount = 0;
let availablePictureCount = 0;
if (chatWd) {
try {
const st = await readState(chatWd);
availableAttachmentCount = Array.isArray(st.attachments)
? st.attachments.length
: 0;
availableVariantCount = Array.isArray(st.variants)
? st.variants.length
: 0;
availablePictureCount = Array.isArray((st as any).pictures)
? (st as any).pictures.length
: 0;
log(
`[${mode}] available: ${availableAttachmentCount} attachments, ${availableVariantCount} variants, ${availablePictureCount} pictures`
);
} catch (e) {
log(`[${mode}] failed to read state: ${String(e)}`);
}
}
// Use resolved canvas + moodboard (no legacy sourceAttachment/sourceVariant)
const canvasSel = resolvedCanvasSel;
const moodboardSel = resolvedMoodboardSel;
// ─────────────────────────────────────────────────────────────────
// LIMIT VALIDATION: Check total references against model capabilities
// ─────────────────────────────────────────────────────────────────
const totalRequested =
(canvasSel ? 1 : 0) + (moodboardSel?.length || 0);
log(
`[${mode}] total references requested: ${totalRequested}, limit: ${maxRefs}`
);
if (totalRequested > maxRefs) {
// Build a detailed error message
const details = [
canvasSel ? `canvas=${canvasSel.notation}` : "no canvas",
moodboardSel && moodboardSel.length > 0
? `moodboard=[${moodboardSel.map((s) => s.notation).join(",")}]`
: "",
]
.filter((s) => s)
.join(", ");
return {
content: [
{
type: "text",
text:
`Model '${effectiveModelPreset}' supports max ${maxRefs} reference images in ${mode} mode.\n` +
`Requested: ${totalRequested} (${details}).\n\n` +
`Please make an explicit selection:\n` +
`- Use 'canvas' to specify the priority image (e.g., canvas="a1" or canvas="v2" or canvas="p3")\n` +
`- Use 'moodboard' to add reference images (e.g., moodboard=["a2","v1","p4"])`,
},
],
isError: true as const,
};
}
// ─────────────────────────────────────────────────────────────────
const referenceBuffers: Buffer[] = [];
const referenceMetadata: Array<{
type: "attachment" | "variant" | "picture";
index: number;
isCanvas: boolean;
originPath?: string;
originalName?: string;
}> = [];
const referencePreprocess: Array<{
type: "attachment" | "variant" | "picture";
index: number;
role: "canvas" | "moodboard";
originPath?: string;
originalName?: string;
preprocess: {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
}> = [];
// Get user-requested output dimensions for capping input size
const userOutW = (input as any)?.width;
const userOutH = (input as any)?.height;
const editRequestedRawW =
typeof userOutW === "number" &&
Number.isFinite(userOutW) &&
userOutW > 0
? userOutW
: undefined;
const editRequestedRawH =
typeof userOutH === "number" &&
Number.isFinite(userOutH) &&
userOutH > 0
? userOutH
: undefined;
// Load, normalize and register one reference image (canvas or moodboard).
// Appends to referenceBuffers / referenceMetadata / referencePreprocess in
// lockstep so the three arrays stay index-aligned.
const pushReference = async (sel: any, isCanvas: boolean) => {
  const role = isCanvas ? "canvas" : "moodboard";
  const source = await loadBufferForSel(sel);
  // UNIFIED: every edit/image2image multi-ref input goes through the same
  // normalization pipeline as single-source inputs.
  const prepared = await normalizeInputBuffer(source.buf, {
    requestedRawW: editRequestedRawW,
    requestedRawH: editRequestedRawH,
    logPrefix: `[${mode}:${role}]`,
  });
  referenceBuffers.push(prepared.buf);
  referenceMetadata.push({
    type: sel.pool,
    index: sel.index,
    isCanvas,
    originPath: source.originPath,
    originalName: source.originalName,
  });
  referencePreprocess.push({
    type: sel.pool,
    index: sel.index,
    role,
    originPath: source.originPath,
    originalName: source.originalName,
    preprocess: prepared.preprocess,
  });
};
// 1. Resolve Canvas first
if (canvasSel) {
try {
await pushReference(canvasSel, true);
log(`[${mode}] canvas resolved: ${canvasSel.notation}`);
} catch (e) {
return {
content: [
{
type: "text",
text: `Canvas ${canvasSel.notation} not found: ${String(
(e as any)?.message || e
)}`,
},
],
isError: true as const,
};
}
}
// 2. Resolve Moodboard selections
for (const sel of moodboardSel || []) {
try {
await pushReference(sel, false);
log(`[${mode}] moodboard resolved: ${sel.notation}`);
} catch (e) {
return {
content: [
{
type: "text",
text: `Moodboard ${sel.notation} not found: ${String(
(e as any)?.message || e
)}`,
},
],
isError: true as const,
};
}
}
// If no canvas was explicitly or auto-selected but we have references, use first as canvas
if (!resolvedCanvasSel && referenceBuffers.length > 0) {
referenceMetadata[0].isCanvas = true;
log(
`[${mode}] auto-selected first reference as canvas: ${referenceMetadata[0].type} ${referenceMetadata[0].index}`
);
}
// Fallback: if still no references, use the already-resolved srcBuf
if (referenceBuffers.length === 0 && srcBuf) {
// UNIFIED: Use normalizeInputBuffer
const normalized = await normalizeInputBuffer(srcBuf, {
requestedRawW: editRequestedRawW,
requestedRawH: editRequestedRawH,
logPrefix: `[${mode}:fallback-canvas]`,
});
referenceBuffers.push(normalized.buf);
referenceMetadata.push({
type: sourceKind || "variant",
index: sourceVariantUsed || 1,
isCanvas: true,
});
referencePreprocess.push({
type: sourceKind || "variant",
index: sourceVariantUsed || 1,
role: "canvas",
preprocess: normalized.preprocess,
});
log(`[${mode}] fallback: using single source as canvas`);
}
// Copy metadata for summary (outside this block)
usedReferenceMeta = [...referenceMetadata];
usedReferencePreprocess = [...referencePreprocess];
log(
`[${mode}] calling generateImageEdit with ${referenceBuffers.length} references`
);
result = await svc.generateImageEdit(
serviceInputForI2I,
referenceBuffers,
onProgress
);
// Update sourceTag for audit
sourceTag = `${mode}:refs=${referenceBuffers.length}`;
} else {
// Single-reference path (original behavior)
// Populate usedReferenceMeta for consistency in summary
if (srcBuf && (sourceKind || effectiveMode === "image2image")) {
usedReferenceMeta = [
{
type: sourceKind || "variant",
index: sourceVariantUsed || 1,
isCanvas: true, // Single reference is always canvas
},
];
}
result = await svc.generateImageImg2Img(
serviceInputForI2I,
srcBuf as Buffer,
onProgress
);
// Reconstruct render_target from service metadata if not already set
// (fallback for edge cases where core logic was bypassed)
if (
(typeof requestedRawW !== "number" ||
typeof requestedRawH !== "number") &&
result?.metadata?.requested_dimensions
) {
const reqDims = result.metadata.requested_dimensions;
if (
typeof reqDims.width === "number" &&
typeof reqDims.height === "number"
) {
requestedRawW = reqDims.width;
requestedRawH = reqDims.height;
requestedEffectiveW = reqDims.width;
requestedEffectiveH = reqDims.height;
log(
`[i2i/edit] reconstructed render_target from service: ${requestedRawW}x${requestedRawH}`
);
}
}
}
}
}
// Normalize any backend failure shape into one uniform tool error response:
// log it, persist the raw payload, and surface a truncated detail snippet.
if ((result as any).isError || (result as any).error) {
// The backend may report status as a number or a numeric string; coerce.
const statusRaw = (result as any).status as unknown;
let statusNum: number | undefined = undefined;
if (typeof statusRaw === "number" && Number.isFinite(statusRaw))
statusNum = statusRaw;
else if (typeof statusRaw === "string") {
const p = parseInt(statusRaw, 10);
if (Number.isFinite(p)) statusNum = p;
}
// Prefer the most specific message field available.
const raw =
(result as any).errorMessage ||
(result as any).error ||
"unknown error";
const codeText =
typeof statusNum === "number" ? `status ${statusNum}` : "backend error";
await logError(new Error(`Failed to generate image: ${codeText}`));
await appendErrorRaw(
typeof raw === "string" ? raw : String(raw),
statusNum
);
// Cap the chat-visible details at 500 chars; String() of exotic objects
// may throw, hence the defensive try/catch.
const snippet = (() => {
try {
const s = String(raw);
return s.length > 500 ? s.slice(0, 500) + "…" : s;
} catch {
return "";
}
})();
return {
content: [
{ type: "text", text: `Failed to generate image: ${codeText}` },
...(snippet
? ([{ type: "text", text: `Details: ${snippet}` }] as any[])
: []),
],
isError: true as const,
};
}
// Collect output image buffers from whichever result shape the backend
// returned, checked in priority order:
//   images[]    — array of base64 strings (optionally data: URLs)
//   imageBuffer — a single raw Buffer
//   imageData   — Buffer or base64 string (optionally data: URL)
//   imagePath   — a file path to read from disk (best-effort)
let buffers: Buffer[] = [];
if (
(result as any).images &&
Array.isArray((result as any).images) &&
(result as any).images.length > 0
) {
for (const img of (result as any).images) {
if (typeof img === "string") {
// Strip a "data:<mime>;base64," prefix if present before decoding.
const b64 = img.startsWith("data:") ? img.split(",")[1] : img;
buffers.push(Buffer.from(b64, "base64"));
}
}
} else if (
(result as any).imageBuffer &&
Buffer.isBuffer((result as any).imageBuffer)
) {
buffers.push((result as any).imageBuffer as Buffer);
} else if ((result as any).imageData) {
const data = (result as any).imageData;
if (Buffer.isBuffer(data)) buffers.push(data);
else if (typeof data === "string") {
const b64 = data.startsWith("data:") ? data.split(",")[1] : data;
buffers.push(Buffer.from(b64, "base64"));
}
} else if ((result as any).imagePath) {
// File-based fallback: a read failure is logged, not thrown, so the
// length check below produces the single canonical error instead.
try {
const abs = path.resolve((result as any).imagePath);
buffers.push(await fs.promises.readFile(abs));
} catch (e) {
log(
`Failed to read returned imagePath: ${
(result as any).imagePath
}: ${String(e)}`
);
}
}
// No recognized shape (or all decodes failed) is a hard error.
if (buffers.length === 0) throw new Error("No valid image data returned");
let backendReturnedW: number | undefined;
let backendReturnedH: number | undefined;
try {
if (buffers[0]) {
const meta0 = await imgGetSize(buffers[0]);
backendReturnedW = meta0.width;
backendReturnedH = meta0.height;
}
} catch {}
// Normalize a user-supplied output dimension: finite numbers are rounded
// and clamped to >= 1; anything else (missing, NaN, Infinity, non-number)
// yields undefined. Shared so width and height cannot drift apart.
const toUserDim = (v: unknown): number | undefined =>
  typeof v === "number" && Number.isFinite(v)
    ? Math.max(1, Math.round(v))
    : undefined;
// User-requested output width/height (used as post-generation targets).
const userReqW = toUserDim((input as any)?.width);
const userReqH = toUserDim((input as any)?.height);
if (
(imageService as any)?.name === "drawthings" &&
(effectiveMode === "image2image" || effectiveMode === "edit") &&
sourcePreprocess &&
(sourcePreprocess.reason === "normalized_to_constraints" ||
sourcePreprocess.reason === "clamped_to_requested_raw") &&
sourcePreprocess.original?.width &&
sourcePreprocess.original?.height &&
sourcePreprocess.adjusted?.width &&
sourcePreprocess.adjusted?.height &&
(sourcePreprocess.original.width !== sourcePreprocess.adjusted.width ||
sourcePreprocess.original.height !== sourcePreprocess.adjusted.height)
) {
try {
let targetW =
typeof requestedEffectiveW === "number" &&
Number.isFinite(requestedEffectiveW)
? Math.max(1, Math.round(requestedEffectiveW))
: Math.max(1, Math.round(sourcePreprocess.original.width!));
let targetH =
typeof requestedEffectiveH === "number" &&
Number.isFinite(requestedEffectiveH)
? Math.max(1, Math.round(requestedEffectiveH))
: Math.max(1, Math.round(sourcePreprocess.original.height!));
try {
const limits =
effectiveMode === "edit" ? drawthingsEditLimits : drawthingsLimits;
const maxW = limits.maxWidth;
const maxH = limits.maxHeight;
const s = Math.min(maxW / targetW, maxH / targetH, 1);
if (s < 1) {
targetW = Math.max(1, Math.round(targetW * s));
targetH = Math.max(1, Math.round(targetH * s));
}
} catch {}
const resizedBuffers: Buffer[] = [];
for (const buf of buffers) {
const r = await imgResizeCoverToPng(buf, targetW, targetH);
resizedBuffers.push(r);
}
buffers = resizedBuffers;
const restoreTargetLabel =
typeof requestedEffectiveW === "number" &&
typeof requestedEffectiveH === "number"
? "requested effective size"
: "original source size";
log(
`postprocess: restored generated image(s) to ${restoreTargetLabel} ${targetW}x${targetH} (from normalized ${sourcePreprocess.adjusted.width}x${sourcePreprocess.adjusted.height})`
);
} catch (e) {
log(`postprocess restore-to-original-size failed: ${String(e)}`);
}
}
// Final step: if we have a raw size target (either explicit user size, or derived from the
// attached source when user omitted width/height), resize output back to that exact size.
// Only absent when user provided no size AND there is no attached source (pure defaults).
try {
const finalRawW =
typeof requestedRawW === "number" && Number.isFinite(requestedRawW)
? Math.max(1, Math.round(requestedRawW))
: undefined;
const finalRawH =
typeof requestedRawH === "number" && Number.isFinite(requestedRawH)
? Math.max(1, Math.round(requestedRawH))
: undefined;
if (finalRawW && finalRawH) {
const metaCur = buffers[0] ? await imgGetSize(buffers[0]) : null;
const curW = metaCur?.width;
const curH = metaCur?.height;
if (curW !== finalRawW || curH !== finalRawH) {
const resizedFinal: Buffer[] = [];
for (const buf of buffers) {
const r = await imgResizeCoverToPng(buf, finalRawW, finalRawH);
resizedFinal.push(r);
}
buffers = resizedFinal;
log(
`postprocess: adjusted final generated image(s) to requested raw size ${finalRawW}x${finalRawH}`
);
}
}
} catch (e) {
log(`postprocess final-resize-to-user-request failed: ${String(e)}`);
}
let postProcessedW: number | undefined;
let postProcessedH: number | undefined;
try {
if (buffers[0]) {
const metaF = await imgGetSize(buffers[0]);
postProcessedW = metaF.width;
postProcessedH = metaF.height;
}
} catch {}
const saveOriginal = generateRuntimeDefaults.saveOriginal;
const envPreviewRaw = process.env.PREVIEW_IN_CHAT;
const previewInChat =
envPreviewRaw != null
? /^(1|true|yes)$/i.test(String(envPreviewRaw).trim())
: true;
log(
`preview toggle: PREVIEW_IN_CHAT='${envPreviewRaw}' -> ${previewInChat}`
);
const promptStr = typeof input.prompt === "string" ? input.prompt : "";
const alt = promptStr.trim()
? `Generated image: ${promptStr.slice(0, 80)}`
: "Generated image";
const savedFiles: Array<{
savedPath: string;
fileUrl: string;
size: number;
fileName: string;
}> = [];
// Primary storage: write directly into the active LM Studio chat working directory.
// Fail-fast if chatId could not be resolved.
const primaryOutDir: string | undefined =
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
if (!primaryOutDir) {
throw new Error(
"Failed to resolve LM Studio chat working directory (chatId missing)."
);
}
await fs.promises.mkdir(primaryOutDir, { recursive: true }).catch(() => {});
// Read current state to get nextVariantV for FORTLAUFENDE (continuous) variant numbering
// This ensures v1, v2, v3... across multiple generation runs (not resetting to v1 each time)
const currentState = await readState(primaryOutDir);
const baseVariantV = Math.max(1, currentState.counters.nextVariantV ?? 1);
log(
`variant numbering: starting at v${baseVariantV} (nextVariantV from state)`
);
const baseStamp = isoStampCompact();
const variantRecordsForState: Array<{
filename: string;
preview: string;
v: number;
sourceTool?: string;
}> = [];
// ── VIDEO PATH ────────────────────────────────────────────────────────
const numFramesMeta = (result as any)?.metadata?.num_frames;
// Require at least 3 buffers: [discarded-first] + [≥1 real frame] + [discarded-last].
// Without this guard, slice(1, buffers.length - 1) returns an empty array when the
// gRPC response only carried a preview/fallback buffer despite num_frames > 1.
// Mode guard: some official defaults carry wrong numFrames > 1 for non-video models.
const isVideoResult =
(effectiveMode === "text2video" || effectiveMode === "image2video") &&
typeof numFramesMeta === "number" && numFramesMeta > 1 && buffers.length >= 3;
let videoFrames: Buffer[] = [];
let videoPngSaved: (typeof savedFiles)[0] | null = null;
if (isVideoResult) {
const videoFps =
typeof (result as any)?.metadata?.fps === "number"
? (result as any).metadata.fps
: 24;
// Trim: discard first frame (confirmed) and last frame (pending verification).
// Roadmap trim rule: slice(1, buffers.length - 1) == slice(1, num_frames + 1)
videoFrames = buffers.slice(1, buffers.length - 1);
const videoVariantV = baseVariantV;
const videoBaseName = `generated-image-${baseStamp}-v${videoVariantV}`;
const lastFrame = videoFrames[videoFrames.length - 1];
// PNG: canonical for state / VP / i2i / lastOriginalRef
videoPngSaved = await saveOriginalPng(lastFrame, primaryOutDir, `${videoBaseName}.png`);
try {
log(`saved original (video last-frame): ${videoPngSaved.savedPath} (${videoPngSaved.size} bytes) [v${videoVariantV}]`);
} catch {}
variantRecordsForState.push({
filename: `${videoBaseName}.png`,
preview: `preview-${videoBaseName}.jpg`,
v: videoVariantV,
sourceTool: `${getSelfPluginIdentifier()}/generate_image`,
});
// MOV: goes into savedFiles so originalLinksText shows .mov. On failure, falls back to PNG.
try {
onProgress?.(-1, undefined, "Assembling video...");
const { assembleVideo } = await import("../helpers/videoAssembler.js");
const audioChunks = (result as any)?.audioBuffers as Buffer[] | undefined;
const audioRaw = audioChunks && audioChunks.length > 0
? Buffer.concat(audioChunks)
: undefined;
const audioSampleRateRaw = getAudioSampleRateForModel(effectiveModelFilename);
if (audioSampleRateRaw === undefined) {
log(`[video] ERROR: no audioSampleRate registered for model '${effectiveModelFilename}' — falling back to 48 000 Hz`);
}
const audioSampleRate = audioSampleRateRaw ?? 48_000;
const movBuffer = await assembleVideo(videoFrames, videoFps, audioRaw, audioSampleRate);
const movFileName = `${videoBaseName}.mov`;
const movPath = path.join(primaryOutDir, movFileName);
await fs.promises.writeFile(movPath, movBuffer);
const movUrl = encodeFileUrl(movPath);
savedFiles.push({ savedPath: movPath, fileUrl: movUrl, size: movBuffer.length, fileName: movFileName });
log(`saved video: ${movPath} (${movBuffer.length} bytes) [v${videoVariantV}]`);
} catch (e: any) {
const msg = e && e.message ? String(e.message) : String(e);
log(`video assembly failed (v${videoVariantV}): ${msg}`);
savedFiles.push(videoPngSaved);
}
}
// ── END VIDEO PATH ────────────────────────────────────────────────────
if (!isVideoResult) {
for (let i = 0; i < buffers.length; i++) {
const buf = buffers[i];
const variantV = baseVariantV + i; // Fortlaufende Nummerierung
const baseName = `generated-image-${baseStamp}-v${variantV}`;
const s = await saveOriginalPng(buf, primaryOutDir, `${baseName}.png`);
try {
log(`saved original: ${s.savedPath} (${s.size} bytes) [v${variantV}]`);
} catch {}
savedFiles.push(s);
// Track for state update later
variantRecordsForState.push({
filename: `${baseName}.png`,
preview: `preview-${baseName}.jpg`,
v: variantV,
sourceTool: `${getSelfPluginIdentifier()}/generate_image`,
});
}
}
const firstSaved = savedFiles[0];
// Video: lastOriginalRef must point to PNG (not MOV) so follow-up i2i/canvas loads work.
lastOriginalRef =
isVideoResult && videoPngSaved
? { path: videoPngSaved.savedPath, url: videoPngSaved.fileUrl }
: { path: firstSaved.savedPath, url: firstSaved.fileUrl };
// Policy: generate JPEG previews only (unified for attachments + variants)
// Use PreviewSpec from VARIANT_FULL_CONFIG with central generatePreviewFromBuffer()
// VARIANT_FULL_CONFIG.preview uses maxSum: 1536 for proper sizing
const variantPreviewSpec = VARIANT_FULL_CONFIG.preview;
const previews: any[] = [];
if (isVideoResult) {
// Generate one JPEG preview from the last trimmed video frame
const previewFrame = videoFrames[videoFrames.length - 1];
const videoVariantV = baseVariantV;
const videoBaseName = `generated-image-${baseStamp}-v${videoVariantV}`;
try {
const p = await generatePreviewFromBuffer(
previewFrame,
primaryOutDir,
videoPngSaved!.fileName,
variantPreviewSpec,
{ customFilename: `preview-${videoBaseName}.jpg` }
);
const previewFilePath = p.previewAbs;
const previewFileUrl = encodeFileUrl(previewFilePath);
previews.push({
ok: true as const,
filePath: previewFilePath,
fileName: p.previewFilename,
fileUrl: previewFileUrl,
size_bytes: p.data.length,
width: p.width,
height: p.height,
mimeType: "image/jpeg" as const,
format: variantPreviewSpec.format,
dataBase64: p.data.toString("base64"),
});
log(
`video preview saved (v${videoVariantV}): ${previewFilePath} ${p.width}x${p.height} ${p.data.length} bytes ok=true`
);
} catch (e: any) {
const msg = e && e.message ? String(e.message) : String(e);
log(
`video preview build failed (v${videoVariantV}): ${msg} spec=${JSON.stringify(variantPreviewSpec)}`
);
}
}
if (!isVideoResult) {
for (let i = 0; i < buffers.length; i++) {
const buf = buffers[i];
const variantV = baseVariantV + i; // Use fortlaufende v-Nummer for logging
try {
if (!isSupportedImageBuffer(buf)) {
try {
const magic = Buffer.from(buf.slice(0, 12) || []).toString("hex");
log(
`preview skip: unsupported buffer signature (v${variantV}) magic=${magic}`
);
} catch {}
continue;
}
// Use central generatePreviewFromBuffer() with correct maxSum/maxWidth constraints
const p = await generatePreviewFromBuffer(
buf,
primaryOutDir,
savedFiles[i].fileName,
variantPreviewSpec
);
const previewFilePath = p.previewAbs;
const previewFileUrl = encodeFileUrl(previewFilePath);
previews.push({
ok: true as const,
filePath: previewFilePath,
fileName: p.previewFilename,
fileUrl: previewFileUrl,
size_bytes: p.data.length,
width: p.width,
height: p.height,
mimeType: "image/jpeg" as const,
format: variantPreviewSpec.format,
dataBase64: p.data.toString("base64"),
});
log(
`preview saved (v${variantV}): ${previewFilePath} ${p.width}x${p.height} ${p.data.length} bytes ok=true`
);
} catch (e: any) {
const msg = e && e.message ? String(e.message) : String(e);
log(
`preview build failed (v${variantV}): ${msg} spec=${JSON.stringify(variantPreviewSpec)}`
);
}
}
}
if (previews.length === 0) {
log(`previews built: count=0 (no preview created)`);
} else {
log(`previews built: count=${previews.length}`);
}
if (previews.length > 0) {
const firstPreview = previews[0];
lastPreviewRef = {
path: firstPreview.filePath,
url: firstPreview.fileUrl,
mimeType: firstPreview.mimeType,
width: firstPreview.width,
height: firstPreview.height,
};
// Track per-chat last variants and clear pending sentinel after any generation
try {
if (currentLmChatId) {
// Store with v-values for proper lookup in getAllVariantsForContext
LAST_VARIANTS_BY_LM_CHAT[currentLmChatId] =
variantRecordsForState.map((vr, idx) => ({
v: vr.v,
path: savedFiles[idx].savedPath,
}));
}
} catch {}
// Update chat_media_state.json with new variants (append, rolling window in orchestrator)
// This ensures the State has the correct v-numbers and nextVariantV is incremented
try {
const { appendVariants } = await import(
"../core-bundle.mjs"
);
const stateForUpdate = await readState(primaryOutDir);
const appendResult = appendVariants(
stateForUpdate,
variantRecordsForState
);
if (appendResult.changed) {
await writeStateAtomic(primaryOutDir, stateForUpdate);
log(
`state updated: appended ${variantRecordsForState.length} variants, nextVariantV=${stateForUpdate.counters.nextVariantV}`
);
}
} catch (e) {
log(`state update failed (non-fatal): ${String((e as Error).message)}`);
}
}
const explicit: Record<string, unknown> = {};
for (const k of ALLOWED_GEN_INPUT_KEYS) {
if (
Object.prototype.hasOwnProperty.call(input, k) &&
(input as any)[k] !== undefined
) {
explicit[k] = (input as any)[k];
}
}
const inferenceMs = (result as any)?.metadata?.inference_time_ms;
const meta = ((result as any)?.metadata || {}) as any;
const effWidth: number | undefined =
typeof postProcessedW === "number"
? Math.round(postProcessedW)
: typeof meta.width === "number"
? Math.round(meta.width)
: undefined;
const effHeight: number | undefined =
typeof postProcessedH === "number"
? Math.round(postProcessedH)
: typeof meta.height === "number"
? Math.round(meta.height)
: undefined;
const imgFmtRaw: string | undefined =
typeof meta.image_format === "string"
? String(meta.image_format)
: undefined;
const effQuality: string | undefined =
typeof meta.quality === "string" ? String(meta.quality) : undefined;
const effSteps: number | undefined =
typeof meta.steps === "number" ? Math.round(meta.steps) : undefined;
// Tool-result summary: effective output dimensions, backend/mode info,
// source provenance, reference roles/weights, and produced file URLs.
const summary = {
  width: effWidth,
  height: effHeight,
  image_format: imgFmtRaw,
  quality: effQuality,
  ...(typeof effSteps === "number" ? { steps: effSteps } : {}),
  backend: resolvedName,
  mode_effective: effectiveMode,
  source: sourceTag || undefined,
  ...(typeof sourceVariantUsed === "number"
    ? { source_variant_used: sourceVariantUsed }
    : {}),
  ...(typeof normalizedToLongSide === "number"
    ? { normalized_to_long_side: normalizedToLongSide }
    : {}),
  // A video run yields one logical artifact even though multiple frame
  // buffers came back from the backend.
  images_generated: isVideoResult ? 1 : buffers.length,
  // Reference sources used (for edit mode / img2img)
  ...(usedReferenceMeta && usedReferenceMeta.length > 0
    ? {
        references_used: (() => {
          // Moodboard references share a uniform weight of 1/count.
          const moodboardCount = usedReferenceMeta.filter(
            (r) => !r.isCanvas
          ).length;
          const moodboardWeight =
            moodboardCount > 0 ? 1.0 / moodboardCount : 0;
          return usedReferenceMeta.map((r) => ({
            source: r.type,
            index: r.index,
            role: r.isCanvas ? "canvas" : "moodboard",
            ...(r.originPath ? { source_originAbs: r.originPath } : {}),
            ...(r.originalName
              ? { source_originalName: r.originalName }
              : {}),
            ...(!r.isCanvas && moodboardCount > 0
              ? { weight: moodboardWeight }
              : {}),
          }));
        })(),
        canvas_source: (() => {
          const canvas = usedReferenceMeta.find((r) => r.isCanvas);
          if (!canvas) return null;
          // FIX: map all three pool types to their short notation.
          // Previously "picture" fell through to "v", producing a wrong
          // notation (e.g. "v3" for picture p3); the audit section uses
          // a/v/p for attachment/variant/picture.
          const prefix =
            canvas.type === "attachment"
              ? "a"
              : canvas.type === "variant"
                ? "v"
                : "p";
          return `${prefix}${canvas.index}`;
        })(),
      }
    : {}),
  files: {
    original: firstSaved.fileUrl,
    previews: previews.map((p: any) => p.fileUrl),
  },
  ...(typeof inferenceMs === "number"
    ? { inference_time_ms: inferenceMs }
    : {}),
};
// Backfill originalName for audit wherever we have originAbs.
// Rationale: originAbs is the stable attachment identity; originalName may be missing
// in some resolver paths unless explicitly persisted in chat_media_state.json.
try {
const chatWdForAudit =
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : null);
if (chatWdForAudit) {
const st: any = await readState(chatWdForAudit);
const attachments: any[] = Array.isArray(st?.attachments)
? st.attachments
: [];
const originalNameByOriginAbs = new Map<string, string>();
const originalNameByFilename = new Map<string, string>();
for (const a of attachments) {
if (!a || typeof a !== "object") continue;
const oa =
typeof a.originAbs === "string" && a.originAbs.trim()
? String(a.originAbs)
: null;
const fn =
typeof a.filename === "string" && a.filename.trim()
? String(a.filename)
: typeof a.origin === "string" && a.origin.trim()
? String(a.origin)
: null;
const on =
typeof a.originalName === "string" && a.originalName.trim()
? String(a.originalName)
: null;
if (oa && on) originalNameByOriginAbs.set(oa, on);
if (fn && on) originalNameByFilename.set(fn, on);
}
if (
sourceKind === "attachment" &&
sourceOriginAbs &&
!sourceOriginalName
) {
sourceOriginalName =
originalNameByOriginAbs.get(sourceOriginAbs) ||
originalNameByFilename.get(path.basename(sourceOriginAbs));
}
if (Array.isArray(usedReferenceMeta) && usedReferenceMeta.length > 0) {
usedReferenceMeta = usedReferenceMeta.map((r) => {
if (
r &&
r.type === "attachment" &&
r.originPath &&
typeof r.originPath === "string" &&
!r.originalName
) {
const filled =
originalNameByOriginAbs.get(r.originPath) ||
originalNameByFilename.get(path.basename(r.originPath));
return filled ? { ...r, originalName: filled } : r;
}
return r;
});
}
}
} catch {}
// Resolve the local HTTP server base (empty URLs when unhealthy/missing)
// and build HTTP URLs for originals and previews in the chat working dir.
const httpBase = await getHealthyServerBaseUrl();
const httpOriginals = savedFiles.map((s) =>
httpBase
? toHttpOriginalUrl(s.fileName, httpBase, currentLmChatId || undefined)
: ""
);
// Build preview URLs pointing to preview-* files in chat working directory
// NOTE(review): previews[i] is index-aligned with savedFiles[i] only when
// every buffer produced a preview; a skipped or failed preview shortens the
// previews array and can mis-pair URLs — confirm, or key previews by the
// saved file name instead of position.
const httpPreviews = savedFiles.map((_s, i) => {
if (!httpBase || !currentLmChatId) return "";
const previewFileName = previews[i]?.fileName;
if (!previewFileName) return "";
return toHttpPreviewUrl(previewFileName, httpBase, currentLmChatId);
});
try {
const audit = buildAuditLogger({
backend: resolvedName,
mode: effectiveMode as any,
});
// Metadata
if (currentLmChatId) audit.setChatId(currentLmChatId);
// === USER REQUEST (what the user sent) ===
const userRequest: Record<string, any> = {};
if ((input as any)?.prompt) userRequest.prompt = (input as any).prompt;
if ((input as any)?.mode) userRequest.mode = (input as any).mode;
if ((input as any)?.canvas) userRequest.canvas = (input as any).canvas;
if ((input as any)?.moodboard)
userRequest.moodboard = (input as any).moodboard;
if ((input as any)?.model) userRequest.model = (input as any).model;
if ((input as any)?.width) userRequest.width = (input as any).width;
if ((input as any)?.height) userRequest.height = (input as any).height;
if (typeof (input as any)?.seed === "number") {
userRequest.seed = (input as any).seed;
}
if (Object.prototype.hasOwnProperty.call(input as any, "seed_mode")) {
userRequest.seed_mode = (input as any).seed_mode;
} else if (Object.prototype.hasOwnProperty.call(input as any, "seedMode")) {
userRequest.seed_mode = (input as any).seedMode;
}
if ((input as any)?.imageFormat)
userRequest.imageFormat = (input as any).imageFormat;
if ((input as any)?.quality) userRequest.quality = (input as any).quality;
if ((input as any)?.variants)
userRequest.variants = (input as any).variants;
audit.setUserRequest(userRequest);
// === RENDER TARGET (Step 0+1) ===
// Record the dimensions the user asked for (raw) and the dimensions after
// adjustment (effective) — both computed earlier in this handler. Each pair
// is recorded partially if only one axis was supplied.
const renderTarget: Record<string, any> = {};
if (
typeof requestedRawW === "number" ||
typeof requestedRawH === "number"
) {
renderTarget.requested_raw = {
...(typeof requestedRawW === "number"
? { width: requestedRawW }
: {}),
...(typeof requestedRawH === "number"
? { height: requestedRawH }
: {}),
};
}
if (
typeof requestedEffectiveW === "number" ||
typeof requestedEffectiveH === "number"
) {
renderTarget.requested_effective = {
...(typeof requestedEffectiveW === "number"
? { width: requestedEffectiveW }
: {}),
...(typeof requestedEffectiveH === "number"
? { height: requestedEffectiveH }
: {}),
};
}
// Upscaler decision
// Edit mode has its own (smaller — TODO confirm) limits; an upscaler pass is
// needed when either raw axis exceeds the backend's maximum. Only recorded
// when BOTH raw dimensions are known.
const limits =
effectiveMode === "edit" ? drawthingsEditLimits : drawthingsLimits;
const limitsMaxW = limits.maxWidth;
const limitsMaxH = limits.maxHeight;
if (
typeof requestedRawW === "number" &&
typeof requestedRawH === "number"
) {
renderTarget.needs_upscaler =
requestedRawW > limitsMaxW || requestedRawH > limitsMaxH;
}
// Skip the audit section entirely when nothing was collected.
if (Object.keys(renderTarget).length > 0) {
audit.setRenderTarget(renderTarget);
}
// === INPUTS (Step 2: Canvas + Moodboard) ===
// Record what source images fed the generation: a single canvas (i2i) and/or
// multiple weighted moodboard references (edit mode).
const inputs: Record<string, any> = {};
// Canvas from sourcePreprocess (single-image i2i) or usedReferencePreprocess
if (sourcePreprocess && sourcePreprocess.original) {
inputs.canvas = {
notation: sourceTag || undefined,
source_type: sourceKind || undefined,
file_name: sourceFileName || undefined,
original_name: sourceOriginalName || undefined,
origin_path: sourceOriginAbs || undefined,
original: {
width: sourcePreprocess.original.width,
height: sourcePreprocess.original.height,
bytes: sourcePreprocess.original.bytes,
},
// "adjusted" is only present when preprocessing resized/re-encoded the
// source; undefined keys are dropped from the serialized audit.
adjusted: sourcePreprocess.adjusted
? {
width: sourcePreprocess.adjusted.width,
height: sourcePreprocess.adjusted.height,
bytes: sourcePreprocess.adjusted.bytes,
}
: undefined,
};
}
// Multi-reference (edit mode): usedReferencePreprocess
// May overwrite inputs.canvas set above when a reference with role "canvas"
// exists — presumably intentional precedence; confirm against caller.
if (usedReferencePreprocess && usedReferencePreprocess.length > 0) {
const canvasRef = usedReferencePreprocess.find(
(r) => r.role === "canvas"
);
const moodboardRefs = usedReferencePreprocess.filter(
(r) => r.role === "moodboard"
);
const moodboardCount = moodboardRefs.length;
if (canvasRef) {
// Notation: a<N> = attachment, v<N> = variant, p<N> = anything else
// (presumably a path/previous reference — TODO confirm).
const notation = `${
canvasRef.type === "attachment"
? "a"
: canvasRef.type === "variant"
? "v"
: "p"
}${canvasRef.index}`;
inputs.canvas = {
notation,
source_type: canvasRef.type,
original_name: canvasRef.originalName || undefined,
origin_path: canvasRef.originPath || undefined,
original: canvasRef.preprocess?.original,
adjusted: canvasRef.preprocess?.adjusted,
};
}
if (moodboardRefs.length > 0) {
inputs.moodboard = moodboardRefs.map((r) => {
const notation = `${
r.type === "attachment" ? "a" : r.type === "variant" ? "v" : "p"
}${r.index}`;
return {
notation,
source_type: r.type,
original_name: r.originalName || undefined,
origin_path: r.originPath || undefined,
original: r.preprocess?.original,
adjusted: r.preprocess?.adjusted,
// Equal weighting across moodboard refs. The count is always > 0
// inside this branch; the guard is defensive only.
weight: moodboardCount > 0 ? 1.0 / moodboardCount : undefined,
};
});
}
}
if (Object.keys(inputs).length > 0) {
audit.setInputs(inputs);
}
// === OUTPUT (Step 3+4) ===
// Collect what the backend actually produced and the effective settings the
// service layer reports having used.
const output: Record<string, any> = {};
// Backend returned dimensions
if (
typeof backendReturnedW === "number" &&
typeof backendReturnedH === "number"
) {
output.backend_returned = {
width: backendReturnedW,
height: backendReturnedH,
};
}
// Post-processed dimensions
if (typeof effWidth === "number" && typeof effHeight === "number") {
output.post_processed = { width: effWidth, height: effHeight };
}
// Inference time
if (typeof inferenceMs === "number") {
output.inference_time_ms = inferenceMs;
}
// Model used
// Preliminary value from the outer `meta`; overwritten below with the
// service-reported basename when available.
if (typeof meta.model === "string" && meta.model.trim()) {
output.model_used = meta.model;
}
// Model origin + presets
// Read overlay info from service result metadata (authoritative source)
try {
// NOTE(review): this `meta` shadows the outer `meta` used above — inside
// this try block, `meta` is the service result metadata.
const meta = (result as any)?.metadata || {};
// overlay_source and overlay_preset come from the service layer
if (meta.overlay_source) {
output.overlay_source = meta.overlay_source;
}
if (meta.overlay_preset) {
output.overlay_preset = meta.overlay_preset;
}
if (typeof meta.defaults_used === "string" && meta.defaults_used.trim()) {
output.defaults_used = meta.defaults_used;
}
if (
typeof meta.overlay_lookup_mode === "string" &&
meta.overlay_lookup_mode.trim()
) {
output.overlay_lookup_mode = meta.overlay_lookup_mode;
}
if (typeof meta.i2i_profile === "string" && meta.i2i_profile.trim()) {
output.i2i_profile = meta.i2i_profile;
}
// Numeric settings are copied only when finite, string settings only when
// non-blank — malformed metadata is silently omitted from the audit.
if (typeof meta.strength_used === "number" && Number.isFinite(meta.strength_used)) {
output.strength_used = meta.strength_used;
}
if (typeof meta.steps_used === "number" && Number.isFinite(meta.steps_used)) {
output.steps_used = meta.steps_used;
}
if (typeof meta.sampler_used === "string" && meta.sampler_used.trim()) {
output.sampler_used = meta.sampler_used;
}
if (
typeof meta.guidance_scale_used === "number" &&
Number.isFinite(meta.guidance_scale_used)
) {
output.guidance_scale_used = meta.guidance_scale_used;
}
if (typeof meta.shift_used === "number" && Number.isFinite(meta.shift_used)) {
output.shift_used = meta.shift_used;
}
if (typeof meta.resolution_dependent_shift_used === "boolean") {
output.resolution_dependent_shift_used = meta.resolution_dependent_shift_used;
}
if (typeof meta.compression_artifacts_used === "string") {
output.compression_artifacts_used = meta.compression_artifacts_used;
}
if (typeof meta.compression_artifacts_quality_used === "number" && Number.isFinite(meta.compression_artifacts_quality_used)) {
output.compression_artifacts_quality_used = meta.compression_artifacts_quality_used;
}
// Model used (from service - the actual model that was sent to Draw Things)
// Overrides the preliminary outer-meta value set above, reduced to basename.
if (typeof meta.model === "string" && meta.model.trim()) {
output.model_used = path.basename(meta.model);
}
// LoRAs used (from service - actual LoRA files validated and sent)
if (Array.isArray(meta.loras_used) && meta.loras_used.length > 0) {
output.loras_used = meta.loras_used.map((f: string) =>
path.basename(f)
);
}
// Seed used (from service - effective seed after defaults/overlays)
if (typeof meta.seed === "number" && Number.isFinite(meta.seed)) {
output.seed = meta.seed;
}
if (typeof meta.seed_mode === "string" && meta.seed_mode.trim()) {
output.seed_mode = meta.seed_mode;
}
if (typeof meta.seed_source === "string" && meta.seed_source.trim()) {
output.seed_source = meta.seed_source;
}
if (
typeof meta.seed_mode_source === "string" &&
meta.seed_mode_source.trim()
) {
output.seed_mode_source = meta.seed_mode_source;
}
} catch {}
// Prompt used (prefer backend's prompt_used, fallback to user input)
const metaPrompt = (result as any)?.metadata?.prompt_used;
const promptUsed =
typeof metaPrompt === "string" && metaPrompt.trim()
? metaPrompt
: typeof (input as any)?.prompt === "string"
? (input as any).prompt
: undefined;
if (promptUsed) {
output.prompt_used = promptUsed;
}
// Prompt origin
// "user" when either the backend says so or the user supplied a non-blank
// prompt; otherwise the prompt came from a default.
const backendOriginRaw = (result as any)?.metadata?.prompt_origin;
const userPromptInput =
typeof (input as any)?.prompt === "string"
? String((input as any).prompt).trim()
: "";
if (backendOriginRaw === "user" || userPromptInput) {
output.prompt_origin = "user";
} else {
output.prompt_origin = "default";
}
// Variants (saved files)
// One audit entry per saved output file: local path/url/bytes, plus any
// HTTP mirror and preview artifacts that exist at the same index.
if (Array.isArray(savedFiles) && savedFiles.length > 0) {
output.variants = savedFiles.map((s, i) => {
const pv = previews[i];
// Recover the stable variant number from the "-v<N>.<ext>" suffix.
// "mov" is included so video results audit their real variant number
// instead of falling back to the array index — keeps this pattern
// consistent with extractStableVariantV below.
const vMatch = /-v(\d+)\.(png|jpe?g|webp|mov)$/i.exec(s?.fileName || "");
const v = vMatch ? parseInt(vMatch[1], 10) : i + 1;
return {
v,
path: s.savedPath,
url: s.fileUrl,
bytes: s.size,
...(httpOriginals[i]
? { http_url: httpOriginals[i] }
: {}),
...(pv
? { preview_path: pv.filePath, preview_url: pv.fileUrl }
: {}),
...(httpPreviews[i]
? { http_preview_url: httpPreviews[i] }
: {}),
};
});
}
audit.setOutput(output);
await audit.write();
} catch (e) {
// Audit logging must never break the tool response; log and continue.
log(`audit logging error: ${String(e)}`);
}
// Log whether the external HTTP file server was reachable for this request
// (httpBase is set earlier only when the health check succeeded).
try {
const p = process.env.HTTP_SERVER_PORT;
log(
`[httpServer] generate_image: external server ${
httpBase ? "healthy" : "unavailable"
}${p ? ` (port=${p})` : ""}.`
);
} catch {}
// Pulls the stable variant number out of a generated file name, e.g.
// "foo-v3.png" -> 3. Returns undefined when the name has no "-v<N>.<ext>"
// suffix (png/jpg/jpeg/webp/mov, case-insensitive), when N is not a
// positive finite number, or on any unexpected input.
const extractStableVariantV = (fileName: string): number | undefined => {
try {
const match = String(fileName || "").match(/-v(\d+)\.(png|jpe?g|webp|mov)$/i);
if (match) {
const parsed = parseInt(match[1], 10);
if (Number.isFinite(parsed) && parsed > 0) return parsed;
}
return undefined;
} catch {
return undefined;
}
};
// Human-readable "Preview vN: <url>" list, preferring HTTP preview URLs and
// falling back to preview/original file URLs. Variant numbers come from the
// file-name suffix when present, else the array index.
const variantLinksText =
savedFiles.length > 0
? savedFiles
.map((s, i) => {
const stableV =
extractStableVariantV(s.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1;
const httpPreviewUrl = httpPreviews[i];
const fallback = previews[i]?.fileUrl || s.fileUrl;
const url = httpPreviewUrl ? httpPreviewUrl : fallback;
return `Preview v${stableV}: ${url}`;
})
.join(" | ")
: "";
// Originals are saved directly to chat working directory.
const originalLinksText =
savedFiles.length > 0
? savedFiles
.map((s, i) => {
const stableV =
extractStableVariantV(s.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1;
const httpUrl = httpOriginals[i];
const url = httpUrl ? httpUrl : s.fileUrl;
return `Original v${stableV}: ${url}`;
})
.join(" | ")
: "";
// One-line notice when the requested variant count was reduced upstream.
const note =
typeof requestedVariants === "number" &&
requestedVariants !== usedVariants
? `Note: variants=${requestedVariants} was clamped to ${usedVariants}.`
: null;
// Best-effort one-line generation summary for the plugin log. Dimensions
// prefer post-processed values, then backend metadata; "-" when unknown.
try {
const invMs = (result as any)?.metadata?.inference_time_ms;
const wLog =
typeof effWidth === "number"
? effWidth
: typeof postProcessedW === "number"
? postProcessedW
: typeof meta.width === "number"
? Math.round(meta.width)
: undefined;
const hLog =
typeof effHeight === "number"
? effHeight
: typeof postProcessedH === "number"
? postProcessedH
: typeof meta.height === "number"
? Math.round(meta.height)
: undefined;
log(
`generation summary: backend=${resolvedName} mode=${effectiveMode} width=${
typeof wLog === "number" ? wLog : "-"
} height=${typeof hLog === "number" ? hLog : "-"} previews=${
previews.length
} variants=${buffers.length} inferenceMs=${invMs ?? "-"}`
);
} catch {}
// Hint attached to each generated item so the model inspects the result
// instead of assuming success.
const reviewHint = "Carefully examine the preview and comment on how well it matches your prompt. Do not assume it does.";
// Drop the (potentially large) `files` array from the summary sent to chat.
const { files: _files, ...summaryNoFilesBase } = summary as any;
// Enrich the summary with model_used (basename) and any matching presets.
// Preset info is taken from service metadata when available; only for
// overlaySource === "modelOverlay" does it fall back to a dynamic lookup.
const summaryNoFiles = await (async () => {
const modelUsedBasename =
typeof meta.model === "string" && meta.model.trim()
? path.basename(meta.model)
: undefined;
if (!modelUsedBasename) return summaryNoFilesBase;
let matchingPresets: any[] | undefined;
const overlaySourceFromMeta = meta.overlay_source as string | undefined;
const overlayPresetFromMeta = meta.overlay_preset as string | undefined;
if (overlayPresetFromMeta && overlaySourceFromMeta) {
// Preset ids look like "<mode>.<modelId>" — split on the first dot.
const dotIdx = overlayPresetFromMeta.indexOf(".");
const presetMode = dotIdx >= 0 ? overlayPresetFromMeta.slice(0, dotIdx) : undefined;
const presetModelId = dotIdx >= 0 ? overlayPresetFromMeta.slice(dotIdx + 1) : undefined;
matchingPresets = [{
mode: presetMode,
modelId: presetModelId,
preset: overlayPresetFromMeta,
overlaySource: overlaySourceFromMeta,
...(overlaySourceFromMeta === "custom" ? { customConfig: overlayPresetFromMeta } : {}),
}];
} else if (overlaySourceFromMeta === "modelOverlay") {
// Lazy import keeps the helper out of the hot path; failures are
// non-fatal and simply omit model_presets.
try {
const { resolveImageModelInfoFromModelUsed } = await import(
"../helpers/imageModelMeta.js"
);
const info = resolveImageModelInfoFromModelUsed(modelUsedBasename, {
mode: effectiveMode as any,
});
const overlayOnly = ((info as any)?.presets || []).filter(
(p: any) => p.overlaySource === "modelOverlay"
);
if (overlayOnly.length > 0) matchingPresets = overlayOnly;
} catch {}
}
return {
...summaryNoFilesBase,
model_used: modelUsedBasename,
...(matchingPresets ? { model_presets: matchingPresets } : {}),
};
})();
// Assemble the final tool response. Two shapes:
//  - previewInChat: inline image content items plus link text and summary.
//  - otherwise: text-only items (orchestrator injects markdown later).
if (previewInChat && previews.length > 0) {
// The preview files were already written to the primary chat working directory.
// Reuse those file names to avoid duplicates.
const imageContents = previews.map((p: any, i: number) => {
const fname = String(p.fileName || "");
// stableV is computed here for parity with the text branch; it is not
// referenced in the returned object below.
const stableV =
extractStableVariantV(fname) ||
extractStableVariantV(savedFiles[i]?.fileName) ||
i + 1;
return {
type: "image",
fileName: fname,
mimeType: p.mimeType,
markdown: ``,
$hint:
"This is an image file. Present the image to the user by using the markdown above.",
} as any;
});
return {
content: [
...(note ? ([{ type: "text", text: note }] as any) : []),
...imageContents,
{ type: "text", text: variantLinksText },
{ type: "text", text: originalLinksText },
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
} else {
return {
content: [
...(note ? ([{ type: "text", text: note }] as any) : []),
...(() => {
if (httpOriginals.length > 0 && httpOriginals[0]) {
// PREVIEW_IN_CHAT = false: simplified response without inline previews.
// Markdown will be injected by orchestrator after tool call.
// Still provide a hint so the model reviews the output quality.
return httpOriginals.map((u, i) => ({
type: "text",
text: `${isVideoResult ? "Video" : "Image Variant"} v${
extractStableVariantV(savedFiles[i]?.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1
} successfully generated.`,
$hint: reviewHint,
}));
}
// No HTTP URLs available: emit one success line per used variant.
const count =
typeof usedVariants === "number" && usedVariants > 0
? usedVariants
: 1;
return Array.from({ length: count }, (_, i) => ({
type: "text",
text: `${isVideoResult ? "Video" : "Image Variant"} v${
extractStableVariantV(savedFiles[i]?.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1
} successfully generated.`,
$hint: reviewHint,
}));
})(),
...(variantLinksText
? [{ type: "text", text: variantLinksText }]
: []),
...(originalLinksText
? [{ type: "text", text: originalLinksText }]
: []),
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
}
} catch (error) {
// Top-level failure path: log full details locally but return only a
// generic error to the model (no backend internals leak into chat).
log(
`generate_image error: ${
error instanceof Error ? error.message : String(error)
}`
);
await logError(error);
return {
content: [
{ type: "text", text: "Failed to generate image: backend error" },
],
isError: true as const,
};
}
}
/**
 * Map of tool name -> parameter shape used to register/validate tool inputs.
 * Presumably a Zod raw shape (formatZodError is imported alongside it from
 * core-bundle) — confirm against GenerateToolParamsShapeMinimal's definition.
 */
export const ToolSchemas = {
generate_image: GenerateToolParamsShapeMinimal,
};