Forked from ceveyne/process-image
Project Files
src / core / tools.ts
/*
* Core tool handlers for LM Studio Plugin (transport-agnostic)
* This file extracts the logic from LM Studio Plugin tool handlers in src/index.ts
* with minimal changes to keep behavior identical.
*/
import path from "path";
import fs from "fs";
import os from "os";
import net from "net";
import { fileURLToPath, pathToFileURL } from "url";
import axios from "axios";
import FormData from "form-data";
import {
getSize as imgGetSize,
toPng as imgToPng,
resizeAndEncode as imgResizeAndEncode,
resizeInsideToPng as imgResizeInsideToPng,
resizeCoverToPng as imgResizeCoverToPng,
resizeFillToPng as imgResizeFillToPng,
cropToPng as imgCropToPng,
GenerateToolParamsShapeMinimal,
GenerateToolParamsSchemaMinimalStrict,
CropToolParamsShape,
CropToolSchemaStrict,
ZoomInToolParamsShape,
ZoomInToolSchemaStrict,
InpaintToolParamsShape,
InpaintToolSchemaStrict,
OutpaintToolParamsShape,
OutpaintToolSchemaStrict,
RefineToolParamsShape,
RefineToolSchemaStrict,
formatZodError,
ImageGenerationResult,
generateRuntimeDefaults,
engineConnectionDefaults,
getEngineConnectionDefaults,
drawthingsLimits,
drawthingsEditLimits,
defaultParamsZoom,
defaultParamsRestore,
drawthingsRestoreLimits,
previewDefaults,
checkDrawThingsGrpcFilesExist,
findLMStudioHome,
getLMStudioWorkingDir,
resolveActiveLMStudioChatId,
getActiveChatContext,
readState,
writeStateAtomic,
generatePreviewFromBuffer,
resolveImg2ImgSourceLMStudio,
buildAuditLogger,
getHealthyServerBaseUrl,
toHttpOriginalUrl,
toHttpPreviewUrl,
getLogsDir,
ensureLogsDir,
getSelfPluginIdentifier,
VARIANT_FULL_CONFIG,
getAudioSampleRateForModel,
} from "../core-bundle.mjs";
import { injectXmpIntoBuffer, type PngXmpParams } from "../helpers/pngMetadata.js";
import { DrawThingsService } from "../services/drawThingsService.js";
import { ImageBackend, ProgressCallback } from "../services/imageBackend.js";
import { getModelFilename } from "../services/modelOverlays.js";
// Global debug toggle: when true, logError() also echoes error blocks to stderr.
const DEBUG_MODE: boolean = true;

// Resolve the log directory and the main log file once at module load.
ensureLogsDir();
const logsDir: string = getLogsDir();
const logFile: string = path.join(logsDir, "generate-image-plugin.log");

// Best-effort startup line recording which log directory is in use.
try {
  fs.appendFileSync(
    logFile,
    `${localTimestamp()} - paths: logsDir=${logsDir}\n`
  );
} catch {}
// LM Studio only – no client resolver switching
/**
 * Resolve the locale used when formatting log timestamps.
 *
 * Reads, in order of precedence, the environment variables LOG_LOCALE, LC_ALL,
 * LC_TIME and LANG, and converts the first non-empty value from POSIX notation
 * (e.g. `de_DE.UTF-8@euro`) into a BCP 47 style tag (`de-DE`).
 *
 * @returns The normalized locale tag, or `undefined` when no variable is set
 *          or the value is the portable "C"/"POSIX" locale, which has no
 *          BCP 47 equivalent and would be rejected by Intl.
 */
function resolvePreferredLocale(): string | undefined {
  const raw =
    process.env.LOG_LOCALE ||
    process.env.LC_ALL ||
    process.env.LC_TIME ||
    process.env.LANG;
  if (!raw) return undefined;
  // Strip both the ".codeset" and the "@modifier" suffix; neither is valid in
  // a BCP 47 tag, so leaving them in makes toLocaleString() throw.
  const base = String(raw).split(/[.@]/)[0].trim();
  if (!base || /^(c|posix)$/i.test(base)) return undefined;
  return base.replace(/_/g, "-");
}
/**
 * Format the current time for log lines.
 *
 * Uses the locale from resolvePreferredLocale() with a 24-hour, two-digit
 * layout including a short time-zone name. If the locale is rejected by
 * toLocaleString(), falls back to a fixed `DD/MM/YYYY HH:mm:ss` local-time string.
 */
function localTimestamp(): string {
  const locale = resolvePreferredLocale();
  const format: Intl.DateTimeFormatOptions = {
    hour12: false,
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    timeZoneName: "short",
  };
  try {
    return new Date().toLocaleString(locale, format);
  } catch {
    // Locale tag was not accepted: build the timestamp by hand.
    const now = new Date();
    const two = (n: number): string => String(n).padStart(2, "0");
    const datePart = [
      two(now.getDate()),
      two(now.getMonth() + 1),
      now.getFullYear(),
    ].join("/");
    const timePart = [
      two(now.getHours()),
      two(now.getMinutes()),
      two(now.getSeconds()),
    ].join(":");
    return `${datePart} ${timePart}`;
  }
}
/**
 * Append a timestamped line to the plugin log file and mirror it to stdout.
 * File errors are ignored so that logging can never break a tool call.
 */
function log(message: string): void {
  const entry = `${localTimestamp()} - ${message}`;
  try {
    const parent = path.dirname(logFile);
    if (!fs.existsSync(parent)) {
      fs.mkdirSync(parent, { recursive: true });
    }
    fs.appendFileSync(logFile, `${entry}\n`);
  } catch {}
  console.log(entry.trim());
}
/**
 * Append an error (message plus stack for Error instances, String(error)
 * otherwise) to `error.log` in the logs directory. When DEBUG_MODE is on the
 * same block is written to stderr. Never throws.
 */
async function logError(error: unknown): Promise<void> {
  try {
    const target = path.join(logsDir, "error.log");
    try {
      await fs.promises.mkdir(logsDir, { recursive: true });
    } catch {}
    const when = localTimestamp();
    let details: string;
    if (error instanceof Error) {
      details = `${error.message}\n${error.stack}`;
    } else {
      details = String(error);
    }
    const block = `${when} - ERROR:\n${details}\n\n`;
    await fs.promises.appendFile(target, block);
    if (DEBUG_MODE) {
      console.error(block);
    }
  } catch {}
}
/**
 * Append a raw backend payload to `error.log`, tagged with the status code
 * when one is supplied. Never throws.
 *
 * @param raw    Text to record verbatim.
 * @param status Optional numeric status shown in the block header.
 */
async function appendErrorRaw(raw: string, status?: number): Promise<void> {
  try {
    const target = path.join(logsDir, "error.log");
    try {
      await fs.promises.mkdir(logsDir, { recursive: true });
    } catch {}
    const when = localTimestamp();
    let header = "BACKEND RAW";
    if (typeof status === "number") {
      header = `BACKEND RAW (status ${status})`;
    }
    await fs.promises.appendFile(target, `${when} - ${header}:\n${raw}\n\n`);
  } catch {}
}
/** Current UTC time as an ISO-8601 string with ":" and "." replaced by "-". */
function isoStamp(): string {
  const iso = new Date().toISOString();
  return iso.split(/[:.]/).join("-");
}
/** Compact UTC timestamp for filenames, e.g. 20251115T232635722Z. */
function isoStampCompact(): string {
  const now = new Date();
  const pad = (value: number, width = 2): string =>
    String(value).padStart(width, "0");
  const datePart = [
    now.getUTCFullYear(),
    pad(now.getUTCMonth() + 1),
    pad(now.getUTCDate()),
  ].join("");
  const timePart = [
    pad(now.getUTCHours()),
    pad(now.getUTCMinutes()),
    pad(now.getUTCSeconds()),
    pad(now.getUTCMilliseconds(), 3),
  ].join("");
  return `${datePart}T${timePart}Z`;
}
/** Convert an absolute filesystem path into its percent-encoded `file://` URL string. */
function encodeFileUrl(abs: string): string {
  const url = pathToFileURL(abs);
  return url.href;
}
/**
 * Return a shallow copy of `obj` without any key that starts with "_".
 * This prevents user injection of internal/legacy knobs (e.g. _dt_*, _i2i_*).
 * Non-object or nullish input yields an empty object.
 */
function stripInternalToolKeys<
  T extends Record<string, any> | null | undefined
>(obj: T): Record<string, any> {
  if (!obj || typeof obj !== "object") return {};
  const publicEntries = Object.entries(obj).filter(
    ([key]) => !key.startsWith("_")
  );
  return Object.fromEntries(publicEntries);
}
/**
 * Sniff the leading bytes of a buffer and report whether it looks like a
 * PNG, JPEG or WEBP image. Buffers shorter than 12 bytes are rejected.
 */
function isSupportedImageBuffer(buf: Buffer): boolean {
  if (!buf || buf.length < 12) return false;

  // True when `signature` appears in `buf` starting at `offset`.
  const hasBytesAt = (offset: number, signature: readonly number[]): boolean =>
    signature.every((byte, i) => buf[offset + i] === byte);

  const looksLikePng = hasBytesAt(
    0,
    [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]
  );
  const looksLikeJpeg = hasBytesAt(0, [0xff, 0xd8]);
  // "RIFF" at offset 0 and "WEBP" at offset 8.
  const looksLikeWebp =
    hasBytesAt(0, [0x52, 0x49, 0x46, 0x46]) &&
    hasBytesAt(8, [0x57, 0x45, 0x42, 0x50]);

  return looksLikePng || looksLikeJpeg || looksLikeWebp;
}
/**
 * Report whether a buffer starts with the 8-byte PNG signature.
 *
 * Always returns a real boolean: a missing (`null`/`undefined`) or too-short
 * buffer yields `false` instead of leaking the falsy input value.
 */
function isPng(buf: Buffer): boolean {
  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  if (!buf || buf.length < signature.length) return false;
  return signature.every((byte, i) => buf[i] === byte);
}
/** Pools a source reference can point at. */
type SourcePool = "attachment" | "variant" | "picture" | "image";

/**
 * Trim and lower-case a source reference; a bare pool letter
 * ("a", "v", "p" or "i") is expanded to its first entry ("a1", ...).
 */
function normalizeSourceNotation(s: string): string {
  const token = String(s || "")
    .trim()
    .toLowerCase();
  return /^[avpi]$/.test(token) ? `${token}1` : token;
}
/**
 * Parse a pool-prefixed source reference such as "a2", "V 3" or "p".
 *
 * @returns The pool and a 1-based index (values below 1 are raised to 1),
 *          or `null` when the text is not in `<letter><digits>` form.
 */
function parsePrefixedNotation(
  s: string
): { pool: SourcePool; index: number } | null {
  const match = /^([avpi])\s*(\d+)$/.exec(normalizeSourceNotation(s));
  if (match === null) return null;

  let pool: SourcePool;
  switch (match[1]) {
    case "a":
      pool = "attachment";
      break;
    case "v":
      pool = "variant";
      break;
    case "i":
      pool = "image";
      break;
    default:
      pool = "picture";
  }
  return { pool, index: Math.max(1, parseInt(match[2], 10)) };
}
/**
 * Parse a reference made only of digits into a 1-based index
 * (values below 1 are raised to 1). Returns `null` for anything else.
 */
function parseDigitOnlyNotation(s: string): number | null {
  const digits = /^(\d+)$/.exec(String(s || "").trim());
  return digits ? Math.max(1, parseInt(digits[1], 10)) : null;
}
/**
 * Persist an image buffer into `preferredDir`.
 *
 * - Buffers that are not PNG/JPEG/WEBP are written unchanged as `<name>.bin`
 *   and reported as `application/octet-stream`.
 * - Otherwise the buffer is converted to PNG when needed, optionally gets XMP
 *   metadata injected (only when `xmpParams` is given and EMBED_PNG_METADATA
 *   is 1/true/yes; it defaults to "true"), and is written as `<name>.png`.
 * - If any of that fails, the raw source bytes are written under the PNG name.
 *
 * @param sourceBuffer      Image bytes to store.
 * @param preferredDir      Target directory; created when missing.
 * @param preferredFileName File name; ".png" is appended unless already present.
 *                          A timestamped name is generated when omitted.
 * @param xmpParams         Metadata to embed, if any.
 * @returns Path, file name, byte size, MIME type and file:// URL of the written file.
 * @throws Error when `preferredDir` is missing.
 */
async function saveOriginalPng(
  sourceBuffer: Buffer,
  preferredDir?: string,
  preferredFileName?: string,
  xmpParams?: PngXmpParams
) {
  if (!preferredDir) {
    throw new Error(
      "No output directory resolved (LM Studio chat working directory missing)."
    );
  }
  const outDir = path.resolve(preferredDir);
  await fs.promises.mkdir(outDir, { recursive: true });

  let fileName: string;
  if (!preferredFileName) {
    fileName = `generated-image-${isoStampCompact()}.png`;
  } else if (preferredFileName.endsWith(".png")) {
    fileName = preferredFileName;
  } else {
    fileName = `${preferredFileName}.png`;
  }
  const pngPath = path.join(outDir, fileName);

  const shouldEmbed =
    xmpParams != null &&
    /^(1|true|yes)$/i.test(String(process.env.EMBED_PNG_METADATA ?? "true").trim());

  try {
    if (!isSupportedImageBuffer(sourceBuffer)) {
      // Unknown payload: keep the bytes as-is under a .bin name.
      const rawName = fileName.replace(/\.png$/i, ".bin");
      const rawPath = path.join(outDir, rawName);
      await fs.promises.writeFile(rawPath, sourceBuffer);
      const rawStat = await fs.promises.stat(rawPath);
      return {
        savedPath: rawPath,
        fileName: rawName,
        size: rawStat.size,
        mimeType: "application/octet-stream" as const,
        fileUrl: encodeFileUrl(rawPath),
      };
    }
    let encoded: Buffer = isPng(sourceBuffer)
      ? sourceBuffer
      : await imgToPng(sourceBuffer);
    if (shouldEmbed) {
      encoded = injectXmpIntoBuffer(encoded, xmpParams!);
    }
    await fs.promises.writeFile(pngPath, encoded);
  } catch {
    // Best effort: fall back to writing the untouched source bytes.
    await fs.promises.writeFile(pngPath, sourceBuffer);
  }

  const pngStat = await fs.promises.stat(pngPath);
  return {
    savedPath: pngPath,
    fileName,
    size: pngStat.size,
    mimeType: "image/png" as const,
    fileUrl: encodeFileUrl(pngPath),
  };
}
// Legacy TinyPreviewOptions and buildAndSavePreview removed.
// Use generatePreviewFromBuffer() from media-promotion-core/image.js instead.

// Module-level slot for a preview reference: file path, URL, MIME type
// (JPEG or WEBP) and pixel dimensions. Starts out empty (null).
// NOTE(review): nothing in the visible part of this file reads or writes it —
// confirm it is still used further down.
let lastPreviewRef: {
path: string;
url: string;
mimeType: "image/jpeg" | "image/webp";
width: number;
height: number;
} | null = null;
// Module-level slot for an original-image reference (path + URL); starts as null.
// NOTE(review): usage is not visible in this part of the file.
let lastOriginalRef: { path: string; url: string } | null = null;
// Per-chat image tracking (LM Studio) - stores i-value and path for proper lookup.
// Keyed by a string (presumably the LM Studio chat id — confirm against callers).
const LAST_IMAGES_BY_LM_CHAT: Record<
string,
Array<{ i: number; path: string }>
> = {};
// PHASE 4: Sticky mode removed - mode is now effectively required when sources exist
// String-to-string map; by its name it holds the last consumed attachment id
// per LM Studio chat (presumably keyed by chat id — TODO confirm).
const LAST_CONSUMED_ATTACHMENT_ID_BY_LM_CHAT: Record<string, string> = {};
/**
 * Get current connection settings from process.env (set by toolsProvider).
 *
 * Reads DRAW_THINGS_HOST, DRAW_THINGS_HTTP_PORT and DRAW_THINGS_GRPC_PORT and
 * passes them to getEngineConnectionDefaults(). A port variable that is unset,
 * non-numeric or outside 1..65535 is passed as `undefined` (instead of NaN or
 * an unusable number) so the defaults apply.
 *
 * @returns Whatever getEngineConnectionDefaults() returns for these overrides.
 */
function getCurrentConnectionSettings() {
  // Parse a TCP port from an env value; undefined when missing or invalid.
  const portFromEnv = (raw: string | undefined): number | undefined => {
    if (!raw) return undefined;
    const port = parseInt(raw, 10);
    return Number.isInteger(port) && port >= 1 && port <= 65535
      ? port
      : undefined;
  };
  return getEngineConnectionDefaults({
    host: process.env.DRAW_THINGS_HOST,
    httpPort: portFromEnv(process.env.DRAW_THINGS_HTTP_PORT),
    grpcPort: portFromEnv(process.env.DRAW_THINGS_GRPC_PORT),
  });
}
// Backend service: Draw Things only
// HTTP-based service instance, built from the static engineConnectionDefaults
// (base URL falls back to http://127.0.0.1:7860 when none is configured).
const drawthingsService: ImageBackend = new DrawThingsService(
engineConnectionDefaults.http?.baseUrl || "http://127.0.0.1:7860",
engineConnectionDefaults.sharedSecret || undefined
);
// Currently selected backend. Starts as the HTTP service; ensureBackendReady()
// may replace it with a gRPC-backed instance.
let imageService: ImageBackend = drawthingsService;
/**
 * Probe the Draw Things backend and select the transport behind `imageService`.
 *
 * Steps:
 *  1. Read connection settings via getCurrentConnectionSettings().
 *  2. TCP-probe the gRPC target and/or the HTTP endpoint (1200 ms timeout each),
 *     depending on `conn.transport` ("grpc", "http" or "auto"; default "auto").
 *  3. If the gRPC port answers: export the gRPC settings as DRAWTHINGS_* env
 *     vars, load DrawThingsGrpcService dynamically and, when its
 *     checkApiConnection() succeeds, switch `imageService` to it and log an
 *     asset preflight (which models/LoRAs are missing on the server).
 *  4. If gRPC is not usable and the HTTP port answered, use the HTTP service.
 *  5. Publish the choice on globalThis.__DT_SELECTED_TRANSPORT__ and log the
 *     result of a final checkApiConnection().
 *
 * Connection problems are reported through log()/console.error, not thrown.
 */
async function ensureBackendReady(): Promise<void> {
  // Draw Things backend only - always proceed with connection check.
  // Use config-aware connection settings (reads from process.env).
  const conn = getCurrentConnectionSettings();
  const httpBaseUrl =
    conn.http?.baseUrl ||
    `http://${conn.http?.host || "127.0.0.1"}:${conn.http?.port || 7860}`;

  // Split "host:port" on the LAST colon; an optional grpc:// prefix is dropped.
  const resolvedGrpc = (conn.grpc?.target || `127.0.0.1:7859`).replace(
    /^grpc:\/\//i,
    ""
  );
  const lastColon = resolvedGrpc.lastIndexOf(":");
  const host = lastColon > -1 ? resolvedGrpc.slice(0, lastColon) : resolvedGrpc;
  const parsedGrpcPort =
    lastColon > -1 ? parseInt(resolvedGrpc.slice(lastColon + 1), 10) : 7859;
  // A target such as "localhost:" parses to NaN; use the default port instead
  // of probing (and later constructing the service with) "host:NaN".
  const portNum = Number.isFinite(parsedGrpcPort) ? parsedGrpcPort : 7859;

  // Resolve to true when a TCP connection to h:p succeeds within timeoutMs.
  const tcpReachable = (h: string, p: number, timeoutMs: number) =>
    new Promise<boolean>((resolve) => {
      let socket: net.Socket | undefined;
      let timer: ReturnType<typeof setTimeout> | undefined;
      const finish = (reachable: boolean) => {
        if (timer) clearTimeout(timer);
        try {
          socket?.destroy();
        } catch {}
        resolve(reachable);
      };
      try {
        socket = net.connect({ host: h, port: p });
        timer = setTimeout(() => finish(false), timeoutMs);
        socket.once("connect", () => finish(true));
        socket.once("error", () => finish(false));
      } catch {
        finish(false);
      }
    });

  const transport = conn.transport || "auto";
  const wantGrpc = transport === "grpc" || transport === "auto";
  const wantHttp = transport === "http" || transport === "auto";
  const grpcOk = wantGrpc ? await tcpReachable(host, portNum, 1200) : false;

  // Parse the HTTP base URL once; probe HTTP only when desired.
  const parsedHttpUrl = (() => {
    try {
      return new URL(httpBaseUrl);
    } catch {
      return null;
    }
  })();
  const httpProbeHost = parsedHttpUrl?.hostname || "127.0.0.1";
  const httpProbePort = Number(parsedHttpUrl?.port) || 7860;
  const httpOk = wantHttp
    ? await tcpReachable(httpProbeHost, httpProbePort, 1200)
    : false;
  // Describe the endpoint with the port that was actually probed, so the
  // OK/UNAVAILABLE status below refers to the address shown next to it.
  const httpDesc = parsedHttpUrl
    ? `${parsedHttpUrl.protocol}//${parsedHttpUrl.hostname}:${httpProbePort}`
    : httpBaseUrl;

  log(
    [
      "Attempting to connect to Draw Things API at:",
      ` grpc://${host}:${portNum} - ${grpcOk ? "OK" : "UNAVAILABLE"}`,
      ` ${httpDesc} - ${httpOk ? "OK" : "UNAVAILABLE"}`,
      "",
      "Starting service...",
      "",
    ].join("\n")
  );

  let usedTransport: "grpc" | "http" | null = null;

  // Point imageService at the HTTP service and record the choice.
  const useHttpBackend = (): void => {
    imageService = drawthingsService;
    imageService.setBaseUrl(httpBaseUrl);
    usedTransport = "http";
  };

  // Startup-only: log if SOLL models/LoRAs exist on the gRPC server.
  // Non-blocking by design; it helps diagnose silent fallback behavior.
  const logGrpcAssetPreflight = async (grpcSvc: ImageBackend): Promise<void> => {
    try {
      const client = (grpcSvc as any)?.client;
      if (!client) return;
      const bn = (s: unknown) => {
        try {
          return path.basename(String(s || "").trim());
        } catch {
          return "";
        }
      };
      const {
        MODEL_PRESET_TO_CAPABILITY_KEY,
        selectAutoModel,
        checkModeSupport,
        defaultParams: defaultT2I,
        defaultParamsImg2Img: defaultI2I,
        defaultParamsEdit: defaultEdit,
      } = await import("../core-bundle.mjs");
      const { getModelRequiredFiles } = await import(
        "../services/modelOverlays.js"
      );
      const requiredModels = new Set<string>();
      const requiredLoras = new Set<string>();
      const optionalLoras = new Set<string>();
      // Defaults (used when model preset is "auto" or when no overlay is applied)
      if (defaultT2I?.model) requiredModels.add(bn(defaultT2I.model));
      if (defaultI2I?.model) requiredModels.add(bn(defaultI2I.model));
      if (defaultEdit?.model) requiredModels.add(bn(defaultEdit.model));
      // Default LoRAs are treated as optional to avoid hard assumptions.
      for (const d of [defaultT2I, defaultI2I, defaultEdit]) {
        const ls = Array.isArray((d as any)?.loras) ? (d as any).loras : [];
        for (const l of ls) {
          const f = bn(l?.file);
          if (f) optionalLoras.add(f);
        }
      }
      const toolModes = [
        "text2image",
        "image2image",
        "edit",
        "text2video",
        "image2video",
      ] as const;
      const toOverlayMode = (m: (typeof toolModes)[number]) =>
        m === "text2image"
          ? "txt2img"
          : m === "image2image"
          ? "img2img"
          : m === "text2video"
          ? "txt2vid"
          : m === "image2video"
          ? "img2vid"
          : "edit";
      const presetKeys = Object.keys(
        MODEL_PRESET_TO_CAPABILITY_KEY || {}
      ) as string[];
      // Overlay SOLL files (models + LoRAs)
      for (const preset of presetKeys) {
        for (const tm of toolModes) {
          const supported = checkModeSupport(preset, tm);
          if (!(supported as any)?.supported) continue;
          const files: string[] = getModelRequiredFiles(
            preset,
            toOverlayMode(tm)
          );
          for (const fRaw of files) {
            const f = bn(fRaw);
            if (!f) continue;
            // Files whose name contains "lora" are counted as LoRAs.
            if (/lora/i.test(f)) requiredLoras.add(f);
            else requiredModels.add(f);
          }
        }
      }
      // Custom Configs: Not checked at warmup (requires config access via toolsProvider).
      // Hard-fail happens per-request in the gRPC backend if model/LoRA is missing.
      // Explicit log of auto resolution
      const autoMap = toolModes.map((m) => `${m}→${selectAutoModel(m)}`);
      log(`[startup] auto preset resolution: ${autoMap.join(", ")}`);
      const allToCheck = [
        ...Array.from(requiredModels),
        ...Array.from(requiredLoras),
        ...Array.from(optionalLoras),
      ];
      const sharedSecret = process.env.DRAWTHINGS_SHARED_SECRET;
      const ex = await checkDrawThingsGrpcFilesExist({
        client,
        sharedSecret: sharedSecret || undefined,
        files: allToCheck,
      });
      if (!ex.usedFilesExist) {
        log(
          "[startup] gRPC asset preflight skipped (FilesExist RPC unavailable or failed)."
        );
        return;
      }
      const missingSet = new Set(ex.missing);
      const missingModels = Array.from(requiredModels).filter((f) =>
        missingSet.has(f)
      );
      const missingReqLoras = Array.from(requiredLoras).filter((f) =>
        missingSet.has(f)
      );
      const missingOptLoras = Array.from(optionalLoras).filter((f) =>
        missingSet.has(f)
      );
      if (missingModels.length === 0 && missingReqLoras.length === 0) {
        log(
          `[startup] gRPC asset preflight OK: required models=${requiredModels.size}, required LoRAs=${requiredLoras.size}`
        );
      } else {
        if (missingModels.length) {
          log(
            `[startup] gRPC asset preflight MISSING models: ${missingModels.join(
              ", "
            )}`
          );
        }
        if (missingReqLoras.length) {
          log(
            `[startup] gRPC asset preflight MISSING required LoRAs: ${missingReqLoras.join(
              ", "
            )}`
          );
        }
      }
      if (missingOptLoras.length) {
        log(
          `[startup] gRPC asset preflight (optional) missing LoRAs: ${missingOptLoras.join(
            ", "
          )}`
        );
      }
    } catch (e: any) {
      log(`[startup] gRPC asset preflight warning: ${e?.message || String(e)}`);
    }
  };

  if (grpcOk) {
    // Map defaults to expected gRPC envs for downstream service compatibility
    try {
      if (conn.grpc?.target)
        process.env.DRAWTHINGS_GRPC_TARGET = conn.grpc.target;
      if (conn.grpc?.service)
        process.env.DRAWTHINGS_GRPC_SERVICE = conn.grpc.service;
      if (conn.grpc?.compression)
        process.env.DRAWTHINGS_GRPC_COMPRESSION = conn.grpc.compression as any;
      if (conn.grpc?.acceptEncoding)
        process.env.DRAWTHINGS_GRPC_ACCEPT_ENCODING = conn.grpc
          .acceptEncoding as any;
      if (conn.grpc?.protoPath)
        process.env.DRAWTHINGS_GRPC_PROTO = conn.grpc.protoPath as any;
      if (conn.sharedSecret)
        process.env.DRAWTHINGS_SHARED_SECRET = conn.sharedSecret as any;
    } catch {}
    try {
      const mod = await import("../services/drawThingsGrpcService.js");
      const GrpcCtor = (mod as any)?.DrawThingsGrpcService;
      if (typeof GrpcCtor !== "function")
        throw new Error("DrawThingsGrpcService not exported");
      const grpcSvc: ImageBackend = new GrpcCtor(`${host}:${portNum}`);
      const ok = await grpcSvc.checkApiConnection();
      if (ok) {
        imageService = grpcSvc;
        usedTransport = "grpc";
        await logGrpcAssetPreflight(grpcSvc);
      } else if (httpOk && wantHttp) {
        // fall back to HTTP if desired and reachable
        useHttpBackend();
      } else {
        console.error(
          "Draw Things gRPC reachable but not ready; no HTTP fallback available."
        );
      }
    } catch (e: any) {
      console.error(
        `Draw Things gRPC init failed: ${e?.message || String(e)}.`
      );
      // prefer HTTP fallback on init error
      if (httpOk && wantHttp) useHttpBackend();
    }
  } else if (httpOk) {
    useHttpBackend();
  } else {
    imageService = drawthingsService; // not connected yet
  }

  (globalThis as any).__DT_SELECTED_TRANSPORT__ = usedTransport;
  try {
    const isApiConnected = await imageService.checkApiConnection();
    const t = (globalThis as any).__DT_SELECTED_TRANSPORT__;
    const suffix = t === "grpc" ? " - gRPC" : t === "http" ? " - HTTP" : "";
    if (isApiConnected) {
      log(`Connected to Draw Things API${suffix}.`);
      if (t === "grpc") {
        const sec =
          (imageService as any)?.currentSecurity ??
          (globalThis as any).__DT_GRPC_TLS_SELECTED__ ??
          "unknown";
        log(`[gRPC] security: ${sec}`);
      }
    } else {
      log(`Failed to connect to Draw Things API.`);
    }
  } catch {}
}
/**
 * Startup warmup entrypoint.
 *
 * Called by the Tools Provider while the plugin initializes, so that the
 * backend probe (and the gRPC model/LoRA preflight logging) has already run
 * when the first tool call arrives. Simply delegates to ensureBackendReady().
 */
export async function warmupBackendAtStartup(): Promise<void> {
  return ensureBackendReady();
}
/**
 * Utility: read the prompt and mode of the most recent audit record.
 *
 * Reads `generate-image-plugin.audit.jsonl` from the logs directory, splits it
 * on blank lines, and walks the chunks from newest to oldest until one parses
 * as a JSON object. The prompt is taken from `output.prompt_used` (what was
 * actually used).
 *
 * @returns `{ prompt, mode }` (each `undefined` when not a string), or `null`
 *          when the file is missing, empty or has no parseable record.
 */
async function getLastAuditPromptAndMode(): Promise<{
  prompt?: string;
  mode?: string;
} | null> {
  try {
    const auditPath = path.join(logsDir, "generate-image-plugin.audit.jsonl");
    let raw = "";
    try {
      raw = await fs.promises.readFile(auditPath, "utf8");
    } catch {
      raw = "";
    }
    if (!raw) return null;

    const records = raw
      .split(/\n\s*\n/g)
      .map((chunk) => chunk.trim())
      .filter((chunk) => chunk.length > 0);

    for (const record of records.reverse()) {
      let parsed: any;
      try {
        parsed = JSON.parse(record);
      } catch {
        continue;
      }
      if (!parsed || typeof parsed !== "object") continue;
      const promptUsed = parsed.output?.prompt_used;
      return {
        prompt: typeof promptUsed === "string" ? promptUsed : undefined,
        mode: typeof parsed.mode === "string" ? parsed.mode : undefined,
      };
    }
    return null;
  } catch {
    return null;
  }
}
/**
 * Locate an LM Studio conversation file.
 *
 * With `chatId`, returns `<home>/conversations/<chatId>.conversation.json` if
 * it is an existing file. Without it, returns the most recently modified
 * `*.conversation.json` in that directory.
 *
 * @returns Absolute path, or `null` when nothing suitable exists or any
 *          filesystem access fails.
 */
async function getLMConversationFilePath(
  chatId?: string
): Promise<string | null> {
  try {
    const conversationsDir = path.join(findLMStudioHome(), "conversations");
    if (!fs.existsSync(conversationsDir)) return null;

    // Stats of `p` when it is a regular file, otherwise null.
    const statIfFile = async (p: string) => {
      try {
        const info = await fs.promises.stat(p);
        return info.isFile() ? info : null;
      } catch {
        return null;
      }
    };

    if (chatId) {
      const candidate = path.join(
        conversationsDir,
        `${chatId}.conversation.json`
      );
      return (await statIfFile(candidate)) ? candidate : null;
    }

    let names: string[];
    try {
      names = await fs.promises.readdir(conversationsDir);
    } catch {
      names = [];
    }
    const candidates = names
      .filter((name) => name.endsWith(".conversation.json"))
      .map((name) => path.join(conversationsDir, name));
    if (candidates.length === 0) return null;

    const infos = await Promise.all(candidates.map(statIfFile));
    // Keep the first file with the highest mtime.
    let newest: { p: string; t: number } | null = null;
    for (let idx = 0; idx < candidates.length; idx++) {
      const info = infos[idx];
      if (!info) continue;
      if (newest === null || info.mtimeMs > newest.t) {
        newest = { p: candidates[idx], t: info.mtimeMs };
      }
    }
    return newest === null ? null : newest.p;
  } catch {
    return null;
  }
}
/**
 * Find the most recently referenced group of generated image variants in an
 * LM Studio conversation file.
 *
 * The conversation text is scanned for file:// URLs of originals
 * (`image-*-i<N>.png`) and of previews (`preview-image-*-i<N>.jpg|jpeg|webp`);
 * preview URLs are rewritten to the corresponding original PNG path. Matches
 * are grouped by base name, and the group whose latest match sits furthest
 * into the text wins.
 *
 * @param chatId Conversation to read; when empty, getLMConversationFilePath()
 *               chooses the file.
 * @returns Absolute paths of the winning group's variants 1..3 that exist on
 *          disk, in ascending variant order, or `null` when there are none or
 *          anything fails.
 */
async function getLastVariantGroupFromLMConversation(
chatId?: string | null
): Promise<string[] | null> {
try {
const convPath = await getLMConversationFilePath(chatId || undefined);
if (!convPath) return null;
const text = await fs.promises.readFile(convPath, "utf8");
// Per base name: offset of the latest match, variant numbers seen, and the
// original PNG path for each variant number.
type GroupInfo = {
lastIndex: number;
variants: Set<number>;
originals: Map<number, string>;
};
const byBase: Map<string, GroupInfo> = new Map();
// Pass 1: original PNGs. Captures: [1] directory kind, [2] base name, [3] variant number.
// NOTE(review): `[\S)"']+` is greedy over every non-whitespace character, so in
// text without whitespace between URLs a single match could start at an earlier
// file:// URL than the one the captures belong to — verify against real files.
const reOrig =
/file:\/\/[\S)"']+\/(images|working-directories\/\d+)\/(image-[^\/]*)-i(\d+)\.png/gi;
let m: RegExpExecArray | null;
while ((m = reOrig.exec(text)) != null) {
try {
const basePlus = m[2];
const iNum = parseInt(m[3], 10);
// Take the first URL inside the match, stopping at whitespace, ')', quotes or '^'.
// NOTE(review): the doubled caret in `[^^...]` also excludes a literal '^' —
// possibly a typo for `[^\s)"']`.
const urlStr = m[0].match(/file:\/\/[^^\s)"']+/i)?.[0];
if (!urlStr) continue;
let absPath: string | null = null;
try {
absPath = fileURLToPath(urlStr);
} catch {
absPath = null;
}
if (!absPath) continue;
const g = byBase.get(basePlus) || {
lastIndex: m.index,
variants: new Set<number>(),
originals: new Map<number, string>(),
};
g.lastIndex = Math.max(g.lastIndex, m.index);
g.variants.add(iNum);
g.originals.set(iNum, absPath);
byBase.set(basePlus, g);
} catch {}
}
// Pass 2: preview images, mapped back to the original PNG they belong to.
const rePrev =
/file:\/\/[\S)"']+\/(images\/previews|working-directories\/\d+)\/(preview-image-[^\/]*)-i(\d+)\.(jpg|jpeg|webp)/gi;
while ((m = rePrev.exec(text)) != null) {
try {
const nameWithPreview = m[2];
const iNum = parseInt(m[3], 10);
// Rewrite preview URL -> original URL: drop the "previews/" directory, drop
// the first "preview-" occurrence, and switch the extension to .png.
// (The working-directories replacement substitutes the same text back.)
const urlStr = m[0]
.replace(/\/images\/previews\//i, "/images/")
.replace(
/\/working-directories\/(\d+)\//i,
"/working-directories/$1/"
)
.replace(/preview-/, "")
.replace(/\.(jpg|jpeg|webp)$/i, ".png");
let absPath: string | null = null;
try {
const urlOnly = urlStr.match(/file:\/\/[^^\s)"']+/i)?.[0];
absPath = urlOnly ? fileURLToPath(urlOnly) : null;
} catch {
absPath = null;
}
if (!absPath) continue;
// Group previews under the same key as their originals.
const originalBase = nameWithPreview.replace(/^preview-/, "");
const g = byBase.get(originalBase) || {
lastIndex: m.index,
variants: new Set<number>(),
originals: new Map<number, string>(),
};
g.lastIndex = Math.max(g.lastIndex, m.index);
g.variants.add(iNum);
g.originals.set(iNum, absPath);
byBase.set(originalBase, g);
} catch {}
}
if (byBase.size === 0) return null;
// Winner: the group with the highest match offset.
const best = Array.from(byBase.entries())
.map(([base, g]) => ({ base, g }))
.sort((a, b) => b.g.lastIndex - a.g.lastIndex)[0];
if (!best) return null;
const out: string[] = [];
// Only variant numbers 1..3 are considered; files that no longer exist are skipped.
for (let v = 1; v <= 3; v++) {
const p = best.g.originals.get(v);
if (!p) continue;
const exists = await fs.promises
.stat(p)
.then((s) => s.isFile())
.catch(() => false);
if (exists) out.push(p);
}
return out.length > 0 ? out : null;
} catch {
return null;
}
}
/**
 * Decide which event is the most recent in the conversation file.
 *
 * Compares the text offset of the latest reference to a generated image with
 * variant number 1 (original PNG or preview) against the offset of the latest
 * user image attachment entry.
 *
 * @returns
 *  - "image" when the latest i1 image reference comes after the last attachment
 *  - "attachment" otherwise, as long as at least one of the two was found
 *  - null when neither could be detected or the file cannot be read
 */
async function getLastEventTypeFromLMConversation(
  chatId?: string | null
): Promise<"image" | "attachment" | null> {
  try {
    const convPath = await getLMConversationFilePath(chatId || undefined);
    if (!convPath) return null;
    const text = await fs.promises.readFile(convPath, "utf8");

    // Highest match offset among the matches of `pattern` that `accept`; -1 if none.
    const latestOffset = (
      pattern: RegExp,
      accept: (hit: RegExpExecArray) => boolean
    ): number => {
      let latest = -1;
      for (
        let hit = pattern.exec(text);
        hit !== null;
        hit = pattern.exec(text)
      ) {
        if (accept(hit)) latest = Math.max(latest, hit.index);
      }
      return latest;
    };

    const isFirstVariant = (hit: RegExpExecArray): boolean =>
      parseInt(hit[3], 10) === 1;

    const lastOriginalIdx = latestOffset(
      /file:\/\/[\S)"']+\/(images|working-directories\/\d+)\/(image-[^\/]*)-i(\d+)\.png/gi,
      isFirstVariant
    );
    const lastPreviewIdx = latestOffset(
      /file:\/\/[\S)"']+\/(images\/previews|working-directories\/\d+)\/(preview-image-[^\/]*)-i(\d+)\.(jpg|jpeg|webp)/gi,
      isFirstVariant
    );
    const lastVariantIdx = Math.max(lastOriginalIdx, lastPreviewIdx);

    const lastAttachmentIdx = latestOffset(
      /"(fileIdentifier|identifier)"\s*:\s*"([^"\n]+\.(png|jpg|jpeg|webp|gif|bmp|tif|tiff|heic))"/gi,
      () => true
    );

    if (lastVariantIdx < 0 && lastAttachmentIdx < 0) return null;
    if (lastVariantIdx > lastAttachmentIdx) return "image";
    return "attachment";
  } catch {
    return null;
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// UNIFIED INPUT NORMALIZATION
// Single function for ALL input image preprocessing (i2i, edit, Canvas, Moodboard)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Why normalizeInputBuffer() changed (or did not change) its input:
 * - "clamped_to_requested_raw": both requested output dimensions were given and adopted.
 * - "normalized_to_constraints": size was changed by the sum, alignment or minimum-size rules.
 * - "converted_to_png": size was kept, only the encoding changed to PNG.
 * - "unchanged": the buffer was already a PNG of acceptable size.
 */
export type NormalizeInputReason =
| "clamped_to_requested_raw"
| "normalized_to_constraints"
| "converted_to_png"
| "unchanged";
/** Result of normalizeInputBuffer(). */
export interface NormalizeInputResult {
/** The (possibly resized / re-encoded) image bytes. */
buf: Buffer;
/** Before/after description of the preprocessing step. */
preprocess: {
/** Properties of the input buffer; `format` is only set when the input was a PNG. */
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
/** Properties of the output buffer. */
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
/** Longer side of the output in pixels; only set when the dimensions changed. */
normalizedLongSide?: number;
}
/**
 * Unified input normalization for all image sources (Attachments, Variants, Pictures).
 * Applies in order:
 * 1. Adopt target aspect ratio (if user specified output dimensions)
 * 2. Sum constraint (w + h <= targetSum)
 * 3. Alignment (multiples of drawthingsLimits.align)
 * 4. Minimum dimension (drawthingsLimits.minDim)
 * 5. Resize (cover: scale + centre-crop) and/or convert to PNG
 *
 * @param buf  Encoded source image.
 * @param opts Optional settings:
 *   - requestedRawW / requestedRawH: requested output size; only used when BOTH
 *     are finite numbers greater than 0.
 *   - logPrefix: prefix for log lines (default "[normalize]").
 *   - skipSumConstraint: skip step 2.
 *   - targetSumOverride: replaces drawthingsLimits.targetSum.
 * @returns The normalized buffer plus a before/after description and the reason.
 *
 * NOTE(review): if imgGetSize() reports no dimensions, w and h start at 0, step 3
 * is skipped (0 is a multiple of align) and step 4 divides by 0 — confirm that
 * imgGetSize() always returns a size for supported inputs.
 */
export async function normalizeInputBuffer(
buf: Buffer,
opts?: {
requestedRawW?: number;
requestedRawH?: number;
logPrefix?: string;
skipSumConstraint?: boolean;
targetSumOverride?: number;
}
): Promise<NormalizeInputResult> {
const prefix = opts?.logPrefix || "[normalize]";
const size = await imgGetSize(buf);
let w = size.width || 0;
let h = size.height || 0;
const align = drawthingsLimits.align;
const minDim = drawthingsLimits.minDim;
const targetSum = opts?.targetSumOverride ?? drawthingsLimits.targetSum;
// Remember the input's properties for the preprocess report.
const origW = w;
const origH = h;
// Only PNG is detected here; any other input format is reported as undefined.
const origFmt = isPng(buf) ? "png" : undefined;
const origBytes = buf.byteLength;
let reason: NormalizeInputReason = "unchanged";
// 1. Adopt target aspect ratio when both requested dims are given.
// The adjusted image must match the OUTPUT format (e.g. landscape)
// rather than preserving the source image's aspect ratio.
// resizeCoverToPng() then uniformly scales + centre-crops the original
// to fill these dimensions without distortion.
// Subsequent steps (sum clamp, 64-alignment, minDim) refine the size.
const hasReqW =
typeof opts?.requestedRawW === "number" &&
Number.isFinite(opts.requestedRawW) &&
opts.requestedRawW > 0;
const hasReqH =
typeof opts?.requestedRawH === "number" &&
Number.isFinite(opts.requestedRawH) &&
opts.requestedRawH > 0;
if (hasReqW && hasReqH) {
w = opts!.requestedRawW!;
h = opts!.requestedRawH!;
reason = "clamped_to_requested_raw";
log(`${prefix} adopting target dimensions: ${origW}x${origH} → ${w}x${h}`);
}
// 2. Sum constraint: w + h <= targetSum (skipped for zoom profile)
const currentSum = w + h;
if (!opts?.skipSumConstraint && currentSum > targetSum) {
const aspect = w / Math.max(1, h);
// Ideal (unaligned) size with the same aspect ratio and rawW + rawH === targetSum.
const rawH = targetSum / (aspect + 1);
const rawW = aspect * rawH;
// Build candidates from floor/round/ceil on each axis, plus aspect-derived pairs.
// Independent rounding (round×round) can distort the aspect ratio when both
// axes happen to align to different multiples. The candidate selection below
// picks the pair that best preserves the target aspect ratio while satisfying
// the sum constraint and 64-alignment.
const wFloor = Math.max(align, Math.floor(rawW / align) * align);
const wRound = Math.max(align, Math.round(rawW / align) * align);
const wCeil = Math.max(align, Math.ceil(rawW / align) * align);
const hFloor = Math.max(align, Math.floor(rawH / align) * align);
const hRound = Math.max(align, Math.round(rawH / align) * align);
const hCeil = Math.max(align, Math.ceil(rawH / align) * align);
const sumCandidates: Array<{ cw: number; ch: number }> = [];
for (const cw of [wFloor, wRound, wCeil]) {
for (const ch of [hFloor, hRound, hCeil]) {
if (cw + ch <= targetSum) sumCandidates.push({ cw, ch });
}
}
// Aspect-derived pairs: derive one axis from the other
for (const cw of [wFloor, wRound]) {
const ch = Math.max(align, Math.round(cw / aspect / align) * align);
if (cw + ch <= targetSum) sumCandidates.push({ cw, ch });
}
for (const ch of [hFloor, hRound]) {
const cw = Math.max(align, Math.round(ch * aspect / align) * align);
if (cw + ch <= targetSum) sumCandidates.push({ cw, ch });
}
if (sumCandidates.length > 0) {
// Rank by aspect-ratio error first, then by distance to the ideal raw size.
sumCandidates.sort((a, b) => {
const ae = Math.abs(a.cw / Math.max(1, a.ch) - aspect);
const be = Math.abs(b.cw / Math.max(1, b.ch) - aspect);
if (ae !== be) return ae - be;
return (Math.abs(a.cw - rawW) + Math.abs(a.ch - rawH)) -
(Math.abs(b.cw - rawW) + Math.abs(b.ch - rawH));
});
w = sumCandidates[0].cw;
h = sumCandidates[0].ch;
} else {
// Fallback: reduce by align increments (should not happen within normal limits)
let fbW = wFloor;
let fbH = hFloor;
// Shrink the larger axis one alignment step at a time until the sum fits
// or both axes have reached the alignment floor.
while (fbW + fbH > targetSum && (fbW > align || fbH > align)) {
if (fbW >= fbH) fbW = Math.max(align, fbW - align);
else fbH = Math.max(align, fbH - align);
}
w = fbW;
h = fbH;
}
if (reason === "unchanged") reason = "normalized_to_constraints";
}
// 3. Alignment: round to multiples of 64 while preserving aspect ratio.
// Independent floor on each axis would distort the aspect (e.g. 523×697 → 512×640).
// Instead, pick the (floor,round) / (round,floor) / (floor,floor) / (round,round)
// combination that best preserves the original aspect ratio.
if (w % align !== 0 || h % align !== 0) {
const aspect = w / Math.max(1, h);
const wFloor = Math.max(align, Math.floor(w / align) * align);
const wRound = Math.max(align, Math.round(w / align) * align);
const hFloor = Math.max(align, Math.floor(h / align) * align);
const hRound = Math.max(align, Math.round(h / align) * align);
const candidates: Array<{ cw: number; ch: number }> = [
{ cw: wFloor, ch: hFloor },
{ cw: wFloor, ch: hRound },
{ cw: wRound, ch: hFloor },
{ cw: wRound, ch: hRound },
];
// Also try deriving one axis from the other to hit exact aspect multiples.
for (const wBase of [wFloor, wRound]) {
const hDerived = Math.max(align, Math.round(wBase / aspect / align) * align);
candidates.push({ cw: wBase, ch: hDerived });
}
for (const hBase of [hFloor, hRound]) {
const wDerived = Math.max(align, Math.round(hBase * aspect / align) * align);
candidates.push({ cw: wDerived, ch: hBase });
}
// Filter valid candidates (within targetSum, ≥ align), then pick by lowest aspect error.
// NOTE(review): this filter applies the targetSum limit even when
// opts.skipSumConstraint is set, so oversized inputs in that mode drop to the
// independent-floor fallback below — confirm this is intended.
const valid = candidates.filter(
(c) => c.cw >= align && c.ch >= align && c.cw + c.ch <= targetSum
);
if (valid.length > 0) {
valid.sort((a, b) => {
const ae = Math.abs(a.cw / Math.max(1, a.ch) - aspect);
const be = Math.abs(b.cw / Math.max(1, b.ch) - aspect);
if (ae !== be) return ae - be;
// Tie-break: prefer closer to original pixel count.
const ad = Math.abs(a.cw - w) + Math.abs(a.ch - h);
const bd = Math.abs(b.cw - w) + Math.abs(b.ch - h);
return ad - bd;
});
w = valid[0].cw;
h = valid[0].ch;
} else {
// Hard fallback: independent floor (should not happen within normal limits).
w = Math.max(align, Math.floor(w / align) * align);
h = Math.max(align, Math.floor(h / align) * align);
}
if (reason === "unchanged") reason = "normalized_to_constraints";
}
// 4. Minimum dimension: upscale only if needed to satisfy minDim
if (w < minDim || h < minDim) {
// Scale both axes by the factor that brings the shorter side up to minDim.
const scale = minDim / Math.min(w, h);
w = Math.round(w * scale);
h = Math.round(h * scale);
// Re-align after upscale
w = Math.max(align, Math.floor(w / align) * align);
h = Math.max(align, Math.floor(h / align) * align);
// Ensure sum constraint still met after minDim upscale
// NOTE(review): like step 3, this runs even when opts.skipSumConstraint is set.
while (w + h > targetSum && (w > minDim || h > minDim)) {
if (w > h) w = Math.max(minDim, w - align);
else h = Math.max(minDim, h - align);
}
if (reason === "unchanged") reason = "normalized_to_constraints";
}
// 5. Resize and/or convert to PNG
let outBuf: Buffer;
if (w !== origW || h !== origH) {
outBuf = await imgResizeCoverToPng(buf, w, h);
if (reason === "unchanged") reason = "normalized_to_constraints";
log(
`${prefix} dimension normalization: ${origW}x${origH} → ${w}x${h} (sum=${
w + h
})`
);
} else {
// Dimensions already acceptable: pass PNGs through, re-encode anything else.
if (isPng(buf)) {
outBuf = buf;
} else {
outBuf = await imgToPng(buf);
if (reason === "unchanged") reason = "converted_to_png";
}
}
return {
buf: outBuf,
preprocess: {
original: {
width: origW,
height: origH,
format: origFmt,
bytes: origBytes,
},
adjusted: {
width: w,
height: h,
format: "png",
bytes: outBuf.byteLength,
},
reason,
},
// Only reported when the dimensions actually changed.
normalizedLongSide: w !== origW || h !== origH ? Math.max(w, h) : undefined,
};
}
/**
 * Ordered list of input key names for the generate tool.
 *
 * Presumably an allow-list of the user-facing parameters accepted by
 * `generate_image` — verify against the code that consumes it, which is not
 * part of this section of the file.
 *
 * The `as const` assertion makes this a readonly tuple of string literals,
 * so a union of the key names can be derived with
 * `typeof ALLOWED_GEN_INPUT_KEYS[number]`.
 *
 * NOTE(review): `handleGenerateImage` also reads `mode` and `model` from its
 * validated input, and neither is listed here — confirm that is intentional.
 */
const ALLOWED_GEN_INPUT_KEYS = [
  // Text prompt
  "prompt",
  // Output geometry: explicit size or a named format
  "width", "height", "imageFormat",
  // Quality setting and number of variants
  "quality", "variants",
  // Reference-image selectors
  "canvas", "moodboard",
] as const;
export async function handleGenerateImage(
pluginParams: any,
onProgress?: ProgressCallback,
_internal?: { presuppliedSourceBuf?: Buffer; sourceTag?: string; maskBuf?: Buffer; cropMeta?: { left: number; top: number; right: number; bottom: number; cropSource?: string }; auditSourceOverride?: { sourceKind?: "attachment" | "image" | "picture" | "variant"; sourceFileName?: string; sourceOriginalName?: string; sourceOriginAbs?: string; originalDims?: { width: number; height: number; bytes: number } } }
) {
await ensureBackendReady().catch((e) => {
log(`[startup] ensureBackendReady failed: ${String(e)}`);
});
try {
const rawIncoming = pluginParams || {};
// zoom/upscale sources may supply width/height up to ZOOM_TOOL_MAX_DIM (2048),
// which exceeds the standard schema limit (1536). Strip them before schema
// validation and re-inject after, so the rest of the pipeline sees them.
const isZoomLikeSource =
_internal?.sourceTag === "canvas:zoom-in" ||
_internal?.sourceTag === "canvas:upscale";
const rawForValidation = isZoomLikeSource
? (({ width: _w, height: _h, ...rest }) => rest)(rawIncoming)
: rawIncoming;
const parsed = GenerateToolParamsSchemaMinimalStrict.safeParse(rawForValidation);
if (!parsed.success) {
return {
content: [
{
type: "text",
text: `Invalid generate_image parameters: ${formatZodError(
parsed.error
)}`,
},
],
};
}
const input = parsed.data as any;
// Re-inject zoom/upscale dims after schema validation
if (isZoomLikeSource) {
if (rawIncoming.width !== undefined) input.width = rawIncoming.width;
if (rawIncoming.height !== undefined) input.height = rawIncoming.height;
}
// ─────────────────────────────────────────────────────────────────────────
// HARD LIMIT CHECK: Reject requests exceeding maxWidth/maxHeight immediately.
// No silent clamping – explicit error with clear guidance.
// Skipped for zoom-like sources (canvas:zoom-in / canvas:upscale): input.width/height
// represent requestedRaw (the final post-processing target), not the backend input.
// The zoom-pass pipeline handles any requestedRaw size via SeedVR2 + post-processing.
// ─────────────────────────────────────────────────────────────────────────
if (!isZoomLikeSource) {
const reqW = input.width;
const reqH = input.height;
const maxW = drawthingsLimits.maxWidth;
const maxH = drawthingsLimits.maxHeight;
if (typeof reqW === "number" && reqW > maxW) {
log(`[validation] REJECTED: width ${reqW} exceeds maxWidth ${maxW}`);
return {
content: [
{
type: "text",
text: `Invalid width: ${reqW}px exceeds maximum allowed width of ${maxW}px. Please use width ≤ ${maxW}.`,
},
],
isError: true as const,
};
}
if (typeof reqH === "number" && reqH > maxH) {
log(`[validation] REJECTED: height ${reqH} exceeds maxHeight ${maxH}`);
return {
content: [
{
type: "text",
text: `Invalid height: ${reqH}px exceeds maximum allowed height of ${maxH}px. Please use height ≤ ${maxH}.`,
},
],
isError: true as const,
};
}
}
// Preserve the user-requested mode for logging/audit.
// "edit" is a variant of image2image with different defaults and (future) multi-source support.
const requestedMode = input.mode as
| "text2image"
| "image2image"
| "edit"
| "text2video"
| "image2video"
| undefined;
const isEditMode = requestedMode === "edit";
log(`generate_image input: ${JSON.stringify(input)}`);
try {
void getHealthyServerBaseUrl();
} catch {}
// Validate model/mode compatibility early
const modelPreset = (input.model as string) || "auto";
const modeForValidation = requestedMode || "text2image"; // default mode if not specified
// Import capability check and custom configs (dynamic to avoid circular deps at module load)
const {
checkModeSupport,
checkModeSupportWithCustom,
selectAutoModel,
detectImageModelCapabilities,
getCapabilityKeyForPreset,
} = await import("../core-bundle.mjs");
const { getAvailableCustomCombinations, getCustomPreset } = await import(
"../services/customConfigsLoader.js"
);
// Use extended check that includes Custom Configs info
const modeCheck = checkModeSupportWithCustom(
modelPreset,
modeForValidation,
getAvailableCustomCombinations
);
if (!modeCheck.supported) {
log(`[validation] mode/model incompatible: ${modeCheck.reason}`);
return {
content: [
{
type: "text",
text: modeCheck.reason,
},
],
isError: true as const,
};
}
// NOTE: "model=auto" means: do not apply an overlay; backend uses mode-specific defaults.
// We still compute an "effectiveModelPreset" for capability logic (e.g. edit-mode limits),
// but logging should reflect the engine model that will actually be used.
const effectiveModelPreset =
modelPreset === "auto" ? selectAutoModel(modeForValidation) : modelPreset;
// Resolve actual .ckpt filename for logging
const modeForFilename =
modeForValidation === "edit"
? "edit"
: modeForValidation === "image2image"
? "img2img"
: modeForValidation === "text2video"
? "txt2vid"
: modeForValidation === "image2video"
? "img2vid"
: "txt2img";
// If the user did not pick a model (or explicitly picked "auto"), the backend will use
// the per-mode defaultParams*.model value (no overlay). Log that to avoid confusion.
let engineDefaultModel: string | null = null;
if (modelPreset === "auto") {
try {
if (modeForFilename === "txt2img") {
const { defaultParams } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParams as any)?.model === "string"
? (defaultParams as any).model
: null;
} else if (modeForFilename === "img2img") {
const { defaultParamsImg2Img } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParamsImg2Img as any)?.model === "string"
? (defaultParamsImg2Img as any).model
: null;
} else if (modeForFilename === "txt2vid") {
engineDefaultModel = null;
} else if (modeForFilename === "img2vid") {
engineDefaultModel = null;
} else {
const { defaultParamsEdit } = await import(
"../core-bundle.mjs"
);
engineDefaultModel =
typeof (defaultParamsEdit as any)?.model === "string"
? (defaultParamsEdit as any).model
: null;
}
} catch {}
}
const customPresetModel: string | null = (() => {
if (!getModelFilename(effectiveModelPreset, modeForFilename)) {
const raw = getCustomPreset(`${modeForValidation}.${effectiveModelPreset}`)?.params?.model;
if (typeof raw === "string" && raw.trim()) return path.basename(raw.trim());
}
return null;
})();
const effectiveModelFilename =
(engineDefaultModel ? path.basename(engineDefaultModel) : null) ||
getModelFilename(effectiveModelPreset, modeForFilename) ||
customPresetModel ||
effectiveModelPreset;
log(
`[validation] model=${modelPreset} → engineModel=${effectiveModelFilename} mode=${modeForValidation}`
);
// Draw Things backend only
const svc: ImageBackend = imageService;
const resolvedName = "drawthings";
const requestedVariants = input.variants;
const usedVariants =
typeof requestedVariants === "number"
? Math.max(1, Math.min(4, Math.round(requestedVariants)))
: 1;
if (
typeof requestedVariants === "number" &&
requestedVariants !== usedVariants
) {
log(`variants: requested=${requestedVariants} used=${usedVariants}`);
}
log(`generate_image: using backend='${resolvedName}'`);
const mode =
(input.mode as "text2image" | "image2image" | "edit" | "text2video" | "image2video" | undefined) ||
"text2image";
const rawCanvas =
typeof input.canvas === "string" ? input.canvas : undefined;
if (rawCanvas !== undefined && requestedMode === undefined) {
return {
content: [
{
type: "text",
text: "A reference image (`canvas`) was provided but `mode` was not specified. Please re-run and set `mode` to one of: `image2image` (use the reference as a base), `edit` (edit / inpaint the reference), or `image2video` (animate the reference image).",
},
],
isError: true as const,
};
}
const rawMoodboard = Array.isArray((input as any).moodboard)
? ((input as any).moodboard as unknown[])
: undefined;
const moodboardNotations: string[] = (rawMoodboard || [])
.filter((x) => typeof x === "string")
.map((x) => String(x));
let result: ImageGenerationResult | any;
let effectiveMode: "text2image" | "image2image" | "edit" | "text2video" | "image2video" = "text2image";
let sourceTag: string | null = null;
let sourceVariantUsed: number | undefined = undefined;
let sourceKind: "attachment" | "image" | "picture" | "variant" | undefined =
undefined;
let sourceOriginAbs: string | undefined = undefined;
let sourceOriginalName: string | undefined = undefined; // Real original filename (e.g., "Katze.png")
let hasFreshAttachment: boolean = false;
let isAttachmentSource: boolean = false;
// Track reference metadata for summary (used in edit mode multi-reference)
let usedReferenceMeta: Array<{
type: "attachment" | "variant" | "picture" | "image";
index: number;
isCanvas: boolean;
originPath?: string;
originalName?: string;
}> = [];
// Track per-reference preprocessing (normalization) metadata for audit
let usedReferencePreprocess: Array<{
type: "attachment" | "variant" | "picture" | "image";
index: number;
role: "canvas" | "moodboard";
originPath?: string;
originalName?: string;
preprocess: {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
}> = [];
let currentLmChatId: string | null = null;
let currentLmWorkingDir: string | null = null;
let lmResolverConfidence: "high" | "medium" | "low" | undefined = undefined;
let lmResolverReason: string | undefined = undefined;
let stickyScope: "none" | "lm_chat" = "none";
let sourceFileName: string | undefined = undefined;
let lmCrosscheckInfo:
| { referenced: boolean; group_base?: string }
| undefined = undefined;
let sourcePreprocess:
| undefined
| {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
let normalizedToLongSide: number | undefined = undefined;
// requestedRaw: what the user asked for (or the original source size when user omitted).
// requestedEffective: the internally used aligned/clamped size (multiples of 64).
// Backend may still run at a different internal size (e.g. i2i normalization); we track that separately.
let requestedRawW: number | undefined = undefined;
let requestedRawH: number | undefined = undefined;
let requestedEffectiveW: number | undefined = undefined;
let requestedEffectiveH: number | undefined = undefined;
let _dtNeedsUpscaler = false; // propagated from i2i block; read in post-processing
let zoomPassRan = false; // set to true when zoom-pass pipeline completes
let pendingAudit2: ReturnType<typeof buildAuditLogger> | null = null;
let auditMaskBuf: Buffer | undefined = undefined; // post-resize mask PNG; read by audit block
const auditRequestId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
if (resolvedName === "drawthings") {
let srcBuf: Buffer | undefined;
// NOTE: chatWdForContext is computed dynamically to ensure it uses
// the latest currentLmChatId/currentLmWorkingDir after resolution.
const getChatWdForContext = (): string | null =>
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : null);
// V2: Returns all images with stable i-index for proper lookup
const getAllImagesForContext = async (): Promise<Array<{
i: number;
path: string;
}> | null> => {
const chatWdForContext = getChatWdForContext();
// Primary: chat working directory state file (contains ALL images, not just latest generation)
try {
if (chatWdForContext) {
const st: any = await readState(chatWdForContext);
if (st && Array.isArray(st.images) && st.images.length > 0) {
const images = [...st.images]
.filter((v: any) => v && typeof v.filename === "string")
.sort((a: any, b: any) => (a.i || 0) - (b.i || 0));
const result = images.map((v: any) => ({
i: v.i || 1,
path: path.join(chatWdForContext, v.filename),
}));
if (result.length > 0) return result;
}
}
} catch {}
// Fallback: in-process memory (only contains latest generation, not all images)
// NOTE: This is less reliable than state file but useful before state is written
try {
if (currentLmChatId) {
const mem = LAST_IMAGES_BY_LM_CHAT[currentLmChatId];
if (Array.isArray(mem) && mem.length > 0) return mem;
}
} catch {}
// Fallback: legacy heuristic from SSOT conversation file (returns paths only, assume i=1..n)
try {
const group = await getLastVariantGroupFromLMConversation(
currentLmChatId || undefined
);
if (Array.isArray(group) && group.length > 0) {
return group.map((p, i) => ({ i: i + 1, path: p }));
}
} catch {}
return null;
};
// Try to resolve current LM Studio chat for scoping
try {
// Prefer the generator-provided context (deterministic within the current turn/tool-call loop)
const active = getActiveChatContext();
if (
active &&
typeof active.chatId === "string" &&
/^\d+$/.test(active.chatId)
) {
currentLmChatId = active.chatId;
currentLmWorkingDir = active.workingDir;
lmResolverConfidence = "high";
lmResolverReason = `active_context${
active.requestId ? `:${active.requestId}` : ""
}`;
}
} catch {}
try {
if (currentLmChatId) {
// already resolved via active context
log(
`[chatId] resolved via active_context: chatId=${currentLmChatId} workingDir=${
currentLmWorkingDir || "null"
}`
);
} else {
// Fallback: filesystem heuristic (picks newest *.conversation.json)
console.warn(
"[generate_image] No deterministic chat context available – falling back to filesystem heuristic (newest conversation file). " +
"This may happen if the tool is called outside a normal Generator turn or if context TTL (60s) expired."
);
const lm = await resolveActiveLMStudioChatId({
requireRecentMtimeSec: 600,
});
log(
`[chatId] fallback heuristic result: ok=${(lm as any)?.ok} chatId=${
(lm as any)?.chatId || "null"
} reason=${(lm as any)?.reason || "unknown"}`
);
if ((lm as any)?.ok) {
currentLmChatId = (lm as any).chatId;
lmResolverConfidence = (lm as any).confidence;
lmResolverReason = (lm as any).reason;
}
}
} catch {}
// ========================================================================
// PHASE 3: Global error rules (before source resolution)
// ========================================================================
// Count available sources from state
let stateAttachmentCount = 0;
let stateImageCount = 0;
let statePictureCount = 0;
const chatWdForValidation = getChatWdForContext();
if (chatWdForValidation) {
try {
const st = await readState(chatWdForValidation);
stateAttachmentCount = Array.isArray(st.attachments)
? st.attachments.length
: 0;
stateImageCount = Array.isArray(st.images)
? st.images.length
: 0;
statePictureCount = Array.isArray((st as any).pictures)
? (st as any).pictures.length
: 0;
} catch (e) {
log(`[phase3] failed to read state for source counts: ${String(e)}`);
}
}
const totalSourcesInState =
stateAttachmentCount + stateImageCount + statePictureCount;
log(
`[phase3] sources in state: attachments=${stateAttachmentCount}, images=${stateImageCount}, pictures=${statePictureCount}, total=${totalSourcesInState}`
);
// (Legacy multi-source checks removed: new interface uses canvas+moodboard.)
// Canvas + moodboard resolution (new interface)
type SourceSel = { pool: SourcePool; index: number; notation: string };
const loadSourceState = async (): Promise<{
chatWd: string | null;
attachments: any[];
pictures: any[];
images: Array<{ i: number; path: string }>;
variants: Array<{ v: number; path: string }>;
}> => {
const chatWd = getChatWdForContext();
let attachments: any[] = [];
let pictures: any[] = [];
let variants: Array<{ v: number; path: string }> = [];
try {
if (chatWd) {
const st: any = await readState(chatWd);
attachments = Array.isArray(st?.attachments) ? st.attachments : [];
pictures = Array.isArray(st?.pictures) ? st.pictures : [];
if (Array.isArray(st?.variants)) {
variants = st.variants
.filter((v: any) => v && typeof v.filename === "string")
.map((v: any) => ({
v: typeof v.v === "number" ? v.v : 1,
path: path.join(chatWd, v.filename),
}));
}
}
} catch (e) {
log(`[state] failed to read attachments/pictures/variants: ${String(e)}`);
}
let images: Array<{ i: number; path: string }> = [];
try {
images = (await getAllImagesForContext()) || [];
} catch (e) {
log(`[state] failed to enumerate images: ${String(e)}`);
}
return { chatWd, attachments, pictures, images, variants };
};
const resolveNotation = (
notationRaw: string,
ctx: {
attachments: any[];
pictures: any[];
images: Array<{ i: number; path: string }>;
variants: Array<{ v: number; path: string }>;
}
): SourceSel => {
const pref = parsePrefixedNotation(notationRaw);
if (pref) {
const n = `${
pref.pool === "attachment"
? "a"
: pref.pool === "variant"
? "v"
: pref.pool === "image"
? "i"
: "p"
}${pref.index}`;
return { pool: pref.pool, index: pref.index, notation: n };
}
const digit = parseDigitOnlyNotation(notationRaw);
if (digit != null) {
const pools: SourcePool[] = [];
if (ctx.attachments.length > 0) pools.push("attachment");
if (ctx.variants.length > 0) pools.push("variant");
if (ctx.images.length > 0) pools.push("image");
if (ctx.pictures.length > 0) pools.push("picture");
if (pools.length === 0) {
throw new Error("No sources available");
}
if (pools.length > 1) {
const abbrev = pools
.map((p) =>
p === "attachment" ? "a" : p === "variant" ? "v" : p === "image" ? "i" : "p"
)
.join("/");
throw new Error(`Ambiguous: use prefix (${abbrev})`);
}
const only = pools[0];
const n = `${
only === "attachment" ? "a" : only === "variant" ? "v" : only === "image" ? "i" : "p"
}${digit}`;
return { pool: only, index: digit, notation: n };
}
throw new Error(
`Invalid source notation: ${String(notationRaw || "").trim()}`
);
};
const { chatWd, attachments, pictures, images, variants } =
await loadSourceState();
const totalSources =
attachments.length + variants.length + images.length + pictures.length;
const autoSelectSingleSource = (): SourceSel => {
if (
attachments.length === 1 &&
variants.length === 0 &&
images.length === 0 &&
pictures.length === 0
) {
const a =
typeof attachments[0]?.a === "number" ? attachments[0].a : 1;
return { pool: "attachment", index: a, notation: `a${a}` };
}
if (
variants.length === 1 &&
attachments.length === 0 &&
images.length === 0 &&
pictures.length === 0
) {
const v = typeof variants[0]?.v === "number" ? variants[0].v : 1;
return { pool: "variant", index: v, notation: `v${v}` };
}
if (
images.length === 1 &&
attachments.length === 0 &&
variants.length === 0 &&
pictures.length === 0
) {
const i = typeof images[0]?.i === "number" ? images[0].i : 1;
return { pool: "image", index: i, notation: `i${i}` };
}
if (
pictures.length === 1 &&
attachments.length === 0 &&
variants.length === 0 &&
images.length === 0
) {
const p = typeof pictures[0]?.p === "number" ? pictures[0].p : 1;
return { pool: "picture", index: p, notation: `p${p}` };
}
throw new Error("Ambiguous source – specify canvas explicitly");
};
let resolvedCanvas: SourceSel | null = null;
if (mode === "text2image" || mode === "text2video") {
if (rawCanvas) {
log(`[info] canvas ignored for mode='${mode}': ${rawCanvas}`);
}
} else {
if (rawCanvas) {
resolvedCanvas = resolveNotation(rawCanvas, {
attachments,
variants,
pictures,
images,
});
} else {
if (totalSources === 0) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
if (totalSources === 1) {
resolvedCanvas = autoSelectSingleSource();
log(`[canvas] auto-resolved to ${resolvedCanvas.notation}`);
} else {
return {
content: [
{
type: "text",
text: "Ambiguous source – specify canvas explicitly (e.g., canvas='a1' or canvas='v1' or canvas='p1').",
},
],
isError: true as const,
};
}
}
}
const loadBufferForSel = async (
sel: SourceSel
): Promise<{
buf: Buffer;
originPath?: string;
originalName?: string;
}> => {
if (sel.pool === "attachment") {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: sel.index,
});
if (!(lm as any)?.ok || !(lm as any).buffer) {
throw new Error(`Attachment a${sel.index} not found.`);
}
return {
buf: (lm as any).buffer as Buffer,
originPath:
typeof (lm as any).originalPath === "string"
? (lm as any).originalPath
: undefined,
originalName:
typeof (lm as any).originalName === "string"
? (lm as any).originalName
: undefined,
};
}
if (sel.pool === "variant") {
const found = variants.find((v) => v.v === sel.index);
if (!found) {
const available =
variants.map((v) => `v${v.v}`).join(", ") || "(none)";
throw new Error(
`Variant v${sel.index} not found. Available: ${available}`
);
}
return {
buf: await fs.promises.readFile(found.path),
originPath: found.path,
};
}
if (sel.pool === "image") {
const found = images.find((img) => img.i === sel.index);
if (!found) {
const available =
images.map((img) => `i${img.i}`).join(", ") || "(none)";
throw new Error(
`Image i${sel.index} not found. Available: ${available}`
);
}
return {
buf: await fs.promises.readFile(found.path),
originPath: found.path,
};
}
// picture
const found = pictures.find(
(p: any) => typeof p?.p === "number" && p.p === sel.index
);
if (!found) {
const available =
pictures
.map((p: any) => (typeof p?.p === "number" ? `p${p.p}` : null))
.filter(Boolean)
.join(", ") || "(none)";
throw new Error(
`Picture p${sel.index} not found. Available: ${available}`
);
}
if (!chatWd)
throw new Error("No working directory resolved for pictures.");
const abs = path.join(chatWd, String(found.filename || ""));
const exists = await fs.promises
.stat(abs)
.then((s) => s.isFile())
.catch(() => false);
if (!exists) {
throw new Error(`Picture file missing: ${abs}`);
}
return { buf: await fs.promises.readFile(abs), originPath: abs };
};
// Resolve canvas buffer for image2image/edit/image2video
if (mode === "image2image" || mode === "edit" || mode === "image2video") {
if (!resolvedCanvas) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
try {
const loaded = await loadBufferForSel(resolvedCanvas);
srcBuf = loaded.buf;
effectiveMode = mode;
sourceTag = `canvas:${resolvedCanvas.notation}`;
sourceKind = resolvedCanvas.pool;
sourceOriginAbs = loaded.originPath;
sourceOriginalName = loaded.originalName;
sourceFileName = loaded.originPath
? path.basename(loaded.originPath)
: undefined;
if (resolvedCanvas.pool === "image") {
sourceVariantUsed = resolvedCanvas.index;
}
// Persist last canvas selection so the orchestrator can do smarter vision promotion.
try {
if (chatWd) {
const st: any = await readState(chatWd);
st.lastCanvasNotation = resolvedCanvas.notation;
st.lastCanvasAt = localTimestamp();
await writeStateAtomic(chatWd, st);
}
} catch (e) {
log(`[state] failed to persist lastCanvasNotation: ${String(e)}`);
}
} catch (e) {
return {
content: [{ type: "text", text: String((e as any)?.message || e) }],
isError: true as const,
};
}
}
// _internal override: pre-cropped buffer bypasses canvas loading
if (_internal?.presuppliedSourceBuf) {
srcBuf = _internal.presuppliedSourceBuf;
if (_internal.sourceTag) sourceTag = _internal.sourceTag;
}
// Audit source override: lets callers (e.g. handleZoomIn) report the
// original pre-crop source instead of the presupplied post-processed buffer.
if (_internal?.auditSourceOverride) {
const aso = _internal.auditSourceOverride;
if (aso.sourceKind !== undefined) sourceKind = aso.sourceKind;
if (aso.sourceFileName !== undefined) sourceFileName = aso.sourceFileName;
if (aso.sourceOriginalName !== undefined) sourceOriginalName = aso.sourceOriginalName;
if (aso.sourceOriginAbs !== undefined) sourceOriginAbs = aso.sourceOriginAbs;
}
// Track mask separately so it can be scaled to match adjusted canvas dims
let effectiveMaskBuf: Buffer | undefined = _internal?.maskBuf;
// Resolve moodboard selections (edit mode, or image2image via gRPC)
// Note: HTTP does not support moodboard for image2image; only gRPC does.
const resolvedMoodboard: SourceSel[] = [];
const selectedTransport = (globalThis as any)
?.__DT_SELECTED_TRANSPORT__ as "grpc" | "http" | null | undefined;
const moodboardAllowedForI2I = selectedTransport === "grpc";
if (
(mode === "edit" || (mode === "image2image" && moodboardAllowedForI2I)) &&
moodboardNotations.length > 0
) {
const seen = new Set<string>();
if (resolvedCanvas) {
seen.add(`${resolvedCanvas.pool}:${resolvedCanvas.index}`);
}
for (const nRaw of moodboardNotations) {
const sel = resolveNotation(nRaw, {
attachments,
variants,
pictures,
images,
});
const key = `${sel.pool}:${sel.index}`;
if (seen.has(key)) continue;
seen.add(key);
resolvedMoodboard.push(sel);
}
log(
`[${mode}] moodboard resolved: ${
resolvedMoodboard.map((s) => s.notation).join(", ") || "(none)"
}`
);
} else if (mode === "image2image" && moodboardNotations.length > 0 && !moodboardAllowedForI2I) {
// Warn user that moodboard is ignored for image2image via HTTP
log(
`[image2image] WARNING: moodboard ignored - requires gRPC transport. Using single canvas only.`
);
}
// Stash for the edit/image2image multi-ref block below
const resolvedCanvasSel = resolvedCanvas;
const resolvedMoodboardSel = resolvedMoodboard;
// Multi-reference mode: edit always, image2image only via gRPC
const isMultiReference =
(mode === "edit" || (mode === "image2image" && moodboardAllowedForI2I)) &&
resolvedMoodboardSel.length > 0;
if (mode === "text2image" || mode === "text2video") {
effectiveMode = mode;
log(`effective mode: ${effectiveMode}`);
// If the user provided non-aligned sizes, round to the effective (backend-safe) multiples of 64,
// but keep the raw values for audit + final postprocess resize.
let serviceInputForT2I: any = stripInternalToolKeys(input as any);
if (mode === "text2video") {
serviceInputForT2I._dt_video_mode = "txt2vid";
}
try {
if (resolvedName === "drawthings") {
const rawW = (input as any)?.width;
const rawH = (input as any)?.height;
const hasW = typeof rawW === "number" && Number.isFinite(rawW);
const hasH = typeof rawH === "number" && Number.isFinite(rawH);
if (hasW) requestedRawW = Math.max(1, Math.round(rawW));
if (hasH) requestedRawH = Math.max(1, Math.round(rawH));
if (hasW || hasH) {
const align = drawthingsLimits.align;
const maxW = drawthingsLimits.maxWidth;
const maxH = drawthingsLimits.maxHeight;
const floorTo = (v: number, step: number) =>
Math.floor(v / step) * step;
const clamp = (v: number, min: number, max: number) =>
Math.min(max, Math.max(min, v));
// requestedEffective must be backend-safe: aligned + never exceed render limits.
// Use floor alignment (never rounds up beyond user's raw request).
if (hasW)
requestedEffectiveW = clamp(
Math.max(align, floorTo(requestedRawW!, align)),
align,
maxW
);
if (hasH)
requestedEffectiveH = clamp(
Math.max(align, floorTo(requestedRawH!, align)),
align,
maxH
);
if (typeof requestedEffectiveW === "number")
serviceInputForT2I.width = requestedEffectiveW;
if (typeof requestedEffectiveH === "number")
serviceInputForT2I.height = requestedEffectiveH;
// Deterministic contract: when raw dims are known, provide them + upscaler decision.
if (
typeof requestedRawW === "number" &&
typeof requestedRawH === "number"
) {
(serviceInputForT2I as any)._dt_requested_raw_w = requestedRawW;
(serviceInputForT2I as any)._dt_requested_raw_h = requestedRawH;
(serviceInputForT2I as any)._dt_needs_upscaler =
requestedRawW > maxW || requestedRawH > maxH;
}
log(
`[t2i] requested: raw=${requestedRawW || "-"}x${
requestedRawH || "-"
} effective=${requestedEffectiveW || "-"}x${
requestedEffectiveH || "-"
}`
);
}
}
} catch (e) {
log(`[t2i] failed to compute effective size: ${String(e)}`);
}
result = await svc.generateImage(serviceInputForT2I, onProgress);
// Reconstruct render_target from service metadata if not already set
// (handles imageFormat/quality shorthand + custom config scenarios)
if (
(typeof requestedRawW !== "number" ||
typeof requestedRawH !== "number") &&
result?.metadata?.requested_dimensions
) {
const reqDims = result.metadata.requested_dimensions;
if (
typeof reqDims.width === "number" &&
typeof reqDims.height === "number"
) {
requestedRawW = reqDims.width;
requestedRawH = reqDims.height;
requestedEffectiveW = reqDims.width;
requestedEffectiveH = reqDims.height;
log(
`[t2i] reconstructed render_target from service: ${requestedRawW}x${requestedRawH}`
);
}
}
} else {
if (!srcBuf) {
return {
content: [{ type: "text", text: "No source image available." }],
isError: true as const,
};
}
// Resolve imageFormat → explicit width/height for i2i/edit/image2video
// so that normalizeInputBuffer and render_target use the correct aspect ratio.
{
const fmt = (input as any)?.imageFormat as string | undefined;
const hasW = typeof (input as any)?.width === "number" && Number.isFinite((input as any).width);
const hasH = typeof (input as any)?.height === "number" && Number.isFinite((input as any).height);
if (fmt && !hasW && !hasH) {
const formatDims: Record<string, { w: number; h: number }> = {
square: { w: 1024, h: 1024 },
landscape: { w: 1024, h: 768 },
portrait: { w: 768, h: 1024 },
"16:9": { w: 1024, h: 576 },
};
const dims = formatDims[fmt];
if (dims) {
(input as any).width = dims.w;
(input as any).height = dims.h;
log(`[i2i/edit] resolved imageFormat "${fmt}" → ${dims.w}x${dims.h}`);
}
}
}
// UNIFIED: Use normalizeInputBuffer for all i2i/edit input preprocessing
try {
  if (resolvedName === "drawthings") {
    // Only positive, finite user dimensions are forwarded as the requested raw size.
    const positiveOrUndefined = (v: unknown): number | undefined =>
      typeof v === "number" && Number.isFinite(v) && v > 0 ? v : undefined;

    // The zoom and refine profiles (never in edit mode) override the target sum.
    const callTag = _internal?.sourceTag;
    const zoomLike =
      !isEditMode && (callTag === "canvas:zoom-in" || callTag === "canvas:upscale");
    const refineLike = !isEditMode && callTag === "canvas:refine";

    const prepared = await normalizeInputBuffer(srcBuf, {
      requestedRawW: positiveOrUndefined((input as any)?.width),
      requestedRawH: positiveOrUndefined((input as any)?.height),
      logPrefix: "[i2i]",
      targetSumOverride: zoomLike
        ? drawthingsLimits.targetSumZoom
        : refineLike
          ? drawthingsLimits.targetSumRefine
          : undefined,
    });

    // Publish the normalized buffer and its bookkeeping to the enclosing scope.
    srcBuf = prepared.buf;
    sourcePreprocess = prepared.preprocess;
    if (typeof prepared.normalizedLongSide === "number") {
      normalizedToLongSide = prepared.normalizedLongSide;
    }

    // When normalization changed the canvas size, stretch the mask to the same size.
    if (effectiveMaskBuf) {
      const pre = prepared.preprocess;
      const adjW = pre.adjusted.width;
      const adjH = pre.adjusted.height;
      const sizeChanged = adjW !== pre.original.width || adjH !== pre.original.height;
      if (adjW && adjH && sizeChanged) {
        effectiveMaskBuf = await imgResizeFillToPng(effectiveMaskBuf, adjW, adjH);
      }
    }
    auditMaskBuf = effectiveMaskBuf;
  }
} catch (e) {
  // Any failure during normalization aborts the tool call with an error result.
  const reason = String(e);
  log(`i2i normalization error: ${reason}`);
  return {
    content: [
      {
        type: "text",
        text: `Image2Image setup failed. Error: ${reason}`,
      },
    ],
    isError: true as const,
  };
}
// Trace the mode that will actually run, plus where the source image came from.
log(
  `effective mode: ${mode} (source=${sourceTag || "unknown"}${
    sourceFileName ? ", file=" + sourceFileName : ""
  })`
);
// Backend input must not accept internal knobs from user.
let serviceInputForI2I: any = stripInternalToolKeys(input as any);
// Tag the request so the backend knows which pipeline profile to apply.
if (mode === "image2video") {
  serviceInputForI2I._dt_video_mode = "img2vid";
} else {
  const callTag = _internal?.sourceTag;
  let profile: string;
  if (isEditMode) {
    switch (callTag) {
      case "canvas:inpaint":
        profile = "inpaint";
        break;
      case "canvas:outpaint":
        profile = "outpaint";
        break;
      default:
        profile = "edit";
    }
  } else if (callTag === "canvas:zoom-in" || callTag === "canvas:upscale") {
    profile = "zoom";
  } else if (callTag === "canvas:refine") {
    profile = "refine";
  } else {
    profile = "img2img";
  }
  serviceInputForI2I._dt_i2i_profile = profile;
}
// Derive requestedRaw + requestedEffective.
// - requestedRaw is the user request when provided; otherwise the ORIGINAL source dimensions.
// - requestedEffective is the aligned/clamped size we treat as the effective target.
// Backend internal processing size (i2i normalization) may still differ.
try {
if (resolvedName === "drawthings") {
const userW = (input as any)?.width;
const userH = (input as any)?.height;
const hasUserW =
typeof userW === "number" && Number.isFinite(userW);
const hasUserH =
typeof userH === "number" && Number.isFinite(userH);
{
const limits = isEditMode
? drawthingsEditLimits
: drawthingsLimits;
const align = limits.align;
const minDim = limits.minDim;
const maxW = limits.maxWidth;
const maxH = limits.maxHeight;
// Small numeric helpers shared by the sizing logic in this scope.
// clamp applies the upper bound first, so the lower bound wins if lo > hi.
const clamp = (v: number, lo: number, hi: number) => {
  const capped = Math.min(hi, v);
  return Math.max(lo, capped);
};
const roundTo = (v: number, step: number) => step * Math.round(v / step);
const floorTo = (v: number, step: number) => step * Math.floor(v / step);
const ceilTo = (v: number, step: number) => step * Math.ceil(v / step);

/**
 * Pick an aligned, in-range size that stays as close as possible to the aspect
 * ratio of origW x origH; among equally good aspect matches, the one closest
 * in size to the original wins.
 * Falls back to clamping and aligning each axis independently when no aligned
 * candidate fits the limits (the aspect ratio may then drift).
 */
const chooseEgalized = (origW: number, origH: number) => {
  const aspect = origW / Math.max(1, origH);
  const minAligned = ceilTo(minDim, align);
  const maxAlignedW = floorTo(maxW, align);
  const maxAlignedH = floorTo(maxH, align);

  const pool: Array<{ w: number; h: number }> = [];
  // Keep a candidate only if it is finite, positive, aligned and within limits.
  const consider = (rawW: number, rawH: number) => {
    if (!Number.isFinite(rawW) || !Number.isFinite(rawH)) return;
    const w = Math.round(rawW);
    const h = Math.round(rawH);
    const misaligned = w % align !== 0 || h % align !== 0;
    const belowMin = w < minAligned || h < minAligned;
    const aboveMax = w > maxAlignedW || h > maxAlignedH;
    if (w <= 0 || h <= 0 || misaligned || belowMin || aboveMax) return;
    pool.push({ w, h });
  };

  // Anchor values: aligned neighbours of the original size plus the minimum.
  const wAnchors = [
    clamp(floorTo(origW, align), align, maxAlignedW),
    clamp(ceilTo(origW, align), align, maxAlignedW),
    minAligned,
  ];
  const hAnchors = [
    clamp(floorTo(origH, align), align, maxAlignedH),
    clamp(ceilTo(origH, align), align, maxAlignedH),
    minAligned,
  ];

  // Derive the height from each width anchor.
  for (const w0 of wAnchors) {
    consider(w0, roundTo(w0 / aspect, align));
  }
  // Derive the width from each height anchor.
  for (const h0 of hAnchors) {
    consider(roundTo(h0 * aspect, align), h0);
  }
  // Probe two alignment steps on either side of each height anchor to catch
  // exact-aspect pairs (e.g. 300x200 -> 384x256 keeps 1.5 exactly).
  for (const hBase of hAnchors) {
    for (let dh = -2; dh <= 2; dh++) {
      const h0 = hBase + dh * align;
      consider(roundTo(h0 * aspect, align), h0);
    }
  }

  if (pool.length === 0) {
    // Hard fallback: clamp + align each axis on its own.
    return {
      w: clamp(roundTo(origW, align), minAligned, maxAlignedW),
      h: clamp(roundTo(origH, align), minAligned, maxAlignedH),
    };
  }

  // Order by aspect error first, then by distance from the original size.
  const compare = (a: { w: number; h: number }, b: { w: number; h: number }) => {
    const aErr = Math.abs(a.w / Math.max(1, a.h) - aspect);
    const bErr = Math.abs(b.w / Math.max(1, b.h) - aspect);
    if (aErr !== bErr) return aErr - bErr;
    const aDist = Math.abs(a.w - origW) + Math.abs(a.h - origH);
    const bDist = Math.abs(b.w - origW) + Math.abs(b.h - origH);
    return aDist - bDist;
  };
  // The earliest candidate wins ties, matching a stable sort's first element.
  return pool.reduce((best, cand) => (compare(cand, best) < 0 ? cand : best));
};
// Prefer explicit user-provided OUT size when both provided.
// Otherwise derive OUT size from ORIGINAL (pre-normalization) source.
// Source size BEFORE normalization, as recorded in sourcePreprocess (may be absent).
const origW0 = sourcePreprocess?.original?.width;
const origH0 = sourcePreprocess?.original?.height;
const isFiniteDim = (v: unknown): v is number =>
  typeof v === "number" && Number.isFinite(v);
const origAspect =
  isFiniteDim(origW0) && isFiniteDim(origH0) && origH0 > 0
    ? origW0 / origH0
    : undefined;
// Measure the current (normalized) buffer only when the recorded original size
// is unusable. The guard uses the same finite check as the consumers below, so
// a NaN dimension no longer suppresses the fallback.
const fallbackSz =
  isFiniteDim(origW0) && isFiniteDim(origH0) ? null : await imgGetSize(srcBuf);
// Base size that the requested raw/effective dimensions are derived from.
let baseW: number | undefined;
let baseH: number | undefined;
if (hasUserW && hasUserH) {
  // Both dimensions given: take the user request as-is (rounded).
  baseW = Math.round(userW);
  baseH = Math.round(userH);
} else if (hasUserW) {
  // Width only: derive the height from the original aspect ratio when known.
  baseW = Math.round(userW);
  if (typeof origAspect === "number") {
    baseH = Math.max(1, Math.round(baseW / origAspect));
  }
} else if (hasUserH) {
  // Height only: derive the width from the original aspect ratio when known.
  baseH = Math.round(userH);
  if (typeof origAspect === "number") {
    baseW = Math.max(1, Math.round(baseH * origAspect));
  }
} else if (fallbackSz === null) {
  // No user size: use the recorded original size.
  baseW = origW0;
  baseH = origH0;
} else {
  // No user size and no usable original: take BOTH dimensions from the measured
  // buffer, so an original value is never paired with a normalized one.
  baseW = fallbackSz?.width;
  baseH = fallbackSz?.height;
}
if (typeof baseW === "number" && typeof baseH === "number") {
  // requestedRaw: the base size (user request when given, otherwise derived from
  // the source), rounded and floored at 1.
  requestedRawW = Math.max(1, Math.round(baseW));
  requestedRawH = Math.max(1, Math.round(baseH));

  const callTag = _internal?.sourceTag;
  const isZoomToolCall = callTag === "canvas:zoom-in" || callTag === "canvas:upscale";

  // requestedEffective: with BOTH user dims, align each axis independently;
  // otherwise search for the aligned size that best preserves the aspect ratio.
  if (hasUserW && hasUserH) {
    const minAligned = Math.ceil(minDim / align) * align;
    const maxAlignedW = Math.floor(maxW / align) * align;
    const maxAlignedH = Math.floor(maxH / align) * align;
    // Zoom/upscale profile (non-edit): the upper clamp is skipped and only the
    // aligned minimum is enforced.
    const skipUpperClamp = !isEditMode && isZoomToolCall;
    const alignedW = roundTo(baseW, align);
    const alignedH = roundTo(baseH, align);
    requestedEffectiveW = skipUpperClamp
      ? Math.max(minAligned, alignedW)
      : clamp(alignedW, minAligned, maxAlignedW);
    requestedEffectiveH = skipUpperClamp
      ? Math.max(minAligned, alignedH)
      : clamp(alignedH, minAligned, maxAlignedH);
  } else {
    const picked = chooseEgalized(baseW, baseH);
    requestedEffectiveW = picked.w;
    requestedEffectiveH = picked.h;
  }

  log(
    `[i2i/edit] requested: raw=${requestedRawW || "-"}x${
      requestedRawH || "-"
    } effective=${requestedEffectiveW || "-"}x${
      requestedEffectiveH || "-"
    } (user provided: ${hasUserW ? "w" : "-"}${
      hasUserH ? "h" : "-"
    })`
  );

  // The backend must generate at the requested effective size, not at the
  // normalized source (adjusted) size.
  if (
    typeof requestedEffectiveW === "number" &&
    typeof requestedEffectiveH === "number"
  ) {
    serviceInputForI2I.width = requestedEffectiveW;
    serviceInputForI2I.height = requestedEffectiveH;
    log(
      `[i2i/edit] set serviceInputForI2I dimensions to effective: ${requestedEffectiveW}x${requestedEffectiveH}`
    );
  }

  // Deterministic contract: raw dims must always be present from here on.
  if (typeof requestedRawW !== "number" || typeof requestedRawH !== "number") {
    throw new Error(
      "Invariant failed: requested_raw dims missing for drawthings i2i/edit"
    );
  }
  (serviceInputForI2I as any)._dt_requested_raw_w = requestedRawW;
  (serviceInputForI2I as any)._dt_requested_raw_h = requestedRawH;

  // Upscaler decision: needed when the effective size exceeds the adjusted
  // source on either axis AND the raw/adjusted scale factor reaches the
  // threshold configured for the active upscale method.
  const adjustedW = sourcePreprocess?.adjusted?.width ?? 0;
  const adjustedH = sourcePreprocess?.adjusted?.height ?? 0;
  const haveAdjusted = adjustedW > 0 && adjustedH > 0;
  const scaleFactor = haveAdjusted
    ? Math.max(requestedRawW / adjustedW, requestedRawH / adjustedH)
    : 0;
  const usesZoomPass = drawthingsLimits.upscaleMethod === "zoom-pass";
  const minFactor = usesZoomPass
    ? drawthingsLimits.minUpscaleFactorZoomPass
    : drawthingsLimits.minUpscaleFactorUpscaler;
  const exceedsAdjusted =
    (typeof requestedEffectiveW === "number" && requestedEffectiveW > adjustedW) ||
    (typeof requestedEffectiveH === "number" && requestedEffectiveH > adjustedH);
  const needsUpscaler = exceedsAdjusted && scaleFactor >= minFactor;
  _dtNeedsUpscaler = needsUpscaler;
  (serviceInputForI2I as any)._dt_needs_upscaler = needsUpscaler;

  // When a zoom-pass will follow, the first pass generates at the adjusted size.
  // Direct zoom-in/upscale tool calls keep their target dims.
  if (needsUpscaler && usesZoomPass && haveAdjusted && !isZoomToolCall) {
    serviceInputForI2I.width = adjustedW;
    serviceInputForI2I.height = adjustedH;
  }
}
}
}
} catch (e) {
log(`[i2i] failed to derive width/height from source: ${String(e)}`);
}
// SAFETY NET: Ensure serviceInputForI2I dimensions ALWAYS respect backend limits.
// This catches edge cases where the main calculation block was skipped or failed.
// Zoom/upscale profile is excluded: DT backend ignores config dims for i2i and
// renders at input image dimensions, which may legitimately exceed 1536.
{
  // Last-resort sanitation of the dimensions handed to the backend: align them
  // and clamp them into the configured limits. Skipped for the zoom/upscale profile.
  const callTag = _internal?.sourceTag;
  const zoomLike =
    !isEditMode && (callTag === "canvas:zoom-in" || callTag === "canvas:upscale");
  if (!zoomLike) {
    const limits = isEditMode ? drawthingsEditLimits : drawthingsLimits;
    const step = limits.align;
    const lowerBound = Math.ceil(limits.minDim / step) * step;
    const upperW = Math.floor(limits.maxWidth / step) * step;
    const upperH = Math.floor(limits.maxHeight / step) * step;

    // Returns the aligned + clamped value, or undefined for non-numeric input.
    const sanitizeDim = (value: unknown, upper: number): number | undefined =>
      typeof value === "number" && Number.isFinite(value)
        ? Math.max(lowerBound, Math.min(upper, Math.round(value / step) * step))
        : undefined;

    const inW = serviceInputForI2I.width;
    const inH = serviceInputForI2I.height;

    const safeW = sanitizeDim(inW, upperW);
    if (safeW !== undefined && safeW !== inW) {
      log(`[i2i] SAFETY: sanitized width ${inW} → ${safeW}`);
      serviceInputForI2I.width = safeW;
    }

    const safeH = sanitizeDim(inH, upperH);
    if (safeH !== undefined && safeH !== inH) {
      log(`[i2i] SAFETY: sanitized height ${inH} → ${safeH}`);
      serviceInputForI2I.height = safeH;
    }
  }
}
// Edit mode requires gRPC backend (HTTP does not support edit mode at all)
if (isEditMode && typeof svc.generateImageEdit !== "function") {
  // Edit mode needs a backend that exposes generateImageEdit. Pick the log line
  // and the user-facing message from the transport that is currently selected.
  const transport = (globalThis as any)
    ?.__DT_SELECTED_TRANSPORT__ as "grpc" | "http" | null | undefined;
  const genericFailure = "Failed to generate image: backend error";
  let logLine: string;
  let userText: string;
  if (!transport) {
    // No backend connected at all: report the generic backend error rather
    // than a message that would imply HTTP is active.
    logLine = `[edit] ERROR: edit mode requested but no Draw Things backend is connected`;
    userText = genericFailure;
  } else if (transport === "http") {
    logLine = `[edit] ERROR: HTTP backend does not support edit mode (generateImageEdit not available)`;
    userText = `Edit mode is not supported via HTTP. Edit mode requires the Draw Things gRPC backend. Use mode='image2image' instead, or switch to gRPC.`;
  } else {
    // Defensive fallback: a non-HTTP transport is selected but the method is missing.
    logLine = `[edit] ERROR: gRPC transport selected but edit mode is unavailable (generateImageEdit missing)`;
    userText = genericFailure;
  }
  log(logLine);
  return {
    content: [{ type: "text", text: userText }],
    isError: true as const,
  };
}
// Multi-reference edit/image2image mode: collect additional buffers and call generateImageEdit
if (isMultiReference && typeof svc.generateImageEdit === "function") {
log(`[${mode}] resolving multi-reference sources...`);
// ─────────────────────────────────────────────────────────────────
// PHASE 2: No auto-fill. Only explicitly selected sources are used.
// ─────────────────────────────────────────────────────────────────
// Get model capabilities for limit checking
// Use edit or image2image limits based on mode
const capKey = getCapabilityKeyForPreset(effectiveModelPreset);
const imageCaps = capKey
? detectImageModelCapabilities(capKey)
: null;
const maxRefs =
mode === "edit"
? (imageCaps?.edit?.maxReferenceImages ?? 4)
: (imageCaps?.image2image?.maxReferenceImages ?? 1);
log(
`[${mode}] model=${effectiveModelPreset}, maxReferenceImages=${maxRefs}`
);
// Get available attachments/variants/pictures count (for existence validation)
const chatWd = getChatWdForContext();
let availableAttachmentCount = 0;
let availableVariantCount = 0;
let availablePictureCount = 0;
if (chatWd) {
try {
const st = await readState(chatWd);
availableAttachmentCount = Array.isArray(st.attachments)
? st.attachments.length
: 0;
availableVariantCount = Array.isArray(st.variants)
? st.variants.length
: 0;
availablePictureCount = Array.isArray((st as any).pictures)
? (st as any).pictures.length
: 0;
log(
`[${mode}] available: ${availableAttachmentCount} attachments, ${availableVariantCount} variants, ${availablePictureCount} pictures`
);
} catch (e) {
log(`[${mode}] failed to read state: ${String(e)}`);
}
}
// Use resolved canvas + moodboard (no legacy sourceAttachment/sourceVariant)
const canvasSel = resolvedCanvasSel;
const moodboardSel = resolvedMoodboardSel;
// ─────────────────────────────────────────────────────────────────
// LIMIT VALIDATION: Check total references against model capabilities
// ─────────────────────────────────────────────────────────────────
const totalRequested =
(canvasSel ? 1 : 0) + (moodboardSel?.length || 0);
log(
`[${mode}] total references requested: ${totalRequested}, limit: ${maxRefs}`
);
if (totalRequested > maxRefs) {
// Build a detailed error message
const details = [
canvasSel ? `canvas=${canvasSel.notation}` : "no canvas",
moodboardSel && moodboardSel.length > 0
? `moodboard=[${moodboardSel.map((s) => s.notation).join(",")}]`
: "",
]
.filter((s) => s)
.join(", ");
return {
content: [
{
type: "text",
text:
`Model '${effectiveModelPreset}' supports max ${maxRefs} reference images in ${mode} mode.\n` +
`Requested: ${totalRequested} (${details}).\n\n` +
`Please make an explicit selection:\n` +
`- Use 'canvas' to specify the priority image (e.g., canvas="a1" or canvas="v2" or canvas="p3")\n` +
`- Use 'moodboard' to add reference images (e.g., moodboard=["a2","v1","p4"])`,
},
],
isError: true as const,
};
}
// ─────────────────────────────────────────────────────────────────
const referenceBuffers: Buffer[] = [];
const referenceMetadata: Array<{
type: "attachment" | "image" | "picture" | "variant";
index: number;
isCanvas: boolean;
originPath?: string;
originalName?: string;
}> = [];
const referencePreprocess: Array<{
type: "attachment" | "image" | "picture" | "variant";
index: number;
role: "canvas" | "moodboard";
originPath?: string;
originalName?: string;
preprocess: {
original: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
adjusted: {
width?: number;
height?: number;
format?: string;
bytes?: number;
};
reason: NormalizeInputReason;
};
}> = [];
// Get user-requested output dimensions for capping input size
const userOutW = (input as any)?.width;
const userOutH = (input as any)?.height;
const editRequestedRawW =
typeof userOutW === "number" &&
Number.isFinite(userOutW) &&
userOutW > 0
? userOutW
: undefined;
const editRequestedRawH =
typeof userOutH === "number" &&
Number.isFinite(userOutH) &&
userOutH > 0
? userOutH
: undefined;
/**
 * Load one selected reference, normalize it, and record its buffer, metadata
 * and preprocess info in the three parallel arrays.
 * Only a canvas reference is normalized against the requested output size;
 * moodboard references are normalized without it.
 * Rejects if loading or normalization fails.
 */
const pushReference = async (sel: any, isCanvas: boolean) => {
  const role = isCanvas ? "canvas" : "moodboard";
  const loaded = await loadBufferForSel(sel);
  const requestedDims = isCanvas
    ? { requestedRawW: editRequestedRawW, requestedRawH: editRequestedRawH }
    : { requestedRawW: undefined, requestedRawH: undefined };
  const normalized = await normalizeInputBuffer(loaded.buf, {
    ...requestedDims,
    logPrefix: `[${mode}:${role}]`,
  });

  referenceBuffers.push(normalized.buf);

  const { originPath, originalName } = loaded;
  referenceMetadata.push({
    type: sel.pool,
    index: sel.index,
    isCanvas,
    originPath,
    originalName,
  });
  referencePreprocess.push({
    type: sel.pool,
    index: sel.index,
    role,
    originPath,
    originalName,
    preprocess: normalized.preprocess,
  });
};
// 1. Resolve Canvas first
if (canvasSel) {
try {
await pushReference(canvasSel, true);
log(`[${mode}] canvas resolved: ${canvasSel.notation}`);
} catch (e) {
return {
content: [
{
type: "text",
text: `Canvas ${canvasSel.notation} not found: ${String(
(e as any)?.message || e
)}`,
},
],
isError: true as const,
};
}
}
// 2. Resolve Moodboard selections
for (const sel of moodboardSel || []) {
try {
await pushReference(sel, false);
log(`[${mode}] moodboard resolved: ${sel.notation}`);
} catch (e) {
return {
content: [
{
type: "text",
text: `Moodboard ${sel.notation} not found: ${String(
(e as any)?.message || e
)}`,
},
],
isError: true as const,
};
}
}
// If no canvas was explicitly or auto-selected but we have references, use first as canvas
// NOTE(review): only referenceMetadata[0].isCanvas is updated here. The matching
// referencePreprocess[0].role keeps the value recorded by pushReference, and the
// buffer was normalized without the requested output dimensions (pushReference
// passes those for canvas entries only) — confirm this is intended.
if (!resolvedCanvasSel && referenceBuffers.length > 0) {
referenceMetadata[0].isCanvas = true;
log(
`[${mode}] auto-selected first reference as canvas: ${referenceMetadata[0].type} ${referenceMetadata[0].index}`
);
}
// Fallback: if still no references, use the already-resolved srcBuf
if (referenceBuffers.length === 0 && srcBuf) {
// UNIFIED: Use normalizeInputBuffer
const normalized = await normalizeInputBuffer(srcBuf, {
requestedRawW: editRequestedRawW,
requestedRawH: editRequestedRawH,
logPrefix: `[${mode}:fallback-canvas]`,
});
referenceBuffers.push(normalized.buf);
referenceMetadata.push({
type: sourceKind || "image",
index: sourceVariantUsed || 1,
isCanvas: true,
});
referencePreprocess.push({
type: sourceKind || "image",
index: sourceVariantUsed || 1,
role: "canvas",
preprocess: normalized.preprocess,
});
log(`[${mode}] fallback: using single source as canvas`);
}
// Copy metadata for summary (outside this block)
usedReferenceMeta = [...referenceMetadata];
usedReferencePreprocess = [...referencePreprocess];
log(
`[${mode}] calling generateImageEdit with ${referenceBuffers.length} references`
);
result = await svc.generateImageEdit(
serviceInputForI2I,
referenceBuffers,
onProgress,
effectiveMaskBuf
);
// Update sourceTag for audit
sourceTag = `${mode}:refs=${referenceBuffers.length}`;
} else {
// Single-reference path (original behavior)
// Populate usedReferenceMeta for consistency in summary
if (srcBuf && (sourceKind || effectiveMode === "image2image")) {
usedReferenceMeta = [
{
type: sourceKind || "image",
index: sourceVariantUsed || 1,
isCanvas: true, // Single reference is always canvas
},
];
}
result = await svc.generateImageImg2Img(
serviceInputForI2I,
srcBuf as Buffer,
onProgress,
effectiveMaskBuf
);
// Reconstruct render_target from service metadata if not already set
// (fallback for edge cases where core logic was bypassed)
if (
(typeof requestedRawW !== "number" ||
typeof requestedRawH !== "number") &&
result?.metadata?.requested_dimensions
) {
const reqDims = result.metadata.requested_dimensions;
if (
typeof reqDims.width === "number" &&
typeof reqDims.height === "number"
) {
requestedRawW = reqDims.width;
requestedRawH = reqDims.height;
requestedEffectiveW = reqDims.width;
requestedEffectiveH = reqDims.height;
log(
`[i2i/edit] reconstructed render_target from service: ${requestedRawW}x${requestedRawH}`
);
}
}
}
}
}
if ((result as any).isError || (result as any).error) {
  const failure = result as any;

  // Normalize the backend status into a finite number when possible.
  const statusRaw = failure.status as unknown;
  let statusNum: number | undefined = undefined;
  if (typeof statusRaw === "string") {
    const parsed = parseInt(statusRaw, 10);
    if (Number.isFinite(parsed)) statusNum = parsed;
  } else if (typeof statusRaw === "number" && Number.isFinite(statusRaw)) {
    statusNum = statusRaw;
  }

  const raw = failure.errorMessage || failure.error || "unknown error";
  const codeText =
    statusNum === undefined ? "backend error" : `status ${statusNum}`;

  // Record the failure: short form in the error log, raw payload separately.
  await logError(new Error(`Failed to generate image: ${codeText}`));
  await appendErrorRaw(
    typeof raw === "string" ? raw : String(raw),
    statusNum
  );

  // Short, length-capped excerpt of the raw error for the tool response.
  let snippet = "";
  try {
    const text = String(raw);
    snippet = text.length > 500 ? text.slice(0, 500) + "…" : text;
  } catch {
    snippet = "";
  }

  const content: any[] = [
    { type: "text", text: `Failed to generate image: ${codeText}` },
  ];
  if (snippet) {
    content.push({ type: "text", text: `Details: ${snippet}` });
  }
  return {
    content,
    isError: true as const,
  };
}
// Collect the generated image(s) from whichever field the backend filled in.
// The first matching field wins: images[], imageBuffer, imageData, imagePath.
let buffers: Buffer[] = [];
{
  // Scoped block keeps the helper and alias out of the enclosing function scope.
  const payload = result as any;
  // Accepts raw base64 or a data: URL (payload after the first comma).
  const decodeBase64 = (text: string): Buffer =>
    Buffer.from(text.startsWith("data:") ? text.split(",")[1] : text, "base64");

  if (Array.isArray(payload.images) && payload.images.length > 0) {
    // Only string entries are decoded; other entry types are ignored.
    for (const entry of payload.images) {
      if (typeof entry === "string") buffers.push(decodeBase64(entry));
    }
  } else if (Buffer.isBuffer(payload.imageBuffer)) {
    buffers.push(payload.imageBuffer as Buffer);
  } else if (payload.imageData) {
    const data = payload.imageData;
    if (Buffer.isBuffer(data)) buffers.push(data);
    else if (typeof data === "string") buffers.push(decodeBase64(data));
  } else if (payload.imagePath) {
    // A read failure is logged only; the empty-result check below reports it.
    try {
      const abs = path.resolve(payload.imagePath);
      buffers.push(await fs.promises.readFile(abs));
    } catch (e) {
      log(
        `Failed to read returned imagePath: ${payload.imagePath}: ${String(e)}`
      );
    }
  }
}
if (buffers.length === 0) throw new Error("No valid image data returned");
// Measure Pass-1 backend output before zoom-pass can replace buffers.
let backendReturnedW: number | undefined;
let backendReturnedH: number | undefined;
try {
if (buffers[0]) {
const meta0 = await imgGetSize(buffers[0]);
backendReturnedW = meta0.width;
backendReturnedH = meta0.height;
}
} catch {}
// ── ZOOM-PASS PIPELINE (Steps 3-7) ───────────────────────────────────────
if (
(imageService as any)?.name === "drawthings" &&
drawthingsLimits.upscaleMethod === "zoom-pass" &&
_dtNeedsUpscaler &&
(effectiveMode === "image2image" || effectiveMode === "edit") &&
_internal?.sourceTag !== "canvas:zoom-in" &&
_internal?.sourceTag !== "canvas:upscale" &&
typeof requestedRawW === "number" &&
typeof requestedRawH === "number" &&
buffers.length > 0
) {
try {
const rawSum = requestedRawW + requestedRawH;
const targetSumZoom = drawthingsLimits.targetSumZoom;
const align = drawthingsLimits.align;
// Step 4: Jimp Pre-Resize — build Zoom-Pass Canvas
let zoomCanvasW: number;
let zoomCanvasH: number;
let zoomCanvasBuf: Buffer;
if (rawSum <= targetSumZoom) {
zoomCanvasW = requestedRawW;
zoomCanvasH = requestedRawH;
zoomCanvasBuf = await imgResizeFillToPng(buffers[0], zoomCanvasW, zoomCanvasH);
} else {
// Canvas = AR-preserving scale to targetSumZoom, 64-aligned
// Use candidate selection (floor/round/ceil on each axis) to minimise AR error.
const aspect = requestedRawW / requestedRawH;
const hRaw = targetSumZoom / (1 + aspect);
const wRaw = targetSumZoom - hRaw;
const snaps = (v: number) => [
Math.max(align, Math.floor(v / align) * align),
Math.max(align, Math.round(v / align) * align),
Math.max(align, Math.ceil(v / align) * align),
];
type Cand = { w: number; h: number; arErr: number };
const candidates: Cand[] = [];
for (const wSnap of snaps(wRaw)) {
for (const hSnap of snaps(hRaw)) {
if (wSnap + hSnap <= targetSumZoom)
candidates.push({ w: wSnap, h: hSnap, arErr: Math.abs(wSnap / hSnap - aspect) });
}
}
candidates.sort((a, b) => a.arErr - b.arErr || (b.w + b.h) - (a.w + a.h));
const best = candidates[0] ?? { w: Math.max(align, Math.floor(wRaw / align) * align), h: Math.max(align, Math.floor(hRaw / align) * align) };
zoomCanvasW = best.w;
zoomCanvasH = best.h;
const arExpected = requestedRawW / requestedRawH;
const arActual = zoomCanvasW / zoomCanvasH;
if (Math.abs(arActual - arExpected) > 0.01)
log(`[AR-CHECK] zoom-canvas: ${zoomCanvasW}x${zoomCanvasH} AR=${arActual.toFixed(4)} vs raw ${requestedRawW}x${requestedRawH} AR=${arExpected.toFixed(4)} err=${Math.abs(arActual - arExpected).toFixed(4)}`);
zoomCanvasBuf = await imgResizeFillToPng(buffers[0], zoomCanvasW, zoomCanvasH);
}
// Step 5: Pass 2 — SeedVR2 call
// Progress adapter for the zoom pass: every update is forwarded with step -1
// and a "Zoom …" label, so it reads as a message-only update. Undefined when
// the caller supplied no progress callback.
const zoomOnProgress = onProgress
  ? (step: number, total: unknown, msg?: string) => {
      let label: string;
      if (step === -1) {
        label = msg ? `Zoom ${msg}` : "Zoom";
      } else if (typeof total === "number" && total > 0) {
        const percent = Math.round((step / (total + 1)) * 100);
        label = `Zoom Step ${step}/${total} (${percent}%)`;
      } else {
        label = `Zoom Step ${step}...`;
      }
      onProgress(-1, total as any, label);
    }
  : undefined;
// Only pass runtime overrides — the service fills defaultParamsZoom as baseDefaults itself.
const zoomParams: any = {
width: zoomCanvasW,
height: zoomCanvasH,
prompt: typeof input.prompt === "string" ? input.prompt : "",
_dt_i2i_profile: "zoom",
_dt_needs_upscaler: false,
_dt_requested_raw_w: zoomCanvasW,
_dt_requested_raw_h: zoomCanvasH,
};
const zoomStartMs = Date.now();
const zoomResult = await svc.generateImageImg2Img(zoomParams, zoomCanvasBuf, zoomOnProgress);
const zoomInferenceMs = Date.now() - zoomStartMs;
if ((zoomResult as any).isError || (zoomResult as any).error) {
throw new Error(`Zoom-Pass failed: ${(zoomResult as any).errorMessage ?? (zoomResult as any).error ?? "unknown"}`);
}
// Extract Pass-2 buffers using the same field precedence as the main result
// extraction: images[], imageBuffer, imageData, then imagePath.
let zoomBuffers: Buffer[] = [];
if (Array.isArray((zoomResult as any).images) && (zoomResult as any).images.length > 0) {
  for (const img of (zoomResult as any).images) {
    if (typeof img === "string") {
      // Accept raw base64 or a data: URL (payload after the first comma).
      const b64 = img.startsWith("data:") ? img.split(",")[1] : img;
      zoomBuffers.push(Buffer.from(b64, "base64"));
    }
  }
} else if (Buffer.isBuffer((zoomResult as any).imageBuffer)) {
  zoomBuffers.push((zoomResult as any).imageBuffer as Buffer);
} else if ((zoomResult as any).imageData) {
  const d = (zoomResult as any).imageData;
  if (Buffer.isBuffer(d)) zoomBuffers.push(d);
  else if (typeof d === "string") {
    const b64 = d.startsWith("data:") ? d.split(",")[1] : d;
    zoomBuffers.push(Buffer.from(b64, "base64"));
  }
} else if ((zoomResult as any).imagePath) {
  // This branch was missing although the main extraction supports it, so a
  // backend that returns a file path always failed the zoom pass.
  try {
    const abs = path.resolve((zoomResult as any).imagePath);
    zoomBuffers.push(await fs.promises.readFile(abs));
  } catch (e) {
    // Log only; the empty-result check below turns this into the pipeline error.
    log(
      `Zoom-Pass: failed to read returned imagePath: ${
        (zoomResult as any).imagePath
      }: ${String(e)}`
    );
  }
}
if (zoomBuffers.length === 0) throw new Error("Zoom-Pass returned no image data");
// Measure Pass-2 backend output
let zoomBackendW: number | undefined;
let zoomBackendH: number | undefined;
try {
const zm = await imgGetSize(zoomBuffers[0]);
zoomBackendW = zm.width;
zoomBackendH = zm.height;
} catch {}
const zoomMeta = (zoomResult as any)?.metadata ?? {};
// Step 6: Post-Processing Stage 2
if (rawSum <= targetSumZoom) {
buffers = zoomBuffers;
} else {
const resized: Buffer[] = [];
for (const zb of zoomBuffers) {
resized.push(await imgResizeFillToPng(zb, requestedRawW, requestedRawH));
}
buffers = resized;
}
// Step 7: Write Pass-2 audit entry
zoomPassRan = true;
try {
const audit2 = buildAuditLogger({ backend: resolvedName, mode: "zoom", requestId: auditRequestId });
if (currentLmChatId) audit2.setChatId(currentLmChatId);
const zoomUserReq: Record<string, any> = {};
if ((input as any)?.mode) zoomUserReq.mode = (input as any).mode;
if ((input as any)?.prompt) zoomUserReq.prompt = (input as any).prompt;
if ((input as any)?.canvas) zoomUserReq.canvas = (input as any).canvas;
audit2.setUserRequest(zoomUserReq as any);
audit2.setRenderTarget({
requested_raw: { width: requestedRawW, height: requestedRawH },
requested_effective: { width: zoomCanvasW, height: zoomCanvasH },
needs_upscaler: true,
});
audit2.setInputs({
canvas: {
original: backendReturnedW !== undefined && backendReturnedH !== undefined
? { width: backendReturnedW, height: backendReturnedH }
: { width: requestedRawW, height: requestedRawH },
adjusted: { width: zoomCanvasW, height: zoomCanvasH },
},
});
const zoomOutput: Record<string, any> = {};
if (zoomBackendW !== undefined && zoomBackendH !== undefined) {
zoomOutput.backend_returned = { width: zoomBackendW, height: zoomBackendH };
}
zoomOutput.post_processed = { width: requestedRawW, height: requestedRawH };
zoomOutput.inference_time_ms = zoomInferenceMs;
if (typeof zoomMeta.model === "string" && zoomMeta.model.trim()) {
zoomOutput.model_used = path.basename(zoomMeta.model);
} else if (typeof defaultParamsZoom.model === "string") {
zoomOutput.model_used = defaultParamsZoom.model;
}
if (typeof zoomMeta.steps_used === "number") {
zoomOutput.steps_used = zoomMeta.steps_used;
} else if (typeof defaultParamsZoom.steps === "number") {
zoomOutput.steps_used = defaultParamsZoom.steps;
}
if (typeof zoomMeta.sampler_used === "string" && zoomMeta.sampler_used.trim()) {
zoomOutput.sampler_used = zoomMeta.sampler_used;
} else if (typeof defaultParamsZoom.sampler === "string") {
zoomOutput.sampler_used = defaultParamsZoom.sampler;
}
audit2.setOutput(zoomOutput as any);
pendingAudit2 = audit2;
} catch (auditErr) {
log(`zoom-pass audit write failed: ${String(auditErr)}`);
}
log(`zoom-pass: canvas=${zoomCanvasW}x${zoomCanvasH} backend=${zoomBackendW ?? "?"}x${zoomBackendH ?? "?"} -> final=${requestedRawW}x${requestedRawH} (${zoomInferenceMs}ms)`);
} catch (e) {
throw new Error(`Zoom-Pass pipeline failed: ${String((e as any)?.message || e)}`);
}
}
// ── END ZOOM-PASS PIPELINE ────────────────────────────────────────────────
// User-requested output size, rounded and floored at 1; undefined for an axis
// the user did not supply as a finite number.
const [userReqW, userReqH] = (["width", "height"] as const).map((axis) => {
  const v = (input as any)?.[axis];
  return typeof v === "number" && Number.isFinite(v)
    ? Math.max(1, Math.round(v))
    : undefined;
});
// When no zoom pass ran and normalization changed the source size, bring the
// generated image(s) back to the requested effective size, or to the original
// source size when no effective size is known.
if (
  !zoomPassRan &&
  (imageService as any)?.name === "drawthings" &&
  (effectiveMode === "image2image" || effectiveMode === "edit") &&
  sourcePreprocess &&
  (sourcePreprocess.reason === "normalized_to_constraints" ||
    sourcePreprocess.reason === "clamped_to_requested_raw") &&
  sourcePreprocess.original?.width &&
  sourcePreprocess.original?.height &&
  sourcePreprocess.adjusted?.width &&
  sourcePreprocess.adjusted?.height &&
  (sourcePreprocess.original.width !== sourcePreprocess.adjusted.width ||
    sourcePreprocess.original.height !== sourcePreprocess.adjusted.height)
) {
  try {
    // Prefer a finite effective dimension; otherwise use the original one.
    const pickTarget = (effective: unknown, original: number): number =>
      Math.max(
        1,
        Math.round(
          typeof effective === "number" && Number.isFinite(effective)
            ? effective
            : original
        )
      );
    let targetW = pickTarget(requestedEffectiveW, sourcePreprocess.original.width!);
    let targetH = pickTarget(requestedEffectiveH, sourcePreprocess.original.height!);

    // Shrink proportionally if the target exceeds the mode-specific maximum.
    try {
      const caps =
        effectiveMode === "edit" ? drawthingsEditLimits : drawthingsLimits;
      const shrink = Math.min(caps.maxWidth / targetW, caps.maxHeight / targetH, 1);
      if (shrink < 1) {
        targetW = Math.max(1, Math.round(targetW * shrink));
        targetH = Math.max(1, Math.round(targetH * shrink));
      }
    } catch {}

    // Resize one image at a time, keeping the original order.
    const restored: Buffer[] = [];
    for (const generated of buffers) {
      restored.push(await imgResizeCoverToPng(generated, targetW, targetH));
    }
    buffers = restored;

    const haveEffective =
      typeof requestedEffectiveW === "number" &&
      typeof requestedEffectiveH === "number";
    const restoreTargetLabel = haveEffective
      ? "requested effective size"
      : "original source size";
    log(
      `postprocess: restored generated image(s) to ${restoreTargetLabel} ${targetW}x${targetH} (from normalized ${sourcePreprocess.adjusted.width}x${sourcePreprocess.adjusted.height})`
    );
  } catch (e) {
    // Best effort: on failure the unresized output is kept.
    log(`postprocess restore-to-original-size failed: ${String(e)}`);
  }
}
// Final step: when a raw target size is known on both axes (requestedRawW/requestedRawH),
// make sure the output has exactly that size. The decision is based on the first buffer's
// dimensions; when they differ, every buffer is resized.
try {
  let exactW: number | undefined;
  let exactH: number | undefined;
  if (typeof requestedRawW === "number" && Number.isFinite(requestedRawW)) {
    exactW = Math.max(1, Math.round(requestedRawW));
  }
  if (typeof requestedRawH === "number" && Number.isFinite(requestedRawH)) {
    exactH = Math.max(1, Math.round(requestedRawH));
  }
  if (exactW && exactH) {
    const firstSize = buffers[0] ? await imgGetSize(buffers[0]) : null;
    const alreadyExact =
      firstSize?.width === exactW && firstSize?.height === exactH;
    if (!alreadyExact) {
      const exactBuffers: Buffer[] = [];
      for (const generated of buffers) {
        exactBuffers.push(await imgResizeCoverToPng(generated, exactW, exactH));
      }
      buffers = exactBuffers;
      log(
        `postprocess: adjusted final generated image(s) to requested raw size ${exactW}x${exactH}`
      );
    }
  }
} catch (e) {
  // Non-fatal: the buffers keep whatever size they had.
  log(`postprocess final-resize-to-user-request failed: ${String(e)}`);
}
// Dimensions reported as "post-processed". After a zoom-pass these are the pass-1 backend
// dimensions; otherwise they are measured from the first output buffer. Failures leave both
// values undefined.
let postProcessedW: number | undefined;
let postProcessedH: number | undefined;
try {
  if (!zoomPassRan) {
    if (buffers[0]) {
      const measured = await imgGetSize(buffers[0]);
      postProcessedW = measured.width;
      postProcessedH = measured.height;
    }
  } else {
    // Pass 1 post-processed = Pass 1 backend output (before zoom-pass replaced buffers).
    postProcessedW = backendReturnedW;
    postProcessedH = backendReturnedH;
  }
} catch {}
const { saveOriginal } = generateRuntimeDefaults;
// Inline previews are on unless PREVIEW_IN_CHAT is set to something other than 1/true/yes
// (case-insensitive, surrounding whitespace ignored).
const envPreviewRaw = process.env.PREVIEW_IN_CHAT;
const previewInChat =
  envPreviewRaw == null || /^(1|true|yes)$/i.test(String(envPreviewRaw).trim());
log(`preview toggle: PREVIEW_IN_CHAT='${envPreviewRaw}' -> ${previewInChat}`);
// Alt text: generic label, extended with the first 80 characters of a non-blank prompt.
const promptStr = typeof input.prompt !== "string" ? "" : input.prompt;
const alt = !promptStr.trim()
  ? "Generated image"
  : `Generated image: ${promptStr.slice(0, 80)}`;
// Files written during this run that are reported back to the caller (links, audit images).
const savedFiles: Array<{
savedPath: string;
fileUrl: string;
size: number;
fileName: string;
}> = [];
// Primary storage: write directly into the active LM Studio chat working directory.
// Prefers the already-resolved working dir; otherwise derives it from the chat id.
// Fail-fast if neither is available.
const primaryOutDir: string | undefined =
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
if (!primaryOutDir) {
throw new Error(
"Failed to resolve LM Studio chat working directory (chatId missing)."
);
}
// mkdir errors are deliberately ignored here; a missing directory would surface on the first write.
await fs.promises.mkdir(primaryOutDir, { recursive: true }).catch(() => {});
// Read the current state to get nextImageI for continuous image numbering.
// This keeps i1, i2, i3... running across multiple generation runs (not resetting to i1 each time).
const currentState = await readState(primaryOutDir);
// Numbering never starts below 1, even if the stored counter is missing.
const baseImageI = Math.max(1, currentState.counters.nextImageI ?? 1);
log(
`image numbering: starting at i${baseImageI} (nextImageI from state)`
);
// One timestamp shared by every file name produced in this run.
const baseStamp = isoStampCompact();
// Records handed to appendImages() further below to update the chat media state.
const imageRecordsForState: Array<{
filename: string;
preview: string;
i: number;
sourceTool?: string;
}> = [];
// ── VIDEO PATH ────────────────────────────────────────────────────────
const numFramesMeta = (result as any)?.metadata?.num_frames;
// A result is treated as video only when all of the following hold:
//  - at least 3 buffers are present: [discarded-first] + [≥1 real frame] + [discarded-last];
//    otherwise slice(1, buffers.length - 1) would be empty when the gRPC response only
//    carried a preview/fallback buffer despite num_frames > 1;
//  - the metadata reports more than one frame;
//  - the effective mode is a video mode (some official defaults carry a wrong numFrames > 1
//    for non-video models).
const isVideoResult =
  buffers.length >= 3 &&
  typeof numFramesMeta === "number" &&
  numFramesMeta > 1 &&
  (effectiveMode === "text2video" || effectiveMode === "image2video");
// ── BUILD XMP PARAMS (shared by video + image paths) ─────────────────
// Metadata passed to saveOriginalPng() for every file saved below. Values come from the
// backend result metadata where present; fields that are missing or of the wrong type are
// omitted instead of being written as undefined.
const resMeta = (result as any)?.metadata ?? {};
// Source paths: the primary source first, then each reference's originPath, de-duplicated.
const xmpSourcePaths: string[] = [];
if (sourceOriginAbs) xmpSourcePaths.push(sourceOriginAbs);
for (const r of usedReferenceMeta ?? []) {
if (r.originPath && !xmpSourcePaths.includes(r.originPath)) {
xmpSourcePaths.push(r.originPath);
}
}
const xmpLorasUsed: Array<string> = Array.isArray(resMeta.loras_used)
? resMeta.loras_used
: [];
const xmpParams: PngXmpParams = {
...(typeof input.prompt === "string" && input.prompt ? { prompt: input.prompt } : {}),
// Model: basename of the model reported by the backend, else of effectiveModelFilename.
...(typeof resMeta.model === "string" && resMeta.model
? { model: path.basename(resMeta.model) }
: effectiveModelFilename
? { model: path.basename(effectiveModelFilename) }
: {}),
// NOTE(review): width/height are taken from the backend metadata, not from the
// post-processed buffers — confirm this is the intended value for the XMP record.
...(typeof resMeta.width === "number" ? { width: resMeta.width } : {}),
...(typeof resMeta.height === "number" ? { height: resMeta.height } : {}),
...(typeof resMeta.steps_used === "number" ? { steps: resMeta.steps_used } : {}),
...(typeof resMeta.seed === "number" ? { seed: resMeta.seed } : {}),
...(typeof resMeta.seed_mode === "string" && resMeta.seed_mode ? { seedMode: resMeta.seed_mode } : {}),
...(typeof resMeta.sampler_used === "string" && resMeta.sampler_used ? { sampler: resMeta.sampler_used } : {}),
...(typeof resMeta.guidance_scale_used === "number" ? { guidanceScale: resMeta.guidance_scale_used } : {}),
...(typeof resMeta.strength_used === "number" ? { strength: resMeta.strength_used } : {}),
...(typeof resMeta.shift_used === "number" ? { shift: resMeta.shift_used } : {}),
...(xmpLorasUsed.length > 0 ? { loras: xmpLorasUsed.map((f) => ({ file: f })) } : {}),
...(xmpSourcePaths.length > 0 ? { sources: xmpSourcePaths } : {}),
mode: effectiveMode,
// Crop rectangle and crop source are only present when the caller passed _internal.cropMeta.
...(_internal?.cropMeta ? { crop: { left: _internal.cropMeta.left, top: _internal.cropMeta.top, right: _internal.cropMeta.right, bottom: _internal.cropMeta.bottom } } : {}),
...(_internal?.cropMeta?.cropSource ? { cropSource: _internal.cropMeta.cropSource } : {}),
};
// ── END XMP PARAMS ────────────────────────────────────────────────────
// Video results produce two artifacts under one i-number: the last trimmed frame saved as a
// PNG, and the assembled .mov. Only one of them ends up in savedFiles (the .mov on success,
// the PNG when assembly fails); the state record always describes the PNG.
let videoFrames: Buffer[] = [];
let videoPngSaved: (typeof savedFiles)[0] | null = null;
if (isVideoResult) {
// Frame rate from the result metadata, defaulting to 24 when it is not a number.
const videoFps =
typeof (result as any)?.metadata?.fps === "number"
? (result as any).metadata.fps
: 24;
// Trim: discard first frame (confirmed) and last frame (pending verification).
// Roadmap trim rule: slice(1, buffers.length - 1) == slice(1, num_frames + 1)
// isVideoResult requires buffers.length >= 3, so at least one frame remains after trimming.
videoFrames = buffers.slice(1, buffers.length - 1);
const videoImageI = baseImageI;
const videoBaseName = `image-${baseStamp}-i${videoImageI}`;
const lastFrame = videoFrames[videoFrames.length - 1];
// PNG: canonical for state / VP / i2i / lastOriginalRef
// A failure here is not caught in this block and propagates to the caller.
videoPngSaved = await saveOriginalPng(lastFrame, primaryOutDir, `${videoBaseName}.png`, { ...xmpParams, isVideoFrame: true });
try {
log(`saved original (video last-frame): ${videoPngSaved.savedPath} (${videoPngSaved.size} bytes) [i${videoImageI}]`);
} catch {}
imageRecordsForState.push({
filename: `${videoBaseName}.png`,
preview: `preview-${videoBaseName}.jpg`,
i: videoImageI,
sourceTool: `${getSelfPluginIdentifier()}/generate_image`,
});
// MOV: goes into savedFiles so originalLinksText shows .mov. On failure, falls back to PNG.
try {
onProgress?.(-1, undefined, "Assembling video...");
// Loaded lazily, so the assembler module is only imported for video results.
const { assembleVideo } = await import("../helpers/videoAssembler.js");
// Audio chunks, when present on the result, are concatenated into a single buffer.
const audioChunks = (result as any)?.audioBuffers as Buffer[] | undefined;
const audioRaw = audioChunks && audioChunks.length > 0
? Buffer.concat(audioChunks)
: undefined;
// Sample rate is looked up per model; an unregistered model is logged and 48000 is used.
const audioSampleRateRaw = getAudioSampleRateForModel(effectiveModelFilename);
if (audioSampleRateRaw === undefined) {
log(`[video] ERROR: no audioSampleRate registered for model '${effectiveModelFilename}' — falling back to 48 000 Hz`);
}
const audioSampleRate = audioSampleRateRaw ?? 48_000;
const movBuffer = await assembleVideo(videoFrames, videoFps, audioRaw, audioSampleRate);
const movFileName = `${videoBaseName}.mov`;
const movPath = path.join(primaryOutDir, movFileName);
await fs.promises.writeFile(movPath, movBuffer);
const movUrl = encodeFileUrl(movPath);
savedFiles.push({ savedPath: movPath, fileUrl: movUrl, size: movBuffer.length, fileName: movFileName });
log(`saved video: ${movPath} (${movBuffer.length} bytes) [i${videoImageI}]`);
} catch (e: any) {
// Any failure above (import, assembly, write) is logged and the PNG is reported instead.
const msg = e && e.message ? String(e.message) : String(e);
log(`video assembly failed (i${videoImageI}): ${msg}`);
savedFiles.push(videoPngSaved);
}
}
// ── END VIDEO PATH ────────────────────────────────────────────────────
// Still-image path: save every buffer as a PNG with consecutive i-numbers starting at
// baseImageI, and queue a matching state record for each.
if (!isVideoResult) {
  for (const [offset, imageBuffer] of buffers.entries()) {
    const imageI = baseImageI + offset;
    const stem = `image-${baseStamp}-i${imageI}`;
    const saved = await saveOriginalPng(
      imageBuffer,
      primaryOutDir,
      `${stem}.png`,
      xmpParams
    );
    try {
      log(`saved original: ${saved.savedPath} (${saved.size} bytes) [i${imageI}]`);
    } catch {}
    savedFiles.push(saved);
    // Tracked for the state update later on.
    imageRecordsForState.push({
      filename: `${stem}.png`,
      preview: `preview-${stem}.jpg`,
      i: imageI,
      sourceTool: `${getSelfPluginIdentifier()}/generate_image`,
    });
  }
}
// First saved file of this run; used for lastOriginalRef and for the summary's original link.
const firstSaved = savedFiles[0];
// savedFiles is only empty when this was not a video result and there were no buffers to
// save. Fail with a descriptive message instead of a TypeError on the property reads below.
if (!firstSaved) {
  throw new Error(
    "No generated image was saved (the backend returned no image buffers)."
  );
}
// Video: lastOriginalRef must point to PNG (not MOV) so follow-up i2i/canvas loads work.
lastOriginalRef =
  isVideoResult && videoPngSaved
    ? { path: videoPngSaved.savedPath, url: videoPngSaved.fileUrl }
    : { path: firstSaved.savedPath, url: firstSaved.fileUrl };
// Policy: previews are JPEG only (same handling for attachments and variants).
// The spec comes from VARIANT_FULL_CONFIG.preview and is applied by the central
// generatePreviewFromBuffer() helper.
const variantPreviewSpec = VARIANT_FULL_CONFIG.preview;
const previews: any[] = [];
if (isVideoResult) {
  // A video gets exactly one preview, built from the last trimmed frame and named after
  // the video's i-number.
  const frameI = baseImageI;
  const stem = `image-${baseStamp}-i${frameI}`;
  try {
    const built = await generatePreviewFromBuffer(
      videoFrames[videoFrames.length - 1],
      primaryOutDir,
      videoPngSaved!.fileName,
      variantPreviewSpec,
      { customFilename: `preview-${stem}.jpg` }
    );
    const builtPath = built.previewAbs;
    const builtUrl = encodeFileUrl(builtPath);
    previews.push({
      ok: true as const,
      filePath: builtPath,
      fileName: built.previewFilename,
      fileUrl: builtUrl,
      size_bytes: built.data.length,
      width: built.width,
      height: built.height,
      mimeType: "image/jpeg" as const,
      format: variantPreviewSpec.format,
      dataBase64: built.data.toString("base64"),
    });
    log(
      `video preview saved (i${frameI}): ${builtPath} ${built.width}x${built.height} ${built.data.length} bytes ok=true`
    );
  } catch (e: any) {
    // Non-fatal: the run continues without a preview.
    const reason = e && e.message ? String(e.message) : String(e);
    log(
      `video preview build failed (i${frameI}): ${reason} spec=${JSON.stringify(variantPreviewSpec)}`
    );
  }
}
// Still-image path: build one JPEG preview per output buffer. A buffer with an unsupported
// signature, or a failing preview build, is logged and skipped, so `previews` may end up
// shorter than `buffers`.
if (!isVideoResult) {
  for (const [offset, imageBuffer] of buffers.entries()) {
    const imageI = baseImageI + offset; // consecutive i-number, used for logging
    try {
      if (!isSupportedImageBuffer(imageBuffer)) {
        try {
          const magic = Buffer.from(imageBuffer.slice(0, 12) || []).toString("hex");
          log(
            `preview skip: unsupported buffer signature (i${imageI}) magic=${magic}`
          );
        } catch {}
        continue;
      }
      // Central helper applies the size constraints from the preview spec.
      const built = await generatePreviewFromBuffer(
        imageBuffer,
        primaryOutDir,
        savedFiles[offset].fileName,
        variantPreviewSpec
      );
      const builtPath = built.previewAbs;
      const builtUrl = encodeFileUrl(builtPath);
      previews.push({
        ok: true as const,
        filePath: builtPath,
        fileName: built.previewFilename,
        fileUrl: builtUrl,
        size_bytes: built.data.length,
        width: built.width,
        height: built.height,
        mimeType: "image/jpeg" as const,
        format: variantPreviewSpec.format,
        dataBase64: built.data.toString("base64"),
      });
      log(
        `preview saved (i${imageI}): ${builtPath} ${built.width}x${built.height} ${built.data.length} bytes ok=true`
      );
    } catch (e: any) {
      const reason = e && e.message ? String(e.message) : String(e);
      log(
        `preview build failed (i${imageI}): ${reason} spec=${JSON.stringify(variantPreviewSpec)}`
      );
    }
  }
}
// Report how many previews were produced; zero gets an explicit hint.
log(
  previews.length === 0
    ? `previews built: count=0 (no preview created)`
    : `previews built: count=${previews.length}`
);
// Runs only when at least one preview was built: remember the first preview, record the
// run's images for the active chat, and append them to the persisted chat media state.
if (previews.length > 0) {
  const firstPreview = previews[0];
  lastPreviewRef = {
    path: firstPreview.filePath,
    url: firstPreview.fileUrl,
    mimeType: firstPreview.mimeType,
    width: firstPreview.width,
    height: firstPreview.height,
  };
  // Track per-chat last images and clear pending sentinel after any generation
  try {
    if (currentLmChatId) {
      // For video runs savedFiles[0] is the assembled .mov (when assembly succeeded), while
      // the state record describes the canonical last-frame PNG. Record the PNG path so the
      // entry stored under this i-number is an image, consistent with lastOriginalRef.
      const canonicalVideoPngPath =
        isVideoResult && videoPngSaved ? videoPngSaved.savedPath : undefined;
      // Store with i-values for proper lookup in getAllImagesForContext
      LAST_IMAGES_BY_LM_CHAT[currentLmChatId] = imageRecordsForState.map(
        (ir, idx) => ({
          i: ir.i,
          path: canonicalVideoPngPath ?? savedFiles[idx].savedPath,
        })
      );
    }
  } catch {}
  // Update chat_media_state.json with new images (append, rolling window in orchestrator)
  // This ensures the State has the correct i-numbers and nextImageI is incremented
  try {
    const { appendImages } = await import("../core-bundle.mjs");
    // Re-read the state so the append is applied to the latest persisted version.
    const stateForUpdate = await readState(primaryOutDir);
    const appendResult = appendImages(stateForUpdate, imageRecordsForState);
    if (appendResult.changed) {
      await writeStateAtomic(primaryOutDir, stateForUpdate);
      log(
        `state updated: appended ${imageRecordsForState.length} images, nextImageI=${stateForUpdate.counters.nextImageI}`
      );
    }
  } catch (e) {
    // Non-fatal. Fall back to the value itself so non-Error throwables are not logged as
    // "undefined".
    log(`state update failed (non-fatal): ${String((e as any)?.message || e)}`);
  }
}
// Subset of the input restricted to the allowed generation keys that the caller set as own
// properties with a value other than undefined.
const explicit: Record<string, unknown> = {};
for (const key of ALLOWED_GEN_INPUT_KEYS) {
  if (!Object.prototype.hasOwnProperty.call(input, key)) continue;
  const value = (input as any)[key];
  if (value !== undefined) {
    explicit[key] = value;
  }
}
// Scalars lifted from the backend result metadata for the summary and audit log below.
const meta = ((result as any)?.metadata || {}) as any;
const inferenceMs = (result as any)?.metadata?.inference_time_ms;
// Effective dimensions: the post-processed size wins, then the backend-reported size.
const effWidth: number | undefined = (() => {
  if (typeof postProcessedW === "number") return Math.round(postProcessedW);
  if (typeof meta.width === "number") return Math.round(meta.width);
  return undefined;
})();
const effHeight: number | undefined = (() => {
  if (typeof postProcessedH === "number") return Math.round(postProcessedH);
  if (typeof meta.height === "number") return Math.round(meta.height);
  return undefined;
})();
// The remaining values are passed through only when the metadata has the expected type.
const imgFmtRaw: string | undefined =
  typeof meta.image_format !== "string" ? undefined : String(meta.image_format);
const effQuality: string | undefined =
  typeof meta.quality !== "string" ? undefined : String(meta.quality);
const effSteps: number | undefined =
  typeof meta.steps !== "number" ? undefined : Math.round(meta.steps);
// Result summary for this run. Further below it is serialized (minus `files`) into the tool
// response. Optional fields are spread in only when their value has the expected type.
const summary = {
width: effWidth,
height: effHeight,
image_format: imgFmtRaw,
quality: effQuality,
...(typeof effSteps === "number" ? { steps: effSteps } : {}),
backend: resolvedName,
mode_effective: effectiveMode,
source: sourceTag || undefined,
...(typeof sourceVariantUsed === "number"
? { source_variant_used: sourceVariantUsed }
: {}),
...(typeof normalizedToLongSide === "number"
? { normalized_to_long_side: normalizedToLongSide }
: {}),
// A video counts as one generated item regardless of how many frame buffers it has.
images_generated: isVideoResult ? 1 : buffers.length,
// Reference sources used (for edit mode / img2img)
...(usedReferenceMeta && usedReferenceMeta.length > 0
? {
references_used: (() => {
// Every non-canvas reference is a moodboard entry; they share the weight equally.
const moodboardCount = usedReferenceMeta.filter(
(r) => !r.isCanvas
).length;
const moodboardWeight =
moodboardCount > 0 ? 1.0 / moodboardCount : 0;
return usedReferenceMeta.map((r) => ({
source: r.type,
index: r.index,
role: r.isCanvas ? "canvas" : "moodboard",
...(r.originPath ? { source_originAbs: r.originPath } : {}),
...(r.originalName
? { source_originalName: r.originalName }
: {}),
...(!r.isCanvas && moodboardCount > 0
? { weight: moodboardWeight }
: {}),
}));
})(),
// Short notation of the canvas reference, or null when none is marked as canvas.
// NOTE(review): this uses the prefixes "a"/"v", whereas the audit log built later in
// this function labels references "a"/"i"/"p" — confirm which notation consumers expect.
canvas_source: (() => {
const canvas = usedReferenceMeta.find((r) => r.isCanvas);
return canvas
? `${canvas.type === "attachment" ? "a" : "v"}${canvas.index}`
: null;
})(),
}
: {}),
files: {
original: firstSaved.fileUrl,
previews: previews.map((p: any) => p.fileUrl),
},
...(typeof inferenceMs === "number"
? { inference_time_ms: inferenceMs }
: {}),
};
// Backfill originalName for the audit log wherever an origin path is known.
// Rationale: originAbs is the stable attachment identity, while originalName may be missing
// in some resolver paths unless it was persisted in chat_media_state.json.
// Best-effort: any failure leaves the names untouched.
try {
  const auditWorkingDir =
    currentLmWorkingDir ||
    (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : null);
  if (auditWorkingDir) {
    const state: any = await readState(auditWorkingDir);
    // Returns the value when it is a string with non-whitespace content, else null.
    const textOrNull = (value: unknown): string | null =>
      typeof value === "string" && value.trim() ? String(value) : null;

    // Index the persisted attachments' original names by absolute origin and by file name.
    const nameByOriginAbs = new Map<string, string>();
    const nameByFilename = new Map<string, string>();
    const persisted: any[] = Array.isArray(state?.attachments)
      ? state.attachments
      : [];
    for (const entry of persisted) {
      if (!entry || typeof entry !== "object") continue;
      const originalName = textOrNull(entry.originalName);
      if (!originalName) continue;
      const originAbs = textOrNull(entry.originAbs);
      const fileKey = textOrNull(entry.filename) ?? textOrNull(entry.origin);
      if (originAbs) nameByOriginAbs.set(originAbs, originalName);
      if (fileKey) nameByFilename.set(fileKey, originalName);
    }
    // Exact origin match first, then a match on the path's basename.
    const lookupName = (originPath: string): string | undefined =>
      nameByOriginAbs.get(originPath) ||
      nameByFilename.get(path.basename(originPath));

    if (sourceKind === "attachment" && sourceOriginAbs && !sourceOriginalName) {
      sourceOriginalName = lookupName(sourceOriginAbs);
    }
    if (Array.isArray(usedReferenceMeta) && usedReferenceMeta.length > 0) {
      usedReferenceMeta = usedReferenceMeta.map((r) => {
        // Only attachment references with a string origin path and no name are filled.
        if (
          !r ||
          r.type !== "attachment" ||
          !r.originPath ||
          typeof r.originPath !== "string" ||
          r.originalName
        ) {
          return r;
        }
        const filled = lookupName(r.originPath);
        return filled ? { ...r, originalName: filled } : r;
      });
    }
  }
} catch {}
// HTTP URLs for the saved files, index-aligned with savedFiles. An entry is "" when no
// healthy server base URL is available (or, for previews, when the chat id or preview
// file name is missing).
const httpBase = await getHealthyServerBaseUrl();
const httpOriginals = savedFiles.map((saved) => {
  if (!httpBase) return "";
  return toHttpOriginalUrl(saved.fileName, httpBase, currentLmChatId || undefined);
});
// Preview URLs point at the preview-* files in the chat working directory.
// NOTE(review): previews[] is indexed in parallel with savedFiles[], but a skipped or
// failed preview build shortens previews[] — confirm the indices cannot drift apart.
const httpPreviews = savedFiles.map((_saved, idx) => {
  const previewName = previews[idx]?.fileName;
  if (!httpBase || !currentLmChatId || !previewName) return "";
  return toHttpPreviewUrl(previewName, httpBase, currentLmChatId);
});
try {
const audit = buildAuditLogger({
  backend: resolvedName,
  mode: effectiveMode as any,
  requestId: auditRequestId,
});
// Metadata
if (currentLmChatId) audit.setChatId(currentLmChatId);
// === USER REQUEST (what the user sent) ===
const userRequest: Record<string, any> = {};
const rawInput = input as any;
// Copies each listed field into userRequest when its value is truthy.
const copyTruthyFields = (keys: string[]): void => {
  for (const key of keys) {
    const value = rawInput?.[key];
    if (value) userRequest[key] = value;
  }
};
const inputOwns = (key: string): boolean =>
  Object.prototype.hasOwnProperty.call(rawInput, key);

copyTruthyFields(["prompt", "mode", "canvas", "moodboard", "model", "width", "height"]);
// seed is recorded for any number, including 0.
if (typeof rawInput?.seed === "number") {
  userRequest.seed = rawInput.seed;
}
// seed_mode is recorded whenever the caller supplied the key (snake_case wins over
// camelCase), whatever its value.
if (inputOwns("seed_mode")) {
  userRequest.seed_mode = rawInput.seed_mode;
} else if (inputOwns("seedMode")) {
  userRequest.seed_mode = rawInput.seedMode;
}
copyTruthyFields(["imageFormat", "quality", "variants"]);
audit.setUserRequest(userRequest);
// === RENDER TARGET (Step 0+1) ===
const renderTarget: Record<string, any> = {};
// Builds { width?, height? } from whichever of the two values are numbers; returns
// undefined when neither is.
const sizePair = (w: unknown, h: unknown): Record<string, unknown> | undefined => {
  const pair: Record<string, unknown> = {};
  if (typeof w === "number") pair.width = w;
  if (typeof h === "number") pair.height = h;
  return Object.keys(pair).length > 0 ? pair : undefined;
};
const requestedRawPair = sizePair(requestedRawW, requestedRawH);
if (requestedRawPair) {
  renderTarget.requested_raw = requestedRawPair;
}
const requestedEffectivePair = sizePair(requestedEffectiveW, requestedEffectiveH);
if (requestedEffectivePair) {
  renderTarget.requested_effective = requestedEffectivePair;
}
// The upscaler flag is only recorded when the raw request is complete on both axes.
if (typeof requestedRawW === "number" && typeof requestedRawH === "number") {
  renderTarget.needs_upscaler = _dtNeedsUpscaler;
}
if (Object.keys(renderTarget).length > 0) {
  audit.setRenderTarget(renderTarget);
}
// === INPUTS (Step 2: Canvas + Moodboard) ===
// Describes what was fed into the backend: the canvas image, moodboard references and the
// mask. Nothing is recorded when none of them is present.
const inputs: Record<string, any> = {};
// Canvas from sourcePreprocess (single-image i2i) or usedReferencePreprocess
if (sourcePreprocess && sourcePreprocess.original) {
inputs.canvas = {
notation: sourceTag || undefined,
source_type: sourceKind || undefined,
file_name: sourceFileName || undefined,
original_name: sourceOriginalName || undefined,
origin_path: sourceOriginAbs || undefined,
// A caller-supplied override of the original dimensions takes precedence over the
// dimensions recorded during preprocessing.
original: _internal?.auditSourceOverride?.originalDims ?? {
width: sourcePreprocess.original.width,
height: sourcePreprocess.original.height,
bytes: sourcePreprocess.original.bytes,
},
adjusted: sourcePreprocess.adjusted
? {
width: sourcePreprocess.adjusted.width,
height: sourcePreprocess.adjusted.height,
bytes: sourcePreprocess.adjusted.bytes,
}
: undefined,
};
}
// Multi-reference (edit mode): usedReferencePreprocess
// A canvas reference found here replaces the inputs.canvas set above.
if (usedReferencePreprocess && usedReferencePreprocess.length > 0) {
const canvasRef = usedReferencePreprocess.find(
(r) => r.role === "canvas"
);
const moodboardRefs = usedReferencePreprocess.filter(
(r) => r.role === "moodboard"
);
const moodboardCount = moodboardRefs.length;
if (canvasRef) {
// Notation: "a" for attachment, "i" for image, "p" for any other type, followed by the index.
const notation = `${
canvasRef.type === "attachment"
? "a"
: canvasRef.type === "image"
? "i"
: "p"
}${canvasRef.index}`;
inputs.canvas = {
notation,
source_type: canvasRef.type,
original_name: canvasRef.originalName || undefined,
origin_path: canvasRef.originPath || undefined,
original: canvasRef.preprocess?.original,
adjusted: canvasRef.preprocess?.adjusted,
};
}
if (moodboardRefs.length > 0) {
inputs.moodboard = moodboardRefs.map((r) => {
const notation = `${
r.type === "attachment" ? "a" : r.type === "image" ? "i" : "p"
}${r.index}`;
return {
notation,
source_type: r.type,
original_name: r.originalName || undefined,
origin_path: r.originPath || undefined,
original: r.preprocess?.original,
adjusted: r.preprocess?.adjusted,
// Moodboard references share the weight equally.
weight: moodboardCount > 0 ? 1.0 / moodboardCount : undefined,
};
});
}
}
// Mask info (inpaint / outpaint) — uses auditMaskBuf (post-resize, actual dims sent)
// The length check guarantees the two 4-byte reads below stay inside the buffer.
if (auditMaskBuf && auditMaskBuf.length >= 24) {
const mb = auditMaskBuf;
// Read PNG dimensions from IHDR (bytes 16–23, big-endian uint32)
// NOTE(review): assumes auditMaskBuf holds PNG data; the PNG signature is not checked here.
const maskW = mb.readUInt32BE(16);
const maskH = mb.readUInt32BE(20);
inputs.mask = {
bytes: mb.length,
width: maskW,
height: maskH,
// The crop rectangle is attached only when the caller passed _internal.cropMeta.
...((_internal?.cropMeta) && { crop_percent: {
left: _internal.cropMeta.left,
top: _internal.cropMeta.top,
right: _internal.cropMeta.right,
bottom: _internal.cropMeta.bottom,
}}),
};
}
if (Object.keys(inputs).length > 0) {
audit.setInputs(inputs);
}
// === OUTPUT (Step 3+4) ===
// Collects what the backend produced and which effective parameters it reported. Each field
// is copied only when the metadata value has the expected type.
const output: Record<string, any> = {};
// Backend returned dimensions
if (
typeof backendReturnedW === "number" &&
typeof backendReturnedH === "number"
) {
output.backend_returned = {
width: backendReturnedW,
height: backendReturnedH,
};
}
// Post-processed dimensions
if (typeof effWidth === "number" && typeof effHeight === "number") {
output.post_processed = { width: effWidth, height: effHeight };
}
// Inference time
if (typeof inferenceMs === "number") {
output.inference_time_ms = inferenceMs;
}
// Model used
// NOTE(review): this stores meta.model as-is; the try block below assigns model_used again
// with path.basename(meta.model), so this first value only survives if that block throws
// before reaching the second assignment.
if (typeof meta.model === "string" && meta.model.trim()) {
output.model_used = meta.model;
}
// Model origin + presets
// Read overlay info from service result metadata (authoritative source)
try {
// Shadows the outer `meta`; both are read from the same result.metadata.
const meta = (result as any)?.metadata || {};
// overlay_source and overlay_preset come from the service layer
if (meta.overlay_source) {
output.overlay_source = meta.overlay_source;
}
if (meta.overlay_preset) {
output.overlay_preset = meta.overlay_preset;
}
if (typeof meta.defaults_used === "string" && meta.defaults_used.trim()) {
output.defaults_used = meta.defaults_used;
}
if (
typeof meta.overlay_lookup_mode === "string" &&
meta.overlay_lookup_mode.trim()
) {
output.overlay_lookup_mode = meta.overlay_lookup_mode;
}
if (typeof meta.i2i_profile === "string" && meta.i2i_profile.trim()) {
output.i2i_profile = meta.i2i_profile;
}
if (typeof meta.strength_used === "number" && Number.isFinite(meta.strength_used)) {
output.strength_used = meta.strength_used;
}
if (typeof meta.steps_used === "number" && Number.isFinite(meta.steps_used)) {
output.steps_used = meta.steps_used;
}
if (typeof meta.sampler_used === "string" && meta.sampler_used.trim()) {
output.sampler_used = meta.sampler_used;
}
if (
typeof meta.guidance_scale_used === "number" &&
Number.isFinite(meta.guidance_scale_used)
) {
output.guidance_scale_used = meta.guidance_scale_used;
}
if (typeof meta.shift_used === "number" && Number.isFinite(meta.shift_used)) {
output.shift_used = meta.shift_used;
}
if (typeof meta.resolution_dependent_shift_used === "boolean") {
output.resolution_dependent_shift_used = meta.resolution_dependent_shift_used;
}
if (typeof meta.compression_artifacts_used === "string") {
output.compression_artifacts_used = meta.compression_artifacts_used;
}
if (typeof meta.compression_artifacts_quality_used === "number" && Number.isFinite(meta.compression_artifacts_quality_used)) {
output.compression_artifacts_quality_used = meta.compression_artifacts_quality_used;
}
// Model used (from service - the actual model that was sent to Draw Things)
if (typeof meta.model === "string" && meta.model.trim()) {
output.model_used = path.basename(meta.model);
}
// LoRAs used (from service - actual LoRA files validated and sent)
if (Array.isArray(meta.loras_used) && meta.loras_used.length > 0) {
output.loras_used = meta.loras_used.map((f: string) =>
path.basename(f)
);
}
// Seed used (from service - effective seed after defaults/overlays)
if (typeof meta.seed === "number" && Number.isFinite(meta.seed)) {
output.seed = meta.seed;
}
if (typeof meta.seed_mode === "string" && meta.seed_mode.trim()) {
output.seed_mode = meta.seed_mode;
}
if (typeof meta.seed_source === "string" && meta.seed_source.trim()) {
output.seed_source = meta.seed_source;
}
if (
typeof meta.seed_mode_source === "string" &&
meta.seed_mode_source.trim()
) {
output.seed_mode_source = meta.seed_mode_source;
}
// An exception anywhere in this block is swallowed; fields after the failing line are
// simply left out of the audit output.
} catch {}
// Prompt used (prefer backend's prompt_used, fallback to user input)
const metaPrompt = (result as any)?.metadata?.prompt_used;
const promptUsed =
typeof metaPrompt === "string" && metaPrompt.trim()
? metaPrompt
: typeof (input as any)?.prompt === "string"
? (input as any).prompt
: undefined;
if (promptUsed) {
output.prompt_used = promptUsed;
}
// Prompt origin
// "user" when the backend reports "user" or the caller's prompt is non-blank; otherwise
// "default" (any other backend-reported origin is not passed through).
const backendOriginRaw = (result as any)?.metadata?.prompt_origin;
const userPromptInput =
typeof (input as any)?.prompt === "string"
? String((input as any).prompt).trim()
: "";
if (backendOriginRaw === "user" || userPromptInput) {
output.prompt_origin = "user";
} else {
output.prompt_origin = "default";
}
// Images (saved files): one audit entry per saved file, with local and (when available)
// HTTP URLs for the original and its preview.
if (Array.isArray(savedFiles) && savedFiles.length > 0) {
  output.images = savedFiles.map((s, i) => {
    const pv = previews[i];
    // The i-number is parsed from the "-i<N>.<ext>" file-name suffix. ".mov" is accepted as
    // well, so an assembled video keeps its real i-number instead of falling back to the
    // array position (this matches the pattern used by extractStableVariantV).
    const iMatch = /[-]i(\d+)\.(png|jpe?g|webp|mov)$/i.exec(s?.fileName || "");
    const idx = iMatch ? parseInt(iMatch[1], 10) : i + 1;
    return {
      i: idx,
      path: s.savedPath,
      url: s.fileUrl,
      bytes: s.size,
      ...(httpOriginals[i] ? { http_url: httpOriginals[i] } : {}),
      ...(pv ? { preview_path: pv.filePath, preview_url: pv.fileUrl } : {}),
      ...(httpPreviews[i] ? { http_preview_url: httpPreviews[i] } : {}),
    };
  });
}
audit.setOutput(output);
await audit.write();
// The zoom-pass audit record, if one was prepared earlier, is written after the primary
// record; its failure is logged separately and does not affect the primary record.
if (pendingAudit2) {
  try {
    await pendingAudit2.write();
  } catch (e2) {
    log(`zoom-pass audit2 write failed: ${String(e2)}`);
  }
  pendingAudit2 = null;
}
} catch (e) {
log(`audit logging error: ${String(e)}`);
}
// Log whether the external HTTP server was reachable, plus its configured port when set.
try {
  const port = process.env.HTTP_SERVER_PORT;
  const status = httpBase ? "healthy" : "unavailable";
  const portSuffix = port ? ` (port=${port})` : "";
  log(`[httpServer] generate_image: external server ${status}${portSuffix}.`);
} catch {}
/**
 * Extracts the positive index from a file name ending in "-i<N>.<ext>" or, failing that,
 * "-v<N>.<ext>", where <ext> is png, jpg, jpeg, webp or mov (case-insensitive).
 *
 * @param fileName File name to inspect; falsy values are treated as an empty string.
 * @returns The parsed index when it is a finite number greater than 0, otherwise undefined.
 */
const extractStableVariantV = (fileName: string): number | undefined => {
  try {
    const name = String(fileName || "");
    const suffixPatterns = [
      /[-]i(\d+)\.(png|jpe?g|webp|mov)$/i,
      /-v(\d+)\.(png|jpe?g|webp|mov)$/i,
    ];
    for (const pattern of suffixPatterns) {
      const hit = pattern.exec(name);
      if (!hit) continue;
      const parsed = parseInt(hit[1], 10);
      return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
    }
    return undefined;
  } catch {
    return undefined;
  }
};
// Human-readable link lists, one entry per saved file, joined with " | " (empty string when
// nothing was saved). The label is the i-number parsed from the saved file name, then from
// the preview file name, and finally the 1-based position.
const variantLinksText = savedFiles
  .map((saved, idx) => {
    const label =
      extractStableVariantV(saved.fileName) ||
      extractStableVariantV(previews[idx]?.fileName) ||
      idx + 1;
    // Prefer the HTTP preview URL, then the preview's file URL, then the original's file URL.
    const url = httpPreviews[idx] || previews[idx]?.fileUrl || saved.fileUrl;
    return `Preview i${label}: ${url}`;
  })
  .join(" | ");
// Originals are saved directly to the chat working directory.
const originalLinksText = savedFiles
  .map((saved, idx) => {
    const label =
      extractStableVariantV(saved.fileName) ||
      extractStableVariantV(previews[idx]?.fileName) ||
      idx + 1;
    const url = httpOriginals[idx] || saved.fileUrl;
    return `Original i${label}: ${url}`;
  })
  .join(" | ");
// Only set when the caller asked for a numeric variants count that differs from the count
// actually used.
const note =
  typeof requestedVariants !== "number" || requestedVariants === usedVariants
    ? null
    : `Note: variants=${requestedVariants} was clamped to ${usedVariants}.`;
// One-line run summary for the log; missing values are printed as "-". Never throws.
try {
  const invMs = (result as any)?.metadata?.inference_time_ms;

  let wLog: number | undefined;
  if (typeof effWidth === "number") wLog = effWidth;
  else if (typeof postProcessedW === "number") wLog = postProcessedW;
  else if (typeof meta.width === "number") wLog = Math.round(meta.width);

  let hLog: number | undefined;
  if (typeof effHeight === "number") hLog = effHeight;
  else if (typeof postProcessedH === "number") hLog = postProcessedH;
  else if (typeof meta.height === "number") hLog = Math.round(meta.height);

  const orDash = (value: number | undefined): number | string =>
    typeof value === "number" ? value : "-";
  log(
    `generation summary: backend=${resolvedName} mode=${effectiveMode} width=${orDash(wLog)} height=${orDash(hLog)} previews=${previews.length} variants=${buffers.length} inferenceMs=${invMs ?? "-"}`
  );
} catch {}
const reviewHint = "Carefully examine the preview and comment on how well it matches your prompt. Do not assume it does.";
// The summary without its `files` entry. When the backend reported a model, it is extended
// with the model's basename and, where they can be determined, the matching presets.
const { files: _files, ...summaryNoFilesBase } = summary as any;
const summaryNoFiles = await (async () => {
  const rawModel = meta.model;
  const modelName =
    typeof rawModel === "string" && rawModel.trim()
      ? path.basename(rawModel)
      : undefined;
  if (!modelName) return summaryNoFilesBase;

  const overlaySource = meta.overlay_source as string | undefined;
  const overlayPreset = meta.overlay_preset as string | undefined;
  let presets: any[] | undefined;
  if (overlayPreset && overlaySource) {
    // The preset id is "<mode>.<modelId>", split at the first dot; without a dot both parts
    // stay undefined.
    const dot = overlayPreset.indexOf(".");
    const [presetMode, presetModelId] =
      dot < 0
        ? [undefined, undefined]
        : [overlayPreset.slice(0, dot), overlayPreset.slice(dot + 1)];
    const entry: Record<string, unknown> = {
      mode: presetMode,
      modelId: presetModelId,
      preset: overlayPreset,
      overlaySource,
    };
    if (overlaySource === "custom") {
      entry.customConfig = overlayPreset;
    }
    presets = [entry];
  } else if (overlaySource === "modelOverlay") {
    // No preset id reported: look the presets up by model name and keep only those that
    // come from a model overlay. Lookup failures are ignored.
    try {
      const { resolveImageModelInfoFromModelUsed } = await import(
        "../helpers/imageModelMeta.js"
      );
      const info = resolveImageModelInfoFromModelUsed(modelName, {
        mode: effectiveMode as any,
      });
      const fromOverlay = ((info as any)?.presets || []).filter(
        (p: any) => p.overlaySource === "modelOverlay"
      );
      if (fromOverlay.length > 0) presets = fromOverlay;
    } catch {}
  }

  const enriched = { ...summaryNoFilesBase, model_used: modelName };
  if (presets) {
    enriched.model_presets = presets;
  }
  return enriched;
})();
if (previewInChat && previews.length > 0) {
// The preview files were already written to the primary chat working directory.
// Reuse those file names to avoid duplicates.
const imageContents = previews.map((p: any, i: number) => {
const fname = String(p.fileName || "");
const stableV =
extractStableVariantV(fname) ||
extractStableVariantV(savedFiles[i]?.fileName) ||
i + 1;
return {
type: "image",
fileName: String(savedFiles[i]?.fileName || fname),
mimeType: p.mimeType,
markdown: `})`,
$hint:
"This is an image file. Present the image to the user by using the markdown above.",
} as any;
});
return {
content: [
...(note ? ([{ type: "text", text: note }] as any) : []),
...imageContents,
{ type: "text", text: variantLinksText },
{ type: "text", text: originalLinksText },
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
} else {
return {
content: [
...(note ? ([{ type: "text", text: note }] as any) : []),
...(() => {
if (httpOriginals.length > 0 && httpOriginals[0]) {
// PREVIEW_IN_CHAT = false: simplified response without inline previews.
// Markdown will be injected by orchestrator after tool call.
// Still provide a hint so the model reviews the output quality.
return httpOriginals.map((u, i) => ({
type: "text",
text: `${isVideoResult ? "Video" : "Image"} i${
extractStableVariantV(savedFiles[i]?.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1
} successfully generated.`,
$hint: reviewHint,
}));
}
const count =
typeof usedVariants === "number" && usedVariants > 0
? usedVariants
: 1;
return Array.from({ length: count }, (_, i) => ({
type: "text",
text: `${isVideoResult ? "Video" : "Image"} i${
extractStableVariantV(savedFiles[i]?.fileName) ||
extractStableVariantV(previews[i]?.fileName) ||
i + 1
} successfully generated.`,
$hint: reviewHint,
}));
})(),
...(variantLinksText
? [{ type: "text", text: variantLinksText }]
: []),
...(originalLinksText
? [{ type: "text", text: originalLinksText }]
: []),
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
}
} catch (error) {
log(
`generate_image error: ${
error instanceof Error ? error.message : String(error)
}`
);
await logError(error);
return {
content: [
{ type: "text", text: "Failed to generate image: backend error" },
],
isError: true as const,
};
}
}
// Parameter shapes exported per tool name.
// Note that `mask` reuses the crop parameter shape: handleMask validates its
// input with CropToolSchemaStrict, exactly like handleCrop.
export const ToolSchemas = {
generate_image: GenerateToolParamsShapeMinimal,
crop: CropToolParamsShape,
mask: CropToolParamsShape,
zoom_in: ZoomInToolParamsShape,
inpaint: InpaintToolParamsShape,
outpaint: OutpaintToolParamsShape,
refine: RefineToolParamsShape,
};
// ─────────────────────────────────────────────────────────────────────────────
// parseCropSide / applyCropFormat: compute final crop percentages from explicit
// values + imageFormat.
// If imageFormat is set AND not all 4 sides are given, the missing sides are derived from the aspect ratio.
// If imageFormat is NOT set, explicit values are used as-is (0 for unset sides).
// If all 4 sides are given AND imageFormat is set, the region is expanded symmetrically
// (shorter dimension outward) toward the aspect ratio, limited by the image borders.
// Each crop side accepts number (→ %) or string like "10", "10%", "10 %", "120px", "120 px".
// Returns { left, right, top, bottom } as percentages, or an error string.
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Parses a single crop-side argument into a numeric value plus its unit.
 *
 * Accepted forms:
 *  - a finite number, interpreted as a percentage;
 *  - a string made of an unsigned decimal number optionally followed by
 *    "%" or "px" (case-insensitive, optional whitespace), e.g. "10", "10 %",
 *    "120px". A missing unit means percent.
 *
 * @param raw - value supplied by the caller; `undefined` means "side not set".
 * @returns `undefined` when the side is not set, `{ value, unit }` on success,
 *          or an `Error` instance (returned, not thrown) for a malformed value.
 */
function parseCropSide(raw: number | string | undefined): { value: number; unit: "%" | "px" } | Error | undefined {
  if (raw === undefined) return undefined;
  if (typeof raw === "number") {
    // NaN / ±Infinity must be rejected here: every downstream range check is a
    // `>=` comparison, which is false for NaN and would let it through.
    if (!Number.isFinite(raw)) {
      return new Error(`Invalid crop value "${raw}": expected a number optionally followed by "%" or "px".`);
    }
    return { value: raw, unit: "%" };
  }
  const m = String(raw).trim().match(/^(\d+(?:\.\d+)?)\s*(px|%)?$/i);
  if (!m) return new Error(`Invalid crop value "${raw}": expected a number optionally followed by "%" or "px".`);
  const num = parseFloat(m[1]);
  const unitStr = (m[2] ?? "").toLowerCase();
  return { value: num, unit: unitStr === "px" ? "px" : "%" };
}
/**
 * Computes the final crop margins, expressed as percentages of the source
 * image per side, from explicit crop values, an optional `imageFormat`
 * (target aspect ratio) and an optional `frameAdjust`.
 *
 * Steps:
 *  1. Parse each side with parseCropSide; the first malformed side aborts
 *     with its error message.
 *  2. Read the image size only when it is needed (px units, imageFormat or
 *     frameAdjust) and convert px values to percentages.
 *  3. If imageFormat names a known ratio:
 *       - not all four sides given → derive the missing side(s) so the crop
 *         region reaches the target ratio;
 *       - all four sides given → expand the shorter dimension outward,
 *         symmetrically where the image border allows.
 *     An unknown imageFormat is ignored in both cases.
 *  4. Apply frameAdjust by moving all four edges by the same pixel distance
 *     (a percentage is taken relative to the crop-region diagonal).
 *  5. Reject results whose opposite margins add up to 100 % or more.
 *
 * @param srcBuf - encoded source image; only inspected for its dimensions.
 * @param input  - crop sides (number → %, or string with "%"/"px"),
 *                 imageFormat and frameAdjust.
 * @returns `{ left, right, top, bottom }` in percent, or an error string.
 */
async function applyCropFormat(
  srcBuf: Buffer,
  input: {
    cropLeft?: number | string;
    cropRight?: number | string;
    cropTop?: number | string;
    cropBottom?: number | string;
    imageFormat?: string;
    frameAdjust?: number | string;
  }
): Promise<{ left: number; right: number; top: number; bottom: number } | string> {
  type ParsedSide = { value: number; unit: "%" | "px" };

  // Target aspect ratios (width / height) per imageFormat keyword.
  const formatRatios = new Map<string, number>([
    ["square", 1],
    ["landscape", 4 / 3],
    ["portrait", 3 / 4],
    ["16:9", 16 / 9],
  ]);

  // Order matters: the first malformed side (left, right, top, bottom) wins.
  const parsedSides = [
    parseCropSide(input.cropLeft),
    parseCropSide(input.cropRight),
    parseCropSide(input.cropTop),
    parseCropSide(input.cropBottom),
  ];
  for (const p of parsedSides) {
    if (p instanceof Error) return p.message;
  }
  const [pLeft, pRight, pTop, pBottom] = parsedSides as Array<ParsedSide | undefined>;

  const leftSet = pLeft !== undefined;
  const rightSet = pRight !== undefined;
  const topSet = pTop !== undefined;
  const bottomSet = pBottom !== undefined;
  const allSidesSet = leftSet && rightSet && topSet && bottomSet;

  const hasPx = [pLeft, pRight, pTop, pBottom].some((p) => p?.unit === "px");
  const needsImageDims =
    hasPx || !!input.imageFormat || input.frameAdjust !== undefined;
  let imgW = 0;
  let imgH = 0;
  if (needsImageDims) {
    const size = await imgGetSize(srcBuf);
    imgW = size.width;
    imgH = size.height;
  }

  // Unset sides count as 0 %; px values are scaled by the matching dimension.
  const toPct = (p: ParsedSide | undefined, dim: number): number => {
    if (!p) return 0;
    if (p.unit === "px") return dim > 0 ? (p.value / dim) * 100 : 0;
    return p.value;
  };
  let left = toPct(pLeft, imgW);
  let right = toPct(pRight, imgW);
  let top = toPct(pTop, imgH);
  let bottom = toPct(pBottom, imgH);

  // `undefined` when imageFormat is absent OR not a known keyword. Looking the
  // ratio up once keeps both branches below from computing with `undefined`
  // (which previously produced NaN margins in the "derive" branch).
  const targetRatio = input.imageFormat ? formatRatios.get(input.imageFormat) : undefined;

  if (targetRatio !== undefined && !allSidesSet) {
    const ratio: number = targetRatio;
    const cropW_px = imgW * (1 - left / 100 - right / 100);
    const cropH_px = imgH * (1 - top / 100 - bottom / 100);

    // Keep the current width, derive top/bottom so the height matches the ratio.
    const fitHeightToWidth = (): void => {
      const cropH_needed = cropW_px / ratio;
      if (topSet) {
        bottom = Math.max(0, (imgH * (1 - top / 100) - cropH_needed) / imgH * 100);
      } else if (bottomSet) {
        top = Math.max(0, (imgH * (1 - bottom / 100) - cropH_needed) / imgH * 100);
      } else {
        const margin = Math.max(0, (imgH - cropH_needed) / 2 / imgH * 100);
        top = margin;
        bottom = margin;
      }
    };
    // Keep the current height, derive left/right so the width matches the ratio.
    const fitWidthToHeight = (): void => {
      const cropW_needed = cropH_px * ratio;
      if (leftSet) {
        right = Math.max(0, (imgW * (1 - left / 100) - cropW_needed) / imgW * 100);
      } else if (rightSet) {
        left = Math.max(0, (imgW * (1 - right / 100) - cropW_needed) / imgW * 100);
      } else {
        const margin = Math.max(0, (imgW - cropW_needed) / 2 / imgW * 100);
        left = margin;
        right = margin;
      }
    };

    if (leftSet && rightSet) {
      // Horizontal extent is fixed by the caller → adjust the vertical one.
      fitHeightToWidth();
    } else if (topSet && bottomSet) {
      // Vertical extent is fixed by the caller → adjust the horizontal one.
      fitWidthToHeight();
    } else {
      // Neither axis is fully fixed: trim whichever dimension is too long.
      const currentRatio = cropW_px / cropH_px;
      if (Math.abs(currentRatio - ratio) > 1e-6) {
        if (currentRatio > ratio) {
          fitWidthToHeight();
        } else {
          fitHeightToWidth();
        }
      }
    }
    if (left + right >= 100 || top + bottom >= 100) {
      return `imageFormat '${input.imageFormat}' cannot be achieved with the given crop values.`;
    }
  } else if (targetRatio !== undefined) {
    // All 4 sides are set (e.g. from detectLabel) — apply symmetric AR expansion,
    // matching the zoom-in algorithm: always expand the shorter dimension outward.
    const regionW = imgW * (1 - left / 100 - right / 100);
    const regionH = imgH * (1 - top / 100 - bottom / 100);
    const regionAR = regionW / regionH;
    if (Math.abs(regionAR - targetRatio) > 0.005) {
      if (regionAR < targetRatio) {
        // Region too tall → expand width symmetrically. Whatever one side
        // cannot give (its margin is smaller than half the extra width) is
        // requested from the opposite side instead.
        const totalExtraPct = ((regionH * targetRatio - regionW) / imgW) * 100;
        const halfExtra = totalExtraPct / 2;
        const leftReduction = Math.min(left, halfExtra);
        const rightReduction = Math.min(right, halfExtra + (halfExtra - leftReduction));
        const leftReduction2 = Math.min(left, halfExtra + (halfExtra - rightReduction));
        left = Math.max(0, left - leftReduction2);
        right = Math.max(0, right - rightReduction);
      } else {
        // Region too wide → expand height symmetrically (same spill-over rule).
        const totalExtraPct = ((regionW / targetRatio - regionH) / imgH) * 100;
        const halfExtra = totalExtraPct / 2;
        const topReduction = Math.min(top, halfExtra);
        const bottomReduction = Math.min(bottom, halfExtra + (halfExtra - topReduction));
        const topReduction2 = Math.min(top, halfExtra + (halfExtra - bottomReduction));
        top = Math.max(0, top - topReduction2);
        bottom = Math.max(0, bottom - bottomReduction);
      }
    }
  }

  // Apply frameAdjust to the final crop region (non-detect path; detect path applied it already
  // inside resolveDetectionCrop and nullifies input.frameAdjust before calling applyCropFormat).
  if (input.frameAdjust !== undefined) {
    const cropW_px = imgW * (1 - left / 100 - right / 100);
    const cropH_px = imgH * (1 - top / 100 - bottom / 100);
    const cropDiag = Math.hypot(cropW_px, cropH_px);
    let d_px: number;
    if (typeof input.frameAdjust === "string") {
      const m = String(input.frameAdjust).trim().match(/^([+-]?\d+(?:\.\d+)?)\s*(%|px)?$/i);
      if (!m) return `Invalid frameAdjust value: "${input.frameAdjust}"`;
      const val = parseFloat(m[1]);
      // "px" is an absolute distance; anything else is a percentage of the diagonal.
      d_px = m[2]?.toLowerCase() === "px" ? val : (val / 100) * cropDiag;
    } else {
      d_px = (input.frameAdjust / 100) * cropDiag;
    }
    // A positive distance shrinks every margin, i.e. grows the crop region.
    const dLR = (d_px / imgW) * 100;
    const dTB = (d_px / imgH) * 100;
    left = Math.max(0, left - dLR);
    right = Math.max(0, right - dLR);
    top = Math.max(0, top - dTB);
    bottom = Math.max(0, bottom - dTB);
  }

  if (left + right >= 100) {
    return `cropLeft + cropRight must not cover the full image width.`;
  }
  if (top + bottom >= 100) {
    return `cropTop + cropBottom must not cover the full image height.`;
  }
  return { left, right, top, bottom };
}
// ─────────────────────────────────────────────────────────────────────────────
// ─────────────────────────────────────────────────────────────────────────────
// resolveDetectionCrop: find a detection by label in a prior detect_object record
// and return crop percentages, optionally adjusted by frameAdjust.
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Looks up a detection by label in a stored detect_object record and turns it
 * into crop percentages.
 *
 * Record lookup (requires `rawCanvas`):
 *  1. If `rawCanvas` is an image reference whose record carries a
 *     `detections` array, that record is used and the source canvas becomes
 *     the record's `detectSource`.
 *  2. Otherwise the record with the highest `i` whose `detectSource` equals
 *     `rawCanvas` (case-insensitive) is used.
 *
 * Without `frameAdjust` the crop stored on the detection (`det.crop`) is
 * returned. With `frameAdjust` the detection's bbox is grown/shrunk, clamped
 * to the image and converted to percentages; this needs `imageWidth` /
 * `imageHeight` on the record.
 *
 * @param imageRecords - image records from the chat state.
 * @param rawCanvas    - canvas notation supplied by the caller, if any.
 * @param detectLabel  - label to match (case-insensitive).
 * @param detectIndex  - zero-based index among the detections with that label.
 * @param frameAdjust  - number (percent) or string with "%"/"px"; a percentage
 *                       is relative to the bbox width/height.
 * @returns `{ ok: true, ... }` with crop percentages, or `{ ok: false, error }`.
 */
function resolveDetectionCrop(
  imageRecords: any[],
  rawCanvas: string | undefined,
  detectLabel: string,
  detectIndex: number,
  frameAdjust: number | string | undefined
):
  | { ok: true; cropLeft: number; cropRight: number; cropTop: number; cropBottom: number; srcCanvas: string | undefined; canvasWasDetectRecord: boolean }
  | { ok: false; error: string } {
  // --- 1. find detect_object record ---
  let detectRec: any | undefined;
  let canvasWasDetectRecord = false;
  let srcCanvas: string | undefined = rawCanvas;
  if (rawCanvas) {
    // Check if rawCanvas is itself an iN detect_object record
    const pref = parsePrefixedNotation(rawCanvas);
    if (pref?.pool === "image") {
      const candidate = imageRecords.find((r: any) => r?.i === pref.index);
      if (candidate && Array.isArray(candidate.detections)) {
        detectRec = candidate;
        canvasWasDetectRecord = true;
        srcCanvas = typeof detectRec.detectSource === "string" ? detectRec.detectSource : undefined;
      }
    }
  }
  if (!detectRec && rawCanvas) {
    // Search for detect_object record referencing rawCanvas as source;
    // the newest one (highest i) wins.
    const lc = rawCanvas.toLowerCase();
    const candidates = imageRecords
      .filter((r: any) =>
        typeof r?.detectSource === "string" &&
        r.detectSource.toLowerCase() === lc &&
        Array.isArray(r.detections)
      )
      .sort((a: any, b: any) => (b.i ?? 0) - (a.i ?? 0));
    if (candidates.length > 0) {
      detectRec = candidates[0];
      canvasWasDetectRecord = false;
      srcCanvas = rawCanvas;
    }
  }
  if (!detectRec) {
    return {
      ok: false,
      error: `No detect_object result found for canvas '${rawCanvas ?? "(auto)"}'. Run detect_object on the source image first.`,
    };
  }

  // --- 2. find detection by label ---
  const detections: any[] = detectRec.detections;
  const wantedLabel = detectLabel.toLowerCase();
  const matching: any[] = detections.filter(
    (d: any) => typeof d?.label === "string" && d.label.toLowerCase() === wantedLabel
  );
  if (matching.length === 0) {
    const allLabels = [...new Set(detections.map((d: any) => d?.label).filter(Boolean))].join(", ");
    return {
      ok: false,
      error: `Label '${detectLabel}' not found in detections. Available: ${allLabels || "(none)"}`,
    };
  }
  // Negative, fractional or NaN indices would select `undefined` and crash on
  // the property access below, so they are reported like any other
  // out-of-range index.
  if (!Number.isInteger(detectIndex) || detectIndex < 0 || detectIndex >= matching.length) {
    return {
      ok: false,
      error: `detectIndex ${detectIndex} out of range for label '${detectLabel}' (${matching.length} detection(s)).`,
    };
  }
  const det = matching[detectIndex];

  // --- 3. apply frameAdjust or use pre-computed crop percentages ---
  if (frameAdjust === undefined) {
    const crop = det.crop;
    return {
      ok: true,
      cropLeft: typeof crop?.cropLeft === "number" ? crop.cropLeft : 0,
      cropRight: typeof crop?.cropRight === "number" ? crop.cropRight : 0,
      cropTop: typeof crop?.cropTop === "number" ? crop.cropTop : 0,
      cropBottom: typeof crop?.cropBottom === "number" ? crop.cropBottom : 0,
      srcCanvas,
      canvasWasDetectRecord,
    };
  }

  // frameAdjust requires image dimensions stored on the detect record
  const imgW: number = typeof detectRec.imageWidth === "number" ? detectRec.imageWidth : 0;
  const imgH: number = typeof detectRec.imageHeight === "number" ? detectRec.imageHeight : 0;
  if (imgW <= 0 || imgH <= 0) {
    return {
      ok: false,
      error: `frameAdjust requires imageWidth/imageHeight in the detect record. Re-run detect_object to update the record.`,
    };
  }

  // Bbox raw coordinates; a missing coordinate falls back to the image border.
  const bbox = det.bbox;
  const bx1 = typeof bbox?.x1 === "number" ? bbox.x1 : 0;
  const by1 = typeof bbox?.y1 === "number" ? bbox.y1 : 0;
  const bx2 = typeof bbox?.x2 === "number" ? bbox.x2 : imgW;
  const by2 = typeof bbox?.y2 === "number" ? bbox.y2 : imgH;

  // Parse frameAdjust → per-axis pixel deltas.
  // %-value is relative to the respective bbox dimension (width for X, height
  // for Y) so the total box grows by pct% in each dimension, preserving AR.
  // px-value is an absolute margin applied uniformly on all four sides.
  const bboxW = bx2 - bx1;
  const bboxH = by2 - by1;
  let dX_px: number;
  let dY_px: number;
  if (typeof frameAdjust === "string") {
    const m = String(frameAdjust).trim().match(/^([+-]?\d+(?:\.\d+)?)\s*(%|px)?$/i);
    if (!m) {
      return { ok: false, error: `Invalid frameAdjust value: "${frameAdjust}"` };
    }
    const val = parseFloat(m[1]);
    if (m[2]?.toLowerCase() === "px") {
      dX_px = Math.round(val);
      dY_px = Math.round(val);
    } else {
      dX_px = Math.round((val / 100) * bboxW / 2);
      dY_px = Math.round((val / 100) * bboxH / 2);
    }
  } else {
    dX_px = Math.round((frameAdjust / 100) * bboxW / 2);
    dY_px = Math.round((frameAdjust / 100) * bboxH / 2);
  }

  // Apply adjustment. Clamp to [0, imgW] / [0, imgH] (inclusive — bbox
  // coordinates are exclusive-end so x2=imgW is valid and must not become
  // imgW-1, which would leave a spurious 1-pixel crop at a border edge).
  let x1 = Math.max(0, bx1 - dX_px);
  let x2 = Math.min(imgW, bx2 + dX_px);
  let y1 = Math.max(0, by1 - dY_px);
  let y2 = Math.min(imgH, by2 + dY_px);
  // Ensure at least 1×1
  if (x2 <= x1) x2 = Math.min(imgW, x1 + 1);
  if (y2 <= y1) y2 = Math.min(imgH, y1 + 1);

  // Convert back to crop percentages
  return {
    ok: true,
    cropLeft: (x1 / imgW) * 100,
    cropRight: ((imgW - x2) / imgW) * 100,
    cropTop: (y1 / imgH) * 100,
    cropBottom: ((imgH - y2) / imgH) * 100,
    srcCanvas,
    canvasWasDetectRecord,
  };
}
// handleCrop: crop an image by percentage from each side, save as a new variant
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Tool handler for `crop`: crops a source image and stores the result as a
 * new image (iN) in the chat working directory.
 *
 * Flow:
 *  1. Validate parameters with CropToolSchemaStrict.
 *  2. Resolve the active chat id / working directory and read the chat state.
 *  3. Optionally take crop coordinates from a prior detect_object result
 *     (`detectLabel`), and redirect the canvas away from detect_object and
 *     mask result images to their recorded source image.
 *  4. Load the source buffer from the canvas notation, or auto-select it when
 *     exactly one source exists.
 *  5. Compute the margins (applyCropFormat), crop, save the PNG together with
 *     XMP parameters, generate a preview, append the image record to the
 *     state, write an audit log entry.
 *  6. Return a tool result with preview/original links and a JSON summary.
 *
 * Preview generation, state update and audit logging are best-effort: their
 * failures are logged and do not fail the tool call.
 *
 * @param pluginParams - raw tool arguments.
 * @returns a tool result object; `isError: true` is set on resolution, crop
 *          and unexpected failures.
 */
export async function handleCrop(pluginParams: any): Promise<any> {
  await ensureBackendReady().catch((e) => {
    log(`[crop] ensureBackendReady failed: ${String(e)}`);
  });
  try {
    const parsed = CropToolSchemaStrict.safeParse(pluginParams || {});
    if (!parsed.success) {
      return {
        content: [
          {
            type: "text",
            text: `Invalid crop parameters: ${formatZodError(parsed.error)}`,
          },
        ],
      };
    }
    const input = parsed.data as any;

    // Resolve chat id / working directory; the second lookup is a fallback
    // used only when the chat context did not provide a chat id.
    let currentLmChatId: string | null = null;
    let currentLmWorkingDir: string | null = null;
    try {
      const ctx = await getActiveChatContext();
      if ((ctx as any)?.chatId) currentLmChatId = (ctx as any).chatId;
      if ((ctx as any)?.workingDir) currentLmWorkingDir = (ctx as any).workingDir;
    } catch {}
    if (!currentLmChatId) {
      try {
        const _r = await resolveActiveLMStudioChatId();
        if ((_r as any)?.ok) currentLmChatId = (_r as any).chatId;
      } catch {}
    }
    const primaryOutDir: string | undefined =
      currentLmWorkingDir ||
      (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
    if (!primaryOutDir) {
      return {
        content: [{ type: "text", text: "Failed to resolve LM Studio chat working directory." }],
        isError: true as const,
      };
    }
    await fs.promises.mkdir(primaryOutDir, { recursive: true }).catch(() => {});

    // Load source state
    const st: any = await readState(primaryOutDir);
    const attachments: any[] = Array.isArray(st?.attachments) ? st.attachments : [];
    const pictures: any[] = Array.isArray((st as any)?.pictures) ? (st as any).pictures : [];
    const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];
    const images: Array<{ i: number; path: string }> = imageRecords
      .filter((r: any) => r && typeof r.filename === "string")
      .sort((a: any, b: any) => (a.i || 0) - (b.i || 0))
      .map((r: any) => ({ i: r.i || 1, path: path.join(primaryOutDir, r.filename) }));
    const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
    const variants: Array<{ v: number; path: string }> = variantRecords
      .filter((v: any) => v && typeof v.filename === "string")
      .map((v: any) => ({ v: v.v || 1, path: path.join(primaryOutDir, v.filename) }));

    // Resolve canvas notation
    let rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;
    let srcBuf: Buffer;

    // detectLabel: resolve crop coordinates from a prior detect_object run
    if (typeof input.detectLabel === "string" && input.detectLabel.trim()) {
      const detectResult = resolveDetectionCrop(
        imageRecords,
        rawCanvas,
        input.detectLabel.trim(),
        typeof input.detectIndex === "number" ? Math.max(0, input.detectIndex) : 0,
        input.frameAdjust
      );
      if (!detectResult.ok) {
        return {
          content: [{ type: "text", text: detectResult.error }],
          isError: true as const,
        };
      }
      input.cropLeft = detectResult.cropLeft;
      input.cropRight = detectResult.cropRight;
      input.cropTop = detectResult.cropTop;
      input.cropBottom = detectResult.cropBottom;
      input.frameAdjust = undefined; // already applied by resolveDetectionCrop
      // If canvas was the iN detect result, redirect srcBuf loading to the actual source
      if (detectResult.canvasWasDetectRecord && detectResult.srcCanvas) {
        rawCanvas = detectResult.srcCanvas;
        input.canvas = detectResult.srcCanvas;
      }
    }

    // Auto-redirect: if canvas points to a detect_object result and no detectLabel is given, use detectSource
    if (!input.detectLabel && rawCanvas) {
      const _autoRedirectPref = parsePrefixedNotation(rawCanvas);
      if (_autoRedirectPref?.pool === "image") {
        const _autoRedirectRec = imageRecords.find((r: any) => r?.i === _autoRedirectPref.index);
        if (typeof _autoRedirectRec?.sourceTool === "string" &&
            _autoRedirectRec.sourceTool.includes("detect_object") &&
            typeof _autoRedirectRec.detectSource === "string") {
          rawCanvas = _autoRedirectRec.detectSource;
          input.canvas = _autoRedirectRec.detectSource;
        }
      }
    }

    // Auto-redirect: if canvas points to a mask result, crop the original unlined image instead.
    // The CYAN annotation lines would otherwise appear at the crop boundary in the result.
    if (rawCanvas) {
      const _maskRedirectPref = parsePrefixedNotation(rawCanvas);
      if (_maskRedirectPref?.pool === "image") {
        const _maskRec = imageRecords.find((r: any) => r?.i === _maskRedirectPref.index);
        if (typeof _maskRec?.sourceTool === "string" &&
            _maskRec.sourceTool.includes("/mask") &&
            typeof _maskRec.cropSource === "string") {
          log(`[crop] canvas ${rawCanvas} is a mask result — redirecting to cropSource ${_maskRec.cropSource}`);
          rawCanvas = _maskRec.cropSource;
          input.canvas = _maskRec.cropSource;
        }
      }
    }

    // Load the source buffer. Any failure in here is reported to the caller
    // with its own message rather than the generic "internal error".
    try {
      if (rawCanvas) {
        const pref = parsePrefixedNotation(rawCanvas);
        if (pref) {
          if (pref.pool === "attachment") {
            const lm = await resolveImg2ImgSourceLMStudio({
              chatId: currentLmChatId || undefined,
              explicitAttachmentSource: true,
              attachmentIndex: pref.index,
            });
            if (!(lm as any)?.ok || !(lm as any).buffer) {
              throw new Error(`Attachment a${pref.index} not found.`);
            }
            srcBuf = (lm as any).buffer as Buffer;
          } else if (pref.pool === "variant") {
            const found = variants.find((v) => v.v === pref.index);
            if (!found) throw new Error(`Variant v${pref.index} not found.`);
            srcBuf = await fs.promises.readFile(found.path);
          } else if (pref.pool === "image") {
            const found = images.find((img) => img.i === pref.index);
            if (!found) throw new Error(`Image i${pref.index} not found.`);
            srcBuf = await fs.promises.readFile(found.path);
          } else {
            // Remaining pool: pictures (pN).
            const found = pictures.find((p: any) => p?.p === pref.index);
            if (!found) throw new Error(`Picture p${pref.index} not found.`);
            srcBuf = await fs.promises.readFile(path.join(primaryOutDir, String(found.filename || "")));
          }
        } else {
          throw new Error(`Invalid canvas notation: ${rawCanvas}`);
        }
      } else {
        // Auto-select single source
        const total = attachments.length + variants.length + images.length + pictures.length;
        if (total === 0) throw new Error("No source image available.");
        if (total > 1) throw new Error("Ambiguous source — specify canvas explicitly.");
        if (attachments.length === 1) {
          const lm = await resolveImg2ImgSourceLMStudio({
            chatId: currentLmChatId || undefined,
            explicitAttachmentSource: true,
            attachmentIndex: typeof attachments[0]?.a === "number" ? attachments[0].a : 1,
          });
          if (!(lm as any)?.ok || !(lm as any).buffer) throw new Error("Attachment not found.");
          srcBuf = (lm as any).buffer as Buffer;
        } else if (variants.length === 1) {
          srcBuf = await fs.promises.readFile(variants[0].path);
        } else if (images.length === 1) {
          srcBuf = await fs.promises.readFile(images[0].path);
        } else {
          srcBuf = await fs.promises.readFile(path.join(primaryOutDir, String(pictures[0].filename || "")));
        }
      }
    } catch (e) {
      return {
        content: [{ type: "text", text: String((e as any)?.message || e) }],
        isError: true as const,
      };
    }

    const cropResult = await applyCropFormat(srcBuf, input);
    if (typeof cropResult === "string") {
      return { content: [{ type: "text", text: cropResult }], isError: true as const };
    }
    const { left, right, top, bottom } = cropResult;

    // Original dimensions for result reporting
    const originalSize = await imgGetSize(srcBuf);
    const originalWidth = originalSize.width;
    const originalHeight = originalSize.height;

    // Apply crop
    const croppedBuf = await imgCropToPng(srcBuf, { left, top, right, bottom });

    // Measure cropped dimensions for XMP
    const croppedSize = await imgGetSize(croppedBuf);

    // Build XMP params for crop (no diffusion — processing only)
    const cropXmpParams: PngXmpParams = {
      mode: "crop",
      ...(typeof croppedSize.width === "number" ? { width: croppedSize.width } : {}),
      ...(typeof croppedSize.height === "number" ? { height: croppedSize.height } : {}),
      crop: { left, top, right, bottom },
      ...(typeof input.canvas === "string" ? { cropSource: input.canvas } : {}),
    };

    // Save as new image
    const baseImageI = Math.max(1, st.counters?.nextImageI ?? 1);
    const stamp = isoStampCompact();
    const baseName = `image-${stamp}-i${baseImageI}`;
    const saved = await saveOriginalPng(croppedBuf, primaryOutDir, `${baseName}.png`, cropXmpParams);
    log(`[crop] saved: ${saved.savedPath} (${saved.size} bytes) [i${baseImageI}]`);

    // Generate preview
    const variantPreviewSpec = VARIANT_FULL_CONFIG.preview;
    const imageRecordsForState: Array<{ filename: string; preview: string; i: number; sourceTool?: string; cropLeft?: number; cropTop?: number; cropRight?: number; cropBottom?: number; cropSource?: string }> = [
      {
        filename: `${baseName}.png`,
        preview: `preview-${baseName}.jpg`,
        i: baseImageI,
        sourceTool: `${getSelfPluginIdentifier()}/crop`,
        cropLeft: left,
        cropTop: top,
        cropRight: right,
        cropBottom: bottom,
        cropSource: typeof input.canvas === "string" ? input.canvas : undefined,
      },
    ];
    const previews: any[] = [];
    try {
      const p = await generatePreviewFromBuffer(
        croppedBuf,
        primaryOutDir,
        saved.fileName,
        variantPreviewSpec
      );
      const previewFilePath = p.previewAbs;
      const previewFileUrl = encodeFileUrl(previewFilePath);
      previews.push({
        ok: true as const,
        filePath: previewFilePath,
        fileName: p.previewFilename,
        fileUrl: previewFileUrl,
        size_bytes: p.data.length,
        width: p.width,
        height: p.height,
        mimeType: "image/jpeg" as const,
        format: variantPreviewSpec.format,
        dataBase64: p.data.toString("base64"),
      });
      log(`[crop] preview saved: ${previewFilePath} ${p.width}x${p.height}`);
    } catch (e) {
      log(`[crop] preview failed: ${String(e)}`);
    }

    // Update state (re-read first so the append works on the current file content)
    try {
      const { appendImages } = await import("../core-bundle.mjs");
      const stateForUpdate = await readState(primaryOutDir);
      const appendResult = appendImages(stateForUpdate, imageRecordsForState);
      if (appendResult.changed) {
        await writeStateAtomic(primaryOutDir, stateForUpdate);
        log(`[crop] state updated: appended i${baseImageI}, nextImageI=${stateForUpdate.counters.nextImageI}`);
      }
    } catch (e) {
      log(`[crop] state update failed: ${String(e)}`);
    }

    // HTTP URLs
    const httpBase = await getHealthyServerBaseUrl();
    const httpOriginal = httpBase
      ? toHttpOriginalUrl(saved.fileName, httpBase, currentLmChatId || undefined)
      : "";
    const httpPreview = (() => {
      if (!httpBase || !currentLmChatId) return "";
      const previewFileName = previews[0]?.fileName;
      if (!previewFileName) return "";
      return toHttpPreviewUrl(previewFileName, httpBase, currentLmChatId);
    })();

    // Audit log
    try {
      const audit = buildAuditLogger({ backend: "crop", mode: "crop" as any });
      if (currentLmChatId) audit.setChatId(currentLmChatId);
      const cropUserRequest: Record<string, any> = {};
      if (input.canvas) cropUserRequest.canvas = input.canvas;
      cropUserRequest.cropLeft = left;
      cropUserRequest.cropRight = right;
      cropUserRequest.cropTop = top;
      cropUserRequest.cropBottom = bottom;
      audit.setUserRequest(cropUserRequest as any);
      const auditOutput: Record<string, any> = {
        i: baseImageI,
        crop: { left, top, right, bottom },
        images: [{
          i: baseImageI,
          path: saved.savedPath,
          url: saved.fileUrl,
          bytes: saved.size,
          ...(httpOriginal ? { http_url: httpOriginal } : {}),
          ...(previews[0] ? { preview_path: previews[0].filePath, preview_url: previews[0].fileUrl } : {}),
          ...(httpPreview ? { http_preview_url: httpPreview } : {}),
        }],
      };
      audit.setOutput(auditOutput);
      await audit.write();
    } catch (e) {
      log(`[crop] audit logging error: ${String(e)}`);
    }

    // Assemble tool result — same structure as handleGenerateImage
    // PREVIEW_IN_CHAT defaults to enabled; only "1" / "true" keep it enabled when set.
    const envPreviewRaw = process.env.PREVIEW_IN_CHAT;
    const previewInChat =
      envPreviewRaw === undefined
        ? true
        : envPreviewRaw === "1" || envPreviewRaw.toLowerCase() === "true";
    const summary = {
      tool: "crop",
      i: baseImageI,
      originalWidth,
      originalHeight,
      crop: {
        left: { pct: left, px: Math.round(left / 100 * originalWidth) },
        right: { pct: right, px: Math.round(right / 100 * originalWidth) },
        top: { pct: top, px: Math.round(top / 100 * originalHeight) },
        bottom: { pct: bottom, px: Math.round(bottom / 100 * originalHeight) },
      },
      images_generated: 1,
      files: {
        original: saved.fileUrl,
        previews: previews.map((p: any) => p.fileUrl),
      },
    };
    // File URLs are reported via the link texts below, not inside the JSON summary.
    const { files: _files, ...summaryNoFiles } = summary;
    const fallbackPreviewUrl = previews[0]?.fileUrl || saved.fileUrl;
    const variantLinksText = `Preview i${baseImageI}: ${httpPreview ? httpPreview : fallbackPreviewUrl}`;
    const originalLinksText = `Original i${baseImageI}: ${httpOriginal ? httpOriginal : saved.fileUrl}`;
    const reviewHint = "Carefully examine the preview and comment on how well the crop matches your intent.";
    if (previewInChat && previews.length > 0) {
      const fname = String(previews[0].fileName || "");
      return {
        content: [
          {
            type: "image",
            fileName: saved.fileName,
            mimeType: previews[0].mimeType,
            // The $hint below tells the model to present the image via this
            // markdown, so it must be a real image link to the preview
            // (HTTP URL when a server is available, file URL otherwise).
            markdown: `![${fname}](${httpPreview ? httpPreview : fallbackPreviewUrl})`,
            $hint: "This is an image file. Present the image to the user by using the markdown above.",
          } as any,
          { type: "text", text: variantLinksText },
          { type: "text", text: originalLinksText },
          { type: "text", text: JSON.stringify(summaryNoFiles) },
        ],
      };
    } else {
      return {
        content: [
          {
            type: "text",
            text: `Cropped Image i${baseImageI} successfully saved.`,
            $hint: reviewHint,
          },
          { type: "text", text: variantLinksText },
          { type: "text", text: originalLinksText },
          { type: "text", text: JSON.stringify(summaryNoFiles) },
        ],
      };
    }
  } catch (error) {
    log(`[crop] error: ${error instanceof Error ? error.message : String(error)}`);
    return {
      content: [{ type: "text", text: "Failed to crop image: internal error" }],
      isError: true as const,
    };
  }
}
// handleMask: draw a CYAN bounding box on the source image from crop percentages,
// save as a new image with crop metadata for use with inpaint / outpaint.
// ─────────────────────────────────────────────────────────────────────────────
export async function handleMask(pluginParams: any): Promise<any> {
await ensureBackendReady().catch((e) => {
log(`[mask] ensureBackendReady failed: ${String(e)}`);
});
try {
const parsed = CropToolSchemaStrict.safeParse(pluginParams || {});
if (!parsed.success) {
return {
content: [
{
type: "text",
text: `Invalid mask parameters: ${formatZodError(parsed.error)}`,
},
],
};
}
const input = parsed.data as any;
let currentLmChatId: string | null = null;
let currentLmWorkingDir: string | null = null;
try {
const ctx = await getActiveChatContext();
if ((ctx as any)?.chatId) currentLmChatId = (ctx as any).chatId;
if ((ctx as any)?.workingDir) currentLmWorkingDir = (ctx as any).workingDir;
} catch {}
if (!currentLmChatId) {
try {
const _r = await resolveActiveLMStudioChatId();
if ((_r as any)?.ok) currentLmChatId = (_r as any).chatId;
} catch {}
}
const primaryOutDir: string | undefined =
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
if (!primaryOutDir) {
return {
content: [{ type: "text", text: "Failed to resolve LM Studio chat working directory." }],
isError: true as const,
};
}
await fs.promises.mkdir(primaryOutDir, { recursive: true }).catch(() => {});
// Load source state
const st: any = await readState(primaryOutDir);
const attachments: any[] = Array.isArray(st?.attachments) ? st.attachments : [];
const pictures: any[] = Array.isArray((st as any)?.pictures) ? (st as any).pictures : [];
const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];
const images: Array<{ i: number; path: string }> = imageRecords
.filter((r: any) => r && typeof r.filename === "string")
.sort((a: any, b: any) => (a.i || 0) - (b.i || 0))
.map((r: any) => ({ i: r.i || 1, path: path.join(primaryOutDir, r.filename) }));
const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
const variants: Array<{ v: number; path: string }> = variantRecords
.filter((v: any) => v && typeof v.filename === "string")
.map((v: any) => ({ v: v.v || 1, path: path.join(primaryOutDir, v.filename) }));
// Resolve canvas notation
let rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;
let srcBuf: Buffer;
// detectLabel: resolve crop coordinates from a prior detect_object run
if (typeof input.detectLabel === "string" && input.detectLabel.trim()) {
const detectResult = resolveDetectionCrop(
imageRecords,
rawCanvas,
input.detectLabel.trim(),
typeof input.detectIndex === "number" ? Math.max(0, input.detectIndex) : 0,
input.frameAdjust
);
if (!detectResult.ok) {
return {
content: [{ type: "text", text: detectResult.error }],
isError: true as const,
};
}
input.cropLeft = detectResult.cropLeft;
input.cropRight = detectResult.cropRight;
input.cropTop = detectResult.cropTop;
input.cropBottom = detectResult.cropBottom;
input.frameAdjust = undefined; // already applied by resolveDetectionCrop
// If canvas was the iN detect result, redirect srcBuf loading to the actual source
if (detectResult.canvasWasDetectRecord && detectResult.srcCanvas) {
rawCanvas = detectResult.srcCanvas;
input.canvas = detectResult.srcCanvas;
}
}
// Auto-redirect: if canvas points to a detect_object result and no detectLabel is given, use detectSource
if (!input.detectLabel && rawCanvas) {
const _autoRedirectPref = parsePrefixedNotation(rawCanvas);
if (_autoRedirectPref?.pool === "image") {
const _autoRedirectRec = imageRecords.find((r: any) => r?.i === _autoRedirectPref.index);
if (typeof _autoRedirectRec?.sourceTool === "string" &&
_autoRedirectRec.sourceTool.includes("detect_object") &&
typeof _autoRedirectRec.detectSource === "string") {
rawCanvas = _autoRedirectRec.detectSource;
input.canvas = _autoRedirectRec.detectSource;
}
}
}
try {
if (rawCanvas) {
const pref = parsePrefixedNotation(rawCanvas);
if (pref) {
if (pref.pool === "attachment") {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: pref.index,
});
if (!(lm as any)?.ok || !(lm as any).buffer) {
throw new Error(`Attachment a${pref.index} not found.`);
}
srcBuf = (lm as any).buffer as Buffer;
} else if (pref.pool === "variant") {
const found = variants.find((v) => v.v === pref.index);
if (!found) throw new Error(`Variant v${pref.index} not found.`);
srcBuf = await fs.promises.readFile(found.path);
} else if (pref.pool === "image") {
const found = images.find((img) => img.i === pref.index);
if (!found) throw new Error(`Image i${pref.index} not found.`);
srcBuf = await fs.promises.readFile(found.path);
} else {
const found = pictures.find((p: any) => p?.p === pref.index);
if (!found) throw new Error(`Picture p${pref.index} not found.`);
srcBuf = await fs.promises.readFile(path.join(primaryOutDir, String(found.filename || "")));
}
} else {
throw new Error(`Invalid canvas notation: ${rawCanvas}`);
}
} else {
// Auto-select single source
const total = attachments.length + variants.length + images.length + pictures.length;
if (total === 0) throw new Error("No source image available.");
if (total > 1) throw new Error("Ambiguous source — specify canvas explicitly.");
if (attachments.length === 1) {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: typeof attachments[0]?.a === "number" ? attachments[0].a : 1,
});
if (!(lm as any)?.ok || !(lm as any).buffer) throw new Error("Attachment not found.");
srcBuf = (lm as any).buffer as Buffer;
} else if (variants.length === 1) {
srcBuf = await fs.promises.readFile(variants[0].path);
} else if (images.length === 1) {
srcBuf = await fs.promises.readFile(images[0].path);
} else {
srcBuf = await fs.promises.readFile(path.join(primaryOutDir, String(pictures[0].filename || "")));
}
}
} catch (e) {
return {
content: [{ type: "text", text: String((e as any)?.message || e) }],
isError: true as const,
};
}
const cropResult = await applyCropFormat(srcBuf, input);
if (typeof cropResult === "string") {
return { content: [{ type: "text", text: cropResult }], isError: true as const };
}
const { left, right, top, bottom } = cropResult;
// Original dimensions
const originalSize = await imgGetSize(srcBuf);
const originalWidth = originalSize.width;
const originalHeight = originalSize.height;
// Draw CYAN bounding box on source image
const Jimp = (await import("jimp")).Jimp;
const img = await Jimp.read(srcBuf);
const x1 = Math.max(0, Math.min(originalWidth - 1, Math.round(left / 100 * originalWidth)));
const y1 = Math.max(0, Math.min(originalHeight - 1, Math.round(top / 100 * originalHeight)));
const x2 = Math.max(0, Math.min(originalWidth - 1, Math.round((100 - right) / 100 * originalWidth)));
const y2 = Math.max(0, Math.min(originalHeight - 1, Math.round((100 - bottom) / 100 * originalHeight)));
const cyanInt = (((0x44 & 0xff) << 24) | ((0x7d & 0xff) << 16) | ((0xf7 & 0xff) << 8) | (0xff & 0xff)) >>> 0;
const thickness = 2;
for (let t = 0; t < thickness; t++) {
for (let x = x1; x <= x2; x++) {
if (y1 + t < originalHeight) img.setPixelColor(cyanInt, x, y1 + t);
if (y2 - t >= 0) img.setPixelColor(cyanInt, x, y2 - t);
}
for (let y = y1; y <= y2; y++) {
if (x1 + t < originalWidth) img.setPixelColor(cyanInt, x1 + t, y);
if (x2 - t >= 0) img.setPixelColor(cyanInt, x2 - t, y);
}
}
const annotatedBuf = await img.getBuffer("image/png");
// Build XMP params (processing only, same mode as crop)
const maskXmpParams: PngXmpParams = {
mode: "crop",
...(typeof originalWidth === "number" ? { width: originalWidth } : {}),
...(typeof originalHeight === "number" ? { height: originalHeight } : {}),
crop: { left, top, right, bottom },
...(typeof input.canvas === "string" ? { cropSource: input.canvas } : {}),
};
// Save as new image
const baseImageI = Math.max(1, st.counters?.nextImageI ?? 1);
const stamp = isoStampCompact();
const baseName = `image-${stamp}-i${baseImageI}`;
const saved = await saveOriginalPng(annotatedBuf, primaryOutDir, `${baseName}.png`, maskXmpParams);
log(`[mask] saved: ${saved.savedPath} (${saved.size} bytes) [i${baseImageI}]`);
// Generate preview
const variantPreviewSpec = VARIANT_FULL_CONFIG.preview;
const imageRecordsForState: Array<{ filename: string; preview: string; i: number; sourceTool?: string; cropLeft?: number; cropTop?: number; cropRight?: number; cropBottom?: number; cropSource?: string }> = [
{
filename: `${baseName}.png`,
preview: `preview-${baseName}.jpg`,
i: baseImageI,
sourceTool: `${getSelfPluginIdentifier()}/mask`,
cropLeft: left,
cropTop: top,
cropRight: right,
cropBottom: bottom,
cropSource: typeof input.canvas === "string" ? input.canvas : undefined,
},
];
const previews: any[] = [];
try {
const p = await generatePreviewFromBuffer(
annotatedBuf,
primaryOutDir,
saved.fileName,
variantPreviewSpec
);
const previewFilePath = p.previewAbs;
const previewFileUrl = encodeFileUrl(previewFilePath);
previews.push({
ok: true as const,
filePath: previewFilePath,
fileName: p.previewFilename,
fileUrl: previewFileUrl,
size_bytes: p.data.length,
width: p.width,
height: p.height,
mimeType: "image/jpeg" as const,
format: variantPreviewSpec.format,
dataBase64: p.data.toString("base64"),
});
log(`[mask] preview saved: ${previewFilePath} ${p.width}x${p.height}`);
} catch (e) {
log(`[mask] preview failed: ${String(e)}`);
}
// Update state
try {
const { appendImages } = await import("../core-bundle.mjs");
const stateForUpdate = await readState(primaryOutDir);
const appendResult = appendImages(stateForUpdate, imageRecordsForState);
if (appendResult.changed) {
await writeStateAtomic(primaryOutDir, stateForUpdate);
log(`[mask] state updated: appended i${baseImageI}, nextImageI=${stateForUpdate.counters.nextImageI}`);
}
} catch (e) {
log(`[mask] state update failed: ${String(e)}`);
}
// HTTP URLs
const httpBase = await getHealthyServerBaseUrl();
const httpOriginal = httpBase
? toHttpOriginalUrl(saved.fileName, httpBase, currentLmChatId || undefined)
: "";
const httpPreview = (() => {
if (!httpBase || !currentLmChatId) return "";
const previewFileName = previews[0]?.fileName;
if (!previewFileName) return "";
return toHttpPreviewUrl(previewFileName, httpBase, currentLmChatId);
})();
// Audit log
try {
const audit = buildAuditLogger({ backend: "mask", mode: "crop" as any });
if (currentLmChatId) audit.setChatId(currentLmChatId);
const maskUserRequest: Record<string, any> = {};
if (input.canvas) maskUserRequest.canvas = input.canvas;
maskUserRequest.cropLeft = left;
maskUserRequest.cropRight = right;
maskUserRequest.cropTop = top;
maskUserRequest.cropBottom = bottom;
audit.setUserRequest(maskUserRequest as any);
const auditOutput: Record<string, any> = {
i: baseImageI,
crop: { left, top, right, bottom },
images: [{
i: baseImageI,
path: saved.savedPath,
url: saved.fileUrl,
bytes: saved.size,
...(httpOriginal ? { http_url: httpOriginal } : {}),
...(previews[0] ? { preview_path: previews[0].filePath, preview_url: previews[0].fileUrl } : {}),
...(httpPreview ? { http_preview_url: httpPreview } : {}),
}],
};
audit.setOutput(auditOutput);
await audit.write();
} catch (e) {
log(`[mask] audit logging error: ${String(e)}`);
}
// Assemble tool result
const envPreviewRaw = process.env.PREVIEW_IN_CHAT;
const previewInChat =
envPreviewRaw === undefined
? true
: envPreviewRaw === "1" || envPreviewRaw.toLowerCase() === "true";
const summary = {
tool: "mask",
i: baseImageI,
originalWidth,
originalHeight,
crop: {
left: { pct: left, px: Math.round(left / 100 * originalWidth) },
right: { pct: right, px: Math.round(right / 100 * originalWidth) },
top: { pct: top, px: Math.round(top / 100 * originalHeight) },
bottom: { pct: bottom, px: Math.round(bottom / 100 * originalHeight) },
},
images_generated: 1,
files: {
original: saved.fileUrl,
previews: previews.map((p: any) => p.fileUrl),
},
};
const { files: _files, ...summaryNoFiles } = summary;
const fallbackPreviewUrl = previews[0]?.fileUrl || saved.fileUrl;
const variantLinksText = `Preview i${baseImageI}: ${httpPreview ? httpPreview : fallbackPreviewUrl}`;
const originalLinksText = `Original i${baseImageI}: ${httpOriginal ? httpOriginal : saved.fileUrl}`;
const reviewHint = "Carefully examine the preview and comment on how well the mask region matches your intent.";
if (previewInChat && previews.length > 0) {
const fname = String(previews[0].fileName || "");
return {
content: [
{
type: "image",
fileName: saved.fileName,
mimeType: previews[0].mimeType,
markdown: ``,
$hint: "This is an image file. Present the image to the user by using the markdown above.",
} as any,
{ type: "text", text: variantLinksText },
{ type: "text", text: originalLinksText },
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
} else {
return {
content: [
{
type: "text",
text: `Mask Image i${baseImageI} successfully saved.`,
$hint: reviewHint,
},
{ type: "text", text: variantLinksText },
{ type: "text", text: originalLinksText },
{ type: "text", text: JSON.stringify(summaryNoFiles) },
],
};
}
} catch (error) {
log(`[mask] error: ${error instanceof Error ? error.message : String(error)}`);
return {
content: [{ type: "text", text: "Failed to mask image: internal error" }],
isError: true as const,
};
}
}
// ─────────────────────────────────────────────────────────────────────────────
// handleZoomIn: crop + Draw Things edit + save result
// ─────────────────────────────────────────────────────────────────────────────
export async function handleZoomIn(
pluginParams: any,
onProgress?: ProgressCallback
): Promise<any> {
const parsed = ZoomInToolSchemaStrict.safeParse(pluginParams || {});
if (!parsed.success) {
return {
content: [
{
type: "text",
text: `Invalid zoom-in parameters: ${formatZodError(parsed.error)}`,
},
],
};
}
const input = parsed.data as any;
// Load canvas buffer and resolve crop metadata from state
let rawBuf: Buffer;
let resolvedCropMeta: { cropLeft?: number; cropTop?: number; cropRight?: number; cropBottom?: number; cropSource?: string } | undefined;
// When canvas is an iN image, rawBuf IS already the target region — skip imgCropToPng.
let skipImgCrop = false;
// Source image dimensions for the render target (overrides origW/H when set).
let overrideZoomDims: { w: number; h: number } | undefined;
// Set when detectLabel resolved the crop — post-crop dims needed for render target.
let detectLabelWasUsed = false;
let currentLmChatId: string | null = null;
let _zoomAuditSourceOverride: {
sourceKind?: "attachment" | "image" | "picture" | "variant";
sourceFileName?: string;
sourceOriginalName?: string;
sourceOriginAbs?: string;
originalDims?: { width: number; height: number; bytes: number };
} | undefined = undefined;
try {
// Resolve chat context to find source
let currentLmWorkingDir: string | null = null;
try {
const ctx = await getActiveChatContext();
if ((ctx as any)?.chatId) currentLmChatId = (ctx as any).chatId;
if ((ctx as any)?.workingDir) currentLmWorkingDir = (ctx as any).workingDir;
} catch {}
if (!currentLmChatId) {
try { const _r = await resolveActiveLMStudioChatId(); if ((_r as any)?.ok) currentLmChatId = (_r as any).chatId; } catch {}
}
const chatDir =
currentLmWorkingDir ||
(currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
const rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;
if (rawCanvas && chatDir) {
const st: any = await readState(chatDir);
const attachments: any[] = Array.isArray(st?.attachments) ? st.attachments : [];
const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
const variantPaths: Array<{ v: number; path: string }> = variantRecords
.filter((v: any) => v && typeof v.filename === "string")
.map((v: any) => ({ v: v.v || 1, path: path.join(chatDir, v.filename) }));
const pictures: any[] = Array.isArray((st as any)?.pictures) ? (st as any).pictures : [];
const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];
const pref = parsePrefixedNotation(rawCanvas);
if (!pref) throw new Error(`Invalid canvas notation: ${rawCanvas}`);
if (pref.pool === "attachment") {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: pref.index,
});
if (!(lm as any)?.ok || !(lm as any).buffer) throw new Error(`Attachment a${pref.index} not found.`);
rawBuf = (lm as any).buffer as Buffer;
_zoomAuditSourceOverride = {
sourceKind: "attachment",
sourceOriginAbs: typeof (lm as any).originalPath === "string" ? (lm as any).originalPath : undefined,
sourceFileName: typeof (lm as any).originalPath === "string" ? path.basename((lm as any).originalPath) : undefined,
sourceOriginalName: typeof (lm as any).originalName === "string" ? (lm as any).originalName : undefined,
// originalDims filled after imgGetSize below
};
// Find the most recent crop record whose cropSource matches this attachment
const cropCandidate = imageRecords
.filter((r: any) => r?.cropSource === rawCanvas &&
(typeof r.cropLeft === "number" || typeof r.cropTop === "number" ||
typeof r.cropRight === "number" || typeof r.cropBottom === "number"))
.sort((a: any, b: any) => (b.i ?? 0) - (a.i ?? 0))[0];
if (cropCandidate) {
resolvedCropMeta = {
cropLeft: cropCandidate.cropLeft,
cropTop: cropCandidate.cropTop,
cropRight: cropCandidate.cropRight,
cropBottom: cropCandidate.cropBottom,
cropSource: cropCandidate.cropSource,
};
}
} else if (pref.pool === "image") {
const found = imageRecords.find((r: any) => r?.i === pref.index);
if (!found) throw new Error(`Image i${pref.index} not found.`);
rawBuf = await fs.promises.readFile(path.join(chatDir, String(found.filename)));
const hasCropFields =
typeof found.cropLeft === "number" || typeof found.cropTop === "number" ||
typeof found.cropRight === "number" || typeof found.cropBottom === "number";
const detectSrc = typeof found.detectSource === "string" ? found.detectSource : undefined;
// Mask results store crop coords on the annotated full-source image, not a pre-cropped file.
// For those, rawBuf must be reloaded from cropSource and skipImgCrop must stay false.
let isMaskCanvasResult = false;
if (hasCropFields) {
resolvedCropMeta = {
cropLeft: found.cropLeft,
cropTop: found.cropTop,
cropRight: found.cropRight,
cropBottom: found.cropBottom,
cropSource: found.cropSource,
};
if (typeof found.sourceTool === "string" && found.sourceTool.includes("/mask") &&
typeof found.cropSource === "string") {
isMaskCanvasResult = true;
const _srcRef = parsePrefixedNotation(found.cropSource);
if (_srcRef) {
if (_srcRef.pool === "attachment") {
const _lm = await resolveImg2ImgSourceLMStudio({ chatId: currentLmChatId || undefined, explicitAttachmentSource: true, attachmentIndex: _srcRef.index });
if ((_lm as any)?.ok && (_lm as any).buffer) rawBuf = (_lm as any).buffer as Buffer;
} else if (_srcRef.pool === "image") {
const _srcRec = imageRecords.find((r: any) => r?.i === _srcRef.index);
if (_srcRec?.filename) rawBuf = await fs.promises.readFile(path.join(chatDir, String(_srcRec.filename)));
} else if (_srcRef.pool === "variant") {
const _srcPath = variantPaths.find((v) => v.v === _srcRef.index)?.path;
if (_srcPath) rawBuf = await fs.promises.readFile(_srcPath);
} else {
const _srcRec = pictures.find((p: any) => p?.p === _srcRef.index);
if (_srcRec?.filename) rawBuf = await fs.promises.readFile(path.join(chatDir, String(_srcRec.filename || "")));
}
}
}
} else if (detectSrc) {
// detect_object result — reload rawBuf from the clean source (not the annotated image).
resolvedCropMeta = { cropLeft: 0, cropTop: 0, cropRight: 0, cropBottom: 0, cropSource: detectSrc };
const _srcRef = parsePrefixedNotation(detectSrc);
if (_srcRef) {
if (_srcRef.pool === "attachment") {
const _lm = await resolveImg2ImgSourceLMStudio({ chatId: currentLmChatId || undefined, explicitAttachmentSource: true, attachmentIndex: _srcRef.index });
if ((_lm as any)?.ok && (_lm as any).buffer) rawBuf = (_lm as any).buffer as Buffer;
} else if (_srcRef.pool === "image") {
const _srcRec = imageRecords.find((r: any) => r?.i === _srcRef.index);
if (_srcRec?.filename) rawBuf = await fs.promises.readFile(path.join(chatDir, String(_srcRec.filename)));
} else if (_srcRef.pool === "variant") {
const _srcPath = variantPaths.find((v) => v.v === _srcRef.index)?.path;
if (_srcPath) rawBuf = await fs.promises.readFile(_srcPath);
} else {
const _srcRec = pictures.find((p: any) => p?.p === _srcRef.index);
if (_srcRec?.filename) rawBuf = await fs.promises.readFile(path.join(chatDir, String(_srcRec.filename || "")));
}
}
}
if (resolvedCropMeta && !isMaskCanvasResult) {
// rawBuf is already the target region (the iN file). Do not re-apply imgCropToPng.
skipImgCrop = true;
// Look up source image dimensions for the render target.
const sourceRef = (hasCropFields
? (typeof found.cropSource === "string" ? found.cropSource : undefined)
: detectSrc) as string | undefined;
if (typeof sourceRef === "string") {
const srcPref = parsePrefixedNotation(sourceRef);
if (srcPref?.pool === "variant") {
const srcPath = variantPaths.find((v) => v.v === srcPref.index)?.path;
if (srcPath) {
try {
const srcSize = await imgGetSize(await fs.promises.readFile(srcPath));
overrideZoomDims = { w: srcSize.width, h: srcSize.height };
} catch {}
}
} else if (srcPref?.pool === "image") {
const srcRec = imageRecords.find((r: any) => r?.i === srcPref.index);
if (srcRec?.filename) {
try {
const srcSize = await imgGetSize(await fs.promises.readFile(path.join(chatDir, String(srcRec.filename))));
overrideZoomDims = { w: srcSize.width, h: srcSize.height };
} catch {}
}
} else if (srcPref?.pool === "attachment") {
const attRec = attachments.find((a: any) => a?.a === srcPref.index);
if (attRec?.width && attRec?.height) {
overrideZoomDims = { w: attRec.width, h: attRec.height };
} else {
try {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: srcPref.index,
});
if ((lm as any)?.ok && (lm as any).buffer) {
const srcSize = await imgGetSize((lm as any).buffer as Buffer);
overrideZoomDims = { w: srcSize.width, h: srcSize.height };
const _oa = typeof (lm as any).originalPath === "string" ? (lm as any).originalPath : undefined;
_zoomAuditSourceOverride = {
sourceKind: "attachment",
sourceOriginAbs: _oa,
sourceFileName: _oa ? path.basename(_oa) : undefined,
sourceOriginalName: typeof (lm as any).originalName === "string" ? (lm as any).originalName : undefined,
originalDims: { width: srcSize.width, height: srcSize.height, bytes: (lm as any).buffer.byteLength },
};
}
} catch {}
}
} else if (srcPref?.pool === "picture") {
const srcRec = pictures.find((p: any) => p?.p === srcPref.index);
if (srcRec?.filename) {
try {
const srcSize = await imgGetSize(await fs.promises.readFile(path.join(chatDir, String(srcRec.filename))));
overrideZoomDims = { w: srcSize.width, h: srcSize.height };
} catch {}
}
}
}
}
} else if (pref.pool === "variant") {
const found = variantPaths.find((v) => v.v === pref.index);
if (!found) throw new Error(`Variant v${pref.index} not found.`);
rawBuf = await fs.promises.readFile(found.path);
_zoomAuditSourceOverride = {
sourceKind: "variant",
sourceOriginAbs: found.path,
sourceFileName: path.basename(found.path),
// originalDims filled after imgGetSize below
};
// Find the most recent crop record whose cropSource matches this variant
const cropCandidate = imageRecords
.filter((r: any) => r?.cropSource === rawCanvas &&
(typeof r.cropLeft === "number" || typeof r.cropTop === "number" ||
typeof r.cropRight === "number" || typeof r.cropBottom === "number"))
.sort((a: any, b: any) => (b.i ?? 0) - (a.i ?? 0))[0];
if (cropCandidate) {
resolvedCropMeta = {
cropLeft: cropCandidate.cropLeft,
cropTop: cropCandidate.cropTop,
cropRight: cropCandidate.cropRight,
cropBottom: cropCandidate.cropBottom,
cropSource: cropCandidate.cropSource,
};
}
} else {
// picture pool (pN)
const found = pictures.find((p: any) => p?.p === pref.index);
if (!found) throw new Error(`Picture p${pref.index} not found.`);
rawBuf = await fs.promises.readFile(path.join(chatDir, String(found.filename || "")));
_zoomAuditSourceOverride = {
sourceKind: "picture",
sourceOriginAbs: path.join(chatDir, String(found.filename || "")),
sourceFileName: typeof found.filename === "string" ? found.filename : undefined,
// originalDims filled after imgGetSize below
};
// Find the most recent crop record whose cropSource matches this picture
const cropCandidate = imageRecords
.filter((r: any) => r?.cropSource === rawCanvas &&
(typeof r.cropLeft === "number" || typeof r.cropTop === "number" ||
typeof r.cropRight === "number" || typeof r.cropBottom === "number"))
.sort((a: any, b: any) => (b.i ?? 0) - (a.i ?? 0))[0];
if (cropCandidate) {
resolvedCropMeta = {
cropLeft: cropCandidate.cropLeft,
cropTop: cropCandidate.cropTop,
cropRight: cropCandidate.cropRight,
cropBottom: cropCandidate.cropBottom,
cropSource: cropCandidate.cropSource,
};
}
}
// detectLabel: resolve crop coordinates from a prior detect_object run
if (typeof input.detectLabel === "string" && input.detectLabel.trim()) {
const detectResult = resolveDetectionCrop(
imageRecords,
rawCanvas,
input.detectLabel.trim(),
typeof input.detectIndex === "number" ? Math.max(0, input.detectIndex) : 0,
input.frameAdjust
);
if (!detectResult.ok) {
throw new Error(detectResult.error);
}
resolvedCropMeta = {
cropLeft: detectResult.cropLeft,
cropRight: detectResult.cropRight,
cropTop: detectResult.cropTop,
cropBottom: detectResult.cropBottom,
};
skipImgCrop = false;
overrideZoomDims = undefined;
detectLabelWasUsed = true;
// If canvas was the iN detect result, reload rawBuf from the actual source
if (detectResult.canvasWasDetectRecord && detectResult.srcCanvas) {
const srcRef = parsePrefixedNotation(detectResult.srcCanvas);
if (srcRef) {
if (srcRef.pool === "attachment") {
const lm = await resolveImg2ImgSourceLMStudio({
chatId: currentLmChatId || undefined,
explicitAttachmentSource: true,
attachmentIndex: srcRef.index,
});
if (!(lm as any)?.ok || !(lm as any).buffer) throw new Error(`Source a${srcRef.index} not found.`);
rawBuf = (lm as any).buffer as Buffer;
} else if (srcRef.pool === "image") {
const srcRec = imageRecords.find((r: any) => r?.i === srcRef.index);
if (!srcRec) throw new Error(`Source i${srcRef.index} not found.`);
rawBuf = await fs.promises.readFile(path.join(chatDir, String(srcRec.filename)));
} else if (srcRef.pool === "variant") {
const srcPath = variantPaths.find((v) => v.v === srcRef.index)?.path;
if (!srcPath) throw new Error(`Source v${srcRef.index} not found.`);
rawBuf = await fs.promises.readFile(srcPath);
} else {
// picture
const srcRec = pictures.find((p: any) => p?.p === srcRef.index);
if (!srcRec) throw new Error(`Source p${srcRef.index} not found.`);
rawBuf = await fs.promises.readFile(path.join(chatDir, String(srcRec.filename || "")));
}
}
}
}
} else {
// Use last attachment as fallback
const lm = await resolveImg2ImgSourceLMStudio({
chatId: input.canvas ? undefined : undefined,
explicitAttachmentSource: true,
});
if (!(lm as any)?.ok || !(lm as any).buffer) throw new Error("No source image available.");
rawBuf = (lm as any).buffer as Buffer;
}
} catch (e) {
return {
content: [{ type: "text", text: String((e as any)?.message || e) }],
isError: true as const,
};
}
// If no crop metadata was found (plain image, no prior crop), default to zero-crop
// (= full image). imgCropToPng with all-zero values is a no-op.
if (!resolvedCropMeta) {
resolvedCropMeta = { cropLeft: 0, cropTop: 0, cropRight: 0, cropBottom: 0 };
}
// Measure rawBuf dimensions (for variant/attachment paths: pre-crop source; for image path: the region itself).
const canvasSize = await imgGetSize(rawBuf);
const origW = canvasSize.width;
const origH = canvasSize.height;
// Complete audit override with pre-crop dims for paths (attachment, variant, picture) that
// could not compute dims inside the try block.
if (_zoomAuditSourceOverride && !_zoomAuditSourceOverride.originalDims) {
_zoomAuditSourceOverride = {
..._zoomAuditSourceOverride,
originalDims: { width: origW, height: origH, bytes: rawBuf.byteLength },
};
}
// When imageFormat is specified and we are about to crop the source image,
// expand the crop region symmetrically so the region already has the target AR.
// This ensures normalizeInputBuffer can cover-resize without clipping the detection region.
// Always expand (never shrink) — the full selected region must remain visible.
if (input.imageFormat && resolvedCropMeta && !skipImgCrop) {
const ratioMap: Record<string, number> = {
square: 1,
landscape: 4 / 3,
portrait: 3 / 4,
"16:9": 16 / 9,
};
const targetAR = ratioMap[input.imageFormat as string];
if (targetAR !== undefined) {
const cl = resolvedCropMeta.cropLeft ?? 0;
const cr = resolvedCropMeta.cropRight ?? 0;
const ct = resolvedCropMeta.cropTop ?? 0;
const cb = resolvedCropMeta.cropBottom ?? 0;
const regionW = origW * (1 - cl / 100 - cr / 100);
const regionH = origH * (1 - ct / 100 - cb / 100);
const regionAR = regionW / regionH;
if (Math.abs(regionAR - targetAR) > 0.005) {
if (regionAR < targetAR) {
// Region too tall → need to expand width.
// Try symmetric first; if one side hits the image boundary (crop=0),
// transfer the remainder to the opposite side.
const totalExtraPct = ((regionH * targetAR - regionW) / origW) * 100;
const halfExtra = totalExtraPct / 2;
const leftReduction = Math.min(cl, halfExtra);
const rightReduction = Math.min(cr, halfExtra + (halfExtra - leftReduction));
const leftReduction2 = Math.min(cl, halfExtra + (halfExtra - rightReduction));
resolvedCropMeta = {
...resolvedCropMeta,
cropLeft: Math.max(0, cl - leftReduction2),
cropRight: Math.max(0, cr - rightReduction),
};
} else {
// Region too wide → need to expand height.
const totalExtraPct = ((regionW / targetAR - regionH) / origH) * 100;
const halfExtra = totalExtraPct / 2;
const topReduction = Math.min(ct, halfExtra);
const bottomReduction = Math.min(cb, halfExtra + (halfExtra - topReduction));
const topReduction2 = Math.min(ct, halfExtra + (halfExtra - bottomReduction));
resolvedCropMeta = {
...resolvedCropMeta,
cropTop: Math.max(0, ct - topReduction2),
cropBottom: Math.max(0, cb - bottomReduction),
};
}
}
}
}
// Apply frameAdjust to resolvedCropMeta for non-detect, non-skip paths.
// detectLabelWasUsed: frameAdjust was already applied inside resolveDetectionCrop.
// skipImgCrop: rawBuf is already the cropped region; can't re-crop from source here.
if (input.frameAdjust !== undefined && !detectLabelWasUsed && !skipImgCrop) {
const cl = resolvedCropMeta.cropLeft ?? 0;
const cr = resolvedCropMeta.cropRight ?? 0;
const ct = resolvedCropMeta.cropTop ?? 0;
const cb = resolvedCropMeta.cropBottom ?? 0;
const regionW = origW * (1 - cl / 100 - cr / 100);
const regionH = origH * (1 - ct / 100 - cb / 100);
const cropDiag = Math.hypot(regionW, regionH);
let d_px: number;
if (typeof input.frameAdjust === "string") {
const m = String(input.frameAdjust).trim().match(/^([+-]?\d+(?:\.\d+)?)\s*(%|px)?$/i);
if (m) {
const val = parseFloat(m[1]);
d_px = m[2]?.toLowerCase() === "px" ? val : (val / 100) * cropDiag;
} else {
d_px = 0;
}
} else {
d_px = (input.frameAdjust / 100) * cropDiag;
}
const dLR = (d_px / origW) * 100;
const dTB = (d_px / origH) * 100;
resolvedCropMeta = {
...resolvedCropMeta,
cropLeft: Math.max(0, cl - dLR),
cropRight: Math.max(0, cr - dLR),
cropTop: Math.max(0, ct - dTB),
cropBottom: Math.max(0, cb - dTB),
};
}
// Apply the crop from persisted metadata only when rawBuf is the full source (variant/attachment).
// For iN canvas (skipImgCrop=true) rawBuf is already the target region.
if (!skipImgCrop) {
rawBuf = await imgCropToPng(rawBuf, {
left: resolvedCropMeta.cropLeft ?? 0,
top: resolvedCropMeta.cropTop ?? 0,
right: resolvedCropMeta.cropRight ?? 0,
bottom: resolvedCropMeta.cropBottom ?? 0,
});
}
// ── RESTORE-PASS — edit-class quality pre-stage for small sources ─────────
// Restore is EDIT, not image2image. Must use generateImageEdit.
// If the source is below the restore threshold, run a fast edit pass to
// recover quality before the zoom-in pipeline consumes the buffer.
if ((imageService as any)?.name === "drawthings") {
if (typeof imageService.generateImageEdit !== "function") {
throw new Error(
"Restore-Pass requires edit mode (gRPC backend). HTTP backend does not support generateImageEdit."
);
}
const rawSize = await imgGetSize(rawBuf);
if (rawSize.width + rawSize.height < drawthingsRestoreLimits.targetSumRestore) {
const restoreOnProgress = onProgress
? (step: number, total: unknown, msg?: string) =>
onProgress(step, total as any, step === -1 ? (msg ? `Restore ${msg}` : "Restore") : "Restore")
: undefined;
// Normalize source up to targetSum (1344) using the standard pipeline.
// targetSumRestore is only the firing threshold; targetSum is the actual
// normalize goal. Supply requestedRawW/H that exceed targetSum to force
// normalizeInputBuffer to produce the maximum 64-aligned result at targetSum.
const _restoreRawSum = rawSize.width + rawSize.height;
const _restoreFactor = Math.ceil(drawthingsRestoreLimits.targetSum / _restoreRawSum) + 1;
const restoreNormalized = await normalizeInputBuffer(rawBuf, {
requestedRawW: rawSize.width * _restoreFactor,
requestedRawH: rawSize.height * _restoreFactor,
targetSumOverride: drawthingsRestoreLimits.targetSum,
logPrefix: "[restore]",
});
const restoreAdjW = restoreNormalized.preprocess.adjusted.width!;
const restoreAdjH = restoreNormalized.preprocess.adjusted.height!;
const restoreNormBuf = restoreNormalized.buf;
// model must NOT be sent — Draw Things must use whatever model is currently loaded.
// input.model is irrelevant here and must not be set.
// Only pass runtime overrides — the service fills baseDefaults itself.
// Pre-spreading defaultParamsRestore would flood `filtered` and override the overlay.
const restoreParams: any = {
width: restoreAdjW,
height: restoreAdjH,
prompt: input.prompt ?? "",
_dt_i2i_profile: "restore",
_dt_needs_upscaler: false,
};
const restoreStartMs = Date.now();
const restoreResult = await imageService.generateImageEdit!(
restoreParams,
[restoreNormBuf],
restoreOnProgress,
undefined
);
const restoreInferenceMs = Date.now() - restoreStartMs;
if ((restoreResult as any).isError || (restoreResult as any).error) {
throw new Error(
`Restore-Pass failed: ${(restoreResult as any).errorMessage ?? (restoreResult as any).error ?? "unknown"}`
);
}
let restoreBuffers: Buffer[] = [];
if (Array.isArray((restoreResult as any).images) && (restoreResult as any).images.length > 0) {
for (const img of (restoreResult as any).images) {
if (typeof img === "string") {
const b64 = img.startsWith("data:") ? img.split(",")[1] : img;
restoreBuffers.push(Buffer.from(b64, "base64"));
}
}
} else if (Buffer.isBuffer((restoreResult as any).imageBuffer)) {
restoreBuffers.push((restoreResult as any).imageBuffer as Buffer);
} else if ((restoreResult as any).imageData) {
const d = (restoreResult as any).imageData;
if (Buffer.isBuffer(d)) restoreBuffers.push(d);
else if (typeof d === "string") {
const b64 = d.startsWith("data:") ? d.split(",")[1] : d;
restoreBuffers.push(Buffer.from(b64, "base64"));
}
}
if (restoreBuffers.length === 0) throw new Error("Restore-Pass returned no image data");
const restoreMeta = (restoreResult as any)?.metadata ?? {};
let restoreBackendW: number | undefined;
let restoreBackendH: number | undefined;
try {
const rm = await imgGetSize(restoreBuffers[0]);
restoreBackendW = rm.width;
restoreBackendH = rm.height;
} catch {}
rawBuf = restoreBuffers[0];
try {
const auditR = buildAuditLogger({ backend: "drawthings", mode: "edit", requestId: undefined });
if (currentLmChatId) auditR.setChatId(currentLmChatId);
auditR.setUserRequest({ canvas: input.canvas, prompt: input.prompt ?? "" } as any);
auditR.setRenderTarget({
requested_raw: { width: restoreAdjW, height: restoreAdjH },
requested_effective: { width: restoreAdjW, height: restoreAdjH },
needs_upscaler: false,
});
auditR.setInputs({
canvas: {
notation: input.canvas ?? undefined,
original: {
width: restoreNormalized.preprocess.original.width,
height: restoreNormalized.preprocess.original.height,
bytes: restoreNormalized.preprocess.original.bytes,
},
adjusted: {
width: restoreNormalized.preprocess.adjusted.width,
height: restoreNormalized.preprocess.adjusted.height,
bytes: restoreNormalized.preprocess.adjusted.bytes,
},
},
});
const restoreOutput: Record<string, any> = { restore_pass: true };
if (restoreBackendW !== undefined && restoreBackendH !== undefined) {
restoreOutput.backend_returned = { width: restoreBackendW, height: restoreBackendH };
}
restoreOutput.inference_time_ms = restoreInferenceMs;
if (typeof restoreMeta.prompt_used === "string") {
restoreOutput.prompt_used = restoreMeta.prompt_used;
}
if (typeof restoreMeta.model === "string" && restoreMeta.model.trim()) {
restoreOutput.model_used = path.basename(restoreMeta.model);
}
if (typeof restoreMeta.overlay_source === "string" && restoreMeta.overlay_source.trim()) {
restoreOutput.overlay_source = restoreMeta.overlay_source;
}
if (typeof restoreMeta.overlay_preset === "string" && restoreMeta.overlay_preset.trim()) {
restoreOutput.overlay_preset = restoreMeta.overlay_preset;
}
if (typeof restoreMeta.defaults_used === "string" && restoreMeta.defaults_used.trim()) {
restoreOutput.defaults_used = restoreMeta.defaults_used;
}
if (typeof restoreMeta.i2i_profile === "string" && restoreMeta.i2i_profile.trim()) {
restoreOutput.i2i_profile = restoreMeta.i2i_profile;
}
if (typeof restoreMeta.strength_used === "number" && Number.isFinite(restoreMeta.strength_used)) {
restoreOutput.strength_used = restoreMeta.strength_used;
}
if (typeof restoreMeta.steps_used === "number" && Number.isFinite(restoreMeta.steps_used)) {
restoreOutput.steps_used = restoreMeta.steps_used;
}
if (typeof restoreMeta.sampler_used === "string" && restoreMeta.sampler_used.trim()) {
restoreOutput.sampler_used = restoreMeta.sampler_used;
}
if (typeof restoreMeta.guidance_scale_used === "number" && Number.isFinite(restoreMeta.guidance_scale_used)) {
restoreOutput.guidance_scale_used = restoreMeta.guidance_scale_used;
}
if (Array.isArray(restoreMeta.loras_used) && restoreMeta.loras_used.length > 0) {
restoreOutput.loras_used = restoreMeta.loras_used;
}
auditR.setOutput(restoreOutput as any);
await auditR.write();
} catch (auditErr) {
log(`[restore] audit write failed: ${String(auditErr)}`);
}
}
}
// ── END RESTORE-PASS ───────────────────────────────────────────────────────
// For the detectLabel path the crop was just applied above: origW/H are the
// pre-crop source dims. Use those as overrideZoomDims (= source resolution
// reference) and derive zoomSrcW/H from the post-crop buffer so that the
// render target scales the detection region AR up to source resolution —
// matching the iN detect path behaviour.
let zoomSrcW = origW;
let zoomSrcH = origH;
if (detectLabelWasUsed && !skipImgCrop) {
const croppedSize = await imgGetSize(rawBuf);
overrideZoomDims = { w: origW, h: origH };
zoomSrcW = croppedSize.width;
zoomSrcH = croppedSize.height;
}
// Derive render_target dimensions.
// When overrideZoomDims is available (iN canvas or detectLabel case), zoomSrcW/H are
// the cropped region dims. Scale the crop to the source resolution while preserving
// crop AR, using the longer source axis as the constraining dim.
// When overrideZoomDims is absent (variant/attachment canvas), zoomSrcW/H are the
// pre-crop source dims and are used directly.
let zoomTargetW: number;
let zoomTargetH: number;
if (overrideZoomDims) {
if (zoomSrcW > zoomSrcH) {
// landscape crop: constrain to source width
zoomTargetW = overrideZoomDims.w;
zoomTargetH = Math.round(overrideZoomDims.w * zoomSrcH / zoomSrcW);
} else {
// portrait or square crop: constrain to source height
zoomTargetH = overrideZoomDims.h;
zoomTargetW = Math.round(overrideZoomDims.h * zoomSrcW / zoomSrcH);
}
} else {
zoomTargetW = zoomSrcW;
zoomTargetH = zoomSrcH;
}
// Safety floor: if either dimension is below 256, scale both up by the same integer
// factor so the short side reaches at least 256 — preserving aspect ratio.
const minZoomDim = Math.min(zoomTargetW, zoomTargetH);
if (minZoomDim < 256) {
const scale = Math.ceil(256 / minZoomDim);
zoomTargetW = Math.round(zoomTargetW * scale);
zoomTargetH = Math.round(zoomTargetH * scale);
}
// Build merged params for handleGenerateImage (image2image mode).
// Explicit user-supplied width/height/imageFormat always override the cropSource-derived dims.
// cropSource-derived zoomTargetW/H are used only when the user provided none of those.
const hasExplicitDims = input.width !== undefined || input.height !== undefined || input.imageFormat !== undefined;
const mergedParams: Record<string, unknown> = {
mode: "image2image",
prompt: input.prompt ?? "",
};
if (hasExplicitDims) {
if (input.width !== undefined) mergedParams.width = input.width;
if (input.height !== undefined) mergedParams.height = input.height;
if (input.imageFormat !== undefined) mergedParams.imageFormat = input.imageFormat;
} else {
mergedParams.width = zoomTargetW;
mergedParams.height = zoomTargetH;
}
if (input.quality !== undefined) mergedParams.quality = input.quality;
if (input.model !== undefined) mergedParams.model = input.model;
if (input.canvas !== undefined) mergedParams.canvas = input.canvas;
const zoomHandleProgress = onProgress
? (step: number, total: unknown, msg?: string) =>
onProgress(step, total as any, step === -1 ? (msg ? `Zoom ${msg}` : "Zoom") : "Zoom")
: undefined;
const result = await handleGenerateImage(
mergedParams,
zoomHandleProgress,
{
presuppliedSourceBuf: rawBuf,
sourceTag: "canvas:zoom-in",
cropMeta: resolvedCropMeta ? {
left: resolvedCropMeta.cropLeft ?? 0,
top: resolvedCropMeta.cropTop ?? 0,
right: resolvedCropMeta.cropRight ?? 0,
bottom: resolvedCropMeta.cropBottom ?? 0,
cropSource: resolvedCropMeta.cropSource,
} : undefined,
auditSourceOverride: _zoomAuditSourceOverride,
}
);
// Inject zoom-specific metadata into tool result summary so the agent
// (and user) can see what canvas state was used without checking audit logs.
if (result.content && Array.isArray(result.content) && result.content.length > 0) {
const lastItem = result.content[result.content.length - 1];
if (lastItem && lastItem.type === "text" && typeof lastItem.text === "string") {
try {
const summary = JSON.parse(lastItem.text);
summary.zoom_input = {
canvas_source: { width: origW, height: origH },
render_target: { width: zoomTargetW, height: zoomTargetH },
...(resolvedCropMeta ? { crop: resolvedCropMeta } : {}),
};
lastItem.text = JSON.stringify(summary);
} catch { /* summary not JSON — leave as-is */ }
}
}
return result;
}
// ─────────────────────────────────────────────────────────────────────────────
// handleInpaint: load canvas, build a white-on-black mask at the canvas's own
// pixel size (white = region to repaint, taken from the detectLabel boxes or the
// record's cropLeft/cropTop/cropRight/cropBottom), then call handleGenerateImage
// in edit mode with sourceTag canvas:inpaint.
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Inpaint tool handler: repaints one or more rectangular regions of a canvas image.
 *
 * Steps:
 *  1. Validate `pluginParams` with `InpaintToolSchemaStrict`.
 *  2. Resolve the active chat's working directory and load the image referenced by
 *     `input.canvas` (attachment / image / variant / picture pool).
 *  3. Resolve the regions to repaint:
 *     - `detectLabel` given: one box per label via `resolveDetectionCrop`. When the
 *       canvas is itself a detect record, the mask is applied to that record's
 *       source image (all labels must share one source).
 *     - otherwise, an image-pool record carrying crop fields: that crop box, applied
 *       to its `cropSource` image when that image can be loaded.
 *     - otherwise: the whole canvas (all insets 0).
 *  4. Build a black mask with each box painted white, blur it, and delegate to
 *     `handleGenerateImage` in "edit" mode with sourceTag "canvas:inpaint".
 *
 * Box values are percentages measured inwards from the respective edge
 * (`left` from the left edge, `right` from the right edge, and so on).
 *
 * @param pluginParams Raw tool arguments; validated before use.
 * @param onProgress Optional progress callback, forwarded unchanged to `handleGenerateImage`.
 * @returns The `handleGenerateImage` result, or a text result describing why the
 *          request could not be prepared (`isError: true` for resolution failures).
 */
export async function handleInpaint(
  pluginParams: any,
  onProgress?: ProgressCallback
): Promise<any> {
  const parsed = InpaintToolSchemaStrict.safeParse(pluginParams || {});
  if (!parsed.success) {
    return {
      content: [{ type: "text", text: `Invalid inpaint parameters: ${formatZodError(parsed.error)}` }],
    };
  }
  const input = parsed.data as any;

  type BBox = { left: number; top: number; right: number; bottom: number };
  type CanvasRef = NonNullable<ReturnType<typeof parsePrefixedNotation>>;

  let rawBuf: Buffer;
  // Default: zero insets on every side, i.e. the whole canvas is repainted.
  let resolvedBBoxes: BBox[] = [{ left: 0, top: 0, right: 0, bottom: 0 }];
  try {
    let currentLmChatId: string | null = null;
    let currentLmWorkingDir: string | null = null;
    try {
      const ctx = (await getActiveChatContext()) as any;
      if (ctx?.chatId) currentLmChatId = ctx.chatId;
      if (ctx?.workingDir) currentLmWorkingDir = ctx.workingDir;
    } catch {
      // Best effort: fall through to the explicit chat-id lookup below.
    }
    if (!currentLmChatId) {
      try {
        const active = (await resolveActiveLMStudioChatId()) as any;
        if (active?.ok) currentLmChatId = active.chatId;
      } catch {
        // Best effort: a missing chat id is reported by the guard below.
      }
    }
    const chatDir: string | undefined =
      currentLmWorkingDir ||
      (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
    const rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;
    if (!rawCanvas || !chatDir) throw new Error("canvas is required for inpaint");
    const baseDir: string = chatDir;

    const st: any = await readState(baseDir);
    const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
    const variantPaths: Array<{ v: number; path: string }> = variantRecords
      .filter((v: any) => v && typeof v.filename === "string")
      .map((v: any) => ({ v: v.v || 1, path: path.join(baseDir, v.filename) }));
    const pictures: any[] = Array.isArray(st?.pictures) ? st.pictures : [];
    const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];

    // Human-readable name of a reference, used in "not found" errors.
    const describeRef = (ref: CanvasRef): string => {
      if (ref.pool === "attachment") return `Attachment a${ref.index}`;
      if (ref.pool === "image") return `Image i${ref.index}`;
      if (ref.pool === "variant") return `Variant v${ref.index}`;
      return `Picture p${ref.index}`;
    };

    // Loads the image bytes behind a parsed reference; undefined when the reference
    // does not resolve to a stored file (unknown index or record without filename).
    const tryLoadSource = async (ref: CanvasRef): Promise<Buffer | undefined> => {
      if (ref.pool === "attachment") {
        const lm = (await resolveImg2ImgSourceLMStudio({
          chatId: currentLmChatId || undefined,
          explicitAttachmentSource: true,
          attachmentIndex: ref.index,
        })) as any;
        return lm?.ok && lm.buffer ? (lm.buffer as Buffer) : undefined;
      }
      if (ref.pool === "image") {
        const rec = imageRecords.find((r: any) => r?.i === ref.index);
        if (!rec?.filename) return undefined;
        return await fs.promises.readFile(path.join(baseDir, String(rec.filename)));
      }
      if (ref.pool === "variant") {
        const hit = variantPaths.find((v) => v.v === ref.index);
        if (!hit) return undefined;
        return await fs.promises.readFile(hit.path);
      }
      const rec = pictures.find((p: any) => p?.p === ref.index);
      if (!rec?.filename) return undefined;
      return await fs.promises.readFile(path.join(baseDir, String(rec.filename)));
    };

    const pref = parsePrefixedNotation(rawCanvas);
    if (!pref) throw new Error(`Invalid canvas notation: ${rawCanvas}`);
    const canvasBuf = await tryLoadSource(pref);
    if (!canvasBuf) throw new Error(`${describeRef(pref)} not found.`);
    rawBuf = canvasBuf;

    // Resolve crop coordinates from mask/crop record or detectLabel(s).
    const labels: string[] = input.detectLabel ?? [];
    const indices: number[] = input.detectIndex ?? [];
    if (labels.length > 0) {
      const boxes: BBox[] = [];
      let resolvedSrcCanvas: string | undefined;
      for (let li = 0; li < labels.length; li++) {
        // Per-label index, falling back to the first supplied index, then to 0.
        const detectIdx = indices[li] ?? indices[0] ?? 0;
        const detectResult = resolveDetectionCrop(
          imageRecords,
          rawCanvas,
          labels[li],
          detectIdx,
          input.frameAdjust
        );
        if (!detectResult.ok) throw new Error(detectResult.error);
        boxes.push({
          left: detectResult.cropLeft,
          top: detectResult.cropTop,
          right: detectResult.cropRight,
          bottom: detectResult.cropBottom,
        });
        if (detectResult.canvasWasDetectRecord && detectResult.srcCanvas) {
          if (resolvedSrcCanvas !== undefined && resolvedSrcCanvas !== detectResult.srcCanvas) {
            throw new Error(
              `Mixed sources — detectLabel entries resolve to different source images ` +
                `('${resolvedSrcCanvas}' vs '${detectResult.srcCanvas}'). ` +
                `All labels must refer to detections on the same source image.`
            );
          }
          resolvedSrcCanvas = detectResult.srcCanvas;
        }
      }
      if (resolvedSrcCanvas) {
        // The canvas was a detect record: paint on its source image when it can be
        // loaded, otherwise keep the canvas buffer loaded above.
        const srcRef = parsePrefixedNotation(resolvedSrcCanvas);
        if (srcRef) rawBuf = (await tryLoadSource(srcRef)) ?? rawBuf;
      }
      resolvedBBoxes = boxes;
    } else if (pref.pool === "image") {
      const rec = imageRecords.find((r: any) => r?.i === pref.index);
      if (rec) {
        if (Array.isArray(rec.detections) && rec.detections.length > 1) {
          const ambiguousLabels = [
            ...new Set((rec.detections as any[]).map((d: any) => d?.label).filter(Boolean)),
          ].join(", ");
          throw new Error(
            `Ambiguous — canvas '${rawCanvas}' contains ${rec.detections.length} detected regions. ` +
              `Specify detectLabel to select one. Available labels: ${ambiguousLabels || "(none)"}.`
          );
        }
        const hasCropFields = [rec.cropLeft, rec.cropTop, rec.cropRight, rec.cropBottom].some(
          (value) => typeof value === "number"
        );
        if (hasCropFields) {
          resolvedBBoxes = [
            {
              left: rec.cropLeft ?? 0,
              top: rec.cropTop ?? 0,
              right: rec.cropRight ?? 0,
              bottom: rec.cropBottom ?? 0,
            },
          ];
          if (typeof rec.cropSource === "string") {
            // Crop record: the box is relative to the image it was cropped from.
            const srcRef = parsePrefixedNotation(rec.cropSource);
            if (srcRef) rawBuf = (await tryLoadSource(srcRef)) ?? rawBuf;
          }
        }
      }
    }
  } catch (e) {
    return { content: [{ type: "text", text: String((e as any)?.message || e) }], isError: true as const };
  }

  const canvasSize = await imgGetSize(rawBuf);
  const adjW = canvasSize.width;
  const adjH = canvasSize.height;

  // Build inpaint mask: white (0xffffffff) inside each BBox, black (0x000000ff) outside.
  const Jimp = (await import("jimp")).Jimp;
  const mask = new Jimp({ width: adjW, height: adjH, color: 0x000000ff });
  // Keep pixel coordinates inside the bitmap: a rectangle reaching outside the
  // canvas would otherwise address pixels in neighbouring rows.
  const clampTo = (value: number, max: number): number => Math.min(max, Math.max(0, value));
  for (const bbox of resolvedBBoxes) {
    const x1 = clampTo(Math.round((bbox.left / 100) * adjW), adjW);
    const x2 = clampTo(Math.round(((100 - bbox.right) / 100) * adjW), adjW);
    const y1 = clampTo(Math.round((bbox.top / 100) * adjH), adjH);
    const y2 = clampTo(Math.round(((100 - bbox.bottom) / 100) * adjH), adjH);
    if (x2 <= x1 || y2 <= y1) continue; // empty or inverted box: nothing to paint
    mask.scan(x1, y1, x2 - x1, y2 - y1, (_x: number, _y: number, idx: number) => {
      mask.bitmap.data[idx] = 255;
      mask.bitmap.data[idx + 1] = 255;
      mask.bitmap.data[idx + 2] = 255;
      mask.bitmap.data[idx + 3] = 255;
    });
  }
  // DEBUG mask save:
  // await fs.promises.writeFile(path.join(logsDir, `debug-mask-${Date.now()}.png`), await mask.getBuffer("image/png"));
  mask.blur(2);
  const maskBuf = await mask.getBuffer("image/png");

  const mergedParams: Record<string, unknown> = {
    mode: "edit",
    prompt: input.prompt ?? "",
    width: adjW,
    height: adjH,
  };
  if (input.quality !== undefined) mergedParams.quality = input.quality;
  if (input.model !== undefined) mergedParams.model = input.model;
  if (input.canvas !== undefined) mergedParams.canvas = input.canvas;
  return handleGenerateImage(mergedParams, onProgress, {
    presuppliedSourceBuf: rawBuf,
    sourceTag: "canvas:inpaint",
    maskBuf,
  });
}
// ─────────────────────────────────────────────────────────────────────────────
// handleOutpaint: load canvas, build a black-on-white mask
// (black = bounding box(es) to preserve, white = everything outside them =
// area to repaint), call handleGenerateImage in edit mode with sourceTag
// canvas:outpaint.
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Outpaint tool handler: preserves one or more rectangular regions of a canvas
 * image and repaints everything outside them.
 *
 * Steps:
 *  1. Validate `pluginParams` with `OutpaintToolSchemaStrict`.
 *  2. Resolve the active chat's working directory and load the image referenced by
 *     `input.canvas` (attachment / image / variant / picture pool).
 *  3. Resolve the regions to preserve:
 *     - `detectLabel` given: one box per label via `resolveDetectionCrop`. When the
 *       canvas is itself a detect record, the mask is applied to that record's
 *       source image (all labels must share one source).
 *     - otherwise, an image-pool record carrying crop fields: that crop box, applied
 *       to its `cropSource` image when that image can be loaded.
 *     - otherwise: the whole canvas (all insets 0).
 *  4. Build a white mask with each box painted black, blur it, and delegate to
 *     `handleGenerateImage` in "edit" mode with sourceTag "canvas:outpaint".
 *
 * Box values are percentages measured inwards from the respective edge
 * (`left` from the left edge, `right` from the right edge, and so on).
 *
 * @param pluginParams Raw tool arguments; validated before use.
 * @param onProgress Optional progress callback, forwarded unchanged to `handleGenerateImage`.
 * @returns The `handleGenerateImage` result, or a text result describing why the
 *          request could not be prepared (`isError: true` for resolution failures).
 */
export async function handleOutpaint(
  pluginParams: any,
  onProgress?: ProgressCallback
): Promise<any> {
  const parsed = OutpaintToolSchemaStrict.safeParse(pluginParams || {});
  if (!parsed.success) {
    return {
      content: [{ type: "text", text: `Invalid outpaint parameters: ${formatZodError(parsed.error)}` }],
    };
  }
  const input = parsed.data as any;

  type BBox = { left: number; top: number; right: number; bottom: number };
  type CanvasRef = NonNullable<ReturnType<typeof parsePrefixedNotation>>;

  let rawBuf: Buffer;
  // Default: zero insets on every side, i.e. the whole canvas is preserved.
  let resolvedBBoxes: BBox[] = [{ left: 0, top: 0, right: 0, bottom: 0 }];
  try {
    let currentLmChatId: string | null = null;
    let currentLmWorkingDir: string | null = null;
    try {
      const ctx = (await getActiveChatContext()) as any;
      if (ctx?.chatId) currentLmChatId = ctx.chatId;
      if (ctx?.workingDir) currentLmWorkingDir = ctx.workingDir;
    } catch {
      // Best effort: fall through to the explicit chat-id lookup below.
    }
    if (!currentLmChatId) {
      try {
        const active = (await resolveActiveLMStudioChatId()) as any;
        if (active?.ok) currentLmChatId = active.chatId;
      } catch {
        // Best effort: a missing chat id is reported by the guard below.
      }
    }
    const chatDir: string | undefined =
      currentLmWorkingDir ||
      (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
    const rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;
    if (!rawCanvas || !chatDir) throw new Error("canvas is required for outpaint");
    const baseDir: string = chatDir;

    const st: any = await readState(baseDir);
    const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
    const variantPaths: Array<{ v: number; path: string }> = variantRecords
      .filter((v: any) => v && typeof v.filename === "string")
      .map((v: any) => ({ v: v.v || 1, path: path.join(baseDir, v.filename) }));
    const pictures: any[] = Array.isArray(st?.pictures) ? st.pictures : [];
    const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];

    // Human-readable name of a reference, used in "not found" errors.
    const describeRef = (ref: CanvasRef): string => {
      if (ref.pool === "attachment") return `Attachment a${ref.index}`;
      if (ref.pool === "image") return `Image i${ref.index}`;
      if (ref.pool === "variant") return `Variant v${ref.index}`;
      return `Picture p${ref.index}`;
    };

    // Loads the image bytes behind a parsed reference; undefined when the reference
    // does not resolve to a stored file (unknown index or record without filename).
    const tryLoadSource = async (ref: CanvasRef): Promise<Buffer | undefined> => {
      if (ref.pool === "attachment") {
        const lm = (await resolveImg2ImgSourceLMStudio({
          chatId: currentLmChatId || undefined,
          explicitAttachmentSource: true,
          attachmentIndex: ref.index,
        })) as any;
        return lm?.ok && lm.buffer ? (lm.buffer as Buffer) : undefined;
      }
      if (ref.pool === "image") {
        const rec = imageRecords.find((r: any) => r?.i === ref.index);
        if (!rec?.filename) return undefined;
        return await fs.promises.readFile(path.join(baseDir, String(rec.filename)));
      }
      if (ref.pool === "variant") {
        const hit = variantPaths.find((v) => v.v === ref.index);
        if (!hit) return undefined;
        return await fs.promises.readFile(hit.path);
      }
      const rec = pictures.find((p: any) => p?.p === ref.index);
      if (!rec?.filename) return undefined;
      return await fs.promises.readFile(path.join(baseDir, String(rec.filename)));
    };

    const pref = parsePrefixedNotation(rawCanvas);
    if (!pref) throw new Error(`Invalid canvas notation: ${rawCanvas}`);
    const canvasBuf = await tryLoadSource(pref);
    if (!canvasBuf) throw new Error(`${describeRef(pref)} not found.`);
    rawBuf = canvasBuf;

    // Resolve crop coordinates from mask/crop record or detectLabel(s).
    const labels: string[] = input.detectLabel ?? [];
    const indices: number[] = input.detectIndex ?? [];
    if (labels.length > 0) {
      const boxes: BBox[] = [];
      let resolvedSrcCanvas: string | undefined;
      for (let li = 0; li < labels.length; li++) {
        // Per-label index, falling back to the first supplied index, then to 0.
        const detectIdx = indices[li] ?? indices[0] ?? 0;
        const detectResult = resolveDetectionCrop(
          imageRecords,
          rawCanvas,
          labels[li],
          detectIdx,
          input.frameAdjust
        );
        if (!detectResult.ok) throw new Error(detectResult.error);
        boxes.push({
          left: detectResult.cropLeft,
          top: detectResult.cropTop,
          right: detectResult.cropRight,
          bottom: detectResult.cropBottom,
        });
        if (detectResult.canvasWasDetectRecord && detectResult.srcCanvas) {
          if (resolvedSrcCanvas !== undefined && resolvedSrcCanvas !== detectResult.srcCanvas) {
            throw new Error(
              `Mixed sources — detectLabel entries resolve to different source images ` +
                `('${resolvedSrcCanvas}' vs '${detectResult.srcCanvas}'). ` +
                `All labels must refer to detections on the same source image.`
            );
          }
          resolvedSrcCanvas = detectResult.srcCanvas;
        }
      }
      if (resolvedSrcCanvas) {
        // The canvas was a detect record: paint on its source image when it can be
        // loaded, otherwise keep the canvas buffer loaded above.
        const srcRef = parsePrefixedNotation(resolvedSrcCanvas);
        if (srcRef) rawBuf = (await tryLoadSource(srcRef)) ?? rawBuf;
      }
      resolvedBBoxes = boxes;
    } else if (pref.pool === "image") {
      const rec = imageRecords.find((r: any) => r?.i === pref.index);
      if (rec) {
        if (Array.isArray(rec.detections) && rec.detections.length > 1) {
          const ambiguousLabels = [
            ...new Set((rec.detections as any[]).map((d: any) => d?.label).filter(Boolean)),
          ].join(", ");
          throw new Error(
            `Ambiguous — canvas '${rawCanvas}' contains ${rec.detections.length} detected regions. ` +
              `Specify detectLabel to select one. Available labels: ${ambiguousLabels || "(none)"}.`
          );
        }
        const hasCropFields = [rec.cropLeft, rec.cropTop, rec.cropRight, rec.cropBottom].some(
          (value) => typeof value === "number"
        );
        if (hasCropFields) {
          resolvedBBoxes = [
            {
              left: rec.cropLeft ?? 0,
              top: rec.cropTop ?? 0,
              right: rec.cropRight ?? 0,
              bottom: rec.cropBottom ?? 0,
            },
          ];
          if (typeof rec.cropSource === "string") {
            // Crop record: the box is relative to the image it was cropped from.
            const srcRef = parsePrefixedNotation(rec.cropSource);
            if (srcRef) rawBuf = (await tryLoadSource(srcRef)) ?? rawBuf;
          }
        }
      }
    }
  } catch (e) {
    return { content: [{ type: "text", text: String((e as any)?.message || e) }], isError: true as const };
  }

  const canvasSize = await imgGetSize(rawBuf);
  const adjW = canvasSize.width;
  const adjH = canvasSize.height;

  // Build outpaint mask: black (0x000000ff) inside each BBox (preserve), white (0xffffffff) outside (repaint).
  const Jimp = (await import("jimp")).Jimp;
  const mask = new Jimp({ width: adjW, height: adjH, color: 0xffffffff });
  // Keep pixel coordinates inside the bitmap: a rectangle reaching outside the
  // canvas would otherwise address pixels in neighbouring rows.
  const clampTo = (value: number, max: number): number => Math.min(max, Math.max(0, value));
  for (const bbox of resolvedBBoxes) {
    const x1 = clampTo(Math.round((bbox.left / 100) * adjW), adjW);
    const x2 = clampTo(Math.round(((100 - bbox.right) / 100) * adjW), adjW);
    const y1 = clampTo(Math.round((bbox.top / 100) * adjH), adjH);
    const y2 = clampTo(Math.round(((100 - bbox.bottom) / 100) * adjH), adjH);
    if (x2 <= x1 || y2 <= y1) continue; // empty or inverted box: nothing to paint
    mask.scan(x1, y1, x2 - x1, y2 - y1, (_x: number, _y: number, idx: number) => {
      mask.bitmap.data[idx] = 0;
      mask.bitmap.data[idx + 1] = 0;
      mask.bitmap.data[idx + 2] = 0;
      mask.bitmap.data[idx + 3] = 255;
    });
  }
  // DEBUG mask save:
  // await fs.promises.writeFile(path.join(logsDir, `debug-mask-${Date.now()}.png`), await mask.getBuffer("image/png"));
  mask.blur(2);
  const maskBuf = await mask.getBuffer("image/png");

  const mergedParams: Record<string, unknown> = {
    mode: "edit",
    prompt: input.prompt ?? "",
    width: adjW,
    height: adjH,
  };
  if (input.quality !== undefined) mergedParams.quality = input.quality;
  if (input.model !== undefined) mergedParams.model = input.model;
  if (input.canvas !== undefined) mergedParams.canvas = input.canvas;
  return handleGenerateImage(mergedParams, onProgress, {
    presuppliedSourceBuf: rawBuf,
    sourceTag: "canvas:outpaint",
    maskBuf,
  });
}
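// ─────────────────────────────────────────────────────────────────────────────
// handleRefine: image2image with refine overlay; no dimensions are derived here —
// width/height/imageFormat are forwarded only when the caller supplies them.
// ─────────────────────────────────────────────────────────────────────────────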
/**
 * Refine tool handler: runs an image2image pass over an existing image.
 *
 * The source image is the canvas named by `input.canvas` (attachment / image /
 * variant / picture pool) when both a canvas notation and the chat working
 * directory are available. Otherwise the source is whatever
 * `resolveImg2ImgSourceLMStudio` returns for the resolved chat.
 *
 * No dimensions are derived here: `width`, `height` and `imageFormat` are passed
 * to `handleGenerateImage` only when the caller supplied them.
 *
 * @param pluginParams Raw tool arguments; validated with `RefineToolSchemaStrict`.
 * @param onProgress Optional progress callback, forwarded unchanged to `handleGenerateImage`.
 * @returns The `handleGenerateImage` result (sourceTag "canvas:refine"), or a text
 *          result describing why the source image could not be resolved.
 */
export async function handleRefine(
  pluginParams: any,
  onProgress?: ProgressCallback
): Promise<any> {
  const parsed = RefineToolSchemaStrict.safeParse(pluginParams || {});
  if (!parsed.success) {
    return {
      content: [
        {
          type: "text",
          text: `Invalid refine parameters: ${formatZodError(parsed.error)}`,
        },
      ],
    };
  }
  const input = parsed.data as any;

  // Load canvas buffer
  let rawBuf: Buffer;
  try {
    let currentLmChatId: string | null = null;
    let currentLmWorkingDir: string | null = null;
    try {
      const ctx = (await getActiveChatContext()) as any;
      if (ctx?.chatId) currentLmChatId = ctx.chatId;
      if (ctx?.workingDir) currentLmWorkingDir = ctx.workingDir;
    } catch {
      // Best effort: fall through to the explicit chat-id lookup below.
    }
    if (!currentLmChatId) {
      try {
        const active = (await resolveActiveLMStudioChatId()) as any;
        if (active?.ok) currentLmChatId = active.chatId;
      } catch {
        // Best effort: without a chat id the fallback source lookup below is used.
      }
    }
    const chatDir =
      currentLmWorkingDir ||
      (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
    const rawCanvas = typeof input.canvas === "string" ? input.canvas : undefined;

    if (rawCanvas && chatDir) {
      const st: any = await readState(chatDir);
      const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
      const variantPaths: Array<{ v: number; path: string }> = variantRecords
        .filter((v: any) => v && typeof v.filename === "string")
        .map((v: any) => ({ v: v.v || 1, path: path.join(chatDir, v.filename) }));
      const pictures: any[] = Array.isArray(st?.pictures) ? st.pictures : [];
      const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];

      const pref = parsePrefixedNotation(rawCanvas);
      if (!pref) throw new Error(`Invalid canvas notation: ${rawCanvas}`);

      if (pref.pool === "attachment") {
        const lm = (await resolveImg2ImgSourceLMStudio({
          chatId: currentLmChatId || undefined,
          explicitAttachmentSource: true,
          attachmentIndex: pref.index,
        })) as any;
        if (!lm?.ok || !lm.buffer) throw new Error(`Attachment a${pref.index} not found.`);
        rawBuf = lm.buffer as Buffer;
      } else if (pref.pool === "image") {
        const found = imageRecords.find((r: any) => r?.i === pref.index);
        if (!found) throw new Error(`Image i${pref.index} not found.`);
        rawBuf = await fs.promises.readFile(path.join(chatDir, String(found.filename)));
      } else if (pref.pool === "variant") {
        const found = variantPaths.find((v) => v.v === pref.index);
        if (!found) throw new Error(`Variant v${pref.index} not found.`);
        rawBuf = await fs.promises.readFile(found.path);
      } else {
        // picture pool (pN)
        const found = pictures.find((p: any) => p?.p === pref.index);
        if (!found) throw new Error(`Picture p${pref.index} not found.`);
        rawBuf = await fs.promises.readFile(path.join(chatDir, String(found.filename || "")));
      }
    } else {
      // No usable canvas reference: let the resolver pick the source, scoped to the
      // chat id resolved above when one is known (same as the attachment branch).
      const lm = (await resolveImg2ImgSourceLMStudio({
        chatId: currentLmChatId || undefined,
        explicitAttachmentSource: true,
      })) as any;
      if (!lm?.ok || !lm.buffer) throw new Error("No source image available.");
      rawBuf = lm.buffer as Buffer;
    }
  } catch (e) {
    return {
      content: [{ type: "text", text: String((e as any)?.message || e) }],
      isError: true as const,
    };
  }

  const mergedParams: Record<string, unknown> = {
    mode: "image2image",
    model: input.model,
  };
  if (input.canvas !== undefined) mergedParams.canvas = input.canvas;
  if (input.width !== undefined) mergedParams.width = input.width;
  if (input.height !== undefined) mergedParams.height = input.height;
  if (input.imageFormat !== undefined) mergedParams.imageFormat = input.imageFormat;
  return handleGenerateImage(mergedParams, onProgress, { presuppliedSourceBuf: rawBuf, sourceTag: "canvas:refine" });
}