// Project Files
// src/tools/detect_object.ts
import { tool, type Tool, type ToolsProviderController, type ToolCallContext } from "@lmstudio/sdk";
import { z } from "zod";
import path from "path";
import fs from "fs";
import { pathToFileURL } from "url";
import {
syncAttachmentsToState,
getActiveChatContext,
resolveActiveLMStudioChatId,
getLMStudioWorkingDir,
readState,
writeStateAtomic,
generatePreviewFromBuffer,
appendImages,
getHealthyServerBaseUrl,
toHttpOriginalUrl,
toHttpPreviewUrl,
buildAuditLogger,
getSelfPluginIdentifier,
VARIANT_FULL_CONFIG,
} from "../core-bundle.mjs";
/**
 * Draws rectangular outlines for each bounding box onto the given image.
 *
 * Box coordinates are interpreted in `sourceDims` space (when provided) and
 * rescaled to the decoded image's own dimensions before drawing, so boxes
 * reported against a preview can be drawn on the full-resolution original.
 * Colors cycle through a fixed high-contrast palette; outlines are 2px deep.
 *
 * @param buffer - Encoded source image bytes (any format Jimp can decode).
 * @param bboxes - Boxes as [x1, y1, x2, y2] in `sourceDims` coordinate space.
 * @param sourceDims - Coordinate space the boxes were reported in; omit when
 *   boxes are already in the image's pixel space (scale factors become 1).
 * @returns PNG-encoded image bytes with the outlines applied.
 */
async function drawBboxesOnImage(
  buffer: Buffer,
  bboxes: [number, number, number, number][],
  sourceDims?: { width: number; height: number }
): Promise<Buffer> {
  // Resolve Jimp via require() so this works from both CJS and ESM bundles.
  const requireFn =
    typeof require !== "undefined" ? require : (await import("module")).createRequire(__filename);
  const jimpModule: any = requireFn("jimp");
  const JimpApi: any = jimpModule.Jimp ?? jimpModule.default ?? jimpModule;
  if (!JimpApi || typeof JimpApi.read !== "function") {
    throw new Error("drawBboxesOnImage: Jimp.read not available");
  }
  const image: any = await JimpApi.read(buffer);
  // Dimension accessors differ across Jimp releases: try the getter method,
  // then the plain property, then the raw bitmap as a last resort.
  const readDim = (method: string, prop: "width" | "height"): number => {
    if (typeof image[method] === "function") return image[method]();
    if (typeof image[prop] === "number") return image[prop];
    return image.bitmap?.[prop] || 0;
  };
  const imageWidth = readDim("getWidth", "width");
  const imageHeight = readDim("getHeight", "height");
  // Scale factors: convert bbox coords from sourceDims space to this image's
  // space. When sourceDims matches the image (or is omitted), both are 1.
  const scaleX = sourceDims && sourceDims.width > 0 ? imageWidth / sourceDims.width : 1;
  const scaleY = sourceDims && sourceDims.height > 0 ? imageHeight / sourceDims.height : 1;
  // Distinct RGBA colors, cycled per box index.
  const palette: [number, number, number, number][] = [
    [0xff, 0x3b, 0x30, 0xff],
    [0x34, 0xc7, 0x59, 0xff],
    [0x00, 0x7a, 0xff, 0xff],
    [0xff, 0x9f, 0x0a, 0xff],
    [0xbf, 0x5a, 0xf2, 0xff],
    [0xff, 0xd6, 0x0a, 0xff],
  ];
  const thickness = 2;
  const clampX = (v: number) => Math.max(0, Math.min(imageWidth - 1, Math.round(v)));
  const clampY = (v: number) => Math.max(0, Math.min(imageHeight - 1, Math.round(v)));
  bboxes.forEach((box, boxIndex) => {
    const [r, g, b, a] = palette[boxIndex % palette.length];
    // Jimp pixel colors are packed 32-bit RGBA integers.
    const color = (((r & 0xff) << 24) | ((g & 0xff) << 16) | ((b & 0xff) << 8) | (a & 0xff)) >>> 0;
    const left = clampX(box[0] * scaleX);
    const top = clampY(box[1] * scaleY);
    const right = clampX(box[2] * scaleX);
    const bottom = clampY(box[3] * scaleY);
    // Paint the outline `thickness` pixels deep, insetting toward the center.
    for (let t = 0; t < thickness; t++) {
      for (let x = left; x <= right; x++) {
        if (top + t < imageHeight) image.setPixelColor(color, x, top + t);
        if (bottom - t >= 0) image.setPixelColor(color, x, bottom - t);
      }
      for (let y = top; y <= bottom; y++) {
        if (left + t < imageWidth) image.setPixelColor(color, left + t, y);
        if (right - t >= 0) image.setPixelColor(color, right - t, y);
      }
    }
  });
  // Encode as PNG. Newer Jimp getBuffer variants return a promise; the oldest
  // API only offers callback-style getBuffer, handled in the fallback below.
  const encoded: any =
    typeof image.getBufferAsync === "function"
      ? image.getBufferAsync("image/png")
      : image.getBuffer("image/png");
  if (encoded && typeof encoded.then === "function") {
    return encoded as Promise<Buffer>;
  }
  // Legacy Jimp v0 callback style
  return new Promise<Buffer>((resolve, reject) =>
    image.getBuffer("image/png", (err: any, data: Buffer) =>
      err ? reject(err) : resolve(data)
    )
  );
}
import {
analyzeMlxDetectionBatch,
type MlxDetectionAnalyzerConfig,
type MlxAnalysisItem,
} from "../services/mlxVisionAnalyzer.js";
import { FLORENCE2_MODEL_PATH, DETECT_ENDPOINT } from "../config.js";
import { ensureDetectServerRunning } from "../detect-server-manager.js";
/**
 * Returns the current UTC time as a compact timestamp, e.g.
 * "20240102T030405678Z" (ISO-8601 with separators removed), suitable for
 * embedding in filenames.
 */
function isoStampCompact(): string {
  // toISOString() is always UTC with millisecond precision:
  // "2024-01-02T03:04:05.678Z" -> "20240102T030405678Z".
  return new Date().toISOString().replace(/[-:.]/g, "");
}
/**
 * Parses a canvas notation like "a1", "i3", "v2", or "p4" into its pool and
 * 1-based index. Input is trimmed and case-insensitive; indexes below 1 are
 * clamped to 1. Returns null for anything that does not match.
 */
function parsePrefixedNotation(
  s: string
): { pool: "attachment" | "image" | "variant" | "picture"; index: number } | null {
  const normalized = String(s || "").trim().toLowerCase();
  const match = /^([avip])(\d+)$/.exec(normalized);
  if (match === null) return null;
  // Single-letter prefix -> pool name.
  const poolByLetter = {
    a: "attachment",
    v: "variant",
    i: "image",
    p: "picture",
  } as const;
  return {
    pool: poolByLetter[match[1] as keyof typeof poolByLetter],
    index: Math.max(1, parseInt(match[2], 10)),
  };
}
/**
 * Builds a short metadata footer identifying this plugin for tool
 * descriptions.
 *
 * Prefers the manifest's owner/name pair, then the package name, and finally
 * a hard-coded identifier when neither file can be read from the cwd.
 */
function formatPluginMeta(): string {
  try {
    const readJson = (fileName: string): any =>
      JSON.parse(fs.readFileSync(path.join(process.cwd(), fileName), "utf-8"));
    const pkg = readJson("package.json");
    const mf = readJson("manifest.json");
    const id =
      mf?.owner && mf?.name ? `${mf.owner}/${mf.name}` : pkg?.name || "ceveyne/analyse-image";
    return `Plugin-Identifier: ${id}\nPlugin version: ${pkg?.version || ""}`;
  } catch {
    // Missing or unreadable metadata files: fall back to the static identifier.
    return "Plugin-Identifier: ceveyne/analyse-image";
  }
}
/**
 * Flexible targets schema: accepts array OR comma/space-separated string.
 * Analogous to analyse_image targets — supports single or batch detection.
 */
const FlexibleTargetsList = z
  .union([
    // String form: "a1, i3" / "a1 i3" is split on commas/whitespace into a
    // list of non-empty notations, e.g. ["a1", "i3"].
    z.string().transform((s) =>
      s
        .split(/[\s,]+/)
        .map((x) => x.trim())
        .filter((x) => x.length > 0)
    ),
    // Array form: already a list of notation strings.
    z.array(z.string()),
  ])
  // Both refinements see the normalized array (post-transform for strings).
  .refine((arr) => arr.length >= 1, "targets must contain at least one notation")
  .refine((arr) => arr.length <= 16, "targets must contain at most 16 notations");
// Zod parameter shape for detect_object. `satisfies` validates that every
// value is a Zod schema while keeping the literal key/value types intact.
const DetectObjectParamsShape = {
  // Optional: when omitted, the implementation auto-selects the single
  // available image (and errors if the choice is ambiguous).
  targets: FlexibleTargetsList.optional().describe(
    "One or more source image notations (e.g. ['a1','i3'] or 'a1, i3'). " +
      "Omit when there is exactly one image — it will be selected automatically."
  ),
  // Backend-specific detection instruction; defaults to Florence-2 generic OD.
  task: z
    .string()
    .optional()
    .default("<OD>")
    .describe("Florence-2 detection task token. Default: '<OD>' (object detection)."),
} satisfies Record<string, z.ZodTypeAny>;
/**
 * Builds the `detect_object` tool.
 *
 * For each requested source image (attachment aN, image iN, variant vN, or
 * picture pN) the implementation: resolves preview and full-resolution
 * buffers, runs the configured detection backend over the previews, draws
 * colored bounding boxes onto the original-resolution files, saves each
 * annotated result as a new image record (iN) with a JPEG preview, appends
 * the records to chat state, writes an audit entry, and returns previews
 * plus a JSON summary (labels, bboxes, crop percentages).
 *
 * @param ctl - Tools provider controller from the LM Studio SDK.
 * @returns The configured `detect_object` Tool instance.
 */
export function createDetectObjectTool(ctl: ToolsProviderController): Tool {
  return tool({
    name: "detect_object",
    description: `Detect objects in one or more images and draw colored bounding boxes on each result.
For each source image, returns a new annotated image (saved as iN) with bounding boxes for each detected object, plus a JSON summary with labels, coordinates, and crop percentages. The active backend is configured in plugin settings (Florence-2 or Qwen3-VL).
Parameters:
- targets: One or more image notations (e.g. ['a1','i3'] or 'a1, i3'). Accepts array or comma-separated string. Omit when there is exactly one image.
- task: What to detect.
- Florence-2 backend — use task tokens: '<OD>' (generic objects with labels), '<DENSE_REGION_CAPTION>' (richer captions per region), '<REGION_PROPOSAL>' (regions without labels), '<OPEN_VOCABULARY_DETECTION>dog' (specific concept, replace 'dog').
- Qwen3-VL backend — use natural language: e.g. 'Detect all faces and hands' or 'Find the cat and the laptop'. Omit for full-image general detection.
${formatPluginMeta()}`,
    parameters: DetectObjectParamsShape,
    implementation: async (args: any, ctx: ToolCallContext) => {
      try {
        // Parse targets: accept array or comma/space-separated string (analogous to analyse_image)
        let rawTargets: string[] = [];
        if (Array.isArray(args?.targets)) {
          rawTargets = args.targets.map((s: any) => String(s).trim()).filter(Boolean);
        } else if (typeof args?.targets === "string" && args.targets.trim()) {
          rawTargets = args.targets.trim().split(/[\s,]+/).map((s: string) => s.trim()).filter(Boolean);
        }
        // Empty/blank task falls back to Florence-2 generic object detection.
        const task =
          typeof args?.task === "string" && args.task.trim() ? args.task.trim() : "<OD>";
        console.log("[detect_object] invoked", { targets: rawTargets, task });
        // Resolve the active chat id and its working directory; all output
        // files (annotated images, previews, state) live in that directory.
        let currentLmChatId: string | null = null;
        let currentLmWorkingDir: string | null = null;
        try {
          const chatCtx = await getActiveChatContext();
          if ((chatCtx as any)?.chatId) currentLmChatId = (chatCtx as any).chatId;
          if ((chatCtx as any)?.workingDir) currentLmWorkingDir = (chatCtx as any).workingDir;
        } catch {}
        // Fallback resolver when the chat context did not yield a chat id.
        if (!currentLmChatId) {
          try {
            const resolved = await resolveActiveLMStudioChatId();
            if ((resolved as any)?.ok) currentLmChatId = (resolved as any).chatId;
          } catch {}
        }
        const primaryOutDir: string | undefined =
          currentLmWorkingDir ||
          (currentLmChatId ? getLMStudioWorkingDir(currentLmChatId) : undefined);
        if (!primaryOutDir) {
          console.error("[detect_object] could not resolve working directory");
          return {
            content: [
              {
                type: "text",
                text: "detect_object failed: could not resolve LM Studio chat working directory.",
              },
            ],
            isError: true as const,
          };
        }
        console.log("[detect_object] primaryOutDir:", primaryOutDir);
        await fs.promises.mkdir(primaryOutDir, { recursive: true }).catch(() => {});
        // Sync attachments so state is up to date
        console.log("[detect_object] syncing attachments...");
        try {
          await syncAttachmentsToState(primaryOutDir, false, Number.MAX_SAFE_INTEGER);
        } catch (e) {
          console.warn("[detect_object] attachment sync failed (non-fatal):", (e as any)?.message ?? e);
        }
        console.log("[detect_object] attachment sync done");
        console.log("[detect_object] reading state...");
        // Load the four source pools from chat state; each resolves
        // notations (aN/iN/vN/pN) to files in primaryOutDir.
        const st = await readState(primaryOutDir);
        const attachments: any[] = Array.isArray(st?.attachments) ? st.attachments : [];
        const pictures: any[] = Array.isArray(st?.pictures) ? st.pictures : [];
        const imageRecords: any[] = Array.isArray(st?.images) ? st.images : [];
        const images: Array<{ i: number; path: string }> = imageRecords
          .filter((r: any) => r && typeof r.filename === "string")
          .sort((a: any, b: any) => (a.i || 0) - (b.i || 0))
          .map((r: any) => ({ i: r.i || 1, path: path.join(primaryOutDir, r.filename) }));
        const variantRecords: any[] = Array.isArray(st?.variants) ? st.variants : [];
        const variants: Array<{ v: number; path: string }> = variantRecords
          .filter((v: any) => v && typeof v.filename === "string")
          .map((v: any) => ({ v: v.v || 1, path: path.join(primaryOutDir, v.filename) }));
        console.log("[detect_object] state:", {
          attachments: attachments.length,
          images: images.length,
          variants: variants.length,
          pictures: pictures.length,
        });
        // Resolve source buffers for each target (or auto-select if none given)
        // `buf` is the preview (sent to detection); `origBuf` the full-res file.
        type SourceEntry = { id: string; buf: Buffer; origBuf: Buffer };
        const sourceEntries: SourceEntry[] = [];
        // Reads the preview file for a canvas notation; throws if the
        // notation is invalid or the pool record has no preview.
        async function resolveOneBuf(rawCanvas: string): Promise<Buffer> {
          const pref = parsePrefixedNotation(rawCanvas);
          if (!pref) throw new Error(`Invalid canvas notation: ${rawCanvas}`);
          if (pref.pool === "attachment") {
            const rec = attachments.find((a: any) => a?.a === pref.index);
            const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
            if (!previewRel) throw new Error(`Preview for attachment a${pref.index} not found.`);
            return fs.promises.readFile(path.join(primaryOutDir!, previewRel));
          } else if (pref.pool === "image") {
            const rec = imageRecords.find((r: any) => r?.i === pref.index);
            const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
            if (!previewRel) throw new Error(`Preview for image i${pref.index} not found.`);
            return fs.promises.readFile(path.join(primaryOutDir!, previewRel));
          } else if (pref.pool === "variant") {
            const rec = variantRecords.find((v: any) => v?.v === pref.index);
            const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
            if (!previewRel) throw new Error(`Preview for variant v${pref.index} not found.`);
            return fs.promises.readFile(path.join(primaryOutDir!, previewRel));
          } else {
            const rec = pictures.find((p: any) => p?.p === pref.index);
            const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
            if (!previewRel) throw new Error(`Preview for picture p${pref.index} not found.`);
            return fs.promises.readFile(path.join(primaryOutDir!, previewRel));
          }
        }
        // Resolves the original (full-resolution) file for a canvas notation.
        // Falls back to previewFallback when the original is unavailable.
        async function resolveOriginalBuf(rawCanvas: string, previewFallback: Buffer): Promise<Buffer> {
          try {
            const pref = parsePrefixedNotation(rawCanvas);
            if (!pref) return previewFallback;
            if (pref.pool === "attachment") {
              // Attachments store an absolute origin path; other pools store
              // a filename relative to the working directory.
              const rec = attachments.find((a: any) => a?.a === pref.index);
              const originAbs = rec && typeof rec.originAbs === "string" ? rec.originAbs : "";
              if (!originAbs) return previewFallback;
              return await fs.promises.readFile(originAbs);
            }
            let rec: any;
            if (pref.pool === "image") rec = imageRecords.find((r: any) => r?.i === pref.index);
            else if (pref.pool === "variant") rec = variantRecords.find((v: any) => v?.v === pref.index);
            else rec = pictures.find((p: any) => p?.p === pref.index);
            const filename = rec && typeof rec.filename === "string" ? rec.filename : "";
            if (!filename) return previewFallback;
            return await fs.promises.readFile(path.join(primaryOutDir!, filename));
          } catch {
            return previewFallback;
          }
        }
        try {
          if (rawTargets.length > 0) {
            for (const t of rawTargets) {
              const buf = await resolveOneBuf(t);
              const origBuf = await resolveOriginalBuf(t, buf);
              sourceEntries.push({ id: t, buf, origBuf });
            }
          } else {
            // Auto-select single source
            const total = attachments.length + variantRecords.length + imageRecords.length + pictures.length;
            if (total === 0) throw new Error("No source image available.");
            if (total > 1) throw new Error("Ambiguous source — specify targets explicitly.");
            // Exactly one source exists; find it in pool-priority order.
            let buf: Buffer;
            let id: string;
            if (attachments.length === 1) {
              const rec = attachments[0];
              const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
              if (!previewRel) throw new Error("Preview for attachment not found.");
              buf = await fs.promises.readFile(path.join(primaryOutDir!, previewRel));
              id = `a${typeof rec.a === "number" ? rec.a : 1}`;
            } else if (variantRecords.length === 1) {
              const rec = variantRecords[0];
              const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
              if (!previewRel) throw new Error("Preview for variant not found.");
              buf = await fs.promises.readFile(path.join(primaryOutDir!, previewRel));
              id = `v${typeof rec.v === "number" ? rec.v : 1}`;
            } else if (imageRecords.length === 1) {
              const rec = imageRecords[0];
              const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
              if (!previewRel) throw new Error("Preview for image not found.");
              buf = await fs.promises.readFile(path.join(primaryOutDir!, previewRel));
              id = `i${typeof rec.i === "number" ? rec.i : 1}`;
            } else {
              const rec = pictures[0];
              const previewRel = rec && typeof rec.preview === "string" ? rec.preview : "";
              if (!previewRel) throw new Error("Preview for picture not found.");
              buf = await fs.promises.readFile(path.join(primaryOutDir!, previewRel));
              id = `p${rec.p ?? 1}`;
            }
            const origBuf = await resolveOriginalBuf(id, buf);
            sourceEntries.push({ id, buf, origBuf });
          }
        } catch (e) {
          return {
            content: [{ type: "text", text: String((e as any)?.message || e) }],
            isError: true as const,
          };
        }
        console.log("[detect_object] sources resolved:", sourceEntries.map((s) => s.id));
        // Start managed server unless TTL=0 (external server mode)
        const serverTTL = parseInt(process.env.SERVER_TTL ?? "1440", 10);
        if (serverTTL !== 0) {
          try {
            await ensureDetectServerRunning(
              {
                port: parseInt(process.env.MLX_VISION_PORT ?? "8765", 10),
                mlxVisionModelPath: process.env.MLX_VISION_MODEL_PATH ?? "",
                mlxVisionEnabled: process.env.MLX_VISION_ENABLED !== "false",
                florence2ModelPath: process.env.FLORENCE2_MODEL_PATH || process.env.DETECT_MODEL_PATH || "",
                backend: process.env.FASTVLM_BACKEND || "mlx",
                maxTokens: process.env.MLX_VISION_MAX_TOKENS ? parseInt(process.env.MLX_VISION_MAX_TOKENS, 10) : undefined,
                temperature: process.env.MLX_VISION_TEMPERATURE ? parseFloat(process.env.MLX_VISION_TEMPERATURE) : undefined,
                detectBackend: process.env.FASTVLM_DETECT_BACKEND || "florence2",
                qwen3VlModelPath: process.env.FASTVLM_QWEN3_VL_MODEL_PATH || "",
              },
              // Progress messages are surfaced as tool status; failures to
              // report status are ignored.
              (msg) => { try { ctx.status(msg); } catch {} }
            );
          } catch (e) {
            throw new Error(`Failed to start detection server: ${(e as Error).message || String(e)}`);
          }
        }
        try {
          ctx.status(`Detecting objects in ${sourceEntries.length} image${sourceEntries.length === 1 ? "" : "s"}…`);
        } catch {}
        // Write source buffers to temp files and build detection items
        const detectPort = parseInt(process.env.MLX_VISION_PORT ?? "8765", 10);
        const detectionConfig: MlxDetectionAnalyzerConfig = {
          endpoint: process.env.DETECT_ENDPOINT || DETECT_ENDPOINT || `http://localhost:${detectPort}/detect`,
          task,
          florence2ModelPath: process.env.FLORENCE2_MODEL_PATH || FLORENCE2_MODEL_PATH,
          timeoutMs: 120_000,
        };
        // The detection service consumes file paths, so previews are staged
        // as temp PNGs in the working dir and removed in the finally below.
        const tmpPaths: string[] = [];
        const detectionItems: MlxAnalysisItem[] = [];
        for (const entry of sourceEntries) {
          const tmpPath = path.join(primaryOutDir, `_tmp_detect_src_${entry.id}_${Date.now()}.png`);
          await fs.promises.writeFile(tmpPath, entry.buf);
          tmpPaths.push(tmpPath);
          detectionItems.push({ id: entry.id, filePath: tmpPath });
        }
        console.log("[detect_object] calling detection API for", detectionItems.length, "items");
        let batchResult;
        try {
          batchResult = await analyzeMlxDetectionBatch(detectionItems, detectionConfig);
          console.log("[detect_object] detection API returned:", {
            results: batchResult.results.length,
            totalMs: batchResult.totalInferenceTimeMs,
          });
          try {
            const totalObjects = batchResult.results.reduce((s, r) => s + (r.objects?.length ?? 0), 0);
            const ms = Math.round(batchResult.totalInferenceTimeMs);
            ctx.status(`${totalObjects} object${totalObjects === 1 ? "" : "s"} found across ${batchResult.results.length} image${batchResult.results.length === 1 ? "" : "s"} (${ms}ms) — drawing bounding boxes…`);
          } catch {}
        } finally {
          // Best-effort cleanup of the staged temp files.
          for (const tp of tmpPaths) await fs.promises.unlink(tp).catch(() => {});
        }
        if (!batchResult.results.length) {
          return {
            content: [{ type: "text", text: "detect_object: no results returned from Florence-2 API." }],
            isError: true as const,
          };
        }
        // Per-image: draw bboxes, save, generate preview, build state records
        const variantPreviewSpec = (VARIANT_FULL_CONFIG as any).preview;
        const stamp = isoStampCompact();
        // NOTE(review): nextI comes from the state read done before detection;
        // assumes no concurrent writer bumps nextImageI meanwhile — verify.
        let nextI = Math.max(1, st.counters?.nextImageI ?? 1);
        const imageRecordsForState: any[] = [];
        const resultEntries: Array<{
          id: string;
          i: number;
          detResult: typeof batchResult.results[0];
          savedPath: string;
          savedFileUrl: string;
          savedSize: number;
          preview: any | null;
          httpOriginal: string;
          httpPreview: string;
        }> = [];
        // Optional HTTP base for serving results; empty URLs when unavailable.
        const httpBase = await getHealthyServerBaseUrl();
        for (let idx = 0; idx < batchResult.results.length; idx++) {
          const detResult = batchResult.results[idx];
          const sourceId = sourceEntries[idx]?.id ?? `canvas${idx + 1}`;
          const origBuf = sourceEntries[idx].origBuf;
          const currentI = nextI++;
          const bboxes = detResult.objects.map((o) => o.bbox as [number, number, number, number]);
          console.log(`[detect_object] drawing ${bboxes.length} bboxes for ${sourceId}...`);
          // Draw on the original-resolution file; sourceDims carries the preview space in which
          // the detection server reported its bbox coordinates so they get scaled up correctly.
          const annotatedBuf = await drawBboxesOnImage(origBuf, bboxes, {
            width: detResult.imageWidth,
            height: detResult.imageHeight,
          });
          const baseName = `image-${stamp}-i${currentI}`;
          const savedPath = path.join(primaryOutDir, `${baseName}.png`);
          await fs.promises.writeFile(savedPath, annotatedBuf);
          const savedFileUrl = pathToFileURL(savedPath).toString();
          const savedSize = annotatedBuf.length;
          console.log(`[detect_object] annotated image written: ${savedPath} (${savedSize} bytes)`);
          // Preview generation is non-fatal; `preview` stays null on failure.
          let preview: any = null;
          try {
            const p = await generatePreviewFromBuffer(annotatedBuf, primaryOutDir, `${baseName}.png`, variantPreviewSpec);
            preview = {
              ok: true as const,
              filePath: p.previewAbs,
              fileName: p.previewFilename,
              fileUrl: pathToFileURL(p.previewAbs).toString(),
              size_bytes: p.data.length,
              width: p.width,
              height: p.height,
              mimeType: "image/jpeg" as const,
              dataBase64: p.data.toString("base64"),
            };
          } catch (e) {
            console.warn(`[detect_object] preview generation failed for ${sourceId}:`, String(e));
          }
          const httpOriginal = httpBase
            ? toHttpOriginalUrl(`${baseName}.png`, httpBase, currentLmChatId || undefined)
            : "";
          const httpPreview = (() => {
            if (!httpBase || !currentLmChatId || !preview?.fileName) return "";
            return toHttpPreviewUrl(preview.fileName, httpBase, currentLmChatId);
          })();
          // Record persisted to chat state; `detections` keeps both raw bbox
          // coordinates and crop percentages per detected object.
          imageRecordsForState.push({
            filename: `${baseName}.png`,
            preview: preview ? `preview-${baseName}.jpg` : undefined,
            i: currentI,
            sourceTool: `${getSelfPluginIdentifier()}/detect_object`,
            detectSource: sourceId,
            task,
            imageWidth: detResult.imageWidth,
            imageHeight: detResult.imageHeight,
            detections: detResult.objects.map((o) => ({
              label: o.label,
              bbox: { x1: o.bbox[0], y1: o.bbox[1], x2: o.bbox[2], y2: o.bbox[3] },
              crop: {
                cropLeft: o.cropLeft,
                cropRight: o.cropRight,
                cropTop: o.cropTop,
                cropBottom: o.cropBottom,
              },
            })),
          });
          resultEntries.push({ id: sourceId, i: currentI, detResult, savedPath, savedFileUrl, savedSize, preview, httpOriginal, httpPreview });
        }
        // Update state once for all images
        console.log("[detect_object] updating state...");
        try {
          // Re-read state just before writing to pick up concurrent changes.
          const stateForUpdate = await readState(primaryOutDir);
          const appendResult = appendImages(stateForUpdate, imageRecordsForState);
          if (appendResult.changed) {
            await writeStateAtomic(primaryOutDir, stateForUpdate);
            console.log("[detect_object] state written, nextImageI:", stateForUpdate.counters?.nextImageI);
          }
        } catch (e) {
          console.warn("[detect_object] state update failed:", String(e));
        }
        // Audit log
        try {
          const audit = buildAuditLogger({ backend: "detect_object", mode: "detect_object" as any, requestId: undefined });
          if (currentLmChatId) audit.setChatId(currentLmChatId);
          audit.setUserRequest({ targets: rawTargets, task });
          audit.setOutput({
            images: resultEntries.map((r) => ({
              id: r.id,
              i: r.i,
              detections: r.detResult.objects.length,
              path: r.savedPath,
              url: r.savedFileUrl,
              bytes: r.savedSize,
              ...(r.httpOriginal ? { http_url: r.httpOriginal } : {}),
              ...(r.preview ? { preview_path: r.preview.filePath, preview_url: r.preview.fileUrl } : {}),
              ...(r.httpPreview ? { http_preview_url: r.httpPreview } : {}),
            })),
          });
          await audit.write();
        } catch (e) {
          console.warn("[detect_object] audit logging failed:", String(e));
        }
        // Assemble tool result
        // PREVIEW_IN_CHAT defaults to true; only "1"/"true" enable it when set.
        const envPreviewRaw = process.env["PREVIEW_IN_CHAT"];
        const previewInChat =
          envPreviewRaw === undefined
            ? true
            : envPreviewRaw === "1" || envPreviewRaw.toLowerCase() === "true";
        const summaries = resultEntries.map((r) => ({
          tool: "detect_object",
          source: r.id,
          i: r.i,
          imageWidth: r.detResult.imageWidth,
          imageHeight: r.detResult.imageHeight,
          task,
          inferenceTimeMs: r.detResult.inferenceTimeMs,
          detections: r.detResult.objects.map((o) => ({
            label: o.label,
            bbox: { x1: o.bbox[0], y1: o.bbox[1], x2: o.bbox[2], y2: o.bbox[3] },
            // Crop values are percentages; px values are derived from the
            // detection image dimensions.
            crop: {
              left: { pct: o.cropLeft, px: Math.round((o.cropLeft / 100) * r.detResult.imageWidth) },
              right: { pct: o.cropRight, px: Math.round((o.cropRight / 100) * r.detResult.imageWidth) },
              top: { pct: o.cropTop, px: Math.round((o.cropTop / 100) * r.detResult.imageHeight) },
              bottom: { pct: o.cropBottom, px: Math.round((o.cropBottom / 100) * r.detResult.imageHeight) },
            },
            crop_tool_hint: "Pass crop.left.pct as cropLeft, crop.right.pct as cropRight, crop.top.pct as cropTop, crop.bottom.pct as cropBottom to the crop tool.",
          })),
        }));
        const reviewHint = "Carefully examine the preview and comment on how well the object detection matches your intent.";
        const content: any[] = [];
        for (const r of resultEntries) {
          const fallbackPreviewUrl = r.preview?.fileUrl || r.savedFileUrl;
          const previewLine = `Preview i${r.i}: ${r.httpPreview ? r.httpPreview : fallbackPreviewUrl}`;
          const originalLine = `Original i${r.i}: ${r.httpOriginal ? r.httpOriginal : r.savedFileUrl}`;
          if (previewInChat && r.preview) {
            const fname = String(r.preview.fileName || "");
            content.push({
              type: "image",
              fileName: fname,
              mimeType: r.preview.mimeType,
              // NOTE(review): markdown is an empty template literal here even
              // though $hint says "use the markdown above" — possibly stripped
              // content; verify against the original source.
              markdown: ``,
              $hint: "This is an image file. Present the image to the user by using the markdown above.",
            } as any);
          }
          content.push({ type: "text", text: previewLine });
          content.push({ type: "text", text: originalLine });
        }
        const totalMs = Math.round(batchResult.totalInferenceTimeMs);
        if (totalMs > 0) {
          content.push({ type: "text", text: `Total inference time: ${totalMs}ms` });
        }
        content.push({
          type: "text",
          text: JSON.stringify(summaries.length === 1 ? summaries[0] : summaries),
          $hint: reviewHint,
        });
        return { content };
      } catch (error) {
        // Top-level catch: any unexpected failure becomes an error result.
        return {
          content: [
            {
              type: "text",
              text: `detect_object failed: ${(error as Error).message || String(error)}`,
            },
          ],
          isError: true as const,
        };
      }
    },
  });
}