import fs from "fs";
import path from "path";
import { getLogsDir } from "../core-bundle.mjs";

/** One image queued for analysis or detection. */
export interface VisionAnalysisItem {
  /** Caller-chosen identifier; it is copied unchanged onto the matching result entry. */
  id: string;
  /** Path of the image file; the file is read from disk and sent to the API as a base64 data URL. */
  filePath: string;
}

/** Text answer produced for a single analyzed image. */
export interface VisionAnalysisResult {
  /** The `id` of the item this result belongs to. */
  id: string;
  /** Message text extracted from the API response (trimmed; may be empty). */
  text: string;
  /** Elapsed wall-clock time of the chat request for this image, in milliseconds. */
  inferenceTimeMs: number;
}

/** Outcome of an analysis batch. */
export interface VisionAnalysisBatchResult {
  /** One entry per processed item, in input order. */
  results: VisionAnalysisResult[];
  /** Sum of the per-item `inferenceTimeMs` values. */
  totalInferenceTimeMs: number;
  /** Backend identifier; the batch functions in this file always set "vision-api". */
  backend: string;
}

/** A single detected object, expressed both as a pixel box and as crop percentages. */
export interface VisionDetectionObject {
  /** Label text reported by the model (an empty string when the model supplied none). */
  label: string;
  /** Bounding box as [x1, y1, x2, y2] in image pixels (scaled from the model's 0–1000 coordinates). */
  bbox: [number, number, number, number];
  /** Distance from the image's left edge to the box, as a percentage of image width. */
  cropLeft: number;
  /** Distance from the box to the image's right edge, as a percentage of image width. */
  cropRight: number;
  /** Distance from the image's top edge to the box, as a percentage of image height. */
  cropTop: number;
  /** Distance from the box to the image's bottom edge, as a percentage of image height. */
  cropBottom: number;
}

/** Detection output for a single image. */
export interface VisionDetectionResult {
  /** The `id` of the item this result belongs to. */
  id: string;
  /** Objects that survived validation and de-duplication; may be empty. */
  objects: VisionDetectionObject[];
  /** Image width in pixels, as read from the file header. */
  imageWidth: number;
  /** Image height in pixels, as read from the file header. */
  imageHeight: number;
  /** Elapsed wall-clock time of the chat request for this image, in milliseconds. */
  inferenceTimeMs: number;
}

/** Outcome of a detection batch. */
export interface VisionDetectionBatchResult {
  /** One entry per processed item, in input order. */
  results: VisionDetectionResult[];
  /** Sum of the per-item `inferenceTimeMs` values. */
  totalInferenceTimeMs: number;
  /** Backend identifier; the batch functions in this file always set "vision-api". */
  backend: string;
}

/** Options that control how the detection prompt is built and sent. */
export interface VisionDetectionAnalyzerConfig {
  /** Object label to look for. When non-empty it takes precedence and `odPrompt` is not used. */
  task?: string;
  /** Free-form detection instruction used when `task` is empty; the label and JSON format rules are appended to it. */
  odPrompt?: string;
  /** Max output tokens; only sent when it is a finite number greater than zero (floored to an integer). */
  maxTokens?: number;
  /** Sampling temperature; only sent when it is a finite number. */
  temperature?: number;
  /** Request timeout in ms; detection falls back to 120000 when omitted. */
  timeoutMs?: number;
}

/** Connection and sampling options for image analysis requests. */
export interface LmStudioVisionAnalyzerConfig {
  /**
   * Vision API root/base URL, e.g. http://127.0.0.1:1234/v1.
   * A trailing `/v1` or `/api/v1` is stripped before endpoint paths are appended.
   */
  baseUrl: string;
  /** Optional bearer token for the Vision API; blank values produce no Authorization header. */
  apiKey?: string;
  /** Vision API model key; falls back to "vision-capability-priming" when empty. */
  model?: string;
  /** Prompt sent to the vision model for each image; falls back to "Describe the image." when empty. */
  prompt?: string;
  /** Max output tokens for response; only sent when it is a finite number greater than zero. */
  maxTokens?: number;
  /** Sampling temperature; only sent when it is a finite number. */
  temperature?: number;
  /** Timeout in ms; falls back to 180000 when omitted. */
  timeoutMs?: number;
}

/** Inputs for making sure a vision model instance is loaded before use. */
export interface LmStudioVisionEnsureConfig {
  /** Vision API root/base URL; a trailing `/v1` or `/api/v1` is stripped before endpoint paths are appended. */
  baseUrl: string;
  /** Optional bearer token for the Vision API. */
  apiKey?: string;
  /** Key of the model that must be loaded; compared case-insensitively against the API's model list. */
  modelKey: string;
  /** Optional progress callback; called with a "Loading <model>..." message before a load is requested. Errors it throws are ignored. */
  status?: (message: string) => void;
}

// Matches the first Markdown code fence (optionally tagged "json") and captures its contents.
const JSON_FENCE_RE = /```(?:json)?\s*([\s\S]*?)```/i;
// Matches one `{"bbox_2d": [a, b, c, d], "label": "..."}` object with integer coordinates.
// Used to salvage individual items when the response as a whole is not valid JSON.
// Note: the regex carries the global flag, so it is stateful via `lastIndex`.
const ITEM_RE = /\{\s*"bbox_2d":\s*\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\],\s*"label":\s*"([^"]+)"\s*\}/gi;
// Suffix appended to every detection prompt; it pins the response format the parser expects.
const JSON_FORMAT =
  " Output JSON only — a JSON array where each element has" +
  " 'bbox_2d' ([x1, y1, x2, y2] as integers normalized 0–1000) and 'label' (a string)." +
  " No prose, no markdown, no explanation.";
// Extra labeling rules appended to a free-form detection instruction (not used for single-label tasks).
const LABEL_FORMAT_RULE =
  "\n\nLABEL FORMAT RULE (mandatory):" +
  "\n- Labels must be concise and specific: 2–4 words maximum." +
  "\n- Examples: 'plugin list', 'plugin name', 'human face', 'left hand', 'red car', 'fluffy owl toy'";

/**
 * Reduces a configured base URL to the API root that endpoint paths are appended to.
 *
 * Whitespace is trimmed, trailing slashes are dropped, and a trailing `/v1` or
 * `/api/v1` segment (case-insensitive) is removed.
 *
 * @param baseUrl Configured URL; nullish or empty values yield an empty string.
 * @returns The normalized root without a trailing slash, or "" when nothing is left.
 */
function normalizeLmApiRoot(baseUrl: string): string {
  return String(baseUrl || "")
    .trim()
    // Drop trailing slashes first so inputs such as ".../v1//" still expose the suffix.
    .replace(/\/+$/, "")
    .replace(/\/(api\/v1|v1)$/i, "")
    // Removing the suffix can uncover further slashes (e.g. "host//v1").
    .replace(/\/+$/, "");
}

/**
 * Builds the HTTP headers for a Vision API request.
 *
 * @param apiKey Optional bearer token; ignored when missing or blank.
 * @param contentType When true, a JSON `Content-Type` header is included.
 * @returns A fresh header map containing only the headers that apply.
 */
function authHeaders(apiKey: string | undefined, contentType = false): Record<string, string> {
  const pairs: Array<[string, string]> = [];
  if (contentType) {
    pairs.push(["Content-Type", "application/json"]);
  }
  const token = apiKey?.trim();
  if (token) {
    pairs.push(["Authorization", `Bearer ${token}`]);
  }
  return Object.fromEntries(pairs);
}

/**
 * Logs request metadata to the console and appends the same line, prefixed
 * with an ISO timestamp, to `user-docs-plugin.log` in the logs directory.
 *
 * File logging is best-effort: any failure while resolving the directory,
 * creating it, or appending is ignored.
 *
 * @param metadata JSON-serializable description of the outgoing request.
 */
function logVisionRequestMetadata(metadata: Record<string, unknown>): void {
  const message = `[LmStudioVisionAnalyzer] /api/v1/chat request ${JSON.stringify(metadata)}`;
  console.info(message);

  try {
    const directory = getLogsDir();
    const directoryMissing = !fs.existsSync(directory);
    if (directoryMissing) {
      fs.mkdirSync(directory, { recursive: true });
    }
    const logFile = path.join(directory, "user-docs-plugin.log");
    const entry = `${new Date().toISOString()} - ${message}\n`;
    fs.appendFileSync(logFile, entry, "utf8");
  } catch {
    // Deliberately ignored: a logging failure must not break the request.
  }
}

/**
 * Tells whether a model entry reports at least one loaded instance.
 *
 * @param modelInfo A model entry from the models listing; may be nullish.
 * @returns True only when `loaded_instances` is a non-empty array.
 */
function hasLoadedInstances(modelInfo: any): boolean {
  const instances = modelInfo?.loaded_instances;
  if (!Array.isArray(instances)) {
    return false;
  }
  return instances.length > 0;
}

/**
 * Looks up a model in `GET /api/v1/models` and reports whether it has a loaded instance.
 *
 * The listing may be a bare array or an object with a `models` or `data` array.
 * Entries are matched case-insensitively on `key` (falling back to `id`).
 * Every failure (empty base URL, non-OK status, network error, a 5 second
 * timeout, unknown model) is reported as `{ loaded: false }`.
 *
 * @param baseUrl Configured Vision API base URL.
 * @param apiKey Optional bearer token.
 * @param modelKey Model key to look for.
 * @returns The load state, plus the key as reported by the API when the model was found.
 */
async function getVisionModelState(
  baseUrl: string,
  apiKey: string | undefined,
  modelKey: string
): Promise<{ loaded: boolean; modelKey?: string }> {
  const root = normalizeLmApiRoot(baseUrl);
  if (!root) return { loaded: false };

  const wantedKey = modelKey.trim().toLowerCase();
  const keyOf = (entry: any): string => String(entry?.key || entry?.id || "").trim();

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), 5000);
  try {
    const response = await fetch(`${root}/api/v1/models`, {
      headers: authHeaders(apiKey),
      signal: aborter.signal,
    });
    if (!response.ok) return { loaded: false };

    const payload = (await response.json()) as any;
    let catalog: any[] = [];
    if (Array.isArray(payload)) {
      catalog = payload;
    } else if (Array.isArray(payload?.models)) {
      catalog = payload.models;
    } else if (Array.isArray(payload?.data)) {
      catalog = payload.data;
    }

    const match = catalog.find((entry: any) => keyOf(entry).toLowerCase() === wantedKey);
    if (!match) return { loaded: false };

    return {
      loaded: hasLoadedInstances(match),
      modelKey: keyOf(match) || undefined,
    };
  } catch {
    return { loaded: false };
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Asks the Vision API to load a model through `POST /api/v1/models/load`.
 *
 * The call never throws; failures are returned as `{ ok: false, error }`.
 * A response counts as failed when the HTTP status is not OK or when the body
 * carries an `error.message`, `error`, or `message` field. The request is
 * aborted after 600000 ms.
 *
 * @param baseUrl Configured Vision API base URL.
 * @param apiKey Optional bearer token.
 * @param modelKey Key of the model to load.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false }` with a readable error.
 */
async function loadVisionInstanceViaApi(
  baseUrl: string,
  apiKey: string | undefined,
  modelKey: string
): Promise<{ ok: true } | { ok: false; error: string }> {
  const apiRoot = normalizeLmApiRoot(baseUrl);
  if (!apiRoot) {
    return { ok: false, error: "Vision API base URL is empty." };
  }

  const timeoutMs = 600_000;

  // Single source for the failure message so both error paths stay in sync.
  const failure = (detail: string): { ok: false; error: string } => ({
    ok: false,
    error: `Vision API could not load '${modelKey}' via /api/v1/models/load. This can happen when there are not enough system resources available. Error: ${detail}`,
  });

  // Error payloads are not always strings; serialize objects instead of
  // letting them render as "[object Object]".
  const describe = (value: unknown): string => {
    if (typeof value === "string") return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  };

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(`${apiRoot}/api/v1/models/load`, {
      method: "POST",
      headers: authHeaders(apiKey, true),
      body: JSON.stringify({
        model: modelKey,
        echo_load_config: true,
      }),
      signal: controller.signal,
    });

    // Read the body as text first so non-JSON error pages can still be reported.
    const text = await response.text().catch(() => "");
    let data: any = null;
    if (text.trim()) {
      try {
        data = JSON.parse(text);
      } catch {
        data = { raw: text };
      }
    }

    const apiError: unknown = data?.error?.message || data?.error || data?.message;
    if (!response.ok || apiError) {
      const detail = apiError
        ? describe(apiError)
        : text || `${response.status} ${response.statusText}`;
      return failure(detail);
    }
    return { ok: true };
  } catch (error: any) {
    const detail = error?.name === "AbortError"
      ? `request timed out after ${timeoutMs} ms`
      : error?.message || String(error);
    return failure(detail);
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Makes sure the configured vision model has a loaded instance, loading it on demand.
 *
 * Steps: check the model's state; if it is not loaded, notify `config.status`,
 * request a load, then check the state again to confirm.
 *
 * @param config Connection details, the model key, and an optional status callback.
 * @returns `{ ok: true, loaded: false }` when the model was already loaded,
 *   `{ ok: true, loaded: true }` when this call loaded it, or `{ ok: false, error }`
 *   when the key is empty, the load failed, or the model is still not reported as loaded.
 */
export async function ensureLmStudioVisionInstanceReady(
  config: LmStudioVisionEnsureConfig
): Promise<{ ok: true; loaded: boolean } | { ok: false; error: string }> {
  const requestedKey = String(config.modelKey || "").trim();
  if (requestedKey === "") {
    return {
      ok: false,
      error: "Vision API mode is active, but Qwen3-VL model key is empty.",
    };
  }

  const queryState = () => getVisionModelState(config.baseUrl, config.apiKey, requestedKey);

  const before = await queryState();
  if (before.loaded) {
    return { ok: true, loaded: false };
  }

  // A misbehaving status callback must not prevent the load.
  try {
    config.status?.(`Loading ${requestedKey}...`);
  } catch {}

  const loadOutcome = await loadVisionInstanceViaApi(config.baseUrl, config.apiKey, requestedKey);
  if (!loadOutcome.ok) {
    return loadOutcome;
  }

  const after = await queryState();
  if (!after.loaded) {
    return {
      ok: false,
      error: `Vision API loaded '${requestedKey}' via /api/v1/models/load, but /api/v1/models did not report it as loaded.`,
    };
  }

  const reportedKey = after.modelKey?.trim().toLowerCase();
  if (reportedKey !== requestedKey.toLowerCase()) {
    return {
      ok: false,
      error: `Vision API loaded a model, but /api/v1/models reports '${after.modelKey || "unknown model"}' instead of '${requestedKey}'.`,
    };
  }

  return { ok: true, loaded: true };
}

/**
 * Picks an image MIME type from a file's extension (case-insensitive).
 *
 * @param filePath Path or file name of the image.
 * @returns The MIME type for JPEG, WebP, or GIF extensions; "image/png" for anything else.
 */
function mimeFromPath(filePath: string): string {
  switch (path.extname(filePath).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".webp":
      return "image/webp";
    case ".gif":
      return "image/gif";
    default:
      return "image/png";
  }
}

/**
 * Reads a 24-bit little-endian unsigned integer.
 *
 * @param buffer Source bytes.
 * @param offset Index of the least significant byte.
 * @returns The combined value of the three bytes starting at `offset`.
 */
function readUInt24LE(buffer: Buffer, offset: number): number {
  let value = 0;
  // Fold from the most significant byte down to the least significant one.
  for (let index = 2; index >= 0; index -= 1) {
    value = (value << 8) | buffer[offset + index];
  }
  return value;
}

/**
 * Reads width and height from a PNG header.
 *
 * The buffer must start with the full 8-byte PNG signature and its first
 * chunk must be IHDR; otherwise the bytes at offsets 16..23 are not the
 * image dimensions and `null` is returned instead of a bogus size.
 *
 * @param buffer File contents (at least the first 24 bytes).
 * @returns The dimensions from IHDR, or null when the buffer is not a standard PNG.
 */
function readPngDimensions(buffer: Buffer): { width: number; height: number } | null {
  // Signature (8) + IHDR length (4) + chunk type (4) + width (4) + height (4).
  if (buffer.length < 24) return null;

  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  const hasSignature = signature.every((byte, index) => buffer[index] === byte);
  if (!hasSignature) return null;

  // Width and height sit at fixed offsets only when IHDR is the first chunk.
  if (buffer.toString("ascii", 12, 16) !== "IHDR") return null;

  return {
    width: buffer.readUInt32BE(16),
    height: buffer.readUInt32BE(20),
  };
}

/**
 * Reads the logical screen size from a GIF header.
 *
 * @param buffer File contents (at least the first 10 bytes).
 * @returns Width and height, or null when the buffer does not start with "GIF87a" or "GIF89a".
 */
function readGifDimensions(buffer: Buffer): { width: number; height: number } | null {
  const knownHeaders = ["GIF87a", "GIF89a"];
  if (buffer.length < 10) {
    return null;
  }
  const magic = buffer.toString("ascii", 0, 6);
  if (!knownHeaders.includes(magic)) {
    return null;
  }
  // Both fields are 16-bit little-endian, directly after the 6-byte header.
  const width = buffer.readUInt16LE(6);
  const height = buffer.readUInt16LE(8);
  return { width, height };
}

/**
 * Reads width and height from a WebP file's first chunk.
 *
 * Handles the three chunk kinds that can open a WebP file: `VP8X` (extended),
 * `VP8L` (lossless) and `VP8 ` (lossy). Each branch enforces the minimum
 * length it needs, so a short but sufficient header (VP8L needs 25 bytes) is
 * not rejected by a blanket size check.
 *
 * @param buffer File contents (at least the header bytes).
 * @returns The dimensions, or null when the buffer is not WebP or is too short.
 */
function readWebpDimensions(buffer: Buffer): { width: number; height: number } | null {
  // RIFF header (12 bytes) plus the FourCC of the first chunk (4 bytes).
  if (buffer.length < 16) return null;
  if (buffer.toString("ascii", 0, 4) !== "RIFF" || buffer.toString("ascii", 8, 12) !== "WEBP") {
    return null;
  }

  const chunkType = buffer.toString("ascii", 12, 16);

  // Extended format: 24-bit "canvas size minus one" fields at offsets 24 and 27.
  if (chunkType === "VP8X" && buffer.length >= 30) {
    return {
      width: readUInt24LE(buffer, 24) + 1,
      height: readUInt24LE(buffer, 27) + 1,
    };
  }

  // Lossless: 0x2f signature byte, then two packed 14-bit "size minus one" fields.
  if (chunkType === "VP8L" && buffer.length >= 25 && buffer[20] === 0x2f) {
    const bits = buffer.readUInt32LE(21);
    return {
      width: (bits & 0x3fff) + 1,
      height: ((bits >> 14) & 0x3fff) + 1,
    };
  }

  // Lossy: 14-bit width and height; the upper two bits of each field are masked off.
  if (chunkType === "VP8 " && buffer.length >= 30) {
    return {
      width: buffer.readUInt16LE(26) & 0x3fff,
      height: buffer.readUInt16LE(28) & 0x3fff,
    };
  }

  return null;
}

/**
 * Walks JPEG marker segments until a start-of-frame segment is found and
 * returns the frame size stored in it.
 *
 * Scanning stops without a result at end-of-image, at start-of-scan, or when
 * a segment length is invalid or points past the end of the buffer.
 *
 * @param buffer File contents.
 * @returns The frame dimensions, or null when the buffer is not a JPEG or no frame header was found.
 */
function readJpegDimensions(buffer: Buffer): { width: number; height: number } | null {
  const startsWithSoi = buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8;
  if (!startsWithSoi) return null;

  let cursor = 2;
  while (cursor + 9 < buffer.length) {
    // Resynchronize on the next 0xFF byte.
    if (buffer[cursor] !== 0xff) {
      cursor += 1;
      continue;
    }
    // Skip fill bytes; the first non-0xFF byte is the marker code.
    while (cursor < buffer.length && buffer[cursor] === 0xff) cursor += 1;
    const marker = buffer[cursor];
    cursor += 1;

    // EOI or SOS: no frame header can follow.
    if (marker === 0xd9 || marker === 0xda) return null;
    if (cursor + 2 > buffer.length) return null;

    const length = buffer.readUInt16BE(cursor);
    if (length < 2 || cursor + length > buffer.length) return null;

    // SOF0..SOF15, excluding DHT (0xC4), JPG (0xC8) and DAC (0xCC).
    const isFrameHeader =
      marker >= 0xc0 &&
      marker <= 0xcf &&
      marker !== 0xc4 &&
      marker !== 0xc8 &&
      marker !== 0xcc;
    if (isFrameHeader && length >= 7) {
      // Segment layout: length (2), sample precision (1), height (2), width (2).
      return {
        height: buffer.readUInt16BE(cursor + 3),
        width: buffer.readUInt16BE(cursor + 5),
      };
    }

    cursor += length;
  }
  return null;
}

/**
 * Reads an image file and determines its pixel dimensions from the header.
 *
 * The format readers are tried in order (PNG, JPEG, WebP, GIF) and the first
 * one that recognizes the data wins.
 *
 * @param filePath Path of the image file.
 * @returns The image width and height.
 * @throws Error when the file cannot be read, no reader recognizes it, or a dimension is not positive.
 */
async function readImageDimensions(filePath: string): Promise<{ width: number; height: number }> {
  const bytes = await fs.promises.readFile(filePath);

  const readers = [readPngDimensions, readJpegDimensions, readWebpDimensions, readGifDimensions];
  let size: { width: number; height: number } | null = null;
  for (const reader of readers) {
    size = reader(bytes);
    if (size) break;
  }

  if (size === null || size.width <= 0 || size.height <= 0) {
    throw new Error(`Could not determine image dimensions for ${filePath}`);
  }
  return size;
}

/**
 * Collects the text of a chat response.
 *
 * Only `output` entries of type "message" are considered. Their content may be
 * a plain string or an array whose parts are strings or objects with a string
 * `text` or `content` field. When that yields nothing, the top-level `text`
 * field is used, then the top-level `content` field.
 *
 * @param data Parsed response body; may be nullish or of an unexpected shape.
 * @returns All pieces joined with newlines and trimmed; "" when nothing was found.
 */
function extractMessageText(data: any): string {
  const collected: string[] = [];
  const outputItems: any[] = Array.isArray(data?.output) ? data.output : [];

  outputItems
    .filter((entry) => entry?.type === "message")
    .forEach((entry) => {
      const body = entry?.content;
      if (typeof body === "string") {
        collected.push(body);
        return;
      }
      if (!Array.isArray(body)) return;

      for (const fragment of body) {
        const value =
          typeof fragment === "string"
            ? fragment
            : typeof fragment?.text === "string"
            ? fragment.text
            : typeof fragment?.content === "string"
            ? fragment.content
            : undefined;
        if (value !== undefined) collected.push(value);
      }
    });

  // Fallbacks for responses that carry the text at the top level.
  if (collected.length === 0) {
    if (typeof data?.text === "string") {
      collected.push(data.text);
    } else if (typeof data?.content === "string") {
      collected.push(data.content);
    }
  }

  return collected.join("\n").trim();
}

/**
 * Builds the detection prompt sent to the model.
 *
 * A non-empty `task` produces a single-label prompt. Otherwise `odPrompt` is
 * used as the instruction, followed by the label format rule. The JSON output
 * format is always appended.
 *
 * @param task Label to detect; takes precedence when non-empty.
 * @param odPrompt Free-form detection instruction used when `task` is empty.
 * @returns The complete prompt text.
 * @throws Error when both `task` and `odPrompt` are empty.
 */
function buildDetectPrompt(task: string | undefined, odPrompt: string | undefined): string {
  const target = String(task || "").trim();
  if (target.length > 0) {
    return `Detect all instances of '${target}' in the image.${JSON_FORMAT}`;
  }

  const freeForm = String(odPrompt || "").trim();
  if (freeForm.length === 0) {
    throw new Error("No OD prompt available: odPrompt not set and DETECT_OD_PROMPT env var not set");
  }
  return [freeForm, LABEL_FORMAT_RULE, JSON_FORMAT].join("");
}

/**
 * Converts a pixel bounding box into crop margins, expressed as percentages
 * of the image size measured inward from each edge.
 *
 * @param bbox Box as [x1, y1, x2, y2] in pixels.
 * @param width Image width in pixels.
 * @param height Image height in pixels.
 * @returns Percentages to crop from the left, right, top and bottom edges.
 */
function bboxToCrop(
  bbox: [number, number, number, number],
  width: number,
  height: number
): { cropLeft: number; cropRight: number; cropTop: number; cropBottom: number } {
  const percentOf = (part: number, whole: number): number => (part / whole) * 100;
  const left = bbox[0];
  const top = bbox[1];
  const right = bbox[2];
  const bottom = bbox[3];

  const cropLeft = percentOf(left, width);
  const cropRight = percentOf(width - right, width);
  const cropTop = percentOf(top, height);
  const cropBottom = percentOf(height - bottom, height);
  return { cropLeft, cropRight, cropTop, cropBottom };
}

/**
 * Parses the model's detection answer into validated objects.
 *
 * The answer (or the contents of its first code fence) is parsed as JSON.
 * If that fails, individual items are recovered from the raw text with
 * `ITEM_RE`. An item is kept only when it has four finite coordinates within
 * 0..1000 and a positive area, does not cover nearly the whole image, and is
 * not a duplicate (same rounded box and label). Coordinates are scaled from
 * the 0..1000 range to pixels.
 *
 * @param text Raw text returned by the model.
 * @param width Image width in pixels.
 * @param height Image height in pixels.
 * @returns The accepted objects in the order they appeared.
 */
function parseQwen3VlDetectionOutput(text: string, width: number, height: number): VisionDetectionObject[] {
  // Step 1: obtain candidate items, preferring strict JSON.
  const fenced = JSON_FENCE_RE.exec(text);
  const candidateJson = (fenced ? fenced[1] : text).trim();

  let candidates: any[];
  try {
    const decoded = JSON.parse(candidateJson);
    candidates = Array.isArray(decoded) ? decoded : [decoded];
  } catch {
    // Not valid JSON (for example a truncated answer): salvage what matches.
    ITEM_RE.lastIndex = 0;
    candidates = Array.from(text.matchAll(ITEM_RE), (hit) => ({
      bbox_2d: [Number(hit[1]), Number(hit[2]), Number(hit[3]), Number(hit[4])],
      label: hit[5],
    }));
  }

  // Step 2: validate, de-duplicate and convert.
  const detections: VisionDetectionObject[] = [];
  const emitted = new Set<string>();
  const inRange = (value: number): boolean => Number.isFinite(value) && value >= 0 && value <= 1000;

  for (const candidate of candidates) {
    if (!candidate || typeof candidate !== "object") continue;

    const rawBox = candidate.bbox_2d;
    const label = String(candidate.label || "");
    if (!Array.isArray(rawBox) || rawBox.length !== 4) continue;

    const coords: number[] = rawBox.map((value: unknown) => Number(value));
    if (!coords.every((value) => inRange(value))) continue;

    const [left, top, right, bottom] = coords;
    const isDegenerate = right <= left || bottom <= top;
    if (isDegenerate) continue;

    // A box spanning (almost) the entire image carries no information.
    const coversWholeImage = left < 10 && top < 10 && right > 990 && bottom > 990;
    if (coversWholeImage) continue;

    const fingerprint = `${Math.round(left)}:${Math.round(top)}:${Math.round(right)}:${Math.round(bottom)}:${label}`;
    if (emitted.has(fingerprint)) continue;
    emitted.add(fingerprint);

    const pixelBox: [number, number, number, number] = [
      (left / 1000) * width,
      (top / 1000) * height,
      (right / 1000) * width,
      (bottom / 1000) * height,
    ];
    detections.push({ label, bbox: pixelBox, ...bboxToCrop(pixelBox, width, height) });
  }

  return detections;
}

/**
 * Sends one image together with a prompt to `POST /api/v1/chat` and returns the answer text.
 *
 * The image is read from disk and embedded as a base64 data URL. Request
 * metadata (never the image data itself) is logged before sending. The timeout
 * covers the whole exchange, including reading the response body, so a server
 * that sends headers and then stalls is still aborted.
 *
 * @param item Image to send; `filePath` is read and `id` is not used here.
 * @param prompt Text prompt placed before the image.
 * @param config Connection and sampling options. `timeoutMs` defaults to 180000,
 *   `model` to "vision-capability-priming". `maxTokens` is sent only when finite
 *   and positive, `temperature` only when finite.
 * @returns The extracted text, the elapsed time in ms, the image size in bytes,
 *   and the model instance id reported by the API ("" when absent).
 * @throws Error when the base URL is empty, the file cannot be read, the request
 *   times out, the API answers with a non-OK status, or the request otherwise fails.
 */
async function chatOnce(
  item: VisionAnalysisItem,
  prompt: string,
  config: {
    baseUrl: string;
    apiKey?: string;
    model?: string;
    maxTokens?: number;
    temperature?: number;
    timeoutMs?: number;
  }
): Promise<{ text: string; elapsedMs: number; bytes: number; modelInstanceId: string }> {
  const apiRoot = normalizeLmApiRoot(config.baseUrl);
  if (!apiRoot) {
    throw new Error("Vision API base URL is empty");
  }

  const endpoint = `${apiRoot}/api/v1/chat`;
  const timeoutMs = config.timeoutMs ?? 180_000;
  const model = config.model || "vision-capability-priming";
  const buf = await fs.promises.readFile(item.filePath);
  const dataUrl = `data:${mimeFromPath(item.filePath)};base64,${buf.toString("base64")}`;
  const payload: any = {
    model,
    input: [
      { type: "text", content: prompt },
      { type: "image", data_url: dataUrl },
    ],
    store: false,
  };
  // Optional sampling fields are only sent when they hold usable values.
  if (typeof config.maxTokens === "number" && Number.isFinite(config.maxTokens) && config.maxTokens > 0) {
    payload.max_output_tokens = Math.floor(config.maxTokens);
  }
  if (typeof config.temperature === "number" && Number.isFinite(config.temperature)) {
    payload.temperature = config.temperature;
  }

  logVisionRequestMetadata({
    configuredBaseUrl: config.baseUrl,
    apiRoot,
    endpoint,
    model,
    store: payload.store,
    max_output_tokens: payload.max_output_tokens ?? null,
    temperature: payload.temperature ?? null,
    promptChars: prompt.length,
    imageBytes: buf.byteLength,
    inputTypes: Array.isArray(payload.input) ? payload.input.map((part: any) => part.type) : [],
    payloadKeys: Object.keys(payload),
  });

  const headers = authHeaders(config.apiKey, true);

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  const startedAt = Date.now();

  let data: any;
  try {
    const resp = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
      signal: controller.signal,
    });

    if (!resp.ok) {
      const detail = await resp.text().catch(() => "(no body)");
      throw new Error(`Vision API ${resp.status}: ${detail}`);
    }
    // The abort timer is still armed here, so a stalled body is timed out too.
    data = await resp.json();
  } catch (error: any) {
    if (error?.name === "AbortError") {
      throw new Error(`Vision API timed out after ${timeoutMs}ms`);
    }
    throw new Error(`Vision API failed: ${error?.message || String(error)}`);
  } finally {
    // Disarm only after the body has been consumed or the request has failed.
    clearTimeout(timeout);
  }

  return {
    text: extractMessageText(data),
    elapsedMs: Date.now() - startedAt,
    bytes: buf.byteLength,
    modelInstanceId: typeof data?.model_instance_id === "string" ? data.model_instance_id : "",
  };
}

/**
 * Runs the analysis prompt against each image, one request at a time.
 *
 * Items are processed sequentially in input order. The first failing request
 * rejects the whole call; results gathered so far are not returned.
 *
 * @param items Images to analyze; an empty list yields an empty result without any request.
 * @param config Connection, prompt and sampling options. The prompt defaults to "Describe the image.".
 * @returns Per-image text results, the summed inference time, and the backend name.
 */
export async function analyzeLmStudioVisionBatch(
  items: VisionAnalysisItem[],
  config: LmStudioVisionAnalyzerConfig
): Promise<VisionAnalysisBatchResult> {
  const summary: VisionAnalysisBatchResult = {
    results: [],
    totalInferenceTimeMs: 0,
    backend: "vision-api",
  };
  if (items.length === 0) {
    return summary;
  }

  for (const image of items) {
    console.info(
      `[LmStudioVisionAnalyzer] /api/v1/chat start mode=analyze id=${image.id} timeoutMs=${config.timeoutMs ?? 180_000}`
    );

    const { text, elapsedMs, bytes, modelInstanceId } = await chatOnce(
      image,
      config.prompt || "Describe the image.",
      config
    );

    console.info(
      `[LmStudioVisionAnalyzer] /api/v1/chat ok mode=analyze id=${image.id} bytes=${bytes} elapsedMs=${elapsedMs} modelInstance=${modelInstanceId || "?"}`
    );

    summary.results.push({ id: image.id, text, inferenceTimeMs: elapsedMs });
    summary.totalInferenceTimeMs += elapsedMs;
  }

  return summary;
}

/**
 * Runs object detection on each image, one request at a time.
 *
 * The prompt is built once for the whole batch. For every image the pixel
 * dimensions are read first, then the model is queried and its answer is
 * parsed into objects scaled to those dimensions. The first failure rejects
 * the whole call.
 *
 * @param items Images to process; an empty list yields an empty result without building a prompt.
 * @param config Prompt, sampling and connection options. The timeout defaults to 120000 ms
 *   and the model to "vision-capability-priming".
 * @returns Per-image detections, the summed inference time, and the backend name.
 * @throws Error when neither `task` nor `odPrompt` is set, an image's dimensions
 *   cannot be determined, or a request fails.
 */
export async function detectLmStudioVisionBatch(
  items: VisionAnalysisItem[],
  config: VisionDetectionAnalyzerConfig & {
    baseUrl: string;
    apiKey?: string;
    model?: string;
  }
): Promise<VisionDetectionBatchResult> {
  const summary: VisionDetectionBatchResult = {
    results: [],
    totalInferenceTimeMs: 0,
    backend: "vision-api",
  };
  // Return before building the prompt so an empty batch never throws for a missing prompt.
  if (items.length === 0) {
    return summary;
  }

  const detectPrompt = buildDetectPrompt(config.task, config.odPrompt);

  for (const image of items) {
    const size = await readImageDimensions(image.filePath);

    console.info(
      `[LmStudioVisionAnalyzer] /api/v1/chat start mode=detect id=${image.id} timeoutMs=${config.timeoutMs ?? 120_000}`
    );

    const reply = await chatOnce(image, detectPrompt, {
      baseUrl: config.baseUrl,
      apiKey: config.apiKey,
      model: config.model || "vision-capability-priming",
      maxTokens: config.maxTokens,
      temperature: config.temperature,
      timeoutMs: config.timeoutMs ?? 120_000,
    });
    const found = parseQwen3VlDetectionOutput(reply.text, size.width, size.height);

    console.info(
      `[LmStudioVisionAnalyzer] /api/v1/chat ok mode=detect id=${image.id} objects=${found.length} bytes=${reply.bytes} elapsedMs=${reply.elapsedMs} modelInstance=${reply.modelInstanceId || "?"}`
    );

    summary.results.push({
      id: image.id,
      objects: found,
      imageWidth: size.width,
      imageHeight: size.height,
      inferenceTimeMs: reply.elapsedMs,
    });
    summary.totalInferenceTimeMs += reply.elapsedMs;
  }

  return summary;
}