// Project file: src/fastvlm-server-manager.ts
// Manages the lifecycle of the local FastVLM Python server process.
import fs from "fs";
import os from "os";
import path from "path";
import { spawn, execFileSync } from "child_process";
// How often to probe GET /health while waiting for the server to come up.
const HEALTH_POLL_INTERVAL_MS = 1_000;
// Give up waiting for a healthy server after this long.
const HEALTH_POLL_TIMEOUT_MS = 90_000;
// Per-request timeout for each individual /health or /status fetch.
const HEALTH_FETCH_TIMEOUT_MS = 2_000;
// Pinned pip packages installed into the FastVLM venv on every platform.
const VENV_PACKAGES_COMMON = [
  "fastapi==0.128.8",
  "uvicorn[standard]==0.39.0",
  "pillow==10.4.0",
  "transformers==4.57.6",
  "torch==2.8.0",
  "einops==0.8.2",
  "timm==1.0.26",
  "tokenizers==0.22.2",
  "sentencepiece==0.2.1",
  "huggingface_hub==0.36.2",
];
// Extra packages added only on macOS (MLX / CoreML stack).
const VENV_PACKAGES_MACOS = [
  "mlx==0.29.3",
  "mlx-lm==0.29.1",
  "mlx-vlm==0.1.13",
  "coremltools==9.0",
];
// Effective install list: macOS gets the MLX/CoreML packages on top of the common set.
const VENV_PACKAGES = process.platform === "darwin"
  ? [...VENV_PACKAGES_COMMON, ...VENV_PACKAGES_MACOS]
  : VENV_PACKAGES_COMMON;
// Port of the server last started/adopted by ensureFastvlmServerRunning;
// consumed by stopActiveFastvlmServer. null until a server has been managed.
let _activePort: number | null = null;
/** Root of the plugin checkout — resolved as the current working directory. */
function pluginRoot(): string {
  const root = process.cwd();
  return root;
}
/** Location of the shared FastVLM virtualenv: ~/.fastvlm/venv */
function venvDir(): string {
  const home = os.homedir();
  return path.join(home, ".fastvlm", "venv");
}
/** Python interpreter inside the FastVLM venv. */
function venvPython(): string {
  const binDir = path.join(venvDir(), "bin");
  return path.join(binDir, "python3");
}
/** pip executable inside the FastVLM venv. */
function venvPip(): string {
  const binDir = path.join(venvDir(), "bin");
  return path.join(binDir, "pip");
}
/** True when the venv's python3 binary is present on disk. */
function venvExists(): boolean {
  const interpreter = venvPython();
  return fs.existsSync(interpreter);
}
/**
 * Major.minor version ("3.9", "3.13", …) reported by the venv's Python
 * interpreter, or null when the interpreter is missing, times out, or
 * prints something unexpected.
 */
function venvPythonVersion(): string | null {
  try {
    const output = execFileSync(venvPython(), ["--version"], {
      encoding: "utf-8",
      timeout: 5_000,
    });
    const match = /^Python (\d+\.\d+)/.exec(output.trim());
    return match?.[1] ?? null;
  } catch {
    // Spawn failure, timeout, or non-zero exit all mean "unknown".
    return null;
  }
}
/** Directory holding the manager's PID file and server log. */
function logsDir(): string {
  const root = pluginRoot();
  return path.join(root, "logs");
}
/** PID file path (handed to the Python server via --pid-file). */
function pidFilePath(): string {
  const name = "fastvlm-server.pid";
  return path.join(logsDir(), name);
}
/** Shared log file path used by both this manager and the Python server. */
function logFilePath(): string {
  const name = "mlx-vision-server.log";
  return path.join(logsDir(), name);
}
/**
 * Spawns `cmd args` in `cwd`, streaming each non-empty trimmed output line
 * (stdout and stderr) to `onLine` and, when `logFile` is given, appending it
 * there as well.
 *
 * Fix: "data" chunks are not line-aligned, so a line split across two chunks
 * used to be emitted as two broken fragments. Partial lines are now buffered
 * per stream and flushed when the process closes.
 *
 * @returns resolves on exit code 0; rejects on spawn error or non-zero exit.
 */
function runAndStream(
  cmd: string,
  args: string[],
  cwd: string,
  onLine: (line: string) => void,
  logFile?: string
): Promise<void> {
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, { cwd, stdio: ["ignore", "pipe", "pipe"] });
    const logStream = logFile ? fs.createWriteStream(logFile, { flags: "a" }) : null;
    // Partial-line buffers, one per stream so stdout/stderr never interleave
    // inside a single line.
    const buffers = { out: "", err: "" };
    function emit(line: string) {
      const t = line.trim();
      if (t) {
        onLine(t);
        logStream?.write(t + "\n");
      }
    }
    function makeOnData(key: "out" | "err") {
      return (data: Buffer) => {
        buffers[key] += data.toString();
        const parts = buffers[key].split("\n");
        // Last element is an unterminated fragment — keep it for the next chunk.
        buffers[key] = parts.pop() ?? "";
        for (const part of parts) emit(part);
      };
    }
    child.stdout.on("data", makeOnData("out"));
    child.stderr.on("data", makeOnData("err"));
    child.on("error", (err) => {
      logStream?.end();
      reject(err);
    });
    child.on("close", (code) => {
      // Flush any trailing output that lacked a final newline.
      emit(buffers.out);
      emit(buffers.err);
      logStream?.end();
      if (code === 0) resolve();
      else reject(new Error(`${path.basename(cmd)} exited with code ${code}`));
    });
  });
}
// Python 3.9 from Xcode CLT — matches the working venv (lib/python3.9/).
// mlx-vlm 0.1.13 requires this version for llava_qwen2 remapping support.
// Used by ensureVenv to (re)create the venv when the existing one is not 3.9.
const PYTHON39 = "/Library/Developer/CommandLineTools/usr/bin/python3.9";
/**
 * Creates the FastVLM virtualenv if needed and applies the mlx-vlm patches.
 *
 * An existing venv whose interpreter is not Python 3.9 is deleted and rebuilt
 * (the pinned stack targets 3.9 — see the PYTHON39 note above). Progress
 * messages go to onStatus; subprocess output is mirrored into logFile.
 */
async function ensureVenv(onStatus: (msg: string) => void, logFile: string): Promise<void> {
  if (venvExists()) {
    const detected = venvPythonVersion();
    const wrongVersion = detected !== null && !detected.startsWith("3.9");
    if (wrongVersion) {
      onStatus(`Python environment is ${detected} — rebuilding with Python 3.9…`);
      fs.rmSync(venvDir(), { recursive: true, force: true });
    }
  }
  if (!venvExists()) {
    onStatus("Creating Python environment…");
    await runAndStream(PYTHON39, ["-m", "venv", venvDir()], pluginRoot(), onStatus, logFile);
    onStatus("Installing dependencies — this may take a few minutes…");
    await runAndStream(venvPip(), ["install", ...VENV_PACKAGES], pluginRoot(), onStatus, logFile);
  }
  await ensureVenvPatches(onStatus, logFile);
  onStatus("Python environment ready");
}
// Applies all mlx-vlm patches required for FastVLM-7B support.
// Runs on every startup so that gaps left by older venv setups are filled.
// Every patch is idempotent: each one checks for an already-patched marker
// string before rewriting the target file.
async function ensureVenvPatches(onStatus: (msg: string) => void, logFile: string): Promise<void> {
  // Resolve site-packages dynamically — the venv Python version may vary (3.9, 3.13, …).
  const libDir = path.join(venvDir(), "lib");
  const pythonSubdir = fs.existsSync(libDir)
    ? fs.readdirSync(libDir).find((d) => d.startsWith("python")) ?? "python3.9"
    : "python3.9";
  const sitePackages = path.join(venvDir(), "lib", pythonSubdir, "site-packages");
  const mlxVlmRoot = path.join(sitePackages, "mlx_vlm");
  // Ensure coremltools is installed — required by models/fastvlm/fastvlm.py.
  // Use a dedicated marker file instead of checking .dist-info (whose path includes the
  // Python version and would not be found if the venv was created with a different version).
  const coremltoolsMarker = path.join(venvDir(), "coremltools_installed.marker");
  if (!fs.existsSync(coremltoolsMarker)) {
    onStatus("Installing missing dependencies…");
    await runAndStream(venvPip(), ["install", "coremltools==9.0"], pluginRoot(), onStatus, logFile);
    fs.writeFileSync(coremltoolsMarker, new Date().toISOString());
  }
  // Patch 1: utils.py — three sub-patches required for FastVLM support
  const mlxVlmUtils = path.join(mlxVlmRoot, "utils.py");
  if (fs.existsSync(mlxVlmUtils)) {
    let src = fs.readFileSync(mlxVlmUtils, "utf-8");
    // 1a: add "llava_qwen2" -> "fastvlm" to MODEL_REMAPPING
    src = src.replace(
      /MODEL_REMAPPING\s*=\s*\{([^}]*)\}/,
      (match) => {
        // Already patched — leave the dict untouched.
        if (match.includes("llava_qwen2")) return match;
        return match.replace(/\}$/, ', "llava_qwen2": "fastvlm"}');
      }
    );
    // 1b: add "import coremltools" after "import mlx.nn as nn" if not already present
    if (!src.includes("import coremltools")) {
      src = src.replace(
        /^(import mlx\.nn as nn\s*\n)/m,
        "$1import coremltools\n"
      );
    }
    // 1c: guard sanitize_weights(model_class.VisionModel, ...) with hasattr check
    // and load CoreML vision tower for models that have no VisionModel (e.g. fastvlm)
    // NOTE(review): the leading whitespace inside these embedded Python lines —
    // and in the target regex — must match utils.py's real indentation exactly
    // for the patch to apply and for the result to be valid Python; verify
    // against a working venv, as whitespace may have been collapsed in review.
    if (!src.includes("hasattr(model_class, 'VisionModel')")) {
      src = src.replace(
        / weights = sanitize_weights\(\s*model_class\.VisionModel, weights, model_config\.vision_config\s*\)/,
        [
          " if hasattr(model_class, 'VisionModel'):",
          " weights = sanitize_weights(",
          " model_class.VisionModel, weights, model_config.vision_config",
          " )",
          " else:",
          " # Load CoreML vision tower (used by fastvlm)",
          " print(\"Looking for CoreML vision tower\")",
          " coreml_file = glob.glob(str(model_path / \"*.mlpackage\"))",
          " if len(coreml_file) > 0:",
          " assert len(coreml_file) == 1, \"Found multiple vision model files.\"",
          " print(f\"Loading {coreml_file[0]} vision tower\")",
          " model.vision_tower = coremltools.models.MLModel(coreml_file[0], compute_units=coremltools.ComputeUnit.CPU_ONLY)",
        ].join("\n")
      );
    }
    fs.writeFileSync(mlxVlmUtils, src, "utf-8");
  }
  // Patch 2: prompt_utils.py — add llava_qwen2 format entry and fix null chat_template
  const mlxVlmPromptUtils = path.join(mlxVlmRoot, "prompt_utils.py");
  if (fs.existsSync(mlxVlmPromptUtils)) {
    let src = fs.readFileSync(mlxVlmPromptUtils, "utf-8");
    // 2a: register the llava_qwen2 message format right after the llava entry.
    src = src.replace(
      /("llava":\s*"message_list_with_image",)(\s*"llava_next":)/,
      (match, llavaEntry, llavaNextEntry) => {
        // Idempotence guard checks the whole file, not just the matched span.
        if (src.includes('"llava_qwen2"')) return match;
        return `${llavaEntry}\n "llava_qwen2": "message_with_image_token_new_line",${llavaNextEntry}`;
      }
    );
    // 2b: also require chat_template to be non-None before using it.
    src = src.replace(
      /if "chat_template" in processor\.__dict__\.keys\(\):/,
      (match) => {
        if (src.includes("processor.chat_template is not None")) return match;
        return `if ("chat_template" in processor.__dict__.keys()) and (processor.chat_template is not None):`;
      }
    );
    fs.writeFileSync(mlxVlmPromptUtils, src, "utf-8");
  }
  // Patch 3: copy mlx_vlm/models/fastvlm/ — the model implementation missing from the wheel.
  // Always re-copy so updated source files (e.g. copy=False fix in fastvlm.py) are applied.
  // NOTE(review): readdirSync here assumes the source dir contains only files
  // (no subdirectories) — copyFileSync would throw on a directory entry.
  const fastvlmModelDest = path.join(mlxVlmRoot, "models", "fastvlm");
  const fastvlmModelSrc = path.join(pluginRoot(), "src", "fastvlm_server", "mlx_vlm_patches", "models", "fastvlm");
  fs.mkdirSync(fastvlmModelDest, { recursive: true });
  for (const file of fs.readdirSync(fastvlmModelSrc)) {
    fs.copyFileSync(path.join(fastvlmModelSrc, file), path.join(fastvlmModelDest, file));
  }
}
// Homebrew Python 3.10 interpreter used to create the separate Qwen3-VL venv
// (see ensureQwen3VlVenv below).
const PYTHON310 = "/opt/homebrew/bin/python3.10";
/** Location of the dedicated Qwen3-VL virtualenv: ~/.fastvlm/qwen3vl_venv */
function qwen3VlVenvDir(): string {
  const home = os.homedir();
  return path.join(home, ".fastvlm", "qwen3vl_venv");
}
/** pip executable inside the Qwen3-VL venv. */
function qwen3VlVenvPip(): string {
  const binDir = path.join(qwen3VlVenvDir(), "bin");
  return path.join(binDir, "pip");
}
/** True when the Qwen3-VL venv's python3 binary exists on disk. */
function qwen3VlVenvExists(): boolean {
  const interpreter = path.join(qwen3VlVenvDir(), "bin", "python3");
  return fs.existsSync(interpreter);
}
/** Marker file recording that the Qwen3-VL venv finished installing. */
function qwen3VlReadyMarker(): string {
  const marker = "qwen3vl_ready.marker";
  return path.join(qwen3VlVenvDir(), marker);
}
/**
 * Provisions the dedicated Python 3.10 venv used by the qwen3-vl detect
 * backend. Idempotent: a marker file written after a successful install
 * short-circuits later calls. Subprocess output is streamed to onStatus
 * and appended to logFile.
 */
async function ensureQwen3VlVenv(onStatus: (msg: string) => void, logFile: string): Promise<void> {
  if (fs.existsSync(qwen3VlReadyMarker())) return;
  if (!qwen3VlVenvExists()) {
    onStatus("Creating Qwen3-VL Python environment (Python 3.10)…");
    await runAndStream(PYTHON310, ["-m", "venv", qwen3VlVenvDir()], pluginRoot(), onStatus, logFile);
  }
  onStatus("Installing mlx-vlm for Qwen3-VL — this may take a few minutes…");
  const pip = qwen3VlVenvPip();
  // Upgrade pip first, then install pillow plus mlx-vlm from git.
  await runAndStream(pip, ["install", "--upgrade", "pip"], pluginRoot(), onStatus, logFile);
  await runAndStream(
    pip,
    ["install", "pillow", "git+https://github.com/Blaizzy/mlx-vlm.git"],
    pluginRoot(),
    onStatus,
    logFile
  );
  fs.writeFileSync(qwen3VlReadyMarker(), new Date().toISOString());
  onStatus("Qwen3-VL environment ready");
}
/**
 * Reads the PID recorded in the PID file, or null when the file is missing,
 * unreadable, or does not contain a positive integer.
 */
function readPid(): number | null {
  try {
    const contents = fs.readFileSync(pidFilePath(), "utf-8");
    const pid = parseInt(contents.trim(), 10);
    if (Number.isFinite(pid) && pid > 0) return pid;
    return null;
  } catch {
    return null;
  }
}
/**
 * Liveness check via signal 0 — no signal is actually delivered; a throw
 * means the process is gone (or not signalable by this user).
 */
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}
/**
 * fetch() wrapper that aborts the request after timeoutMs via an
 * AbortController. The timer is cleared once the request settles either way.
 */
function fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number): Promise<Response> {
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), timeoutMs);
  const request = fetch(url, { ...options, signal: aborter.signal });
  return request.finally(() => clearTimeout(timer));
}
/**
 * Polls GET /health on the local server until it answers 2xx or the overall
 * timeout elapses. onAttempt is invoked with the 1-based attempt number
 * before each probe; the poll sleeps between attempts.
 *
 * @returns true once a healthy response is seen, false on timeout.
 */
async function pollHealth(port: number, onAttempt: (n: number) => void): Promise<boolean> {
  const healthUrl = `http://127.0.0.1:${port}/health`;
  const giveUpAt = Date.now() + HEALTH_POLL_TIMEOUT_MS;
  for (let attempt = 1; Date.now() < giveUpAt; attempt++) {
    onAttempt(attempt);
    try {
      const response = await fetchWithTimeout(healthUrl, {}, HEALTH_FETCH_TIMEOUT_MS);
      if (response.ok) return true;
    } catch {
      // connection refused / timed out — server still starting
    }
    await new Promise<void>((resolve) => setTimeout(resolve, HEALTH_POLL_INTERVAL_MS));
  }
  return false;
}
/** Server launch parameters, derived from the plugin configuration. */
export interface FastVLMServerConfig {
  /** TCP port the server listens on (bound to 127.0.0.1 only). */
  port: number;
  /** Absolute path to the FastVLM model directory. Empty string = skip --model arg. */
  modelPath: string;
  /** Whether to load the FastVLM model on server start. */
  mlxVisionEnabled: boolean;
  /** Absolute path to the Florence-2 model directory. Empty string = Florence-2 not loaded. */
  florence2ModelPath?: string;
  /** Backend mode: 'ane' (CoreML+ANE) or 'mlx' (MLX-only). Default: 'mlx'. */
  backend?: string;
  /** Max tokens for generation. Omitted = server default. */
  maxTokens?: number;
  /** Sampling temperature. Omitted = server default. */
  temperature?: number;
  /** Object detection backend: 'florence2' or 'qwen3-vl'. Default: 'florence2'. */
  detectBackend?: string;
  /** Absolute path to the Qwen3-VL model directory. Required when detectBackend='qwen3-vl'. */
  qwen3VlModelPath?: string;
}
/**
 * Ensures the FastVLM server process is running and healthy.
 *
 * If a healthy server is already running (detected via PID file + /health),
 * returns immediately. Otherwise spawns a new detached process.
 *
 * @param config Server parameters derived from plugin config
 * @param onStatus Callback for streaming status messages (ctx.status)
 * @throws if the server fails to become healthy within the timeout
 */
export async function ensureFastvlmServerRunning(
  config: FastVLMServerConfig,
  onStatus: (msg: string) => void
): Promise<void> {
  const { port } = config;
  // Record the port so stopActiveFastvlmServer can find this server later.
  _activePort = port;
  // Adopt any healthy server already listening on the port — unless the backend differs.
  // The PID file may be stale (wrong PID) or absent, but the server can still be running.
  const requestedBackend = config.backend ?? "mlx";
  const requestedDetectBackend = config.detectBackend ?? "florence2";
  try {
    const res = await fetchWithTimeout(
      `http://127.0.0.1:${port}/health`,
      {},
      HEALTH_FETCH_TIMEOUT_MS
    );
    if (res.ok) {
      // Check if the running server's backend or detect_backend differs from what we need.
      let runningBackend: string | null = null;
      let runningDetectBackend: string | null = null;
      try {
        const statusRes = await fetchWithTimeout(
          `http://127.0.0.1:${port}/status`,
          {},
          HEALTH_FETCH_TIMEOUT_MS
        );
        if (statusRes.ok) {
          const statusJson = await statusRes.json() as { backend?: string; detect_backend?: string };
          runningBackend = statusJson.backend ?? null;
          runningDetectBackend = statusJson.detect_backend ?? null;
        }
      } catch {
        // /status unreachable — treat as matching to avoid unnecessary restart.
      }
      // null means "unknown" and counts as a match (see the catch above).
      const backendMismatch = runningBackend !== null && runningBackend !== requestedBackend;
      const detectBackendMismatch = runningDetectBackend !== null && runningDetectBackend !== requestedDetectBackend;
      if (backendMismatch || detectBackendMismatch) {
        const isoNow = () => new Date().toISOString().replace(/\.\d+Z$/, "Z");
        fs.appendFileSync(logFilePath(), `[mgr] ${isoNow()} Backend mismatch: running=${runningBackend}/${runningDetectBackend}, requested=${requestedBackend}/${requestedDetectBackend} — restarting [${port}]\n`);
        onStatus("Restarting server (backend changed)…");
        await stopFastvlmServer(port);
        // Fall through to spawn with new backend.
      } else {
        // Adopting the running server — still make sure the qwen3-vl venv
        // exists if that detect backend was requested, then return.
        if (requestedDetectBackend === "qwen3-vl") {
          const logFile = logFilePath();
          fs.mkdirSync(logsDir(), { recursive: true });
          await ensureQwen3VlVenv(onStatus, logFile);
        }
        const pid = readPid();
        fs.appendFileSync(logFilePath(), `[mgr] ${new Date().toISOString().replace(/\.\d+Z$/, "Z")} Adopted [${port}]${pid !== null ? ` — PID ${pid}` : ""}\n`);
        return;
      }
    }
  } catch {
    // Nothing healthy on the port — proceed to spawn.
  }
  // Kill any stale process recorded in the PID file so the port is free before spawn.
  // NOTE(review): SIGTERM is sent but not awaited, so the old process may still
  // hold the port when the new server starts — confirm the server retries its
  // bind, or that the health-poll loop below absorbs the delay.
  const stalePid = readPid();
  if (stalePid !== null && isProcessAlive(stalePid)) {
    try { process.kill(stalePid, "SIGTERM"); } catch { /* already gone */ }
  }
  fs.mkdirSync(logsDir(), { recursive: true });
  const logFile = logFilePath();
  // MLX/FastVLM is macOS-only; force-disable on other platforms
  const effectiveConfig: FastVLMServerConfig = process.platform !== "darwin"
    ? { ...config, mlxVisionEnabled: false }
    : config;
  await ensureVenv(onStatus, logFile);
  if (effectiveConfig.detectBackend === "qwen3-vl") {
    await ensureQwen3VlVenv(onStatus, logFile);
  }
  // Build the server command line from the effective config.
  const args: string[] = [
    "-m",
    "fastvlm_server",
    "--port",
    String(port),
    "--host",
    "127.0.0.1",
    "--pid-file",
    pidFilePath(),
  ];
  if (effectiveConfig.mlxVisionEnabled && effectiveConfig.modelPath.trim()) {
    args.push("--model", effectiveConfig.modelPath.trim());
  }
  const backend = effectiveConfig.backend ?? "mlx";
  args.push("--backend", backend);
  if (effectiveConfig.maxTokens !== undefined) {
    args.push("--max-tokens", String(effectiveConfig.maxTokens));
  }
  if (effectiveConfig.temperature !== undefined) {
    args.push("--temperature", String(effectiveConfig.temperature));
  }
  if (effectiveConfig.florence2ModelPath?.trim()) {
    args.push("--florence2-model-path", effectiveConfig.florence2ModelPath.trim());
  }
  if (effectiveConfig.detectBackend) {
    args.push("--detect-backend", effectiveConfig.detectBackend);
  }
  if (effectiveConfig.qwen3VlModelPath?.trim()) {
    args.push("--qwen3-vl-model-path", effectiveConfig.qwen3VlModelPath.trim());
  }
  // Plugin mode: always lazy-load models on first request
  args.push("--lazy");
  // Python server writes its own log via --log-file (FileHandler in setup_logging).
  // stdout/stderr are redirected to /dev/null by the bash intermediary below.
  // NOTE(review): with the direct-spawn TEST MODE below there is no bash
  // intermediary any more (stdio is "ignore"); update this comment when the
  // test block is resolved.
  args.push("--log-file", logFile);
  onStatus("Starting server…");
  // PYTHONPATH points at src/ so "-m fastvlm_server" resolves from the plugin tree.
  const pythonPath = path.join(pluginRoot(), "src");
  const isoNow = () => new Date().toISOString().replace(/\.\d+Z$/, "Z");
  fs.appendFileSync(logFile, `[mgr] ${isoNow()} Starting [${port}] model=${effectiveConfig.modelPath.trim() || "(none)"}\n`);
  // TEST MODE: direct spawn without double-fork — to verify whether LM Studio
  // kills this process on plugin unload/reload. If the server survives without
  // double-fork, we can remove the bash intermediary permanently.
  // Revert this block to the double-fork variant once the test result is known.
  const child = spawn(venvPython(), args, {
    cwd: pluginRoot(),
    detached: true,
    stdio: "ignore",
    env: { ...process.env, PYTHONPATH: pythonPath },
  });
  child.unref();
  // NOTE(review): this log line says "via double-fork" but the spawn above is
  // the direct TEST MODE variant — misleading until the block is reverted.
  fs.appendFileSync(logFile, `[mgr] ${isoNow()} Spawned [${port}] via double-fork\n`);
  // NOTE(review): the attempt number n is ignored — every poll reports the
  // same "Loading…" status.
  const ready = await pollHealth(port, (n) => {
    onStatus("Loading…");
  });
  if (!ready) {
    throw new Error(
      `FastVLM server did not become healthy within ${HEALTH_POLL_TIMEOUT_MS / 1000}s. ` +
      `Check logs: ${logFile}`
    );
  }
  const startedPid = readPid();
  fs.appendFileSync(logFile, `[mgr] ${isoNow()} Started [${port}]${startedPid !== null ? ` — PID ${startedPid}` : ""}\n`);
  onStatus("Server ready");
}
/**
 * Shuts down the server gracefully via POST /shutdown, falling back to
 * SIGTERM when the HTTP request fails. The PID file is removed afterwards
 * regardless of outcome. Only acts when the recorded PID is still alive.
 */
export async function stopFastvlmServer(port: number): Promise<void> {
  const logFile = logFilePath();
  const isoNow = () => new Date().toISOString().replace(/\.\d+Z$/, "Z");
  const pid = readPid();
  if (pid !== null && isProcessAlive(pid)) {
    fs.appendFileSync(logFile, `[mgr] ${isoNow()} Stopping [${port}]...\n`);
    let shutdownRequested = true;
    try {
      await fetchWithTimeout(
        `http://127.0.0.1:${port}/shutdown`,
        { method: "POST" },
        3_000
      );
    } catch {
      shutdownRequested = false;
    }
    if (!shutdownRequested) {
      // Graceful route failed — fall back to a signal.
      try {
        process.kill(pid, "SIGTERM");
      } catch {
        // process already gone
      }
    }
    fs.appendFileSync(logFile, `[mgr] ${isoNow()} Stopped [${port}]\n`);
  }
  try {
    fs.unlinkSync(pidFilePath());
  } catch {
    // already gone
  }
}
/**
 * Stops whichever server this process session last started or adopted.
 * Uses the port recorded by the most recent ensureFastvlmServerRunning call,
 * falling back to the default port 8765 when none was recorded.
 */
export async function stopActiveFastvlmServer(): Promise<void> {
  const port = _activePort ?? 8765;
  await stopFastvlmServer(port);
}