"""CLI entry point for FastVLM Server."""
from __future__ import annotations
import argparse
import logging
import os
import signal
import sys
from pathlib import Path

import uvicorn

from .config import ServerConfig, set_config


def setup_logging(log_level: str, log_file: str | None = None) -> None:
    """Configure root logging to stdout and, optionally, to a log file.

    Args:
        log_level: Level name looked up case-insensitively on the ``logging``
            module (e.g. ``"debug"``); names not found there fall back to INFO.
        log_file: Optional log file path. When given, its parent directory is
            created if missing and a file handler is added next to stdout.
    """
    resolved_level = getattr(logging, log_level.upper(), logging.INFO)

    sinks: list[logging.Handler] = [logging.StreamHandler(sys.stdout)]
    if log_file:
        # FileHandler does not create missing directories itself.
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)
        sinks.append(logging.FileHandler(log_file))

    logging.basicConfig(
        handlers=sinks,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        level=resolved_level,
    )


def write_pid_file(pid_file: str) -> None:
    """Write the current process ID to *pid_file*.

    Missing parent directories are created first; an existing file is
    overwritten.
    """
    target = Path(pid_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    pid_text = f"{os.getpid()}"
    target.write_text(pid_text)


def remove_pid_file(pid_file: str) -> None:
    """Delete *pid_file* on a best-effort basis.

    A file that is already gone is not an error. Any other failure is
    reported as a warning on this module's logger and is never raised.
    """
    try:
        pid_path = Path(pid_file)
        pid_path.unlink(missing_ok=True)
    except Exception as exc:
        log = logging.getLogger(__name__)
        log.warning(f"Failed to remove pid file '{pid_file}': {exc}")


def main() -> int:
    """Run the FastVLM server CLI: parse options, configure, and serve.

    Option defaults come from ``FASTVLM_*`` environment variables where the
    parser declares one; explicit command-line flags take precedence.

    Returns:
        0 when ``uvicorn.run`` returns normally; 1 when ``--model`` names a
        path that does not exist or when ``uvicorn.run`` raises an exception.

    Raises:
        SystemExit: From argparse (status 2) on invalid arguments or an
            unparsable numeric environment default, and from the signal
            handler (status 0) on SIGINT/SIGTERM.
    """
    # Directory holding the default pid/log files (overridable via FASTVLM_HOME).
    home_dir = Path(os.environ.get("FASTVLM_HOME", str(Path.home() / ".fastvlm")))

    parser = argparse.ArgumentParser(
        description="FastVLM Sidecar Server - HTTP API for image analysis"
    )
    # Numeric environment defaults are handed to argparse as raw strings:
    # argparse applies `type` to string defaults, so a malformed value yields
    # a normal usage error (and only when the flag is not given explicitly)
    # instead of an uncaught ValueError while the parser is being built.
    parser.add_argument(
        "--port", "-p",
        type=int,
        default=os.environ.get("FASTVLM_PORT", "8765"),
        help="HTTP port (default: 8765)"
    )
    parser.add_argument(
        "--model", "-m",
        type=str,
        default=os.environ.get("FASTVLM_MODEL", ""),
        required=False,
        help="Path to FastVLM model directory. Omit to run in detection-only mode."
    )
    parser.add_argument(
        "--host",
        type=str,
        default=os.environ.get("FASTVLM_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)"
    )
    parser.add_argument(
        "--pid-file",
        type=str,
        default=os.environ.get("FASTVLM_PID_FILE", str(home_dir / "fastvlm.pid"))
    )
    parser.add_argument(
        "--log-file",
        type=str,
        default=os.environ.get("FASTVLM_LOG_FILE", str(home_dir / "fastvlm.log"))
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["debug", "info", "warn", "error"],
        default="info"
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=os.environ.get("FASTVLM_MAX_TOKENS", "256"),
        help="Default max tokens for generation (default: 256)"
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=os.environ.get("FASTVLM_TEMPERATURE", "0.7"),
        help="Default temperature for generation (default: 0.7)"
    )
    parser.add_argument(
        "--backend",
        type=str,
        choices=["ane", "mlx"],
        default=os.environ.get("FASTVLM_BACKEND", "ane"),
        help="Backend mode: ane (CoreML/ANE) or mlx (MLX-only)"
    )
    parser.add_argument(
        "--florence2-model-path",
        type=str,
        default=os.environ.get("FASTVLM_FLORENCE2_MODEL_PATH", ""),
        help="Path to Florence-2 model directory (optional; preloaded at startup if provided)"
    )
    parser.add_argument(
        "--lazy",
        action="store_true",
        default=False,
        help="Lazy-load models on first request instead of at startup (for plugin/embedded mode)"
    )
    parser.add_argument(
        "--detect-backend",
        type=str,
        choices=["florence2", "qwen3-vl"],
        default=os.environ.get("FASTVLM_DETECT_BACKEND", "florence2"),
        help="Object detection backend: florence2 or qwen3-vl (default: florence2)"
    )
    parser.add_argument(
        "--qwen3-vl-model-path",
        type=str,
        default=os.environ.get("FASTVLM_QWEN3_VL_MODEL_PATH", ""),
        help="Path to Qwen3-VL model directory (required when --detect-backend=qwen3-vl)"
    )
    args = parser.parse_args()

    # Validate model path for local paths (not empty)
    if args.model.strip():
        model_path = Path(args.model).expanduser()
        if not model_path.exists():
            print(f"Error: Model path not found: {model_path}", file=sys.stderr)
            return 1

    config = ServerConfig(
        port=args.port,
        host=args.host,
        model_path=args.model,
        log_level=args.log_level,
        backend=args.backend,
        pid_file=args.pid_file,
        log_file=args.log_file,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        florence2_model_path=args.florence2_model_path,
        lazy=args.lazy,
        detect_backend=args.detect_backend,
        qwen3_vl_model_path=args.qwen3_vl_model_path,
    )
    set_config(config)

    setup_logging(config.log_level, config.log_file)
    logger = logging.getLogger(__name__)

    logger.info(f"FastVLM Server starting on {config.host}:{config.port}")
    logger.info(f"Model: {config.model_path or '(none)'}")
    logger.info(f"Defaults: max_tokens={config.max_tokens}, temperature={config.temperature}")
    logger.info(f"Backend: {config.resolved_backend}")
    if config.lazy:
        logger.info("Mode: lazy (models load on first request)")
    else:
        logger.info("Mode: eager (models load at startup)")
    if config.florence2_model_path:
        logger.info(f"Florence-2: {config.florence2_model_path}")
    else:
        logger.info("Florence-2: not configured (--florence2-model-path not set)")

    write_pid_file(config.pid_file)

    def cleanup(signum, frame):
        # Signal handler: drop the pid file, then exit with status 0.
        logger.info("Signal received, shutting down...")
        remove_pid_file(config.pid_file)
        sys.exit(0)

    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    # The CLI offers "warn", but uvicorn names that level "warning" and
    # rejects "warn"; translate so `--log-level warn` does not abort startup.
    uvicorn_log_level = "warning" if config.log_level == "warn" else config.log_level

    try:
        uvicorn.run(
            "fastvlm_server.server:app",
            host=config.host,
            port=config.port,
            log_level=uvicorn_log_level,
            loop="asyncio",
            http="h11"
        )
    except Exception as e:
        logger.error(f"Server error: {e}")
        return 1
    finally:
        # Runs on normal return, on error, and when cleanup() raises SystemExit.
        remove_pid_file(config.pid_file)

    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())



def write_pid_file(pid_file: str) -> None:
    """Store this process's PID as text in *pid_file*.

    Parent directories are created when absent, and any previous contents of
    the file are replaced.
    """
    destination = Path(pid_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    current_pid = os.getpid()
    with destination.open("w") as handle:
        handle.write(str(current_pid))


def remove_pid_file(pid_file: str) -> None:
    """Delete *pid_file* on a best-effort basis.

    A missing file is not an error. Any other failure is logged as a warning
    on this module's logger instead of being silently discarded; it is never
    raised.
    """
    try:
        Path(pid_file).unlink(missing_ok=True)
    except Exception as exc:
        # Deliberately broad: this is called during shutdown, where a failed
        # cleanup must not mask the real exit path, but it should be visible.
        logging.getLogger(__name__).warning(
            "Failed to remove pid file '%s': %s", pid_file, exc
        )


def main() -> int:
    """Run the FastVLM server CLI: parse options, configure, and serve.

    ``--model`` may be a HuggingFace model ID or a local path; only values
    that look like local paths are checked for existence before startup.

    NOTE(review): this file also defines an earlier ``main`` above; when the
    module is imported, this later definition is the one bound to the name.

    Returns:
        0 when ``uvicorn.run`` returns normally; 1 when a local ``--model``
        path does not exist or when ``uvicorn.run`` raises an exception.

    Raises:
        SystemExit: From argparse (status 2) on invalid arguments or an
            unparsable ``FASTVLM_PORT`` default, and from the signal handler
            (status 0) on SIGINT/SIGTERM.
    """
    parser = argparse.ArgumentParser(
        description="FastVLM Sidecar Server - HTTP API for image analysis and Florence-2 detection (v3.1.0)",
        epilog="Example: python -m fastvlm_server --model apple/FastVLM-7B-int4 --port 8765"
    )
    # The port default is handed to argparse as a raw string: argparse applies
    # `type` to string defaults, so a malformed FASTVLM_PORT yields a normal
    # usage error (and only when --port is not given explicitly) instead of
    # an uncaught ValueError while the parser is being built.
    parser.add_argument(
        "--port", "-p",
        type=int,
        default=os.environ.get("FASTVLM_PORT", "8765"),
        help="HTTP port (default: 8765)"
    )
    parser.add_argument(
        "--model", "-m",
        type=str,
        default=os.environ.get("FASTVLM_MODEL", ""),
        required=False,
        help="HuggingFace model ID (e.g., apple/FastVLM-7B-int4) or local path. "
             "Omit to run in detection-only mode (Florence-2 /detect endpoint only)."
    )
    parser.add_argument(
        "--host",
        type=str,
        default=os.environ.get("FASTVLM_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)"
    )
    parser.add_argument(
        "--backend",
        type=str,
        choices=["mlx", "pytorch", "auto"],
        default=os.environ.get("FASTVLM_BACKEND", "auto"),
        help="Backend: mlx (fast ~1s), pytorch (slow ~20s), auto (detect)"
    )
    parser.add_argument(
        "--pid-file",
        type=str,
        default=str(Path.home() / ".fastvlm" / "fastvlm.pid")
    )
    parser.add_argument(
        "--log-file",
        type=str,
        default=str(Path.home() / ".fastvlm" / "fastvlm.log")
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["debug", "info", "warn", "error"],
        default="info"
    )
    args = parser.parse_args()

    # Validate model path for local files (skip for HuggingFace IDs and empty path)
    if args.model.strip():
        # Treated as a HuggingFace ID when it contains "/" but does not start
        # like a relative, absolute, or home-relative filesystem path.
        is_hf_model = "/" in args.model and not args.model.startswith((".", "/", "~"))
        if not is_hf_model:
            model_path = Path(args.model).expanduser()
            if not model_path.exists():
                print(f"Error: Model path not found: {model_path}", file=sys.stderr)
                print("Hint: Use HuggingFace model ID like 'apple/FastVLM-7B-int4'", file=sys.stderr)
                return 1

    config = ServerConfig(
        port=args.port,
        host=args.host,
        model_path=args.model,
        backend=args.backend,
        log_level=args.log_level,
        pid_file=args.pid_file,
        log_file=args.log_file
    )
    set_config(config)

    setup_logging(config.log_level, config.log_file)
    logger = logging.getLogger(__name__)

    logger.info(f"FastVLM Server v3.0 (Native MLX-VLM) starting on {config.host}:{config.port}")
    logger.info(f"Model: {config.model_path}")
    logger.info(f"Backend: {config.resolved_backend}")

    if config.is_huggingface_model:
        logger.info("📦 Using HuggingFace model (will auto-download if needed)")

    write_pid_file(config.pid_file)

    def cleanup(signum, frame):
        # Signal handler: drop the pid file, then exit with status 0.
        logger.info("Signal received, shutting down...")
        remove_pid_file(config.pid_file)
        sys.exit(0)

    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    # The CLI offers "warn", but uvicorn names that level "warning" and
    # rejects "warn"; translate so `--log-level warn` does not abort startup.
    uvicorn_log_level = "warning" if config.log_level == "warn" else config.log_level

    try:
        uvicorn.run(
            "fastvlm_server.server:app",
            host=config.host,
            port=config.port,
            log_level=uvicorn_log_level
        )
    except Exception as e:
        logger.error(f"Server error: {e}")
        return 1
    finally:
        # Runs on normal return, on error, and when cleanup() raises SystemExit.
        remove_pid_file(config.pid_file)

    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())
# analyse-image  (stray non-code text; kept as a comment so importing this module does not raise NameError)