Project Files
src / fastvlm_server / __main__.py
"""CLI entry point for FastVLM Server."""
from __future__ import annotations
import argparse
import logging
import os
import signal
import sys
from pathlib import Path
import uvicorn
from .config import ServerConfig, set_config
def setup_logging(log_level: str, log_file: str | None = None) -> None:
    """Configure root logging to stdout and, optionally, to a log file.

    Args:
        log_level: Level name, matched case-insensitively against the
            ``logging`` module's level constants (e.g. ``"info"``). A name
            that does not resolve to a numeric level falls back to INFO.
        log_file: Path of a log file. Missing parent directories are created.
            When empty or ``None`` only the stdout handler is installed.
    """
    level = getattr(logging, log_level.upper(), logging.INFO)
    if not isinstance(level, int):
        # The name matched some other attribute of `logging` (for example
        # BASIC_FORMAT); passing that on would make basicConfig raise.
        level = logging.INFO

    handlers: list[logging.Handler] = [logging.StreamHandler(sys.stdout)]
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 so non-ASCII log messages do not depend on the
        # platform's locale encoding.
        handlers.append(logging.FileHandler(log_path, encoding="utf-8"))

    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        handlers=handlers,
    )
def write_pid_file(pid_file: str) -> None:
    """Record the current process ID in *pid_file*.

    Parent directories of *pid_file* are created when they do not exist, and
    any existing file content is replaced.
    """
    destination = Path(pid_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w") as handle:
        handle.write(f"{os.getpid()}")
def remove_pid_file(pid_file: str) -> None:
    """Delete *pid_file* on a best-effort basis.

    A file that is already gone is not an error. Any other failure is
    reported as a warning on this module's logger and never raised.
    """
    try:
        Path(pid_file).unlink()
    except FileNotFoundError:
        # Nothing to remove.
        return
    except Exception as problem:
        message = f"Failed to remove pid file '{pid_file}': {problem}"
        logging.getLogger(__name__).warning(message)
def main() -> int:
    """Parse the command line, build the server configuration and run uvicorn.

    Most options default to a ``FASTVLM_*`` environment variable when it is
    set and to a built-in value otherwise. The process ID is written to the
    configured pid file before the server starts and removed on shutdown.

    Returns:
        ``0`` after the server stops normally, ``1`` when the given model
        path does not exist or the server raised an exception.
    """
    # Base directory for the default pid/log file locations.
    fastvlm_home = Path(os.environ.get("FASTVLM_HOME", str(Path.home() / ".fastvlm")))

    parser = argparse.ArgumentParser(
        description="FastVLM Sidecar Server - HTTP API for image analysis"
    )
    parser.add_argument(
        "--port", "-p",
        type=int,
        default=int(os.environ.get("FASTVLM_PORT", "8765")),
        help="HTTP port (default: 8765)",
    )
    parser.add_argument(
        "--model", "-m",
        type=str,
        default=os.environ.get("FASTVLM_MODEL", ""),
        required=False,
        help="Path to FastVLM model directory. Omit to run in detection-only mode.",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=os.environ.get("FASTVLM_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--pid-file",
        type=str,
        default=os.environ.get("FASTVLM_PID_FILE", str(fastvlm_home / "fastvlm.pid")),
        help="Path of the pid file written at startup",
    )
    parser.add_argument(
        "--log-file",
        type=str,
        default=os.environ.get("FASTVLM_LOG_FILE", str(fastvlm_home / "fastvlm.log")),
        help="Path of the log file",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["debug", "info", "warn", "error"],
        default="info",
        help="Log verbosity (default: info)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=int(os.environ.get("FASTVLM_MAX_TOKENS", "256")),
        help="Default max tokens for generation (default: 256)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=float(os.environ.get("FASTVLM_TEMPERATURE", "0.7")),
        help="Default temperature for generation (default: 0.7)",
    )
    parser.add_argument(
        "--backend",
        type=str,
        choices=["ane", "mlx"],
        default=os.environ.get("FASTVLM_BACKEND", "ane"),
        help="Backend mode: ane (CoreML/ANE) or mlx (MLX-only)",
    )
    parser.add_argument(
        "--florence2-model-path",
        type=str,
        default=os.environ.get("FASTVLM_FLORENCE2_MODEL_PATH", ""),
        help="Path to Florence-2 model directory (optional; preloaded at startup if provided)",
    )
    parser.add_argument(
        "--lazy",
        action="store_true",
        default=False,
        help="Lazy-load models on first request instead of at startup (for plugin/embedded mode)",
    )
    parser.add_argument(
        "--detect-backend",
        type=str,
        choices=["florence2", "qwen3-vl"],
        default=os.environ.get("FASTVLM_DETECT_BACKEND", "florence2"),
        help="Object detection backend: florence2 or qwen3-vl (default: florence2)",
    )
    parser.add_argument(
        "--qwen3-vl-model-path",
        type=str,
        default=os.environ.get("FASTVLM_QWEN3_VL_MODEL_PATH", ""),
        help="Path to Qwen3-VL model directory (required when --detect-backend=qwen3-vl)",
    )
    args = parser.parse_args()

    # Validate model path for local paths (not empty)
    if args.model.strip():
        model_path = Path(args.model).expanduser()
        if not model_path.exists():
            print(f"Error: Model path not found: {model_path}", file=sys.stderr)
            return 1

    config = ServerConfig(
        port=args.port,
        host=args.host,
        model_path=args.model,
        log_level=args.log_level,
        backend=args.backend,
        pid_file=args.pid_file,
        log_file=args.log_file,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        florence2_model_path=args.florence2_model_path,
        lazy=args.lazy,
        detect_backend=args.detect_backend,
        qwen3_vl_model_path=args.qwen3_vl_model_path,
    )
    set_config(config)
    setup_logging(config.log_level, config.log_file)

    logger = logging.getLogger(__name__)
    logger.info(f"FastVLM Server starting on {config.host}:{config.port}")
    logger.info(f"Model: {config.model_path or '(none)'}")
    logger.info(f"Defaults: max_tokens={config.max_tokens}, temperature={config.temperature}")
    logger.info(f"Backend: {config.resolved_backend}")
    if config.lazy:
        logger.info("Mode: lazy (models load on first request)")
    else:
        logger.info("Mode: eager (models load at startup)")
    if config.florence2_model_path:
        logger.info(f"Florence-2: {config.florence2_model_path}")
    else:
        logger.info("Florence-2: not configured (--florence2-model-path not set)")

    write_pid_file(config.pid_file)

    def cleanup(signum, frame):
        """Remove the pid file and exit when SIGINT/SIGTERM arrives."""
        logger.info("Signal received, shutting down...")
        remove_pid_file(config.pid_file)
        sys.exit(0)

    # NOTE(review): uvicorn appears to install its own SIGINT/SIGTERM handlers
    # while serving, so these likely only cover the window before it starts -
    # confirm against the uvicorn version in use.
    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    # The CLI accepts "warn", but uvicorn only knows the level name "warning";
    # translate it so that choice does not abort server startup.
    uvicorn_log_level = "warning" if config.log_level == "warn" else config.log_level

    try:
        uvicorn.run(
            "fastvlm_server.server:app",
            host=config.host,
            port=config.port,
            log_level=uvicorn_log_level,
            loop="asyncio",
            http="h11",
        )
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Server error: %s", e)
        return 1
    finally:
        remove_pid_file(config.pid_file)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
def write_pid_file(pid_file: str) -> None:
    """Write this process's PID to *pid_file*, creating parent directories first."""
    current_pid = str(os.getpid())
    location = Path(pid_file)
    folder = location.parent
    folder.mkdir(parents=True, exist_ok=True)
    location.write_text(current_pid)
def remove_pid_file(pid_file: str) -> None:
    """Delete *pid_file* on a best-effort basis.

    A missing file is not an error. Any other failure is logged as a warning
    and never raised, so shutdown paths that call this cannot crash on it.
    """
    try:
        Path(pid_file).unlink(missing_ok=True)
    except Exception as exc:
        # Still best-effort, but report the failure so a stale pid file left
        # on disk can be diagnosed.
        logging.getLogger(__name__).warning(
            "Failed to remove pid file '%s': %s", pid_file, exc
        )
def main() -> int:
    """Parse the command line, build the server configuration and run uvicorn.

    ``--model`` may be a HuggingFace model ID or a local path; only local
    paths are checked for existence. The process ID is written to the pid
    file before the server starts and removed on shutdown.

    Returns:
        ``0`` after the server stops normally, ``1`` when a local model path
        does not exist or the server raised an exception.
    """
    # NOTE(review): the description below says v3.1.0 while the startup log
    # line says v3.0 - confirm which version string is current.
    parser = argparse.ArgumentParser(
        description="FastVLM Sidecar Server - HTTP API for image analysis and Florence-2 detection (v3.1.0)",
        epilog="Example: python -m fastvlm_server --model apple/FastVLM-7B-int4 --port 8765",
    )
    parser.add_argument(
        "--port", "-p",
        type=int,
        default=int(os.environ.get("FASTVLM_PORT", "8765")),
        help="HTTP port (default: 8765)",
    )
    parser.add_argument(
        "--model", "-m",
        type=str,
        default=os.environ.get("FASTVLM_MODEL", ""),
        required=False,
        help="HuggingFace model ID (e.g., apple/FastVLM-7B-int4) or local path. "
             "Omit to run in detection-only mode (Florence-2 /detect endpoint only).",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=os.environ.get("FASTVLM_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--backend",
        type=str,
        choices=["mlx", "pytorch", "auto"],
        default=os.environ.get("FASTVLM_BACKEND", "auto"),
        help="Backend: mlx (fast ~1s), pytorch (slow ~20s), auto (detect)",
    )
    parser.add_argument(
        "--pid-file",
        type=str,
        default=str(Path.home() / ".fastvlm" / "fastvlm.pid"),
        help="Path of the pid file written at startup",
    )
    parser.add_argument(
        "--log-file",
        type=str,
        default=str(Path.home() / ".fastvlm" / "fastvlm.log"),
        help="Path of the log file",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["debug", "info", "warn", "error"],
        default="info",
        help="Log verbosity (default: info)",
    )
    args = parser.parse_args()

    # Validate model path for local files (skip for HuggingFace IDs and empty path)
    if args.model.strip():
        # Treated as a HuggingFace ID when it contains "/" and does not look
        # like a relative, absolute or home-relative filesystem path.
        is_hf_model = "/" in args.model and not args.model.startswith((".", "/", "~"))
        if not is_hf_model:
            model_path = Path(args.model).expanduser()
            if not model_path.exists():
                print(f"Error: Model path not found: {model_path}", file=sys.stderr)
                print("Hint: Use HuggingFace model ID like 'apple/FastVLM-7B-int4'", file=sys.stderr)
                return 1

    config = ServerConfig(
        port=args.port,
        host=args.host,
        model_path=args.model,
        backend=args.backend,
        log_level=args.log_level,
        pid_file=args.pid_file,
        log_file=args.log_file,
    )
    set_config(config)
    setup_logging(config.log_level, config.log_file)

    logger = logging.getLogger(__name__)
    logger.info(f"FastVLM Server v3.0 (Native MLX-VLM) starting on {config.host}:{config.port}")
    logger.info(f"Model: {config.model_path}")
    logger.info(f"Backend: {config.resolved_backend}")
    if config.is_huggingface_model:
        logger.info("📦 Using HuggingFace model (will auto-download if needed)")

    write_pid_file(config.pid_file)

    def cleanup(signum, frame):
        """Remove the pid file and exit when SIGINT/SIGTERM arrives."""
        logger.info("Signal received, shutting down...")
        remove_pid_file(config.pid_file)
        sys.exit(0)

    # NOTE(review): uvicorn appears to install its own SIGINT/SIGTERM handlers
    # while serving, so these likely only cover the window before it starts -
    # confirm against the uvicorn version in use.
    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    # The CLI accepts "warn", but uvicorn only knows the level name "warning";
    # translate it so that choice does not abort server startup.
    uvicorn_log_level = "warning" if config.log_level == "warn" else config.log_level

    try:
        uvicorn.run(
            "fastvlm_server.server:app",
            host=config.host,
            port=config.port,
            log_level=uvicorn_log_level,
        )
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Server error: %s", e)
        return 1
    finally:
        remove_pid_file(config.pid_file)
    return 0
# Run the CLI when executed as a script; main()'s result is the exit status.
if __name__ == "__main__":
    raise SystemExit(main())