// Project file: src/config.ts
import { createConfigSchematics } from "@lmstudio/sdk";
/**
 * Global plugin configuration schema.
 *
 * Declares every user-visible setting for the vision tooling: chat preview
 * toggle, MLX Vision (FastVLM) server options, Florence-2 / Qwen3-VL object
 * detection options, server lifetime, and the local HTTP image server port.
 *
 * NOTE: field keys (e.g. "mlxVisionEndpoint") are the persisted identifiers —
 * do not rename them, or existing user configs will silently lose values.
 */
export const globalConfigSchematics = createConfigSchematics()
  // Chat UX: inline image previews attached to tool responses.
  .field(
    "PREVIEW_IN_CHAT",
    "boolean",
    {
      displayName: "Previews in Chat",
      subtitle:
        "When enabled, tool responses include inline image previews. Recommended for local models without vision capability.",
    },
    true
  )
  // Whether the FastVLM vision model is loaded when the server starts.
  .field(
    "mlxVisionEnabled",
    "boolean",
    {
      displayName: "MLX Vision: Load Model",
      subtitle:
        "When enabled, the FastVLM model is loaded on server start. Disable if you do not use vision analysis.",
    },
    true
  )
  // Backend selector: off = MLX Metal GPU, on = CoreML CPU vision tower.
  .field(
    "mlxVisionBackend",
    "boolean",
    {
      displayName: "FastVLM: CoreML Vision Backend",
      subtitle:
        "Off (default): MLX Metal GPU. On: CoreML CPU inference for vision — frees Metal GPU for language generation (requires fastvithd.mlpackage).",
      engineDoesNotSupport: true,
    },
    false
  )
  // HTTP endpoint the plugin calls for image analysis.
  .field(
    "mlxVisionEndpoint",
    "string",
    {
      displayName: "MLX Vision Endpoint",
      subtitle:
        "URL of the MLX Vision /analyze endpoint. Default: http://localhost:8765/analyze",
      placeholder: "http://localhost:8765/analyze",
      engineDoesNotSupport: true,
    },
    "http://localhost:8765/analyze"
  )
  // Default analysis prompt; the multi-line default is assembled below.
  .field(
    "mlxVisionPrompt",
    "string",
    {
      displayName: "Vision Prompt",
      subtitle:
        "Default prompt sent to the vision model. Leave empty to use the model default.",
      placeholder: "",
      isParagraph: true,
    },
    [
      "Analyze this image based strictly on what is directly visible. Do not infer, assume, or complete information that is not present.",
      "",
      "STEP 1 — GROUND TRUTH (always required):",
      'Before any detailed analysis, state in one sentence what the image actually shows (e.g., "This image shows a person", "This image shows a geometric shape", "This image shows a product on a plain background"). If the image does not contain a person, skip all person-specific sections below and describe only what is present.',
      "",
      "---",
      "",
      "IF AND ONLY IF a person is visible, describe:",
      "",
      "1. SUBJECT PHYSICAL CHARACTERISTICS:",
      "   - Face shape, skin tone, facial features (eyes, nose, lips, eyebrows) — only what is clearly visible",
      "   - Hair: color, length, style, texture, specific arrangement",
      "   - Visible age indicators and gender markers based on physical traits alone",
      "",
      "2. CLOTHING & DESIGN ELEMENTS — only if clothing is present:",
      "   - Garment type, colors, patterns, textures (only if present and visible)",
      "   - Specific design features (lines, shapes, geometric elements)",
      "   - Color palette with exact color names where possible",
      "",
      "3. COMPOSITION & FRAMING:",
      "   - What is included in the frame (head position, body coverage)",
      "   - Background characteristics and spatial relationships",
      "   - Lighting quality, direction, shadow patterns",
      "",
      "4. CULTURAL & ETHNIC INDICATORS:",
      "   - Specific facial features that suggest ethnic background",
      "   - Any visible cultural markers in clothing or styling",
      "   - Note only what is visually present, not inferred",
      "",
      "---",
      "",
      "IF no person is visible, describe only:",
      "- Shape, form, color, texture, and spatial relationships of what is actually present",
      "- Composition and framing as above",
      "",
      "---",
      "",
      "INTERPRETATIONS (separate section, always):",
      "- Based solely on the observable facts above, note any stylistic intentions or design approaches suggested by the visual evidence",
      "- Clearly distinguish between what IS seen and what CAN BE INFERRED",
      "",
      'Avoid vague terms like "beautiful," "modern," "gender-neutral" unless supported by specific visual evidence. Never describe content that is not present in the image.',
    ].join("\n")
  )
  // HTTP endpoint the plugin calls for object detection.
  .field(
    "detectEndpoint",
    "string",
    {
      displayName: "Florence-2 Detect Endpoint",
      subtitle:
        "URL of the Florence-2 /detect endpoint. Default: http://localhost:8765/detect",
      placeholder: "http://localhost:8765/detect",
      engineDoesNotSupport: true,
    },
    "http://localhost:8765/detect"
  )
  // Local FastVLM weights location (empty by default — user must set it).
  .field(
    "mlxVisionModelPath",
    "string",
    {
      displayName: "MLX Vision: Model Path",
      subtitle:
        "Absolute path to the FastVLM model directory (e.g. FastVLM-7B-int4). Required for Node-managed server mode.",
      placeholder: "~/Documents/Models/FastVLM-7B-MLX",
    },
    ""
  )
  // Port shared by the /analyze and /detect endpoints.
  .field(
    "mlxVisionPort",
    "numeric",
    {
      displayName: "MLX Vision: Port",
      subtitle:
        "Port for the local FastVLM server (shared with Florence-2 detect). Default: 8765.",
    },
    8765
  )
  // Generation length cap for vision responses.
  .field(
    "mlxVisionMaxTokens",
    "numeric",
    {
      displayName: "MLX Vision: Max Tokens",
      subtitle: "Maximum response length in tokens (1–4096). Default: 384.",
    },
    384
  )
  // Sampling temperature for vision responses.
  .field(
    "mlxVisionTemperature",
    "numeric",
    {
      displayName: "MLX Vision: Temperature",
      subtitle: "Sampling temperature (0.0–2.0). Default: 0.7.",
    },
    0.7
  )
  // Whether the detection model is loaded when the server starts.
  .field(
    "detectEnabled",
    "boolean",
    {
      displayName: "Detection: Load Model",
      subtitle:
        "When enabled, the detection model is loaded on server start. Disable if object detection is not needed.",
    },
    true
  )
  // Local Florence-2 weights location (empty by default — user must set it).
  .field(
    "detectModelPath",
    "string",
    {
      displayName: "Florence-2: Model Path",
      subtitle:
        "Absolute path to the Florence-2 model directory. Required for Node-managed server mode.",
      placeholder: "~/Documents/Models/Florence-2-large",
    },
    ""
  )
  // Detection backend selector: off = Florence-2, on = Qwen3-VL.
  .field(
    "detectBackend",
    "boolean",
    {
      displayName: "Detection Backend: Use Qwen3-VL",
      subtitle:
        "When enabled, Qwen3-VL is used for object detection instead of Florence-2. Requires Qwen3-VL Model Path below.",
      engineDoesNotSupport: false,
    },
    false
  )
  // Default object-detection instruction for the Qwen3-VL backend.
  .field(
    "qwen3VlOdPrompt",
    "string",
    {
      displayName: "Qwen3-VL: Object Detection Prompt",
      subtitle:
        "Instruction sent to Qwen3-VL for default object detection (task omitted or '<OD>'). Leave empty to use the built-in default.",
      placeholder: "",
      isParagraph: true,
      engineDoesNotSupport: false,
    },
    [
      "Detect objects in the image with strict hierarchical prioritization.",
      "",
      "PRIORITY 1 (CRITICAL - MUST DETECT FIRST):",
      '- You MUST detect "human face" (highest priority if a person is present)',
      '- You MUST detect "person" (if no face is clearly visible or if the person is the main subject)',
      "",
      "PRIORITY 2 (MAIN SUBJECT / HERO ELEMENT):",
      "- The most visually prominent object or subject that is NOT part of the background.",
      "- Use specific, concrete labels (e.g., 'red car', 'fluffy owl toy').",
      "- Avoid generic terms like 'object' or 'thing'.",
      "",
      "PRIORITY 3 (CONTEXTUAL BACKGROUND ELEMENTS):",
      "- Only detect background elements if they are significant to the scene composition OR if the main subject is interacting with them.",
      "- Do not detect minor or redundant background details.",
      "",
      "PRIORITY 4 (FOCUSSED MAIN SUBJECT / HERO ELEMENT):",
      "- All visible body parts (hands, feet, arms, legs).",
      "- Elements of the face, as far as clearly detectable and focussed on close-ups: nose, mouth, left and right eyes, eyebrows and ears",
      // BUGFIX: was over-escaped (\\\" → literal backslashes leaked into the
      // prompt the model receives); now emits plain quotes like the lines above.
      "- anatomical details, as far as recognizable as \"focussed\" or \"prominent\" (e.g., 'iris', 'pupil', 'eyelid')",
      "",
      "RULES:",
      "- Maximum 16 objects total.",
      "- Each bounding box must be unique and non-redundant.",
      "- For clothing, name the specific garment (e.g., 'tank top', 'jeans').",
      "- For body parts, qualify by position (e.g., 'left hand').",
      "- NEVER prioritize background elements over the main subject or human face.",
      "- NEVER prioritize anatomical details over general concepts unless they are solely focussed (e.g. only detect 'eyes' unless 'human face' is the dominant part of the image)",
      "- If the main subject is a person, focus on the person and their immediate interactions. Ignore background elements unless they are directly involved in the interaction.",
    ].join("\n")
  )
  // Local Qwen3-VL weights location; required when detectBackend is on.
  .field(
    "qwen3VlModelPath",
    "string",
    {
      displayName: "Qwen3-VL: Model Path",
      subtitle:
        "Absolute path to the Qwen3-VL MLX model directory (e.g. Qwen3-VL-8B-Instruct-MLX-4bit). Required when Detection Backend is set to Qwen3-VL.",
      placeholder: "~/.lmstudio/models/lmstudio-community/Qwen3-VL-8B-Instruct-MLX-4bit",
      engineDoesNotSupport: false,
    },
    ""
  )
  // Server lifetime policy: 0 = never start, N = idle-offload minutes, 1440 = pinned.
  .field(
    "serverTTL",
    "numeric",
    {
      displayName: "Server TTL (minutes)",
      subtitle:
        "Controls server lifetime. 0 = do not start server; N = offload model after N minutes of inactivity; 1440 = keep loaded until LM Studio exits.",
      engineDoesNotSupport: true,
    },
    1440
  )
  // Port for the localhost static image server.
  .field(
    "HTTP_SERVER_PORT",
    "numeric",
    {
      displayName: "Local HTTP Server Port",
      subtitle:
        "Port for serving generated images over localhost (default: 54760).",
      engineDoesNotSupport: true,
    },
    54760
  )
  // Whether PNG-embedded Draw Things parameters are appended to results.
  .field(
    "includeGenerationMetadata",
    "boolean",
    {
      displayName: "Include Generation Metadata",
      subtitle:
        "When enabled, Draw Things generation parameters (prompt, model, sampler, seed, …) embedded in PNG files are appended to each analysis result.",
      engineDoesNotSupport: false,
    },
    true
  )
  .build();
/** Florence-2 model directory from the environment; empty string when unset. */
export const FLORENCE2_MODEL_PATH: string =
  process.env.FLORENCE2_MODEL_PATH ?? "";
/** Detection endpoint URL; overridable via the DETECT_ENDPOINT env var. */
export const DETECT_ENDPOINT: string =
  process.env.DETECT_ENDPOINT ?? "http://localhost:8765/detect";