// Project Files
// src/media/videoAnalysis.ts
/**
* @file Video analysis tool — extracts evenly-spaced frames using ffmpeg (absolute path).
*
* Returns an array of low-resolution base64 frames for vision model analysis.
* Requires ffmpeg installed on the system (brew install ffmpeg on macOS).
*/
import { tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { mkdir, readFile, rm, stat } from "fs/promises";
import { extname, isAbsolute, join, resolve } from "path";
import { tmpdir } from "os";
import { randomBytes } from "crypto";
import { execFile } from "child_process";
import { promisify } from "util";
import { getFfmpegPath, getFfprobePath } from "./ffmpegPath";
// Promisified execFile: runs a binary with an argv array (no shell involved),
// resolving with { stdout, stderr } and rejecting on a non-zero exit code.
const execFileAsync = promisify(execFile);
// Container formats the tool will accept; membership is checked by file
// extension only (lowercased), not by sniffing the file contents.
const SUPPORTED_VIDEO_EXTENSIONS = new Set([
".mp4", ".mov", ".avi", ".mkv", ".webm", ".m4v",
]);
// Top-level directories the tool refuses to read from (Unix-style paths).
const BLOCKED_PREFIXES = ["/etc", "/var", "/usr", "/System", "/Library", "/private"];

/**
 * Returns true when `p` lies outside every blocked system directory.
 *
 * A prefix matches only at a path-component boundary: the path must equal the
 * prefix exactly or continue with a "/" separator. A bare `startsWith` check
 * would also (incorrectly) reject unrelated siblings such as "/etcetera/x.mp4"
 * or "/usrlocal/clip.mov".
 */
function isSafePath(p: string): boolean {
  return !BLOCKED_PREFIXES.some(
    (prefix) => p === prefix || p.startsWith(prefix + "/"),
  );
}
/**
 * Builds the `analyze_video` tool.
 *
 * The tool probes a local video with ffprobe, samples `frame_count`
 * evenly-spaced interior timestamps, and extracts one JPEG per timestamp with
 * ffmpeg, scaled so neither side exceeds `max_dimension`. Frames are returned
 * as base64 data URIs. If the combined payload exceeds the byte budget, every
 * frame is re-extracted at a smaller size and lower quality.
 *
 * @param ctl Tools-provider controller from the SDK. Not used in this factory;
 *   presumably kept for interface parity with sibling tool factories — TODO confirm.
 * @param configFrameCount Default frame count when the caller omits
 *   `frame_count` (default 4; clamped to 1..10 at call time).
 * @param configMaxDim Default max frame dimension in pixels when the caller
 *   omits `max_dimension` (default 384; capped at 768).
 * @returns The configured `analyze_video` Tool.
 */
export function createVideoAnalysisTool(
  ctl: ToolsProviderController,
  configFrameCount: number = 4,
  configMaxDim: number = 384,
): Tool {
  // Human-readable message for values caught as `unknown`.
  const errMsg = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  return tool({
    name: "analyze_video",
    description:
      "Extract evenly-spaced frames from a local video file and return them as low-resolution base64 images. " +
      "Supports MP4, MOV, AVI, MKV, WebM. " +
      "Use this to describe video content, detect scenes, or analyze screen recordings.",
    parameters: {
      file_path: z.string().describe("Absolute path to the video file."),
      frame_count: z
        .number()
        .int()
        .min(1)
        .max(10)
        .optional()
        .describe("Number of evenly-spaced frames to extract. Default: 4."),
      max_dimension: z
        .number()
        .int()
        .min(128)
        .max(768)
        .optional()
        .describe("Max width or height of each frame in pixels. Default: 384."),
    },
    implementation: async (
      { file_path, frame_count, max_dimension }: { file_path: string; frame_count?: number; max_dimension?: number },
      { status, warn },
    ) => {
      // zod validates the explicit parameters, but the factory defaults are
      // not validated anywhere — clamp both to the documented ranges so a
      // misconfigured factory cannot request 0 or 1000 frames.
      const numFrames = Math.min(Math.max(Math.trunc(frame_count ?? configFrameCount), 1), 10);
      const maxDim = Math.min(max_dimension ?? configMaxDim, 768);

      const resolvedPath = isAbsolute(file_path) ? file_path : resolve(file_path);
      if (!isSafePath(resolvedPath)) {
        return { error: `Access denied: '${resolvedPath}' is in a protected system directory.` };
      }
      const ext = extname(resolvedPath).toLowerCase();
      if (!SUPPORTED_VIDEO_EXTENSIONS.has(ext)) {
        return {
          error: `Unsupported video format '${ext}'. Supported: ${[...SUPPORTED_VIDEO_EXTENSIONS].join(", ")}`,
        };
      }
      try {
        const fileStat = await stat(resolvedPath);
        if (!fileStat.isFile()) {
          return { error: `Not a file: ${resolvedPath}` };
        }
      } catch {
        return { error: `File not found: ${resolvedPath}` };
      }

      // Per-invocation scratch directory; removed in the finally block below.
      const tmpDir = join(tmpdir(), `maestro-video-${randomBytes(6).toString("hex")}`);
      await mkdir(tmpDir, { recursive: true });
      try {
        const ffmpeg = await getFfmpegPath();
        const ffprobe = await getFfprobePath();

        // Step 1: read the container duration from ffprobe's JSON output.
        status("Probing video metadata...");
        let duration: number;
        try {
          const { stdout } = await execFileAsync(ffprobe, [
            "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            resolvedPath,
          ]);
          const probeData = JSON.parse(stdout);
          duration = parseFloat(probeData?.format?.duration ?? "0");
        } catch (err: unknown) {
          return {
            error: `Could not probe video: ${errMsg(err)}`,
          };
        }
        // Reject 0, negative, and NaN/Infinity (e.g. a missing duration field).
        if (!Number.isFinite(duration) || duration <= 0) {
          return { error: "Could not determine video duration." };
        }

        // Step 2: sample N interior timestamps, skipping the very start and
        // end of the video where frames are often black or mid-transition.
        const timestamps = Array.from({ length: numFrames }, (_, i) =>
          (duration / (numFrames + 1)) * (i + 1),
        );

        type Frame = { index: number; timestamp_s: number; data_uri: string; bytes: number };

        // Extracts one scaled JPEG at timestamp `t`; returns null (after a
        // warning) when ffmpeg fails for that frame. Shared by the initial
        // extraction pass and the re-compression pass.
        const extractFrame = async (
          t: number,
          index: number,
          dim: number,
          quality: number,
          filePrefix: string,
        ): Promise<Frame | null> => {
          const outputPath = join(tmpDir, `${filePrefix}${String(index).padStart(3, "0")}.jpg`);
          try {
            await execFileAsync(ffmpeg, [
              // -ss before -i: fast input-side seek, then decode a single frame.
              "-ss", String(t),
              "-i", resolvedPath,
              "-frames:v", "1",
              // Downscale only (never upscale), preserving aspect ratio.
              "-vf", `scale='min(${dim},iw)':'min(${dim},ih)':force_original_aspect_ratio=decrease`,
              "-q:v", String(quality),
              "-y",
              outputPath,
            ]);
            const buffer = await readFile(outputPath);
            return {
              index,
              timestamp_s: Math.round(t * 10) / 10,
              data_uri: `data:image/jpeg;base64,${buffer.toString("base64")}`,
              bytes: buffer.byteLength,
            };
          } catch (err: unknown) {
            warn(`Failed to extract frame at ${t.toFixed(1)}s: ${errMsg(err)}`);
            return null;
          }
        };

        // Runs one extraction pass over all timestamps in parallel (at most
        // 10 concurrent ffmpeg processes) and drops failed frames.
        const extractAll = async (dim: number, quality: number, filePrefix: string): Promise<Frame[]> => {
          const results = await Promise.all(
            timestamps.map((t, i) => extractFrame(t, i, dim, quality, filePrefix)),
          );
          return results.filter((f): f is Frame => f !== null);
        };

        // Step 3: initial extraction pass at the requested dimension.
        status(`Extracting ${numFrames} frames from ${duration.toFixed(1)}s video...`);
        let validFrames = await extractAll(maxDim, 8, "frame_");
        let effectiveDim = maxDim;
        if (validFrames.length === 0) {
          return { error: "Failed to extract any frames from the video." };
        }

        // Enforce a total budget of 600KB of raw JPEG bytes (~800KB once
        // base64-encoded). Over budget, re-extract everything smaller and at
        // lower quality; keep the original frames if the retry yields nothing.
        const MAX_TOTAL_BYTES = 600_000;
        const totalBytes = validFrames.reduce((sum, f) => sum + f.bytes, 0);
        if (totalBytes > MAX_TOTAL_BYTES) {
          const smallerDim = Math.round(maxDim * 0.6);
          status(`Frames too large (${(totalBytes / 1024).toFixed(0)}KB), re-compressing at ${smallerDim}px...`);
          const recompressed = await extractAll(smallerDim, 12, "frame_small_");
          if (recompressed.length > 0) {
            validFrames = recompressed;
            effectiveDim = smallerDim;
          }
        }

        status(`Done — ${validFrames.length} frames extracted`);
        return {
          file_path: resolvedPath,
          duration_s: Math.round(duration * 10) / 10,
          frame_count: validFrames.length,
          // Report the dimension actually used (smaller after re-compression).
          max_dimension: effectiveDim,
          frames: validFrames,
        };
      } catch (err: unknown) {
        return {
          error: `Failed to process video: ${errMsg(err)}`,
          file_path: resolvedPath,
        };
      } finally {
        // Best-effort cleanup of the scratch directory; ignore rm failures.
        await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
      }
    },
  });
}