Project Files
src / extract.ts
import * as fs from "node:fs";
import * as fsp from "node:fs/promises";
import * as path from "node:path";
import exifr from "exifr";
import sizeOf from "image-size";
import piexif from "piexifjs";
import { flattenToMetadataList } from "./flatten";
import { parsePngTextAndZtxt } from "./pngText";
import { decodeCharacterCardFromPngChunks } from "./sillyTavernPng";
/** Lower-case file extensions (leading dot included) that are classified as still images. */
const IMAGE_SUFFIXES = new Set<string>([
  ".jpg", ".jpeg", ".png", ".webp",
  ".tif", ".tiff", ".bmp", ".gif",
]);

/** Lower-case file extensions (leading dot included) that are classified as video containers. */
const VIDEO_SUFFIXES = new Set<string>([
  ".mp4", ".mov", ".mkv", ".avi", ".webm",
  ".m4v", ".wmv", ".mpg", ".mpeg", ".3gp",
]);
/**
 * Converts a raw EXIF tag value into a plainer representation.
 *
 * - `Buffer` / `Uint8Array` values are decoded as UTF-8 text.
 * - Arrays are converted element by element (recursively).
 * - Objects carrying both `numerator` and `denominator` become a
 *   `[numerator, denominator]` pair when both are finite numbers and the
 *   denominator is non-zero.
 * - Anything else (including other objects) is returned unchanged.
 *
 * @param value - The raw tag value.
 * @returns The converted value, or `value` itself when no rule applies.
 */
function serializeExifValue(value: unknown): unknown {
  // A Buffer is itself a Uint8Array, so one instanceof test covers both inputs.
  if (value instanceof Uint8Array) {
    const bytes = Buffer.isBuffer(value) ? value : Buffer.from(value);
    return bytes.toString("utf8");
  }

  if (Array.isArray(value)) {
    return value.map((item) => serializeExifValue(item));
  }

  if (typeof value !== "object" || value === null) {
    return value;
  }

  const candidate = value as Record<string, unknown>;
  if (!("numerator" in candidate) || !("denominator" in candidate)) {
    return value;
  }

  const numerator = Number(candidate.numerator);
  const denominator = Number(candidate.denominator);
  const usable =
    Number.isFinite(numerator) && Number.isFinite(denominator) && denominator !== 0;
  // Unusable rationals (NaN parts, zero denominator) are passed through untouched.
  return usable ? [numerator, denominator] : value;
}
/**
 * Reads the tags exifr can parse from a file and returns them as a flat
 * key → value record, with each value passed through `serializeExifValue`.
 *
 * This is best-effort: if exifr rejects (for example because it does not
 * recognise the container format), an empty record is returned instead of
 * propagating the error, so a file without readable EXIF does not abort the
 * caller's whole extraction.
 *
 * @param filePath - Path of the file to parse.
 * @returns The parsed tags, or `{}` when nothing could be parsed.
 */
async function pillowLikeExif(filePath: string): Promise<Record<string, unknown>> {
  let tags: unknown;
  try {
    tags = await exifr.parse(filePath, { translateKeys: true, reviveValues: true });
  } catch {
    // Unparseable/unsupported input is treated the same as "no tags present".
    return {};
  }
  if (!tags || typeof tags !== "object") return {};

  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(tags as Record<string, unknown>)) {
    // Skip bookkeeping entries ("errors") and tags whose name resolved to the
    // literal string "undefined".
    if (key === "errors" || key === "undefined") continue;
    out[key] = serializeExifValue(value);
  }
  return out;
}
/**
 * Loads a file with piexifjs and returns its IFDs as nested records:
 * `{ [ifdName]: { [tagName]: value } }`.
 *
 * Tag ids are translated to names via the piexifjs tag table when an entry
 * exists; otherwise the raw tag key is kept. The `thumbnail` entry and any
 * IFD that is not a plain object are skipped. Values are passed through
 * `serializeExifValue`.
 *
 * @param filePath - Path of the file to read (read synchronously).
 * @returns The per-IFD records, or `null` if reading or parsing throws.
 */
function piexifDict(filePath: string): Record<string, unknown> | null {
  type TagTables = Record<string, Record<number, { name: string }>>;
  try {
    const binary = fs.readFileSync(filePath).toString("binary");
    const raw = piexif.load(binary) as Record<string, unknown>;

    // piexifjs publishes its tag table as `TAGS`; `Tags` is kept as a fallback
    // for builds that expose the same table under that name.
    const lib = piexif as { TAGS?: TagTables; Tags?: TagTables };
    const tagTables = lib.TAGS ?? lib.Tags;

    const out: Record<string, unknown> = {};
    for (const ifdName of Object.keys(raw)) {
      if (ifdName === "thumbnail") continue;
      const ifd = raw[ifdName];
      if (!ifd || typeof ifd !== "object" || Array.isArray(ifd)) continue;

      const tagTable = tagTables?.[ifdName];
      const section: Record<string, unknown> = {};
      for (const [tag, val] of Object.entries(ifd as Record<string, unknown>)) {
        const tagNum = Number(tag);
        const known = tagTable && Number.isFinite(tagNum) ? tagTable[tagNum] : undefined;
        // `||` on purpose: a missing or empty name falls back to the raw key.
        const tagName = known?.name || tag;
        section[tagName] = serializeExifValue(val);
      }
      out[ifdName] = section;
    }
    return out;
  } catch {
    // Best-effort: any read/parse failure is reported to the caller as null.
    return null;
  }
}
/**
 * Classifies a path purely by its (case-insensitive) file extension.
 *
 * @param filePath - Path whose extension is inspected; the file is not opened.
 * @returns `"image"` or `"video"` for a known extension, otherwise `null`.
 */
function sniffKind(filePath: string): "image" | "video" | null {
  const extension = path.extname(filePath).toLowerCase();
  if (IMAGE_SUFFIXES.has(extension)) {
    return "image";
  }
  return VIDEO_SUFFIXES.has(extension) ? "video" : null;
}
/**
 * Decides whether a file should be treated as an image or a video.
 *
 * The extension is consulted first. If it is not a known one, two content
 * probes run in order and the first one that succeeds yields `"image"`:
 *   1. `sizeOf` on the file's bytes completes without throwing;
 *   2. `exifr.parse` resolves to an object.
 * A probe that throws is ignored. If neither probe succeeds the result is
 * `"video"`.
 *
 * @param filePath - Path of the file to classify.
 * @returns `"image"` or `"video"`.
 */
async function guessMediaKind(filePath: string): Promise<"image" | "video"> {
  const byExtension = sniffKind(filePath);
  if (byExtension !== null) return byExtension;

  const imageProbes: Array<() => Promise<boolean>> = [
    async () => {
      sizeOf(await fsp.readFile(filePath));
      return true;
    },
    async () => {
      const parsed = await exifr.parse(filePath, true);
      return Boolean(parsed) && typeof parsed === "object";
    },
  ];

  for (const probe of imageProbes) {
    try {
      if (await probe()) return "image";
    } catch {
      // A failing probe just means "not recognised"; try the next one.
    }
  }
  return "video";
}
/**
 * Builds the metadata document for a single media file.
 *
 * Every document carries `schema_version`, `source_path` (absolute),
 * `media_kind` and `extracted_at` (ISO timestamp). Image files additionally
 * get an `image` block (format, dimensions, exifr tags, and for PNGs the text
 * chunks plus any decoded character card) and, when requested for
 * `.jpg`/`.jpeg` files, a `piexif` block. Non-image files get an `ffprobe`
 * placeholder stating that ffprobe is not invoked.
 *
 * @param filePath - Path of the file to inspect.
 * @param includePiexif - Whether to attach the piexif dump for JPEG files.
 * @returns The assembled document.
 * @throws Error if `filePath` does not refer to a regular file.
 */
export async function extractMetadataDocument(
  filePath: string,
  includePiexif: boolean,
): Promise<Record<string, unknown>> {
  const stats = await fsp.stat(filePath);
  if (!stats.isFile()) {
    throw new Error(`Not a file: ${filePath}`);
  }

  const kind = await guessMediaKind(filePath);
  const timestamp = new Date().toISOString();
  const doc: Record<string, unknown> = {
    schema_version: 1,
    source_path: path.resolve(filePath),
    media_kind: kind,
    extracted_at: timestamp,
  };

  if (kind !== "image") {
    doc.ffprobe = {
      unsupported: true,
      reason:
        "This LM Studio plugin build does not invoke ffprobe. Use the Python exif-sniffer MCP server for full ffprobe JSON.",
    };
    return doc;
  }

  type Dimensions = { width?: number; height?: number; type?: string };

  const contents = await fsp.readFile(filePath);
  let dims: Dimensions;
  try {
    dims = sizeOf(contents) as Dimensions;
  } catch {
    // Dimensions are optional; an unreadable header leaves them as null below.
    dims = {};
  }

  const rawExt = path.extname(filePath);
  const lowerExt = rawExt.toLowerCase();

  // Prefer the type detected from the bytes; otherwise fall back to the
  // upper-cased extension, or null when the file has no extension.
  const detectedFormat = dims.type?.toUpperCase();
  const extensionFormat = rawExt.replace(".", "").toUpperCase();
  const format: string | null = detectedFormat ?? (extensionFormat || null);

  const exifTags = await pillowLikeExif(filePath);
  const imageBlock: Record<string, unknown> = {
    format,
    mode: null,
    size: [dims.width ?? null, dims.height ?? null],
    exif_pillow: exifTags,
  };

  const looksLikePng = format === "PNG" || lowerExt === ".png";
  if (looksLikePng) {
    const pngTextChunks = parsePngTextAndZtxt(filePath);
    imageBlock.png_text_chunks = pngTextChunks;

    const card = decodeCharacterCardFromPngChunks(pngTextChunks);
    if (card) {
      imageBlock.character_card =
        "error" in card
          ? { error: card.error }
          : { format: card.format, keyword: card.keyword, json: card.json };
    }
  }
  doc.image = imageBlock;

  const isJpeg = lowerExt === ".jpg" || lowerExt === ".jpeg";
  if (includePiexif && isJpeg) {
    doc.piexif = piexifDict(filePath);
  }
  return doc;
}
/**
 * Extracts the metadata document for a file and returns it in the flattened
 * form produced by `flattenToMetadataList`.
 *
 * @param filePath - Path of the file to inspect.
 * @param includePiexif - Forwarded to `extractMetadataDocument`.
 * @returns Whatever `flattenToMetadataList` yields for the document.
 */
export async function extractMetadataList(filePath: string, includePiexif: boolean) {
  return flattenToMetadataList(await extractMetadataDocument(filePath, includePiexif));
}