// src/documents/parsers/pngMetadataParser.ts
/**
* PNG Metadata Parser for Draw Things + ComfyUI Generated Images
* - Draw Things: Extract generation parameters from PNG XMP (exif:UserComment JSON)
* - ComfyUI: Extract generation graph from PNG text chunks (tEXt/iTXt/zTXt), typically keyword "prompt" (JSON)
*/
import { readFile } from 'fs/promises';
import { inflateSync } from 'zlib';
import type { ParsedDocument } from '../../types';
/**
 * Draw Things PNG metadata structure, as decoded from the JSON payload
 * embedded in the XMP `exif:UserComment` field.
 */
export interface DrawThingsMetadata {
  c: string; // positive prompt (caption)
  uc?: string; // negative prompt ("uncondition")
  model: string; // checkpoint/model name
  lora?: Array<{ model: string; weight: number }>; // applied LoRAs; weight is fractional (1 = 100%)
  sampler: string; // sampler name, e.g. "UniPC"
  scale: number; // CFG / guidance scale
  seed: number; // RNG seed used for generation
  seed_mode?: string; // seed interpretation mode
  shift?: number; // sampler shift parameter, if present
  size: string; // "WxH" string, e.g. "768x1024"
  steps: number; // number of sampling steps
  strength?: number; // img2img denoise strength, if applicable
  mask_blur?: number; // inpainting mask blur radius, if applicable
  profile?: {
    duration: number; // total inference time in seconds
    timings: Array<{ name: string; durations: number[] }>; // per-stage timing breakdown
  };
  v2?: Record<string, any>; // Additional v2 parameters (opaque, passed through)
}
/**
 * Normalized image generation metadata, produced from either a
 * Draw Things XMP payload or a ComfyUI graph.
 */
export interface ParsedImageMetadata {
  prompt: string; // positive prompt ('' when unknown)
  negativePrompt?: string; // negative prompt, omitted when empty
  model: string; // model/checkpoint name ('unknown' when not found)
  loras?: Array<{ model: string; weight: number }>; // weight is fractional (1 = 100%)
  sampler: string; // sampler name ('unknown' when not found)
  cfgScale: number; // CFG / guidance scale
  seed: number; // generation seed (0 when not found)
  seedMode?: string; // Draw Things seed mode, if present
  width: number; // pixels; 0 when not found
  height: number; // pixels; 0 when not found
  steps: number; // sampling steps; 0 when not found
  strength?: number; // denoise strength, if present
  inferenceTime?: number; // seconds, from Draw Things profile data
  rawType?: 'drawthings' | 'comfyui'; // which producer the metadata came from
  raw?: unknown; // original decoded payload, for debugging/inspection
}
export class PngMetadataParser {
/**
* Parse PNG file and extract Draw Things metadata
*/
static async parse(filePath: string): Promise<ParsedDocument> {
const metadata = await this.extractMetadata(filePath);
if (!metadata) {
return {
content: `Image: ${filePath}\n(No embedded metadata found)`,
metadata: {
format: 'png',
hasMetadata: false,
path: filePath,
},
};
}
const searchableText = this.createSearchableText(metadata, filePath);
return {
content: searchableText,
metadata: {
format: 'png',
hasMetadata: true,
path: filePath,
...metadata,
},
};
}
/**
* Extract metadata from PNG file
*/
static async extractMetadata(filePath: string): Promise<ParsedImageMetadata | null> {
try {
const buffer = await readFile(filePath);
// 1) Draw Things: Find XMP data in PNG chunks
const xmpData = this.extractXmpFromPng(buffer);
if (xmpData) {
// Parse the JSON from exif:UserComment
const jsonMatch = xmpData.match(/exif:UserComment[^{]*(\{[^]*?\})\s*<\/rdf:li>/);
if (jsonMatch) {
const jsonStr = this.unescapeXml(jsonMatch[1]);
const dtMeta = JSON.parse(jsonStr) as DrawThingsMetadata;
const converted = this.convertToMetadata(dtMeta);
converted.rawType = 'drawthings';
return converted;
}
// Try dc:description for basic info
const fallback = this.parseDescriptionFallback(xmpData, filePath);
if (fallback) {
fallback.rawType = 'drawthings';
return fallback;
}
}
// 2) ComfyUI: Parse PNG text chunks (prompt/workflow)
const textChunks = this.extractTextChunksFromPng(buffer);
const comfy = this.extractComfyUiFromTextChunks(textChunks);
if (comfy) return comfy;
return null;
} catch (e) {
console.warn(`Failed to extract metadata from ${filePath}:`, e);
return null;
}
}
private static extractComfyUiFromTextChunks(
chunks: Array<{ keyword: string; text: string }>
): ParsedImageMetadata | null {
const promptChunk = chunks.find(c => c.keyword === 'prompt');
if (!promptChunk) return null;
let graph: any;
try {
graph = JSON.parse(promptChunk.text);
} catch {
return null;
}
if (!graph || typeof graph !== 'object') return null;
const nodes: Array<any> = Object.values(graph);
// Prompts
const clipNodes = nodes.filter(n => typeof n?.class_type === 'string' && n.class_type.toLowerCase().includes('cliptextencode'));
let positivePrompt: string | undefined;
let negativePrompt: string | undefined;
for (const n of clipNodes) {
const title = (n?._meta?.title ?? '').toString();
const text = (n?.inputs?.text ?? '').toString();
if (!text) continue;
if (/negative/i.test(title)) {
negativePrompt ??= text;
} else if (/positive/i.test(title) || /prompt/i.test(title)) {
positivePrompt ??= text;
}
}
// Fallback: if we couldn't classify by title
if (!positivePrompt && clipNodes.length >= 1) {
const first = (clipNodes[0]?.inputs?.text ?? '').toString();
if (first) positivePrompt = first;
}
if (!negativePrompt && clipNodes.length >= 2) {
const second = (clipNodes[1]?.inputs?.text ?? '').toString();
if (second) negativePrompt = second;
}
// Sampler / steps / cfg / seed
// ComfyUI graphs vary widely; sampler selection, scheduler, noise and guidance are often separate nodes.
const samplerNode = nodes.find(n => {
const t = (n?.class_type ?? '').toString().toLowerCase();
return t.includes('ksampler');
});
const sampler = (
this.tryFirstString(samplerNode?.inputs, ['sampler_name', 'sampler', 'samplerName']) ?? 'unknown'
).toString();
let steps =
this.tryNumber(samplerNode?.inputs?.steps) ??
this.tryNumber(samplerNode?.inputs?.num_steps) ??
0;
let cfgScale =
this.tryNumber(samplerNode?.inputs?.cfg) ??
this.tryNumber(samplerNode?.inputs?.guidance) ??
1;
let seed =
this.tryNumber(samplerNode?.inputs?.seed) ??
this.tryNumber(samplerNode?.inputs?.noise_seed) ??
0;
const strength =
this.tryNumber(samplerNode?.inputs?.denoise) ??
this.tryNumber(samplerNode?.inputs?.strength);
// Fallback scanning: Scheduler nodes often hold steps; noise nodes hold seed; guidance nodes hold CFG/guidance.
if (!steps) {
const candidateSteps = this.scanAllNumbers(nodes, ['steps', 'num_steps']);
if (candidateSteps.length) {
steps = Math.max(...candidateSteps);
}
}
if (seed === 0) {
const candidateSeeds = this.scanAllNumbers(nodes, ['seed', 'noise_seed']);
if (candidateSeeds.length) {
seed = candidateSeeds[0];
}
}
if (cfgScale === 1) {
const candidateCfg = this.scanAllNumbers(nodes, ['cfg', 'guidance', 'guidance_scale']);
const first = candidateCfg.find(n => n !== 1);
if (typeof first === 'number') {
cfgScale = first;
}
}
// Resolution
const sizeNode = nodes.find(n => {
const w = this.tryNumber(n?.inputs?.width);
const h = this.tryNumber(n?.inputs?.height);
return typeof w === 'number' && typeof h === 'number' && w > 0 && h > 0;
});
const width = this.tryNumber(sizeNode?.inputs?.width) ?? 0;
const height = this.tryNumber(sizeNode?.inputs?.height) ?? 0;
// Model
const loader = nodes.find(n => {
const t = (n?.class_type ?? '').toString().toLowerCase();
return (
t.includes('checkpoint') ||
t.includes('loadcheckpoint') ||
t.includes('checkpointloader') ||
t.includes('unetloader') ||
t.includes('unet') ||
t.includes('loader')
);
});
const model =
this.tryFirstString(loader?.inputs, ['ckpt_name', 'checkpoint', 'model_name', 'unet_name', 'ckptName', 'name', 'ckpt']) ??
// Some graphs (e.g., FLUX) use UNETLoader + unet_name without checkpoints
this.tryFirstString(
nodes.find(n => (n?.class_type ?? '').toString().toLowerCase().includes('unetloader'))?.inputs,
['unet_name', 'model_name', 'name']
) ??
'unknown';
if (!positivePrompt && model === 'unknown' && sampler === 'unknown') {
// Avoid false positives on arbitrary JSON
return null;
}
return {
prompt: positivePrompt ?? '',
negativePrompt: negativePrompt || undefined,
model,
sampler,
cfgScale,
seed,
width,
height,
steps,
strength,
rawType: 'comfyui',
};
}
private static scanAllNumbers(nodes: Array<any>, keys: string[]): number[] {
const out: number[] = [];
for (const n of nodes) {
const inputs = n?.inputs;
if (!inputs || typeof inputs !== 'object') continue;
for (const k of keys) {
const v = this.tryNumber((inputs as any)[k]);
if (typeof v === 'number') out.push(v);
}
}
return out;
}
private static tryFirstString(obj: any, keys: string[]): string | undefined {
if (!obj || typeof obj !== 'object') return undefined;
for (const k of keys) {
const v = (obj as any)[k];
if (typeof v === 'string' && v.trim()) return v.trim();
}
return undefined;
}
private static tryNumber(v: unknown): number | undefined {
if (typeof v === 'number' && Number.isFinite(v)) return v;
if (typeof v === 'string' && v.trim()) {
const n = Number(v);
if (Number.isFinite(n)) return n;
}
return undefined;
}
/**
* Extract XMP data from PNG buffer
*/
private static extractXmpFromPng(buffer: Buffer): string | null {
// PNG signature: 89 50 4E 47 0D 0A 1A 0A
if (buffer.slice(0, 8).toString('hex') !== '89504e470d0a1a0a') {
return null;
}
let offset = 8;
while (offset < buffer.length) {
const length = buffer.readUInt32BE(offset);
const type = buffer.slice(offset + 4, offset + 8).toString('ascii');
if (type === 'iTXt' || type === 'tEXt' || type === 'zTXt') {
const chunkData = buffer.slice(offset + 8, offset + 8 + length);
const dataStr = chunkData.toString('utf-8');
// Look for XMP data
if (dataStr.includes('XML:com.adobe.xmp') || dataStr.includes('x:xmpmeta')) {
// Extract just the XML part
const xmlStart = dataStr.indexOf('<?xml') !== -1
? dataStr.indexOf('<?xml')
: dataStr.indexOf('<x:xmpmeta');
if (xmlStart !== -1) {
return dataStr.slice(xmlStart);
}
return dataStr;
}
}
// Move to next chunk: length + type (4) + data (length) + CRC (4)
offset += 12 + length;
}
return null;
}
/**
* Extract PNG text chunks (tEXt / iTXt / zTXt) into keyword/text pairs.
* ComfyUI commonly stores JSON in a tEXt chunk with keyword "prompt".
*/
private static extractTextChunksFromPng(buffer: Buffer): Array<{ keyword: string; text: string }> {
if (buffer.slice(0, 8).toString('hex') !== '89504e470d0a1a0a') {
return [];
}
const out: Array<{ keyword: string; text: string }> = [];
let offset = 8;
while (offset < buffer.length) {
const length = buffer.readUInt32BE(offset);
const type = buffer.slice(offset + 4, offset + 8).toString('ascii');
const chunkData = buffer.slice(offset + 8, offset + 8 + length);
try {
if (type === 'tEXt') {
const nul = chunkData.indexOf(0);
if (nul > 0) {
const keyword = chunkData.slice(0, nul).toString('latin1');
const text = chunkData.slice(nul + 1).toString('utf8');
out.push({ keyword, text });
}
} else if (type === 'zTXt') {
const nul = chunkData.indexOf(0);
if (nul > 0 && nul + 1 < chunkData.length) {
const keyword = chunkData.slice(0, nul).toString('latin1');
const compressionMethod = chunkData[nul + 1];
const compressed = chunkData.slice(nul + 2);
if (compressionMethod === 0) {
const text = inflateSync(compressed).toString('utf8');
out.push({ keyword, text });
}
}
} else if (type === 'iTXt') {
// keyword\0 compressionFlag compressionMethod languageTag\0 translatedKeyword\0 text
let i = 0;
const nul = chunkData.indexOf(0);
if (nul <= 0) {
// ignore
} else {
const keyword = chunkData.slice(0, nul).toString('latin1');
i = nul + 1;
if (i + 2 <= chunkData.length) {
const compressionFlag = chunkData[i];
const compressionMethod = chunkData[i + 1];
i += 2;
const langNul = chunkData.indexOf(0, i);
if (langNul === -1) throw new Error('invalid iTXt');
i = langNul + 1;
const transNul = chunkData.indexOf(0, i);
if (transNul === -1) throw new Error('invalid iTXt');
i = transNul + 1;
const payload = chunkData.slice(i);
let text: string;
if (compressionFlag === 1 && compressionMethod === 0) {
text = inflateSync(payload).toString('utf8');
} else {
text = payload.toString('utf8');
}
out.push({ keyword, text });
}
}
}
} catch {
// Ignore malformed chunk
}
offset += 12 + length;
}
return out;
}
/**
* Unescape XML entities
*/
private static unescapeXml(str: string): string {
return str
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/&/g, '&')
.replace(/"/g, '"')
.replace(/
/g, '\n')
.replace(/ /g, '\n');
}
/**
* Parse metadata from dc:description (fallback)
*/
private static parseDescriptionFallback(xmpData: string, filePath: string): ParsedImageMetadata | null {
const descMatch = xmpData.match(/<dc:description[^>]*>[\s\S]*?<rdf:li[^>]*>([^<]+)/);
if (!descMatch) return null;
const description = this.unescapeXml(descMatch[1]);
// Parse "Prompt\nSteps: 8, Sampler: UniPC..." format
const lines = description.split('\n');
const prompt = lines[0];
const params: Partial<ParsedImageMetadata> = {
prompt,
model: 'unknown',
sampler: 'unknown',
cfgScale: 1,
seed: 0,
width: 0,
height: 0,
steps: 0,
};
// Parse key-value pairs from remaining lines
const paramStr = lines.slice(1).join(', ');
const stepMatch = paramStr.match(/Steps:\s*(\d+)/i);
if (stepMatch) params.steps = parseInt(stepMatch[1]);
const samplerMatch = paramStr.match(/Sampler:\s*([^,]+)/i);
if (samplerMatch) params.sampler = samplerMatch[1].trim();
const cfgMatch = paramStr.match(/(?:Guidance Scale|CFG):\s*([\d.]+)/i);
if (cfgMatch) params.cfgScale = parseFloat(cfgMatch[1]);
const seedMatch = paramStr.match(/Seed:\s*(\d+)/i);
if (seedMatch) params.seed = parseInt(seedMatch[1]);
const sizeMatch = paramStr.match(/Size:\s*(\d+)x(\d+)/i);
if (sizeMatch) {
params.width = parseInt(sizeMatch[1]);
params.height = parseInt(sizeMatch[2]);
}
const modelMatch = paramStr.match(/Model:\s*([^,]+)/i);
if (modelMatch) params.model = modelMatch[1].trim();
// Parse LoRAs
const loraMatches = paramStr.matchAll(/LoRA\s*\d*\s*Model:\s*([^,]+),\s*LoRA\s*\d*\s*Weight:\s*([\d.]+)/gi);
const loras: Array<{ model: string; weight: number }> = [];
for (const match of loraMatches) {
loras.push({ model: match[1].trim(), weight: parseFloat(match[2]) });
}
if (loras.length) params.loras = loras;
return params as ParsedImageMetadata;
}
/**
* Convert Draw Things metadata to our format
*/
private static convertToMetadata(dt: DrawThingsMetadata): ParsedImageMetadata {
const [width, height] = (dt.size || '0x0').split('x').map(Number);
return {
prompt: dt.c,
negativePrompt: dt.uc || undefined,
model: dt.model,
loras: dt.lora,
sampler: dt.sampler,
cfgScale: dt.scale,
seed: dt.seed,
seedMode: dt.seed_mode,
width,
height,
steps: dt.steps,
strength: dt.strength,
inferenceTime: dt.profile?.duration,
raw: dt,
};
}
/**
* Create searchable text from metadata
*/
static createSearchableText(metadata: ParsedImageMetadata, filePath: string): string {
const parts: string[] = [
`Prompt: ${metadata.prompt}`,
];
if (metadata.negativePrompt) {
parts.push(`Negative Prompt: ${metadata.negativePrompt}`);
}
parts.push(`Model: ${metadata.model}`);
if (metadata.rawType === 'comfyui') {
parts.push('Source: ComfyUI');
} else if (metadata.rawType === 'drawthings') {
parts.push('Source: Draw Things');
}
if (metadata.loras?.length) {
const loraStr = metadata.loras
.map(l => `${l.model} (${(l.weight * 100).toFixed(0)}%)`)
.join(', ');
parts.push(`LoRAs: ${loraStr}`);
}
parts.push(`Sampler: ${metadata.sampler}`);
parts.push(`Steps: ${metadata.steps}, CFG: ${metadata.cfgScale}, Seed: ${metadata.seed}`);
parts.push(`Size: ${metadata.width}x${metadata.height}`);
if (metadata.inferenceTime) {
parts.push(`Inference Time: ${metadata.inferenceTime.toFixed(1)}s`);
}
parts.push(`Image: ${filePath}`);
return parts.join('\n');
}
/**
* Check if file has Draw Things metadata
*/
static async hasMetadata(filePath: string): Promise<boolean> {
const metadata = await this.extractMetadata(filePath);
return metadata !== null;
}
}