// Project Files
// src/promotionPreprocessor.ts
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
// -------- data:image/...;base64,... --------
/**
 * Cleans up a regex match for a base64 image data URL.
 *
 * The textual pattern tolerates CR/LF so line-wrapped payloads are captured,
 * which also means a match can carry a trailing newline or run on into the
 * following line. This helper removes every CR/LF and, when `=` padding is
 * present, drops everything after the padding (base64 cannot continue past it).
 *
 * @param match Raw text matched by the data-URL pattern.
 * @returns The cleaned URL, or `null` when no payload remains.
 */
function tidyBase64DataUrl(match: string): string | null {
  const compact = match.replace(/[\r\n]/g, "");
  // The MIME part cannot contain a comma, so the first comma ends ";base64,".
  const comma = compact.indexOf(",");
  const head = compact.slice(0, comma + 1);
  let payload = compact.slice(comma + 1);
  const pad = payload.indexOf("=");
  if (pad !== -1) {
    const padLength = payload[pad + 1] === "=" ? 2 : 1;
    payload = payload.slice(0, pad + padLength);
  }
  return payload.length > 0 ? head + payload : null;
}

/**
 * Collects every image `data:` URL found in `text`.
 *
 * If `text` parses as JSON the parsed value is walked first (see
 * `scanObjForDataUrls`); afterwards the raw text is scanned for
 * `data:image/...;base64,...` occurrences. Textual matches are normalised
 * (CR/LF removed, nothing kept after `=` padding) so that a wrapped or
 * newline-terminated URL compares equal to its single-line form.
 *
 * Limitation: an unpadded payload directly followed by a line that starts with
 * base64-alphabet characters cannot be told apart from a wrapped payload.
 *
 * @param text Raw tool/chat output, JSON or free text.
 * @returns Unique URLs in first-seen order.
 */
export function extractImageDataUrls(text: string): string[] {
  const urls: string[] = [];
  try {
    scanObjForDataUrls(JSON.parse(text), urls);
  } catch {
    /* not JSON — fall through to the textual scan */
  }
  const re = /data:image\/[a-zA-Z0-9.+-]+;base64,[A-Za-z0-9+/=\r\n]+/g;
  for (const m of text.matchAll(re)) {
    const cleaned = tidyBase64DataUrl(m[0]);
    if (cleaned !== null) urls.push(cleaned);
  }
  return [...new Set(urls)];
}

/**
 * Recursively walks a parsed JSON value and appends image data URLs to `out`.
 *
 * Per object it looks at: URL-like fields that already hold a `data:image/`
 * URL, a base64 + image MIME field pair (assembled into a data URL), and a
 * `markdown` string (re-scanned as text). All property values are then visited.
 *
 * @param o   Any parsed JSON value; non-objects are ignored.
 * @param out Accumulator that receives the URLs (mutated).
 */
function scanObjForDataUrls(o: unknown, out: string[]): void {
  if (!o || typeof o !== "object") return;
  const rec = o as Record<string, unknown>;

  for (const v of [rec.dataUri, rec.dataURI, rec.url, rec.href, rec.src]) {
    if (typeof v === "string" && /^data:image\//i.test(v)) out.push(v);
  }

  const b64 = rec.base64 ?? rec.b64 ?? rec.data;
  const mime = rec.mime ?? rec.mimeType ?? rec.contentType;
  if (typeof b64 === "string" && typeof mime === "string" && /^image\//i.test(mime)) {
    out.push(`data:${mime};base64,${b64}`);
  }

  if (typeof rec.markdown === "string") out.push(...extractImageDataUrls(rec.markdown));

  for (const child of Object.values(rec)) {
    if (Array.isArray(child)) {
      for (const el of child) scanObjForDataUrls(el, out);
    } else {
      scanObjForDataUrls(child, out);
    }
  }
}
// -------- http(s) links: markdown, [image: …], and bare URLs --------
/**
 * Removes closing square brackets left dangling at the end of a bare URL.
 *
 * A bare URL written inside an `[image: …]` tag is matched together with the
 * tag's closing `]`. URLs that contain `[` (IPv6 host literals) are left alone.
 */
function stripDanglingBracket(url: string): string {
  return url.includes("[") ? url : url.replace(/\]+$/, "");
}

/**
 * Collects http(s) URLs from `text`.
 *
 * Sources, in order: fields of the parsed JSON value (when `text` is JSON),
 * markdown image links `![alt](url)`, `[image: url]` tags, and bare URLs.
 *
 * Bare URLs stop at whitespace, `)`, or a character that is never legal
 * unescaped in a URL (`"`, `<`, `>`, backslash). Without that, a URL embedded
 * in compact JSON would swallow the following JSON punctuation. A bare URL
 * immediately followed by `)` is still not reported by the bare pattern
 * (markdown links are handled by their own pattern).
 *
 * @param text Raw tool/chat output, JSON or free text.
 * @returns Unique URLs in first-seen order.
 */
export function extractHttpImageUrls(text: string): string[] {
  const out: string[] = [];
  try {
    scanObjForHttp(JSON.parse(text), out);
  } catch {
    /* not JSON — only the textual patterns apply */
  }

  const md = /!\[[^\]]*\]\((https?:[^\s)]+)\)/g;
  for (const m of text.matchAll(md)) out.push(m[1]);

  const tag = /\[image:\s*(https?:[^\]\s]+)\s*]/gi;
  for (const m of text.matchAll(tag)) out.push(m[1]);

  const bare = /(https?:\/\/[^\s)"<>\\]+)(?=[\s"<>\\]|$)/g;
  for (const m of text.matchAll(bare)) out.push(stripDanglingBracket(m[1]));

  return [...new Set(out)];
}

/**
 * Recursively walks a parsed JSON value and appends http(s) URLs to `out`.
 *
 * Per object it checks the common URL-bearing fields and re-scans a `markdown`
 * string as text, then visits every property value.
 *
 * @param o   Any parsed JSON value; non-objects are ignored.
 * @param out Accumulator that receives the URLs (mutated).
 */
function scanObjForHttp(o: unknown, out: string[]): void {
  if (!o || typeof o !== "object") return;
  const rec = o as Record<string, unknown>;

  const fields = [
    rec.url, rec.href, rec.src,
    rec.imageUrl, rec.imageURL,
    rec.previewUrl, rec.previewURL,
  ];
  for (const v of fields) {
    if (typeof v === "string" && /^https?:\/\//i.test(v)) out.push(v);
  }

  if (typeof rec.markdown === "string") out.push(...extractHttpImageUrls(rec.markdown));

  for (const child of Object.values(rec)) {
    if (Array.isArray(child)) {
      for (const el of child) scanObjForHttp(el, out);
    } else {
      scanObjForHttp(child, out);
    }
  }
}
// -------- local chat copies saved in the chat directory, e.g., image-123.jpg --------
/**
 * Collects relative image paths referenced in `text`.
 *
 * When `text` is JSON, the parsed value is walked with `scanObjForLocal`.
 * The raw text is then searched, in this order, for:
 *   1. markdown image links whose target starts with `./` or `../`
 *      (the pattern also accepts an optional `:` before the slash),
 *   2. `[image: …]` tags with such a target,
 *   3. bare whitespace-delimited paths with such a prefix that end in a
 *      known image extension.
 *
 * @param text Raw tool/chat output, JSON or free text.
 * @returns Unique paths in first-seen order.
 */
export function extractLocalImagePaths(text: string): string[] {
  const found: string[] = [];

  try {
    scanObjForLocal(JSON.parse(text), found);
  } catch {
    /* not JSON — rely on the textual patterns below */
  }

  // Each entry pairs a pattern with the capture group that holds the path.
  const patterns: Array<[RegExp, number]> = [
    [/!\[[^\]]*\]\(((?:\.\.?\:?\/)[^\s)]+)\)/g, 1],
    [/\[image:\s*((?:\.\.?\:?\/)[^\]\s]+)\s*]/gi, 1],
    [/(^|\s)((?:\.\.?\:?\/)[^\s)]+\.(?:png|jpe?g|gif|webp|bmp|svg))(?!\S)/gi, 2],
  ];
  for (const [pattern, group] of patterns) {
    for (const hit of text.matchAll(pattern)) found.push(hit[group]);
  }

  return dedupe(found);
}
/**
 * Recursively walks a parsed JSON value and appends local image paths to `out`.
 *
 * Per object, in order: paths found in a `markdown` string, a
 * `fileName`/`filename` that carries an extension (joined onto `.` with POSIX
 * rules), and any of `url`/`href`/`src`/`path` that starts with `./` or `../`.
 * Every property value is then visited.
 *
 * @param o   Any parsed JSON value; non-objects are ignored.
 * @param out Accumulator that receives the paths (mutated).
 */
function scanObjForLocal(o: any, out: string[]) {
  const isObjectLike = Boolean(o) && typeof o === "object";
  if (!isObjectLike) return;

  if (typeof o.markdown === "string") {
    for (const hit of extractLocalImagePaths(o.markdown)) out.push(hit);
  }

  const named = o.fileName || o.filename;
  const hasExtension = typeof named === "string" && /\.[a-z]+$/i.test(named);
  if (hasExtension) out.push(path.posix.join(".", named));

  for (const candidate of [o.url, o.href, o.src, o.path]) {
    if (typeof candidate !== "string") continue;
    if (/^(?:\.\.?\/)/.test(candidate)) out.push(candidate);
  }

  for (const child of Object.values(o)) {
    if (Array.isArray(child)) {
      for (const element of child) scanObjForLocal(element, out);
    } else {
      scanObjForLocal(child, out);
    }
  }
}
// -------- file:// absolute fallback --------
/**
 * Collects `file://` URIs in `text` that end in a known image extension.
 *
 * A URI run stops at whitespace and at `"`, `<`, `>` or a backslash. Those
 * characters delimit strings and escapes in JSON/HTML, so excluding them keeps
 * several URIs inside compact or escaped JSON from being merged into one
 * match. Within a run the match is greedy and extends to the last image
 * extension.
 *
 * @param text Raw tool/chat output, JSON or free text.
 * @returns Unique URIs in first-seen order.
 */
export function extractFileUriImagePaths(text: string): string[] {
  const re = /file:\/\/[^\s"<>\\]+\.(?:png|jpe?g|gif|webp|bmp|svg)/gi;
  const hits = Array.from(text.matchAll(re), m => m[0]);
  return [...new Set(hits)];
}
/**
 * Maps a file path to an image MIME type based on its extension
 * (case-insensitive). Returns `null` for any extension not listed.
 */
const guessMime = (p: string) => {
  const mimeByExtension = new Map<string, string>([
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
    [".bmp", "image/bmp"],
    [".svg", "image/svg+xml"],
  ]);
  const extension = path.extname(p).toLowerCase();
  return mimeByExtension.get(extension) ?? null;
};
/**
 * Reads a local image and returns it as a base64 `data:` URL.
 *
 * @param rel        Path to the image, resolved against `workingDir`.
 * @param workingDir Base directory; the process working directory when
 *                   omitted or empty.
 * @returns The data URL, or `null` when the file does not exist, its
 *          extension is not a recognised image type, or reading fails.
 */
export function toDataUrlFromLocal(rel: string, workingDir?: string): string | null {
  try {
    const baseDir = workingDir || process.cwd();
    const absolutePath = path.resolve(baseDir, rel);
    if (!fs.existsSync(absolutePath)) {
      return null;
    }
    const mimeType = guessMime(absolutePath);
    if (!mimeType) {
      return null;
    }
    const encoded = fs.readFileSync(absolutePath).toString("base64");
    return `data:${mimeType};base64,${encoded}`;
  } catch {
    // Any filesystem error is treated as "no image available".
    return null;
  }
}
/**
 * Reads the image a `file://` URI points at and returns it as a base64
 * `data:` URL.
 *
 * The path is first derived the historical way: strip the `file://` prefix,
 * ensure a leading `/`, percent-decode. Only when that path does not exist is
 * the URI re-interpreted with Node's `fileURLToPath`, which understands forms
 * the manual parsing gets wrong (e.g. `file://localhost/...` and, on Windows,
 * drive-letter URIs). Anything that resolved before still resolves to the
 * same file.
 *
 * @param uri A `file://` URI.
 * @returns The data URL, or `null` when the URI cannot be decoded, no file is
 *          found, the extension is not a recognised image type, or reading
 *          fails.
 */
export function toDataUrlFromFileUri(uri: string): string | null {
  try {
    // Malformed percent-escapes throw here and yield null, as before.
    let target = decodeURIComponent(uri.replace(/^file:\/\//i, "").replace(/^(?!\/)/, "/"));
    if (!fs.existsSync(target)) {
      let alternative: string;
      try {
        alternative = fileURLToPath(uri);
      } catch {
        return null; // not a file URL Node can map to a path on this platform
      }
      if (!fs.existsSync(alternative)) return null;
      target = alternative;
    }
    const mime = guessMime(target);
    if (!mime) return null;
    return `data:${mime};base64,${fs.readFileSync(target).toString("base64")}`;
  } catch {
    return null;
  }
}
// -------- Metadata (for unambiguous mapping) --------
/** Generation metadata recognised in tool output; every field is optional. */
export type ImageMeta = {
  width?: number; height?: number; steps?: number; backend?: string;
  mode_effective?: string; variants_used?: number; inference_time_ms?: number;
  original?: string; previews?: string[];
};

/**
 * Extracts image-generation metadata from raw tool output.
 *
 * - If `text` is a JSON array, each element is inspected, and so is the JSON
 *   parsed from an element's `text` field when it has one.
 * - If `text` is any other JSON value, that value is inspected.
 * - If `text` is not JSON, the first embedded object that starts with a
 *   numeric `"width"` property and parses as JSON is inspected. The object's
 *   end is found by brace matching that skips string contents, so nested
 *   objects (such as `files`) and `}` inside strings are handled.
 *
 * @param text Raw tool/chat output.
 * @returns The last metadata record found, or `null` when there is none.
 */
export function extractImageMetaFromRaw(text: string): ImageMeta | null {
  const metas: ImageMeta[] = [];

  // Scalar fields copied verbatim when present with the expected primitive type.
  const scalarFields = [
    ["width", "number"],
    ["height", "number"],
    ["steps", "number"],
    ["backend", "string"],
    ["mode_effective", "string"],
    ["variants_used", "number"],
    ["inference_time_ms", "number"],
  ] as const;

  // Records the recognised fields of one candidate object, if it has any.
  const collect = (o: unknown): void => {
    if (!o || typeof o !== "object") return;
    const rec = o as Record<string, unknown>;
    const meta: ImageMeta = {};
    const sink = meta as Record<string, unknown>;
    for (const [key, kind] of scalarFields) {
      const value = rec[key];
      if (typeof value === kind) sink[key] = value;
    }
    const files = rec.files;
    if (files && typeof files === "object") {
      const fileRec = files as Record<string, unknown>;
      if (typeof fileRec.original === "string") meta.original = fileRec.original;
      if (Array.isArray(fileRec.previews)) {
        meta.previews = fileRec.previews.filter((x): x is string => typeof x === "string");
      }
    }
    if (Object.keys(meta).length) metas.push(meta);
  };

  // Returns the brace-balanced object text starting at `start`, or null if it never closes.
  const sliceBalancedObject = (source: string, start: number): string | null => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < source.length; i++) {
      const ch = source[i];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') inString = true;
      else if (ch === "{") depth++;
      else if (ch === "}" && --depth === 0) return source.slice(start, i + 1);
    }
    return null;
  };

  try {
    const parsed: unknown = JSON.parse(text);
    if (Array.isArray(parsed)) {
      for (const item of parsed) {
        collect(item);
        const inner = (item as { text?: unknown } | null | undefined)?.text;
        if (inner) {
          try {
            collect(JSON.parse(inner as string));
          } catch {
            /* the text field is not JSON — nothing to collect from it */
          }
        }
      }
    } else {
      collect(parsed);
    }
  } catch {
    // Free text: look for an embedded `{"width": <digits> ...}` object.
    for (const hit of text.matchAll(/\{\s*"width"\s*:\s*\d+/g)) {
      const candidate = sliceBalancedObject(text, hit.index ?? 0);
      if (candidate === null) continue;
      try {
        collect(JSON.parse(candidate));
        break;
      } catch {
        /* not valid JSON — try the next candidate */
      }
    }
  }

  return metas.length ? metas[metas.length - 1] : null;
}
// -------- Build labeled parts to push as synthetic user content --------
/**
 * Builds the labelled content parts (text + `image_url`) for every image
 * referenced in `raw`.
 *
 * Images are gathered in priority order and de-duplicated by final URL:
 *   1. chat-local relative paths, inlined as `data:` URLs,
 *   2. `data:` URLs already present in the text,
 *   3. `file://` URIs, inlined as `data:` URLs,
 *   4. http(s) links, passed through unchanged.
 *
 * When metadata is found in `raw`, a leading text part summarises it. Each
 * image is then emitted as a "Variant N" label part followed by an
 * `image_url` part. Deriving a label never throws: a URL or URI that cannot
 * be parsed simply gets no name in its label.
 *
 * @param raw        Raw tool/chat output.
 * @param workingDir Directory that relative image paths are resolved against.
 * @returns The parts array, or `[]` when no image could be resolved.
 */
export function buildPromotionParts(raw: string, workingDir?: string): any[] {
  type Item = { url: string; kind: "local" | "file" | "data" | "http"; source?: string };
  const items: Item[] = [];
  const seen = new Set<string>();
  const add = (item: Item): void => {
    if (seen.has(item.url)) return;
    seen.add(item.url);
    items.push(item);
  };

  // 1) Prefer chat-local copies (dot-slash …) → data:
  for (const p of extractLocalImagePaths(raw)) {
    const d = toDataUrlFromLocal(p, workingDir);
    if (d) add({ url: d, kind: "local", source: p });
  }
  // 2) data: previews
  for (const u of extractImageDataUrls(raw)) add({ url: u, kind: "data" });
  // 3) file:// absolute fallback
  for (const f of extractFileUriImagePaths(raw)) {
    const d = toDataUrlFromFileUri(f);
    if (d) add({ url: d, kind: "file", source: f });
  }
  // 4) http(s) links
  for (const u of extractHttpImageUrls(raw)) add({ url: u, kind: "http" });

  if (!items.length) return [];

  // Last path segment of an http(s) URL; undefined when the URL cannot be
  // parsed (the URL constructor throws on malformed input) or has no segment.
  const httpBaseName = (url: string): string | undefined => {
    try {
      return path.basename(new URL(url).pathname) || undefined;
    } catch {
      return undefined;
    }
  };
  // Base name of the file a file:// URI points at; undefined when decoding fails.
  const fileUriBaseName = (uri: string): string | undefined => {
    try {
      const decoded = decodeURIComponent(uri.replace(/^file:\/\//i, "").replace(/^(?!\/)/, "/"));
      return path.basename(decoded) || undefined;
    } catch {
      return undefined;
    }
  };

  const parts: any[] = [];
  const meta = extractImageMetaFromRaw(raw);
  const previews = meta?.previews?.map(p => path.basename(p)) ?? [];
  const original = meta?.original ? path.basename(meta.original) : undefined;

  if (meta) {
    const summary = [
      meta.width && meta.height ? `size: ${meta.width}x${meta.height}` : undefined,
      meta.steps ? `steps: ${meta.steps}` : undefined,
      meta.backend ? `backend: ${meta.backend}` : undefined,
      meta.mode_effective ? `mode: ${meta.mode_effective}` : undefined,
      meta.variants_used ? `variants_used: ${meta.variants_used}` : undefined,
      meta.inference_time_ms ? `inference_time_ms: ${meta.inference_time_ms}` : undefined,
    ].filter(Boolean).join(", ");
    const filesLine = [
      original ? `original: ${original}` : undefined,
      previews.length ? `previews: [${previews.join(", ")}]` : undefined,
    ].filter(Boolean).join("; ");
    parts.push({
      type: "text",
      text: `Image metadata${summary ? ` (${summary})` : ""}${filesLine ? ` | ${filesLine}` : ""}`,
    });
  }

  items.forEach((it, i) => {
    const chatName = it.kind === "local" && it.source ? path.basename(it.source) : undefined;
    const fileName = it.kind === "file" && it.source ? fileUriBaseName(it.source) : undefined;
    const httpName = it.kind === "http" ? httpBaseName(it.url) : undefined;
    const label = [
      `Variant ${i + 1}`,
      // Preview names from the metadata are paired with images by position.
      previews[i] ? `(preview: ${previews[i]})` : undefined,
      chatName ? `(chat: ${chatName})` : undefined,
      fileName ? `(file: ${fileName})` : undefined,
      !previews[i] && httpName ? `(name: ${httpName})` : undefined,
    ].filter(Boolean).join(" ");
    parts.push({ type: "text", text: label });
    parts.push({ type: "image_url", image_url: { url: it.url } });
  });

  return parts;
}
/** Returns the distinct elements of `arr`, keeping first-seen order. */
const dedupe = <T,>(arr: T[]) => {
  const unique = new Set<T>(arr);
  return [...unique];
};
// -------- Analysis JPEG discovery (latest run, preserve v1..v3 order) --------
// Matches file names such as
// `analysis-generated-image-2024-01-02T03-04-05-678Z-v2.jpg` (case-insensitive,
// `.jpg` or `.jpeg`). Group 1 is the timestamp with `-` in place of `:` and `.`;
// group 2 is the variant number, a single digit from 1 to 3.
const ISO_LIKE_RE = /^analysis-generated-image-([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{3}Z)-v([1-3])\.jpe?g$/i;
/**
 * Converts a file-name-safe timestamp (`YYYY-MM-DDTHH-mm-ss-SSSZ`) into epoch
 * milliseconds.
 *
 * @param s Timestamp text with `-` where ISO 8601 uses `:` and `.`.
 * @returns Epoch milliseconds, or `null` when `s` does not have that shape or
 *          does not denote a parseable date.
 */
function parseIsoLike(s: string): number | null {
  const parts = s.match(/^([0-9]{4}-[0-9]{2}-[0-9]{2})T([0-9]{2})-([0-9]{2})-([0-9]{2})-([0-9]{3})Z$/);
  if (parts === null) return null;
  const [, day, hours, minutes, seconds, millis] = parts;
  const epochMs = Date.parse(`${day}T${hours}:${minutes}:${seconds}.${millis}Z`);
  return Number.isNaN(epochMs) ? null : epochMs;
}
/**
 * Finds the variant files of the most recent analysis run in `workingDir`.
 *
 * Files whose names match `ISO_LIKE_RE` are grouped by the timestamp embedded
 * in the name. A group is ranked by that timestamp; when it cannot be parsed,
 * the modification time of the first file seen for the group is used instead.
 * Groups are tried newest first and the first one that yields any file wins.
 *
 * @param workingDir  Directory to list.
 * @param maxVariants Upper bound on the number of names returned (default 3).
 * @returns File names (not full paths) in variant order v1..v3, or `[]` when
 *          nothing matches or the directory cannot be read.
 */
export async function findLatestAnalysisVariantFiles(workingDir: string, maxVariants = 3): Promise<string[]> {
  try {
    type Run = { iso: string; variants: Record<number, string>; ts: number };

    const entries = await fs.promises.readdir(workingDir);
    const runs = new Map<string, Run>();
    const pendingStats: Array<Promise<void>> = [];

    // Fallback ranking: adopt the file's mtime when the name's timestamp is unusable.
    const adoptMtime = async (run: Run, fileName: string): Promise<void> => {
      try {
        const { mtimeMs } = await fs.promises.stat(path.join(workingDir, fileName));
        run.ts = Math.max(run.ts, mtimeMs);
      } catch {
        /* ignore */
      }
    };

    for (const entry of entries) {
      const match = ISO_LIKE_RE.exec(entry);
      if (!match) continue;

      const stamp = match[1];
      const variantNo = parseInt(match[2], 10);

      let run = runs.get(stamp);
      if (!run) {
        run = { iso: stamp, variants: {}, ts: parseIsoLike(stamp) ?? 0 };
        runs.set(stamp, run);
        if (run.ts === 0) pendingStats.push(adoptMtime(run, entry));
      }
      run.variants[variantNo] = entry;
    }

    await Promise.all(pendingStats);
    if (!runs.size) return [];

    const newestFirst = Array.from(runs.values()).sort((a, b) => b.ts - a.ts);
    for (const run of newestFirst) {
      const picked: string[] = [];
      for (let v = 1; v <= 3 && picked.length < maxVariants; v++) {
        const fileName = run.variants[v];
        if (fileName) picked.push(fileName);
      }
      // The newest run that contributes at least one file decides the result.
      if (picked.length > 0) return picked;
    }
    return [];
  } catch {
    return [];
  }
}