// Project Files
// src/promotion.ts
import fs from "fs";
import path from "path";
import os from "os";
import { type Chat, type GeneratorController } from "@lmstudio/sdk";
import { findLastAttachmentFromConversation } from "./attachments";
import { readChatMediaState, recordVariantsProvision, type ChatMediaState, writeChatMediaStateAtomic } from "./chat-media-state";
import { fileUriToPath, copyFile, encodeJpegFromBuffer, replacePrefixGeneratedToAnalysis } from "./image";
// ============================================================================
// Types
// ============================================================================
/**
 * Parameter bag for building the vision-promotion parts of a request.
 * NOTE(review): the consumer of this type is not visible in this file; field notes
 * below are limited to what the names, types and inline comments state.
 */
export type BuildVisionPromotionPartsParams = {
// Generator controller from @lmstudio/sdk.
ctl: GeneratorController;
// Chat history from @lmstudio/sdk.
history: Chat;
// API key (presumably for the remote model backend — confirm against caller).
apiKey: string;
// Chat working directory; relative preview/filename paths in the media state are resolved against it.
chatWd: string;
// Debug switch (exact effect not visible here).
debugChunks: boolean;
shouldUseFilesApi: boolean; // true => Mode C (GCS), false => Mode B (Base64)
// Model name/identifier.
model: string;
// Optional flag; presumably restricts output to the most recent variant — TODO confirm.
showOnlyLastImageVariant?: boolean;
};
/**
 * One image selected for promotion into the model context.
 * Produced by buildPromotionItems and consumed by toGeminiInlineDataParts,
 * which reads `previewAbs` and emits `label` as a text part when present.
 */
export type PromotionItem = {
abs: string; // Absolute path to original file
previewAbs: string; // Absolute path to preview file
label?: string; // Human-readable label for model context
};
// ============================================================================
// NEW: buildPromotionItems - based on standalone_generator_guide/promotion.ts
// ============================================================================
/**
 * Build the ordered list of images to inject into the model context.
 *
 * Attachments come first: they are sorted ascending by their stable `n` and only the
 * last `maxAttachmentItems` (highest `n`, i.e. most recent) are kept.
 * Variants follow: they are sorted by `v` (ties broken by `createdAt`) and only the
 * first `maxVariantItems` are kept.
 * Labels are derived from the stable `n` / `v` fields, NOT from the array index.
 *
 * @param chatWd - Chat working directory; relative `preview` / `filename` values are resolved against it.
 * @param state - Current media state.
 * @param options - `labels` toggles label text; both caps are floored and clamped at 0.
 * @returns Array of PromotionItem with `abs`, `previewAbs`, and (when `labels` is true) `label`.
 * @throws Error when a selected attachment has no `originAbs` or no `preview`.
 */
export function buildPromotionItems(
  chatWd: string,
  state: ChatMediaState,
  {
    labels = true,
    maxAttachmentItems = 2,
    maxVariantItems = 3,
  }: {
    labels?: boolean;
    maxAttachmentItems?: number;
    maxVariantItems?: number;
  } = {}
): PromotionItem[] {
  const items: PromotionItem[] = [];

  // ---- Attachments ----
  // Drop null/undefined entries up front so the comparator never dereferences them.
  const attachments = (Array.isArray(state.attachments) ? state.attachments : []).filter(Boolean);
  // Sort by n-value (ascending) so the tail of the list is the most recent attachments.
  attachments.sort((a, b) => (a.n ?? 0) - (b.n ?? 0));
  const attachmentCap = Math.max(0, Math.floor(maxAttachmentItems));
  // slice(-0) would return everything, hence the explicit guard for a zero cap.
  const recentAttachments = attachmentCap > 0 ? attachments.slice(-attachmentCap) : [];

  for (const a of recentAttachments) {
    // CRITICAL: use the stable n-field, not a sequential index.
    const stableN = typeof a.n === "number" ? a.n : 0;
    // originAbs is the source file itself (attachments are not copied).
    const abs = typeof a.originAbs === "string" ? a.originAbs : "";
    const previewRel = typeof a.preview === "string" ? a.preview : "";
    if (!abs) {
      throw new Error(`Attachment a${stableN} is missing originAbs (origin=${a.origin})`);
    }
    if (!previewRel) {
      throw new Error(`Attachment a${stableN} is missing preview (origin=${a.origin}, originAbs=${a.originAbs})`);
    }

    let label: string | undefined;
    if (labels) {
      const originalName = a.originalName || a.origin || `attachment-${stableN}`;
      label = `Attachment [a${stableN}] ${originalName}`;
    }
    items.push({ abs, previewAbs: path.join(chatWd, previewRel), label });
  }

  // ---- Variants ----
  const variants = (Array.isArray(state.variants) ? state.variants : []).filter(Boolean);
  variants.sort(
    (x, y) => x.v - y.v || String(x.createdAt ?? "").localeCompare(String(y.createdAt ?? ""))
  );
  const variantCap = Math.max(0, Math.floor(maxVariantItems));

  for (const v of variants.slice(0, variantCap)) {
    // Tool-harvested variants are recorded with `originAbs` and are never copied into
    // chatWd, so `chatWd/filename` would point at a file that does not exist there.
    // Prefer the recorded origin; fall back to the in-workdir copy otherwise.
    const recordedOrigin = (v as { originAbs?: unknown }).originAbs;
    const abs =
      typeof recordedOrigin === "string" && recordedOrigin
        ? recordedOrigin
        : path.join(chatWd, v.filename);
    items.push({
      abs,
      previewAbs: path.join(chatWd, v.preview),
      label: labels ? `Generated Image [v${v.v}]` : undefined,
    });
  }

  return items;
}
/**
 * Convert promotion items to Gemini inlineData parts (base64).
 *
 * For every item, in input order, emits an optional `{ text: label }` part followed by
 * an `{ inlineData }` part holding the base64 of the file at `previewAbs`.
 * The MIME type is guessed from the preview's extension (jpg/jpeg, png, webp, gif);
 * anything else is reported as "image/jpeg".
 *
 * @param items - Items whose `previewAbs` files are read from disk.
 * @returns Flat list of text / inlineData parts.
 * @throws Rejects with the underlying fs error if any preview cannot be read.
 */
export async function toGeminiInlineDataParts(items: PromotionItem[]): Promise<Array<{ text?: string; inlineData?: { data: string; mimeType: string } }>> {
  type Part = { text?: string; inlineData?: { data: string; mimeType: string } };

  const guessMime = (fn: string): string => {
    if (/\.jpe?g$/i.test(fn)) return "image/jpeg";
    if (/\.png$/i.test(fn)) return "image/png";
    if (/\.webp$/i.test(fn)) return "image/webp";
    if (/\.gif$/i.test(fn)) return "image/gif";
    return "image/jpeg";
  };

  // The previews are independent files, so read them concurrently instead of one
  // await per loop iteration. Promise.all resolves in input order, which keeps each
  // label directly in front of its image.
  const perItem = await Promise.all(
    items.map(async (it): Promise<Part[]> => {
      const buf = await fs.promises.readFile(it.previewAbs);
      const image: Part = {
        inlineData: { data: buf.toString("base64"), mimeType: guessMime(it.previewAbs) },
      };
      return it.label ? [{ text: it.label }, image] : [image];
    })
  );
  return perItem.flat();
}
/**
 * Decide whether attachments and/or variants need to be promoted.
 *
 * Persistent mode: promote whenever the respective list is non-empty.
 * Idempotent mode: promote attachments only when the highest stable `n` is positive and
 * differs from `state.lastPromotedAttachmentN`; promote variants only when a variants
 * timestamp is known and differs from `state.lastPromotedTs`.
 *
 * The variants timestamp is `state.lastVariantsTs` when set; otherwise it is taken from
 * the first variant whose filename looks like `generated-image-<ts>-v<N>.<ext>`.
 *
 * @param state - Current media state.
 * @param persistentMode - true to ignore the idempotency bookkeeping.
 */
export function shouldPromoteImages(
  state: ChatMediaState,
  persistentMode: boolean
): { shouldPromoteAttachment: boolean; shouldPromoteVariants: boolean } {
  const attachmentList = Array.isArray(state.attachments) ? state.attachments : [];
  const variantList = Array.isArray(state.variants) ? state.variants : [];

  // Highest stable attachment number seen so far (0 when there are none).
  const highestN = attachmentList.reduce((hi, a) => Math.max(hi, a.n ?? 0), 0);
  const previouslyPromotedN = state.lastPromotedAttachmentN ?? 0;

  // Timestamp group used for idempotency.
  // The explicit state.lastVariantsTs wins (tool-generated variants may not keep originals in chatWd).
  let variantsTs: string | undefined = state.lastVariantsTs;
  if (!variantsTs) {
    for (const variant of variantList) {
      const hit = /^generated-image-(.+)-v\d+\.\w+$/i.exec(variant.filename);
      if (hit) {
        variantsTs = hit[1];
        break;
      }
    }
  }
  const previouslyPromotedTs = state.lastPromotedTs;

  if (persistentMode) {
    // Anything present gets promoted, regardless of what was promoted before.
    return {
      shouldPromoteAttachment: attachmentList.length > 0,
      shouldPromoteVariants: variantList.length > 0,
    };
  }

  // Idempotent mode: only something not yet promoted qualifies.
  const attachmentIsNew = highestN > 0 && highestN !== previouslyPromotedN;
  const variantsAreNew = variantsTs !== undefined && variantsTs !== previouslyPromotedTs;
  return {
    shouldPromoteAttachment: attachmentIsNew,
    shouldPromoteVariants: variantsAreNew,
  };
}
/**
 * Update idempotency tracking after promotion.
 *
 * - When attachments were promoted, stores the highest stable `n` in
 *   `state.lastPromotedAttachmentN` (only if it is positive).
 * - When variants were promoted, stores the variants timestamp in `state.lastPromotedTs`:
 *   `state.lastVariantsTs` when set, otherwise the timestamp of the first variant whose
 *   filename matches `generated-image-<ts>-v<N>.<ext>`. This is the same derivation
 *   shouldPromoteImages uses, so what gets recorded is what gets compared next time.
 *
 * Mutates `state` in place and persists it via writeChatMediaStateAtomic when at least
 * one field was set.
 *
 * @param chatWd - Chat working directory passed through to the state writer.
 * @param state - Media state to update (mutated).
 * @param promotedAttachment - Whether attachments were promoted in this turn.
 * @param promotedVariants - Whether variants were promoted in this turn.
 */
export async function markAsPromoted(
  chatWd: string,
  state: ChatMediaState,
  promotedAttachment: boolean,
  promotedVariants: boolean
): Promise<void> {
  let changed = false;

  if (promotedAttachment) {
    const attachments = Array.isArray(state.attachments) ? state.attachments : [];
    const maxN = attachments.reduce((hi, a) => Math.max(hi, a.n ?? 0), 0);
    if (maxN > 0) {
      state.lastPromotedAttachmentN = maxN;
      changed = true;
    }
  }

  if (promotedVariants) {
    let promotedTs: string | undefined;
    if (typeof state.lastVariantsTs === "string" && state.lastVariantsTs) {
      // Prefer explicit lastVariantsTs (set by tool harvesting pipeline).
      promotedTs = state.lastVariantsTs;
    } else {
      // Scan ALL variants for the first parsable filename. Looking only at variants[0]
      // would record nothing when the first entry is e.g. a preview-only file, while
      // shouldPromoteImages would still derive a timestamp from a later entry and keep
      // re-promoting on every turn.
      const variants = Array.isArray(state.variants) ? state.variants : [];
      for (const v of variants) {
        const m = /^generated-image-(.+)-v\d+\.\w+$/i.exec(v?.filename);
        if (m) {
          promotedTs = m[1];
          break;
        }
      }
    }
    if (promotedTs) {
      state.lastPromotedTs = promotedTs;
      changed = true;
    }
  }

  if (changed) {
    await writeChatMediaStateAtomic(chatWd, state);
  }
}
// ============================================================================
// Tool-generated variants harvesting (NO copies, NO preview generation)
// ============================================================================
/**
 * Extract the timestamp group from a basename of the form
 * `generated-image-<ts>-v<N>.<ext>` (case-insensitive).
 *
 * @param base - File basename (no directory part).
 * @returns The `<ts>` portion, or undefined when the name does not match.
 */
function parseGeneratedTsFromBasename(base: string): string | undefined {
  const generatedName = /^generated-image-(.+)-v\d+\.\w+$/i;
  return generatedName.exec(base)?.[1];
}
/**
 * Outcome of harvestToolGeneratedVariantsFromLatestToolMessage.
 */
type ToolVariantHarvestResult = {
// true when the harvested data differed from the stored state and the state file was rewritten.
changedState: boolean;
// true when at least one markdown fragment was emitted through the controller in this call.
injectedMarkdown: boolean;
// "tool" when the latest tool message yielded results, "assistant" when assistant text was
// used instead, "none" when neither a tool message nor assistant text was available.
source: "tool" | "assistant" | "none";
// Machine-readable explanation, e.g. "recorded", "no-state-change", "no-variant-markers-found",
// "no-preview-for-any-variant", "no-tool-message-and-no-assistant-text".
reason: string;
// Number of distinct variant numbers discovered (the harvester considers at most 3).
foundVariants: number;
// Number of those variants that had a preview and were therefore recorded.
recordedVariants: number;
};
/**
 * Harvest tool-generated variants from the latest tool message and record them into chat_media_state.json.
 * - Uses preview images already written to chatWd by the tool (e.g. image-<epoch>-1.jpg).
 * - Uses the original absolute path from tool text lines (e.g. Original v1: file:///.../generated-image-...-v1.png).
 * - Does NOT create previews and does NOT copy originals into chatWd.
 * - When `ctl` is given, injects each variant's markdown into the chat output once
 *   (idempotent via state.injectedMarkdown, keyed by preview file name). If the tool
 *   supplied no markdown for a variant, a plain image link to its preview is injected.
 * - If there is no usable tool message, the most recent non-empty assistant text is
 *   parsed for the same markers instead.
 *
 * At most the three lowest variant numbers are considered. A variant without a preview
 * is skipped. `createdAt` of an unchanged variant is carried over from the stored state,
 * so calling this repeatedly on the same history does not rewrite the state file.
 *
 * @param ctl - Generator controller used for markdown injection, or null to skip injection.
 * @param history - Chat history to scan (newest message first).
 * @param chatWd - Chat working directory holding chat_media_state.json.
 * @param debug - When true, logs a line after recording.
 * @returns Summary of what was found, recorded and injected.
 */
export async function harvestToolGeneratedVariantsFromLatestToolMessage(
  ctl: GeneratorController | null,
  history: Chat,
  chatWd: string,
  debug = false,
): Promise<ToolVariantHarvestResult> {
  const previewByV = new Map<number, string>();
  const markdownByV = new Map<number, string>();
  const originAbsByV = new Map<number, string>();
  let lastVariantsTs: string | undefined;

  /** Variant number from a "-N.ext" suffix; undefined when absent or not positive. */
  const parseVFromBasename = (bn: string): number | undefined => {
    const m = /-(\d+)\.(?:jpe?g|png|webp|gif|bmp|tiff?|heic)$/i.exec(bn);
    if (!m) return undefined;
    const n = parseInt(m[1], 10);
    return Number.isFinite(n) && n > 0 ? n : undefined;
  };

  /** Basename from the first "(./file.ext)" link target in a markdown string. */
  const extractPreviewBasenameFromMarkdown = (md: string): string | null => {
    const m = /\(\.\/([^\)\s]+\.(?:png|jpe?g|webp|gif|bmp|tiff?|heic))\)/i.exec(md);
    return m ? m[1] : null;
  };

  /** Record a single "Preview vN: ..." / "Original vN: file://..." marker line. */
  const recordMarkerLine = (line: string): void => {
    const mPrev = /^Preview v(\d+):\s*(?:\.\/)?([^\s]+\.(?:png|jpe?g|webp|gif|bmp|tiff?|heic))$/i.exec(line);
    if (mPrev) {
      const vNum = parseInt(mPrev[1], 10);
      if (Number.isFinite(vNum) && vNum > 0) previewByV.set(vNum, mPrev[2]);
    }
    const mOrig = /^Original v(\d+):\s*(file:\/\/\S+)$/i.exec(line);
    if (mOrig) {
      const vNum = parseInt(mOrig[1], 10);
      const abs = fileUriToPath(mOrig[2]);
      if (abs) {
        originAbsByV.set(vNum, abs);
        const ts = parseGeneratedTsFromBasename(path.basename(abs));
        if (ts) lastVariantsTs = ts;
      }
    }
  };

  /** Parse a string that looks like a JSON array; null when it is not one. */
  const parseJsonArray = (text: string): any[] | null => {
    const s = text.trim();
    if (!s.startsWith("[") || !s.endsWith("]")) return null;
    try {
      const parsed: unknown = JSON.parse(s);
      return Array.isArray(parsed) ? parsed : null;
    } catch {
      return null; // bracketed but not valid JSON
    }
  };

  const newestFirst = (Array.from(history as any) as any[]).reverse();
  const lastToolMsg: any | undefined = newestFirst.find((msg) => msg?.getRole?.() === "tool");
  const toolReadable = !!lastToolMsg && typeof lastToolMsg.getToolCallResults === "function";

  /** Most recent non-blank assistant text, or "" (best-effort: accessor errors are ignored). */
  const lastAssistantText = (): string => {
    try {
      for (const msg of newestFirst) {
        if (msg?.getRole?.() !== "assistant") continue;
        const t = msg?.getText?.();
        if (typeof t === "string" && t.trim()) return t;
      }
    } catch { /* fall through to "" */ }
    return "";
  };

  const parseAssistantForVariants = (text: string): void => {
    if (!text || typeof text !== "string") return;
    // Extract ./image-....-N.jpg from markdown image syntax.
    const reMd = /!\[[^\]]*\]\(\.\/([^\)\s]+\.(?:png|jpe?g|webp|gif|bmp|tiff?|heic))\)/gi;
    let m: RegExpExecArray | null;
    while ((m = reMd.exec(text)) !== null) {
      const vNum = parseVFromBasename(m[1]);
      if (vNum) {
        previewByV.set(vNum, m[1]);
        markdownByV.set(vNum, m[0]);
      }
    }
    // Extract Preview/Original lines.
    for (const line of text.split(/\r?\n/)) recordMarkerLine(line.trim());
  };

  if (!toolReadable) {
    // Deterministic secondary source: assistant message text.
    const aText = lastAssistantText();
    if (!aText) {
      return { changedState: false, injectedMarkdown: false, source: "none", reason: "no-tool-message-and-no-assistant-text", foundVariants: 0, recordedVariants: 0 };
    }
    parseAssistantForVariants(aText);
    // Continue into the recording flow below.
  }

  const results: any[] = toolReadable ? (lastToolMsg.getToolCallResults() as any[]) : [];
  const hasToolResults = Array.isArray(results) && results.length > 0;
  const source: ToolVariantHarvestResult["source"] = hasToolResults ? "tool" : "assistant";

  /** Scan one array of tool items ({type:"image"|"text", ...}) for variant markers. */
  const considerArrayPayload = (arr: any[]): void => {
    // Items without an explicit "-N.ext" suffix are numbered in order of appearance.
    let fallbackV = 0;
    for (const it of arr) {
      if (!it || typeof it !== "object") continue;
      if (it.type === "image" && typeof it.fileName === "string") {
        // Prefer explicit v from suffix "-N.ext" in the preview filename.
        const vNum = parseVFromBasename(it.fileName) ?? ++fallbackV;
        previewByV.set(vNum, it.fileName);
        if (typeof it.markdown === "string") markdownByV.set(vNum, it.markdown);
        continue;
      }
      // Some tools only provide markdown in image objects.
      if (it.type === "image" && typeof it.markdown === "string") {
        const bn = extractPreviewBasenameFromMarkdown(it.markdown);
        if (bn) {
          const vNum = parseVFromBasename(bn) ?? ++fallbackV;
          previewByV.set(vNum, bn);
          markdownByV.set(vNum, it.markdown);
        }
        continue;
      }
      if (it.type === "text" && typeof it.text === "string") {
        const t = it.text.trim();
        recordMarkerLine(t);
        // If the tool gave us markdown inline in a text entry, capture it too.
        const mdBn = extractPreviewBasenameFromMarkdown(t);
        if (mdBn) {
          const vNum = parseVFromBasename(mdBn) ?? ++fallbackV;
          previewByV.set(vNum, mdBn);
          // Store the full string only if it looks like markdown image syntax.
          if (t.includes("![") && t.includes("](./")) markdownByV.set(vNum, t);
        }
      }
    }
  };

  // Full DFS to catch shapes like: { content: [ { type:"toolCallResult", content:"[ ... ]" } ] }.
  // It visits every array and every JSON-array string reachable from the payload, which
  // includes everything a shallow look at content/result/output/data/items/response finds.
  const collectEmbeddedToolItemArrays = (payload: any): any[][] => {
    const out: any[][] = [];
    const seen = new Set<any>();
    const visit = (v: any): void => {
      if (v === null || v === undefined) return;
      if (typeof v === "string") {
        const parsed = parseJsonArray(v);
        if (parsed) out.push(parsed);
        return;
      }
      if (typeof v !== "object") return;
      if (seen.has(v)) return; // guard against cycles / shared references
      seen.add(v);
      if (Array.isArray(v)) {
        out.push(v);
        for (const it of v) visit(it);
        return;
      }
      for (const val of Object.values(v as any)) visit(val);
    };
    visit(payload);
    return out;
  };

  if (hasToolResults) {
    for (const r of results) {
      let payload: any = r?.content ?? r?.result ?? r?.output ?? null;
      if (typeof payload === "string") {
        try { payload = JSON.parse(payload); } catch { /* keep the raw string */ }
      }
      if (Array.isArray(payload)) {
        considerArrayPayload(payload);
      } else {
        for (const arr of collectEmbeddedToolItemArrays(payload)) considerArrayPayload(arr);
      }
    }
  }

  if (previewByV.size === 0 && originAbsByV.size === 0) {
    return {
      changedState: false,
      injectedMarkdown: false,
      source,
      reason: "no-variant-markers-found",
      foundVariants: 0,
      recordedVariants: 0,
    };
  }

  const vNums = Array.from(new Set([...previewByV.keys(), ...originAbsByV.keys()]))
    .filter((n) => typeof n === "number" && n > 0)
    .sort((a, b) => a - b)
    .slice(0, 3);

  // No preview means we cannot display/promote in Base64 mode.
  // Such variants are skipped (strict) rather than inventing a preview.
  const drafts = vNums.flatMap((vNum) => {
    const preview = previewByV.get(vNum);
    if (!preview) return [];
    const originAbs = originAbsByV.get(vNum);
    return [{
      v: vNum,
      filename: originAbs ? path.basename(originAbs) : preview,
      preview,
      originAbs,
    }];
  });
  if (drafts.length === 0) {
    return { changedState: false, injectedMarkdown: false, source, reason: "no-preview-for-any-variant", foundVariants: vNums.length, recordedVariants: 0 };
  }

  const state = await readChatMediaState(chatWd);
  const nowIso = new Date().toISOString();

  // Keep the stored createdAt for variants that are already recorded unchanged.
  // Stamping every variant with "now" would make the comparison below differ on every
  // call and rewrite the state file even when nothing new was harvested.
  const previousVariants: any[] = Array.isArray(state.variants) ? state.variants : [];
  const harvested: Array<{ v: number; filename: string; preview: string; originAbs?: string; createdAt: string }> =
    drafts.map((d) => {
      const prior = previousVariants.find(
        (p) => p && p.v === d.v && p.filename === d.filename && p.preview === d.preview
      );
      const createdAt = typeof prior?.createdAt === "string" && prior.createdAt ? prior.createdAt : nowIso;
      return { ...d, createdAt };
    });

  const injectedSet = new Set<string>(Array.isArray(state.injectedMarkdown) ? state.injectedMarkdown : []);
  let injected = false;
  if (ctl) {
    for (const v of harvested) {
      if (injectedSet.has(v.preview)) continue;
      // Fall back to a plain image link so a variant without tool-provided markdown is
      // still shown instead of being marked as injected with an empty fragment.
      // NOTE(review): the fallback alt text is a reconstruction — confirm desired wording.
      const md = markdownByV.get(v.v) || `![Generated image v${v.v}](./${v.preview})`;
      ctl.fragmentGenerated(`\n\n${md}\n\n`);
      injectedSet.add(v.preview);
      injected = true;
    }
  }

  const nextState: ChatMediaState = {
    ...state,
    variants: harvested,
    injectedMarkdown: injectedSet.size ? Array.from(injectedSet) : undefined,
    lastVariantsTs: lastVariantsTs ?? state.lastVariantsTs,
    lastEvent: { type: "variants", at: nowIso },
    // Next free variant number: one past the highest recorded v (v numbers may have gaps).
    counters: { ...(state.counters ?? {}), nextVariantV: Math.max(...harvested.map((h) => h.v)) + 1 },
  };

  const changedState = JSON.stringify(state.variants) !== JSON.stringify(nextState.variants)
    || state.lastVariantsTs !== nextState.lastVariantsTs
    || JSON.stringify(state.injectedMarkdown || []) !== JSON.stringify(nextState.injectedMarkdown || []);
  if (changedState) {
    await writeChatMediaStateAtomic(chatWd, nextState);
    if (debug) {
      try { console.info(`[Tool Variants] Recorded ${harvested.length} variant(s); lastVariantsTs=${nextState.lastVariantsTs ?? ""}`); } catch { }
    }
  }

  return {
    changedState,
    injectedMarkdown: injected,
    source,
    reason: changedState ? "recorded" : "no-state-change",
    foundVariants: vNums.length,
    recordedVariants: harvested.length,
  };
}
/**
 * Recover Pro variants from the latest tool message.
 *
 * Takes the first tool-result payload that is a JSON array string, collects the
 * original image URIs it mentions (from `files.original` inside JSON text items, or from
 * human-readable "Original vN: file://..." lines when the text is not JSON), copies each
 * original into the chat working directory and records the resulting filename/preview
 * pairs via recordVariantsProvision.
 *
 * Preview handling depends on the mode:
 * - Base64 mode (`shouldUseFilesApi` false): an analysis JPEG is encoded from the copied
 *   file and used as the preview.
 * - GCS mode (`shouldUseFilesApi` true): the copied original itself is the preview.
 *
 * Originals that cannot be resolved, copied or previewed are skipped.
 *
 * @param history - Chat history; scanned from the newest message backwards.
 * @param chatWd - Chat working directory receiving the copies.
 * @param debug - When true, logs progress and failures.
 * @param shouldUseFilesApi - Selects GCS mode (true) or Base64 mode (false).
 */
export async function recoverProVariantsFromHistory(history: Chat, chatWd: string, debug = false, shouldUseFilesApi = false): Promise<void> {
  const toolMsg: any | undefined = Array.from(history)
    .reverse()
    .find((msg) => msg.getRole && msg.getRole() === "tool");
  if (!toolMsg || typeof toolMsg.getToolCallResults !== "function") {
    if (debug) console.warn("[Recover] No tool message for Pro variants");
    return;
  }

  const results = toolMsg.getToolCallResults() as any[];
  if (!Array.isArray(results) || results.length === 0) return;

  // First string payload that parses to a JSON array wins.
  let toolItems: any[] | null = null;
  for (const r of results) {
    const raw = r?.content ?? r?.result ?? r?.output ?? null;
    if (typeof raw !== "string") continue;
    try {
      const parsed = JSON.parse(raw);
      if (Array.isArray(parsed)) {
        toolItems = parsed;
        break;
      }
    } catch { }
  }
  if (!toolItems) return;

  const uriByV = new Map<number, string>();
  const urisFromJson: string[] = [];
  for (const item of toolItems) {
    if (!item || item.type !== "text" || typeof item.text !== "string") continue;
    try {
      const files = JSON.parse(item.text)?.files;
      if (files && typeof files === "object") {
        const original = files.original;
        if (typeof original === "string") {
          urisFromJson.push(original);
        } else if (Array.isArray(original)) {
          urisFromJson.push(...original.filter((x: any) => typeof x === "string"));
        }
      }
    } catch {
      // Not JSON: fall back to human-readable "Original vN: file://..." lines only.
      for (const hit of item.text.matchAll(/Original\s*v\s*(\d+)\s*:\s*(file:\/\/\S+)/gi)) {
        const vNum = parseInt(hit[1], 10);
        if (vNum >= 1 && vNum <= 99) uriByV.set(vNum, hit[2]);
      }
    }
  }

  // v-numbered originals first (ascending v), then any JSON-listed ones not already present.
  const originalUris: string[] = [...uriByV.entries()]
    .sort(([va], [vb]) => va - vb)
    .map(([, uri]) => uri);
  for (const uri of urisFromJson) {
    if (!originalUris.includes(uri)) originalUris.push(uri);
  }
  if (originalUris.length === 0) return;

  const recovered: Array<{ filename: string; preview: string }> = [];
  for (const uri of originalUris) {
    const sourceAbs = fileUriToPath(uri);
    if (!sourceAbs) continue;
    const baseName = path.basename(sourceAbs);
    const copyAbs = path.join(chatWd, baseName);

    try {
      await copyFile(sourceAbs, copyAbs);
      if (debug) console.info("[Recover] Copied original:", sourceAbs, "->", copyAbs);
    } catch (e) {
      if (debug) console.warn("[Recover] Copy failed:", (e as Error).message);
      continue;
    }

    try {
      if (shouldUseFilesApi) {
        // In GCS mode (C), keep original PNGs as previews and avoid any JPEGs.
        recovered.push({ filename: baseName, preview: baseName });
      } else {
        // Base64 mode (B): the analysis JPEG serves as the preview.
        const analysisName = replacePrefixGeneratedToAnalysis(baseName);
        const copiedBytes = await fs.promises.readFile(copyAbs);
        const jpeg = await encodeJpegFromBuffer(copiedBytes, 85);
        await fs.promises.writeFile(path.join(chatWd, analysisName), jpeg);
        recovered.push({ filename: baseName, preview: analysisName });
      }
    } catch (e) {
      if (debug) console.warn("[Recover] Analysis/preview generation failed:", (e as Error).message);
      continue;
    }
  }

  if (recovered.length) {
    await recordVariantsProvision(chatWd, recovered);
  }
}
// (additional vision helpers removed as part of cleanup)