// Project Files
// src/strategies/BaseGeminiStrategy.ts
import { GoogleGenerativeAI, HarmCategory, HarmBlockThreshold } from '@google/generative-ai';
import { GenerationContext, ModelStrategy } from "./ModelStrategy";
import { detectCapabilities, shouldUseFilesApiForModel } from "../capabilities";
import { buildGeminiTools } from "../tools";
import fs from "fs";
import path from "path";
import { encodeJpegFromBuffer, toIsoLikeTimestamp, resizeMaxDimJpegFromFile, fileUriToPath } from "../image";
import { buildPromotionPartsForMode } from "../visionModeSelector";
import { snapshotHistoryMediaState } from "../history-state";
import { recordVariantsProvision, readChatMediaState, type ChatMediaState } from "../chat-media-state";
import { findAllAttachmentsFromConversation, importAttachmentBatch } from "../attachments";
import { appendSignature, loadSignatures, pruneSignatures, computeContentHash } from "../thought-signatures";
import {
safeStringify,
toGeminiMessages,
getLastUserText,
collectSystemText,
pad2,
streamTextFragments,
parseAttachmentWrappers,
stableJsonStringify
} from "../generator-utils";
export class BaseGeminiStrategy implements ModelStrategy {
  /**
   * Hook for subclasses (e.g. GeminiThinkingStrategy) to persist the
   * thought_signature that Gemini attaches to tool-call parts.
   * Default: no-op (keeps BaseGeminiStrategy generic).
   */
  protected async onObservedFunctionCallPart(_context: GenerationContext, _safeName: string, _args: any, _sig?: string): Promise<void> {
    return;
  }

  /**
   * Executes one generation turn:
   *  1. reconciles attachment/media state strictly from chat history (SSoT),
   *  2. builds contents, tools and the system instruction,
   *  3. calls the Gemini SDK (streaming when the model supports it),
   *  4. forwards text, reasoning, tool calls and generated images to `ctl`.
   *
   * The invocation ends immediately after emitting a single tool call so
   * LM Studio can run the tool and re-invoke generate() with the result.
   */
  async generate(context: GenerationContext): Promise<void> {
    const { ctl, history, model, apiKey, globalConfig, pluginConfig, debugChunks, logRequests } = context;
    const visionPromotionPersistent = globalConfig.get("visionPromotionPersistent");
    const useFilesApiForVision = pluginConfig.get("useFilesApiForVision");
    const redactSecrets: string[] = [];
    const genAI = new GoogleGenerativeAI(apiKey);
    const caps = detectCapabilities(model);
    if (debugChunks) {
      try {
        console.info("[Capabilities] model=", model, {
          supportsTools: caps.supportsTools,
          supportsVision: caps.supportsVision,
          supportsImage: caps.supportsImage,
          supportsThinking: caps.supportsThinking,
          supportsStreaming: caps.supportsStreaming,
          imageGeneration: caps.imageGeneration,
        });
      } catch { /* ignore */ }
    }
    const lastUserText = getLastUserText(history);
    const supportsFunctionCalling = caps.supportsTools;
    const systemText = collectSystemText(history);
    const chatWd = ctl.getWorkingDirectory();
    // Reconcile media state strictly from chat history (SSoT)
    await snapshotHistoryMediaState(ctl, history, chatWd, model);
    // Decide promotion transport mode early (Files API vs inline Base64)
    const shouldUseFilesApi = shouldUseFilesApiForModel(model, useFilesApiForVision);
    await this.reconcileAttachments(context, shouldUseFilesApi);
    // Backfill: in Base64 mode only (not GCS). Currently a no-op (see method).
    await this.backfillAnalysisPreviews(context, shouldUseFilesApi);
    // Build the Gemini tool declarations, plus the safe<->original name maps
    // used to translate between Gemini-safe tool names and LM Studio names.
    const { tools: geminiTools, originalToSafe, safeToOriginal } = supportsFunctionCalling
      ? buildGeminiTools(ctl, lastUserText || "")
      : { tools: undefined, originalToSafe: new Map<string, string>(), safeToOriginal: new Map<string, string>(), safeNames: [] } as any;
    // Manage Thought Signatures for legacy / non-isolated thinking models only.
    // gemini-3-pro-image-preview is handled by GeminiImageThinkingStrategy.
    let contents: any[];
    if (caps.supportsThinking && model !== "gemini-3-pro-image-preview") {
      let signatures: any[] = [];
      try {
        await pruneSignatures(chatWd, history);
      } catch (e) {
        if (debugChunks) console.warn("Failed to prune signatures:", e);
      }
      const sigState = await loadSignatures(chatWd);
      signatures = sigState.signatures;
      contents = toGeminiMessages(history, originalToSafe, signatures);
    } else {
      contents = toGeminiMessages(history, originalToSafe);
    }
    // Hook for subclasses to modify contents (e.g. flattening for Thinking models)
    this.modifyContents(contents, caps);
    // Vision promotion handover: prepend promoted image parts to the last
    // user message so the model sees them as part of the current turn.
    try {
      if (debugChunks) {
        try {
          console.info("[Vision Path] Toggle=", !!useFilesApiForVision, "→ shouldUseFilesApi=", !!shouldUseFilesApi);
        } catch { /* ignore */ }
      }
      const res = await buildPromotionPartsForMode({
        ctl,
        history,
        apiKey,
        chatWd,
        debugChunks,
        shouldUseFilesApi,
        model,
        visionPromotionPersistent,
        useFilesApiForVision: !!useFilesApiForVision,
      } as any);
      const promoParts: any[] = res.promoParts || [];
      if (promoParts.length) {
        const lastUserMessage = contents.slice().reverse().find(m => m.role === 'user');
        if (lastUserMessage) {
          lastUserMessage.parts = [...promoParts, ...(lastUserMessage.parts || [])];
        } else {
          contents.push({ role: "user", parts: promoParts });
        }
        if (debugChunks) console.info(`Prepended ${promoParts.length} vision part(s) to the user message.`);
      }
    } catch (e) {
      if (debugChunks) console.error("Promotion parts error:", (e as Error).message);
    }
    // Prepare system instruction: chat system text + current timestamp +
    // a tool-use (or no-tools) policy depending on model capabilities.
    let systemInstruction: any | undefined;
    {
      const parts: Array<{ text: string }> = [];
      if (systemText) parts.push({ text: systemText });
      const now = new Date();
      const tz = (() => {
        try { return Intl.DateTimeFormat().resolvedOptions().timeZone || "UTC"; } catch { return "UTC"; }
      })();
      const yyyy = now.getFullYear();
      const MM = pad2(now.getMonth() + 1);
      const DD = pad2(now.getDate());
      const hh = pad2(now.getHours());
      const mm = pad2(now.getMinutes());
      const ss = pad2(now.getSeconds());
      parts.push({ text: `Current date/time: ${yyyy}-${MM}-${DD} ${hh}:${mm}:${ss} (${tz})` });
      if (!supportsFunctionCalling) {
        const imgPolicy =
          "You are an image-capable model without tool support. Do not attempt to call tools or functions. " +
          "You can describe, analyze, and generate images. Use only information explicitly provided by the user; " +
          "leave unspecified parameters at defaults and avoid guesses.";
        parts.push({ text: imgPolicy });
      } else if (geminiTools) {
        const policy =
          "Tool use policy: Use available tools only when necessary to fulfill the request. " +
          "Use only arguments explicitly provided or clearly implied by the user; leave unspecified parameters at their defaults. " +
          "Do not ask for missing arguments and do not guess values. At most one tool call per turn. " +
          "When a tool result describes an image, follow its $hint and markdown instructions: show the image using the provided markdown and then describe it briefly. " +
          "Images that appear again in later turns are persistent context (from earlier uploads or tool outputs); do not assume the user has just uploaded them again. " +
          "For non-image tools, do not echo raw JSON or internal fields such as 'markdown'; instead, summarize the result in a short, natural sentence.";
        parts.push({ text: policy });
      }
      if (parts.length) systemInstruction = { parts };
    }
    /* 2. Prepare the request */
    const generateContent: any = {
      contents,
    };
    if (geminiTools && supportsFunctionCalling) {
      generateContent.tools = geminiTools;
      generateContent.toolConfig = {
        functionCallingConfig: {
          mode: "AUTO",
        },
      } as any;
    }
    if (systemInstruction) {
      generateContent.systemInstruction = systemInstruction;
    }
    // Hook for subclasses to modify generation config (e.g. thinkingConfig)
    this.modifyGenerationConfig(generateContent, context, caps);
    try {
      /* 3. Make the API call using SDK */
      // logRequests: raw data only (no telemetry prefixes)
      if (logRequests) console.info(safeStringify({ direction: "request", model, payload: generateContent }, redactSecrets));
      const generativeModel = genAI.getGenerativeModel({
        model: model,
        // FIX: the JS SDK expects camelCase `systemInstruction` in ModelParams;
        // the previous snake_case `system_instruction` key was silently ignored,
        // so the system prompt never reached the model.
        ...(systemInstruction ? { systemInstruction } : {}),
        safetySettings: ([{
          category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
          threshold: HarmBlockThreshold.BLOCK_NONE
        }] as any),
      });
      const isStreamingCandidate = caps.supportsStreaming;
      if (debugChunks) {
        try {
          console.info("[Streaming] isStreamingCandidate=", isStreamingCandidate);
        } catch { /* ignore */ }
      }
      let response: any;
      let candidates: any[] | undefined;
      if (isStreamingCandidate) {
        if (debugChunks) console.info("[Streaming] Using generateContentStream for this request.");
        const stream = await generativeModel.generateContentStream({
          contents: generateContent.contents,
          tools: generateContent.tools,
          toolConfig: generateContent.toolConfig,
          generationConfig: generateContent.generationConfig,
        } as any);
        let toolCallEmitted = false;
        // Only tool names we declared ourselves may be emitted back to LM Studio.
        const allowedSafe = new Set<string>(safeToOriginal ? Array.from(safeToOriginal.keys()) : []);
        for await (const item of stream.stream) {
          const scands = (item as any)?.candidates as any[] | undefined;
          if (!Array.isArray(scands) || !scands.length) continue;
          const candidate = scands[0];
          const parts = candidate?.content?.parts;
          if (!Array.isArray(parts)) continue;
          let textBuf = "";
          const toolCalls: Array<{ name: string; args: any }> = [];
          for (const part of parts) {
            const p = part as any;
            // Image markdown must be rendered as regular text even when the
            // SDK flags the part as a "thought".
            const isImageLink = typeof p.text === 'string' && /!\[Image\]\(.*?\)/.test(p.text);
            if (p.thought && !isImageLink) {
              const content = typeof p.thought === 'string' ? p.thought : (p.text || "");
              if (content) {
                ctl.fragmentGenerated(content, { reasoningType: "reasoning" });
              }
            } else if (p.text) {
              if (!p.thought || isImageLink) {
                textBuf += (textBuf ? "\n" : "") + p.text;
              }
            }
            const fcall = part?.functionCall || part?.function_call;
            if (fcall && fcall.name) {
              let args = fcall.args;
              if (typeof args === "string") { try { args = JSON.parse(args); } catch { /* keep as string */ } }
              toolCalls.push({ name: String(fcall.name), args });
              // Subclass hook: persist thought_signature for tool calls if needed.
              try {
                const sig = (part as any)?.thought_signature || (part as any)?.thoughtSignature;
                await this.onObservedFunctionCallPart(context, String(fcall.name), args, sig);
              } catch { /* best-effort */ }
            }
          }
          if (textBuf.trim().length) {
            if (debugChunks) console.info("[Streaming] Text update (raw from SDK):", textBuf.slice(0, 120));
            ctl.fragmentGenerated(textBuf);
          }
          if (!toolCallEmitted && toolCalls.length) {
            const streamingToolCall = toolCalls.find(tc => allowedSafe.has(tc.name));
            if (streamingToolCall) {
              const originalName = safeToOriginal.get(streamingToolCall.name) || streamingToolCall.name;
              const argsJson = typeof streamingToolCall.args === "string"
                ? streamingToolCall.args
                : JSON.stringify(streamingToolCall.args ?? {});
              const callId = `gemini-fc-${Date.now()}-0`;
              ctl.toolCallGenerationStarted();
              ctl.toolCallGenerationNameReceived(originalName);
              ctl.toolCallGenerationArgumentFragmentGenerated(argsJson);
              ctl.toolCallGenerationEnded({ type: "function", name: originalName, arguments: streamingToolCall.args ?? {}, id: callId });
              toolCallEmitted = true;
              // CRITICAL (LM Studio tool-call loop): once we emit a tool call, we must
              // end this generate() invocation immediately so LM Studio can run the tool
              // and re-invoke generate() with the tool result.
              ctl.fragmentGenerated("");
              return;
            }
          }
        }
        ctl.fragmentGenerated(""); // Close reasoning block
        response = stream.response;
        candidates = (response as any)?.candidates as any[] | undefined;
        // Robustness: if the final response snapshot contains additional
        // non-thinking text that was never streamed, render it once here.
        if (Array.isArray(candidates) && candidates.length > 0) {
          try {
            let responseText = "";
            for (const cand of candidates) {
              const parts = cand?.content?.parts;
              if (!Array.isArray(parts)) continue;
              for (const p of parts as any[]) {
                const isImageLink = typeof p.text === "string" && /!\[Image\]\(.*?\)/.test(p.text);
                const isThought = !isImageLink && !!p.thought;
                if (p.text && !isImageLink && !isThought) {
                  responseText += (responseText ? "\n" : "") + p.text;
                }
              }
            }
            if (responseText.trim().length) {
              if (debugChunks) console.info("[Streaming] Final snapshot text update (Base):", responseText.slice(0, 200));
              ctl.fragmentGenerated(responseText);
            }
          } catch { /* best-effort only */ }
        }
      } else {
        if (debugChunks) console.info("[Streaming] Using non-streaming generateContent for this request.");
        const result = await generativeModel.generateContent({
          contents: generateContent.contents,
          tools: generateContent.tools,
          toolConfig: generateContent.toolConfig,
          // FIX: forward generationConfig here too; the streaming path already
          // does, and the modifyGenerationConfig hook may have populated it.
          generationConfig: generateContent.generationConfig,
        } as any);
        response = result.response;
        candidates = (response as any)?.candidates as any[] | undefined;
      }
      if (logRequests) console.info(safeStringify({ direction: "response", model, payload: { candidates, promptFeedback: (response as any)?.promptFeedback } }, redactSecrets));
      /* 4. Process response */
      if (!Array.isArray(candidates) || candidates.length === 0) {
        if (debugChunks) console.warn("Gemini: no candidates in response. Raw response:", safeStringify(response, redactSecrets));
        return;
      }
      // Vision Promotion is always ON
      await this.processCandidates(candidates, context, safeToOriginal, true, shouldUseFilesApi, caps);
      if (debugChunks) console.info("Generation completed.");
    } catch (error: any) {
      // FIX: handleError is async and may rethrow after attempting a fallback;
      // without `await`, generate() resolved early and the rethrown error
      // surfaced as an unhandled promise rejection.
      await this.handleError(error, context, genAI, generateContent, systemInstruction, shouldUseFilesApi);
    }
  }

  /**
   * Hook for subclasses to rewrite the `contents` array before the request
   * (e.g. flattening for Thinking models). Default: no-op.
   */
  protected modifyContents(contents: any[], caps: any) {
    // Default: do nothing
  }

  /**
   * Hook for subclasses to attach/adjust `generateContent.generationConfig`
   * (e.g. thinking budgets). Default: no-op.
   */
  protected modifyGenerationConfig(generateContent: any, context: GenerationContext, caps: any) {
    // Default: do nothing
  }

  /**
   * Imports every attachment referenced by the conversation history into the
   * chat media state (stable n-numbering, idempotent, no file copies).
   * Existing state is preserved when the history references no attachments.
   */
  private async reconcileAttachments(context: GenerationContext, shouldUseFilesApi: boolean) {
    const { ctl, history, debugChunks, globalConfig } = context;
    const chatWd = ctl.getWorkingDirectory();
    try {
      // Use unified SSOT scan from attachments.ts
      const ssotPaths = await findAllAttachmentsFromConversation(chatWd, !!debugChunks);
      if (ssotPaths.length === 0) {
        if (debugChunks) console.info('[Attachment Reconcile] No attachments in history; preserving existing state');
        return;
      }
      // Read current state (or initialize empty)
      const state = await readChatMediaState(chatWd).catch(() => ({
        attachments: [],
        variants: [],
        counters: { nextN: 1, nextV: 1 }
      } as ChatMediaState));
      // Use importAttachmentBatch for stable n-numbering, idempotent, no copies
      const result = await importAttachmentBatch(
        chatWd,
        state,
        ssotPaths,
        { maxDim: 1024, quality: 85 },
        2, // max 2 attachments
        !!debugChunks
      );
      if (result.changed && debugChunks) {
        console.info(`[Attachment Reconcile] Imported attachments from SSOT`);
      }
    } catch (e) {
      // Best-effort: reconcile failures must never abort generation.
      if (debugChunks) console.warn('[Attachment Reconcile] Error:', (e as Error).message);
    }
  }

  /**
   * DEPRECATED: preview generation is now handled by importAttachmentBatch in
   * reconcileAttachments. Kept for compatibility; intentionally does nothing.
   * The new preview naming is: preview-<origin> (e.g., preview-1766100380042 - 811.jpg)
   */
  private async backfillAnalysisPreviews(_context: GenerationContext, _shouldUseFilesApi: boolean) {
  }

  /**
   * Processes final (non-streamed) candidates: persists thought signatures,
   * streams text, emits at most one allowed tool call (then returns), and
   * writes generated inline images (plus analysis previews in Base64 mode)
   * to the chat working directory.
   */
  private async processCandidates(candidates: any[], context: GenerationContext, safeToOriginal: Map<string, string>, _allowVisionPromotion: boolean, shouldUseFilesApi: boolean, caps?: any) {
    // Note: _allowVisionPromotion is deprecated - Vision Promotion is always ON
    const { ctl, debugChunks } = context;
    // Map an image MIME type to a file extension; defaults to ".png".
    const mimeToExt = (mime: string): string => {
      const m = (mime || "").toLowerCase();
      if (m.includes("jpeg") || m === "image/jpg") return ".jpg";
      if (m.includes("png")) return ".png";
      if (m.includes("webp")) return ".webp";
      if (m.includes("gif")) return ".gif";
      if (m.includes("bmp")) return ".bmp";
      if (m.includes("svg")) return ".svg";
      return ".png";
    };
    for (const candidate of candidates) {
      const parts = candidate?.content?.parts;
      if (debugChunks) console.info("Processing candidate parts:", JSON.stringify(parts));
      if (Array.isArray(parts)) {
        let textBuf = "";
        const images: Array<{ data: string; mimeType: string }> = [];
        const toolCalls: Array<{ name: string; args: any }> = [];
        for (const part of parts) {
          // Capture Thought Signature (only if model supports thinking)
          if (caps?.supportsThinking) {
            const sig = part.thought_signature || part.thoughtSignature;
            if (sig) {
              try {
                // The signature is keyed by a content hash: plain text parts
                // hash the text; tool-call parts hash "name:stable-args-json".
                let hash = "";
                if (part.text && !part.functionCall && !part.function_call) {
                  hash = computeContentHash(part.text);
                } else {
                  const fcall = part.functionCall || part.function_call;
                  if (fcall && fcall.name) {
                    const name = String(fcall.name);
                    let args = fcall.args;
                    if (typeof args === "string") { try { args = JSON.parse(args); } catch { args = {}; } }
                    const id = `${name}:${stableJsonStringify(args || {})}`;
                    hash = computeContentHash(id);
                  }
                }
                if (hash) {
                  await appendSignature(ctl.getWorkingDirectory(), sig, hash);
                  if (debugChunks) console.info("Captured thought signature for hash:", hash);
                }
              } catch (e) {
                if (debugChunks) console.warn("Failed to capture thought signature:", e);
              }
            }
          }
          if (part?.text) textBuf += (textBuf ? "\n" : "") + part.text;
          const b64 = part?.inline_data?.data || part?.inlineData?.data;
          if (b64) {
            const mime = part?.inline_data?.mime_type || part?.inlineData?.mimeType || "image/png";
            images.push({ data: b64, mimeType: mime });
          }
          const fcall = part?.functionCall || part?.function_call;
          if (fcall && fcall.name) {
            let args = fcall.args;
            if (typeof args === "string") { try { args = JSON.parse(args); } catch { /* keep as string */ } }
            toolCalls.push({ name: String(fcall.name), args });
            // Subclass hook: persist thought_signature for tool calls if needed.
            try {
              const sig = (part as any)?.thought_signature || (part as any)?.thoughtSignature;
              await this.onObservedFunctionCallPart(context, String(fcall.name), args, sig);
            } catch { /* best-effort */ }
          }
        }
        // Only emit tool calls whose (safe) name we actually declared.
        const allowedSafe = new Set<string>(safeToOriginal ? Array.from(safeToOriginal.keys()) : []);
        const filteredToolCalls = toolCalls.filter(tc => allowedSafe.has(tc.name));
        if (textBuf.trim().length) {
          if (debugChunks) console.info("Streaming text (simulated):", textBuf.slice(0, 120));
          await streamTextFragments(ctl, textBuf);
        }
        if (filteredToolCalls.length) {
          const [tc] = filteredToolCalls;
          const originalName = safeToOriginal.get(tc.name) || tc.name;
          const argsJson = typeof tc.args === "string" ? tc.args : JSON.stringify(tc.args ?? {});
          const callId = `gemini-fc-${Date.now()}-0`;
          ctl.toolCallGenerationStarted();
          ctl.toolCallGenerationNameReceived(originalName);
          ctl.toolCallGenerationArgumentFragmentGenerated(argsJson);
          ctl.toolCallGenerationEnded({ type: "function", name: originalName, arguments: tc.args ?? {}, id: callId });
          // Same reasoning as streaming path: stop immediately after emitting a tool call.
          return;
        }
        if (images.length > 0) {
          const wd = ctl.getWorkingDirectory();
          const fileNames: string[] = [];
          const analysisNames: string[] = [];
          const ts = toIsoLikeTimestamp(new Date());
          let idx = 0;
          for (const img of images) {
            const baseName = images.length > 1 ? `image-${ts}-v${++idx}` : `image-${ts}`;
            const ext = mimeToExt(img.mimeType || "");
            const fileName = `${baseName}${ext}`;
            const abs = path.join(wd, fileName);
            try {
              const buf = Buffer.from(img.data, "base64");
              await fs.promises.writeFile(abs, buf);
              fileNames.push(fileName);
              if (!shouldUseFilesApi) {
                // Vision Promotion is always ON - create analysis preview
                try {
                  const iso = toIsoLikeTimestamp(new Date());
                  const v = images.length > 1 ? idx : 1;
                  const analysisName = `analysis-generated-image-${iso}-v${v}.jpg`;
                  const analysisAbs = path.join(wd, analysisName);
                  const jpeg = await encodeJpegFromBuffer(buf, 85);
                  await fs.promises.writeFile(analysisAbs, jpeg);
                  analysisNames.push(analysisName);
                  if (debugChunks) console.info("Flash analysis JPEG written:", analysisAbs);
                } catch (e) { if (debugChunks) console.error("Failed to write analysis JPEG:", (e as Error)?.message); }
              }
            } catch (e) {
              if (debugChunks) console.error("Failed to write image file:", (e as Error)?.message);
            }
          }
          if (fileNames.length > 0) {
            // FIX: the image markdown links were empty strings; emit the
            // `![Image](file)` form that the rest of this strategy detects
            // (see the /!\[Image\]\(.*?\)/ regex in the streaming paths).
            const md = fileNames.map(fn => `![Image](${fn})`).join("\n\n");
            ctl.fragmentGenerated("\n\n" + md + "\n");
            try {
              const variants = fileNames.map((fn, i) => ({ filename: fn, preview: analysisNames[i] ?? fn }));
              if (variants.length) await recordVariantsProvision(wd, variants);
            } catch { }
          }
        }
      }
    }
  }

  /**
   * Centralized error handling for generate():
   *  - explains the classic "HTML instead of JSON" misconfiguration,
   *  - optionally dumps the full error object,
   *  - retries once without tools when the model rejects function calling,
   *  - otherwise rethrows as a `Gemini SDK error`.
   */
  private async handleError(error: any, context: GenerationContext, genAI: GoogleGenerativeAI, generateContent: any, systemInstruction: any, shouldUseFilesApi: boolean) {
    const { ctl, debugChunks, logRequests, model } = context;
    const rawMessage = error?.message || String(error);
    if (rawMessage.includes("Unexpected token") && (rawMessage.includes("JSON") || rawMessage.includes("<"))) {
      console.error("----------------------------------------------------------------");
      console.error("CRITICAL ERROR: The Vertex AI SDK received an HTML response instead of JSON.");
      console.error("This usually indicates a configuration issue (wrong Project ID, Location, or Model Name).");
      console.error("It can also mean the Service Account lacks permissions or the API is down.");
      if ((error as any).response) {
        const r = (error as any).response;
        console.error(`HTTP Status: ${r.status} ${r.statusText}`);
        console.error(`URL: ${r.url}`);
      }
      console.error("----------------------------------------------------------------");
    }
    if (logRequests || debugChunks) {
      try {
        console.error("Full Error Object Dump:", JSON.stringify(error, Object.getOwnPropertyNames(error), 2));
      } catch {
        console.error("Full Error Object Dump: [Circular or Unserializable]");
      }
    }
    if (/function calling is not enabled/i.test(rawMessage)) {
      try {
        const generativeModel = genAI.getGenerativeModel({
          model: model,
          // FIX: camelCase `systemInstruction` (see generate() for details).
          ...(systemInstruction ? { systemInstruction } : {}),
        });
        const result2 = await generativeModel.generateContent({
          contents: generateContent.contents,
        } as any);
        const response2 = result2.response;
        const candidates = response2?.candidates as any[] | undefined;
        if (Array.isArray(candidates) && candidates.length) {
          // Vision Promotion is always ON
          await this.processCandidates(candidates, context, new Map(), true, shouldUseFilesApi);
          return;
        }
      } catch (e2) {
        console.error("Gemini fallback error:", e2);
      }
    }
    console.error("Gemini SDK error:", rawMessage);
    throw new Error(`Gemini SDK error: ${rawMessage}`);
  }
}