// Project Files
// src / strategies / GeminiLyriaStrategy.ts
import { BaseGeminiStrategy } from "./BaseGeminiStrategy";
import { GenerationContext } from "./ModelStrategy";
import { GoogleGenerativeAI, HarmCategory, HarmBlockThreshold } from "@google/generative-ai";
import fs from "fs";
import path from "path";
import { toIsoLikeTimestamp, fileUriToPath } from "../image";
import { safeStringify, parseAttachmentWrappers } from "../generator-utils";
/**
 * Upper bound on how many image attachments from the last user message are
 * inlined into a single request; any attachments beyond this are ignored.
 */
const MAX_LYRIA_IMAGES = 10;
/**
 * Maps a file extension (with its leading dot, any letter case) to an image
 * MIME type.
 *
 * @param ext Extension such as ".png" or ".JPG".
 * @returns The matching MIME type; ".jpg", ".jpeg" and every unrecognised
 *          extension resolve to "image/jpeg".
 */
function extToMime(ext: string): string {
  const normalized = ext.toLowerCase();
  if (normalized === ".png") return "image/png";
  if (normalized === ".webp") return "image/webp";
  if (normalized === ".gif") return "image/gif";
  // ".jpg", ".jpeg" and anything unknown share the JPEG fallback.
  return "image/jpeg";
}
/**
 * Generation strategy that asks a Gemini-hosted Lyria model for music.
 *
 * The request is built from the most recent user message only: its image
 * attachments (capped at MAX_LYRIA_IMAGES) followed by its text. Audio parts
 * in the response are written to the chat working directory and linked from
 * the chat; text parts are forwarded to the chat as-is.
 */
export class GeminiLyriaStrategy extends BaseGeminiStrategy {
  /**
   * Runs a single, non-streaming `generateContent` call and reports the
   * outcome through `ctl.fragmentGenerated`.
   *
   * @param context Generation context supplying the controller, chat history,
   *                model id, API key and the debug/logging flags read below.
   * @returns Resolves once every response part has been handled. Returns early
   *          (after emitting a hint to the user) when the last user message
   *          yields neither text nor a readable image, or when the response
   *          has no candidates.
   * @throws Re-throws, after logging, any error raised while calling the model
   *         or processing its response.
   */
  public override async generate(context: GenerationContext): Promise<void> {
    const { ctl, history, model, apiKey, debugChunks, logRequests } = context;
    const chatWd = ctl.getWorkingDirectory();
    const genAI = new GoogleGenerativeAI(apiKey);
    if (debugChunks) console.info("[GeminiLyriaStrategy] generate() called. model=", model);

    // Collect last user message text + image attachments.
    // The loop keeps overwriting, so the final user message in history wins.
    let lastUserRawText = "";
    for (const msg of history) {
      if (msg.getRole() === "user") lastUserRawText = msg.getText() || "";
    }
    const parsed = parseAttachmentWrappers(lastUserRawText);
    const promptText = parsed.text.trim();

    // Resolve up to MAX_LYRIA_IMAGES image attachments as inline_data
    const imageParts: any[] = [];
    const imageUrls = parsed.parts
      .filter(p => p.kind === "image" && p.url)
      .slice(0, MAX_LYRIA_IMAGES);
    for (const imgPart of imageUrls) {
      const filePath = fileUriToPath(imgPart.url!);
      if (!filePath) {
        if (debugChunks) console.warn("[GeminiLyriaStrategy] Could not resolve image path:", imgPart.url);
        continue;
      }
      try {
        const buf = await fs.promises.readFile(filePath);
        const ext = path.extname(filePath);
        const mimeType = extToMime(ext);
        imageParts.push({
          inline_data: {
            data: buf.toString("base64"),
            mime_type: mimeType,
          },
        });
        if (debugChunks) console.info("[GeminiLyriaStrategy] Attached image:", filePath, mimeType);
      } catch (e) {
        // Best effort: an unreadable image is skipped and the request continues without it.
        console.warn("[GeminiLyriaStrategy] Failed to read image:", filePath, e);
      }
    }

    // Build contents for Lyria: images first (up to 10), then the text prompt
    const userParts: any[] = [...imageParts];
    if (promptText) {
      userParts.push({ text: promptText });
    }
    if (userParts.length === 0) {
      ctl.fragmentGenerated("No prompt text found. Please describe the music you want to generate.");
      return;
    }

    const contents = [{ role: "user", parts: userParts }];
    const generateContentRequest: any = {
      contents,
      generationConfig: {
        responseModalities: ["AUDIO", "TEXT"],
      },
    };

    if (logRequests) {
      // Redact inline_data blobs from log to keep output manageable
      const sanitized = {
        direction: "request",
        model,
        payload: {
          ...generateContentRequest,
          contents: contents.map((c: any) => ({
            ...c,
            parts: c.parts.map((p: any) =>
              p.inline_data
                ? { inline_data: { mime_type: p.inline_data.mime_type, data: "[REDACTED]" } }
                : p
            ),
          })),
        },
      };
      console.info(safeStringify(sanitized));
    }

    // Emit progress message before the (potentially long) API call
    const imageNote = imageParts.length > 0
      ? ` (using ${imageParts.length} image${imageParts.length > 1 ? "s" : ""} as inspiration)`
      : "";
    ctl.fragmentGenerated(`*Generating music with Lyria 3 Pro${imageNote}…*\n\n`);

    try {
      const generativeModel = genAI.getGenerativeModel({
        model,
        safetySettings: ([
          { category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold: HarmBlockThreshold.BLOCK_NONE },
        ] as any),
      });
      const result = await generativeModel.generateContent(
        generateContentRequest as any,
        { signal: ctl.abortSignal },
      );
      if (logRequests) console.info(safeStringify({ direction: "response", model, payload: result.response }));

      const candidates = (result.response as any)?.candidates as any[] | undefined;
      if (!Array.isArray(candidates) || candidates.length === 0) {
        ctl.fragmentGenerated("No response received from Lyria 3 Pro.");
        return;
      }

      // One timestamp is shared by every file from this response; a numeric
      // suffix distinguishes the second and later audio parts.
      const ts = toIsoLikeTimestamp(new Date());
      let audioCount = 0;
      for (const candidate of candidates) {
        const parts = candidate?.content?.parts;
        if (!Array.isArray(parts)) continue;
        for (const part of parts) {
          const p = part as any;
          // Audio blob (both snake_case and camelCase field spellings are accepted)
          const b64 = p.inline_data?.data || p.inlineData?.data;
          const mime: string = p.inline_data?.mime_type || p.inlineData?.mimeType || "";
          if (b64 && mime.startsWith("audio/")) {
            audioCount++;
            const ext = mime.includes("wav") ? ".wav" : ".mp3";
            const filename = `lyria-${ts}${audioCount > 1 ? `-${audioCount}` : ""}${ext}`;
            const absPath = path.join(chatWd, filename);
            try {
              const buf = Buffer.from(b64, "base64");
              await fs.promises.writeFile(absPath, buf);
              ctl.fragmentGenerated(`[🎵 Generated Audio](./${filename})\n\n`);
              if (debugChunks) console.info("[GeminiLyriaStrategy] Audio saved:", filename, `(${buf.length} bytes)`);
            } catch (e) {
              console.error("[GeminiLyriaStrategy] Failed to save audio:", filename, e);
              // audioCount is already > 0 here, so the "no audio" fallback below
              // will not fire; tell the user explicitly that the save failed
              // instead of leaving the chat with only the progress line.
              ctl.fragmentGenerated(`Failed to save generated audio (${filename}). See logs for details.\n\n`);
            }
            continue;
          }
          // Text (lyrics / structure description)
          if (typeof p.text === "string" && p.text.trim()) {
            ctl.fragmentGenerated(p.text);
          }
        }
      }
      if (audioCount === 0) {
        ctl.fragmentGenerated("Lyria 3 Pro returned no audio. The prompt may have been blocked by safety filters.");
      }
    } catch (error: unknown) {
      console.error("[GeminiLyriaStrategy] Error:", error);
      throw error;
    }
  }
}