Project Files
src / tools / generate_audio.ts
import { tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { GoogleGenerativeAI, HarmCategory, HarmBlockThreshold } from "@google/generative-ai";
import fs from "fs";
import path from "path";
import { configSchematics, globalConfigSchematics } from "../config";
import { toIsoLikeTimestamp } from "../image";
import { safeStringify } from "../generator-utils";
import { formatToolMetaBlock } from "../helpers/pluginMeta";
/** Model identifier sent to the Google AI API and echoed in request/response logs. */
const LYRIA_MODEL = "lyria-3-pro-preview";

/**
 * File extensions for audio MIME types other than WAV.
 * Anything not listed here falls back to ".mp3" (see `audioExtensionForMime`).
 */
const AUDIO_EXTENSION_BY_MIME: ReadonlyMap<string, string> = new Map([
  ["audio/mpeg", ".mp3"],
  ["audio/mp3", ".mp3"],
  ["audio/ogg", ".ogg"],
  ["audio/opus", ".opus"],
  ["audio/flac", ".flac"],
  ["audio/x-flac", ".flac"],
  ["audio/aac", ".aac"],
  ["audio/mp4", ".m4a"],
  ["audio/webm", ".webm"],
]);

/**
 * Picks a file extension for an audio MIME type.
 *
 * Any MIME string containing "wav" maps to ".wav"; known types map to their
 * conventional extension; everything else falls back to ".mp3".
 */
function audioExtensionForMime(mime: string): string {
  if (mime.includes("wav")) return ".wav";
  // Drop parameters such as "; codecs=opus" before the table lookup.
  const baseType = (mime.split(";", 1)[0] ?? "").trim().toLowerCase();
  return AUDIO_EXTENSION_BY_MIME.get(baseType) ?? ".mp3";
}

/** Extracts a human-readable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (typeof error === "object" && error !== null && "message" in error) {
    const message = (error as { message?: unknown }).message;
    if (message != null) return String(message);
  }
  return String(error);
}

/**
 * Creates the `generate_audio` tool.
 *
 * The tool sends the prompt to the Lyria model through the Google Generative AI
 * SDK, writes every returned inline audio part into the chat working directory,
 * and answers with Markdown links to the saved files plus any text parts the
 * model returned alongside the audio.
 *
 * Failures (missing API key, API error, empty response, no audio, or audio that
 * could not be written to disk) are reported as a text result rather than thrown.
 *
 * @param ctl Controller used to read plugin configuration, the working
 *   directory, and the abort signal.
 * @returns The configured tool definition.
 */
export function createGenerateAudioTool(ctl: ToolsProviderController): Tool {
  return tool({
    name: "generate_audio",
    description: `Generate music or audio using Lyria 3 Pro via Google AI.
All parameters have sensible defaults. Only override them when there's a good reason OR the user explicitly requests it.
Parameters:
- prompt: Description of the music to generate. Include style, mood, instruments, tempo, key, and any lyrical themes. Examples: "upbeat jazz piano trio, 120 bpm, major key", "cinematic orchestral score, dramatic, strings and brass".
Critical rules:
- Always craft a detailed, descriptive prompt for best results. Lyria generates immediately from whatever is provided.
- Inspect any relevant image attachments in the conversation yourself first and incorporate their visual mood, color palette, and subject matter into the prompt before calling this tool.
Returns: Confirmation text with a Markdown link to the generated audio file.
For a detailed prompt guide and examples, use the recall tool (ceveyne/playbook) and search for "Lyria 3 Pro".
${formatToolMetaBlock()}`,
    parameters: {
      prompt: z.string().describe("Description of the music to generate (style, mood, instruments, tempo, lyrics theme, etc.)"),
    },
    implementation: async (args: { prompt: string }) => {
      const globalCfg = ctl.getGlobalPluginConfig(globalConfigSchematics);
      const pluginCfg = ctl.getPluginConfig(configSchematics);
      const apiKey = globalCfg.get("apiKey");
      const debugChunks = pluginCfg.get("debugChunks");
      const logRequests = pluginCfg.get("logRequests");

      if (!apiKey) {
        return {
          content: [{ type: "text" as const, text: "Error: Google AI Studio API Key is not configured. Set it in the plugin's global configuration." }],
        };
      }

      const chatWd = ctl.getWorkingDirectory();
      const { prompt } = args;
      if (debugChunks) console.info("[generate_audio] called, prompt:", prompt);

      // --- Build contents ---
      const contents = [{ role: "user", parts: [{ text: prompt }] }];
      // Typed loosely: the request carries `responseModalities`, and the SDK's
      // request typings are not visible from this file.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const generateContentRequest: any = {
        contents,
        generationConfig: {
          responseModalities: ["AUDIO", "TEXT"],
        },
      };
      if (logRequests) {
        console.info(safeStringify({ direction: "request", model: LYRIA_MODEL, payload: generateContentRequest }));
      }

      // --- Call Lyria ---
      const genAI = new GoogleGenerativeAI(apiKey);
      const generativeModel = genAI.getGenerativeModel({
        model: LYRIA_MODEL,
        safetySettings: ([
          { category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold: HarmBlockThreshold.BLOCK_NONE },
        ] as any),
      });

      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      let result: any;
      try {
        result = await generativeModel.generateContent(
          generateContentRequest as any,
          { signal: ctl.abortSignal },
        );
      } catch (error: unknown) {
        console.error("[generate_audio] API error:", error);
        return {
          content: [{ type: "text" as const, text: `Error calling Lyria 3 Pro: ${describeError(error)}` }],
        };
      }
      if (logRequests) console.info(safeStringify({ direction: "response", model: LYRIA_MODEL, payload: result.response }));

      // --- Parse response ---
      const candidates = (result.response as any)?.candidates as any[] | undefined;
      if (!Array.isArray(candidates) || candidates.length === 0) {
        return {
          content: [{ type: "text" as const, text: "Lyria 3 Pro returned no response. The prompt may have been blocked by safety filters." }],
        };
      }

      const ts = toIsoLikeTimestamp(new Date());
      // Number of audio parts the model returned; also drives the filename suffix.
      let audioCount = 0;
      // One Markdown link per file that was actually written to disk.
      const audioLinks: string[] = [];
      // Messages for audio parts that could not be written.
      const saveErrors: string[] = [];
      const textParts: string[] = [];

      for (const candidate of candidates) {
        const parts = candidate?.content?.parts;
        if (!Array.isArray(parts)) continue;

        for (const part of parts) {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const p = part as any;

          // Audio blob: both snake_case and camelCase field spellings are accepted.
          const b64 = p?.inline_data?.data || p?.inlineData?.data;
          const mime: string = p?.inline_data?.mime_type || p?.inlineData?.mimeType || "";
          if (b64 && mime.startsWith("audio/")) {
            audioCount++;
            const ext = audioExtensionForMime(mime);
            const filename = `lyria-${ts}${audioCount > 1 ? `-${audioCount}` : ""}${ext}`;
            const absPath = path.join(chatWd, filename);
            try {
              const buf = Buffer.from(b64, "base64");
              await fs.promises.writeFile(absPath, buf);
              audioLinks.push(`[🎵 Generated Audio](./${filename})`);
              if (debugChunks) console.info("[generate_audio] Audio saved:", filename, `(${buf.length} bytes)`);
            } catch (e: unknown) {
              console.error("[generate_audio] Failed to save audio:", filename, e);
              saveErrors.push(`${filename}: ${describeError(e)}`);
            }
            continue;
          }

          // Text (lyrics / structure description)
          if (typeof p?.text === "string" && p.text.trim()) {
            textParts.push(p.text.trim());
          }
        }
      }

      if (audioCount === 0) {
        return {
          content: [{ type: "text" as const, text: "Lyria 3 Pro returned no audio. The prompt may have been blocked by safety filters." }],
        };
      }

      // Audio was returned but nothing reached the disk: report the failure
      // instead of claiming success with no links.
      if (audioLinks.length === 0) {
        return {
          content: [{
            type: "text" as const,
            text: `Error: Lyria 3 Pro returned audio, but it could not be saved to the working directory. ${saveErrors.join("; ")}`,
          }],
        };
      }

      // --- Build tool result ---
      const resultContent: Array<{ type: "text"; text: string; $hint?: string }> = [];
      const savedCount = audioLinks.length;
      const countNote = savedCount === 1 ? "1 audio file" : `${savedCount} audio files`;
      resultContent.push({
        type: "text" as const,
        text: `Successfully generated ${countNote} with Lyria 3 Pro.`,
        $hint: "Present the audio file(s) to the user using the Markdown link(s) below.",
      });
      for (const link of audioLinks) {
        resultContent.push({ type: "text" as const, text: link });
      }
      if (saveErrors.length > 0) {
        // Partial failure: some parts were saved, others were not.
        resultContent.push({
          type: "text" as const,
          text: `Note: ${saveErrors.length} additional audio file(s) could not be saved. ${saveErrors.join("; ")}`,
        });
      }
      if (textParts.length > 0) {
        resultContent.push({ type: "text" as const, text: textParts.join("\n\n") });
      }
      return { content: resultContent };
    },
  });
}