// src/toolsProvider.ts
import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { configSchematics } from "./config";
import https from "https";
/**
 * Reasoning Agent tools provider.
 *
 * Entry point consumed by LM Studio: exposes a single `reasoning_invoke`
 * tool backed by the Gemini API, with automatic rotation across the
 * comma-separated API keys configured in the plugin settings. Returns an
 * empty tool list when no usable key is configured.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  // Read the comma-separated key list once at plugin load; no keys → no tools.
  const rawKeys = ctl.getPluginConfig(configSchematics).get("apiKeys") as string | undefined;
  if (!rawKeys || rawKeys.trim().length === 0) {
    return [];
  }
  // Normalize the list: trim each entry and drop empties (e.g. "a,,b" or trailing commas).
  const apiKeys: string[] = [];
  for (const piece of rawKeys.split(",")) {
    const candidate = piece.trim();
    if (candidate.length > 0) {
      apiKeys.push(candidate);
    }
  }
  if (apiKeys.length === 0) {
    return [];
  }
  // The single tool this plugin contributes.
  const reasoningTool = tool({
    name: "reasoning_invoke",
    description:
      "Invoke Google Gemini AI for advanced reasoning and analysis. " +
      "Automatically rotates API keys on quota/errors. " +
      "Perfect for complex multi-step reasoning, research, and detailed analysis tasks.",
    parameters: {
      prompt: z
        .string()
        .min(1, "Prompt cannot be empty")
        .max(10000, "Prompt too long (max 10000 chars)")
        .describe("The prompt to send to Gemini AI for reasoning and analysis"),
      systemPrompt: z
        .string()
        .optional()
        .describe(
          "Optional system prompt to set context. " +
            "If omitted, a default analytical system prompt is used."
        ),
      temperature: z
        .number()
        .min(0)
        .max(2)
        .optional()
        .describe("Override temperature (0=deterministic, 2=creative)"),
      maxTokens: z
        .number()
        .min(100)
        .max(10000)
        .optional()
        .describe("Override max tokens in response"),
    },
    implementation: async (
      { prompt, systemPrompt, temperature, maxTokens },
      { status, warn, signal }
    ) => {
      if (signal.aborted) {
        return "Request cancelled.";
      }
      // Re-read the config per invocation so settings changes apply immediately.
      const pluginConfig = ctl.getPluginConfig(configSchematics);
      const verbose = (pluginConfig.get("enableDetailedLogging") as string) === "true";
      const log = (message: string): void => {
        if (!verbose) {
          return;
        }
        console.log(`[ReasoningAgent] ${message}`);
      };
      return invokeGeminiWithRotation(
        apiKeys,
        pluginConfig,
        { prompt, systemPrompt, temperature, maxTokens },
        status,
        warn,
        log,
        signal
      );
    },
  });
  return [reasoningTool];
}
/**
 * Invoke Gemini with automatic API-key rotation and per-key retries.
 *
 * Keys are tried in order; a key is abandoned immediately on quota or
 * auth errors, otherwise retried up to `retryAttempts` times with a
 * linear backoff. Per-call overrides in `params` win over plugin config.
 *
 * @param apiKeys - Non-empty list of Gemini API keys to rotate through.
 * @param modelConfig - Plugin config accessor (`.get(name)`); read for
 *   retryAttempts, temperature, maxTokens, modelName, requestTimeoutMs.
 * @param params - Per-call prompt and optional overrides. When
 *   `systemPrompt` is given it is prepended to the user prompt.
 * @param status - User-visible progress callback.
 * @param warn - Warning callback used when all keys are exhausted.
 * @param log - Detailed logger (no-op unless logging is enabled).
 * @param signal - Abort signal; checked before every attempt.
 * @returns Formatted Markdown response, or a Markdown error report when
 *   every key has been exhausted (this function does not throw).
 */
async function invokeGeminiWithRotation(
  apiKeys: string[],
  modelConfig: any,
  params: {
    prompt: string;
    systemPrompt?: string;
    temperature?: number;
    maxTokens?: number;
  },
  status: (msg: string) => void,
  warn: (msg: string) => void,
  log: (msg: string) => void,
  signal: AbortSignal
): Promise<string> {
  let lastError: Error | null = null;
  const attemptedKeys = new Set<number>();
  const maxRetries = (modelConfig.get("retryAttempts") as number) ?? 3;
  const configTemperature = (modelConfig.get("temperature") as number) ?? 0.7;
  const configMaxTokens = (modelConfig.get("maxTokens") as number) ?? 2048;
  const configModelName = (modelConfig.get("modelName") as string) ?? "gemini-3-flash-preview";
  const configTimeoutMs = (modelConfig.get("requestTimeoutMs") as number) ?? 30000;
  // FIX: the tool advertises a `systemPrompt` parameter, but it was never
  // sent to the API — it was silently dropped. The v1beta call here takes a
  // single prompt string, so fold the system prompt in by prepending it.
  const effectivePrompt = params.systemPrompt
    ? `${params.systemPrompt}\n\n${params.prompt}`
    : params.prompt;
  let currentKeyIndex = 0;
  // Try each API key with retries
  for (let i = 0; i < apiKeys.length; i++) {
    if (signal.aborted) {
      return "Request cancelled by user.";
    }
    if (attemptedKeys.size >= apiKeys.length) {
      break; // All keys exhausted
    }
    // Find next key to try (skip keys that already failed)
    while (
      attemptedKeys.has(currentKeyIndex) &&
      attemptedKeys.size < apiKeys.length
    ) {
      currentKeyIndex = (currentKeyIndex + 1) % apiKeys.length;
    }
    if (attemptedKeys.has(currentKeyIndex)) {
      continue; // This key already failed
    }
    const apiKey = apiKeys[currentKeyIndex];
    attemptedKeys.add(currentKeyIndex);
    log(
      `Attempting request with key ${currentKeyIndex + 1}/${apiKeys.length} ` +
        `(${apiKey.substring(0, 8)}***)`
    );
    status(`🔄 Using API key ${currentKeyIndex + 1}/${apiKeys.length}...`);
    // Try multiple times with current key
    for (let retryCount = 0; retryCount < maxRetries; retryCount++) {
      if (signal.aborted) {
        return "Request cancelled by user.";
      }
      try {
        // Call Gemini v1beta API directly (required for gemini-3-flash-preview)
        const modelName = configModelName || "gemini-3-flash-preview";
        const temperature = params.temperature ?? configTemperature;
        const maxTokens = params.maxTokens ?? configMaxTokens;
        log(
          `Using model: ${modelName}, ` +
            `temperature: ${temperature}, ` +
            `maxTokens: ${maxTokens}`
        );
        status(`🤖 Calling ${modelName}...`);
        // Make direct HTTP request to v1beta endpoint
        const responseText = await callGeminiV1Beta(
          apiKey,
          modelName,
          effectivePrompt,
          temperature,
          maxTokens,
          configTimeoutMs
        );
        log(`Successfully generated response (${responseText.length} chars)`);
        status(
          `✓ Reasoning complete via key ${currentKeyIndex + 1}/${apiKeys.length}`
        );
        return formatResponse(responseText, modelName, temperature, maxTokens);
      } catch (error) {
        const errorMsg =
          error instanceof Error ? error.message : String(error);
        lastError = error instanceof Error ? error : new Error(errorMsg);
        // Quota errors: retrying the same key will not help — rotate.
        const isQuotaError =
          errorMsg.includes("quota") ||
          errorMsg.includes("429") ||
          errorMsg.includes("rate limit") ||
          errorMsg.includes("RESOURCE_EXHAUSTED");
        // Auth errors: the key itself is bad — rotate.
        const isAuthError =
          errorMsg.includes("authentication") ||
          errorMsg.includes("Invalid API Key") ||
          errorMsg.includes("401") ||
          errorMsg.includes("403") ||
          errorMsg.includes("UNAUTHENTICATED") ||
          errorMsg.includes("PERMISSION_DENIED");
        const shouldRotate = isQuotaError || isAuthError;
        log(
          `Attempt ${retryCount + 1}/${maxRetries} failed: ${errorMsg} ` +
            `(${shouldRotate ? "Will rotate key" : "Will retry"})`
        );
        if (shouldRotate || retryCount === maxRetries - 1) {
          break; // Move to next key
        }
        // Linear backoff before retrying transient failures with this key.
        if (retryCount < maxRetries - 1) {
          const delayMs = 1000 * (retryCount + 1);
          log(`Waiting ${delayMs}ms before retry...`);
          await sleep(delayMs);
        }
      }
    }
    currentKeyIndex = (currentKeyIndex + 1) % apiKeys.length;
  }
  // All keys exhausted - return a Markdown error report rather than throwing.
  const errorMsg =
    lastError instanceof Error ? lastError.message : "Unknown error occurred";
  const detailedError =
    `All API keys exhausted. Last error: ${errorMsg}\n\n` +
    `Attempted ${apiKeys.length} key(s) with ${maxRetries} retry(ies) each.`;
  warn(`❌ Reasoning failed: All keys exhausted after ${apiKeys.length * maxRetries} attempts`);
  log(`Final error: ${detailedError}`);
  return `## ❌ Error: All API Keys Exhausted\n\n${detailedError}`;
}
/**
 * Call the Gemini v1beta `generateContent` endpoint directly via HTTPS.
 * Used instead of an SDK because preview models (e.g. gemini-3-flash-preview)
 * are only exposed on v1beta.
 *
 * @param apiKey - Gemini API key (sent as a query parameter).
 * @param modelName - Model identifier, e.g. "gemini-3-flash-preview".
 * @param prompt - Full prompt text for the single user turn.
 * @param temperature - Sampling temperature, forwarded to generationConfig.
 * @param maxTokens - maxOutputTokens, forwarded to generationConfig.
 * @param timeoutMs - Hard deadline; the request is destroyed when exceeded.
 * @returns The first candidate's text.
 * @throws On non-200 status, timeout, network error, unparsable JSON,
 *   or a response with no candidate text.
 */
async function callGeminiV1Beta(
  apiKey: string,
  modelName: string,
  prompt: string,
  temperature: number,
  maxTokens: number,
  timeoutMs: number
): Promise<string> {
  // NOTE(review): the key rides in the URL, so it can end up in proxy/server
  // logs; the `x-goog-api-key` header would avoid that — confirm before changing.
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${modelName}:generateContent?key=${apiKey}`;
  const requestBody = {
    contents: [
      {
        role: "user",
        parts: [{ text: prompt }],
      },
    ],
    generationConfig: {
      temperature,
      maxOutputTokens: maxTokens,
      topP: 1,
      topK: 40,
    },
    // All safety categories disabled so analytical prompts are not blocked.
    safetySettings: [
      { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" },
    ],
  };
  const postData = JSON.stringify(requestBody);
  return new Promise((resolve, reject) => {
    const req = https.request(
      url,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Content-Length": Buffer.byteLength(postData),
        },
      },
      (res) => {
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => {
          clearTimeout(timeout);
          if (res.statusCode !== 200) {
            reject(
              new Error(
                `Gemini API error (${res.statusCode}): ${data}`
              )
            );
            return;
          }
          try {
            const response = JSON.parse(data);
            const text =
              response.candidates?.[0]?.content?.parts?.[0]?.text;
            if (!text) {
              reject(new Error("Empty response from Gemini API"));
              return;
            }
            resolve(text);
          } catch (error) {
            reject(
              new Error(
                `Failed to parse Gemini response: ${error instanceof Error ? error.message : String(error)}`
              )
            );
          }
        });
      }
    );
    // FIX: previously the timeout only rejected the promise and left the
    // request running, leaking the socket until the server responded.
    // Destroying the request tears down the connection; the error is then
    // surfaced through the 'error' handler below.
    const timeout = setTimeout(() => {
      req.destroy(new Error(`Request timeout after ${timeoutMs}ms`));
    }, timeoutMs);
    req.on("error", (error) => {
      clearTimeout(timeout);
      reject(error);
    });
    req.write(postData);
    req.end();
  });
}
/**
 * Render a Gemini reply as a Markdown report: a heading, the model text,
 * and a footer line recording the model name and generation settings.
 */
function formatResponse(
  text: string,
  modelName: string,
  temperature: number,
  maxTokens: number
): string {
  const footer =
    `**Model:** ${modelName} | **Temperature:** ${temperature} | **Max Tokens:** ${maxTokens}`;
  const sections = [
    "## 🤖 Reasoning Agent Response",
    "",
    text,
    "",
    "---",
    footer,
  ];
  return sections.join("\n");
}
/**
 * Resolve after roughly `ms` milliseconds (setTimeout-based delay).
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}