// src/toolsProvider.ts
import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
import { z } from "zod";
import { configSchematics } from "./config";
import https from "https";
/**
 * Reasoning Agent tools provider.
 *
 * Entry point consumed by LM Studio: exposes a single `reasoning_invoke`
 * tool backed by the Gemini API, with automatic rotation across the
 * comma-separated API keys configured in the plugin settings. Returns an
 * empty tool list when no usable key is configured.
 */
export async function toolsProvider(ctl: ToolsProviderController): Promise<Tool[]> {
  // Read the comma-separated key list once at plugin load; no keys → no tools.
  const rawKeys = ctl.getPluginConfig(configSchematics).get("apiKeys") as string | undefined;
  if (!rawKeys || rawKeys.trim().length === 0) {
    return [];
  }
  // Normalize the list: trim each entry and drop empties (e.g. "a,,b" or trailing commas).
  const apiKeys: string[] = [];
  for (const piece of rawKeys.split(",")) {
    const candidate = piece.trim();
    if (candidate.length > 0) {
      apiKeys.push(candidate);
    }
  }
  if (apiKeys.length === 0) {
    return [];
  }
  // The single tool this plugin contributes.
  const reasoningTool = tool({
    name: "reasoning_invoke",
    description:
      "Invoke Google Gemini AI for advanced reasoning and analysis. " +
      "Automatically rotates API keys on quota/errors. " +
      "Perfect for complex multi-step reasoning, research, and detailed analysis tasks.",
    parameters: {
      prompt: z
        .string()
        .min(1, "Prompt cannot be empty")
        .max(10000, "Prompt too long (max 10000 chars)")
        .describe("The prompt to send to Gemini AI for reasoning and analysis"),
      systemPrompt: z
        .string()
        .optional()
        .describe(
          "Optional system prompt to set context. " +
            "If omitted, a default analytical system prompt is used."
        ),
      temperature: z
        .number()
        .min(0)
        .max(2)
        .optional()
        .describe("Override temperature (0=deterministic, 2=creative)"),
      maxTokens: z
        .number()
        .min(100)
        .max(10000)
        .optional()
        .describe("Override max tokens in response"),
    },
    implementation: async (
      { prompt, systemPrompt, temperature, maxTokens },
      { status, warn, signal }
    ) => {
      if (signal.aborted) {
        return "Request cancelled.";
      }
      // Re-read the config per invocation so settings changes apply immediately.
      const pluginConfig = ctl.getPluginConfig(configSchematics);
      const verbose = (pluginConfig.get("enableDetailedLogging") as string) === "true";
      const log = (message: string): void => {
        if (!verbose) {
          return;
        }
        console.log(`[ReasoningAgent] ${message}`);
      };
      return invokeGeminiWithRotation(
        apiKeys,
        pluginConfig,
        { prompt, systemPrompt, temperature, maxTokens },
        status,
        warn,
        log,
        signal
      );
    },
  });
  return [reasoningTool];
}
/**
 * Invoke Gemini with automatic API-key rotation and per-key retries.
 *
 * Keys are tried in order; a key is abandoned immediately on quota or
 * auth errors, otherwise retried up to `retryAttempts` times with a
 * linear backoff. Per-call overrides in `params` win over plugin config.
 *
 * @param apiKeys - Non-empty list of Gemini API keys to rotate through.
 * @param modelConfig - Plugin config accessor (`.get(name)`); read for
 *   retryAttempts, temperature, maxTokens, modelName, requestTimeoutMs.
 * @param params - Per-call prompt and optional overrides. When
 *   `systemPrompt` is given it is prepended to the user prompt.
 * @param status - User-visible progress callback.
 * @param warn - Warning callback used when all keys are exhausted.
 * @param log - Detailed logger (no-op unless logging is enabled).
 * @param signal - Abort signal; checked before every attempt.
 * @returns Formatted Markdown response, or a Markdown error report when
 *   every key has been exhausted (this function does not throw).
 */
async function invokeGeminiWithRotation(
  apiKeys: string[],
  modelConfig: any,
  params: {
    prompt: string;
    systemPrompt?: string;
    temperature?: number;
    maxTokens?: number;
  },
  status: (msg: string) => void,
  warn: (msg: string) => void,
  log: (msg: string) => void,
  signal: AbortSignal
): Promise<string> {
  let lastError: Error | null = null;
  const attemptedKeys = new Set<number>();
  const maxRetries = (modelConfig.get("retryAttempts") as number) ?? 3;
  const configTemperature = (modelConfig.get("temperature") as number) ?? 0.7;
  const configMaxTokens = (modelConfig.get("maxTokens") as number) ?? 2048;
  const configModelName = (modelConfig.get("modelName") as string) ?? "gemini-3-flash-preview";
  const configTimeoutMs = (modelConfig.get("requestTimeoutMs") as number) ?? 30000;
  // FIX: the tool advertises a `systemPrompt` parameter, but it was never
  // sent to the API — it was silently dropped. The v1beta call here takes a
  // single prompt string, so fold the system prompt in by prepending it.
  const effectivePrompt = params.systemPrompt
    ? `${params.systemPrompt}\n\n${params.prompt}`
    : params.prompt;
  let currentKeyIndex = 0;
  // Try each API key with retries
  for (let i = 0; i < apiKeys.length; i++) {
    if (signal.aborted) {
      return "Request cancelled by user.";
    }
    if (attemptedKeys.size >= apiKeys.length) {
      break; // All keys exhausted
    }
    // Find next key to try (skip keys that already failed)
    while (
      attemptedKeys.has(currentKeyIndex) &&
      attemptedKeys.size < apiKeys.length
    ) {
      currentKeyIndex = (currentKeyIndex + 1) % apiKeys.length;
    }
    if (attemptedKeys.has(currentKeyIndex)) {
      continue; // This key already failed
    }
    const apiKey = apiKeys[currentKeyIndex];
    attemptedKeys.add(currentKeyIndex);
    log(
      `Attempting request with key ${currentKeyIndex + 1}/${apiKeys.length} ` +
        `(${apiKey.substring(0, 8)}***)`
    );
    status(`🔄 Using API key ${currentKeyIndex + 1}/${apiKeys.length}...`);
    // Try multiple times with current key
    for (let retryCount = 0; retryCount < maxRetries; retryCount++) {
      if (signal.aborted) {
        return "Request cancelled by user.";
      }
      try {
        // Call Gemini v1beta API directly (required for gemini-3-flash-preview)
        const modelName = configModelName || "gemini-3-flash-preview";
        const temperature = params.temperature ?? configTemperature;
        const maxTokens = params.maxTokens ?? configMaxTokens;
        log(
          `Using model: ${modelName}, ` +
            `temperature: ${temperature}, ` +
            `maxTokens: ${maxTokens}`
        );
        status(`🤖 Calling ${modelName}...`);
        // Make direct HTTP request to v1beta endpoint
        const responseText = await callGeminiV1Beta(
          apiKey,
          modelName,
          effectivePrompt,
          temperature,
          maxTokens,
          configTimeoutMs
        );
        log(`Successfully generated response (${responseText.length} chars)`);
        status(
          `✓ Reasoning complete via key ${currentKeyIndex + 1}/${apiKeys.length}`
        );
        return formatResponse(responseText, modelName, temperature, maxTokens);
      } catch (error) {
        const errorMsg =
          error instanceof Error ? error.message : String(error);
        lastError = error instanceof Error ? error : new Error(errorMsg);
        // Quota errors: retrying the same key will not help — rotate.
        const isQuotaError =
          errorMsg.includes("quota") ||
          errorMsg.includes("429") ||
          errorMsg.includes("rate limit") ||
          errorMsg.includes("RESOURCE_EXHAUSTED");
        // Auth errors: the key itself is bad — rotate.
        const isAuthError =
          errorMsg.includes("authentication") ||
          errorMsg.includes("Invalid API Key") ||
          errorMsg.includes("401") ||
          errorMsg.includes("403") ||
          errorMsg.includes("UNAUTHENTICATED") ||
          errorMsg.includes("PERMISSION_DENIED");
        const shouldRotate = isQuotaError || isAuthError;
        log(
          `Attempt ${retryCount + 1}/${maxRetries} failed: ${errorMsg} ` +
            `(${shouldRotate ? "Will rotate key" : "Will retry"})`
        );
        if (shouldRotate || retryCount === maxRetries - 1) {
          break; // Move to next key
        }
        // Linear backoff before retrying transient failures with this key.
        if (retryCount < maxRetries - 1) {
          const delayMs = 1000 * (retryCount + 1);
          log(`Waiting ${delayMs}ms before retry...`);
          await sleep(delayMs);
        }
      }
    }
    currentKeyIndex = (currentKeyIndex + 1) % apiKeys.length;
  }
  // All keys exhausted - return a Markdown error report rather than throwing.
  const errorMsg =
    lastError instanceof Error ? lastError.message : "Unknown error occurred";
  const detailedError =
    `All API keys exhausted. Last error: ${errorMsg}\n\n` +
    `Attempted ${apiKeys.length} key(s) with ${maxRetries} retry(ies) each.`;
  warn(`❌ Reasoning failed: All keys exhausted after ${apiKeys.length * maxRetries} attempts`);
  log(`Final error: ${detailedError}`);
  return `## ❌ Error: All API Keys Exhausted\n\n${detailedError}`;
}
/**
 * Call the Gemini v1beta `generateContent` endpoint directly via HTTPS.
 * Used instead of an SDK because preview models (e.g. gemini-3-flash-preview)
 * are only exposed on v1beta.
 *
 * @param apiKey - Gemini API key (sent as a query parameter).
 * @param modelName - Model identifier, e.g. "gemini-3-flash-preview".
 * @param prompt - Full prompt text for the single user turn.
 * @param temperature - Sampling temperature, forwarded to generationConfig.
 * @param maxTokens - maxOutputTokens, forwarded to generationConfig.
 * @param timeoutMs - Hard deadline; the request is destroyed when exceeded.
 * @returns The first candidate's text.
 * @throws On non-200 status, timeout, network error, unparsable JSON,
 *   or a response with no candidate text.
 */
async function callGeminiV1Beta(
  apiKey: string,
  modelName: string,
  prompt: string,
  temperature: number,
  maxTokens: number,
  timeoutMs: number
): Promise<string> {
  // NOTE(review): the key rides in the URL, so it can end up in proxy/server
  // logs; the `x-goog-api-key` header would avoid that — confirm before changing.
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${modelName}:generateContent?key=${apiKey}`;
  const requestBody = {
    contents: [
      {
        role: "user",
        parts: [{ text: prompt }],
      },
    ],
    generationConfig: {
      temperature,
      maxOutputTokens: maxTokens,
      topP: 1,
      topK: 40,
    },
    // All safety categories disabled so analytical prompts are not blocked.
    safetySettings: [
      { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
      { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" },
    ],
  };
  const postData = JSON.stringify(requestBody);
  return new Promise((resolve, reject) => {
    const req = https.request(
      url,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Content-Length": Buffer.byteLength(postData),
        },
      },
      (res) => {
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => {
          clearTimeout(timeout);
          if (res.statusCode !== 200) {
            reject(
              new Error(
                `Gemini API error (${res.statusCode}): ${data}`
              )
            );
            return;
          }
          try {
            const response = JSON.parse(data);
            const text =
              response.candidates?.[0]?.content?.parts?.[0]?.text;
            if (!text) {
              reject(new Error("Empty response from Gemini API"));
              return;
            }
            resolve(text);
          } catch (error) {
            reject(
              new Error(
                `Failed to parse Gemini response: ${error instanceof Error ? error.message : String(error)}`
              )
            );
          }
        });
      }
    );
    // FIX: previously the timeout only rejected the promise and left the
    // request running, leaking the socket until the server responded.
    // Destroying the request tears down the connection; the error is then
    // surfaced through the 'error' handler below.
    const timeout = setTimeout(() => {
      req.destroy(new Error(`Request timeout after ${timeoutMs}ms`));
    }, timeoutMs);
    req.on("error", (error) => {
      clearTimeout(timeout);
      reject(error);
    });
    req.write(postData);
    req.end();
  });
}
/**
 * Render a Gemini reply as a Markdown report: a heading, the model text,
 * and a footer line recording the model name and generation settings.
 */
function formatResponse(
  text: string,
  modelName: string,
  temperature: number,
  maxTokens: number
): string {
  const footer =
    `**Model:** ${modelName} | **Temperature:** ${temperature} | **Max Tokens:** ${maxTokens}`;
  const sections = [
    "## 🤖 Reasoning Agent Response",
    "",
    text,
    "",
    "---",
    footer,
  ];
  return sections.join("\n");
}
/**
 * Resolve after roughly `ms` milliseconds (setTimeout-based delay).
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}