dist/generator.js
import { LMStudioClient } from "@lmstudio/sdk";
import { configSchematics, globalConfigSchematics } from "./config.js";
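// Assumed contract with config.js: configSchematics exposes the per-chat fields
// read below (model, temperature, topP, topK, maxTokens, contextOverflowPolicy),
// and globalConfigSchematics exposes baseUrl, gpuOffloadRatio, clientIdentifier,
// and clientPasskey.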
/**
 * Generator function that connects to a remote LM Studio server
 * and streams generation to the local LM Studio instance.
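 *
 * @param ctl - Generation controller supplied by LM Studio; used here for
 *   plugin/global config access, tool definitions, and fragment forwarding.
 * @param history - The chat history to respond to, forwarded as-is to the
 *   remote model.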
 */
export async function generator(ctl, history) {
    // Get config values using .get() method
    const config = ctl.getPluginConfig(configSchematics);
    const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics);
    // Extract config
    const baseUrl = globalConfig.get("baseUrl") || "ws://127.0.0.1:1234";
    const model = config.get("model");
    const temperature = config.get("temperature");
    const topP = config.get("topP");
    const topK = config.get("topK");
    const maxTokens = config.get("maxTokens");
    const contextOverflowPolicy = config.get("contextOverflowPolicy");
    const gpuOffloadRatio = globalConfig.get("gpuOffloadRatio");
    const clientIdentifier = globalConfig.get("clientIdentifier") || undefined;
    const clientPasskey = globalConfig.get("clientPasskey") || undefined;
    // Connect to remote LM Studio
    const client = new LMStudioClient({
        baseUrl,
        clientIdentifier,
        clientPasskey,
    });
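    // clientIdentifier and clientPasskey stay undefined when not configured,
    // in which case the SDK falls back to its own defaults.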
    // Errors are not caught here; they propagate up for LM Studio to handle.
    // Load the model if needed
    const loadedModels = await client.llm.listLoaded();
    const isLoaded = loadedModels.some((m) => m.identifier === model);
    if (!isLoaded) {
        // Load the model with GPU config
        await client.llm.load(model, {
            config: {
                gpu: {
                    ratio: gpuOffloadRatio,
                },
            },
        });
    }
    // Get model handle
    const llm = await client.llm.model(model);
    // Build generation options
    const opts = {};
    if (temperature !== undefined && temperature > 0) {
        opts.temperature = temperature;
    }
    if (topP !== undefined && topP > 0 && topP < 1) {
        opts.topPSamplingConfig = { topP };
    }
    if (topK !== undefined && topK > 0) {
        opts.topKSamplingConfig = { topK };
    }
    if (maxTokens !== undefined && maxTokens > 0) {
        opts.maxTokens = maxTokens;
    }
    if (contextOverflowPolicy) {
        opts.contextOverflowPolicy = contextOverflowPolicy;
    }
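    // Only options that are explicitly set and in range are forwarded; anything
    // omitted falls back to the remote server's defaults.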
    // Get tool definitions and pass them to the remote model
    const tools = ctl.getToolDefinitions();
    // Stream generation from remote model
    const prediction = llm.respond(history, {
        ...opts,
        tools: tools.length > 0 ? tools : undefined,
    });
    // Forward streamed fragments to local LM Studio
    for await (const fragment of prediction) {
        ctl.fragmentGenerated(fragment.content);
    }
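    // Only streamed fragments are forwarded; the aggregate prediction result
    // is not used.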
}