// src/generator.ts
// Forked from ankh/openai-compat-endpoint
import { configSchematics, globalConfigSchematics } from "./config";
import { type Chat, type GeneratorController } from "@lmstudio/sdk";
import OpenAI from "openai";
import {
type ChatCompletionMessageParam,
type ChatCompletionMessageToolCall,
type ChatCompletionTool,
type ChatCompletionToolMessageParam,
} from "openai/resources/index";
/* -------------------------------------------------------------------------- */
/* Global Vars */
/* -------------------------------------------------------------------------- */
const MAX_REQUESTS = 25;
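/** Formats the current time in the America/Los_Angeles time zone, e.g. "1/2/2025, 13:05 PST". */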
function getFormattedTime(): string {
const now = new Date();
const timeZone = "America/Los_Angeles";
const dateStr = now.toLocaleDateString("en-US", { timeZone });
const timeStr = now.toLocaleTimeString("en-US", {
timeZone,
hour12: false,
hour: "2-digit",
minute: "2-digit",
timeZoneName: "short",
});
return `${dateStr}, ${timeStr}`;
}
/* -------------------------------------------------------------------------- */
/* Types */
/* -------------------------------------------------------------------------- */
type ToolCallState = {
id: string;
name: string | null;
index: number;
arguments: string;
};
/* -------------------------------------------------------------------------- */
/* Config helpers */
/* -------------------------------------------------------------------------- */
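/**
 * Reads a value from a config object via `.get()`, tolerating missing methods
 * and thrown errors. Returns `fallback` for null/undefined results.
 */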
function safeGet(cfg: any, name: string, fallback: any = undefined): any {
try {
const value = cfg?.get?.(name);
return value === undefined || value === null ? fallback : value;
} catch {
return fallback;
}
}
function asString(value: any, fallback = ""): string {
return typeof value === "string" ? value.trim() : fallback;
}
function asNumber(value: any, fallback: number): number {
const n = typeof value === "number" ? value : Number(value);
return Number.isFinite(n) ? n : fallback;
}
function asBool(value: any, fallback: boolean): boolean {
if (value === undefined || value === null) {
return fallback;
}
return Boolean(value);
}
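/**
 * Resolves a numeric setting with this precedence: per-chat override (any
 * value >= 0) -> global default (if set) -> legacy per-chat field -> fallback.
 * Example from this file: pickNumber(config, globalConfig,
 * "temperatureOverride", "defaultTemperature", "temperature", 0.2).
 */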
function pickNumber(
config: any,
globalConfig: any,
chatOverrideName: string,
globalName: string,
legacyChatName: string,
fallback: number,
): number {
const chatOverride = asNumber(safeGet(config, chatOverrideName, -1), -1);
if (chatOverride >= 0) {
return chatOverride;
}
const globalValue = safeGet(globalConfig, globalName, undefined);
if (globalValue !== undefined) {
return asNumber(globalValue, fallback);
}
return asNumber(safeGet(config, legacyChatName, fallback), fallback);
}
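/** Writes `value` onto `payload[fieldName]` unless it is non-finite or equals the sentinel `omitValue`. */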
function addNumberIfEnabled(
payload: any,
fieldName: string,
value: number,
omitValue: number,
): void {
if (Number.isFinite(value) && value !== omitValue) {
payload[fieldName] = value;
}
}
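/**
 * Rough token estimate using the common ~4-characters-per-token heuristic for
 * English text, e.g. estimateTokensFromChars(200000) -> 50000. Coarse by
 * design; not a replacement for a real tokenizer.
 */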
function estimateTokensFromChars(chars: number): number {
return Math.ceil(chars / 4);
}
/* -------------------------------------------------------------------------- */
/* Build helpers */
/* -------------------------------------------------------------------------- */
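/**
 * Builds an OpenAI SDK client. Base URL precedence: global config ->
 * OPENAI_BASE_URL -> DeepSeek default. API key precedence: global config ->
 * DEEPSEEK_API_KEY -> OPENAI_API_KEY.
 */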
function createOpenAI(globalConfig: any) {
const baseURL =
asString(safeGet(globalConfig, "baseUrl", ""), "") ||
asString(process.env.OPENAI_BASE_URL, "") ||
"https://api.deepseek.com/v1";
const apiKey =
asString(safeGet(globalConfig, "apiKey", ""), "") ||
asString(process.env.DEEPSEEK_API_KEY, "") ||
asString(process.env.OPENAI_API_KEY, "");
return new OpenAI({
apiKey,
baseURL,
});
}
/** Convert internal chat history to the format expected by OpenAI. */
function toOpenAIMessages(history: Chat): ChatCompletionMessageParam[] {
const messages: ChatCompletionMessageParam[] = [];
for (const message of history) {
switch (message.getRole()) {
case "system":
messages.push({ role: "system", content: message.getText() });
break;
case "user":
messages.push({ role: "user", content: message.getText() });
break;
case "assistant": {
const toolCalls: ChatCompletionMessageToolCall[] = message
.getToolCallRequests()
.map((toolCall) => ({
id: toolCall.id ?? "",
type: "function",
function: {
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments ?? {}),
},
}));
messages.push({
role: "assistant",
content: message.getText(),
...(toolCalls.length ? { tool_calls: toolCalls } : {}),
} as ChatCompletionMessageParam);
break;
}
case "tool": {
message.getToolCallResults().forEach((toolCallResult) => {
messages.push({
role: "tool",
tool_call_id: toolCallResult.toolCallId ?? "",
content: toolCallResult.content,
} as ChatCompletionToolMessageParam);
});
break;
}
}
}
return messages;
}
/** Convert LM Studio tool definitions to OpenAI function-tool descriptors. */
function toOpenAITools(ctl: GeneratorController): ChatCompletionTool[] | undefined {
const tools = ctl.getToolDefinitions().map<ChatCompletionTool>((t) => ({
type: "function",
function: {
name: t.function.name,
description: t.function.description,
parameters: t.function.parameters ?? {},
},
}));
return tools.length ? tools : undefined;
}
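/**
 * Normalizes provider/SDK errors into one shape, probing the places
 * OpenAI-style clients and raw fetch failures commonly put status codes,
 * request ids, and error bodies.
 */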
function extractProviderError(error: unknown) {
const err = error && typeof error === "object" ? (error as any) : null;
const status =
err?.status ??
err?.response?.status ??
(typeof err?.code === "number" ? err.code : undefined);
const headers = err?.headers ?? err?.response?.headers;
const requestId =
err?.request_id ??
err?.requestId ??
headers?.["x-request-id"] ??
headers?.["X-Request-Id"];
const apiMessage = typeof err?.message === "string" ? err.message : undefined;
const errorObj = err?.error ?? err?.response?.data?.error ?? err?.response?.error;
const responseData = err?.response?.data;
const providerMessage =
(typeof errorObj?.message === "string" && errorObj.message) ||
(typeof responseData?.message === "string" && responseData.message) ||
apiMessage;
return {
status,
requestId,
providerMessage,
apiMessage,
type: errorObj?.type ?? err?.type,
code: errorObj?.code ?? err?.code,
param: errorObj?.param,
error: errorObj ?? responseData,
};
}
/* -------------------------------------------------------------------------- */
/* Message compaction */
/* -------------------------------------------------------------------------- */
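/** Flattens string or multi-part (array) message content into plain text; non-text parts are ignored. */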
function getTextFromContent(content: any): string {
if (typeof content === "string") {
return content;
}
if (Array.isArray(content)) {
return content
.map((part) => {
if (typeof part === "string") {
return part;
}
if (part && typeof part.text === "string") {
return part.text;
}
return "";
})
.join("\n");
}
return "";
}
function setTextContent(message: any, text: string): any {
return {
...message,
content: text,
};
}
function truncateText(text: string, maxChars: number): string {
if (maxChars <= 0 || text.length <= maxChars) {
return text;
}
return (
text.slice(0, maxChars) +
`\n\n[TRUNCATED by LM Studio plugin: original ${text.length} chars, kept ${maxChars} chars]`
);
}
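/**
 * Shrinks the outgoing prompt: tool results are truncated to
 * `maxToolResultChars`, then (when `maxMessages` > 0) only system messages
 * plus the last `maxMessages` non-system messages are kept. For example, with
 * maxMessages = 2, [system, u1, a1, u2, a2] becomes [system, u2, a2].
 */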
function compactMessages(
messages: any[],
maxMessages: number,
maxToolResultChars: number,
): any[] {
let compacted = messages.map((message) => {
if (message.role !== "tool") {
return message;
}
const text = getTextFromContent(message.content);
return setTextContent(message, truncateText(text, maxToolResultChars));
});
if (maxMessages > 0) {
const systemMessages = compacted.filter((m) => m.role === "system");
const nonSystemMessages = compacted.filter((m) => m.role !== "system");
compacted = [
...systemMessages,
...nonSystemMessages.slice(-maxMessages),
];
}
return compacted;
}
function hasMeaningfulContent(message: any): boolean {
const content = message?.content;
if (typeof content === "string") {
return content.trim().length > 0;
}
if (Array.isArray(content)) {
return content.length > 0;
}
return content != null;
}
function removeToolCalls(message: any): any | null {
const copy = { ...message };
delete copy.tool_calls;
if (!hasMeaningfulContent(copy)) {
return null;
}
return copy;
}
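/**
 * Enforces the OpenAI-API rule that a `tool` message is only valid directly
 * after an assistant message whose `tool_calls` contain its `tool_call_id`.
 * Complete groups pass through intact; incomplete groups lose their
 * `tool_calls` (keeping any assistant text) and orphan tool results are
 * dropped.
 */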
function sanitizeToolMessageOrdering(messages: any[]): any[] {
const result: any[] = [];
for (let i = 0; i < messages.length; i++) {
const message = messages[i];
// A tool message is only valid directly after an assistant message
// with matching tool_calls. Drop orphan tool results.
if (message.role === "tool") {
continue;
}
const toolCalls = Array.isArray(message.tool_calls)
? message.tool_calls
: [];
if (message.role !== "assistant" || toolCalls.length === 0) {
result.push(message);
continue;
}
const expectedIds = new Set(
toolCalls
.map((tc: any) => tc?.id)
.filter((id: any) => typeof id === "string" && id.length > 0),
);
const toolResults: any[] = [];
let j = i + 1;
while (j < messages.length && messages[j].role === "tool") {
const toolMessage = messages[j];
const toolCallId = toolMessage.tool_call_id;
if (expectedIds.has(toolCallId)) {
toolResults.push(toolMessage);
expectedIds.delete(toolCallId);
}
j++;
}
if (expectedIds.size === 0 && toolResults.length > 0) {
result.push(message);
result.push(...toolResults);
i = j - 1;
continue;
}
// Incomplete tool-call group. Keep assistant text only if present,
// but remove tool_calls to avoid provider API errors.
const assistantWithoutToolCalls = removeToolCalls(message);
if (assistantWithoutToolCalls) {
result.push(assistantWithoutToolCalls);
}
// Skip following orphan tool messages.
i = j - 1;
}
return result;
}
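/**
 * If the history contains an RE checkpoint marker, everything up to and
 * including the marker message is replaced with a short system note so the
 * model reloads state from re-memory instead of stale chat history. The marker
 * format (field values below are illustrative only):
 *
 *   [RE_CHECKPOINT_SAVED]
 *   snapshot: ...
 *   next_step: ...
 *   [/RE_CHECKPOINT_SAVED]
 */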
function trimHistoryAfterLastCheckpoint(messages: any[]): any[] {
const checkpointRegex =
/\[RE_CHECKPOINT_SAVED\]([\s\S]*?)\[\/RE_CHECKPOINT_SAVED\]/;
let checkpointIndex = -1;
let checkpointText = "";
for (let i = messages.length - 1; i >= 0; i--) {
const text = getTextFromContent(messages[i]?.content);
const match = text.match(checkpointRegex);
if (match) {
checkpointIndex = i;
checkpointText = match[1].trim();
break;
}
}
if (checkpointIndex < 0) {
return messages;
}
const tail = messages.slice(checkpointIndex + 1);
return [
{
role: "system",
content:
"Earlier conversation history was intentionally omitted because the analysis was saved to a persistent RE checkpoint.\n\n" +
"Latest checkpoint marker:\n" +
checkpointText +
"\n\nIf the user asks to continue, first load the latest checkpoint/snapshot from re-memory, then continue from next_step. " +
"Do not rely on omitted chat history.",
},
...tail,
];
}
/* -------------------------------------------------------------------------- */
/* Stream-handling utils */
/* -------------------------------------------------------------------------- */
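/** Propagates LM Studio's abort signal to the in-flight completion stream. */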
function wireAbort(
ctl: GeneratorController,
stream: { controller?: AbortController },
) {
ctl.onAborted(() => {
console.info("Generation aborted by user.");
stream.controller?.abort();
});
}
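/**
 * Parses streamed tool-call argument JSON. Malformed input is preserved under
 * `__raw_arguments` rather than discarded, e.g.:
 *   safeParseToolArguments('{"path": "/tmp"}') // -> { path: "/tmp" }
 *   safeParseToolArguments('{"path": ')        // -> { __raw_arguments: '{"path": ' }
 */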
function safeParseToolArguments(raw: string): any {
if (!raw || raw.trim().length === 0) {
return {};
}
try {
return JSON.parse(raw);
} catch {
return {
__raw_arguments: raw,
};
}
}
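/**
 * Drains a completions stream: text deltas are forwarded as fragments, and
 * tool calls (which arrive split across id/name/argument deltas) are
 * reassembled before being reported to LM Studio as finished tool calls.
 */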
async function consumeStream(stream: AsyncIterable<any>, ctl: GeneratorController) {
let current: ToolCallState | null = null;
function maybeFlushCurrentToolCall() {
if (current === null || current.name === null) {
return;
}
ctl.toolCallGenerationEnded({
type: "function",
name: current.name,
arguments: safeParseToolArguments(current.arguments),
id: current.id,
});
current = null;
}
for await (const chunk of stream) {
const delta = chunk.choices?.[0]?.delta as
| {
content?: string;
reasoning_content?: string;
tool_calls?: Array<{
index: number;
id?: string;
function?: { name?: string; arguments?: string };
}>;
}
| undefined;
if (!delta) {
continue;
}
// DeepSeek thinking/reasoning stream chunks are intentionally ignored.
// The plugin does not round-trip reasoning_content into future requests.
if (delta.reasoning_content) {
continue;
}
if (delta.content) {
ctl.fragmentGenerated(delta.content);
}
for (const toolCall of delta.tool_calls ?? []) {
if (toolCall.id !== undefined) {
maybeFlushCurrentToolCall();
current = {
id: toolCall.id,
name: null,
index: toolCall.index,
arguments: "",
};
ctl.toolCallGenerationStarted();
}
if (toolCall.function?.name && current) {
current.name = toolCall.function.name;
ctl.toolCallGenerationNameReceived(toolCall.function.name);
}
if (toolCall.function?.arguments && current) {
current.arguments += toolCall.function.arguments;
ctl.toolCallGenerationArgumentFragmentGenerated(toolCall.function.arguments);
}
}
if (chunk.choices?.[0]?.finish_reason === "tool_calls" && current?.name) {
maybeFlushCurrentToolCall();
}
}
// Flush any tool call still buffered when the stream ends without an
// explicit "tool_calls" finish reason.
maybeFlushCurrentToolCall();
console.info("Generation completed.");
}
/* -------------------------------------------------------------------------- */
/* DeepSeek raw stream helper */
/* -------------------------------------------------------------------------- */
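/**
 * Streams a chat completion from DeepSeek over raw fetch + server-sent events.
 * Each `data:` line is parsed as a JSON chunk and yielded; keep-alive lines and
 * the terminal "[DONE]" sentinel are skipped. The returned iterable carries an
 * AbortController so wireAbort() can cancel the request mid-stream.
 */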
async function createDeepSeekStream(
globalConfig: any,
requestPayload: any,
): Promise<AsyncIterable<any> & { controller: AbortController }> {
const baseUrl =
asString(safeGet(globalConfig, "baseUrl", ""), "") ||
asString(process.env.OPENAI_BASE_URL, "") ||
"https://api.deepseek.com/v1";
const apiKey =
asString(safeGet(globalConfig, "apiKey", ""), "") ||
asString(process.env.DEEPSEEK_API_KEY, "") ||
asString(process.env.OPENAI_API_KEY, "");
const controller = new AbortController();
const url = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
const response = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
},
body: JSON.stringify(requestPayload),
signal: controller.signal,
});
if (!response.ok) {
const text = await response.text().catch(() => "");
throw {
status: response.status,
error: {
message: `DeepSeek HTTP ${response.status}: ${text}`,
},
};
}
if (!response.body) {
throw {
status: response.status,
error: {
message: "DeepSeek HTTP response did not contain a stream body.",
},
};
}
const iterable = {
controller,
async *[Symbol.asyncIterator]() {
const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = "";
try {
while (true) {
const { value, done } = await reader.read();
if (done) {
break;
}
buffer += decoder.decode(value, { stream: true });
while (true) {
const lineEnd = buffer.indexOf("\n");
if (lineEnd < 0) {
break;
}
const line = buffer.slice(0, lineEnd).trimEnd();
buffer = buffer.slice(lineEnd + 1);
if (!line.startsWith("data:")) {
continue;
}
const data = line.slice("data:".length).trim();
if (!data || data === "[DONE]") {
continue;
}
try {
yield JSON.parse(data);
} catch {
console.warn("[PLUGIN] Failed to parse SSE data:", data);
}
}
}
} finally {
reader.releaseLock();
}
},
};
return iterable;
}
/* -------------------------------------------------------------------------- */
/* API */
/* -------------------------------------------------------------------------- */
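/**
 * Plugin entry point. Resolves per-chat and global configuration, converts and
 * compacts the chat history, enforces the prompt-size guard, builds the request
 * payload, then streams the completion back through the controller. DeepSeek
 * targets use the raw SSE helper above; other providers go through the OpenAI
 * SDK client.
 */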
export async function generate(ctl: GeneratorController, history: Chat) {
const config = ctl.getPluginConfig(configSchematics as any) as any;
const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics as any) as any;
const debug = asBool(safeGet(config, "debug", false), false);
if (debug) {
console.log(`[DEBUG] [ENTER] generate() callId=${Math.random().toString(36).slice(-4)}`);
}
// Debug-only request counter. Do not rely on this for real rate limiting.
let requestCounter = 1;
const allMessages = Array.from(history);
if (debug) {
for (let i = allMessages.length - 1; i >= 0; i--) {
const msg = allMessages[i];
if (msg.getRole() === "assistant") {
const content = msg.getText();
if (content) {
const match = content.match(/🎟 Request #(\d+)/);
if (match) {
requestCounter = parseInt(match[1], 10) + 1;
break;
}
}
}
}
console.log("[PLUGIN] History length:", allMessages.length);
const lastMsg = allMessages.slice(-1)[0];
if (lastMsg) {
console.log(
"[PLUGIN] Last role:",
lastMsg.getRole(),
"preview:",
lastMsg.getText()?.substring(0, 50),
);
}
console.log("[PLUGIN] Parsed counter:", requestCounter);
}
const baseUrl =
asString(safeGet(globalConfig, "baseUrl", ""), "") ||
asString(process.env.OPENAI_BASE_URL, "") ||
"https://api.deepseek.com/v1";
const modelOverride =
asString(safeGet(config, "modelOverride", ""), "") ||
asString(safeGet(config, "model", ""), "");
const defaultModel =
asString(safeGet(globalConfig, "defaultModel", ""), "") ||
asString(process.env.OPENAI_MODEL, "") ||
"deepseek-v4-flash";
const model = modelOverride || defaultModel;
if (!model) {
ctl.fragmentGenerated("\u0192?O Missing model. Set a default model in global plugin settings.\n");
return;
}
const maxMessages = Math.floor(
asNumber(
safeGet(globalConfig, "maxMessages", safeGet(config, "maxMessages", 20)),
20,
),
);
const maxToolResultChars = Math.floor(
asNumber(
safeGet(globalConfig, "maxToolResultChars", safeGet(config, "maxToolResultChars", 30000)),
30000,
),
);
const maxPromptChars = Math.floor(
asNumber(
safeGet(globalConfig, "maxPromptChars", safeGet(config, "maxPromptChars", 200000)),
200000,
),
);
const abortIfPromptTooLarge = asBool(
safeGet(globalConfig, "abortIfPromptTooLarge", safeGet(config, "abortIfPromptTooLarge", true)),
true,
);
const forgetHistoryAfterCheckpoint = asBool(
safeGet(globalConfig, "forgetHistoryAfterCheckpoint", true),
true,
);
let messages: any[] = toOpenAIMessages(history) as any[];
if (forgetHistoryAfterCheckpoint) {
messages = trimHistoryAfterLastCheckpoint(messages);
}
messages = compactMessages(messages, maxMessages, maxToolResultChars);
messages = sanitizeToolMessageOrdering(messages);
const promptJson = JSON.stringify(messages);
const promptChars = promptJson.length;
const estimatedPromptTokens = estimateTokensFromChars(promptChars);
if (abortIfPromptTooLarge && promptChars > maxPromptChars) {
ctl.fragmentGenerated(
`Request blocked: prompt too large.\n` +
`Estimated prompt tokens: ~${estimatedPromptTokens}\n` +
`Prompt chars: ${promptChars}\n` +
`Limit: ${maxPromptChars} chars\n\n` +
`Start a new chat or ask for a compact memory summary first.\n`,
);
return;
}
const enableTools = asBool(
safeGet(globalConfig, "enableTools", safeGet(config, "enableTools", true)),
true,
);
const tools = enableTools ? toOpenAITools(ctl) : undefined;
const isDeepSeek =
model.toLowerCase().startsWith("deepseek-") ||
baseUrl.toLowerCase().includes("deepseek");
const temperature = pickNumber(
config,
globalConfig,
"temperatureOverride",
"defaultTemperature",
"temperature",
0.2,
);
const topP = pickNumber(
config,
globalConfig,
"topPOverride",
"defaultTopP",
"topP",
-1,
);
const maxTokens = Math.floor(
pickNumber(
config,
globalConfig,
"maxTokensOverride",
"defaultMaxTokens",
"maxTokens",
0,
),
);
const presencePenalty = pickNumber(
config,
globalConfig,
"presencePenaltyOverride",
"defaultPresencePenalty",
"presencePenalty",
0,
);
const frequencyPenalty = pickNumber(
config,
globalConfig,
"frequencyPenaltyOverride",
"defaultFrequencyPenalty",
"frequencyPenalty",
0,
);
const reasoningEffort =
asString(safeGet(config, "reasoningEffortOverride", ""), "") ||
asString(safeGet(globalConfig, "reasoningEffort", ""), "") ||
asString(safeGet(config, "reasoningEffort", ""), "") ||
"provider_default";
const deepSeekThinking =
asString(safeGet(globalConfig, "deepSeekThinking", ""), "") ||
asString(safeGet(config, "deepSeekThinking", ""), "") ||
"disabled";
const requestPayload: any = {
model,
messages,
stream: true,
};
if (tools && tools.length > 0) {
requestPayload.tools = tools;
}
addNumberIfEnabled(requestPayload, "temperature", temperature, -1);
addNumberIfEnabled(requestPayload, "top_p", topP, -1);
if (maxTokens > 0) {
requestPayload.max_tokens = maxTokens;
}
if (presencePenalty !== 0) {
requestPayload.presence_penalty = presencePenalty;
}
if (frequencyPenalty !== 0) {
requestPayload.frequency_penalty = frequencyPenalty;
}
if (reasoningEffort && reasoningEffort !== "provider_default") {
requestPayload.reasoning_effort = reasoningEffort;
}
if (isDeepSeek) {
const hasTools =
Array.isArray(requestPayload.tools) &&
requestPayload.tools.length > 0;
// DeepSeek V4 defaults to thinking mode. With tool calls it requires
// reasoning_content to be round-tripped. LM Studio's plugin bridge does
// not preserve that field, so force thinking disabled for MCP/tool usage.
requestPayload.thinking = {
type: hasTools
? "disabled"
: deepSeekThinking === "enabled"
? "enabled"
: "disabled",
};
if (requestPayload.thinking.type === "disabled") {
delete requestPayload.reasoning_effort;
}
}
try {
if (debug) {
console.info("[PLUGIN] Request target:", { baseUrl, model });
console.info("[PLUGIN] Generation settings:", {
temperature: requestPayload.temperature,
top_p: requestPayload.top_p,
max_tokens: requestPayload.max_tokens,
presence_penalty: requestPayload.presence_penalty,
frequency_penalty: requestPayload.frequency_penalty,
reasoning_effort: requestPayload.reasoning_effort,
thinking: requestPayload.thinking,
tools_enabled: Boolean(requestPayload.tools?.length),
prompt_chars: promptChars,
estimated_prompt_tokens: estimatedPromptTokens,
});
try {
console.info("[PLUGIN] Request payload:", JSON.stringify(requestPayload, null, 2));
} catch {
console.info("[PLUGIN] Request payload (non-JSON):", requestPayload);
}
}
let stream: any;
if (isDeepSeek) {
stream = await createDeepSeekStream(globalConfig, requestPayload);
} else {
const openai = createOpenAI(globalConfig);
stream = await openai.chat.completions.create(requestPayload);
}
wireAbort(ctl, stream);
await consumeStream(stream, ctl);
if (debug) {
const timeStr = getFormattedTime();
ctl.fragmentGenerated(`\n🎟 Request #${requestCounter}/${MAX_REQUESTS} at ${timeStr}\n`);
}
} catch (error: unknown) {
let msg = "\u0192?O Generation failed.";
const info = extractProviderError(error);
const errorPayload = info.error ?? info;
try {
console.error("[PLUGIN] Upstream error:", JSON.stringify(errorPayload, null, 2));
} catch {
console.error("[PLUGIN] Upstream error (non-JSON):", errorPayload);
}
if (info.status === 429) {
msg = `❌ 429 Rate Limit Exceeded. You've used ${requestCounter}/${MAX_REQUESTS} free requests. Try again later or add your own API key.`;
if (info.providerMessage) {
msg += ` Provider: ${info.providerMessage}`;
}
} else if (info.providerMessage) {
if (
typeof info.providerMessage === "string" &&
info.providerMessage.includes("API Key")
) {
msg = "❌ Invalid or missing API key.";
} else {
msg = `❌ API error: ${info.providerMessage}`;
}
} else if (info.apiMessage) {
msg = `❌ API error: ${info.apiMessage}`;
}
ctl.fragmentGenerated(`${msg}\n`);
return;
}
}