// src/generator.ts
import { configSchematics, globalConfigSchematics } from "./config";
import { type Chat, type GeneratorController } from "@lmstudio/sdk";
import OpenAI from "openai";
import {
type ChatCompletionMessageParam,
type ChatCompletionMessageToolCall,
type ChatCompletionTool,
type ChatCompletionToolMessageParam,
} from "openai/resources/index";
/* -------------------------------------------------------------------------- */
/* Types */
/* -------------------------------------------------------------------------- */
type LocalOrchestration = {
action: "call_main_model";
task: string;
compact_context: string;
main_model_instructions?: string;
completed_actions?: string[];
suggested_integrations?: string[];
risk_notes?: string[];
};
type ToolCallState = {
id: string;
name: string | null;
index: number;
arguments: string;
};
type StreamStats = {
contentChars: number;
estimatedContentTokens: number;
reasoningChars: number;
reasoningForwardedChars: number;
toolCalls: number;
toolArgumentChars: number;
chunks: number;
};
type RelayEfficiency = {
directInputTokens: number;
localInputTokens: number;
mainInputTokens: number;
tokensSaved: number;
compressionRatio: number;
compressionPercent: number;
};
type ReasoningDisplayMode = "hidden" | "status" | "inline";
type PreservedToolOutput = {
toolCallId: string;
action: string;
content: string;
originalChars: number;
truncated: boolean;
};
/* -------------------------------------------------------------------------- */
/* Generic helpers */
/* -------------------------------------------------------------------------- */
function safeGet(cfg: any, name: string, fallback: any = undefined): any {
try {
const value = cfg?.get?.(name);
return value === undefined || value === null ? fallback : value;
} catch {
return fallback;
}
}
function asString(value: any, fallback = ""): string {
return typeof value === "string" ? value.trim() : fallback;
}
function asNumber(value: any, fallback: number): number {
const n = typeof value === "number" ? value : Number(value);
return Number.isFinite(n) ? n : fallback;
}
function asBool(value: any, fallback: boolean): boolean {
if (value === undefined || value === null) {
return fallback;
}
return Boolean(value);
}
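/**
 * Rough token estimate using the common ~4 characters per token heuristic
 * for English text. This is only an approximation; real tokenizer counts
 * vary by model. Example: estimateTokensFromChars(1000) === 250.
 */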
function estimateTokensFromChars(chars: number): number {
return Math.ceil(chars / 4);
}
function truncateText(text: string, maxChars: number): string {
if (maxChars <= 0 || text.length <= maxChars) {
return text;
}
return (
text.slice(0, maxChars) +
`\n\n[TRUNCATED by MCP Model Relay: original ${text.length} chars, kept ${maxChars} chars]`
);
}
function dedupeStrings(values: string[]): string[] {
const seen = new Set<string>();
const result: string[] = [];
for (const value of values) {
const trimmed = value.trim();
const key = trimmed.toLowerCase();
if (!trimmed || seen.has(key)) {
continue;
}
seen.add(key);
result.push(trimmed);
}
return result;
}
function addNumberIfEnabled(
payload: any,
fieldName: string,
value: number,
omitValue: number,
): void {
if (Number.isFinite(value) && value !== omitValue) {
payload[fieldName] = value;
}
}
function normalizeReasoningDisplayMode(value: string): ReasoningDisplayMode {
const mode = value.trim().toLowerCase();
if (mode === "inline") {
return "inline";
}
if (mode === "hidden") {
return "hidden";
}
return "status";
}
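/**
 * Worked example: 12000 tokens sent direct, 3000 tokens of local-model
 * input, and 4000 tokens actually sent to the main model gives
 * tokensSaved = 8000, compressionRatio = 4000 / 12000 (rounds to 0.33),
 * and compressionPercent = 67.
 */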
function computeEfficiency(
directInputTokens: number,
localInputTokens: number,
mainInputTokens: number,
): RelayEfficiency {
const tokensSaved = directInputTokens - mainInputTokens;
const compressionRatio =
directInputTokens > 0 ? mainInputTokens / directInputTokens : 1;
const compressionPercent =
directInputTokens > 0 ? Math.round((1 - compressionRatio) * 100) : 0;
return {
directInputTokens,
localInputTokens,
mainInputTokens,
tokensSaved,
compressionRatio: Math.round(compressionRatio * 100) / 100,
compressionPercent,
};
}
function formatRelayStats(
localOrchestratorStatus: string,
localCompactContextChars: number,
localTaskChars: number,
rawToolOutputChars: number,
rawToolOutputCount: number,
mainStats: StreamStats,
isDeepSeek: boolean,
efficiency: RelayEfficiency,
toolsBefore: number,
toolsAfter: number,
toolSchemaTokens: number,
removedToolNames: string[],
): string {
const mainLabel = isDeepSeek ? "DeepSeek" : "Main model";
let stats = `\n\n[Relay stats]\n`;
stats += `Local orchestrator: ${localOrchestratorStatus}\n`;
if (localOrchestratorStatus === "used") {
stats += `Local chat summary: ${localCompactContextChars} chars (~${estimateTokensFromChars(localCompactContextChars)} tokens)\n`;
stats += `Local task: ${localTaskChars} chars\n`;
}
stats += `Raw tool outputs preserved: ${rawToolOutputCount} item(s), ${rawToolOutputChars} chars (~${estimateTokensFromChars(rawToolOutputChars)} tokens)\n`;
stats += `\n`;
stats += `── Tool Exposure ──\n`;
stats += `Tools exposed to ${mainLabel}: ${toolsBefore} -> ${toolsAfter}\n`;
stats += `Removed tools: ${removedToolNames.length > 0 ? removedToolNames.join(", ") : "none"}\n`;
stats += `Tool schema size: ~${toolSchemaTokens} tokens\n`;
stats += `\n`;
stats += `── Token Efficiency ──\n`;
  stats += `Direct to ${mainLabel} (without relay): ~${efficiency.directInputTokens} tokens\n`;
  stats += `Local model input: ~${efficiency.localInputTokens} tokens\n`;
  stats += `${mainLabel} input (with relay): ~${efficiency.mainInputTokens} tokens\n`;
stats += `Tokens saved: ~${efficiency.tokensSaved} tokens (${efficiency.compressionPercent}%)\n`;
stats += `Compression ratio: ${efficiency.compressionRatio}x\n`;
stats += `\n`;
stats += `── ${mainLabel} Output ──\n`;
stats += `Result: ${mainStats.contentChars} chars (~${mainStats.estimatedContentTokens} tokens)\n`;
stats += `Reasoning received: ${mainStats.reasoningChars} chars\n`;
stats += `Reasoning forwarded: ${mainStats.reasoningForwardedChars} chars\n`;
stats += `Tool calls: ${mainStats.toolCalls}\n`;
stats += `Tool arguments: ${mainStats.toolArgumentChars} chars\n`;
return stats;
}
/* -------------------------------------------------------------------------- */
/* OpenAI helpers */
/* -------------------------------------------------------------------------- */
function createMainOpenAI(globalConfig: any) {
const baseURL =
asString(safeGet(globalConfig, "mainBaseUrl", ""), "") ||
"https://api.deepseek.com/v1";
const apiKey =
asString(safeGet(globalConfig, "mainApiKey", ""), "") ||
asString(process.env.DEEPSEEK_API_KEY, "") ||
asString(process.env.OPENAI_API_KEY, "");
return new OpenAI({
apiKey,
baseURL,
});
}
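/**
 * Maps the LM Studio SDK Chat history onto OpenAI-style chat messages.
 * Assistant tool-call requests become `tool_calls` entries and tool results
 * become `role: "tool"` messages keyed by `tool_call_id`, matching the
 * OpenAI Chat Completions wire format.
 */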
function toOpenAIMessages(history: Chat): ChatCompletionMessageParam[] {
const messages: ChatCompletionMessageParam[] = [];
for (const message of history) {
switch (message.getRole()) {
case "system":
messages.push({ role: "system", content: message.getText() });
break;
case "user":
messages.push({ role: "user", content: message.getText() });
break;
case "assistant": {
const toolCalls: ChatCompletionMessageToolCall[] = message
.getToolCallRequests()
.map((toolCall) => ({
id: toolCall.id ?? "",
type: "function",
function: {
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments ?? {}),
},
}));
messages.push({
role: "assistant",
content: message.getText(),
...(toolCalls.length ? { tool_calls: toolCalls } : {}),
} as ChatCompletionMessageParam);
break;
}
case "tool": {
message.getToolCallResults().forEach((toolCallResult) => {
messages.push({
role: "tool",
tool_call_id: toolCallResult.toolCallId ?? "",
content: toolCallResult.content,
} as ChatCompletionToolMessageParam);
});
break;
}
}
}
return messages;
}
function toOpenAITools(ctl: GeneratorController): ChatCompletionTool[] | undefined {
const tools = ctl.getToolDefinitions().map<ChatCompletionTool>((t) => ({
type: "function",
function: {
name: t.function.name,
description: t.function.description,
parameters: t.function.parameters ?? {},
},
}));
return tools.length ? tools : undefined;
}
/* -------------------------------------------------------------------------- */
/* Message helpers */
/* -------------------------------------------------------------------------- */
function getTextFromContent(content: any): string {
if (typeof content === "string") {
return content;
}
if (Array.isArray(content)) {
return content
.map((part) => {
if (typeof part === "string") {
return part;
}
if (part && typeof part.text === "string") {
return part.text;
}
return "";
})
.join("\n");
}
return "";
}
function hasMeaningfulContent(message: any): boolean {
const content = message?.content;
if (typeof content === "string") {
return content.trim().length > 0;
}
if (Array.isArray(content)) {
return content.length > 0;
}
return content != null;
}
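/**
 * Keeps every system message plus the last `maxMessages` non-system
 * messages. For example, with maxMessages = 2, [system, u1, a1, u2, a2]
 * becomes [system, u2, a2]. A maxMessages of 0 or less disables windowing.
 */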
function selectMessageWindow(messages: any[], maxMessages: number): any[] {
if (maxMessages <= 0) {
return messages;
}
const systemMessages = messages.filter((m) => m.role === "system");
const nonSystemMessages = messages.filter((m) => m.role !== "system");
return [
...systemMessages,
...nonSystemMessages.slice(-maxMessages),
];
}
function compactMessages(
messages: any[],
maxMessages: number,
maxToolResultChars: number,
): any[] {
const selected = selectMessageWindow(messages, maxMessages);
return selected.map((message) => {
if (message.role !== "tool") {
return message;
}
const text = getTextFromContent(message.content);
return {
...message,
content: truncateText(text, maxToolResultChars),
};
});
}
function removeToolCalls(message: any): any | null {
const copy = { ...message };
delete copy.tool_calls;
if (!hasMeaningfulContent(copy)) {
return null;
}
return copy;
}
function toolNameFromAction(action: string): string {
const index = action.indexOf("(");
return (index >= 0 ? action.slice(0, index) : action).trim();
}
function splitToolPrefixList(value: string): string[] {
return value
.split(",")
.map((x) => x.trim().toLowerCase())
.filter((x) => x.length > 0);
}
function isAlwaysAvailableTool(
toolName: string,
alwaysAvailableToolPrefixes: string[],
): boolean {
const name = toolName.toLowerCase();
return alwaysAvailableToolPrefixes.some((prefix) =>
name.startsWith(prefix),
);
}
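/**
 * Hides tools whose base name already appears in completedActions (e.g.
 * "decompile(0x401000)" maps to the tool name "decompile"), except for
 * tools matching an always-available prefix such as "re_memory_".
 * Returns undefined instead of an empty array so the `tools` field can be
 * omitted from the request entirely.
 */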
function filterCompletedTools(
tools: ChatCompletionTool[] | undefined,
completedActions: string[],
alwaysAvailableToolPrefixes: string[],
): ChatCompletionTool[] | undefined {
if (!tools || completedActions.length === 0) {
return tools;
}
const completedToolNames = new Set(
completedActions
.map(toolNameFromAction)
.map((x) => x.toLowerCase())
.filter((x) => x.length > 0),
);
const filtered = tools.filter((tool) => {
const name = tool.function?.name?.toLowerCase() ?? "";
if (isAlwaysAvailableTool(name, alwaysAvailableToolPrefixes)) {
return true;
}
return !completedToolNames.has(name);
});
return filtered.length > 0 ? filtered : undefined;
}
function toolMessageToContextMessage(message: any): any {
const toolCallId =
typeof message.tool_call_id === "string" && message.tool_call_id.length > 0
? message.tool_call_id
: "unknown";
const content = getTextFromContent(message.content);
return {
role: "user",
content:
`[Tool output preserved without matching assistant tool_call]\n` +
`tool_call_id: ${toolCallId}\n\n` +
content,
};
}
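/**
 * The OpenAI API requires every `role: "tool"` message to directly follow
 * an assistant message whose tool_calls contain its tool_call_id. This
 * pass repairs histories that break that invariant: fully matched
 * assistant/tool groups pass through unchanged, while orphaned tool
 * results (and groups with missing ids) are downgraded to plain user
 * messages via toolMessageToContextMessage so the provider does not
 * reject the request.
 */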
function sanitizeToolMessageOrdering(messages: any[]): any[] {
const result: any[] = [];
for (let i = 0; i < messages.length; i++) {
const message = messages[i];
if (message.role === "tool") {
result.push(toolMessageToContextMessage(message));
continue;
}
const toolCalls = Array.isArray(message.tool_calls)
? message.tool_calls
: [];
if (message.role !== "assistant" || toolCalls.length === 0) {
result.push(message);
continue;
}
const expectedIds = new Set(
toolCalls
.map((tc: any) => tc?.id)
.filter((id: any) => typeof id === "string" && id.length > 0),
);
const toolResults: any[] = [];
const orphanToolResults: any[] = [];
let j = i + 1;
while (j < messages.length && messages[j].role === "tool") {
const toolMessage = messages[j];
const toolCallId = toolMessage.tool_call_id;
if (expectedIds.has(toolCallId)) {
toolResults.push(toolMessage);
expectedIds.delete(toolCallId);
} else {
orphanToolResults.push(toolMessage);
}
j++;
}
if (expectedIds.size === 0) {
result.push(message);
result.push(...toolResults);
for (const orphan of orphanToolResults) {
result.push(toolMessageToContextMessage(orphan));
}
i = j - 1;
continue;
}
const assistantWithoutToolCalls = removeToolCalls(message);
if (assistantWithoutToolCalls) {
result.push(assistantWithoutToolCalls);
}
for (const toolResult of toolResults) {
result.push(toolMessageToContextMessage(toolResult));
}
for (const orphan of orphanToolResults) {
result.push(toolMessageToContextMessage(orphan));
}
i = j - 1;
}
return result;
}
/* -------------------------------------------------------------------------- */
/* Raw tool output preservation */
/* -------------------------------------------------------------------------- */
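/**
 * Renders a tool call as a short "name(key-argument)" label, preferring
 * well-known argument keys (addr, pattern, query, ...) and falling back to
 * the first non-paging argument. Example: a call with name "decompile"
 * and arguments '{"addr":"0x401000"}' becomes "decompile(0x401000)".
 */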
function formatToolActionFromToolCall(toolCall: any): string {
const name = toolCall?.function?.name || "unknown";
let args = "";
try {
const parsed = JSON.parse(toolCall?.function?.arguments || "{}");
args =
parsed.addr ||
parsed.addrs ||
parsed.pattern ||
parsed.query ||
parsed.name ||
parsed.symbol ||
"";
if (!args) {
const keys = Object.keys(parsed).filter(
(k) => k !== "offset" && k !== "count" && k !== "limit",
);
if (keys.length > 0) {
const value = parsed[keys[0]];
args =
typeof value === "string"
? value
: JSON.stringify(value).slice(0, 80);
}
}
} catch {
args = "";
}
return `${name}(${args})`;
}
function extractCompletedActions(messages: any[]): string[] {
const completed: string[] = [];
for (const message of messages) {
if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
for (const toolCall of message.tool_calls) {
completed.push(formatToolActionFromToolCall(toolCall));
}
}
}
return completed;
}
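/**
 * Pairs each tool result with the assistant tool_call that produced it and
 * preserves the raw output verbatim, subject only to the relay's own
 * character cap. Results without a matching call are kept under an
 * "unknown_tool(<id>)" action label rather than dropped.
 */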
function extractRawToolOutputs(
messages: any[],
maxToolOutputChars: number,
): PreservedToolOutput[] {
const pendingActions = new Map<string, string>();
const outputs: PreservedToolOutput[] = [];
for (const message of messages) {
if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
for (const toolCall of message.tool_calls) {
const id = toolCall.id ?? "";
if (id.length > 0) {
pendingActions.set(id, formatToolActionFromToolCall(toolCall));
}
}
continue;
}
if (message.role === "tool") {
const toolCallId =
typeof message.tool_call_id === "string" && message.tool_call_id.length > 0
? message.tool_call_id
: "unknown";
const action = pendingActions.get(toolCallId) ?? `unknown_tool(${toolCallId})`;
const rawContent = getTextFromContent(message.content);
const truncated =
maxToolOutputChars > 0 && rawContent.length > maxToolOutputChars;
outputs.push({
toolCallId,
action,
content: truncated
? rawContent.slice(0, maxToolOutputChars) +
`\n\n[RAW TOOL OUTPUT TRUNCATED BY RELAY: original ${rawContent.length} chars, kept ${maxToolOutputChars} chars]`
: rawContent,
originalChars: rawContent.length,
truncated,
});
}
}
return outputs;
}
function stripToolOutputsForLocalCompressor(messages: any[]): any[] {
return messages
.map((message) => {
if (message.role === "tool") {
return {
role: "user",
content:
"[RAW TOOL OUTPUT OMITTED FROM LOCAL COMPRESSOR]\n" +
"The relay plugin preserves tool output as-is and passes it to the main model separately.",
};
}
if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
const actions = message.tool_calls.map(formatToolActionFromToolCall);
const text = getTextFromContent(message.content);
return {
role: "assistant",
content:
(text.trim().length > 0 ? text.trim() + "\n\n" : "") +
`[TOOL CALLS MADE]\n${actions.map((x: string) => `- ${x}`).join("\n")}`,
};
}
return message;
})
.filter((message) => hasMeaningfulContent(message));
}
function formatRawToolOutputsForMain(outputs: PreservedToolOutput[]): string {
if (outputs.length === 0) {
return "No raw tool outputs preserved in this relay round.";
}
return outputs
.map((output, index) => {
return (
`--- RAW TOOL OUTPUT ${index + 1} ---\n` +
`Action: ${output.action}\n` +
`tool_call_id: ${output.toolCallId}\n` +
`original_chars: ${output.originalChars}\n` +
`truncated_by_relay: ${output.truncated ? "yes" : "no"}\n\n` +
output.content +
`\n--- END RAW TOOL OUTPUT ${index + 1} ---`
);
})
.join("\n\n");
}
/* -------------------------------------------------------------------------- */
/* Local LM Studio orchestrator */
/* -------------------------------------------------------------------------- */
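/**
 * Caps the orchestrator input so it fits the local model's context window.
 * Worked example with the defaults (contextLength 8192, reservedOutputTokens
 * 1000): the output reserve is clamped to [512, 4096] and stays 1000,
 * usableInputTokens = max(1024, 8192 - 1000 - 2048) = 5144, and maxChars =
 * floor(5144 * 2.4) = 12345. The 2.4 chars-per-token factor is deliberately
 * conservative.
 */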
function truncateForLocalContext(
text: string,
contextLength: number,
reservedOutputTokens: number,
): string {
const overheadTokens = 2048;
const outputReserveTokens = Math.min(
Math.max(reservedOutputTokens, 512),
4096,
);
const usableInputTokens = Math.max(
1024,
contextLength - outputReserveTokens - overheadTokens,
);
const maxChars = Math.floor(usableInputTokens * 2.4);
if (text.length <= maxChars) {
return text;
}
return (
text.slice(0, maxChars) +
`\n\n[TRUNCATED by MCP Model Relay before local orchestrator: original ${text.length} chars, kept ${maxChars} chars]`
);
}
function extractLmStudioRestText(data: any): string {
if (typeof data?.output === "string") {
return data.output;
}
if (Array.isArray(data?.output)) {
const messageText = data.output
.filter(
(item: any) =>
item?.type === "message" && typeof item?.content === "string",
)
.map((item: any) => item.content)
.join("\n")
.trim();
if (messageText.length > 0) {
return messageText;
}
return data.output
.filter((item: any) => typeof item?.content === "string")
.map((item: any) => item.content)
.join("\n")
.trim();
}
if (typeof data?.message === "string") {
return data.message;
}
return "";
}
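/**
 * Accepts a bare JSON object, a ```json fenced block, or JSON embedded in
 * surrounding prose, and validates the minimal orchestration shape
 * (action / task / compact_context). The plain-text orchestrator protocol
 * below no longer uses it; it is kept for local models that return JSON.
 */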
function parseOrchestrationJson(text: string): LocalOrchestration | null {
const trimmed = text.trim();
if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
try {
const parsed = JSON.parse(trimmed);
if (
parsed?.action === "call_main_model" &&
typeof parsed.task === "string" &&
typeof parsed.compact_context === "string"
) {
return parsed as LocalOrchestration;
}
} catch {
// Fall through to extraction.
}
}
const fencedJson = trimmed.match(/```json\s*([\s\S]*?)```/i);
const rawJson = fencedJson
? fencedJson[1].trim()
: trimmed.slice(trimmed.indexOf("{"), trimmed.lastIndexOf("}") + 1);
if (!rawJson || !rawJson.startsWith("{")) {
return null;
}
try {
const parsed = JSON.parse(rawJson);
if (
parsed?.action === "call_main_model" &&
typeof parsed.task === "string" &&
typeof parsed.compact_context === "string"
) {
return parsed as LocalOrchestration;
}
} catch {
return null;
}
return null;
}
function splitCsv(value: string): string[] {
return value
.split(",")
.map((x) => x.trim())
.filter((x) => x.length > 0);
}
function buildLocalIntegrations(
integrationsCsv: string,
allowedToolsCsv: string,
): any[] {
const integrations = splitCsv(integrationsCsv);
const allowedTools = splitCsv(allowedToolsCsv);
if (allowedTools.length === 0) {
return integrations;
}
return integrations.map((id) => ({
type: "plugin",
id,
allowed_tools: allowedTools,
}));
}
function buildOrchestratorInput(
messagesWithoutRawToolOutputs: any[],
latestUserText: string,
maxOutputTokens: number,
completedActions: string[],
): string {
const conversationContext = JSON.stringify(messagesWithoutRawToolOutputs, null, 2);
const completedBlock =
completedActions.length > 0
? completedActions.map((x) => `- ${x}`).join("\n")
: "none";
return `You are a prompt compressor for a two-stage LLM pipeline.
The relay plugin preserves raw tool outputs separately.
You only compress human/assistant conversation text into a clear, concise prompt for the main model.
RULES:
- Write ONLY the compressed prompt text. Nothing else.
- NO JSON. NO Markdown. NO code blocks. NO commentary.
- NO "Here is", "Based on", "I have compressed", etc.
- Just the compressed prompt, plain text.
- Keep it under ${maxOutputTokens} output tokens.
- Preserve the user's exact intent and target.
- Mention that raw tool outputs are preserved separately by the relay.
- If the request is only a test/placeholder, output: "Test request — no analysis needed."
Completed tool actions already known:
${completedBlock}
Latest user message:
${latestUserText}
Conversation without raw tool outputs:
${conversationContext}
Compressed prompt:`;
}
function latestUserMessageFromContext(input: string): string {
  // The input contains "Latest user message:\n<text>\n\nConversation...".
const match = input.match(/Latest user message:\n([\s\S]*?)\n\nConversation/);
if (match && match[1].trim().length > 0) {
const text = match[1].trim();
    // Cap at 200 characters for use as the task summary.
return text.length > 200 ? text.slice(0, 197) + "..." : text;
}
return "Continue the user's request.";
}
async function callLocalOrchestrator(
ctl: GeneratorController,
localBaseUrl: string,
localApiToken: string,
localModel: string,
localReasoning: string,
input: string,
integrations: any[],
contextLength: number,
temperature: number,
maxOutputTokens: number,
timeoutMs: number,
): Promise<LocalOrchestration | null> {
const url = `${localBaseUrl.replace(/\/+$/, "")}/api/v1/chat`;
const localSystemPrompt = `You are a prompt compressor.
The relay plugin preserves raw tool outputs separately.
Your job: compress the conversation into a clear, concise prompt for the main model.
RULES:
- Write ONLY the compressed prompt. Nothing else.
- NO JSON. NO Markdown. NO "Here is". NO "Based on".
- Just the compressed prompt text.
- If you cannot compress safely, output exactly: FALLBACK: Local compressor could not safely summarize. Raw tool outputs are preserved separately.`;
const body: any = {
model: localModel,
system_prompt: localSystemPrompt,
input,
context_length: contextLength,
temperature,
store: false,
};
if (maxOutputTokens > 0) {
body.max_output_tokens = maxOutputTokens;
}
if (integrations.length > 0) {
body.integrations = integrations;
}
if (localReasoning && localReasoning !== "provider_default") {
body.reasoning = localReasoning;
}
const controller = new AbortController();
let timedOut = false;
let abortedByUser = false;
let timeout: ReturnType<typeof setTimeout> | undefined;
if (timeoutMs > 0) {
timeout = setTimeout(() => {
timedOut = true;
controller.abort();
}, timeoutMs);
}
ctl.onAborted(() => {
abortedByUser = true;
controller.abort();
});
let response: Response;
try {
response = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
...(localApiToken ? { Authorization: `Bearer ${localApiToken}` } : {}),
},
body: JSON.stringify(body),
signal: controller.signal,
});
} catch (error) {
if (timedOut) {
throw new Error(`Local orchestrator timed out after ${timeoutMs} ms.`);
}
if (abortedByUser) {
throw new Error("Local orchestrator aborted by user.");
}
throw error;
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
if (!response.ok) {
const text = await response.text().catch(() => "");
throw new Error(`Local orchestrator failed: HTTP ${response.status}: ${text}`);
}
const data = await response.json();
const text = extractLmStudioRestText(data).trim();
if (!text) {
return null;
}
  // No JSON parsing: the plain-text output IS the compact_context.
const compactContext = text.startsWith("FALLBACK:")
? text.slice(9).trim()
: text;
return {
action: "call_main_model",
task: latestUserMessageFromContext(input),
compact_context: compactContext,
main_model_instructions:
"Use the RAW TOOL OUTPUTS above as primary evidence. " +
"The local chat summary below may help understand the user's intent.",
suggested_integrations: [],
risk_notes: text.startsWith("FALLBACK:")
? ["local compression fallback used"]
: [],
};
}
/* -------------------------------------------------------------------------- */
/* Main message construction */
/* -------------------------------------------------------------------------- */
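/**
 * Builds the main-model prompt in a cache-friendly order: the system prompt
 * and each preserved raw tool output are emitted as stable standalone
 * messages, and everything that changes between relay rounds (completed
 * actions, task, local chat summary, instructions) is concentrated in the
 * final user message, so providers with prefix caching can reuse the
 * unchanged leading messages.
 */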
function buildMainMessagesFromOrchestration(
orchestration: LocalOrchestration,
completedActions: string[],
rawToolOutputs: PreservedToolOutput[],
): ChatCompletionMessageParam[] {
const system = `You are a reverse engineering and static-analysis assistant with access to MCP tools.
YOU ARE CONTINUING AN ANALYSIS — NOT STARTING FRESH.
The relay plugin preserved raw tool outputs as-is.
Treat RAW TOOL OUTPUTS as primary evidence.
CRITICAL RULES:
1. Use RAW TOOL OUTPUTS directly. They are more important than the local chat summary.
2. The local chat summary may be incomplete. Raw tool output wins.
3. Do not repeat a completed tool action unless the raw output is missing, invalid, stale, or the user explicitly requested a refresh.
4. Only call a tool if the information you need is not already present in RAW TOOL OUTPUTS.
5. Do not invent facts, symbols, files, offsets, APIs, class names, or line numbers.
6. Distinguish confirmed evidence from TODO verify.
7. Stay focused on the user's requested target and question.
8. If enough raw evidence is present, analyze it instead of fetching the same data again.
9. MEMORY FIRST: Before using document_read, ALWAYS use re_memory_search to check if the file or module has already been analyzed. document_read is expensive; avoid it if the answer is already in your memory.
10. AUTONOMOUS SAVING: Whenever you finish analyzing a function, find a bug, or reach a conclusion, you MUST autonomously call re_memory_write to persist your findings. Do not wait for the user to ask you to save.`;
const messages: ChatCompletionMessageParam[] = [
{ role: "system", content: system },
];
  // Emit each raw tool output as its own user message, stable across requests,
  // so the provider's token cache can hit all previously sent raw outputs.
for (const output of rawToolOutputs) {
messages.push({
role: "user",
content:
`--- RAW TOOL OUTPUT (${output.action}) ---\n` +
`tool_call_id: ${output.toolCallId}\n` +
`original_chars: ${output.originalChars}\n` +
`truncated_by_relay: ${output.truncated ? "yes" : "no"}\n\n` +
output.content +
`\n--- END RAW TOOL OUTPUT (${output.action}) ---`,
});
}
  // Chat summary + task + instructions go in the LAST message: only this part
  // changes between requests, so everything before it stays cacheable.
const sections: string[] = [];
if (completedActions.length > 0) {
sections.push(
`Completed tool actions already done. Do not repeat these unless necessary:\n` +
completedActions.map((x) => `- ${x}`).join("\n"),
);
}
if (orchestration.task.trim().length > 0) {
sections.push(`Task:\n${orchestration.task.trim()}`);
}
if (orchestration.compact_context.trim().length > 0) {
sections.push(`Local chat summary:\n${orchestration.compact_context.trim()}`);
}
const instructions = orchestration.main_model_instructions?.trim();
if (instructions && instructions.length > 0) {
sections.push(`Main model instructions:\n${instructions}`);
}
const suggestedIntegrations = (orchestration.suggested_integrations ?? [])
.map((x) => x.trim())
.filter((x) => x.length > 0);
if (suggestedIntegrations.length > 0) {
sections.push(`Suggested integrations:\n${suggestedIntegrations.join(", ")}`);
}
const riskNotes = (orchestration.risk_notes ?? [])
.map((x) => x.trim())
.filter((x) => x.length > 0);
if (riskNotes.length > 0) {
sections.push(`Risk notes:\n${riskNotes.map((x) => `- ${x}`).join("\n")}`);
}
messages.push({ role: "user", content: sections.join("\n\n") });
return messages;
}
/* -------------------------------------------------------------------------- */
/* Skip local orchestrator logic */
/* -------------------------------------------------------------------------- */
function hasToolOrAnalysisContext(messages: any[]): boolean {
return messages.some((m) => {
if (m.role === "tool") {
return true;
}
if (m.role === "assistant" && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
return true;
}
const text = getTextFromContent(m.content);
return (
text.includes("[RE_CHECKPOINT_SAVED]") ||
text.includes("TODO verify") ||
text.includes("confirmed") ||
text.includes("IDA") ||
text.includes("DUI70") ||
text.includes("DirectUI") ||
text.includes("re-memory") ||
text.includes("snapshot") ||
text.includes("checkpoint")
);
});
}
function looksLikeContextualFollowup(text: string): boolean {
const t = text.trim();
if (t.length === 0) {
return false;
}
  // Ends with an ASCII or full-width question mark.
  if (/[?？]$/.test(t)) {
return true;
}
if (
/\b0x[0-9a-fA-F]+\b/.test(t) ||
/\b[A-Za-z_][A-Za-z0-9_:~<>]*\s*\(/.test(t) ||
/\b[A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*\b/.test(t) ||
/\b(sub_|loc_|off_|qword_|dword_|byte_|unk_)[0-9A-Fa-f]+\b/.test(t) ||
/\b(WM_|SHCNE_|SHCNRF_|HRESULT|HWND|COM|vtable|RTTI|IDA|DUI70|DirectUI|DUser)\b/i.test(t)
) {
return true;
}
if (
/\b(patch|fix|reparier|ändere|aendere|mach|continue|weiter|fortsetzen|analysier|analyse|prüf|pruef|erklär|erklaer|was bedeutet|warum|woher|welche funktion)\b/i.test(t)
) {
return true;
}
if (
/\b(das|dies|diese|dieser|diesen|hier|dort|oben|unten|vorher|letzte|letztes|that|this|it|there)\b/i.test(t)
) {
return true;
}
return false;
}
function isTrivialMessage(text: string): boolean {
const t = text.trim().toLowerCase();
return /^(test|neuer test|ping|ok|okay|oki|gut|ja|nein|no|yes|thanks|danke|danke dir|passt|alles gut|läuft|laeuft|fertig|cool|super)[.!?]*$/.test(t);
}
function shouldSkipLocalOrchestrator(latestUserText: string, messages: any[]): boolean {
const text = latestUserText.trim();
if (text.length === 0) {
return true;
}
if (looksLikeContextualFollowup(text)) {
return false;
}
const hasContext = hasToolOrAnalysisContext(messages);
if (isTrivialMessage(text) && !hasContext) {
return true;
}
return false;
}
/* -------------------------------------------------------------------------- */
/* Stream handling */
/* -------------------------------------------------------------------------- */
function wireAbort(
ctl: GeneratorController,
stream: { controller?: AbortController },
) {
ctl.onAborted(() => {
console.info("Generation aborted by user.");
stream.controller?.abort();
});
}
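/**
 * Tool arguments arrive as a streamed JSON string and may be malformed.
 * Instead of throwing, unparseable input is wrapped as
 * { __raw_arguments: "<raw text>" } so the tool layer can decide how to
 * handle it.
 */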
function safeParseToolArguments(raw: string): any {
if (!raw || raw.trim().length === 0) {
return {};
}
try {
return JSON.parse(raw);
} catch {
return {
__raw_arguments: raw,
};
}
}
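/**
 * Consumes an OpenAI-style streaming response. Each chunk delta may carry
 * `content`, provider-specific `reasoning_content` (e.g. DeepSeek), and/or
 * incremental `tool_calls` fragments. A new tool call starts whenever a
 * delta carries a tool-call `id`; its name and argument text accumulate
 * across subsequent chunks until the call is flushed to the controller.
 * Reasoning is surfaced according to the configured display mode
 * (hidden / status / inline).
 */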
async function consumeStream(
stream: AsyncIterable<any>,
ctl: GeneratorController,
reasoningDisplayMode: ReasoningDisplayMode,
): Promise<StreamStats> {
let current: ToolCallState | null = null;
const stats: StreamStats = {
contentChars: 0,
estimatedContentTokens: 0,
reasoningChars: 0,
reasoningForwardedChars: 0,
toolCalls: 0,
toolArgumentChars: 0,
chunks: 0,
};
let reasoningStarted = false;
let answerStarted = false;
function maybeFlushCurrentToolCall() {
if (current === null || current.name === null) {
return;
}
ctl.toolCallGenerationEnded({
type: "function",
name: current.name,
arguments: safeParseToolArguments(current.arguments),
id: current.id,
});
current = null;
}
for await (const chunk of stream) {
stats.chunks++;
const delta = chunk.choices?.[0]?.delta as
| {
content?: string;
reasoning_content?: string;
tool_calls?: Array<{
index: number;
id?: string;
function?: { name?: string; arguments?: string };
}>;
}
| undefined;
if (!delta) {
continue;
}
if (delta.reasoning_content) {
stats.reasoningChars += delta.reasoning_content.length;
if (!reasoningStarted) {
reasoningStarted = true;
if (reasoningDisplayMode === "status") {
ctl.fragmentGenerated("\n\n[Main model reasoning...]\n");
} else if (reasoningDisplayMode === "inline") {
ctl.fragmentGenerated("\n\n[Reasoning]\n");
}
}
if (reasoningDisplayMode === "inline") {
stats.reasoningForwardedChars += delta.reasoning_content.length;
ctl.fragmentGenerated(delta.reasoning_content);
}
continue;
}
if (delta.content) {
if (!answerStarted) {
answerStarted = true;
if (reasoningStarted && reasoningDisplayMode === "inline") {
ctl.fragmentGenerated("\n\n[Answer]\n");
} else if (reasoningStarted && reasoningDisplayMode === "status") {
ctl.fragmentGenerated("\n");
}
}
stats.contentChars += delta.content.length;
ctl.fragmentGenerated(delta.content);
}
for (const toolCall of delta.tool_calls ?? []) {
if (toolCall.id !== undefined) {
maybeFlushCurrentToolCall();
stats.toolCalls++;
current = {
id: toolCall.id,
name: null,
index: toolCall.index,
arguments: "",
};
ctl.toolCallGenerationStarted();
}
if (toolCall.function?.name && current) {
current.name = toolCall.function.name;
ctl.toolCallGenerationNameReceived(toolCall.function.name);
}
if (toolCall.function?.arguments && current) {
stats.toolArgumentChars += toolCall.function.arguments.length;
current.arguments += toolCall.function.arguments;
ctl.toolCallGenerationArgumentFragmentGenerated(toolCall.function.arguments);
}
}
    if (chunk.choices?.[0]?.finish_reason === "tool_calls" && current?.name) {
      maybeFlushCurrentToolCall();
    }
  }
  // Flush a tool call that is still pending if the stream ended without an
  // explicit "tool_calls" finish reason.
  maybeFlushCurrentToolCall();
  stats.estimatedContentTokens = estimateTokensFromChars(stats.contentChars);
console.info("Generation completed.", stats);
return stats;
}
function getRemovedToolNames(
allTools: ChatCompletionTool[] | undefined,
filteredTools: ChatCompletionTool[] | undefined,
): string[] {
const after = new Set(
(filteredTools ?? []).map((tool) => tool.function.name.toLowerCase()),
);
return (allTools ?? [])
.map((tool) => tool.function.name)
.filter((name) => !after.has(name.toLowerCase()));
}
/* -------------------------------------------------------------------------- */
/* Main provider error */
/* -------------------------------------------------------------------------- */
function extractProviderError(error: unknown) {
const err = error && typeof error === "object" ? (error as any) : null;
const status =
err?.status ??
err?.response?.status ??
(typeof err?.code === "number" ? err.code : undefined);
const apiMessage = typeof err?.message === "string" ? err.message : undefined;
const errorObj = err?.error ?? err?.response?.data?.error ?? err?.response?.error;
const responseData = err?.response?.data;
const providerMessage =
(typeof errorObj?.message === "string" && errorObj.message) ||
(typeof responseData?.message === "string" && responseData.message) ||
apiMessage;
return {
status,
providerMessage,
apiMessage,
error: errorObj ?? responseData,
};
}
/* -------------------------------------------------------------------------- */
/* API */
/* -------------------------------------------------------------------------- */
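/**
 * Relay pipeline, per request:
 * 1. Convert the LM Studio chat into OpenAI messages, then window and
 *    compact them.
 * 2. Preserve raw tool outputs verbatim and record completed tool actions.
 * 3. Unless bypassed or skippable, ask the local LM Studio model to
 *    compress the remaining conversation into a compact prompt.
 * 4. Rebuild the main-model messages (raw outputs first, summary last),
 *    filter already-completed tools, stream the main model's answer back
 *    through the controller, and optionally append relay stats.
 */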
export async function generate(ctl: GeneratorController, history: Chat) {
const config = ctl.getPluginConfig(configSchematics as any) as any;
const globalConfig = ctl.getGlobalPluginConfig(globalConfigSchematics as any) as any;
const debug = asBool(safeGet(config, "debug", false), false);
const mainBaseUrl =
asString(safeGet(globalConfig, "mainBaseUrl", ""), "") ||
"https://api.deepseek.com/v1";
const mainModel =
asString(safeGet(config, "mainModelOverride", ""), "") ||
asString(safeGet(globalConfig, "mainModel", ""), "") ||
"deepseek-v4-flash";
const localModel =
asString(safeGet(config, "localModelOverride", ""), "") ||
asString(safeGet(globalConfig, "localModel", ""), "");
const bypassLocal = asBool(
safeGet(config, "bypassLocalOrchestrator", false),
false,
);
const failOpen = asBool(safeGet(globalConfig, "failOpen", true), true);
const showRelayStats = asBool(
safeGet(globalConfig, "showRelayStats", true),
true,
);
const maxMessages = Math.floor(
asNumber(safeGet(globalConfig, "maxMessages", 30), 30),
);
const maxToolResultChars = Math.floor(
asNumber(safeGet(globalConfig, "maxToolResultChars", 30000), 30000),
);
const maxPromptChars = Math.floor(
asNumber(safeGet(globalConfig, "maxPromptChars", 200000), 200000),
);
const rawMessages: any[] = toOpenAIMessages(history) as any[];
const selectedRawMessages = selectMessageWindow(rawMessages, maxMessages);
const completedActions = dedupeStrings(extractCompletedActions(selectedRawMessages));
const rawToolOutputs = extractRawToolOutputs(selectedRawMessages, maxToolResultChars);
let messages: any[] = compactMessages(rawMessages, maxMessages, maxToolResultChars);
messages = sanitizeToolMessageOrdering(messages);
const messagesForLocal = stripToolOutputsForLocalCompressor(selectedRawMessages);
const directInputChars = JSON.stringify(messages).length;
const directInputTokens = estimateTokensFromChars(directInputChars);
const rawToolOutputChars = rawToolOutputs.reduce(
(sum, output) => sum + output.content.length,
0,
);
  const latestUserMessage =
    Array.from(history)
      .reverse()
      .find((m) => m.getRole() === "user")
      ?.getText() ?? "";
  // Same serialized form as directInputChars above; reuse it.
  const promptChars = directInputChars;
if (promptChars > maxPromptChars) {
ctl.fragmentGenerated(
`Request blocked: prompt too large before orchestration.\n` +
`Estimated prompt tokens: ~${estimateTokensFromChars(promptChars)}\n` +
`Prompt chars: ${promptChars}\n` +
`Limit: ${maxPromptChars}\n`,
);
return;
}
if (!mainModel) {
ctl.fragmentGenerated("Missing main model. Set Main Model in global plugin settings.\n");
return;
}
let finalMessages: ChatCompletionMessageParam[] =
messages as ChatCompletionMessageParam[];
let localOrchestratorStatus = "skipped";
let localCompactContextChars = 0;
let localTaskChars = 0;
let localInputTokens = 0;
let mainInputTokens = 0;
const skipLocalOrchestrator = shouldSkipLocalOrchestrator(
latestUserMessage,
messages,
);
if (!bypassLocal && localModel.length > 0 && !skipLocalOrchestrator) {
try {
const localBaseUrl =
asString(safeGet(globalConfig, "localBaseUrl", ""), "") ||
"http://localhost:1234";
const localApiToken = asString(
safeGet(globalConfig, "localApiToken", ""),
"",
);
const localReasoning = asString(
safeGet(globalConfig, "localReasoning", "provider_default"),
"provider_default",
);
const localContextLength = Math.floor(
asNumber(safeGet(globalConfig, "localContextLength", 8192), 8192),
);
const localMaxOutputTokens = Math.floor(
asNumber(safeGet(globalConfig, "localMaxOutputTokens", 1000), 1000),
);
const localTemperature = asNumber(
safeGet(globalConfig, "localTemperature", 0),
0,
);
const localIntegrations = buildLocalIntegrations(
asString(safeGet(globalConfig, "localIntegrations", ""), ""),
asString(safeGet(globalConfig, "localAllowedTools", ""), ""),
);
const localTimeoutMs = Math.floor(
asNumber(
safeGet(globalConfig, "localOrchestratorTimeoutMs", 60000),
60000,
),
);
const rawInput = buildOrchestratorInput(
messagesForLocal,
latestUserMessage,
localMaxOutputTokens,
completedActions,
);
const input = truncateForLocalContext(
rawInput,
localContextLength,
localMaxOutputTokens,
);
localInputTokens = estimateTokensFromChars(input.length);
if (debug) {
console.info("[MCP Model Relay] Calling local orchestrator:", {
localBaseUrl,
localModel,
localContextLength,
localMaxOutputTokens,
localReasoning,
localInputChars: input.length,
localIntegrations,
localTimeoutMs,
completedActions,
rawToolOutputs: rawToolOutputs.length,
});
}
const orchestration = await callLocalOrchestrator(
ctl,
localBaseUrl,
localApiToken,
localModel,
localReasoning,
input,
localIntegrations,
localContextLength,
localTemperature,
localMaxOutputTokens,
localTimeoutMs,
);
if (!orchestration) {
throw new Error("Local orchestrator returned empty output.");
}
orchestration.completed_actions = completedActions;
localOrchestratorStatus = "used";
localCompactContextChars = orchestration.compact_context.length;
localTaskChars = orchestration.task.length;
finalMessages = buildMainMessagesFromOrchestration(
orchestration,
completedActions,
rawToolOutputs,
);
mainInputTokens = estimateTokensFromChars(
JSON.stringify(finalMessages).length,
);
if (debug) {
console.info("[MCP Model Relay] Local orchestration result:", {
taskChars: localTaskChars,
compactContextChars: localCompactContextChars,
taskPreview: orchestration.task.slice(0, 300),
compactContextPreview: orchestration.compact_context.slice(0, 500),
rawToolOutputCount: rawToolOutputs.length,
rawToolOutputChars,
});
}
} catch (error) {
localOrchestratorStatus = "failed";
const msg = error instanceof Error ? error.message : String(error);
if (!failOpen) {
ctl.fragmentGenerated(`Local orchestrator failed: ${msg}\n`);
return;
}
if (debug) {
ctl.fragmentGenerated(
`[Local orchestrator rejected/fallback: ${msg}]\n\n`,
);
}
}
} else if (debug) {
console.info("[MCP Model Relay] Local orchestration omitted:", {
bypassLocal,
localModel,
skipLocalOrchestrator,
});
}
if (mainInputTokens === 0) {
mainInputTokens = directInputTokens;
}
const openai = createMainOpenAI(globalConfig);
const allTools = toOpenAITools(ctl);
const alwaysAvailableToolPrefixes = splitToolPrefixList(
asString(
safeGet(
globalConfig,
"alwaysAvailableToolPrefixes",
"re_memory_,re_snapshot_",
),
"re_memory_,re_snapshot_",
),
);
const tools = filterCompletedTools(
allTools,
completedActions,
alwaysAvailableToolPrefixes,
);
const toolsBefore = allTools?.length ?? 0;
const toolsAfter = tools?.length ?? 0;
const allToolsTokenEstimate = allTools
? estimateTokensFromChars(JSON.stringify(allTools).length)
: 0;
const toolsTokenEstimate = tools
? estimateTokensFromChars(JSON.stringify(tools).length)
: 0;
const removedToolNames = getRemovedToolNames(allTools, tools);
if (debug) {
console.info("[MCP Model Relay] Tool filtering:", {
toolsBefore,
toolsAfter,
allToolsTokenEstimate,
toolsTokenEstimate,
removedToolNames,
alwaysAvailableToolPrefixes,
completedActions,
});
}
const requestPayload: any = {
model: mainModel,
messages: finalMessages,
stream: true,
};
if (tools && tools.length > 0) {
requestPayload.tools = tools;
}
const mainTemperature = asNumber(
safeGet(globalConfig, "mainTemperature", 0.1),
0.1,
);
const mainTopP = asNumber(
safeGet(globalConfig, "mainTopP", -1),
-1,
);
const mainMaxTokens = Math.floor(
asNumber(safeGet(globalConfig, "mainMaxTokens", 4000), 4000),
);
const mainReasoningEffort = asString(
safeGet(globalConfig, "mainReasoningEffort", "provider_default"),
"provider_default",
);
addNumberIfEnabled(requestPayload, "temperature", mainTemperature, -1);
addNumberIfEnabled(requestPayload, "top_p", mainTopP, -1);
if (mainMaxTokens > 0) {
requestPayload.max_tokens = mainMaxTokens;
}
if (mainReasoningEffort && mainReasoningEffort !== "provider_default") {
requestPayload.reasoning_effort = mainReasoningEffort;
}
const isDeepSeek =
mainModel.toLowerCase().startsWith("deepseek-") ||
mainBaseUrl.toLowerCase().includes("deepseek");
if (isDeepSeek) {
const deepSeekThinking = asString(
safeGet(globalConfig, "deepSeekThinking", "disabled"),
"disabled",
);
if (deepSeekThinking === "disabled") {
requestPayload.thinking = { type: "disabled" };
}
delete requestPayload.reasoning_effort;
}
try {
if (debug) {
console.info("[MCP Model Relay] Main request:", {
mainBaseUrl,
mainModel,
messages: finalMessages.length,
tools: requestPayload.tools?.length ?? 0,
localOrchestratorStatus,
completedActions,
rawToolOutputs: rawToolOutputs.length,
});
}
const mainReasoningDisplay = normalizeReasoningDisplayMode(
asString(safeGet(globalConfig, "mainReasoningDisplay", "status"), "status"),
);
const stream = await openai.chat.completions.create(requestPayload);
wireAbort(ctl, stream as any);
const mainStats = await consumeStream(stream as any, ctl, mainReasoningDisplay);
if (showRelayStats) {
const directInputTokensWithTools = directInputTokens + allToolsTokenEstimate;
const mainInputTokensWithTools = mainInputTokens + toolsTokenEstimate;
const efficiency = computeEfficiency(
directInputTokensWithTools,
localInputTokens,
mainInputTokensWithTools,
);
ctl.fragmentGenerated(
formatRelayStats(
localOrchestratorStatus,
localCompactContextChars,
localTaskChars,
rawToolOutputChars,
rawToolOutputs.length,
mainStats,
isDeepSeek,
efficiency,
toolsBefore,
toolsAfter,
toolsTokenEstimate,
removedToolNames,
),
);
}
} catch (error: unknown) {
const info = extractProviderError(error);
const errorPayload = info.error ?? info;
try {
console.error(
"[MCP Model Relay] Main provider error:",
JSON.stringify(errorPayload, null, 2),
);
} catch {
console.error("[MCP Model Relay] Main provider error:", errorPayload);
}
if (info.providerMessage) {
ctl.fragmentGenerated(`Main provider API error: ${info.providerMessage}\n`);
} else if (info.apiMessage) {
ctl.fragmentGenerated(`Main provider API error: ${info.apiMessage}\n`);
} else {
ctl.fragmentGenerated("Main provider generation failed.\n");
}
}
}