import {
  text,
  type Chat,
  type ChatMessage,
  type FileHandle,
  type LLMDynamicHandle,
  type PredictionProcessStatusController,
  type PromptPreprocessorController,
} from "@lmstudio/sdk";
import { readFile } from "fs/promises";
import { dirname, join } from "path";
import { pluginConfigSchematics } from "./config";
import { TOOLS_DOCUMENTATION, TOOLS_DOCUMENTATION_LITE } from "./toolsDocumentation";
import { getPersistedState, savePersistedState } from "./stateManager";
import { getDict } from "./locales/i18n";

/**
 * How the content of attached (non-image) files is supplied to the model.
 *
 * - `"inject-full-content"`: the parsed text of every file is placed in the prompt
 *   (handled by `prepareDocumentContextInjection`).
 * - `"retrieval"`: only retrieved citations are placed in the prompt
 *   (handled by `prepareRetrievalResultsContextInjection`).
 * - `"none"`: no document context is added; `promptPreprocessor` has no branch for it,
 *   so the prompt is left as-is.
 */
type DocumentContextInjectionStrategy = "none" | "inject-full-content" | "retrieval";

/**
 * List the locations where `subagent_docs.md` is looked up, in priority order.
 *
 * The first two candidates are relative to the parent of this module's directory,
 * the last two are relative to the supplied working directory. In each pair one
 * candidate is the file directly in that directory and the other is inside its
 * `instructions` sub-folder.
 *
 * @param currentWorkingDirectory - Workspace directory used for the last two candidates.
 * @returns Four candidate file paths; callers are expected to try them in order.
 */
export function getSubAgentDocsCandidatePaths(currentWorkingDirectory: string): string[] {
  const docsFileName = "subagent_docs.md";
  const instructionsFolder = "instructions";
  const moduleParentDir = dirname(__dirname);

  const candidates: string[] = [];
  candidates.push(join(moduleParentDir, docsFileName));
  candidates.push(join(moduleParentDir, instructionsFolder, docsFileName));
  candidates.push(join(currentWorkingDirectory, instructionsFolder, docsFileName));
  candidates.push(join(currentWorkingDirectory, docsFileName));
  return candidates;
}

/**
 * Prompt preprocessor: rewrites the incoming user message before it reaches the model.
 *
 * Steps, in order:
 *  1. Document context — for non-image attachments, either the full file content or
 *     retrieval citations are merged into the prompt.
 *  2. The delegation and plan-mode hints selected in the plugin config are appended.
 *  3. `subagent_docs.md` is appended when the secondary agent is enabled and the docs
 *     have not been injected yet (the flag is reset on the first turn).
 *  4. On the first turn only, tools documentation, the files listed in `startup.md`,
 *     a session-resume block and (if enabled) a summary of stored memories are prepended.
 *  5. Persisted state is updated (message count, recent-message buffer, injection flag),
 *     a background auto-memory capture is optionally started, and state is saved once.
 *
 * @param ctl - Controller supplied by LM Studio for this preprocessing run.
 * @param userMessage - The message just sent by the user. It is appended to the pulled
 *   history and may be mutated by the inject-full-content path.
 * @returns The rewritten prompt text, or `userMessage` itself when the text is unchanged.
 */
export async function promptPreprocessor(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
  const userPrompt = userMessage.getText();

  // 1. RAG / Context Injection Logic
  const history = await ctl.pullHistory();

  // Check if this is the first turn (history is empty) before appending
  let isFirstTurn = false;
  if (Array.isArray(history)) {
    isFirstTurn = history.length === 0;
  } else if ("messages" in history && Array.isArray((history as any).messages)) {
    isFirstTurn = (history as any).messages.length === 0;
  } else if ("length" in history && typeof (history as any).length === "number") {
    isFirstTurn = (history as any).length === 0;
  } else {
    // Fallback: If we can't verify, we default to assuming it's the first turn
    // to ensure docs are loaded at least once, but this may cause the "always load" issue
    // if the object structure is unexpected.
    // However, moving this check before append() makes it much more likely to be correct.
    isFirstTurn = true;
  }

  history.append(userMessage);

  // Files attached to this message vs. every file in the conversation (images excluded).
  const newFiles = userMessage.getFiles(ctl.client).filter(f => f.type !== "image");
  const files = history.getAllFiles(ctl.client).filter(f => f.type !== "image");

  let processingResult: string | ChatMessage | null = null;

  if (newFiles.length > 0) {
    // New attachments: decide between full injection and retrieval.
    const strategy = await chooseContextInjectionStrategy(ctl, userPrompt, newFiles);
    if (strategy === "inject-full-content") {
      processingResult = await prepareDocumentContextInjection(ctl, userMessage);
    } else if (strategy === "retrieval") {
      processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, files);
    }
  } else if (files.length > 0) {
    // No new attachments, but earlier files exist: always use retrieval.
    processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, files);
  }

  // Determine the current content after RAG processing
  let currentContent: string;
  if (processingResult) {
    if (typeof processingResult === 'string') {
      currentContent = processingResult;
    } else {
      // It's a ChatMessage
      currentContent = processingResult.getText();
    }
  } else {
    currentContent = userPrompt;
  }

  // --- Config reads & single state load (one disk read for the entire invocation) ---
  const pluginConfig = ctl.getPluginConfig(pluginConfigSchematics);
  const defaultWorkspacePath = pluginConfig.get("defaultWorkspacePath");
  const frequency = pluginConfig.get("subAgentFrequency");
  const debugMode = pluginConfig.get("enableDebugMode");

  // Layer 2: resolve runtime dictionary from user-selected language
  const messageLanguage = pluginConfig.get("messageLanguage");
  const rt = getDict(messageLanguage).runtime;

  // Single state read — mutations are accumulated in memory and flushed once at the end.
  // NOTE: uiLanguageOverride persistence is handled by toolsProvider on plugin startup;
  //       doing it here on every message is redundant and was removed.
  const state = await getPersistedState(defaultWorkspacePath);

  // --- Plan Mode Instructions ---
  const planMode = pluginConfig.get("planMode");
  let planHint = "";
  if (planMode === "always") {
    planHint = rt.planHintAlways;
  } else if (planMode === "when_useful") {
    planHint = rt.planHintWhenUseful;
  }

  // --- Delegation Hint ---
  let delegationHint = "";
  if (frequency === "always") {
    delegationHint = rt.delegationHintAlways;
  } else if (frequency === "when_useful") {
    delegationHint = rt.delegationHintWhenUseful;
    if (debugMode) {
      delegationHint += rt.delegationHintWhenUsefulDebug;
    }
  } else if (frequency === "hard_tasks") {
    delegationHint = rt.delegationHintHardTasks;
  }

  if (delegationHint) {
    currentContent += delegationHint;
  }
  if (planHint) {
    currentContent += planHint;
  }

  // --- Sub-Agent Documentation Injection (Startup OR On-Enable) ---
  const enableSecondary = pluginConfig.get("enableSecondaryAgent");

  // Reset the injection flag on the first turn of a new conversation (in memory only).
  if (isFirstTurn) {
    state.subAgentDocsInjected = false;
  }

  if (enableSecondary && !state.subAgentDocsInjected) {
    const { currentWorkingDirectory } = state;
    const candidatePaths = getSubAgentDocsCandidatePaths(currentWorkingDirectory);

    // The first candidate that is readable and non-blank wins.
    let docsInjected = false;
    for (const subAgentDocsPath of candidatePaths) {
      try {
        const docsContent = await readFile(subAgentDocsPath, "utf-8");
        if (docsContent && docsContent.trim().length > 0) {
          currentContent += `\n\n---\n\n${docsContent}\n\n---\n\n`;
          ctl.debug(`subagent_docs.md injected into context from: ${subAgentDocsPath}`);
          state.subAgentDocsInjected = true;
          docsInjected = true;
          break;
        }
      } catch {
        // Keep trying fallback paths.
      }
    }

    if (!docsInjected) {
      ctl.debug("subagent_docs.md not found or failed to load from plugin/workspace paths. Skipping injection.");
    }
  }

  // --- Tools Documentation & Startup File Injection (First Turn Only) ---
  if (isFirstTurn) {
    const simpleSystemPrompt = pluginConfig.get("simpleSystemPrompt");
    let injectionContent = simpleSystemPrompt ? TOOLS_DOCUMENTATION_LITE : TOOLS_DOCUMENTATION;

    try {
      const { currentWorkingDirectory } = state;
      const candidateStartupPaths = [
        join(currentWorkingDirectory, ".toolbox", "startup.md"),
        join(currentWorkingDirectory, "instructions", "startup.md"),
        join(currentWorkingDirectory, "startup.md"),
      ];

      // The first startup.md that can be read is used, even if it turns out to be empty.
      let startupContent = "";
      let usedStartupPath = "";
      for (const startupPath of candidateStartupPaths) {
        try {
          startupContent = await readFile(startupPath, "utf-8");
          usedStartupPath = dirname(startupPath);
          ctl.debug(`startup.md loaded from: ${startupPath}`);
          break;
        } catch {
          // Keep trying
        }
      }

      if (startupContent) {
        // startup.md is read as a list of file paths, one per non-blank line.
        const filesToRead = startupContent.split('\n').map(f => f.trim()).filter(f => f);

        for (const file of filesToRead) {
          // Try relative to startup.md folder first, then relative to CWD
          const candidateFilePaths = [
            join(usedStartupPath, file),
            join(currentWorkingDirectory, file),
          ];

          let loaded = false;
          for (const filePath of candidateFilePaths) {
            try {
              const fileContent = await readFile(filePath, "utf-8");
              if (fileContent.trim().length > 0) {
                // Each file is prepended, so later entries end up above earlier ones.
                injectionContent = `\n\n---\n\n${fileContent}\n\n---\n\n${injectionContent}`;
                ctl.debug(`${file} loaded and injected into context from ${filePath}.`);
                loaded = true;
                break;
              }
            } catch {
              // Keep trying
            }
          }
          if (!loaded) {
            ctl.debug(`Failed to load ${file} from startup.md.`);
          }
        }
      }
    } catch {
      ctl.debug("No startup.md file found or failed to load.");
    }

    // --- Memory Injection (First Turn Only) ---
    // M.3: Session resume block — inject persisted notes, last browser URL, and
    // recently modified files so the model can pick up where it left off.
    const { sessionNotes, lastBrowserUrl, recentFiles } = state;
    if (sessionNotes || lastBrowserUrl || (recentFiles && recentFiles.length > 0)) {
      const parts: string[] = ["## Resumed Session"];
      parts.push(`Working directory: \`${state.currentWorkingDirectory}\``);
      if (recentFiles && recentFiles.length > 0) {
        parts.push(`Recently modified files: ${recentFiles.map(f => `\`${f}\``).join(", ")}`);
      }
      if (lastBrowserUrl) {
        parts.push(`Last browser URL: ${lastBrowserUrl}`);
      }
      if (sessionNotes) {
        parts.push(`Session notes: ${sessionNotes}`);
      }
      const resumeBlock = parts.join("\n") +
        "\n\nYou can clear this note with `save_session_note` once you've oriented yourself.";
      injectionContent = `${resumeBlock}\n\n---\n\n${injectionContent}`;
      ctl.debug("[session] Injected session resume block into first-turn context.");
    }

    // If the memory feature is enabled and the DB has entries, prepend a compact
    // summary so the LLM knows what it has remembered from previous sessions.
    const enableMemory = pluginConfig.get("enableMemory");
    if (enableMemory) {
      try {
        const dbPath = join(state.currentWorkingDirectory, ".memories.db");
        // eslint-disable-next-line @typescript-eslint/no-require-imports
        const Database: new (path: string, opts?: object) => any = require("better-sqlite3");
        const db = new Database(dbPath, { readonly: true });
        let rows: Array<{ id: number; fact: string; tags: string }> = [];
        try {
          rows = db.prepare("SELECT id, fact, tags FROM memories ORDER BY created_at DESC LIMIT 50").all();
        } finally {
          // Release the handle even when the query throws; otherwise the open
          // connection would leak into the silent catch below.
          db.close();
        }
        if (rows.length > 0) {
          const lines = rows.map(r =>
            `- [id:${r.id}]${r.tags ? ` [${r.tags}]` : ""} ${r.fact}`
          ).join("\n");
          const memoryBlock =
            `## Memories from Previous Sessions\n` +
            `You have ${rows.length} stored memory entries. ` +
            `Use \`list_memories\`, \`search_memories\`, \`update_memory\`, and \`delete_memory\` to manage them.\n\n` +
            lines;
          injectionContent = `${memoryBlock}\n\n---\n\n${injectionContent}`;
          ctl.debug(`[memory] Injected ${rows.length} memories into first-turn context.`);
        }
      } catch {
        // DB doesn't exist yet or native binding unavailable — skip silently.
      }
    }

    // Everything gathered for the first turn goes in front of the user's prompt.
    currentContent = `${injectionContent}\n\n---\n\n${currentContent}`;
  }

  // Always increment message count and flush all state mutations in a single write.
  state.messageCount++;

  // ── N.13: Auto-capture memory ─────────────────────────────────────────────
  // Maintain a rolling buffer of recent user messages (used by the extractor).
  const rawUserText = userPrompt.substring(0, 1000); // cap each entry
  const msgBuf = state.recentUserMessages ?? [];
  msgBuf.push(rawUserText);
  if (msgBuf.length > 15) msgBuf.shift();
  state.recentUserMessages = msgBuf;

  const memoryAutoCapture: boolean = pluginConfig.get("memoryAutoCapture") ?? false;
  const memoryAutoCaptureInterval: number = pluginConfig.get("memoryAutoCaptureInterval") ?? 5;
  const memoryEnabled: boolean = pluginConfig.get("enableMemory") ?? false;

  // Capture runs on every Nth message, where N is the configured interval.
  if (memoryAutoCapture && memoryEnabled && state.messageCount % memoryAutoCaptureInterval === 0) {
    const endpoint: string = pluginConfig.get("secondaryAgentEndpoint") ?? "http://localhost:1234/v1";
    const modelId: string = pluginConfig.get("secondaryModelId") ?? "local-model";
    const cwd = state.currentWorkingDirectory;
    // Fire-and-forget: never block the user's turn on capture
    runAutoMemoryCapture(endpoint, modelId, cwd, [...msgBuf]).catch(e => {
      ctl.debug("[auto-memory] Capture failed silently:", String(e));
    });
  }

  // A failed save is logged but must not fail the user's turn.
  try {
    await savePersistedState(state);
  } catch (e) {
    ctl.debug("Failed to persist plugin state.", e);
  }

  // Return modified content string, or the original message object if nothing changed.
  if (currentContent !== userPrompt) {
    return currentContent;
  }
  return userMessage;
}

// ── N.13: Auto-capture memory extractor ──────────────────────────────────────

/**
 * Ask the secondary endpoint to extract memorable facts from recent user messages,
 * then store each fact in the workspace memory via `insertAutoMemory`.
 *
 * The request is a POST to `<endpoint>/chat/completions` with a 30 second timeout.
 * The reply is read from `choices[0].message.content` and parsed as a bullet list:
 * every line that starts with "- " (after trimming) is one fact, and only facts of
 * 10 to 400 characters are kept.
 *
 * Runs fire-and-forget — any error (network, JSON, storage) rejects the returned
 * promise and is expected to be handled by the caller. A non-OK HTTP status, a
 * non-text reply, or a reply without usable facts ends the run silently.
 *
 * @param endpoint - Base URL of the secondary endpoint; trailing slashes are ignored.
 * @param modelId - Model identifier sent in the request body.
 * @param cwd - Working directory passed through to `insertAutoMemory`.
 * @param recentMessages - Recent user messages; nothing happens when this is empty.
 */
async function runAutoMemoryCapture(
  endpoint: string,
  modelId: string,
  cwd: string,
  recentMessages: string[],
): Promise<void> {
  if (recentMessages.length === 0) return;

  // Cap the conversation excerpt so the extraction prompt stays small.
  const context = recentMessages.join("\n\n---\n\n").substring(0, 4000);
  const prompt =
    "You are a memory extraction assistant. Extract 3-7 specific, useful facts from " +
    "these recent conversation messages that would help an AI assistant in future sessions. " +
    "Focus on: user preferences, project details, technical decisions, constraints, and " +
    "recurring patterns. Skip one-off or trivial details.\n\n" +
    "Format: one fact per line, starting with \"- \".\n\n" +
    "Recent messages:\n" + context + "\n\nFacts to remember:";

  // Drop trailing slashes so an endpoint such as ".../v1/" does not yield "//chat/completions".
  const baseUrl = endpoint.replace(/\/+$/, "");

  const res = await fetch(`${baseUrl}/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: modelId,
      messages: [{ role: "user", content: prompt }],
      temperature: 0.2,
      stream: false,
    }),
    signal: AbortSignal.timeout(30_000),
  });
  if (!res.ok) return;

  const data = await res.json();
  const content: unknown = data?.choices?.[0]?.message?.content;
  // Anything other than plain text (missing, null, structured parts) has no facts to parse.
  if (typeof content !== "string") return;

  // Trim BEFORE stripping the bullet marker so indented bullets ("  - fact") are
  // stored without a leftover "- " prefix.
  const facts = content
    .split("\n")
    .map(line => line.trim())
    .filter(line => line.startsWith("- "))
    .map(line => line.slice(2).trim())
    .filter(fact => fact.length >= 10 && fact.length <= 400);

  if (facts.length === 0) return;

  // Loaded lazily: the memory tools are only needed once there is something to store.
  const { insertAutoMemory } = await import("./tools/memoryTools");
  for (const fact of facts) {
    await insertAutoMemory(cwd, fact);
  }
}

/**
 * Build the prompt text for the "retrieval" strategy.
 *
 * Loads the embedding model, runs file retrieval for the user's prompt, drops entries
 * whose score is not above the configured affinity threshold, and wraps the remaining
 * entries in the localized citation prefix/entry/suffix strings. When no entry is left,
 * the localized "no relevant citations" note followed by the original query is returned.
 * Progress is reported through a status with one sub-status per processed file.
 *
 * @param ctl - Preprocessor controller (client, status, citations, debug, abort signal).
 * @param originalUserPrompt - The user's text, used as the retrieval query.
 * @param files - Files to retrieve from.
 * @returns The prompt text to use in place of the user's original text.
 */
async function prepareRetrievalResultsContextInjection(
  ctl: PromptPreprocessorController,
  originalUserPrompt: string,
  files: Array<FileHandle>,
): Promise<string> {
  const config = ctl.getPluginConfig(pluginConfigSchematics);
  const maxEntries = config.get("retrievalLimit");
  const minAffinity = config.get("retrievalAffinityThreshold");

  // Sub-status handle for each file, registered when retrieval announces its work list.
  const perFileStatus = new Map<FileHandle, PredictionProcessStatusController>();

  // Layer 2: localized runtime strings for status and citation text
  const messages = getDict(
    ctl.getPluginConfig(pluginConfigSchematics).get("messageLanguage")
  ).runtime;

  const overallStatus = ctl.createStatus({
    status: "loading",
    text: messages.statusLoadingEmbeddingModel,
  });

  // Using the same model as rag-v1
  const embeddingModel = await ctl.client.embedding.model("nomic-ai/nomic-embed-text-v1.5-GGUF", {
    signal: ctl.abortSignal,
  });
  overallStatus.setState({
    status: "loading",
    text: messages.statusRetrievingCitations,
  });

  const retrieval = await ctl.client.files.retrieve(originalUserPrompt, files, {
    embeddingModel,
    // The affinity threshold is not passed here; low-scoring entries are filtered below.
    limit: maxEntries,
    signal: ctl.abortSignal,
    onFileProcessList(pendingFiles) {
      for (const pending of pendingFiles) {
        const subStatus = overallStatus.addSubStatus({
          status: "waiting",
          text: `Process ${pending.name} for retrieval`,
        });
        perFileStatus.set(pending, subStatus);
      }
    },
    onFileProcessingStart(file) {
      const subStatus = perFileStatus.get(file)!;
      subStatus.setState({ status: "loading", text: `Processing ${file.name} for retrieval` });
    },
    onFileProcessingEnd(file) {
      const subStatus = perFileStatus.get(file)!;
      subStatus.setState({ status: "done", text: `Processed ${file.name} for retrieval` });
    },
    onFileProcessingStepProgress(file, step, progressInStep) {
      let verb = "Embedding";
      if (step === "loading") {
        verb = "Loading";
      } else if (step === "chunking") {
        verb = "Chunking";
      }
      const percent = (progressInStep * 100).toFixed(1);
      perFileStatus.get(file)!.setState({
        status: "loading",
        text: `${verb} ${file.name} for retrieval (${percent}%)`,
      });
    },
  });

  // Keep only entries scoring above the threshold. The result object is updated in
  // place because it is later handed to ctl.addCitations.
  retrieval.entries = retrieval.entries.filter(entry => entry.score > minAffinity);
  const hits = retrieval.entries;

  if (hits.length === 0) {
    // Retrieval ran but nothing relevant was found.
    overallStatus.setState({
      status: "canceled",
      text: messages.statusNoRelevantCitations,
    });
    ctl.debug("No relevant citations found for user query");
    const fallbackContent =
      messages.noRelevantCitationsNote + `\n\nUser Query:\n\n${originalUserPrompt}`;
    ctl.debug("Processed content", fallbackContent);
    return fallbackContent;
  }

  // Retrieval produced results: report them and build the citation block.
  overallStatus.setState({
    status: "done",
    text: messages.statusRetrievedCitations(hits.length),
  });
  ctl.debug("Retrieval results", retrieval);

  let injectedContent = "";
  injectedContent += messages.citationPrefix;
  for (const [index, hit] of hits.entries()) {
    // Citations are numbered from 1 in result order.
    injectedContent += messages.citationEntry(index + 1, hit.content);
  }
  await ctl.addCitations(retrieval);
  injectedContent += messages.citationSuffix(originalUserPrompt);

  ctl.debug("Processed content", injectedContent);
  return injectedContent;
}

/**
 * Build the prompt for the "inject-full-content" strategy.
 *
 * Removes every non-image file from `input`, parses each one, and replaces the
 * message text with the localized header, one block per document, and a suffix that
 * carries the user's original text.
 *
 * If no document was consumed the message text is left untouched, so the user's
 * prompt is never replaced by an empty string.
 *
 * @param ctl - Preprocessor controller (client, debug, abort signal, plugin config).
 * @param input - The user's message; mutated in place (files consumed, text replaced).
 * @returns The same `input` instance.
 */
async function prepareDocumentContextInjection(
  ctl: PromptPreprocessorController,
  input: ChatMessage,
): Promise<ChatMessage> {
  const documentInjectionSnippets: Map<FileHandle, string> = new Map();
  const files = input.consumeFiles(ctl.client, file => file.type !== "image");
  for (const file of files) {
    // This should take no time as the result is already in the cache
    const { content } = await ctl.client.files.parseDocument(file, {
      signal: ctl.abortSignal,
    });

    // Log the file's name; interpolating the handle object itself does not yield a name.
    ctl.debug(text`
      Strategy: inject-full-content. Injecting full content of file '${file.name}' into the
      context. Length: ${content.length}.
    `);
    documentInjectionSnippets.set(file, content);
  }

  if (documentInjectionSnippets.size === 0) {
    // Nothing to inject: keep the original text instead of overwriting it with "".
    return input;
  }

  // Layer 2: resolve runtime dict for document injection strings
  const rtDoc = getDict(
    ctl.getPluginConfig(pluginConfigSchematics).get("messageLanguage")
  ).runtime;

  let formattedFinalUserPrompt = "";
  formattedFinalUserPrompt += rtDoc.documentInjectionHeader;

  // Map iteration follows insertion order, so documents appear in the order they were parsed.
  for (const [fileHandle, snippet] of documentInjectionSnippets) {
    formattedFinalUserPrompt += rtDoc.documentInjectionFileBlock(fileHandle.name, snippet);
  }

  formattedFinalUserPrompt += rtDoc.documentInjectionSuffix(input.getText());

  input.replaceText(formattedFinalUserPrompt);
  return input;
}

/**
 * Measure how much of the model's context window the given chat occupies.
 *
 * The chat is rendered with the model's prompt template, the rendered text is
 * token-counted, and the count is compared with the model's context length.
 *
 * @param ctx - Chat to measure.
 * @param model - Model whose template, tokenizer and context length are used.
 * @returns Token count in context, model context length, remaining tokens
 *   (length minus count) and the occupied share as a percentage.
 */
async function measureContextWindow(ctx: Chat, model: LLMDynamicHandle) {
  const renderedPrompt = await model.applyPromptTemplate(ctx);
  const usedTokens = await model.countTokens(renderedPrompt);
  const windowSize = await model.getContextLength();

  return {
    totalTokensInContext: usedTokens,
    modelContextLength: windowSize,
    modelRemainingContextLength: windowSize - usedTokens,
    contextOccupiedPercent: (usedTokens / windowSize) * 100,
  };
}

/**
 * Decide how the newly attached files should be supplied to the model.
 *
 * The current context usage is measured, every file is parsed and token-counted
 * (stopping early once the files alone exceed the remaining context), and the sum of
 * file and prompt tokens is compared with a token budget derived from the remaining
 * context. Above the budget the result is "retrieval", otherwise "inject-full-content".
 * Progress is shown through a status with one temporary sub-status per file.
 *
 * @param ctl - Preprocessor controller (client, status, debug, abort signal, config).
 * @param originalUserPrompt - The user's text; its token count is part of the total.
 * @param files - Files whose content is being considered for injection.
 * @returns "retrieval" or "inject-full-content".
 */
async function chooseContextInjectionStrategy(
  ctl: PromptPreprocessorController,
  originalUserPrompt: string,
  files: Array<FileHandle>,
): Promise<DocumentContextInjectionStrategy> {
  // Layer 2: localized runtime strings for the strategy-choice status messages
  const messages = getDict(
    ctl.getPluginConfig(pluginConfigSchematics).get("messageLanguage")
  ).runtime;

  const decisionStatus = ctl.createStatus({
    status: "loading",
    text: messages.statusDecidingStrategy,
  });

  const llm = await ctl.client.llm.model();
  const chat = await ctl.pullHistory();

  // Measure the context window
  const measurement = await measureContextWindow(chat, llm);
  const totalTokensInContext = measurement.totalTokensInContext;
  const modelContextLength = measurement.modelContextLength;
  const modelRemainingContextLength = measurement.modelRemainingContextLength;
  const contextOccupiedPercent = measurement.contextOccupiedPercent;

  ctl.debug(
    [
      `Context measurement result:\n\n`,
      `\tTotal tokens in context: ${totalTokensInContext}\n`,
      `\tModel context length: ${modelContextLength}\n`,
      `\tModel remaining context length: ${modelRemainingContextLength}\n`,
      `\tContext occupied percent: ${contextOccupiedPercent.toFixed(2)}%\n`,
    ].join(""),
  );

  // Get token count of provided files
  let fileTokens = 0;
  let readMillis = 0;
  let tokenizeMillis = 0;
  for (const file of files) {
    const readStartedAt = performance.now();

    const fileStatus = decisionStatus.addSubStatus({
      status: "loading",
      text: messages.statusLoadingParser(file.name),
    });

    // Both values are updated once the parser is known and then used by progress updates.
    let progressVerb = "Reading";
    let parserSuffix = "";

    const parsed = await ctl.client.files.parseDocument(file, {
      signal: ctl.abortSignal,
      onParserLoaded: parser => {
        fileStatus.setState({
          status: "loading",
          text: `${parser.library} loaded for ${file.name}...`,
        });
        if (parser.library !== "builtIn") {
          progressVerb = "Parsing";
          parserSuffix = ` with ${parser.library}`;
        }
      },
      onProgress: progress => {
        const percent = (progress * 100).toFixed(2);
        fileStatus.setState({
          status: "loading",
          text: `${progressVerb} file ${file.name}${parserSuffix}... (${percent}%)`,
        });
      },
    });
    fileStatus.remove();

    readMillis += performance.now() - readStartedAt;

    // tokenize file content
    const tokenizeStartedAt = performance.now();
    const tokensInFile = await llm.countTokens(parsed.content);
    fileTokens += tokensInFile;
    tokenizeMillis += performance.now() - tokenizeStartedAt;

    // No need to look at further files once the files alone overflow the remaining context.
    if (fileTokens > modelRemainingContextLength) {
      break;
    }
  }
  ctl.debug(`Total file read time: ${readMillis.toFixed(2)} ms`);
  ctl.debug(`Total tokenize time: ${tokenizeMillis.toFixed(2)} ms`);

  // Calculate total token count of files + user prompt
  ctl.debug(`Original User Prompt: ${originalUserPrompt}`);
  const promptTokens = (await llm.tokenize(originalUserPrompt)).length;
  const filesPlusPromptTokens = fileTokens + promptTokens;

  // Calculate the available context tokens: 70% of the remaining context, scaled
  // down further by the share of the context that is already occupied.
  const contextOccupiedFraction = contextOccupiedPercent / 100;
  const targetContextUsePercent = 0.7;
  const targetContextUsage = targetContextUsePercent * (1 - contextOccupiedFraction);
  const tokenBudget = Math.floor(modelRemainingContextLength * targetContextUsage);

  // Debug log
  const calculationLog = [
    "Strategy Calculation:",
    `\tTotal Tokens in All Files: ${fileTokens}`,
    `\tTotal Tokens in User Prompt: ${promptTokens}`,
    `\tModel Context Remaining: ${modelRemainingContextLength} tokens`,
    `\tContext Occupied: ${contextOccupiedPercent.toFixed(2)}%`,
    `\tAvailable Tokens: ${tokenBudget}\n`,
  ];
  for (const logLine of calculationLog) {
    ctl.debug(logLine);
  }

  const overBudget = filesPlusPromptTokens > tokenBudget;
  if (!overBudget) {
    const strategy = "inject-full-content";
    decisionStatus.setState({
      status: "done",
      text: messages.statusStrategyChosen(strategy, "All content can fit into the context"),
    });
    return strategy;
  }

  const strategy = "retrieval";
  ctl.debug(
    `Chosen context injection strategy: '${strategy}'. Total file + prompt token count: ` +
      `${filesPlusPromptTokens} > ${
        targetContextUsage * 100
      }% * available context tokens: ${tokenBudget}`,
  );
  decisionStatus.setState({
    status: "done",
    text: messages.statusStrategyChosen(strategy, "Retrieval is optimal for the size of content provided"),
  });
  return strategy;
}
// lm-studio-toolbox

// lm-studio-toolbox