import {
  text,
  type Chat,
  type ChatMessage,
  type FileHandle,
  type LLMDynamicHandle,
  type PredictionProcessStatusController,
  type PromptPreprocessorController,
} from "@lmstudio/sdk";
import { readFile } from "fs/promises";
import { join } from "path";
import { toolsPluginConfig } from "./config";
import { buildToolsDocumentation } from "./toolsDocumentation";
import { getPersistedState, savePersistedState } from "./stateManager";

/**
 * How attached non-image documents are brought into the prompt.
 *
 * - "inject-full-content": the parsed text of every new file is written into the user message.
 * - "retrieval": only citations retrieved for the user's query are written into the prompt.
 * - "none": no injection. The strategy chooser in this file never returns this value.
 */
type DocumentContextInjectionStrategy = "none" | "inject-full-content" | "retrieval";

/**
 * Stem patterns (English and Portuguese) that mark a request as a candidate for
 * delegation to the secondary agent, paired with the wording used in the hint.
 *
 * Every pattern is anchored with a leading `\b` only. Do not add a trailing `\b`:
 * it stops stems such as "pesquis" or "investig" from matching their inflected forms,
 * and because `\b` in a non-unicode JavaScript regex treats accented letters as
 * non-word characters, an alternative ending in "é" could never match before a space.
 *
 * Order matters: the first matching entry wins.
 */
const DELEGATION_TRIGGERS: ReadonlyArray<{ pattern: RegExp; suggestion: string }> = [
  // Research & analysis
  { pattern: /\b(resum[aoe]|summar|sintetiz)/i, suggestion: "summarize this content" },
  { pattern: /\b(pesquis|research|investig|busca[re]?|procur[aer]|find.*about|look.*up)/i, suggestion: "research this topic" },
  { pattern: /\b(revis[aãe]|review|analy[sz][ei]|analis[ae]|audit)/i, suggestion: "review/analyze this" },
  { pattern: /\b(compar[aei]|compare|diff|diferenç)/i, suggestion: "compare these items" },
  { pattern: /\b(explic[aã]|explain|what is|o que é|como funciona|how does)/i, suggestion: "explain or research background" },
  // Documentation & cataloging
  { pattern: /\b(document|documentaç|gerar doc|write doc)/i, suggestion: "draft documentation" },
  { pattern: /\b(list[ae]r|catalog|levant[ae])/i, suggestion: "catalog or list items" },
  // Creative & visual work
  { pattern: /\b(tratamento visual|visual treatment|lookbook|moodboard|style guide)/i, suggestion: "research visual references or generate supporting text" },
  { pattern: /\b(roteiro|screenplay|script|argumento|sinopse|synopsis)/i, suggestion: "analyze or format the script content" },
  { pattern: /\b(refator[ae]|refactor|reorganiz|reestrutur|restructur)/i, suggestion: "refactor or reorganize this code" },
  { pattern: /\b(test[ae]r|write tests|criar testes|unit test|coverage)/i, suggestion: "generate test cases" },
  { pattern: /\b(traduz|translat|localiz)/i, suggestion: "translate or localize this content" },
];

/**
 * Reports whether a pulled history holds no messages yet.
 *
 * The value is probed structurally (array, `messages` array, numeric `length`).
 * When none of these shapes is recognised the history is treated as empty, so the
 * startup documentation is injected at least once; with an unexpected shape this
 * means it is injected on every turn.
 */
function isHistoryEmpty(history: unknown): boolean {
  if (Array.isArray(history)) {
    return history.length === 0;
  }
  if (typeof history === "object" && history !== null) {
    const probe = history as { messages?: unknown; length?: unknown };
    if (Array.isArray(probe.messages)) {
      return probe.messages.length === 0;
    }
    if (typeof probe.length === "number") {
      return probe.length === 0;
    }
  }
  return true;
}

/**
 * Prompt preprocessor: rewrites the incoming user message before it reaches the model.
 *
 * Steps, in order:
 *  1. Document context: new non-image attachments are either injected in full or
 *     replaced by retrieved citations; attachments from earlier turns always go
 *     through retrieval.
 *  2. Delegation advice for the secondary agent (full advice on the first turn,
 *     at most one pattern-based hint on later turns).
 *  3. At most one periodic system hint, driven by the persisted message counter.
 *  4. First turn only: tools documentation, the files listed in `startup.md`
 *     (resolved against the persisted working directory) and, unless memories were
 *     already injected, a reminder to call `Recall`.
 *
 * Side effects: resets the persisted `messageCount` / `largeFilesSaved` on the first
 * turn and increments `messageCount` on every call.
 *
 * @param ctl - Controller supplied by the SDK for this preprocessing run.
 * @param userMessage - The user's message; mutated when full-content injection is chosen.
 * @returns The rewritten prompt text, or `userMessage` itself when nothing was added.
 */
export async function promptPreprocessor(
  ctl: PromptPreprocessorController,
  userMessage: ChatMessage,
): Promise<string | ChatMessage> {
  const userPrompt = userMessage.getText();

  // 1. RAG / Context Injection Logic
  const history = await ctl.pullHistory();

  // Must be evaluated before append(): afterwards the history is never empty.
  const isFirstTurn = isHistoryEmpty(history);

  history.append(userMessage);

  const newFiles = userMessage.getFiles(ctl.client).filter(f => f.type !== "image");
  const files = history.getAllFiles(ctl.client).filter(f => f.type !== "image");

  let processingResult: string | ChatMessage | null = null;

  if (newFiles.length > 0) {
    const strategy = await chooseContextInjectionStrategy(ctl, userPrompt, newFiles);
    if (strategy === "inject-full-content") {
      processingResult = await prepareDocumentContextInjection(ctl, userMessage);
    } else if (strategy === "retrieval") {
      processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, files);
    }
  } else if (files.length > 0) {
    processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, files);
  }

  // Determine the current content after RAG processing
  let currentContent: string;
  if (processingResult) {
    currentContent =
      typeof processingResult === "string" ? processingResult : processingResult.getText();
  } else {
    currentContent = userPrompt;
  }

  // --- Delegation & Safety Instructions ---
  const pluginConfig = ctl.getPluginConfig(toolsPluginConfig);
  const enableSecondary = pluginConfig.get("enableSecondaryAgent");
  const frequency = pluginConfig.get("subAgentFrequency");

  // Build sub-agent capabilities once (used for both delegation hint and docs)
  const allowFileSystem = pluginConfig.get("subAgentAllowFileSystem");
  const allowWeb = pluginConfig.get("subAgentAllowWeb");
  const allowCode = pluginConfig.get("subAgentAllowCode");
  const subCaps: string[] = ["memory operations", "summarization"];
  if (allowFileSystem) subCaps.push("file reading");
  if (allowWeb) subCaps.push("web search");
  if (allowCode) subCaps.push("code execution");

  if (enableSecondary && frequency !== "never") {
    if (isFirstTurn) {
      const canDo = subCaps.join(", ");
      const cannotDo: string[] = [];
      if (!allowCode) cannotDo.push("coding", "writing/creating files");
      if (!allowWeb) cannotDo.push("web search");
      if (!allowFileSystem) cannotDo.push("file operations");
      const cannotStr = cannotDo.length > 0
        ? ` Do NOT delegate ${cannotDo.join(" or ")} to it — handle those yourself.`
        : "";

      currentContent += `\n\n**SYSTEM ADVICE:** A secondary agent (lighter model) is available via \`consult_secondary_agent\`.\nCapabilities: ${canDo}.${cannotStr}\n`;
    } else {
      // Special hint for visual work with images — suggest plan_image_layout
      if (/\b(tratamento|visual|slides?|apresentação|presentation|imagens?|images?|galeri)/i.test(userPrompt) &&
          /\b(repet|duplicat|repetid|repeat)/i.test(userPrompt)) {
        currentContent += `\n\n⚠️ [System: To avoid repeating images, use \`plan_image_layout(image_directory, section_count)\` to get a deterministic assignment, then follow it exactly. Use \`audit_html_assets\` after saving to verify.]`;
      }

      // Pattern-based delegation suggestion on subsequent turns (no match = no hint)
      const trigger = DELEGATION_TRIGGERS.find(({ pattern }) => pattern.test(userPrompt));
      if (trigger) {
        currentContent += `\n\n💡 **Delegation opportunity:** Consider using \`consult_secondary_agent\` to ${trigger.suggestion} while you handle the main task.`;
      }
    }
  }

  const state = await getPersistedState();

  // Reset state on the first turn of a new conversation
  if (isFirstTurn) {
    state.messageCount = 0;
    state.largeFilesSaved = [];
    await savePersistedState(state);
  }

  // System hints — at most one per turn to avoid noise
  if (!isFirstTurn && state.messageCount > 0) {
    const isCheckpointTurn = state.messageCount % 4 === 0;
    const hasLargeFiles = state.largeFilesSaved && state.largeFilesSaved.length > 0;

    if (isCheckpointTurn && state.messageCount >= 8) {
      // Token budget warning takes priority on long conversations
      currentContent += '\n\n⚠️ [System: Long conversation. Use `replace_text_in_file` for edits, `consult_secondary_agent` for subtasks. Consider a new conversation for unrelated tasks.]';
    } else if (hasLargeFiles) {
      // Replace hint when large files exist
      currentContent += '\n\n[System: Use `replace_text_in_file` for edits on large files instead of rewriting with `save_file`.]';
    } else if (isCheckpointTurn) {
      // Memory reminder on other checkpoint turns
      currentContent += '\n\n[System: Remember to use `Remember` to store important user preferences or project facts.]';
    }
  }

  // 2. Tools Documentation & Memory Injection (Startup Only)
  if (isFirstTurn) {
    const toolsDoc = buildToolsDocumentation({
      allowGit: pluginConfig.get("allowGitOperations"),
      allowDb: pluginConfig.get("allowDatabaseInspection"),
      allowNotify: pluginConfig.get("allowSystemNotifications"),
      allowJavascript: pluginConfig.get("allowJavascriptExecution") || pluginConfig.get("allowAllCode"),
      allowPython: pluginConfig.get("allowPythonExecution") || pluginConfig.get("allowAllCode"),
      allowTerminal: pluginConfig.get("allowTerminalExecution") || pluginConfig.get("allowAllCode"),
      allowShell: pluginConfig.get("allowShellCommandExecution") || pluginConfig.get("allowAllCode"),
      enableWikipedia: pluginConfig.get("enableWikipediaTool"),
      enableImageAnalysis: pluginConfig.get("enableImageAnalysis") !== false,
      enableVideoAnalysis: pluginConfig.get("enableVideoAnalysis") !== false,
      enableSecondaryAgent: enableSecondary,
      subAgentCapabilities: subCaps.join(", "),
      enableDesignMode: pluginConfig.get("enableDesignMode"),
    });
    let injectionContent = toolsDoc;

    try {
      // startup.md lists one file path per line, relative to the working directory.
      const startupPath = join(state.currentWorkingDirectory, "startup.md");
      const startupContent = await readFile(startupPath, "utf-8");
      const filesToRead = startupContent.split('\n').map(f => f.trim()).filter(f => f);

      for (const file of filesToRead) {
        const filePath = join(state.currentWorkingDirectory, file);
        try {
          const fileContent = await readFile(filePath, "utf-8");
          if (fileContent.trim().length > 0) {
            // Each file is prepended, so later entries end up earlier in the prompt.
            injectionContent = `\n\n---\n\n${fileContent}\n\n---\n\n${injectionContent}`;
            ctl.debug(`${file} loaded and injected into context.`);
          }
        } catch (e) {
          // Best effort: a missing or unreadable entry must not block the others.
          ctl.debug(`Failed to load ${file} from startup.md.`, e);
        }
      }
    } catch (e) {
      ctl.debug("No startup.md file found or failed to load.");
    }

    // Only remind about Recall if auto-inject is off (otherwise memories are already loaded by the preprocessor)
    const autoInjectOn = currentContent.includes("[Persistent Memory");
    const recallReminder = autoInjectOn
      ? ""
      : `\n\n⚠️ REMINDER: Your FIRST tool call must be Recall("${userPrompt.substring(0, 80).replace(/"/g, '')}"). Do NOT skip this.`;
    currentContent = `${injectionContent}\n\n---${recallReminder}\n\n${currentContent}`;
  }

  // Update message count
  try {
    state.messageCount++;
    await savePersistedState(state);
  } catch (e) {
    ctl.debug("Failed to update message count.", e);
  }

  // Return the final content string if it changed, otherwise the original message
  if (currentContent !== userPrompt) {
    return currentContent;
  }

  return userMessage;
}

/**
 * Retrieves passages relevant to the user's prompt from `files` and builds the text
 * that replaces the prompt.
 *
 * Entries whose score does not exceed the configured `retrievalAffinityThreshold`
 * are discarded. When entries remain they are listed as numbered citations and
 * registered through `ctl.addCitations`; otherwise the model is told that nothing
 * was found. In both cases the original query is appended at the end.
 *
 * @param ctl - Controller used for config, status reporting, citations and the SDK client.
 * @param originalUserPrompt - The user's query, used both for retrieval and in the output.
 * @param files - Files to search.
 * @returns The prompt text including citations (or the no-results note) and the query.
 */
async function prepareRetrievalResultsContextInjection(
  ctl: PromptPreprocessorController,
  originalUserPrompt: string,
  files: Array<FileHandle>,
): Promise<string> {
  const config = ctl.getPluginConfig(toolsPluginConfig);
  const maxEntries = config.get("retrievalLimit");
  const minScore = config.get("retrievalAffinityThreshold");

  // One sub-status per file, created when the SDK announces the files to process.
  const perFileStatus = new Map<FileHandle, PredictionProcessStatusController>();

  const overallStatus = ctl.createStatus({
    status: "loading",
    text: `Loading an embedding model for retrieval...`,
  });

  // Using the same model as rag-v1
  const embeddingModel = await ctl.client.embedding.model(
    "nomic-ai/nomic-embed-text-v1.5-GGUF",
    { signal: ctl.abortSignal },
  );

  overallStatus.setState({
    status: "loading",
    text: `Retrieving relevant citations for user query...`,
  });

  const retrieval = await ctl.client.files.retrieve(originalUserPrompt, files, {
    embeddingModel,
    // The affinity threshold is applied after the call, by filtering the entries.
    limit: maxEntries,
    signal: ctl.abortSignal,
    onFileProcessList: filesToProcess => {
      for (const pending of filesToProcess) {
        const subStatus = overallStatus.addSubStatus({
          status: "waiting",
          text: `Process ${pending.name} for retrieval`,
        });
        perFileStatus.set(pending, subStatus);
      }
    },
    onFileProcessingStart: file => {
      const subStatus = perFileStatus.get(file)!;
      subStatus.setState({ status: "loading", text: `Processing ${file.name} for retrieval` });
    },
    onFileProcessingEnd: file => {
      const subStatus = perFileStatus.get(file)!;
      subStatus.setState({ status: "done", text: `Processed ${file.name} for retrieval` });
    },
    onFileProcessingStepProgress: (file, step, progressInStep) => {
      let verb = "Embedding";
      if (step === "loading") {
        verb = "Loading";
      } else if (step === "chunking") {
        verb = "Chunking";
      }
      const percent = (progressInStep * 100).toFixed(1);
      perFileStatus.get(file)!.setState({
        status: "loading",
        text: `${verb} ${file.name} for retrieval (${percent}%)`,
      });
    },
  });

  // Keep only entries scoring strictly above the threshold.
  retrieval.entries = retrieval.entries.filter(candidate => candidate.score > minScore);

  const hitCount = retrieval.entries.length;
  const querySection = `\n\nUser Query:\n\n${originalUserPrompt}`;
  let processedContent: string;

  if (hitCount === 0) {
    // Retrieval ran but nothing passed the threshold.
    overallStatus.setState({
      status: "canceled",
      text: `No relevant citations found for user query`,
    });
    ctl.debug("No relevant citations found for user query");
    processedContent =
      "Important: No citations were found in the user files for the user query. " +
      `In less than one sentence, inform the user of this. ` +
      `Then respond to the query to the best of your ability.` +
      querySection;
  } else {
    // Retrieval ran and produced results: report, then list them in the prompt.
    overallStatus.setState({
      status: "done",
      text: `Retrieved ${hitCount} relevant citations for user query`,
    });
    ctl.debug("Retrieval results", retrieval);

    const citationLines = retrieval.entries
      .map((entry, index) => `Citation ${index + 1}: "${entry.content}"\n\n`)
      .join("");
    await ctl.addCitations(retrieval);

    processedContent =
      "The following citations were found in the files provided by the user:\n\n" +
      citationLines +
      "Use the citations above to respond to the user query, only if they are relevant. " +
      `Otherwise, respond to the best of your ability without them.` +
      querySection;
  }

  ctl.debug("Processed content", processedContent);
  return processedContent;
}

/**
 * Inlines the full parsed text of every non-image attachment into the user message.
 *
 * The message is modified in place: its non-image files are consumed and its text is
 * replaced by a prompt containing each file's content followed by the original query.
 * If no document content was collected, the message text is left unchanged instead of
 * being overwritten with an empty string.
 *
 * @param ctl - Controller providing the SDK client, abort signal and debug logging.
 * @param input - The user message whose attachments are injected; mutated.
 * @returns The same `input` message.
 */
async function prepareDocumentContextInjection(
  ctl: PromptPreprocessorController,
  input: ChatMessage,
): Promise<ChatMessage> {
  const documentInjectionSnippets = new Map<FileHandle, string>();
  const files = input.consumeFiles(ctl.client, file => file.type !== "image");
  for (const file of files) {
    // This should take no time as the result is already in the cache
    const { content } = await ctl.client.files.parseDocument(file, {
      signal: ctl.abortSignal,
    });

    // Log the file name; interpolating the handle itself would not be readable.
    ctl.debug(text`
      Strategy: inject-full-content. Injecting full content of file '${file.name}' into the
      context. Length: ${content.length}.
    `);
    documentInjectionSnippets.set(file, content);
  }

  if (documentInjectionSnippets.size === 0) {
    // Nothing to inject: keep the user's prompt rather than blanking it.
    return input;
  }

  let formattedFinalUserPrompt =
    "This is a Enriched Context Generation scenario.\n\nThe following content was found in the files provided by the user.\n";

  for (const [fileHandle, snippet] of documentInjectionSnippets) {
    formattedFinalUserPrompt += `\n\n** ${fileHandle.name} full content **\n\n${snippet}\n\n** end of ${fileHandle.name} **\n\n`;
  }

  formattedFinalUserPrompt += `Based on the content above, please provide a response to the user query.\n\nUser query: ${input.getText()}`;

  input.replaceText(formattedFinalUserPrompt);
  return input;
}

/**
 * Measures how much of the model's context window a chat occupies.
 *
 * The chat is rendered with the model's prompt template, the rendered text is
 * token-counted, and the count is compared with the model's context length.
 *
 * @param ctx - Chat to measure.
 * @param model - Model whose template, tokenizer and context length are used.
 * @returns Token count in use, context length, remaining tokens, and occupancy in percent.
 */
async function measureContextWindow(ctx: Chat, model: LLMDynamicHandle) {
  const renderedPrompt = await model.applyPromptTemplate(ctx);
  const usedTokens = await model.countTokens(renderedPrompt);
  const windowSize = await model.getContextLength();

  return {
    totalTokensInContext: usedTokens,
    modelContextLength: windowSize,
    modelRemainingContextLength: windowSize - usedTokens,
    contextOccupiedPercent: (usedTokens / windowSize) * 100,
  };
}

/**
 * Decides whether the given files can be injected in full or must go through retrieval.
 *
 * The files are parsed and token-counted (stopping early once they exceed the
 * remaining context), the prompt's tokens are added, and the total is compared with
 * a budget of `floor(remaining * 0.7 * (1 - occupiedFraction))` tokens. A total above
 * the budget selects "retrieval"; otherwise "inject-full-content" is selected.
 *
 * @param ctl - Controller used for status updates, debug logging and SDK access.
 * @param originalUserPrompt - The user's prompt text, counted against the budget.
 * @param files - Files whose content size drives the decision.
 * @returns "retrieval" or "inject-full-content".
 */
async function chooseContextInjectionStrategy(
  ctl: PromptPreprocessorController,
  originalUserPrompt: string,
  files: Array<FileHandle>,
): Promise<DocumentContextInjectionStrategy> {
  const status = ctl.createStatus({
    status: "loading",
    text: `Deciding how to handle the document(s)...`,
  });

  const model = await ctl.client.llm.model();
  const ctx = await ctl.pullHistory();

  // Measure the context window
  const measurement = await measureContextWindow(ctx, model);
  const remainingTokens = measurement.modelRemainingContextLength;
  const occupiedPercent = measurement.contextOccupiedPercent;

  ctl.debug(
    [
      `Context measurement result:\n\n`,
      `\tTotal tokens in context: ${measurement.totalTokensInContext}\n`,
      `\tModel context length: ${measurement.modelContextLength}\n`,
      `\tModel remaining context length: ${remainingTokens}\n`,
      `\tContext occupied percent: ${occupiedPercent.toFixed(2)}%\n`,
    ].join(""),
  );

  // Get token count of provided files
  let fileTokens = 0;
  let readMs = 0;
  let tokenizeMs = 0;
  for (const file of files) {
    const readStartedAt = performance.now();

    const fileStatus = status.addSubStatus({
      status: "loading",
      text: `Loading parser for ${file.name}...`,
    });
    // Both are updated once the parser is known and then used by the progress text.
    let verb = "Reading";
    let parserSuffix = "";

    const parsed = await ctl.client.files.parseDocument(file, {
      signal: ctl.abortSignal,
      onParserLoaded: parser => {
        fileStatus.setState({
          status: "loading",
          text: `${parser.library} loaded for ${file.name}...`,
        });
        if (parser.library !== "builtIn") {
          verb = "Parsing";
          parserSuffix = ` with ${parser.library}`;
        }
      },
      onProgress: progress => {
        const percent = (progress * 100).toFixed(2);
        fileStatus.setState({
          status: "loading",
          text: `${verb} file ${file.name}${parserSuffix}... (${percent}%)`,
        });
      },
    });
    fileStatus.remove();

    readMs += performance.now() - readStartedAt;

    // tokenize file content
    const tokenizeStartedAt = performance.now();
    fileTokens += await model.countTokens(parsed.content);
    tokenizeMs += performance.now() - tokenizeStartedAt;

    // No need to look at further files once the content cannot fit anyway.
    if (fileTokens > remainingTokens) {
      break;
    }
  }
  ctl.debug(`Total file read time: ${readMs.toFixed(2)} ms`);
  ctl.debug(`Total tokenize time: ${tokenizeMs.toFixed(2)} ms`);

  // Calculate total token count of files + user prompt
  ctl.debug(`Original User Prompt: ${originalUserPrompt}`);
  const promptTokens = (await model.tokenize(originalUserPrompt)).length;
  const neededTokens = fileTokens + promptTokens;

  // Calculate the available context tokens
  const occupiedFraction = occupiedPercent / 100;
  const targetUseFraction = 0.7;
  const targetContextUsage = targetUseFraction * (1 - occupiedFraction);
  const budgetTokens = Math.floor(remainingTokens * targetContextUsage);

  // Debug log
  ctl.debug("Strategy Calculation:");
  ctl.debug(`\tTotal Tokens in All Files: ${fileTokens}`);
  ctl.debug(`\tTotal Tokens in User Prompt: ${promptTokens}`);
  ctl.debug(`\tModel Context Remaining: ${remainingTokens} tokens`);
  ctl.debug(`\tContext Occupied: ${occupiedPercent.toFixed(2)}%`);
  ctl.debug(`\tAvailable Tokens: ${budgetTokens}\n`);

  const strategy: DocumentContextInjectionStrategy =
    neededTokens > budgetTokens ? "retrieval" : "inject-full-content";

  if (strategy === "retrieval") {
    ctl.debug(
      `Chosen context injection strategy: '${strategy}'. Total file + prompt token count: ` +
        `${neededTokens} > ${targetContextUsage * 100}% * available context tokens: ${budgetTokens}`,
    );
    status.setState({
      status: "done",
      text: `Chosen context injection strategy: '${strategy}'. Retrieval is optimal for the size of content provided`,
    });
  } else {
    status.setState({
      status: "done",
      text: `Chosen context injection strategy: '${strategy}'. All content can fit into the context`,
    });
  }

  return strategy;
}
maestro