import { createConfigSchematics } from "@lmstudio/sdk";

/** Language codes in which the RAG instruction text can be written. */
export type ResponseLanguage = "ru" | "en";

/**
 * Assembles a prompt template from its four sections, separated by blank
 * lines: the `{{rag_context}}` macro, the instruction paragraph, the label
 * that introduces the query, and the `{{user_query}}` macro.
 */
const composePromptTemplate = (instruction: string, queryLabel: string): string =>
  ["{{rag_context}}", instruction, queryLabel, "{{user_query}}"].join("\n\n");

/** Built-in English prompt template. */
export const DEFAULT_PROMPT_TEMPLATE_EN = composePromptTemplate(
  "Use the citations above to respond to the user query, only if they are relevant. Otherwise, respond to the best of your ability without them.",
  "User Query:",
);

/** Built-in Russian prompt template; same layout as the English one. */
export const DEFAULT_PROMPT_TEMPLATE_RU = composePromptTemplate(
  "Используй приведённые выше цитаты для ответа на запрос пользователя, только если они релевантны. В противном случае ответь самостоятельно, опираясь на свои знания.",
  "Запрос пользователя:",
);

/** Language-agnostic alias; currently points at the English template. */
export const DEFAULT_PROMPT_TEMPLATE = DEFAULT_PROMPT_TEMPLATE_EN;

/**
 * Picks the built-in prompt template for a response language.
 *
 * @param lang - Language code selecting the template.
 * @returns The Russian template when `lang` is `"ru"`; the English template
 *   for every other value.
 */
export function getDefaultPromptTemplate(lang: ResponseLanguage): string {
  if (lang === "ru") {
    return DEFAULT_PROMPT_TEMPLATE_RU;
  }
  return DEFAULT_PROMPT_TEMPLATE_EN;
}

/**
 * Configuration schema for the plugin, built with the LM Studio SDK's
 * `createConfigSchematics()` fluent builder.
 *
 * Every `.field(...)` call below passes, in order: the setting key, a value
 * type (`"string"`, `"numeric"` or `"boolean"` in this file), an options
 * object carrying UI metadata (`displayName`, `subtitle`, and optionally
 * `placeholder`, `slider`, `int`/`min`/`max`, `dependencies`, `isParagraph`),
 * and a trailing value that presumably is the field's default
 * (NOTE(review): confirm against the SDK documentation).
 *
 * The chain must stay a single expression ending in `.build()`; the exported
 * value is whatever `.build()` returns.
 */
export const configSchematics = createConfigSchematics()
  // ─── Language ─────────────────────────────────────────────────────
  // NOTE(review): declared as a free-form string, so values other than "ru"
  // and "en" can be entered. getDefaultPromptTemplate() in this file treats
  // anything that is not "ru" as English.
  .field(
    "responseLanguage",
    "string",
    {
      displayName: "Response Language / Язык ответа",
      subtitle: 'Language for RAG instructions sent to the model. "ru" = Русский, "en" = English.',
      placeholder: "ru",
    },
    "ru",
  )
  // ─── Storage Locations ────────────────────────────────────────────
  // Both directory settings use the empty string as their trailing value.
  .field(
    "documentsDirectory",
    "string",
    {
      displayName: "Documents Directory",
      subtitle: "Root directory containing documents to index. All subdirectories will be scanned.",
      placeholder: "/path/to/documents",
    },
    "",
  )
  .field(
    "vectorStoreDirectory",
    "string",
    {
      displayName: "Vector Store Directory",
      subtitle: "Directory where the vector database will be stored.",
      placeholder: "/path/to/vector/store",
    },
    "",
  )
  // ─── Retrieval ────────────────────────────────────────────────────
  .field(
    "retrievalLimit",
    "numeric",
    {
      int: true,
      min: 1,
      max: 20,
      displayName: "Retrieval Limit",
      subtitle: "Maximum number of chunks to return during retrieval.",
      slider: { min: 1, max: 20, step: 1 },
    },
    5,
  )
  .field(
    "retrievalAffinityThreshold",
    "numeric",
    {
      // No `int: true` here: this is the only fractional numeric field.
      min: 0.0,
      max: 1.0,
      displayName: "Retrieval Affinity Threshold",
      subtitle: "Minimum similarity score for a chunk to be considered relevant.",
      slider: { min: 0.0, max: 1.0, step: 0.01 },
    },
    0.5,
  )
  // ─── Chunking ─────────────────────────────────────────────────────
  .field(
    "chunkSize",
    "numeric",
    {
      int: true,
      min: 128,
      max: 2048,
      displayName: "Chunk Size",
      subtitle: "Size of text chunks for embedding (in tokens).",
      slider: { min: 128, max: 2048, step: 128 },
    },
    512,
  )
  // NOTE(review): the overlap range (0-512) is independent of the chunk size
  // range (128-2048), so this schema allows an overlap larger than the chunk
  // size. Confirm the indexing code guards against that combination.
  // NOTE(review): the trailing value 100 is not a multiple of the slider step
  // (32); confirm how the UI treats off-step values.
  .field(
    "chunkOverlap",
    "numeric",
    {
      int: true,
      min: 0,
      max: 512,
      displayName: "Chunk Overlap",
      subtitle: "Overlap between consecutive chunks (in tokens).",
      slider: { min: 0, max: 512, step: 32 },
    },
    100,
  )
  // ─── Indexing Throughput ──────────────────────────────────────────
  .field(
    "maxConcurrentFiles",
    "numeric",
    {
      int: true,
      min: 1,
      max: 10,
      displayName: "Max Concurrent Files",
      subtitle: "Maximum number of files to process concurrently during indexing. Recommend 1 for large PDF datasets.",
      slider: { min: 1, max: 10, step: 1 },
    },
    1,
  )
  .field(
    "parseDelayMs",
    "numeric",
    {
      int: true,
      min: 0,
      max: 5000,
      displayName: "Parser Delay (ms)",
      subtitle: "Wait time before parsing each document (helps avoid WebSocket throttling).",
      slider: { min: 0, max: 5000, step: 100 },
    },
    500,
  )
  // ─── Filename Search ──────────────────────────────────────────────
  .field(
    "enableFilenameSearch",
    "boolean",
    {
      displayName: "Enable Filename Search",
      subtitle:
        "Extract keywords from the user query to find files by name. " +
        "For example, \"найди файлы с именем протокол\" will list all indexed files matching \"протокол\". " +
        "Works alongside normal vector content search.",
    },
    true,
  )
  // ─── OCR Master Switch ────────────────────────────────────────────
  // Referenced by the `dependencies` entry of "indexImages" further down.
  // NOTE(review): this subtitle names LM Studio's built-in document parser,
  // while the OCR tuning fields below refer to Tesseract; confirm which
  // engine(s) this switch actually controls.
  .field(
    "enableOCR",
    "boolean",
    {
      displayName: "Enable OCR",
      subtitle: "Enable OCR for image files and image-based PDFs using LM Studio's built-in document parser.",
    },
    true,
  )
  // ─── File Type Filters ────────────────────────────────────────────────
  // One boolean per file family; every filter has `true` as its trailing value.
  .field(
    "indexHTML",
    "boolean",
    {
      displayName: "Index HTML/XHTML",
      subtitle: "Index .htm, .html, .xhtml files.",
    },
    true,
  )
  .field(
    "indexPDF",
    "boolean",
    {
      displayName: "Index PDF",
      subtitle: "Index .pdf files.",
    },
    true,
  )
  .field(
    "indexEPUB",
    "boolean",
    {
      displayName: "Index EPUB",
      subtitle: "Index .epub files.",
    },
    true,
  )
  .field(
    "indexText",
    "boolean",
    {
      displayName: "Index Text/Markdown",
      subtitle: "Index .txt, .text, .md, .mdx, .markdown, .mdown, .mkd, .mkdn files.",
    },
    true,
  )
  .field(
    "indexDocx",
    "boolean",
    {
      displayName: "Index DOCX",
      subtitle: "Index .docx files.",
    },
    true,
  )
  .field(
    "indexXlsx",
    "boolean",
    {
      displayName: "Index Spreadsheets",
      subtitle: "Index .xlsx, .xls, .csv files.",
    },
    true,
  )
  .field(
    "indexPptx",
    "boolean",
    {
      displayName: "Index Presentations",
      subtitle: "Index .pptx files.",
    },
    true,
  )
  .field(
    "indexImages",
    "boolean",
    {
      displayName: "Index Images (OCR)",
      subtitle: "Index .bmp, .jpg, .jpeg, .png files via OCR.",
      // Tied to "enableOCR" being true; presumably the UI only offers this
      // field when that condition holds (confirm against the SDK docs).
      dependencies: [
        {
          key: "enableOCR",
          condition: { type: "equals", value: true },
        },
      ],
    },
    true,
  )
  // ─── OCR Tuning ───────────────────────────────────────────────────
  // None of the fields in this group declare a dependency on "enableOCR".
  .field(
    "ocrLanguage",
    "string",
    {
      displayName: "OCR Language",
      subtitle: 'Tesseract language code: "eng" (English), "rus" (Russian), "eng+rus" (both), etc.',
      placeholder: "eng+rus",
    },
    "eng+rus",
  )
  .field(
    "ocrDataPath",
    "string",
    {
      displayName: "OCR Data Path",
      subtitle: 'Path to folder with .traineddata files. Leave empty to auto-detect: the plugin looks for .traineddata files in its own root folder (for offline use). If none found, Tesseract downloads them from CDN on first use. For best quality, download best-traineddata from tesseract-ocr/tessdata_best.',
      placeholder: "",
    },
    "",
  )
  .field(
    "ocrPageSegMode",
    "numeric",
    {
      int: true,
      min: 0,
      max: 13,
      displayName: "OCR Page Segmentation Mode (PSM)",
      subtitle: "Tesseract PSM: 3=auto, 4=single column, 6=uniform block (tables/forms), 11=sparse text. Default 3.",
      slider: { min: 0, max: 13, step: 1 },
    },
    3,
  )
  .field(
    "ocrMinTextLength",
    "numeric",
    {
      int: true,
      min: 0,
      max: 10000,
      displayName: "OCR Min Text Length",
      subtitle: "Minimum characters for PDF text to be considered valid. Lower values catch short pages (stamps, forms).",
      slider: { min: 0, max: 10000, step: 10 },
    },
    20,
  )
  // NOTE(review): the slider starts at 1 with step 10 (1, 11, 21, ...), so the
  // trailing value 200 is not on the step grid; confirm the UI handles this.
  .field(
    "ocrMaxPages",
    "numeric",
    {
      int: true,
      min: 1,
      max: 50000,
      displayName: "OCR Max Pages",
      subtitle: "Maximum PDF pages to process with OCR. Increase for large documents.",
      slider: { min: 1, max: 50000, step: 10 },
    },
    200,
  )
  .field(
    "ocrMaxImagesPerPage",
    "numeric",
    {
      int: true,
      min: 1,
      max: 100,
      displayName: "OCR Max Images Per Page",
      subtitle: "Maximum images per PDF page for OCR. Increase for pages with many diagrams/tables.",
      slider: { min: 1, max: 100, step: 1 },
    },
    10,
  )
  .field(
    "ocrMinImageArea",
    "numeric",
    {
      int: true,
      min: 0,
      max: 100000,
      displayName: "OCR Min Image Area",
      subtitle: "Minimum image area (width×height in px) for OCR. Lower values process smaller images (signatures, stamps).",
      slider: { min: 0, max: 100000, step: 100 },
    },
    2500,
  )
  .field(
    "ocrMaxImagePixels",
    "numeric",
    {
      int: true,
      min: 1000000,
      max: 500000000,
      displayName: "OCR Max Image Pixels",
      subtitle: "Maximum image area (px²) to process. Prevents OOM on huge scans. ~100M = 10000×10000.",
      slider: { min: 1000000, max: 500000000, step: 1000000 },
    },
    100000000,
  )
  .field(
    "ocrImageTimeoutMs",
    "numeric",
    {
      int: true,
      min: 5000,
      max: 300000,
      displayName: "OCR Image Timeout (ms)",
      subtitle: "Timeout in ms for loading image data from PDF. Increase for slow systems.",
      slider: { min: 5000, max: 300000, step: 5000 },
    },
    60000,
  )
  // ─── Embeddings ───────────────────────────────────────────────────
  .field(
    "embeddingModel",
    "string",
    {
      displayName: "Embedding Model",
      subtitle: "Model ID for text embeddings. Must be loaded in LM Studio. Examples: nomic-ai/nomic-embed-text-v1.5-GGUF, gpustack/text-embedding-bge-m3",
      placeholder: "gpustack/text-embedding-bge-m3",
    },
    "gpustack/text-embedding-bge-m3",
  )
  // ─── Manual Reindex ───────────────────────────────────────────────
  // The two keys share the "manualReindex." prefix.
  .field(
    "manualReindex.trigger",
    "boolean",
    {
      displayName: "Manual Reindex Trigger",
      subtitle:
        "Toggle ON to request an immediate reindex. The plugin resets this after running. Use the “Skip Previously Indexed Files” option below to control whether unchanged files are skipped.",
    },
    false,
  )
  .field(
    "manualReindex.skipPreviouslyIndexed",
    "boolean",
    {
      displayName: "Skip Previously Indexed Files",
      subtitle: "Skip unchanged files for faster manual runs. Only indexes new files or changed files.",
      // Tied to the trigger being true. NOTE(review): if dependencies control
      // visibility, this option cannot be adjusted before the trigger is
      // switched on, although the trigger's subtitle points users to it.
      dependencies: [
        {
          key: "manualReindex.trigger",
          condition: { type: "equals", value: true },
        },
      ],
    },
    true,
  )
  // ─── Prompt ───────────────────────────────────────────────────────
  .field(
    "promptTemplate",
    "string",
    {
      displayName: "Prompt Template",
      subtitle:
        "Supports {{rag_context}} (required) and {{user_query}} macros. Leave empty to auto-select based on Response Language setting.",
      // NOTE(review): DEFAULT_PROMPT_TEMPLATE is the English template, whereas
      // "responseLanguage" above uses "ru" as its trailing value, so the
      // placeholder may not match the template that is auto-selected.
      placeholder: DEFAULT_PROMPT_TEMPLATE,
      isParagraph: true,
    },
    "",
  )
  .build();
// big-rag-rus