/**
 * Fuzzy Matching Utilities for Draw Things Index
 * Implements Levenshtein distance and various fuzzy matching strategies
 */

// ═══════════════════════════════════════════════════════════════
// Simple Stemmer (English)
// ═══════════════════════════════════════════════════════════════

/**
 * Simple English stemmer - handles common suffixes
 * Not as good as Porter, but zero dependencies and fast
 */
function stem(word: string): string {
  if (word.length < 4) return word;
  
  // Plural forms
  if (word.endsWith('ies') && word.length > 4) {
    return word.slice(0, -3) + 'y';  // cities → city
  }
  if (word.endsWith('es') && word.length > 4) {
    // boxes → box, watches → watch, but not "times"
    const stem = word.slice(0, -2);
    if (stem.endsWith('ch') || stem.endsWith('sh') || stem.endsWith('x') || stem.endsWith('s')) {
      return stem;
    }
  }
  if (word.endsWith('s') && !word.endsWith('ss') && word.length > 3) {
    return word.slice(0, -1);  // cats → cat
  }
  
  // Verb forms
  if (word.endsWith('ing') && word.length > 5) {
    const stem = word.slice(0, -3);
    if (stem.endsWith(stem[stem.length - 1])) {
      return stem.slice(0, -1);  // running → run
    }
    return stem;  // walking → walk
  }
  if (word.endsWith('ed') && word.length > 4) {
    const stem = word.slice(0, -2);
    if (stem.endsWith(stem[stem.length - 1])) {
      return stem.slice(0, -1);  // stopped → stop
    }
    return stem;  // walked → walk
  }
  
  // Adjective forms
  if (word.endsWith('ly') && word.length > 4) {
    return word.slice(0, -2);  // quickly → quick
  }
  if (word.endsWith('er') && word.length > 4) {
    return word.slice(0, -2);  // faster → fast
  }
  if (word.endsWith('est') && word.length > 5) {
    return word.slice(0, -3);  // fastest → fast
  }
  
  return word;
}

// ═══════════════════════════════════════════════════════════════
// Levenshtein Distance
// ═══════════════════════════════════════════════════════════════

/**
 * Calculate Levenshtein distance between two strings
 * Optimized implementation with early termination
 */
export function levenshteinDistance(a: string, b: string): number {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Swap to ensure a is the shorter string (memory optimization)
  if (a.length > b.length) {
    [a, b] = [b, a];
  }

  const aLen = a.length;
  const bLen = b.length;

  // Single row optimization
  let prevRow = new Array(aLen + 1);
  let currRow = new Array(aLen + 1);

  // Initialize first row
  for (let i = 0; i <= aLen; i++) {
    prevRow[i] = i;
  }

  for (let j = 1; j <= bLen; j++) {
    currRow[0] = j;

    for (let i = 1; i <= aLen; i++) {
      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
      currRow[i] = Math.min(
        currRow[i - 1] + 1,      // Insertion
        prevRow[i] + 1,          // Deletion
        prevRow[i - 1] + cost    // Substitution
      );
    }

    // Swap rows
    [prevRow, currRow] = [currRow, prevRow];
  }

  return prevRow[aLen];
}

/**
 * Calculate similarity ratio (0-1) based on Levenshtein distance
 */
export function levenshteinSimilarity(a: string, b: string): number {
  const maxLen = Math.max(a.length, b.length);
  if (maxLen === 0) return 1;
  return 1 - levenshteinDistance(a, b) / maxLen;
}

// ═══════════════════════════════════════════════════════════════
// Fuzzy Matching
// ═══════════════════════════════════════════════════════════════

export interface FuzzyMatchResult {
  /** Match score 0-100 */
  score: number;
  /** Type of match */
  type: 'exact' | 'fuzzy' | 'partial' | 'none';
  /** Which terms matched */
  matchedTerms: string[];
  /** Similarity ratio for fuzzy matches */
  similarity?: number;
}

export interface FuzzyMatchOptions {
  /** Levenshtein similarity threshold for term matching (default: 0.85) */
  fuzzyTermThreshold?: number;
  /** Minimum fraction of query terms that must match (default: 0.5) */
  minTermCoverage?: number;
}

const DEFAULT_FUZZY_OPTIONS: Required<FuzzyMatchOptions> = {
  fuzzyTermThreshold: 0.85,
  minTermCoverage: 0.5,
};

/**
 * Check if a term matches as a WHOLE WORD in the target
 * Uses word boundaries - "cat" won't match "intricately"
 */
function hasWholeWordMatch(term: string, targetWords: string[]): boolean {
  return targetWords.some(word => word === term);
}

/**
 * Check if stems match (for plural/verb forms)
 * "cats" → "cat", "running" → "run"
 */
function hasStemMatch(term: string, targetWords: string[]): boolean {
  const termStem = stem(term);
  return targetWords.some(word => stem(word) === termStem);
}

/**
 * Perform fuzzy matching of a query against a target string
 * Returns detailed match information
 * 
 * Scoring:
 * - 100: Exact whole-word match for all query terms
 * - 90-99: Stem match (cats→cat) for all terms
 * - 60-89: Partial matches (some terms match)
 * - <60: Fuzzy Levenshtein matches
 */
export function fuzzyMatch(
  query: string, 
  target: string,
  options?: FuzzyMatchOptions
): FuzzyMatchResult {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const queryLower = query.toLowerCase().trim();
  const targetLower = target.toLowerCase();

  // Tokenize both into words
  const queryTerms = tokenize(queryLower);
  const targetWords = tokenize(targetLower);
  
  if (queryTerms.length === 0) {
    return { score: 0, type: 'none', matchedTerms: [] };
  }

  const matchedTerms: string[] = [];
  const matchTypes: ('exact' | 'stem' | 'fuzzy')[] = [];
  
  for (const term of queryTerms) {
    // Skip very short terms (too many false positives)
    if (term.length < 3) continue;
    
    // 1. Exact whole-word match (highest priority)
    if (hasWholeWordMatch(term, targetWords)) {
      matchedTerms.push(term);
      matchTypes.push('exact');
      continue;
    }
    
    // 2. Stem match (cats→cat, running→run)
    if (hasStemMatch(term, targetWords)) {
      matchedTerms.push(term);
      matchTypes.push('stem');
      continue;
    }
    
    // 3. Fuzzy Levenshtein match on individual words
    let foundFuzzy = false;
    for (const word of targetWords) {
      // Only fuzzy match words of similar length
      if (Math.abs(term.length - word.length) > 2) continue;
      
      const termSimilarity = levenshteinSimilarity(term, word);
      if (termSimilarity >= opts.fuzzyTermThreshold) {
        matchedTerms.push(term);
        matchTypes.push('fuzzy');
        foundFuzzy = true;
        break;
      }
    }
  }

  // No matches at all
  if (matchedTerms.length === 0) {
    return { score: 0, type: 'none', matchedTerms: [] };
  }

  // Calculate coverage and score
  const significantQueryTerms = queryTerms.filter(t => t.length >= 3);
  const significantQueryTermCount = significantQueryTerms.length;
  const coverage = matchedTerms.length / Math.max(1, significantQueryTermCount);

  // Tighten coverage for longer queries to avoid single-term false positives
  // Example: 3-term query where only one common word matches ("like")
  const effectiveMinCoverage = significantQueryTermCount >= 3
    ? Math.max(opts.minTermCoverage, 0.5)
    : opts.minTermCoverage;
  
  // Check minimum coverage
  if (coverage < effectiveMinCoverage) {
    return { score: 0, type: 'none', matchedTerms: [] };
  }

  // Calculate score based on match quality
  const exactCount = matchTypes.filter(t => t === 'exact').length;
  const stemCount = matchTypes.filter(t => t === 'stem').length;
  const fuzzyCount = matchTypes.filter(t => t === 'fuzzy').length;
  
  let score: number;
  let type: 'exact' | 'fuzzy' | 'partial';
  
  if (exactCount === matchedTerms.length && coverage >= 1.0) {
    // All terms matched exactly
    score = 100;
    type = 'exact';
  } else if ((exactCount + stemCount) === matchedTerms.length && coverage >= 1.0) {
    // All terms matched via exact or stem
    score = 95;
    type = 'exact';
  } else {
    // Partial or fuzzy matches
    // Score: base 50 + up to 30 for coverage + up to 10 for exact + 5 for stem
    score = Math.round(50 + (coverage * 30) + (exactCount / matchedTerms.length * 10) + (stemCount / matchedTerms.length * 5));
    score = Math.min(89, score);  // Cap at 89 for partial
    type = fuzzyCount > 0 ? 'fuzzy' : 'partial';
  }

  return {
    score,
    type,
    matchedTerms,
  };
}

/**
 * Find best fuzzy match among multiple targets
 */
export function findBestMatch(
  query: string,
  targets: string[]
): { index: number; result: FuzzyMatchResult } | null {
  let bestIndex = -1;
  let bestResult: FuzzyMatchResult = { score: 0, type: 'none', matchedTerms: [] };

  for (let i = 0; i < targets.length; i++) {
    const result = fuzzyMatch(query, targets[i]);
    if (result.score > bestResult.score) {
      bestIndex = i;
      bestResult = result;
    }
  }

  if (bestIndex === -1 || bestResult.score === 0) {
    return null;
  }

  return { index: bestIndex, result: bestResult };
}

// ═══════════════════════════════════════════════════════════════
// Text Processing
// ═══════════════════════════════════════════════════════════════

/**
 * Tokenize text into searchable terms
 * Removes common words and punctuation
 */
export function tokenize(text: string): string[] {
  // Common words to ignore (English + German basics)
  const stopWords = new Set([
    'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
    'of', 'with', 'by', 'from', 'is', 'are', 'was', 'were', 'be', 'been',
    'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
    'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need',
    'like',
    'ein', 'eine', 'der', 'die', 'das', 'und', 'oder', 'aber', 'mit',
    'von', 'zu', 'für', 'auf', 'ist', 'sind', 'war', 'waren',
  ]);

  return text
    .toLowerCase()
    .replace(/[^\w\s-]/g, ' ')  // Remove punctuation except hyphens
    .split(/\s+/)
    .filter(word => word.length > 2 && !stopWords.has(word));
}

/**
 * Extract key phrases from a prompt
 * Useful for finding the "essence" of a generation prompt
 */
export function extractKeyPhrases(prompt: string): string[] {
  const phrases: string[] = [];
  
  // Split on common prompt separators
  const parts = prompt.split(/[,;|]/);
  
  for (const part of parts) {
    const trimmed = part.trim();
    if (trimmed.length > 3 && trimmed.length < 100) {
      phrases.push(trimmed);
    }
  }
  
  return phrases;
}

/**
 * Normalize a prompt for comparison
 * Removes weight syntax, special tokens, etc.
 */
export function normalizePrompt(prompt: string): string {
  return prompt
    // Remove weight syntax like (word:1.2) or [word:0.8]
    .replace(/[\(\[](.*?)(?::\d*\.?\d+)?[\)\]]/g, '$1')
    // Remove BREAK tokens
    .replace(/\bBREAK\b/gi, '')
    // Normalize whitespace
    .replace(/\s+/g, ' ')
    .trim()
    .toLowerCase();
}
draw-things-index