/**
 * Vector Operations for Semantic Search
 * 
 * Pure math functions for embedding vector operations.
 * No dependencies on external libraries - these are simple enough
 * that we don't need numpy/etc.
 */

/**
 * Cosine similarity of two equal-length vectors.
 *
 * The result lies in [-1, 1] (up to floating-point rounding):
 * -  1 means both vectors point the same way (same meaning)
 * -  0 means the vectors are orthogonal (unrelated)
 * - -1 means the vectors point in opposite directions (opposite meaning)
 *
 * For text embeddings, the typical useful range is 0.5 to 1.0.
 *
 * Degenerate inputs (two empty vectors, or either vector having zero
 * magnitude) yield 0 instead of NaN.
 *
 * @param a First vector
 * @param b Second vector; must have the same length as `a`
 * @returns Cosine of the angle between `a` and `b`
 * @throws Error when the two vectors differ in length
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimensions must match: ${a.length} vs ${b.length}`);
  }
  if (size === 0) {
    return 0;
  }

  // Single pass: accumulate the dot product and both squared norms together.
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    idx += 1;
  }

  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (denominator === 0) {
    // At least one vector is all zeros, so the angle is undefined; report 0.
    return 0;
  }
  return dot / denominator;
}

/**
 * Euclidean (straight-line) distance between two equal-length vectors.
 *
 * Smaller values mean the vectors are closer; 0 means they are identical.
 *
 * @param a First vector
 * @param b Second vector; must have the same length as `a`
 * @returns Square root of the summed squared component differences
 * @throws Error when the two vectors differ in length
 */
export function euclideanDistance(a: number[], b: number[]): number {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimensions must match: ${a.length} vs ${b.length}`);
  }

  let squaredTotal = 0;
  let idx = 0;
  while (idx < size) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }

  return Math.sqrt(squaredTotal);
}

/**
 * Normalize a vector to unit length (magnitude = 1).
 * Useful for pre-processing before cosine similarity.
 *
 * The input is never mutated and the result is always a new array, so callers
 * may modify the returned vector without affecting `v`.
 *
 * @param v Vector to normalize
 * @returns A new array with every component divided by the magnitude of `v`;
 *          when the magnitude is 0 (zero vector or empty vector) an unchanged
 *          copy of `v`, since such a vector has no direction to preserve
 */
export function normalize(v: number[]): number[] {
  const magnitude = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
  // Return a copy rather than `v` itself so the result never aliases the input.
  if (magnitude === 0) return v.slice();
  return v.map(x => x / magnitude);
}

/**
 * Dot (inner) product of two equal-length vectors.
 *
 * When both inputs are already unit-length, the result equals their cosine
 * similarity.
 *
 * @param a First vector
 * @param b Second vector; must have the same length as `a`
 * @returns Sum of the pairwise component products (0 for empty vectors)
 * @throws Error when the two vectors differ in length
 */
export function dotProduct(a: number[], b: number[]): number {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimensions must match: ${a.length} vs ${b.length}`);
  }

  let total = 0;
  let idx = 0;
  while (idx < size) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}

/**
 * Find the top-k most similar items in a collection, ranked by cosine
 * similarity to a query vector.
 *
 * Items for which `getVector` returns a falsy value are skipped. Items whose
 * similarity is below `minSimilarity` (or is NaN) are excluded.
 *
 * @param queryVector Vector to compare every item against
 * @param items Candidate items
 * @param getVector Extracts an item's vector; return `undefined` to skip it
 * @param k Maximum number of results; zero, negative or NaN yields no results
 * @param minSimilarity Inclusive lower bound on similarity (default 0.0)
 * @returns Up to `k` `{ item, similarity }` pairs, highest similarity first
 * @throws Error (from `cosineSimilarity`) when an item's vector length differs
 *         from the query vector's length
 */
export function findTopK<T>(
  queryVector: number[],
  items: T[],
  getVector: (item: T) => number[] | undefined,
  k: number,
  minSimilarity: number = 0.0,
): Array<{ item: T; similarity: number }> {
  const scored: Array<{ item: T; similarity: number }> = [];
  
  for (const item of items) {
    const vector = getVector(item);
    if (!vector) continue;
    
    const similarity = cosineSimilarity(queryVector, vector);
    if (similarity >= minSimilarity) {
      scored.push({ item, similarity });
    }
  }
  
  // Sort by similarity descending
  scored.sort((a, b) => b.similarity - a.similarity);
  
  // Array.prototype.slice treats a negative end index as an offset from the
  // end of the array, so an unclamped negative k would return "all but the
  // last |k|" results instead of none.
  return scored.slice(0, Math.max(0, k));
}

/**
 * Cosine similarity of one query vector against many candidate vectors.
 *
 * The query's magnitude is computed once up front and reused for every
 * candidate, which avoids repeating that work per comparison.
 *
 * Unlike a length mismatch being an error, a candidate whose length differs
 * from the query's simply scores 0 here; nothing is thrown. A zero-magnitude
 * query or candidate also scores 0.
 *
 * @param queryVector Vector to compare against
 * @param vectors Candidate vectors
 * @returns One similarity per candidate, in the same order as `vectors`
 */
export function batchCosineSimilarity(
  queryVector: number[],
  vectors: number[][],
): number[] {
  const dim = queryVector.length;

  // Query magnitude is shared by every comparison, so compute it only once.
  let querySumSq = 0;
  for (const component of queryVector) {
    querySumSq += component * component;
  }
  const queryMagnitude = Math.sqrt(querySumSq);

  if (queryMagnitude === 0) {
    // A zero (or empty) query has no direction: every similarity is 0.
    return vectors.map(() => 0);
  }

  const scoreCandidate = (candidate: number[]): number => {
    if (candidate.length !== dim) {
      // Mismatched dimensions are treated as "no similarity", not an error.
      return 0;
    }

    let dot = 0;
    let candidateSumSq = 0;
    for (let i = 0; i < dim; i++) {
      const c = candidate[i];
      dot += queryVector[i] * c;
      candidateSumSq += c * c;
    }

    const candidateMagnitude = Math.sqrt(candidateSumSq);
    return candidateMagnitude === 0
      ? 0
      : dot / (queryMagnitude * candidateMagnitude);
  };

  return vectors.map(candidate => scoreCandidate(candidate));
}

/**
 * Convert a similarity value to a human-readable match score (0-100).
 *
 * The 0.5-1.0 similarity range is mapped onto 0-100 through a non-linear
 * curve. Inputs outside that range are clamped: anything at or below 0.5
 * scores 0 and anything at or above 1.0 scores 100, so the result always
 * stays within 0-100 for finite input.
 *
 * @param similarity Similarity value, nominally between 0 and 1
 * @returns Integer score between 0 and 100 (NaN if `similarity` is NaN)
 */
export function similarityToScore(similarity: number): number {
  // Typical embedding similarities:
  // - 0.9+ = very strong match
  // - 0.8-0.9 = strong match
  // - 0.7-0.8 = moderate match
  // - 0.6-0.7 = weak match
  // - <0.6 = poor match

  // Rescale 0.5..1.0 to 0..1 and clamp at both ends, so out-of-range input
  // (e.g. a similarity slightly above 1) cannot produce a score above 100.
  const normalized = Math.min(1, Math.max(0, (similarity - 0.5) * 2));
  // An exponent below 1 lifts intermediate values (0.75 -> 57 rather than 50).
  const boosted = Math.pow(normalized, 0.8);
  return Math.round(boosted * 100);
}

/**
 * Convert a match score (0-100) back to a similarity value (0.5-1.0).
 * Inverse of similarityToScore() on the 0.5-1.0 similarity range.
 *
 * similarityToScore() maps every similarity at or below 0.5 to a score of 0
 * and rounds to an integer, so the round trip is approximate and a score of 0
 * maps back to 0.5.
 *
 * Scores outside 0-100 are clamped into that range first. Without the clamp a
 * negative score would yield NaN, because a negative base raised to a
 * fractional exponent is NaN.
 *
 * @param score Match score, nominally between 0 and 100
 * @returns Similarity between 0.5 and 1.0 (NaN if `score` is NaN)
 */
export function scoreToSimilarity(score: number): number {
  // Reverse the similarityToScore formula:
  // score = (((similarity - 0.5) * 2) ^ 0.8) * 100
  //
  // Solving for similarity:
  // score/100 = normalized^0.8
  // normalized = (score/100)^(1/0.8) = (score/100)^1.25
  // similarity = normalized/2 + 0.5
  const boosted = Math.min(100, Math.max(0, score)) / 100;
  const normalized = Math.pow(boosted, 1.25);  // Inverse of ^0.8
  return normalized / 2 + 0.5;
}

/**
 * Rough estimate of the memory needed to hold a set of embeddings.
 *
 * The estimate assumes 8 bytes per vector component (a 64-bit float) plus
 * about 32 bytes of fixed overhead per array; it is a heuristic, not a
 * measurement.
 *
 * @param count Number of embeddings
 * @param dimension Vector dimension (e.g., 768 or 1536)
 * @returns Estimated memory in bytes
 */
export function estimateMemoryUsage(count: number, dimension: number): number {
  const bytesPerComponent = 8;
  const overheadPerArray = 32;
  const bytesPerEmbedding = dimension * bytesPerComponent + overheadPerArray;
  return count * bytesPerEmbedding;
}

/**
 * Format a byte count for display.
 *
 * Uses 1024-based units: values below 1024 are shown as whole bytes ("B"),
 * values below 1024 * 1024 as kilobytes ("KB"), and everything else as
 * megabytes ("MB"). KB and MB values are shown with one decimal place.
 * There is no larger unit, so very large sizes are still reported in MB.
 *
 * @param bytes Size in bytes
 * @returns Display string such as "512 B", "1.5 KB" or "12.0 MB"
 */
export function formatMemorySize(bytes: number): string {
  const kilobyte = 1024;
  const megabyte = 1024 * 1024;

  if (bytes < kilobyte) {
    return `${bytes} B`;
  }

  if (bytes < megabyte) {
    const inKilobytes = bytes / kilobyte;
    return `${inKilobytes.toFixed(1)} KB`;
  }

  const inMegabytes = bytes / megabyte;
  return `${inMegabytes.toFixed(1)} MB`;
}
// draw-things-index