"""
retrieval/fusion.py — Result fusion strategies for hybrid retrieval.

Implemented
───────────
RRF   Reciprocal Rank Fusion — rank-based, no score normalization needed.
      The gold standard for combining heterogeneous rankers.
      Reference: Cormack et al. (2009).

CombSUM  Min-max normalized score summation.
         Good when both rankers produce comparable magnitude scores.
"""

from __future__ import annotations

from collections import defaultdict
from typing import Any


def reciprocal_rank_fusion(
    ranked_lists: list[list[tuple[Any, float]]],
    k: int = 60,
) -> list[tuple[Any, float]]:
    """
    Merge N ranked lists into one using RRF.

    Args:
        ranked_lists: Each list is [(id, score), …] sorted by score DESC.
                      Scores are ignored; only rank position matters.
        k:            RRF smoothing constant (60 is standard).

    Returns:
        [(id, rrf_score), …] sorted by rrf_score DESC.
    """
    rrf: dict[Any, float] = defaultdict(float)
    for ranked in ranked_lists:
        for rank, (doc_id, _) in enumerate(ranked, start=1):
            rrf[doc_id] += 1.0 / (k + rank)
    return sorted(rrf.items(), key=lambda x: x[1], reverse=True)


def comb_sum(
    ranked_lists: list[list[tuple[Any, float]]],
    weights: list[float] | None = None,
) -> list[tuple[Any, float]]:
    """
    Min-max normalize each list's scores, then sum (optionally weighted).

    Args:
        ranked_lists: Each list is [(id, score), …].
        weights:      Optional per-list weights. Defaults to equal weighting.

    Returns:
        [(id, combined_score), …] sorted DESC.
    """
    if weights is None:
        weights = [1.0] * len(ranked_lists)

    combined: dict[Any, float] = defaultdict(float)

    for ranked, w in zip(ranked_lists, weights):
        if not ranked:
            continue
        scores = [s for _, s in ranked]
        lo, hi = min(scores), max(scores)
        span = hi - lo if hi != lo else 1.0
        for doc_id, score in ranked:
            combined[doc_id] += w * (score - lo) / span

    return sorted(combined.items(), key=lambda x: x[1], reverse=True)
cyper-rag-server