# Project Files
# src/retrieval/hyde.py
"""
retrieval/hyde.py — Hypothetical Document Embedding (HyDE).
Instead of embedding the raw query, we ask a local LLM to write a short
hypothetical passage that *would* answer the query, then embed that passage.
The hypothesis lands closer to real document embeddings in latent space,
dramatically improving recall for abstract or jargon-heavy queries.
Reference: Gao et al., "Precise Zero-Shot Dense Retrieval without Relevance
Labels" (2022). https://arxiv.org/abs/2212.10496
"""
from __future__ import annotations
import httpx
from config import (
HYDE_LM_STUDIO_URL,
HYDE_MAX_TOKENS,
HYDE_TEMPERATURE,
HYDE_TIMEOUT_S,
)
from src.utils.logging import get_logger
log = get_logger("retrieval.hyde")
# Prompt sent to the local LLM. The model is asked for a short expert-style
# passage (not a direct reply) so the generated text embeds closer to real
# document chunks than the raw question would. `{query}` is filled in by
# `generate_hypothesis` via str.format.
HYDE_PROMPT_TEMPLATE = (
    "Write a concise, factual passage (3-5 sentences) that directly answers "
    "the following question. Do not reference the question explicitly; write as "
    "if you are a knowledgeable expert composing a relevant paragraph.\n\n"
    "Question: {query}\n\n"
    "Passage:"
)
async def generate_hypothesis(
    query: str,
    lm_studio_url: str = HYDE_LM_STUDIO_URL,
    max_tokens: int = HYDE_MAX_TOKENS,
    temperature: float = HYDE_TEMPERATURE,
    timeout: float = HYDE_TIMEOUT_S,
) -> str | None:
    """
    Generate a hypothetical answer passage for *query* via the local LLM.

    Posts a completion request to the LM Studio OpenAI-compatible endpoint
    and returns the generated passage text, which callers embed in place of
    the raw query (HyDE).

    Args:
        query: The user's search query.
        lm_studio_url: Completions endpoint URL (OpenAI-compat `/v1/completions`).
        max_tokens: Generation cap for the hypothesis.
        temperature: Sampling temperature for the LLM.
        timeout: Total request timeout in seconds.

    Returns:
        The generated passage on success, or ``None`` on any failure
        (unreachable server, HTTP error, empty/malformed response) so
        callers can gracefully fall back to embedding the original query.
    """
    prompt = HYDE_PROMPT_TEMPLATE.format(query=query)
    payload = {
        "model": "local-model",  # LM Studio ignores this field
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        # Stop early once the model drifts into a new paragraph or starts
        # echoing the prompt scaffolding.
        "stop": ["\n\n", "Question:", "Passage:"],
    }
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(lm_studio_url, json=payload)
            response.raise_for_status()
            data = response.json()
            text = data["choices"][0]["text"].strip()
            if text:
                # Fixed mojibake: ellipsis was previously mis-encoded ("ā¦").
                log.info("HyDE hypothesis (%d chars): %s…", len(text), text[:100])
                return text
            # Model returned an empty completion — treat as a miss.
            return None
    except httpx.ConnectError:
        # Local LLM simply isn't running; this is an expected, benign case.
        # Fixed mojibake: em dash was previously mis-encoded ("ā").
        log.debug("HyDE: LM Studio not reachable at %s — skipping.", lm_studio_url)
        return None
    except Exception as exc:
        # Broad catch is deliberate: HyDE is best-effort and must never
        # break retrieval. Covers timeouts, HTTP 4xx/5xx, and bad JSON shape.
        log.warning("HyDE generation failed: %s", exc)
        return None