# Project Files
# src/retrieval/hyde.py
"""
retrieval/hyde.py — Hypothetical Document Embedding (HyDE).
Instead of embedding the raw query, we ask a local LLM to write a short
hypothetical passage that *would* answer the query, then embed that passage.
The hypothesis lands closer to real document embeddings in latent space,
dramatically improving recall for abstract or jargon-heavy queries.
Reference: Gao et al., "Precise Zero-Shot Dense Retrieval without Relevance
Labels" (2022). https://arxiv.org/abs/2212.10496
"""
from __future__ import annotations
import httpx
from config import (
HYDE_LM_STUDIO_URL,
HYDE_MAX_TOKENS,
HYDE_TEMPERATURE,
HYDE_TIMEOUT_S,
)
from src.utils.logging import get_logger
log = get_logger("retrieval.hyde")
# Prompt sent to the local LLM. The model is asked for a short expert-style
# passage (not a direct reply) so the generated text embeds closer to real
# document chunks than the raw question would. `{query}` is filled in by
# `generate_hypothesis` via str.format.
HYDE_PROMPT_TEMPLATE = (
    "Write a concise, factual passage (3-5 sentences) that directly answers "
    "the following question. Do not reference the question explicitly; write as "
    "if you are a knowledgeable expert composing a relevant paragraph.\n\n"
    "Question: {query}\n\n"
    "Passage:"
)
async def generate_hypothesis(
    query: str,
    lm_studio_url: str = HYDE_LM_STUDIO_URL,
    max_tokens: int = HYDE_MAX_TOKENS,
    temperature: float = HYDE_TEMPERATURE,
    timeout: float = HYDE_TIMEOUT_S,
) -> str | None:
    """
    Generate a hypothetical answer passage for *query* via the local LLM.

    Posts a completion request to the LM Studio OpenAI-compatible endpoint
    and returns the generated passage text, which callers embed in place of
    the raw query (HyDE).

    Args:
        query: The user's search query.
        lm_studio_url: Completions endpoint URL (OpenAI-compat `/v1/completions`).
        max_tokens: Generation cap for the hypothesis.
        temperature: Sampling temperature for the LLM.
        timeout: Total request timeout in seconds.

    Returns:
        The generated passage on success, or ``None`` on any failure
        (unreachable server, HTTP error, empty/malformed response) so
        callers can gracefully fall back to embedding the original query.
    """
    prompt = HYDE_PROMPT_TEMPLATE.format(query=query)
    payload = {
        "model": "local-model",  # LM Studio ignores this field
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        # Stop early once the model drifts into a new paragraph or starts
        # echoing the prompt scaffolding.
        "stop": ["\n\n", "Question:", "Passage:"],
    }
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(lm_studio_url, json=payload)
            response.raise_for_status()
            data = response.json()
            text = data["choices"][0]["text"].strip()
            if text:
                # Fixed mojibake: ellipsis was previously mis-encoded ("ā¦").
                log.info("HyDE hypothesis (%d chars): %s…", len(text), text[:100])
                return text
            # Model returned an empty completion — treat as a miss.
            return None
    except httpx.ConnectError:
        # Local LLM simply isn't running; this is an expected, benign case.
        # Fixed mojibake: em dash was previously mis-encoded ("ā").
        log.debug("HyDE: LM Studio not reachable at %s — skipping.", lm_studio_url)
        return None
    except Exception as exc:
        # Broad catch is deliberate: HyDE is best-effort and must never
        # break retrieval. Covers timeouts, HTTP 4xx/5xx, and bad JSON shape.
        log.warning("HyDE generation failed: %s", exc)
        return None