Project Files
config.py
"""
config.py – Master configuration & embedding model registry.
All tunable constants live here. Server flags override these defaults at runtime.
"""
from __future__ import annotations
# ─────────────────────────────────────────────────────────────────────────────
# Chunking
# ─────────────────────────────────────────────────────────────────────────────
DEFAULT_CHUNK_SIZE = 512 # words per chunk
DEFAULT_CHUNK_OVERLAP = 64 # word overlap between consecutive chunks
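# A minimal sketch of how a consumer of these constants might chunk text,
# assuming simple whitespace tokenization (the helper name and approach are
# illustrative, not the server's actual pipeline):
def iter_word_chunks(text: str, size: int = DEFAULT_CHUNK_SIZE,
                     overlap: int = DEFAULT_CHUNK_OVERLAP):
    """Yield overlapping word-based chunks; each window advances size - overlap."""
    words = text.split()
    step = max(size - overlap, 1)  # guard against overlap >= size
    for start in range(0, len(words), step):
        yield " ".join(words[start:start + size])
        if start + size >= len(words):
            break  # last window already covered the tail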
# ─────────────────────────────────────────────────────────────────────────────
# Retrieval
# ─────────────────────────────────────────────────────────────────────────────
DEFAULT_TOP_K = 5
DEFAULT_MIN_SCORE = 0.0
DEFAULT_COLLECTION = "rag"
# How many times more candidates to pull before reranking
CANDIDATE_MULTIPLIER = 4 # top_k * CANDIDATE_MULTIPLIER fed into reranker
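# Sketch of the arithmetic the comment above describes – the retriever
# over-fetches so the reranker has a pool to reorder (helper name is
# illustrative only):
def candidate_pool_size(top_k: int = DEFAULT_TOP_K) -> int:
    return top_k * CANDIDATE_MULTIPLIER  # defaults: 5 * 4 = 20 candidates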
# ─────────────────────────────────────────────────────────────────────────────
# Caching / dedup
# ─────────────────────────────────────────────────────────────────────────────
RAG_CACHE_TTL_SECONDS = 30
WEB_CACHE_TTL_SECONDS = 60
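# Sketch of the freshness test a TTL cache built on these constants might
# use (assumed shape; the real cache layer lives elsewhere in the project):
def is_fresh(stored_at: float, ttl_seconds: int = RAG_CACHE_TTL_SECONDS) -> bool:
    import time  # stdlib; imported locally to keep the sketch self-contained
    return (time.monotonic() - stored_at) < ttl_seconds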
# ─────────────────────────────────────────────────────────────────────────────
# Web search
# ─────────────────────────────────────────────────────────────────────────────
DEFAULT_SEARCH_PROVIDER = "duckduckgo"
SUPPORTED_SEARCH_PROVIDERS = ("duckduckgo", "brave", "tavily", "serper")
DEFAULT_WEB_MAX_RESULTS = 5
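# Sketch of the flag validation a CLI layer might run against this tuple
# (hypothetical helper; actual flag parsing lives in the server entry point):
def validate_search_provider(name: str) -> str:
    if name not in SUPPORTED_SEARCH_PROVIDERS:
        raise ValueError(f"unknown search provider {name!r}; "
                         f"expected one of {SUPPORTED_SEARCH_PROVIDERS}")
    return name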
# ─────────────────────────────────────────────────────────────────────────────
# HyDE (Hypothetical Document Embedding)
# ─────────────────────────────────────────────────────────────────────────────
HYDE_LM_STUDIO_URL = "http://localhost:1234/v1/completions"
HYDE_MAX_TOKENS = 200
HYDE_TEMPERATURE = 0.3
HYDE_TIMEOUT_S = 8
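# Minimal sketch of a HyDE call using the constants above, assuming LM Studio's
# OpenAI-compatible /v1/completions response shape ({"choices": [{"text": ...}]});
# the prompt template is illustrative, not the server's actual wording:
def hyde_draft(query: str) -> str:
    import requests  # lazy import: only needed when HyDE is enabled
    resp = requests.post(
        HYDE_LM_STUDIO_URL,
        json={"prompt": f"Write a short passage answering: {query}",
              "max_tokens": HYDE_MAX_TOKENS,
              "temperature": HYDE_TEMPERATURE},
        timeout=HYDE_TIMEOUT_S,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["text"]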
# ─────────────────────────────────────────────────────────────────────────────
# Cross-encoder reranker model
# ─────────────────────────────────────────────────────────────────────────────
RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
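# Sketch of how this cross-encoder is typically applied with
# sentence-transformers (assumed consumer code, not part of the config itself):
def rerank(query: str, docs: list[str], top_k: int = DEFAULT_TOP_K) -> list[str]:
    from sentence_transformers import CrossEncoder  # lazy heavy import
    scores = CrossEncoder(RERANKER_MODEL).predict([(query, d) for d in docs])
    ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked[:top_k]]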
# ─────────────────────────────────────────────────────────────────────────────
# Embedding model registry
# ─────────────────────────────────────────────────────────────────────────────
# Key        → CLI --embedding-model value
# model_name → HuggingFace model id
# dim        → output dimension (informational)
# tier       → fast | balanced | powerful
# prefix_*   → instruction prefixes required by the model at query / doc time
# ─────────────────────────────────────────────────────────────────────────────
EMBEDDING_MODELS: dict[str, dict] = {
# ── FAST ──────────────────────────────────────────────────────────────────
"minilm-l6": {
"label": "all-MiniLM-L6-v2 (fast ยท 384-dim)",
"model_name": "sentence-transformers/all-MiniLM-L6-v2",
"dim": 384,
"tier": "fast",
"prefix_q": "",
"prefix_d": "",
"description": "Lightweight baseline โ good for RAM-constrained machines.",
},
"minilm-l12": {
"label": "all-MiniLM-L12-v2 (fast+ ยท 384-dim)",
"model_name": "sentence-transformers/all-MiniLM-L12-v2",
"dim": 384,
"tier": "fast",
"prefix_q": "",
"prefix_d": "",
"description": "Two-layer gain over L6 with negligible extra cost.",
},
# ── BALANCED ──────────────────────────────────────────────────────────────
"mpnet": {
"label": "all-mpnet-base-v2 (balanced ยท 768-dim)",
"model_name": "sentence-transformers/all-mpnet-base-v2",
"dim": 768,
"tier": "balanced",
"prefix_q": "",
"prefix_d": "",
"description": "Solid general-purpose model, strong on diverse domains.",
},
"gte-base": {
"label": "gte-base (balanced ยท 768-dim)",
"model_name": "thenlper/gte-base",
"dim": 768,
"tier": "balanced",
"prefix_q": "",
"prefix_d": "",
"description": "GTE family โ excellent BEIR benchmark scores at this size.",
},
"bge-base": {
"label": "bge-base-en-v1.5 (balanced ยท 768-dim)",
"model_name": "BAAI/bge-base-en-v1.5",
"dim": 768,
"tier": "balanced",
"prefix_q": "Represent this sentence for searching relevant passages: ",
"prefix_d": "",
"description": "BAAI BGE โ top MTEB scores for its size class.",
},
# ── POWERFUL ──────────────────────────────────────────────────────────────
"bge-large": {
"label": "bge-large-en-v1.5 (powerful ยท 1024-dim)",
"model_name": "BAAI/bge-large-en-v1.5",
"dim": 1024,
"tier": "powerful",
"prefix_q": "Represent this sentence for searching relevant passages: ",
"prefix_d": "",
"description": "Best English retrieval in the BGE family.",
},
"gte-large": {
"label": "gte-large (powerful ยท 1024-dim)",
"model_name": "thenlper/gte-large",
"dim": 1024,
"tier": "powerful",
"prefix_q": "",
"prefix_d": "",
"description": "Largest GTE โ great for long-document retrieval.",
},
"e5-large": {
"label": "e5-large-v2 (powerful ยท 1024-dim)",
"model_name": "intfloat/e5-large-v2",
"dim": 1024,
"tier": "powerful",
"prefix_q": "query: ",
"prefix_d": "passage: ",
"description": "Microsoft E5 โ instruction-aware, top BEIR scores.",
},
"e5-mistral": {
"label": "e5-mistral-7b-instruct (powerful ยท 4096-dim)",
"model_name": "intfloat/e5-mistral-7b-instruct",
"dim": 4096,
"tier": "powerful",
"prefix_q": "Instruct: Retrieve the most relevant document for the query\nQuery: ",
"prefix_d": "",
"description": "Mistral-based E5 โ highest quality, needs GPU / 16 GB+ RAM.",
},
"nomic-v1.5": {
"label": "nomic-embed-text-v1.5 (powerful ยท 768-dim ยท 8192-ctx)",
"model_name": "nomic-ai/nomic-embed-text-v1.5",
"dim": 768,
"tier": "powerful",
"prefix_q": "search_query: ",
"prefix_d": "search_document: ",
"description": "8192-token context โ ideal for very long documents.",
},
"jina-v3": {
"label": "jina-embeddings-v3 (powerful ยท 1024-dim ยท 8192-ctx)",
"model_name": "jinaai/jina-embeddings-v3",
"dim": 1024,
"tier": "powerful",
"prefix_q": "",
"prefix_d": "",
"description": "Jina v3 โ multilingual, 8192-ctx, strong on code + text.",
},
"stella-en-1.5b": {
"label": "Stella-en-1.5B-v5 (powerful ยท 1024-dim)",
"model_name": "dunzhang/stella_en_1.5B_v5",
"dim": 1024,
"tier": "powerful",
"prefix_q": "Instruct: Retrieve semantically similar text\nQuery: ",
"prefix_d": "",
"description": "State-of-the-art MTEB leader as of 2024. Needs ~6 GB RAM.",
},
}
DEFAULT_EMBEDDING_MODEL = "bge-base" # best quality/speed for most machines
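# Sketch of how the registry's prefix fields are meant to be consumed at query
# time with sentence-transformers (illustrative; the real embedder lives in
# the server) – prefix_q is prepended to queries, prefix_d to documents:
def embed_queries(texts: list[str], key: str = DEFAULT_EMBEDDING_MODEL):
    from sentence_transformers import SentenceTransformer  # lazy heavy import
    cfg = EMBEDDING_MODELS[key]
    # Note: some registry models (e.g. nomic, jina) additionally require
    # trust_remote_code=True when loaded via SentenceTransformer.
    model = SentenceTransformer(cfg["model_name"])
    return model.encode([cfg["prefix_q"] + t for t in texts],
                        normalize_embeddings=True)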