# Project Files
# server.py
#!/usr/bin/env python3
"""
server.py — MCP RAG + Web Search Server (production entry point)

Usage
─────
    python server.py --docs ./my_docs                     # minimal
    python server.py --docs ./my_docs --embedding-model bge-large --reranker
    python server.py --list-models                        # model browser

Run `python server.py --help` for all flags.
"""
from __future__ import annotations
import argparse
import asyncio
import sys
# ── Ensure src/ is importable when running as a script ───────────────────────
import os
sys.path.insert(0, os.path.dirname(__file__))
from mcp import types
from mcp.server import Server
from mcp.server.stdio import stdio_server
from config import (
DEFAULT_CHUNK_OVERLAP,
DEFAULT_CHUNK_SIZE,
DEFAULT_COLLECTION,
DEFAULT_EMBEDDING_MODEL,
DEFAULT_MIN_SCORE,
DEFAULT_TOP_K,
DEFAULT_WEB_MAX_RESULTS,
EMBEDDING_MODELS,
RAG_CACHE_TTL_SECONDS,
RERANKER_MODEL,
WEB_CACHE_TTL_SECONDS,
)
from src.retrieval.engine import RAGEngine
from src.retrieval.reranker import RERANKER_MODELS, DEFAULT_RERANKER
from src.search.providers import WebSearchEngine
from src.tools.ingest_tool import build_ingest_tool_schema, handle_reindex
from src.tools.rag_tool import build_rag_tool_schema, handle_rag_search
from src.tools.web_tool import build_web_tool_schema, handle_web_search
from src.utils.cache import TTLCache
from src.utils.logging import get_logger
log = get_logger("server")
# ─────────────────────────────────────────────────────────────────────────────
# CLI
# ─────────────────────────────────────────────────────────────────────────────
def parse_args() -> argparse.Namespace:
    """Define and parse the server's command-line interface.

    Returns:
        ``argparse.Namespace`` with one attribute per flag (e.g. ``args.docs``,
        ``args.embedding_model``). Defaults come from ``config``.
    """
    p = argparse.ArgumentParser(
        prog="mcp-rag-server",
        # FIX: help text previously contained mojibake ("β" for an em-dash).
        description="Production MCP server — local RAG + web search for LM Studio",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""\
Examples:
  python server.py --docs ./docs
  python server.py --docs ./docs --embedding-model bge-large --reranker --hyde
  python server.py --docs ./docs --chunk-strategy semantic --web-search
  python server.py --list-models
  python server.py --list-rerankers
""",
    )

    # -- Documents ------------------------------------------------------------
    doc = p.add_argument_group("Documents")
    doc.add_argument("--docs", metavar="PATH",
                     help="Folder containing documents to index (required to run).")
    doc.add_argument("--collection", default=DEFAULT_COLLECTION, metavar="NAME",
                     help=f"ChromaDB collection prefix. Default: {DEFAULT_COLLECTION}")

    # -- Chunking -------------------------------------------------------------
    chunk = p.add_argument_group("Chunking")
    chunk.add_argument("--chunk-strategy",
                       choices=["paragraph", "sentence", "semantic", "fixed"],
                       default="paragraph",
                       help="Chunking strategy. Default: paragraph.")
    chunk.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE,
                       help=f"Target words per chunk. Default: {DEFAULT_CHUNK_SIZE}")
    chunk.add_argument("--chunk-overlap", type=int, default=DEFAULT_CHUNK_OVERLAP,
                       help=f"Word overlap between chunks. Default: {DEFAULT_CHUNK_OVERLAP}")

    # -- Embedding ------------------------------------------------------------
    emb = p.add_argument_group("Embedding")
    emb.add_argument("--embedding-model", default=DEFAULT_EMBEDDING_MODEL,
                     choices=list(EMBEDDING_MODELS.keys()), metavar="KEY",
                     help=f"Embedding model key. Default: {DEFAULT_EMBEDDING_MODEL}. "
                          "Run --list-models to see all options.")
    emb.add_argument("--list-models", action="store_true",
                     help="Print all available embedding models and exit.")

    # -- Retrieval ------------------------------------------------------------
    ret = p.add_argument_group("Retrieval")
    ret.add_argument("--top-k", type=int, default=DEFAULT_TOP_K,
                     help=f"Default results per query. Default: {DEFAULT_TOP_K}")
    ret.add_argument("--min-score", type=float, default=DEFAULT_MIN_SCORE,
                     # FIX: range previously rendered as "[0β1]" (mojibake).
                     help=f"Minimum similarity score [0-1]. Default: {DEFAULT_MIN_SCORE}")
    ret.add_argument("--no-hybrid", action="store_true",
                     help="Disable BM25+ hybrid search; use dense-only retrieval.")

    # -- Reranking ------------------------------------------------------------
    rer = p.add_argument_group("Reranking")
    rer.add_argument("--reranker", action="store_true",
                     help="Enable cross-encoder reranking for higher precision.")
    rer.add_argument("--reranker-model", default=DEFAULT_RERANKER,
                     choices=list(RERANKER_MODELS.keys()), metavar="KEY",
                     help=f"Reranker model key. Default: {DEFAULT_RERANKER}. "
                          "Run --list-rerankers to see all options.")
    rer.add_argument("--list-rerankers", action="store_true",
                     help="Print all available reranker models and exit.")

    # -- HyDE -----------------------------------------------------------------
    hyde = p.add_argument_group("HyDE (Hypothetical Document Embedding)")
    hyde.add_argument("--hyde", action="store_true",
                      help="Enable HyDE query expansion via local LM Studio LLM.")
    hyde.add_argument("--hyde-url", default=None, metavar="URL",
                      help="Override LM Studio completions URL for HyDE generation.")

    # -- Web search -----------------------------------------------------------
    web = p.add_argument_group("Web search")
    web.add_argument("--web-search", action="store_true",
                     help="Enable the web_search MCP tool.")
    web.add_argument("--search-provider", default="duckduckgo",
                     choices=["duckduckgo", "brave", "tavily", "serper"],
                     help="Web search provider. Default: duckduckgo.")
    web.add_argument("--brave-api-key", default=None, metavar="KEY")
    web.add_argument("--tavily-api-key", default=None, metavar="KEY")
    web.add_argument("--serper-api-key", default=None, metavar="KEY")
    web.add_argument("--web-max-results", type=int, default=DEFAULT_WEB_MAX_RESULTS,
                     help=f"Default web results per query. Default: {DEFAULT_WEB_MAX_RESULTS}")

    return p.parse_args()
# ─────────────────────────────────────────────────────────────────────────────
# Utility: model browser
# ─────────────────────────────────────────────────────────────────────────────
def print_models() -> None:
    """List every embedding model on stdout, marking the configured default."""
    lines = ["\nββ Embedding Models " + "β" * 55]
    badges = {"fast": "β‘", "balanced": "β ", "powerful": "π₯"}
    for name, info in EMBEDDING_MODELS.items():
        suffix = " β default" if name == DEFAULT_EMBEDDING_MODEL else ""
        badge = badges.get(info["tier"], "")
        lines.append(f"β {name:<20} {badge} [{info['tier']:<9}] {info['label']}{suffix}")
        lines.append(f"β {'':20} {info['description']}")
    lines.append("β" + "β" * 73)
    print("\n".join(lines))
def print_rerankers() -> None:
    """List every reranker model on stdout, marking the configured default."""
    lines = ["\nββ Reranker Models " + "β" * 55]
    for name, info in RERANKER_MODELS.items():
        suffix = " β default" if name == DEFAULT_RERANKER else ""
        lines.append(f"β {name:<35} {info['label']}{suffix}")
        lines.append(f"β {'':35} {info['description']}")
    lines.append("β" + "β" * 73)
    print("\n".join(lines))
# ─────────────────────────────────────────────────────────────────────────────
# Bootstrap
# ─────────────────────────────────────────────────────────────────────────────
def bootstrap(args: argparse.Namespace) -> tuple[RAGEngine, WebSearchEngine | None]:
    """Build the RAG engine (and optional web-search engine) from CLI args.

    Side effects: may rebind ``config.HYDE_LM_STUDIO_URL``, logs a startup
    banner, and runs document ingestion before returning.
    """
    if args.hyde and args.hyde_url:
        # NOTE(review): this rebinds the attribute on the config module.
        # Code that did `from config import HYDE_LM_STUDIO_URL` at import
        # time will not see the override — confirm HyDE reads it lazily.
        import config as _cfg
        _cfg.HYDE_LM_STUDIO_URL = args.hyde_url

    # Startup banner summarising the effective configuration.
    banner = "β" * 60
    log.info(banner)
    log.info("MCP RAG Server startingβ¦")
    log.info("Documents : %s", args.docs)
    log.info("Model : %s", args.embedding_model)
    log.info("Strategy : %s (size=%d overlap=%d)",
             args.chunk_strategy, args.chunk_size, args.chunk_overlap)
    log.info("Hybrid : %s", "OFF" if args.no_hybrid else "ON (BM25+)")
    log.info("Reranker : %s", args.reranker_model if args.reranker else "OFF")
    log.info("HyDE : %s", "ON" if args.hyde else "OFF")
    log.info("Web search: %s", args.search_provider if args.web_search else "OFF")
    log.info(banner)

    rag = RAGEngine(
        docs_path=args.docs,
        collection_name=args.collection,
        embedding_model=args.embedding_model,
        chunk_strategy=args.chunk_strategy,
        chunk_size=args.chunk_size,
        chunk_overlap=args.chunk_overlap,
        top_k=args.top_k,
        min_score=args.min_score,
        use_hybrid=not args.no_hybrid,
        use_reranker=args.reranker,
        reranker_model=args.reranker_model,
        use_hyde=args.hyde,
    )
    rag.ingest()  # index documents up front so first query is served warm

    # The web engine is optional; None disables the web_search tool.
    if not args.web_search:
        return rag, None
    web = WebSearchEngine(
        provider=args.search_provider,
        brave_api_key=args.brave_api_key,
        tavily_api_key=args.tavily_api_key,
        serper_api_key=args.serper_api_key,
    )
    return rag, web
# ─────────────────────────────────────────────────────────────────────────────
# MCP Server
# ─────────────────────────────────────────────────────────────────────────────
def build_server(
    args: argparse.Namespace,
    engine: RAGEngine,
    web_engine: WebSearchEngine | None,
) -> Server:
    """Assemble the MCP server and register tool listing/dispatch handlers.

    Caches are closed over by the handlers so repeated identical queries
    within the TTL are served without re-running retrieval/search.
    """
    server = Server("mcp-rag-server")
    rag_cache = TTLCache[str](ttl_seconds=RAG_CACHE_TTL_SECONDS)
    web_cache = TTLCache[str](ttl_seconds=WEB_CACHE_TTL_SECONDS)

    @server.list_tools()
    async def _advertise() -> list[types.Tool]:
        # Core tools are always advertised; web search only when enabled.
        tools = [
            build_rag_tool_schema(args.top_k, args.min_score, engine),
            build_ingest_tool_schema(),
        ]
        if web_engine is not None:
            tools.append(
                build_web_tool_schema(args.search_provider, args.web_max_results)
            )
        return tools

    @server.call_tool()
    async def _dispatch(name: str, arguments: dict) -> list[types.TextContent]:
        if name == "rag_search":
            return await handle_rag_search(
                arguments, engine, rag_cache, RAG_CACHE_TTL_SECONDS
            )

        if name == "reindex_documents":
            return await handle_reindex(engine)

        if name == "web_search":
            if web_engine is None:
                # Tool was invoked while disabled; explain how to enable it.
                return [types.TextContent(
                    type="text",
                    text="Web search is disabled. Restart with --web-search.",
                )]
            return await handle_web_search(
                arguments, web_engine, web_cache,
                WEB_CACHE_TTL_SECONDS, args.web_max_results,
            )

        return [types.TextContent(type="text", text=f"Unknown tool: '{name}'")]

    return server
# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────
async def main() -> None:
    """CLI entry point: parse flags, bootstrap engines, serve MCP over stdio."""
    args = parse_args()

    # Informational modes print and exit before any engine is built.
    if args.list_models:
        print_models()
        return
    if args.list_rerankers:
        print_rerankers()
        return

    # --docs is mandatory for actually running the server.
    if not args.docs:
        print("ERROR: --docs is required.\n", file=sys.stderr)
        print("Run with --list-models to browse available embedding models.", file=sys.stderr)
        sys.exit(1)

    engine, web_engine = bootstrap(args)
    server = build_server(args, engine, web_engine)
    log.info("MCP server ready β listening on stdio.")

    # Serve until the stdio transport closes.
    async with stdio_server() as (read_stream, write_stream):
        init_options = server.create_initialization_options()
        await server.run(read_stream, write_stream, init_options)
if __name__ == "__main__":
    # Script entry point: run the async main() on a fresh event loop.
    asyncio.run(main())