import { type DocEntry } from "./docs/openai";
/** One indexed document: the source entry together with the tokens extracted from it. */
interface IndexedDoc {
  doc: DocEntry;
  tokens: string[];
}

/**
 * In-memory search index as produced by `buildIndex`.
 * Holds one `IndexedDoc` per document; entries store token lists that the
 * scorer compares against the tokenized query.
 */
interface VectorDB {
  entries: IndexedDoc[];
}
/**
 * Splits free text into normalized search tokens.
 *
 * The text is lower-cased, every character that is not a Latin letter,
 * a Cyrillic letter (а-я, ё), a digit or whitespace is replaced by a space,
 * and the result is split on runs of whitespace. Tokens of two characters or
 * fewer and tokens listed in `STOP_WORDS` are discarded.
 *
 * @param text Raw text (document body or user query).
 * @returns The surviving tokens in their original order, duplicates included.
 */
function tokenize(text: string): string[] {
  const normalized = text.toLowerCase().replace(/[^a-zа-яё0-9\s]/gi, " ");
  const kept: string[] = [];
  for (const word of normalized.split(/\s+/)) {
    // Very short fragments (including the empty strings produced by
    // leading/trailing whitespace) are dropped.
    if (word.length <= 2) continue;
    if (STOP_WORDS.has(word)) continue;
    kept.push(word);
  }
  return kept;
}
/**
 * Lower-case words that `tokenize` removes from its output.
 *
 * Besides common English function words, the set also lists vocabulary such
 * as "string", "request" or "model" — presumably because those words occur in
 * most API documentation entries and would not help tell documents apart.
 */
const STOP_WORDS = new Set<string>([
  // Common English function words.
  "the", "and", "for", "are", "but", "not",
  "you", "all", "can", "had", "her", "was",
  "one", "our", "out", "has", "have", "been",
  "from", "this", "that", "with", "will", "each",
  "make", "like", "just", "over", "such", "more",
  "than", "them", "very", "when", "what", "which",
  "their", "there", "about",
  // Generic API-documentation vocabulary.
  "string", "number", "type", "object", "array", "boolean",
  "required", "default", "optional", "example", "response", "request",
  "body", "header", "model", "token", "tokens",
  // Further English filler words.
  "using", "should", "would", "could",
]);
/**
 * Builds a search index over the given documents.
 *
 * For every document the title, content, keywords, provider and category are
 * concatenated (space-separated) and passed through `tokenize`; the resulting
 * token list is stored next to the document.
 *
 * @param docs Documents to index.
 * @returns An index with one entry per input document, in input order.
 */
function buildIndex(docs: DocEntry[]): VectorDB {
  const toEntry = (doc: DocEntry) => {
    const searchableText = `${doc.title} ${doc.content} ${doc.keywords.join(" ")} ${doc.provider} ${doc.category}`;
    return { doc, tokens: tokenize(searchableText) };
  };
  return { entries: docs.map(doc => toEntry(doc)) };
}
/**
 * Optional corpus-level statistics for `bm25Score`.
 * Every field defaults to the value the scorer previously hard-coded, so
 * omitting the object leaves scores unchanged.
 */
interface Bm25CorpusStats {
  /** Average document length in tokens. Defaults to 200. */
  avgDocLen?: number;
  /** Number of documents in the corpus. Defaults to 100. */
  totalDocs?: number;
  /**
   * Number of documents that contain each term. When the map (or a term in
   * it) is missing, a matching term is assumed to occur in exactly one
   * document — the one being scored.
   */
  docFreq?: ReadonlyMap<string, number>;
}

/**
 * Scores one document against a query with the Okapi BM25 formula.
 *
 * For each query token that occurs in the document the contribution is
 * `idf * tf`, where
 *   idf = ln((N - n + 0.5) / (n + 0.5) + 1)
 *   tf  = f * (k1 + 1) / (f + k1 * (1 - b + b * docLen / avgDocLen))
 * with `f` the term frequency in the document, `n` the document frequency and
 * `N` the corpus size. Query tokens that repeat contribute once per
 * occurrence; query tokens absent from the document contribute nothing.
 *
 * @param queryTokens Tokenized query.
 * @param docTokens Tokenized document.
 * @param k1 Term-frequency saturation parameter.
 * @param b Length-normalization strength (0 disables normalization).
 * @param stats Optional corpus statistics; without them the IDF factor is the
 *   same constant for every matching term.
 * @returns The summed score; 0 when no query token occurs in the document.
 */
function bm25Score(
  queryTokens: string[],
  docTokens: string[],
  k1: number = 1.5,
  b: number = 0.75,
  stats: Bm25CorpusStats = {},
): number {
  const { avgDocLen = 200, totalDocs = 100, docFreq } = stats;
  const corpusSize = Math.max(totalDocs, 1);
  const docLen = docTokens.length;
  // A non-positive average would divide by zero; treat the document as
  // average-length in that case.
  const lengthRatio = avgDocLen > 0 ? docLen / avgDocLen : 1;

  // Count every document token once instead of rescanning per query term.
  const termFreq = new Map<string, number>();
  for (const token of docTokens) {
    termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
  }

  let score = 0;
  for (const term of queryTokens) {
    const freq = termFreq.get(term) ?? 0;
    // An absent term contributes 0; skipping it also avoids the 0/0 = NaN
    // that the tf formula yields when k1 is 0.
    if (freq === 0) continue;

    // The term is in this document, so at least one document contains it;
    // clamping to the corpus size keeps the logarithm's argument positive.
    const docsWithTerm = Math.min(Math.max(docFreq?.get(term) ?? 1, 1), corpusSize);
    const idf = Math.log((corpusSize - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1);
    const tf = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * lengthRatio));
    score += idf * tf;
  }
  return score;
}
/**
 * Ranks the indexed documents against a free-text query.
 *
 * The query is tokenized with `tokenize`, every index entry is scored with
 * `bm25Score`, entries whose score is not greater than zero are dropped, and
 * the rest are returned in descending score order.
 *
 * @param db Index to search.
 * @param query Free-text query.
 * @param topN Maximum number of results to return.
 * @returns Up to `topN` documents with their scores, best match first.
 */
function searchRAG(db: VectorDB, query: string, topN: number = 5): { doc: DocEntry; score: number }[] {
  const terms = tokenize(query);
  const hits: { doc: DocEntry; score: number }[] = [];
  for (const { doc, tokens } of db.entries) {
    const score = bm25Score(terms, tokens);
    if (score > 0) {
      hits.push({ doc, score });
    }
  }
  hits.sort((left, right) => right.score - left.score);
  return hits.slice(0, topN);
}
/** Most recently built index; `null` until `getVectorDB` is first called. */
let _vectorDB: VectorDB | null = null;

/**
 * Returns a search index for `docs`, reusing the previously built index when
 * it still describes the same documents.
 *
 * The cached index is considered valid only if it has the same number of
 * entries as `docs` and every position holds the very same document object.
 * Previously the first index ever built was returned for all later calls,
 * even when a different or extended corpus was passed.
 *
 * Note: documents are compared by identity, so mutating the fields of a
 * document object in place is not detected.
 *
 * @param docs Documents the index must cover.
 * @returns An index over exactly `docs`.
 */
export function getVectorDB(docs: DocEntry[]): VectorDB {
  const cached = _vectorDB;
  if (
    cached !== null &&
    cached.entries.length === docs.length &&
    docs.every((doc, i) => cached.entries[i]?.doc === doc)
  ) {
    return cached;
  }
  const rebuilt = buildIndex(docs);
  _vectorDB = rebuilt;
  return rebuilt;
}
/**
 * Lower-case provider names and aliases accepted from callers, mapped to the
 * provider id that is compared against `DocEntry.provider`.
 * A `Map` is used so that caller-supplied strings such as "constructor" or
 * "__proto__" cannot resolve to inherited `Object.prototype` members.
 */
const RAG_PROVIDER_ALIASES: ReadonlyMap<string, string> = new Map([
  ["openai", "openai"], ["chatgpt", "openai"], ["gpt", "openai"],
  ["anthropic", "anthropic"], ["claude", "anthropic"],
  ["minimax", "minimax"],
  ["gemini", "gemini"], ["google", "gemini"], ["bard", "gemini"],
  ["mistral", "mistral"],
  ["cohere", "cohere"],
  ["groq", "groq"],
  ["together", "together"], ["togetherai", "together"],
  ["deepseek", "deepseek"],
  ["perplexity", "perplexity"], ["pplx", "perplexity"],
  ["xai", "xai"], ["grok", "xai"],
]);

/** Human-readable label for each provider id, used in result headings. */
const RAG_PROVIDER_DISPLAY_NAMES: ReadonlyMap<string, string> = new Map([
  ["openai", "OpenAI (ChatGPT)"],
  ["anthropic", "Anthropic (Claude)"],
  ["minimax", "MiniMax"],
  ["gemini", "Google Gemini"],
  ["mistral", "Mistral AI"],
  ["cohere", "Cohere"],
  ["groq", "Groq"],
  ["together", "Together AI"],
  ["deepseek", "DeepSeek"],
  ["perplexity", "Perplexity"],
  ["xai", "xAI (Grok)"],
]);

/** Builds the help text returned when a search yields no results. */
function buildNoResultsMessage(query: string): string {
  return [
    `No documentation found for "${query}".`,
    "",
    "Available providers: OpenAI, Anthropic (Claude), MiniMax, Google Gemini, Mistral AI, Cohere, Groq, Together AI, DeepSeek, Perplexity, xAI (Grok)",
    "Available categories: chat, tools, embeddings, multimodal, output, limits, optimization, rerank, completion",
    "",
    "Try a more specific query like:",
    '- "how to call functions in OpenAI"',
    '- "Claude tool use format"',
    '- "Gemini multimodal image input"',
    '- "Mistral chat API endpoint"',
    '- "Cohere rerank API"',
    '- "Groq fast inference"',
    '- "DeepSeek pricing"',
    '- "Perplexity web search"',
    '- "xAI Grok chat"',
    "",
  ].join("\n");
}

/** Renders one ranked hit as a Markdown section followed by a `---` separator. */
function formatRagResult(hit: { doc: DocEntry; score: number }, position: number): string {
  const { doc, score } = hit;
  const providerLabel = RAG_PROVIDER_DISPLAY_NAMES.get(doc.provider) ?? doc.provider;
  return (
    `## ${position}. ${doc.title} [${providerLabel}]\n` +
    `Category: ${doc.category} | Relevance: ${score.toFixed(2)}\n` +
    `Keywords: ${doc.keywords.join(", ")}\n\n` +
    doc.content +
    "\n\n---\n\n"
  );
}

/**
 * Searches documentation entries and returns a Markdown-formatted report.
 *
 * Filtering happens before ranking:
 * - `provider` is trimmed, lower-cased and resolved through the alias table;
 *   an unrecognized provider is ignored (no provider filtering is applied).
 * - `category` is lower-cased and keeps documents whose category equals it or
 *   that have a keyword containing it as a substring.
 *
 * The remaining documents are indexed with `buildIndex` and ranked with
 * `searchRAG`, so the index is rebuilt over the filtered subset on each call.
 *
 * @param docs Documents to search.
 * @param query Free-text query.
 * @param provider Optional provider name or alias (case-insensitive).
 * @param category Optional category or keyword fragment (case-insensitive).
 * @param topN Maximum number of results to include.
 * @returns The formatted results, or a help message listing providers,
 *   categories and example queries when nothing matched.
 */
export function searchDocsRAG(docs: DocEntry[], query: string, provider?: string, category?: string, topN: number = 5): string {
  let filtered = docs;

  if (provider) {
    const canonicalProvider = RAG_PROVIDER_ALIASES.get(provider.trim().toLowerCase());
    if (canonicalProvider !== undefined) {
      filtered = filtered.filter(d => d.provider === canonicalProvider);
    }
  }

  if (category) {
    const wanted = category.toLowerCase();
    filtered = filtered.filter(
      d => d.category === wanted || d.keywords.some((k: string) => k.toLowerCase().includes(wanted)),
    );
  }

  const results = searchRAG(buildIndex(filtered), query, topN);
  if (results.length === 0) {
    return buildNoResultsMessage(query);
  }

  const header = `Found ${results.length} documentation result(s) for "${query}" (RAG search):\n\n`;
  return header + results.map((hit, i) => formatRagResult(hit, i + 1)).join("");
}