import type { Tool } from '@lmstudio/sdk';
import { tool } from '@lmstudio/sdk';
import { z } from 'zod';
import { search as ddgSearch } from 'duck-duck-scrape';
import { htmlToText } from 'html-to-text';
import type { PluginConfig } from '../config.js';
import { fetchWithRetry } from '../performanceUtils.js';
// ==================== Search Engine Implementations ====================
/**
 * One search hit in the engine-independent shape that every search function
 * in this file returns.
 */
interface SearchResultItem {
/** Title text of the hit as extracted by the engine. */
title: string;
/** Link target of the hit; may be an empty string when the engine could not extract one. */
url: string;
/** Short summary of the hit; an empty string when the engine provides none. */
description: string;
}
/**
 * DuckDuckGo search through the `duck-duck-scrape` client.
 *
 * The query is sent with region `wt-wt` and each returned entry is reduced to
 * its title, URL and description.
 *
 * @param query - Search text, passed to the client unchanged.
 * @returns One item per entry in the client's `results` list; a falsy
 *          description is normalised to an empty string.
 */
async function searchDDGApi(query: string): Promise<SearchResultItem[]> {
  const response = await ddgSearch(query, { region: 'wt-wt' });
  const rawHits = response.results as Array<Record<string, unknown>>;

  // Narrow each loosely typed entry down to the three fields callers use.
  const toItem = (hit: Record<string, unknown>): SearchResultItem => {
    const summary = (hit.description as string) || '';
    return {
      title: hit.title as string,
      url: hit.url as string,
      description: summary,
    };
  };

  return rawHits.map((hit) => toItem(hit));
}
/**
 * DuckDuckGo HTML fetch (fallback when the API client fails).
 *
 * Requests the `html.duckduckgo.com` results page and scrapes every
 * `<a class="result__a" href="…">Title</a>` anchor with a regular expression,
 * so no DOM parser is needed under Node.js.
 *
 * @param query - Search text; URL-encoded before it is placed in the request.
 * @returns Up to 10 results in page order. Titles and URLs have their HTML
 *          character references decoded, and DuckDuckGo redirect links are
 *          unwrapped to the target URL. `description` is always empty because
 *          only the title anchor is parsed.
 * @throws Error if the response status is not OK.
 */
async function searchDDGFetch(query: string): Promise<SearchResultItem[]> {
  const response = await fetchWithRetry(
    `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`
  );
  if (!response.ok) throw new Error(`DuckDuckGo Fetch failed: ${response.status}`);
  const html = await response.text();

  // Named character references that are decoded; unknown names are left as-is.
  // A Map (not a plain object) so names like "constructor" cannot hit the prototype.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0'],
  ]);

  // Decodes named, decimal and hexadecimal character references in scraped text.
  // Kept local so this function does not depend on any other helper in the file.
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (entity: string, body: string) => {
      if (!body.startsWith('#')) {
        return namedEntities.get(body.toLowerCase()) ?? entity;
      }
      const isHex = /^#x/i.test(body);
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });

  // Result links on this page are typically protocol-relative redirects of the
  // form //duckduckgo.com/l/?uddg=<encoded target>; hand back the real target.
  const resolveHref = (href: string): string => {
    const decoded = decodeEntities(href);
    try {
      const parsed = new URL(decoded, 'https://duckduckgo.com');
      const isRedirect =
        parsed.pathname === '/l/' && /(^|\.)duckduckgo\.com$/i.test(parsed.hostname);
      if (isRedirect) {
        const target = parsed.searchParams.get('uddg'); // already percent-decoded
        if (target) return target;
      }
      // Make protocol-relative links absolute; leave everything else untouched.
      return decoded.startsWith('//') ? parsed.href : decoded;
    } catch {
      return decoded; // not parseable as a URL: return the decoded attribute value
    }
  };

  // Extract titles from <a class="result__a" href="..." rel="...">Title</a>
  const titleRegex = /<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>([^<]+)<\/a>/gi;
  const results: SearchResultItem[] = [];
  for (const [, href = '', rawTitle = ''] of html.matchAll(titleRegex)) {
    results.push({
      title: decodeEntities(rawTitle).trim(),
      url: resolveHref(href),
      description: '',
    });
    if (results.length === 10) break; // only the first 10 hits are returned
  }
  return results;
}
/**
 * Google search via plain HTML fetch.
 *
 * Every `<h3>` heading on the results page is treated as a result title and is
 * paired with the `href` of the `<a>` element it sits inside, when there is one.
 * NOTE(review): this assumes result headings are nested in their link, which
 * depends on Google's markup — verify against a live response.
 *
 * @param query - Search text; URL-encoded before it is placed in the request.
 * @returns Up to 10 results in page order. `url` is an empty string when the
 *          heading is not inside a link with a usable http(s) target;
 *          `description` is always empty.
 * @throws Error if the response status is not OK.
 */
async function searchGoogle(query: string): Promise<SearchResultItem[]> {
  const response = await fetchWithRetry(
    `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`,
    { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } }
  );
  if (!response.ok) throw new Error(`Google search failed: ${response.status}`);
  const html = await response.text();

  // Named character references that are decoded; unknown names are left as-is.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0'],
  ]);

  // Decodes named, decimal and hexadecimal character references in scraped text.
  // Kept local so this function does not depend on any other helper in the file.
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (entity: string, body: string) => {
      if (!body.startsWith('#')) {
        return namedEntities.get(body.toLowerCase()) ?? entity;
      }
      const isHex = /^#x/i.test(body);
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });

  // Returns the link target of the <a> element that is still open at `position`,
  // or '' when the position is not inside a link with an http(s) target.
  const hrefOfEnclosingAnchor = (position: number): string => {
    const open = html.lastIndexOf('<a ', position);
    if (open === -1 || html.lastIndexOf('</a>', position) > open) return '';
    const tagEnd = html.indexOf('>', open);
    if (tagEnd === -1 || tagEnd > position) return '';
    const href = /\shref="([^"]*)"/i.exec(html.slice(open, tagEnd))?.[1];
    if (!href) return '';
    try {
      const resolved = new URL(decodeEntities(href), 'https://www.google.com');
      // Links of the form /url?q=<target> are redirects; report the target itself.
      if (resolved.hostname === 'www.google.com' && resolved.pathname === '/url') {
        const target = resolved.searchParams.get('q') || resolved.searchParams.get('url');
        if (target) return target;
      }
      return resolved.protocol === 'http:' || resolved.protocol === 'https:' ? resolved.href : '';
    } catch {
      return '';
    }
  };

  // The `s` flag lets a heading span several lines.
  const headingRegex = /<h3[^>]*>(.*?)<\/h3>/gs;
  const results: SearchResultItem[] = [];
  for (const heading of html.matchAll(headingRegex)) {
    const inner = heading[1] ?? '';
    results.push({
      // Strip tags before decoding so an escaped "<" in the title survives.
      title: decodeEntities(inner.replace(/<[^>]*>/g, '')).trim(),
      url: hrefOfEnclosingAnchor(heading.index ?? 0),
      description: '',
    });
    if (results.length === 10) break; // only the first 10 headings are returned
  }
  return results;
}
/**
 * Bing search via plain HTML fetch.
 *
 * Each `<li class="b_algo">` block is treated as one result. The title link is
 * taken from the anchor inside the block's `<h2>` when present (NOTE(review):
 * assumes Bing keeps the title link in an h2 — verify against a live response);
 * otherwise the first anchor whose content is plain text is used.
 *
 * @param query - Search text; URL-encoded before it is placed in the request.
 * @returns Up to 10 results in page order with tags stripped from titles and
 *          HTML character references decoded in titles and URLs;
 *          `description` is always empty.
 * @throws Error if the response status is not OK.
 */
async function searchBing(query: string): Promise<SearchResultItem[]> {
  const response = await fetchWithRetry(
    `https://www.bing.com/search?q=${encodeURIComponent(query)}&count=10`,
    { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } }
  );
  if (!response.ok) throw new Error(`Bing search failed: ${response.status}`);
  const html = await response.text();

  // Named character references that are decoded; unknown names are left as-is.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0'],
  ]);

  // Decodes named, decimal and hexadecimal character references in scraped text.
  // Kept local so this function does not depend on any other helper in the file.
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (entity: string, body: string) => {
      if (!body.startsWith('#')) {
        return namedEntities.get(body.toLowerCase()) ?? entity;
      }
      const isHex = /^#x/i.test(body);
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });

  const resultRegex = /<li class="b_algo"[^>]*>(.*?)<\/li>/gs;
  // Title link inside the heading; its content may contain inline markup.
  const headingLinkRegex = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>(.*?)<\/a>/s;
  // Fallback: first link in the block whose content is plain text.
  const plainLinkRegex = /<a[^>]+href="([^"]+)"[^>]*>([^<]+)<\/a>/;

  const results: SearchResultItem[] = [];
  for (const [, block = ''] of html.matchAll(resultRegex)) {
    const link = headingLinkRegex.exec(block) ?? plainLinkRegex.exec(block);
    if (!link) continue;
    const [, href = '', rawTitle = ''] = link;
    results.push({
      // Strip tags before decoding so an escaped "<" in the title survives.
      title: decodeEntities(rawTitle.replace(/<[^>]*>/g, '')).trim(),
      url: decodeEntities(href),
      description: '',
    });
    if (results.length === 10) break; // only the first 10 hits are returned
  }
  return results;
}
/** Call signature shared by every search engine implementation in this file. */
type SearchEngineFn = (query: string) => Promise<SearchResultItem[]>;

/** Registry of the available engines, keyed by the identifier used to select them. */
const SEARCH_ENGINES: Record<string, SearchEngineFn> = {
  'ddg-api': searchDDGApi,
  'ddg-fetch': searchDDGFetch,
  google: searchGoogle,
  bing: searchBing,
};

/** Fixed order in which engines are tried once the primary engine has failed. */
const FALLBACK_ORDER: string[] = [
  'ddg-api',
  'ddg-fetch',
  'google',
  'bing',
];
// ==================== Fallback Chain Logic ====================
/**
 * Web search with automatic fallback.
 *
 * The engine named by `config.searchFallbackChain` (default `ddg-api`) is tried
 * first, followed by every other engine in `FALLBACK_ORDER`. An engine is
 * abandoned for the next one when it throws, is not registered, or returns
 * zero results — the HTML scrapers return an empty list rather than throwing
 * when the page they receive does not match their patterns.
 *
 * @param query - Search text handed to each engine unchanged.
 * @param config - Plugin configuration; only `searchFallbackChain` is read.
 * @returns `success: true` with the first non-empty result set and the engine
 *          that produced it. If no engine produced results but at least one ran
 *          without error, `success: true` with that first engine's empty result
 *          set. `success: false` with an error message only when every engine
 *          failed or was unknown.
 */
async function searchWithFallbackChain(
  query: string,
  config: PluginConfig
): Promise<{ success: boolean; data?: { query: string; results: SearchResultItem[]; count: number; engine: string }; error?: string }> {
  // Start engine from Config (Single Select)
  const primaryEngine = config.searchFallbackChain || 'ddg-api';
  // Fallback chain: primary engine + all others in defined order
  const chain = [primaryEngine, ...FALLBACK_ORDER.filter(e => e !== primaryEngine)];

  // First error-free but empty outcome; returned if no later engine does better.
  let firstEmpty:
    | { query: string; results: SearchResultItem[]; count: number; engine: string }
    | undefined;

  for (const engine of chain) {
    // Own-property lookup so a key such as "toString" cannot resolve to an
    // inherited Object.prototype member.
    const searchFn = Object.prototype.hasOwnProperty.call(SEARCH_ENGINES, engine)
      ? SEARCH_ENGINES[engine]
      : undefined;
    if (!searchFn) {
      console.warn(`Search engine "${engine}" not found, skipping`);
      continue;
    }
    try {
      const results = await searchFn(query);
      // Validate result count - warn if low results
      if (results.length < 2) {
        console.warn(`Low search results for "${query}": ${results.length} results from ${engine}`);
      }
      const data = { query, results, count: results.length, engine };
      if (results.length > 0) {
        return { success: true, data };
      }
      // Nothing found: remember this outcome and give the next engine a chance.
      if (!firstEmpty) firstEmpty = data;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Search engine "${engine}" failed: ${message}`);
      // Try next engine in the chain
    }
  }

  if (firstEmpty) {
    return { success: true, data: firstEmpty };
  }
  return {
    success: false,
    error: `All search engines failed. Tried: ${chain.join(' → ')}`,
  };
}
// ==================== Typed Params Interfaces ====================
/** Arguments received by the `web_search` tool implementation. */
interface WebSearchParams { query: string; }
/** Arguments received by the `wikipedia_search` tool implementation; `lang` selects the Wikipedia language subdomain. */
interface WikipediaSearchParams { query: string; lang?: string; }
/** Arguments received by the `fetch_web_content` tool implementation. */
interface FetchWebContentParams { url: string; }
/** Arguments received by the `rag_web_content` tool implementation: the page to fetch and the query used to pick relevant text. */
interface RagWebContentParams { url: string; query: string; }
/** Shape of a Wikipedia language subdomain label such as `en`, `simple` or `zh-min-nan`. */
const WIKIPEDIA_LANG_PATTERN = /^[a-z][a-z0-9]*(?:-[a-z0-9]+)*$/i;

/** Maximum number of characters of page text returned by `fetch_web_content`. */
const FETCH_CONTENT_MAX_CHARS = 5000;

/**
 * Picks the sentences of `text` that best match `query`.
 *
 * Sentences are split on `.`, `!` and `?`, scored by how many distinct query
 * terms they contain (case-insensitive substring match) and returned best
 * first; sentences with equal scores keep their order in the text.
 *
 * @param text - Plain text to search.
 * @param query - Whitespace-separated search terms.
 * @param limit - Maximum number of sentences to return.
 * @returns Up to `limit` matching sentences; empty when nothing matches.
 */
function selectRelevantSentences(text: string, query: string, limit: number): string[] {
  const words = query.toLowerCase().split(/\s+/).filter(Boolean);
  const longWords = words.filter((word) => word.length > 2);
  // Very short words are ignored as noise, unless the query consists only of
  // such words (e.g. "AI"), in which case they are all the signal there is.
  const terms = [...new Set(longWords.length > 0 ? longWords : words)];
  if (terms.length === 0) return [];

  return text
    .split(/[.!?]+/)
    .map((sentence) => sentence.trim())
    .filter(Boolean)
    .map((sentence) => {
      const haystack = sentence.toLowerCase();
      return { sentence, score: terms.filter((term) => haystack.includes(term)).length };
    })
    .filter((candidate) => candidate.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map((candidate) => candidate.sentence);
}

/**
 * Builds the web research tools: `web_search`, `wikipedia_search`,
 * `fetch_web_content` and `rag_web_content`.
 *
 * Apart from `web_search`, which returns the fallback chain's result directly,
 * each tool catches its own errors and reports them as
 * `{ success: false, error }` instead of throwing.
 *
 * @param config - Plugin configuration, forwarded to the search fallback chain.
 * @returns The four tool definitions, in the order listed above.
 */
export function registerWebResearchTools(config: PluginConfig): Tool[] {
  const tools: Tool[] = [];

  // web_search tool — uses primary engine from Config + automatic fallback
  tools.push(tool({
    name: 'web_search',
    description: 'Search the web using a configurable search engine with automatic fallback to other engines if the primary one fails.',
    parameters: {
      query: z.string().describe('The search query'),
    },
    implementation: async ({ query }: WebSearchParams) => { // C5 FIX: typed params
      return await searchWithFallbackChain(query, config);
    },
  }));

  // wikipedia_search tool
  tools.push(tool({
    name: 'wikipedia_search',
    description: 'Search Wikipedia for a given query and return page summaries.',
    parameters: {
      query: z.string().describe('The search query'),
      lang: z.string().optional().default('en').describe('Language code (default: en)'),
    },
    implementation: async ({ query, lang }: WikipediaSearchParams) => { // C5 FIX: typed params
      try {
        const language = lang || 'en';
        // The code becomes part of the request hostname, so anything that is not
        // a plain subdomain label (dots, slashes, "@", …) must be rejected.
        if (!WIKIPEDIA_LANG_PATTERN.test(language)) {
          throw new Error(`Invalid language code: ${language}`);
        }
        const origin = `https://${language}.wikipedia.org`;
        const apiUrl = `${origin}/w/api.php?action=query&list=search&srsearch=${encodeURIComponent(query)}&format=json&origin=*`;
        const response = await fetchWithRetry(apiUrl);
        if (!response.ok) {
          throw new Error(`Wikipedia API error: ${response.status}`);
        }
        const data = (await response.json()) as Record<string, unknown>;
        const queryData = data.query as Record<string, unknown> | undefined;
        const searchResults = (queryData?.search as Array<Record<string, unknown>>) || [];
        const pages = searchResults.map((item: Record<string, unknown>) => {
          const title = typeof item.title === 'string' ? item.title : '';
          // Snippets arrive with highlight markup; keep only the text.
          const snippet = typeof item.snippet === 'string' ? item.snippet.replace(/<[^>]*>/g, '') : '';
          return {
            title,
            snippet,
            url: `${origin}/wiki/${encodeURIComponent(title)}`,
          };
        });
        return { success: true, data: { query, language, results: pages, count: pages.length } };
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return { success: false, error: `Wikipedia search failed: ${message}` };
      }
    },
  }));

  // fetch_web_content tool
  tools.push(tool({
    name: 'fetch_web_content',
    description: 'Fetch the clean, text-based content of a webpage URL.',
    parameters: {
      url: z.string().url().describe('The URL to fetch'),
    },
    implementation: async ({ url }: FetchWebContentParams) => { // C5 FIX: typed params
      try {
        const response = await fetchWithRetry(url);
        if (!response.ok) {
          throw new Error(`HTTP error: ${response.status}`);
        }
        const html = await response.text();
        const text = htmlToText(html, {
          wordwrap: false,
          // Removed custom selector for 'img' as it caused "format is not a function" in v9.0.5
          // Default behavior handles images adequately.
        });
        return {
          success: true,
          data: {
            url,
            content: text.substring(0, FETCH_CONTENT_MAX_CHARS), // Limit length
            // Tells the caller whether the page text was cut off at the limit.
            truncated: text.length > FETCH_CONTENT_MAX_CHARS,
          },
        };
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return { success: false, error: `Failed to fetch content: ${message}` };
      }
    },
  }));

  // rag_web_content tool
  tools.push(tool({
    name: 'rag_web_content',
    description: 'Fetch content from a URL, and then use RAG to find and return only the text chunks most relevant to a specific query.',
    parameters: {
      url: z.string().url().describe('The URL to fetch'),
      query: z.string().describe('The search query for relevance matching'),
    },
    implementation: async ({ url, query }: RagWebContentParams) => { // C5 FIX: typed params
      try {
        const response = await fetchWithRetry(url);
        if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
        const html = await response.text();
        const text = htmlToText(html);
        // Simple keyword-based relevance scoring (placeholder for real RAG):
        // the 5 sentences containing the most distinct query terms.
        const relevantChunks = selectRelevantSentences(text, query, 5);
        return { success: true, data: { url, query, chunks: relevantChunks } };
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return { success: false, error: `RAG search failed: ${message}` };
      }
    },
  }));

  return tools;
}