// Forked from npacker/web-tools
// Project Files
// src / tools / web-search-tool.ts
/**
* Web Search tool factory.
*/
import { tool, type Tool, type ToolsProviderController } from "@lmstudio/sdk"
import { z } from "zod"
import { webSearchCacheKey, type TTLCache, type WebSearchResultsPayload } from "../cache"
import { resolveConfig } from "../config/resolve-config"
import { searchWeb, type WebSearchResult } from "../duckduckgo"
import { enrichSearchResults, type EnrichmentMetascraper } from "../enrichment"
import { formatToolError, NoWebResultsError } from "../errors"
import { createRetryNotifier } from "../http"
import { searchPageParameter } from "../search"
import type { RetryOptions } from "../http"
import type { FetchedPage } from "../page"
import type { PerHostRateLimiter, RateLimiter } from "../timing"
import type { Impit } from "impit"
/**
 * Create the "Web Search" tool: it looks up the search cache, queries DuckDuckGo on a miss,
 * optionally enriches the hits through the shared metascraper instance, stores the payload in
 * the search cache, and returns the records together with their count.
 *
 * @param ctl - Tools provider controller from the LM Studio SDK; used to resolve plugin config.
 * @param impit - Shared HTTP client handed to the search and enrichment calls.
 * @param searchCache - Cache of previously produced web search payloads.
 * @param pageCache - Cache of fetched pages, passed through to result enrichment.
 * @param rateLimiter - Limiter passed to the DuckDuckGo search request.
 * @param hostLimiter - Per-host limiter passed to the enrichment step.
 * @param scraper - Shared metascraper instance used by the enrichment step.
 * @param retry - Retry policy for the search request; enrichment reuses it with `retries` set to 0.
 * @returns The configured web search tool.
 */
export function createWebSearchTool(
  ctl: ToolsProviderController,
  impit: Impit,
  searchCache: TTLCache<WebSearchResultsPayload>,
  pageCache: TTLCache<FetchedPage>,
  rateLimiter: RateLimiter,
  hostLimiter: PerHostRateLimiter,
  scraper: EnrichmentMetascraper,
  retry: RetryOptions
): Tool {
  return tool({
    name: "Web Search",
    description:
      "Search for web pages on DuckDuckGo using a query string, returning a list of URLs with titles, snippet previews, and metadata fields (page date, OpenGraph type, description) extracted from each result page.",
    parameters: {
      query: z.string().describe("The search query for finding web pages."),
      page: searchPageParameter,
    },
    /**
     * Run one web search. A cache hit is returned immediately; otherwise the search is
     * performed, optionally enriched, cached, and returned. Any thrown error is converted
     * by `formatToolError` instead of propagating.
     *
     * @param arguments_ - Validated tool parameters (`query`, `page`).
     * @param context - Runtime tool context supplied by the SDK (status reporting, abort signal).
     * @returns `{ results, count }` on success, otherwise whatever `formatToolError` produces.
     */
    implementation: async (arguments_, context) => {
      const { query, page } = arguments_
      context.status("Initiating web search...")

      try {
        const {
          webMaxResults: maxResults,
          webPageStride: pageStride,
          safeSearch,
          includeSnippets: keepSnippets,
          enrichResults: shouldEnrich,
          maxResponseBytes: maxBytes,
        } = resolveConfig(ctl)

        /**
         * Drop the `snippet` field from each record unless snippets are enabled.
         *
         * @param records - Records about to be returned from the tool.
         * @returns The same array when snippets are enabled, otherwise snippet-free copies.
         */
        const withoutDisabledSnippets = (records: WebSearchResult[]): WebSearchResult[] => {
          if (keepSnippets) {
            return records
          }
          return records.map(({ snippet: _dropped, ...remaining }) => remaining)
        }

        // The snippet setting is not part of the key: snippets are stripped only on the way out,
        // so the cached payload keeps whatever the search produced.
        const cacheKey = webSearchCacheKey(query, safeSearch, page, maxResults, shouldEnrich)
        const hit = await searchCache.get(cacheKey)
        if (hit !== undefined) {
          context.status(`Found ${hit.count} web pages (cached).`)
          return { results: withoutDisabledSnippets(hit.results), count: hit.count }
        }

        // Cache miss: run the search.
        const notifyRetry = createRetryNotifier(context.status, "web search")
        let records = await searchWeb(impit, { query, pageStride, safeSearch, page }, maxResults, {
          signal: context.signal,
          retry,
          onFailedAttempt: notifyRetry,
          limiter: rateLimiter,
        })
        if (records.length === 0) {
          throw new NoWebResultsError(query)
        }

        if (!shouldEnrich) {
          context.status(`Found ${records.length} web pages.`)
        } else {
          context.status(`Found ${records.length} web pages. Enriching metadata...`)
          // Enrichment reuses the retry policy but with retries forced to zero.
          const enrichmentRetry = { ...retry, retries: 0 }
          records = await enrichSearchResults(records, scraper, impit, pageCache, hostLimiter, {
            signal: context.signal,
            retry: enrichmentRetry,
            status: context.status,
            maxBytes,
          })
          context.status(`Enriched ${records.length} results.`)
        }

        // Store the unstripped records, then hand back the (possibly snippet-free) view.
        const count = records.length
        const payload: WebSearchResultsPayload = { results: records, count }
        await searchCache.set(cacheKey, payload)
        return { results: withoutDisabledSnippets(records), count }
      } catch (error) {
        return formatToolError(error, context, "web-search")
      }
    },
  })
}