// Forked from brdcastro/maestro
"use strict";
/**
* @file webTools.ts
* Web tools: search, fetch content, Wikipedia.
* Search logic lives in webSearch.ts (shared with secondary agent).
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.createWebTools = createWebTools;
const sdk_1 = require("@lmstudio/sdk");
const zod_1 = require("zod");
const shared_1 = require("./shared");
const webSearch_1 = require("./webSearch");
const errorCodes_1 = require("./errorCodes");
const spillToDisk_1 = require("./spillToDisk");
/**
 * Strip HTML to readable plain text. Shared by fetch_web_content and deep_research.
 *
 * Steps, in order:
 *  1. remove script/style blocks (repeated until a pass removes nothing),
 *  2. remove nav/footer/header/aside blocks,
 *  3. turn closing div/p tags and br tags into newlines,
 *  4. remove every remaining tag (repeated until a pass removes nothing),
 *  5. decode character references: the named entities lt, gt, quot, nbsp, amp, apos
 *     plus decimal and hexadecimal numeric references,
 *  6. collapse runs of spaces/tabs, squeeze blank-line runs to one blank line, trim.
 *
 * @param {string} html Raw HTML markup.
 * @returns {string} Plain text with tags removed and entities decoded.
 */
function stripHtmlToText(html) {
    // Non-breaking spaces become ordinary spaces so step 6 can collapse them.
    const namedEntities = new Map([
        ["lt", "<"],
        ["gt", ">"],
        ["quot", '"'],
        ["nbsp", " "],
        ["amp", "&"],
        ["apos", "'"],
    ]);
    let text = html;
    let prev;
    // Repeat until stable: deleting one block can leave behind text that forms another one.
    do {
        prev = text.length;
        text = text
            .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, "")
            .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, "");
    } while (text.length < prev);
    text = text
        .replace(/<nav\b[^>]*>[\s\S]*?<\/nav>/gi, "")
        .replace(/<footer\b[^>]*>[\s\S]*?<\/footer>/gi, "")
        .replace(/<header\b[^>]*>[\s\S]*?<\/header>/gi, "")
        .replace(/<aside\b[^>]*>[\s\S]*?<\/aside>/gi, "");
    text = text
        .replace(/<\/div>/gi, "\n")
        .replace(/<\/p>/gi, "\n")
        .replace(/<br\s*\/?>/gi, "\n");
    do {
        prev = text.length;
        text = text.replace(/<[^>]+>/g, "");
    } while (text.length < prev);
    // Decode in ONE pass, after tag removal: decoded "<" / ">" must not be mistaken for markup,
    // and an escaped ampersand followed by "lt;" must decode once, not twice.
    text = text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g, (entity, dec, hex, name) => {
        if (name !== undefined) {
            // Unknown named references are left exactly as written.
            return namedEntities.get(name) ?? entity;
        }
        const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
        if (codePoint === 0xa0) {
            return " ";
        }
        // String.fromCodePoint throws above 0x10FFFF; NUL and lone surrogates are not useful text.
        const isScalarValue = codePoint > 0 && codePoint <= 0x10ffff && (codePoint < 0xd800 || codePoint > 0xdfff);
        return isScalarValue ? String.fromCodePoint(codePoint) : entity;
    });
    return text.replace(/[ \t]+/g, " ").replace(/\n\s*\n/g, "\n\n").trim();
}
/**
 * Build the web tool set: web_search, fetch_web_content, wikipedia_search and deep_research.
 *
 * @param {object} config Plugin configuration. This function reads `config.searxngUrl`
 *   (when set, SearXNG is tried before DuckDuckGo) and `config.enableWikipedia`
 *   (passed through to createSafeToolImplementation; presumably gates the tool, confirm in ./shared).
 * @param {object} [limits] Optional size limits. This function reads `limits.maxWebContent`
 *   (default 6000) and `limits.maxWikiSummary` (default 2000).
 * @returns {Array} The tool objects produced by the SDK's `tool()` factory, in the order listed above.
 */
function createWebTools(config, limits) {
    // Upper bound for every outbound page/API request so one slow host cannot stall a tool call.
    const FETCH_TIMEOUT_MS = 15_000;
    // Shape of a Wikipedia subdomain label ("en", "simple", "zh-min-nan"). `lang` is interpolated
    // into the request hostname, so anything else (dots, slashes, "#", "@") must be rejected.
    const WIKI_LANG_PATTERN = /^[a-z]{2,}(?:-[a-z0-9]+)*$/i;
    const tools = [];
    const searchDescription = config.searxngUrl
        ? "Search the web using SearXNG."
        : "Search the web using DuckDuckGo.";
    tools.push((0, sdk_1.tool)({
        name: "web_search",
        description: searchDescription,
        parameters: { query: zod_1.z.string() },
        implementation: async ({ query }) => {
            // Strategy 1: SearXNG (if configured)
            if (config.searxngUrl) {
                const results = await (0, webSearch_1.searchSearXNG)(config.searxngUrl, query);
                if (results)
                    return { results, total_found: results.length, source: "searxng" };
                // SearXNG returned nothing usable — fall through to DDG
            }
            // Strategy 2: DDG Lite
            const results = await (0, webSearch_1.searchDDGLite)(query);
            if (results)
                return { results, total_found: results.length, source: "duckduckgo" };
            return { ...(0, errorCodes_1.toolError)(errorCodes_1.SERVICE_UNAVAILABLE, "Search temporarily unavailable. Try again in a few minutes."), results: [] };
        },
    }));
    tools.push((0, sdk_1.tool)({
        name: "fetch_web_content",
        description: "Fetch the clean, text-based content of a webpage URL.",
        parameters: { url: zod_1.z.string() },
        implementation: async ({ url }) => {
            try {
                // Same timeout as deep_research; the signal also aborts a stalled body download.
                const response = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
                if (!response.ok)
                    throw new Error(`HTTP error! status: ${response.status}`);
                const raw = await response.text();
                const result = { url, status: response.status };
                const titleMatch = raw.match(/<title[^>]*>([^<]+)<\/title>/i);
                if (titleMatch)
                    result.title = titleMatch[1];
                const text = stripHtmlToText(raw);
                const MAX = limits?.maxWebContent ?? 6_000;
                // `content` carries the preview; `content_full` is only set when spillIfNeeded reports a spill.
                const spill = await (0, spillToDisk_1.spillIfNeeded)(text, MAX, "web");
                result.content = spill.preview;
                if (spill.spilled)
                    result.content_full = spill.spillPath;
                return result;
            }
            catch (error) {
                return (0, errorCodes_1.toolError)(errorCodes_1.HTTP_ERROR, `Failed to fetch URL: ${error instanceof Error ? error.message : String(error)}`);
            }
        },
    }));
    tools.push((0, sdk_1.tool)({
        name: "wikipedia_search",
        description: "Search Wikipedia for a given query and return page summaries.",
        parameters: {
            query: zod_1.z.string(),
            lang: zod_1.z.string().optional().describe("Language code (default: en)"),
        },
        implementation: (0, shared_1.createSafeToolImplementation)(async ({ query, lang = "en" }) => {
            // Reject before any request: an unchecked `lang` would let the caller choose the host.
            if (!WIKI_LANG_PATTERN.test(lang)) {
                return (0, errorCodes_1.toolError)(errorCodes_1.SERVICE_UNAVAILABLE, `Wikipedia search failed: invalid language code ${JSON.stringify(lang)}`);
            }
            try {
                const apiBase = `https://${lang}.wikipedia.org/w/api.php`;
                const searchUrl = `${apiBase}?action=query&list=search&srsearch=${encodeURIComponent(query)}&format=json`;
                const searchResponse = await fetch(searchUrl, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
                const searchData = await searchResponse.json();
                if (!searchData.query?.search?.length)
                    return { results: "No Wikipedia articles found." };
                const maxSummary = limits?.maxWikiSummary ?? 2_000;
                // Fetch the top three pages in parallel instead of serially.
                const pages = await Promise.all(searchData.query.search.slice(0, 3).map(async (item) => {
                    const pageUrl = `${apiBase}?action=query&prop=extracts&exintro&explaintext&pageids=${item.pageid}&format=json`;
                    const pageResponse = await fetch(pageUrl, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
                    const pageData = await pageResponse.json();
                    // A page without an extract yields an empty summary rather than failing the whole search.
                    const extract = pageData.query?.pages?.[item.pageid]?.extract ?? "";
                    return {
                        title: item.title,
                        summary: extract.length > maxSummary ? `${extract.substring(0, maxSummary)}...` : extract,
                        url: `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(item.title.replace(/ /g, "_"))}`,
                    };
                }));
                return { results: pages };
            }
            catch (error) {
                return (0, errorCodes_1.toolError)(errorCodes_1.SERVICE_UNAVAILABLE, `Wikipedia search failed: ${error instanceof Error ? error.message : String(error)}`);
            }
        }, config.enableWikipedia, "wikipedia_search"),
    }));
    tools.push((0, sdk_1.tool)({
        name: "deep_research",
        description: "Multi-angle web research. You provide 2-5 sub-questions that decompose a topic; " +
            "the tool searches each, fetches the top source, and returns a structured findings " +
            "bundle for you to synthesize into an answer with citations. " +
            "Free (SearXNG/DuckDuckGo) — no API key. Each finding is hard-truncated so the whole " +
            "bundle stays context-safe. " +
            "Use when a single web_search is not enough (comparisons, multi-part questions, " +
            "topics with several angles). For one factual lookup, use web_search instead.",
        parameters: {
            topic: zod_1.z.string().describe("The overall research topic — used to frame your synthesis."),
            questions: zod_1.z.array(zod_1.z.string()).min(2).max(5).describe("2-5 sub-questions decomposing the topic from DISTINCT angles. You generate these — " +
                "think of the separate things you'd need to know to answer the topic well."),
        },
        implementation: async ({ topic, questions }) => {
            // Total bundle budget is about one fetch_web_content call, split across questions
            // (with a floor of 800 characters per finding).
            const perFinding = Math.max(800, Math.floor((limits?.maxWebContent ?? 6_000) / questions.length));
            const findings = await Promise.all(questions.map(async (question) => {
                // Search: SearXNG if configured, else DuckDuckGo. Top 3 results.
                let results = config.searxngUrl ? await (0, webSearch_1.searchSearXNG)(config.searxngUrl, question, 3) : null;
                if (!results)
                    results = await (0, webSearch_1.searchDDGLite)(question, 3);
                if (!results || results.length === 0) {
                    return { question, error: "No search results for this sub-question." };
                }
                const top = results[0];
                const otherSources = results.slice(1, 3).map(r => r.link).filter(Boolean);
                // Fetch + strip the top source. Fall back to the search snippet on failure.
                let extract = "";
                try {
                    const resp = await fetch(top.link, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
                    if (resp.ok) {
                        const stripped = stripHtmlToText(await resp.text());
                        extract = stripped.length > perFinding ? stripped.slice(0, perFinding) + " […truncated]" : stripped;
                    }
                }
                catch { /* fetch failed — deliberate best-effort, fall back to snippet below */ }
                if (!extract) {
                    extract = `(could not fetch full page — search snippet only) ${top.snippet || ""}`.slice(0, perFinding);
                }
                return { question, top_source: top.link, top_title: top.title, extract, other_sources: otherSources };
            }));
            return {
                topic,
                questions_researched: questions.length,
                findings,
                synthesis: "Synthesize these findings into an answer to the topic. Cite sources by URL. " +
                    "Where a finding has an error or thin extract, note the gap instead of inventing facts.",
            };
        },
    }));
    return tools;
}
//# sourceMappingURL=webTools.js.map