// src/toolsProvider.ts
import { text, tool, ToolsProviderController } from "@lmstudio/sdk";
import puppeteer from "puppeteer";
import { z } from "zod";
/**
 * Loads `url` in a Puppeteer-launched browser and returns a pruned copy of the
 * page's `<body>` markup.
 *
 * Pruning, performed inside the page:
 *  - removes every element matching the tag list below (scripts, styles, forms,
 *    navigation chrome, embedded media, ...);
 *  - removes comment nodes, whitespace-only text nodes, and elements that end up
 *    with no child nodes (`<img>` elements are kept);
 *  - strips every attribute except `src` and `href`.
 *
 * @param url Address of the page to load.
 * @returns The `outerHTML` of the pruned `<body>`, or an empty string when the
 *   document has no body element.
 * @throws Whatever Puppeteer throws, e.g. when navigation does not settle within
 *   the 30 second timeout. The browser is closed in that case as well.
 */
async function getHTMLContent(url: string): Promise<string> {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Interception has to be switched on *before* navigating; enabling it after
    // `goto` would leave the initial page load's requests unfiltered.
    await page.setRequestInterception(true);
    page.on("request", (req) => {
      const shouldSkipLoading = ["image", "media", "stylesheet"].includes(req.resourceType());
      if (shouldSkipLoading) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto(url, {
      waitUntil: ["domcontentloaded", "networkidle0"],
      timeout: 30 * 1000,
    });

    // The callback below runs in the page, not in Node, so it must be
    // self-contained: helpers are declared inside it.
    return await page.evaluate(() => {
      /** Depth-first prune of comments, blank text nodes and childless elements. */
      function processNode(node: Node) {
        if (node.nodeType === Node.COMMENT_NODE) {
          node.parentNode?.removeChild(node);
          return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
          if (node.nodeType === Node.TEXT_NODE && !node.textContent?.trim()) {
            node.parentNode?.removeChild(node);
          }
          return;
        }
        // Images never have children; keep them instead of pruning as "empty".
        if (node.tagName.toUpperCase() === "IMG") {
          return;
        }
        // Iterate over a snapshot because children may be removed while walking.
        for (const child of Array.from(node.childNodes)) {
          processNode(child);
        }
        if (!node.hasChildNodes()) {
          node.parentNode?.removeChild(node);
        }
      }

      /** Drops every attribute other than `src` / `href` from `element` and its descendants. */
      function removeAttributesRecursively(element: Element) {
        // Snapshot the attribute list since it is live and we mutate it.
        for (const { name } of Array.from(element.attributes)) {
          const lowered = name.toLowerCase();
          if (lowered !== "src" && lowered !== "href") {
            // Remove by the attribute's own name so the lookup also matches on
            // elements where names are not case-normalised.
            element.removeAttribute(name);
          }
        }
        for (const child of Array.from(element.children)) {
          removeAttributesRecursively(child);
        }
      }

      const tagsToRemove = [
        "script",
        "noscript",
        "style",
        "link",
        "form",
        "input",
        "textarea",
        "select",
        "button",
        "header",
        "footer",
        "nav",
        "svg",
        "iframe",
        "object",
        "embed",
        "audio",
        "video",
        "canvas",
        "hr",
      ];

      // Hold on to the body element: if pruning empties it, `processNode`
      // detaches it from the document and `document.body` would become null.
      const body = document.body;
      if (!body) {
        return "";
      }

      document.querySelectorAll(tagsToRemove.join(",")).forEach((el) => el.remove());
      processNode(body);
      removeAttributesRecursively(body);
      return body.outerHTML;
    });
  } finally {
    // Always release the browser process, including when navigation or
    // evaluation throws.
    await browser.close();
  }
}
/**
 * Creates the tools offered by this provider.
 *
 * Two tools are returned, in this order:
 *  - `fetch`: takes one `url` and resolves to the result of `getHTMLContent` for it;
 *  - `fetch_all`: takes an array of `urls`, calls `getHTMLContent` for each of them
 *    concurrently via `Promise.all`, and resolves to the array of results (so it
 *    rejects as soon as any single page fails).
 *
 * @param ctl Controller handed in by the host; not read by this function.
 * @returns The `[fetch, fetch_all]` tool pair.
 */
export async function toolsProvider(ctl: ToolsProviderController) {
  // The template literal bodies below are kept flush-left on purpose so their
  // content stays exactly as authored.
  const singlePageTool = tool({
    name: "fetch",
    description: text`
Fetches the content of a web page at the given \`url\`, cleans the markup, and returns the result.
Use this tool when you need to retrieve and read the full contents of a webpage.
`,
    parameters: { url: z.string().url() },
    implementation: async ({ url }) => {
      const markup = await getHTMLContent(url);
      return markup;
    },
  });

  const multiPageTool = tool({
    name: "fetch_all",
    description: text`
Fetches the content of multiple web pages at the given \`urls\`, cleans the markup, and returns the results.
Use this tool when you need to retrieve and read the full contents of multiple webpages efficiently in a single call.
`,
    parameters: { urls: z.array(z.string().url()) },
    implementation: async ({ urls }) => {
      // Start every page load up front, then wait for all of them together.
      const pending = urls.map((pageUrl) => getHTMLContent(pageUrl));
      const pages = await Promise.all(pending);
      return pages;
    },
  });

  return [singlePageTool, multiPageTool];
}