Project Files
src / lib / crawler.ts
import * as cheerio from "cheerio";
/**
 * Downloads a page and extracts its title, visible text, and outgoing links.
 *
 * @param url - Address of the page to fetch.
 * @returns An object with:
 *   - `title`: text of the first `<title>`, else the first `<h1>`, else `url`;
 *   - `url`: the `url` argument, unchanged;
 *   - `content`: body text with whitespace runs collapsed to single spaces,
 *     truncated to 12000 characters;
 *   - `links`: up to 50 unique absolute http(s) URLs, in document order.
 * @throws Error with message `HTTP <status>` when the response status is not
 *   in the 2xx range. Failures raised by `fetch` itself propagate unchanged.
 */
export async function fetchPage(url: string): Promise<{
  title: string;
  url: string;
  content: string;
  links: string[];
}> {
  const response = await fetch(url, {
    headers: {
      "User-Agent": "LMStudioCrawler/1.0"
    }
  });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}`);
  }

  const html = await response.text();
  const $ = cheerio.load(html);

  // Drop elements whose text is never shown to a reader so they do not
  // end up in the extracted content.
  $("script,style,noscript").remove();

  const title =
    $("title").first().text().trim() ||
    $("h1").first().text().trim() ||
    url;

  const content = $("body")
    .text()
    .replace(/\s+/g, " ")
    .trim()
    .slice(0, 12000);

  // Relative hrefs must be resolved against the address the document was
  // actually served from, which differs from `url` after a redirect.
  const baseUrl = response.url || url;
  const seen = new Set<string>();
  const links: string[] = [];

  for (const el of $("a[href]").toArray()) {
    if (links.length >= 50) {
      break;
    }

    const href = $(el).attr("href")?.trim();
    if (!href) {
      continue;
    }

    let resolved: URL;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      // Malformed href: nothing a crawler could follow.
      continue;
    }

    // mailto:, javascript:, tel:, data: and similar are not fetchable pages.
    if (resolved.protocol !== "http:" && resolved.protocol !== "https:") {
      continue;
    }

    if (seen.has(resolved.href)) {
      continue;
    }
    seen.add(resolved.href);
    links.push(resolved.href);
  }

  return {
    title,
    url,
    content,
    links
  };
}