// dist/index.js
"use strict";
// @ts-nocheck
Object.defineProperty(exports, "__esModule", { value: true });
exports.main = main;
const sdk_1 = require("@lmstudio/sdk");
const zod_1 = require("zod");
// ---- Tunable defaults (used as fallbacks when the caller omits an option) ----

/** Default network timeout for a single fetch, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 15000;
/** Default number of search/crawl rounds. */
const DEFAULT_MAX_ROUNDS = 2;
/** Default number of search queries issued per round. */
const DEFAULT_MAX_SEARCHES_PER_ROUND = 4;
/** Default number of results kept per search query. */
const DEFAULT_MAX_RESULTS_PER_SEARCH = 6;
/** Default cap on the total number of crawled pages. */
const DEFAULT_MAX_PAGES = 12;
/** Default maximum link depth followed from a search result. */
const DEFAULT_MAX_DEPTH = 2;
/** Default cap on extracted text kept per page, in characters. */
const DEFAULT_MAX_CHARS_PER_PAGE = 16000;
/** Maximum number of outgoing links retained per crawled page. */
const MAX_LINKS_PER_PAGE = 12;
/** User-Agent header sent with every outgoing request. */
const USER_AGENT = "Mozilla/5.0 (compatible; LMStudioDeepResearch/2.0; +https://lmstudio.ai)";

/** Hosts (and their subdomains) that are never searched or crawled. */
const BLOCKED_HOSTS = [
    "facebook.com",
    "instagram.com",
    "x.com",
    "twitter.com",
    "tiktok.com",
    "reddit.com",
    "pinterest.com",
    "linkedin.com",
    "snapchat.com",
    "discord.com",
    "discord.gg",
    "tumblr.com",
    "quora.com",
    "fandom.com",
    "youtube.com",
    "youtu.be",
    "twitch.tv",
    "onlyfans.com"
];

/** URL substrings that mark a link as blocked regardless of host. */
const BLOCKED_URL_PARTS = [
    "/share",
    "/sharer",
    "/intent/",
    "/status/",
    "/posts/",
    "/reels/",
    "/shorts/",
    "/video/",
    "/watch?",
    "/watch/",
    "/tiktok.com/",
    "/redd.it/"
];

/** Lower-case link/line labels treated as navigation boilerplate. */
const BOILERPLATE_LINK_TEXT = [
    "home",
    "menu",
    "log in",
    "login",
    "sign in",
    "sign up",
    "subscribe",
    "newsletter",
    "privacy",
    "terms",
    "cookies",
    "cookie policy",
    "accept cookies",
    "contact",
    "about us",
    "about",
    "sitemap",
    "search",
    "share",
    "follow",
    "read more",
    "learn more"
];

/** Words ignored when a query is tokenized for relevance scoring. */
const STOP_WORDS = new Set([
    "the", "and", "for", "with", "that", "this", "from", "into", "about", "what",
    "when", "where", "which", "who", "how", "why", "can", "could", "would", "should",
    "please", "need", "want", "best", "latest", "current", "new", "old", "vs", "via",
    "of", "to", "in", "on", "by", "as", "is", "are", "be", "it",
    "or", "an", "a"
]);
/** Module-wide LM Studio client; stays null until getClient() is first called. */
let client = null;
/**
 * Returns the shared LMStudioClient, constructing it on the first call and
 * reusing the same instance afterwards.
 *
 * @returns {object} The cached client instance.
 */
function getClient() {
    if (client) {
        return client;
    }
    client = new sdk_1.LMStudioClient();
    return client;
}
/**
 * Coerces a loosely-typed value to an integer and clamps it to [min, max].
 *
 * Finite numbers are truncated toward zero; strings are accepted only when,
 * after trimming, they are an optional minus sign followed by digits. Anything
 * else yields `fallback` (which is clamped too, but not truncated).
 *
 * @param {*} value - Candidate value (number, numeric string, or anything else).
 * @param {number} fallback - Used when `value` is not an acceptable integer.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} The clamped result.
 */
function clampInt(value, fallback, min, max) {
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    const isIntegerString = typeof value === "string" && /^-?\d+$/.test(value.trim());
    const candidate = isFiniteNumber || isIntegerString ? Math.trunc(Number(value)) : fallback;
    const capped = Math.min(max, candidate);
    return Math.max(min, capped);
}
/**
 * Coerces a loosely-typed value to a number and clamps it to [min, max].
 *
 * Finite numbers are used as-is; strings are accepted only in plain decimal
 * form (optional minus, digits, optional fractional part) after trimming.
 * Anything else yields `fallback`, which is clamped as well.
 *
 * @param {*} value - Candidate value.
 * @param {number} fallback - Used when `value` is not an acceptable number.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} The clamped result.
 */
function clampFloat(value, fallback, min, max) {
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    const isDecimalString = typeof value === "string" && /^-?\d+(?:\.\d+)?$/.test(value.trim());
    const candidate = isFiniteNumber || isDecimalString ? Number(value) : fallback;
    const capped = Math.min(max, candidate);
    return Math.max(min, capped);
}
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeWhitespace(value) {
    const collapsed = value.replace(/\s+/g, " ");
    return collapsed.trim();
}
/**
 * Trims a string and, if it is still longer than `maxChars`, cuts it down and
 * appends an ellipsis character.
 *
 * The cut keeps at most `maxChars - 1` characters (never fewer than zero) and
 * drops trailing whitespace before the ellipsis is added.
 *
 * @param {string} value - Text to shorten.
 * @param {number} maxChars - Length above which the text is truncated.
 * @returns {string} The trimmed or truncated text.
 */
function trimString(value, maxChars) {
    const text = value.trim();
    if (text.length > maxChars) {
        const keep = Math.max(0, maxChars - 1);
        const head = text.slice(0, keep).trimEnd();
        return `${head}…`;
    }
    return text;
}
/**
 * Decodes the HTML character references that commonly appear in scraped text.
 *
 * Handles the named entities nbsp, amp, quot, apos, lt and gt (matched
 * case-insensitively) plus any decimal or hexadecimal numeric reference.
 * Unknown named entities and numeric references outside the Unicode range are
 * left untouched.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (an escaped "amp" followed by "lt;" yields the literal
 * text of an lt entity, not "<").
 *
 * @param {string} value - Text that may contain HTML character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeHtmlEntities(value) {
    // A non-breaking space is mapped to a plain space, like the rest of the pipeline expects.
    const namedEntities = new Map([
        ["nbsp", " "],
        ["amp", "&"],
        ["quot", '"'],
        ["apos", "'"],
        ["lt", "<"],
        ["gt", ">"],
    ]);
    // String.fromCodePoint throws a RangeError above U+10FFFF, so keep such references as-is.
    const decodeCodePoint = (codePoint, original) => {
        if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
            return original;
        }
        return String.fromCodePoint(codePoint);
    };
    return value.replace(/&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi, (match, decimal, hex, name) => {
        if (decimal !== undefined) {
            return decodeCodePoint(Number(decimal), match);
        }
        if (hex !== undefined) {
            return decodeCodePoint(Number.parseInt(hex, 16), match);
        }
        return namedEntities.get(name.toLowerCase()) ?? match;
    });
}
/**
 * Extracts the lower-cased hostname of a URL, without a leading "www.".
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string} The hostname, or "" when `url` cannot be parsed.
 */
function getHostname(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return "";
    }
    const withoutWww = parsed.hostname.replace(/^www\./i, "");
    return withoutWww.toLowerCase();
}
/**
 * Reduces a hostname to its registrable ("root") domain.
 *
 * Hostnames with at most two labels are returned unchanged. Otherwise the last
 * two labels are kept, or the last three when the name ends in a two-letter
 * country code preceded by a common second-level label (for example
 * "news.bbc.co.uk" becomes "bbc.co.uk" instead of "co.uk"). IPv4 literals are
 * returned unchanged so different addresses are not merged.
 *
 * This is a heuristic, not a Public Suffix List lookup: unusual suffixes are
 * still reduced to their last two labels.
 *
 * @param {string} hostname - Hostname such as "docs.example.com".
 * @returns {string} The root-domain approximation.
 */
function getRootDomain(hostname) {
    const labels = hostname.split(".").filter(Boolean);
    if (labels.length <= 2) {
        return hostname;
    }
    // Dotted-quad addresses have no registrable domain; "1.10" would be meaningless.
    if (labels.length === 4 && labels.every((label) => /^\d+$/.test(label))) {
        return hostname;
    }
    const secondLevelLabels = new Set(["co", "com", "org", "net", "gov", "edu", "ac"]);
    const topLevel = labels[labels.length - 1];
    const secondLevel = labels[labels.length - 2].toLowerCase();
    const hasCompoundSuffix = topLevel.length === 2 && secondLevelLabels.has(secondLevel);
    return labels.slice(hasCompoundSuffix ? -3 : -2).join(".");
}
/**
 * Produces a canonical string form of a URL for de-duplication.
 *
 * The fragment is removed and trailing slashes are stripped from any path
 * other than "/". Input that cannot be parsed as a URL is returned with its
 * whitespace normalized instead.
 *
 * @param {string} url - URL (or URL-like text) to normalize.
 * @returns {string} The normalized URL string.
 */
function normalizeUrl(url) {
    try {
        const target = new URL(url);
        target.hash = "";
        const path = target.pathname;
        const hasTrailingSlash = path !== "/" && path.endsWith("/");
        if (hasTrailingSlash) {
            target.pathname = path.replace(/\/+$/, "");
            // A path made only of slashes collapses to nothing; fall back to the root.
            if (!target.pathname) {
                target.pathname = "/";
            }
        }
        return target.toString();
    }
    catch {
        return normalizeWhitespace(url);
    }
}
/**
 * Resolves a raw href against a base URL.
 *
 * @param {string} raw - The href as found in the document.
 * @param {string} baseUrl - URL the href is relative to.
 * @returns {string|null} The absolute URL, or null when the href is blank,
 *   uses a javascript:, mailto:, tel: or data: scheme, or cannot be resolved.
 */
function resolveUrl(raw, baseUrl) {
    const href = raw.trim();
    const isUnusable = !href || /^(javascript|mailto|tel|data):/i.test(href);
    if (isUnusable) {
        return null;
    }
    let absolute = null;
    try {
        absolute = new URL(href, baseUrl).toString();
    }
    catch {
        absolute = null;
    }
    return absolute;
}
/**
 * Tells whether a URL's host is one of BLOCKED_HOSTS or a subdomain of one.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} True when the host is blocked.
 */
function isBlockedHost(url) {
    const host = getHostname(url);
    for (const blocked of BLOCKED_HOSTS) {
        if (host === blocked || host.endsWith(`.${blocked}`)) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a URL must be skipped, either because its host is blocked or
 * because the lower-cased URL contains one of BLOCKED_URL_PARTS.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} True when the URL is blocked.
 */
function isBlockedUrl(url) {
    const haystack = url.toLowerCase();
    if (isBlockedHost(url)) {
        return true;
    }
    for (const fragment of BLOCKED_URL_PARTS) {
        if (haystack.includes(fragment)) {
            return true;
        }
    }
    return false;
}
/**
 * Assigns a trust score to a hostname. Rules are tried in order and the first
 * match wins:
 *   8 - ends with .gov, .edu or .ac.uk
 *   7 - contains nih.gov, who.int or arxiv.org
 *   5 - contains wikipedia.org
 *   4 - contains "docs." or "developer."
 *   1 - anything else
 *   0 - empty host
 *
 * @param {string} host - Hostname to score.
 * @returns {number} The trust score.
 */
function scoreDomain(host) {
    if (!host) {
        return 0;
    }
    const name = host.toLowerCase();
    const endsWithAny = (suffixes) => suffixes.some((suffix) => name.endsWith(suffix));
    const containsAny = (fragments) => fragments.some((fragment) => name.includes(fragment));
    const tiers = [
        { score: 8, matches: () => endsWithAny([".gov", ".edu", ".ac.uk"]) },
        { score: 7, matches: () => containsAny(["nih.gov", "who.int", "arxiv.org"]) },
        { score: 5, matches: () => containsAny(["wikipedia.org"]) },
        { score: 4, matches: () => containsAny(["docs.", "developer."]) },
    ];
    const tier = tiers.find((candidate) => candidate.matches());
    return tier ? tier.score : 1;
}
/**
 * Classifies a hostname as "blocked", "authoritative", "reference" or "web".
 *
 * Checks run in that order: BLOCKED_HOSTS (exact or subdomain match), then
 * .gov / .edu / nih.gov, then wikipedia.org; everything else is "web".
 *
 * @param {string} host - Hostname to classify.
 * @returns {string} The source type label.
 */
function sourceTypeForHost(host) {
    const name = host.toLowerCase();
    const isBlocked = BLOCKED_HOSTS.some((blocked) => name === blocked || name.endsWith(`.${blocked}`));
    if (isBlocked) {
        return "blocked";
    }
    const isAuthoritative = name.endsWith(".gov") || name.endsWith(".edu") || name.includes("nih.gov");
    if (isAuthoritative) {
        return "authoritative";
    }
    return name.includes("wikipedia.org") ? "reference" : "web";
}
/**
 * Splits a query into unique lower-case search tokens.
 *
 * The query is split on anything that is not an ASCII letter or digit; parts
 * shorter than three characters and STOP_WORDS are dropped. First-occurrence
 * order is preserved.
 *
 * @param {string} query - Free-text query.
 * @returns {string[]} The distinct tokens.
 */
function tokenizeQuery(query) {
    const tokens = new Set();
    for (const part of query.toLowerCase().split(/[^a-z0-9]+/i)) {
        if (part.length < 3 || STOP_WORDS.has(part)) {
            continue;
        }
        tokens.add(part);
    }
    return Array.from(tokens);
}
/**
 * Counts non-overlapping occurrences of `needle` in `haystack`.
 *
 * @param {string} haystack - Text to search.
 * @param {string} needle - Substring to count; an empty needle counts as 0.
 * @returns {number} Number of non-overlapping matches.
 */
function countOccurrences(haystack, needle) {
    if (!needle) {
        return 0;
    }
    let total = 0;
    // Resume after the end of each hit so matches never overlap.
    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + needle.length)) {
        total += 1;
    }
    return total;
}
/**
 * Scores text by how often the given tokens occur in it.
 *
 * The text is lower-cased first; the tokens are used as given.
 *
 * @param {string} text - Text to score.
 * @param {string[]} tokens - Tokens to look for.
 * @returns {number} Sum of the non-overlapping occurrence counts of all tokens.
 */
function scoreText(text, tokens) {
    const haystack = text.toLowerCase();
    let total = 0;
    tokens.forEach((token) => {
        total += countOccurrences(haystack, token);
    });
    return total;
}
/**
 * Decides whether a line of extracted page text is navigation/legal noise.
 *
 * A line counts as boilerplate when it is two characters or shorter, when it
 * equals, starts with ("<label> ") or contains (" <label>") an entry of
 * BOILERPLATE_LINK_TEXT, or when it is shorter than 90 characters and mentions
 * cookie, privacy, terms, subscribe or login.
 *
 * @param {string} line - A single whitespace-normalized line.
 * @returns {boolean} True when the line should be discarded.
 */
function isBoilerplateLine(line) {
    const text = line.toLowerCase();
    if (!text || text.length <= 2) {
        return true;
    }
    const looksLikeNavLabel = BOILERPLATE_LINK_TEXT.some((label) => text === label || text.includes(` ${label}`) || text.startsWith(`${label} `));
    if (looksLikeNavLabel) {
        return true;
    }
    const mentionsLegalOrAuth = /(cookie|privacy|terms|subscribe|login)/i.test(text);
    return mentionsLegalOrAuth && text.length < 90;
}
/**
 * Converts an HTML fragment to readable plain text.
 *
 * Steps, in order: drop comments and non-content elements (script, style,
 * noscript, svg, iframe, nav, footer, header, form, aside); turn <br> and
 * block-level closing tags into newlines and <li> into bullets; remove every
 * remaining tag; decode entities; then normalize each line, discard empty and
 * boilerplate lines, and drop case-insensitive duplicate lines.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Newline-separated plain text.
 */
function stripTags(html) {
    // Order matters: whole elements are removed before generic tag stripping.
    const rewriteRules = [
        [/<!--[\s\S]*?-->/g, " "],
        [/<script[\s\S]*?<\/script>/gi, " "],
        [/<style[\s\S]*?<\/style>/gi, " "],
        [/<noscript[\s\S]*?<\/noscript>/gi, " "],
        [/<svg[\s\S]*?<\/svg>/gi, " "],
        [/<iframe[\s\S]*?<\/iframe>/gi, " "],
        [/<nav[\s\S]*?<\/nav>/gi, " "],
        [/<footer[\s\S]*?<\/footer>/gi, " "],
        [/<header[\s\S]*?<\/header>/gi, " "],
        [/<form[\s\S]*?<\/form>/gi, " "],
        [/<aside[\s\S]*?<\/aside>/gi, " "],
        [/<br\s*\/?>/gi, "\n"],
        [/<\/(p|div|li|section|article|tr|table|blockquote|h[1-6])>/gi, "\n"],
        [/<li\b[^>]*>/gi, "• "],
        [/<h[1-6]\b[^>]*>/gi, "\n"],
        [/<[^>]+>/g, " "],
    ];
    let text = html;
    for (const [pattern, replacement] of rewriteRules) {
        text = text.replace(pattern, replacement);
    }
    text = decodeHtmlEntities(text);
    const keptLines = [];
    const seenKeys = new Set();
    for (const rawLine of text.replace(/\r/g, "").replace(/\u00a0/g, " ").split("\n")) {
        const line = normalizeWhitespace(rawLine);
        if (line.length === 0 || isBoilerplateLine(line)) {
            continue;
        }
        const key = line.toLowerCase();
        if (seenKeys.has(key)) {
            continue;
        }
        seenKeys.add(key);
        keptLines.push(line);
    }
    return keptLines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}
/**
 * Reads a page description from its <meta> tags.
 *
 * Tries name="description" first and property="og:description" second; in
 * both cases the identifying attribute must come before `content`.
 *
 * @param {string} html - Page markup.
 * @returns {string} The cleaned description, or "" when none is found.
 */
function extractMetaDescription(html) {
    const patterns = [
        /<meta[^>]+name=["']description["'][^>]*content=["']([^"']+)["'][^>]*>/i,
        /<meta[^>]+property=["']og:description["'][^>]*content=["']([^"']+)["'][^>]*>/i,
    ];
    for (const pattern of patterns) {
        const found = html.match(pattern);
        if (found) {
            return normalizeWhitespace(stripTags(found[1]));
        }
    }
    return "";
}
/**
 * Reads a page title, preferring <title> and falling back to og:title.
 *
 * A <title> element wins even when its cleaned text turns out to be empty.
 *
 * @param {string} html - Page markup.
 * @returns {string} The cleaned title, or "" when neither source is present.
 */
function extractTitle(html) {
    const clean = (fragment) => normalizeWhitespace(stripTags(fragment));
    const fromTitleTag = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
    if (fromTitleTag) {
        return clean(fromTitleTag[1]);
    }
    const fromOpenGraph = /<meta[^>]+property=["']og:title["'][^>]*content=["']([^"']+)["'][^>]*>/i.exec(html);
    if (fromOpenGraph) {
        return clean(fromOpenGraph[1]);
    }
    return "";
}
/**
 * Reads the href of a <link rel="canonical"> tag.
 *
 * Only matches when `rel` appears before `href` inside the tag.
 *
 * @param {string} html - Page markup.
 * @returns {string} The trimmed href, or "" when no such tag is found.
 */
function extractCanonical(html) {
    const found = /<link[^>]+rel=["']canonical["'][^>]*href=["']([^"']+)["'][^>]*>/i.exec(html);
    if (!found) {
        return "";
    }
    const [, href] = found;
    return href.trim();
}
/**
 * Collects and ranks the outgoing links of a page.
 *
 * Each <a href> is resolved against `baseUrl`, normalized, and skipped when it
 * is a duplicate, not http(s), or blocked. A link's score is its domain trust
 * score, plus 4 when it shares the base URL's root domain, minus 3 when its
 * text is empty or exactly a boilerplate label. Scanning stops once
 * `limit * 2` links are collected; the `limit` best-scored are returned.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL used to resolve relative hrefs.
 * @param {number} limit - Maximum number of links to return.
 * @returns {{url: string, text: string, score: number}[]} Links sorted by descending score.
 */
function extractAnchors(html, baseUrl, limit) {
    const anchorPattern = /<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi;
    const collected = [];
    const knownUrls = new Set();
    for (const [, href, innerHtml] of html.matchAll(anchorPattern)) {
        const absolute = resolveUrl(href, baseUrl);
        if (!absolute) {
            continue;
        }
        const url = normalizeUrl(absolute);
        if (knownUrls.has(url) || !/^https?:/i.test(url) || isBlockedUrl(url)) {
            continue;
        }
        const text = normalizeWhitespace(stripTags(innerHtml));
        const host = getHostname(url);
        const isUninformative = !text || BOILERPLATE_LINK_TEXT.some((label) => text.toLowerCase() === label);
        const sameSite = getRootDomain(host) === getRootDomain(getHostname(baseUrl));
        const score = (isUninformative ? -3 : 0) + scoreDomain(host) + (sameSite ? 4 : 0);
        collected.push({ url, text, score });
        knownUrls.add(url);
        if (collected.length >= limit * 2) {
            break;
        }
    }
    collected.sort((left, right) => right.score - left.score);
    return collected.slice(0, limit);
}
/**
 * Picks the sentences of a text that best match the query tokens.
 *
 * The text is whitespace-normalized and split after ".", "!" or "?". Each
 * sentence scores its token hits plus a length bonus of up to 3
 * (length / 120). Because that bonus is positive for any non-empty sentence,
 * the `score > 0` filter keeps every sentence.
 *
 * @param {string} text - Source text.
 * @param {string[]} tokens - Query tokens.
 * @param {number} [maxSentences=4] - Maximum number of sentences returned.
 * @returns {string[]} Best sentences, highest score first, each cut to 300 characters.
 */
function extractUsefulSentences(text, tokens, maxSentences = 4) {
    const ranked = [];
    for (const piece of normalizeWhitespace(text).split(/(?<=[.!?])\s+/)) {
        const sentence = piece.trim();
        if (!sentence) {
            continue;
        }
        const score = scoreText(sentence, tokens) + Math.min(3, sentence.length / 120);
        if (score > 0) {
            ranked.push({ sentence, score });
        }
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, maxSentences).map((entry) => trimString(entry.sentence, 300));
}
/**
 * Downloads a textual document over HTTP(S), following redirects.
 *
 * The request is aborted with a "Request timed out" error after `timeoutMs`.
 * The timer is always cleared, whether the request succeeds or fails.
 *
 * @param {string} url - Address to fetch.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Abort deadline in milliseconds.
 * @returns {Promise<{text: string, contentType: string}>} Body text and lower-cased content type.
 * @throws {Error} On a non-2xx status, or when the content type is not HTML,
 *   XHTML or plain text.
 */
async function fetchText(url, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const controller = new AbortController();
    const abortOnTimeout = () => controller.abort(new Error("Request timed out"));
    const timer = setTimeout(abortOnTimeout, timeoutMs);
    try {
        const requestOptions = {
            signal: controller.signal,
            redirect: "follow",
            headers: {
                "user-agent": USER_AGENT,
                accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
        };
        const response = await fetch(url, requestOptions);
        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status} ${response.statusText}`);
        }
        const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
        const supportedTypes = ["text/html", "application/xhtml", "text/plain"];
        const isSupported = supportedTypes.some((type) => contentType.includes(type));
        if (!isSupported) {
            throw new Error(`Unsupported content type: ${contentType || "unknown"}`);
        }
        const text = await response.text();
        return { text, contentType };
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Unwraps a DuckDuckGo result link to the URL it points at.
 *
 * DuckDuckGo result hrefs carry the real destination in the `uddg` query
 * parameter. `URLSearchParams.get` already percent-decodes that value, so it
 * is returned as-is: decoding it a second time would turn an encoded "%26" in
 * the destination into a bare "&" and would throw on a literal "%".
 *
 * @param {string} href - Link taken from a results page; may be relative or
 *   protocol-relative.
 * @returns {string} The `uddg` destination when present, otherwise the href
 *   resolved against https://duckduckgo.com, or the raw href if it cannot be
 *   parsed.
 */
function decodeDuckDuckGoUrl(href) {
    try {
        const parsed = new URL(href, "https://duckduckgo.com");
        const destination = parsed.searchParams.get("uddg");
        return destination ? destination : parsed.toString();
    }
    catch {
        return href;
    }
}
/**
 * Parses the HTML of a DuckDuckGo "html" results page.
 *
 * The page is split on `<div class="result` markers. In each block the
 * `result__a` anchor supplies the link and title and the `result__snippet`
 * element supplies the snippet. Blocks without a link, without a title, or
 * pointing at a blocked URL are skipped.
 *
 * @param {string} html - Results page markup.
 * @returns {{title: string, url: string, displayedUrl: string, snippet: string, source: string}[]}
 *   Parsed results in page order.
 */
function parseDuckDuckGoResults(html) {
    const firstMatch = (text, patterns) => {
        for (const pattern of patterns) {
            const found = text.match(pattern);
            if (found) {
                return found;
            }
        }
        return null;
    };
    // host + path without "www."; falls back to the raw URL when it does not parse.
    const toDisplayedUrl = (rawUrl) => {
        try {
            const parsed = new URL(rawUrl);
            return `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
        }
        catch {
            return rawUrl;
        }
    };
    const linkPatterns = [
        /<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i,
        /<a[^>]*href="([^"]+)"[^>]*class="[^"]*result__a[^"]*"[^>]*>([\s\S]*?)<\/a>/i,
    ];
    const snippetPatterns = [
        /class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/(?:a|div|span)>/i,
        /class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
    ];
    const [, ...blocks] = html.split(/<div class="result\b/gi);
    const results = [];
    for (const block of blocks) {
        const link = firstMatch(block, linkPatterns);
        if (!link) {
            continue;
        }
        const url = decodeDuckDuckGoUrl(link[1]);
        const title = normalizeWhitespace(stripTags(link[2]));
        if (!url || !title || isBlockedUrl(url)) {
            continue;
        }
        const snippetHit = firstMatch(block, snippetPatterns);
        const snippet = snippetHit ? normalizeWhitespace(stripTags(snippetHit[1])) : "";
        results.push({ title, url, displayedUrl: toDisplayedUrl(url), snippet, source: "duckduckgo" });
    }
    return results;
}
/**
 * Parses the HTML of a DuckDuckGo "lite" results page.
 *
 * Every `<a rel="nofollow" href="...">` is treated as a result; snippets are
 * not extracted. Links without a title or pointing at a blocked URL are
 * skipped.
 *
 * @param {string} html - Results page markup.
 * @returns {{title: string, url: string, displayedUrl: string, snippet: string, source: string}[]}
 *   Parsed results in page order.
 */
function parseDuckDuckGoLiteResults(html) {
    // host + path without "www."; falls back to the raw URL when it does not parse.
    const toDisplayedUrl = (rawUrl) => {
        try {
            const parsed = new URL(rawUrl);
            return `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
        }
        catch {
            return rawUrl;
        }
    };
    const linkPattern = /<a[^>]*rel="nofollow"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
    const results = [];
    for (const [, href, innerHtml] of html.matchAll(linkPattern)) {
        const url = decodeDuckDuckGoUrl(href);
        const title = normalizeWhitespace(stripTags(innerHtml));
        if (!url || !title || isBlockedUrl(url)) {
            continue;
        }
        results.push({ title, url, displayedUrl: toDisplayedUrl(url), snippet: "", source: "duckduckgo-lite" });
    }
    return results;
}
/**
 * Runs a web search through DuckDuckGo's HTML endpoints.
 *
 * The "html" endpoint is tried first and the "lite" endpoint second; the first
 * one that yields at least one parsed result wins. Fetch and parse failures
 * are deliberately ignored so the next endpoint can be tried.
 *
 * @param {string} query - Search query.
 * @param {number} limit - Maximum number of results to return.
 * @returns {Promise<object[]>} Up to `limit` results, or [] when both endpoints fail.
 */
async function searchDuckDuckGo(query, limit) {
    const encodedQuery = encodeURIComponent(query);
    const endpoints = [
        { url: `https://html.duckduckgo.com/html/?q=${encodedQuery}`, isLite: false },
        { url: `https://lite.duckduckgo.com/lite/?q=${encodedQuery}`, isLite: true },
    ];
    for (const endpoint of endpoints) {
        try {
            const { text } = await fetchText(endpoint.url);
            const found = endpoint.isLite ? parseDuckDuckGoLiteResults(text) : parseDuckDuckGoResults(text);
            if (found.length > 0) {
                return found.slice(0, limit);
            }
        }
        catch { }
    }
    return [];
}
/**
 * Removes items whose normalized URL was already seen, keeping the first.
 *
 * @param {Iterable<{url: string}>} items - Items carrying a `url` property.
 * @returns {object[]} A new array holding the first item for each distinct URL.
 */
function uniqueByUrl(items) {
    const seenKeys = new Set();
    return [...items].filter((item) => {
        const key = normalizeUrl(item.url);
        if (seenKeys.has(key)) {
            return false;
        }
        seenKeys.add(key);
        return true;
    });
}
/**
 * Trims every string, drops empty ones, and removes duplicates while keeping
 * first-occurrence order.
 *
 * @param {string[]} items - Strings to de-duplicate.
 * @returns {string[]} The distinct, trimmed, non-empty strings.
 */
function uniqueStrings(items) {
    const unique = new Set();
    items.forEach((item) => {
        const trimmed = item.trim();
        if (trimmed) {
            unique.add(trimmed);
        }
    });
    return [...unique];
}
/**
 * Builds the first batch of search queries for a research question.
 *
 * The list contains the query itself and the query with fixed suffixes
 * ("official", "documentation", ...). A focus-specific query is put first when
 * a focus is given; the first four query tokens are appended when they differ
 * from the lower-cased query; and the first three tokens are appended as a
 * quoted phrase when there are at least three.
 *
 * @param {string} query - Research question.
 * @param {string} [focus] - Optional focus area.
 * @returns {string[]} Distinct search queries in priority order.
 */
function buildBaseSearchQueries(query, focus) {
    const cleanQuery = normalizeWhitespace(query);
    const tokens = tokenizeQuery(cleanQuery);
    const suffixes = ["official", "documentation", "research", "analysis", "review", "key facts"];
    const candidates = [cleanQuery, ...suffixes.map((suffix) => `${cleanQuery} ${suffix}`)];
    if (focus?.trim()) {
        candidates.unshift(`${cleanQuery} ${focus.trim()}`);
    }
    const rootPhrase = tokens.slice(0, Math.min(4, tokens.length)).join(" ");
    if (rootPhrase && rootPhrase !== cleanQuery.toLowerCase()) {
        candidates.push(rootPhrase);
    }
    if (tokens.length >= 3) {
        const phrase = tokens.slice(0, 3).join(" ");
        candidates.push(`"${phrase}"`);
    }
    return uniqueStrings(candidates);
}
/**
 * Scores a search result against a query.
 *
 * Title token hits weigh 6 each and snippet hits 3 each; the domain trust
 * score is added, plus up to 3 points for snippet length (title length when
 * the snippet is empty), one point per 80 characters.
 *
 * @param {{title: string, url: string, snippet: string}} result - Search result.
 * @param {string} query - Query the result is judged against.
 * @returns {number} The relevance score.
 */
function scoreSearchResult(result, query) {
    const tokens = tokenizeQuery(query);
    const { title, snippet, url } = result;
    const measuredLength = snippet.length || title.length;
    const parts = [
        scoreText(title, tokens) * 6,
        scoreText(snippet, tokens) * 3,
        scoreDomain(getHostname(url)),
        Math.min(3, Math.floor(measuredLength / 80)),
    ];
    return parts.reduce((sum, part) => sum + part, 0);
}
/**
 * De-duplicates, filters and ranks search results.
 *
 * Blocked URLs are dropped, the rest are sorted by descending relevance, and
 * at most two results per root domain are kept. The size check runs after each
 * pick, so one result is still returned when `limit` is below 1.
 *
 * @param {object[]} results - Raw search results.
 * @param {string} query - Query used for scoring.
 * @param {number} limit - Maximum number of results to return.
 * @returns {object[]} The best results, highest score first.
 */
function rankSearchResults(results, query, limit) {
    const candidates = [];
    for (const item of uniqueByUrl(results)) {
        if (isBlockedUrl(item.url)) {
            continue;
        }
        candidates.push({ item, score: scoreSearchResult(item, query) });
    }
    candidates.sort((left, right) => right.score - left.score);
    const picksPerDomain = new Map();
    const picked = [];
    for (const { item } of candidates) {
        const domain = getRootDomain(getHostname(item.url));
        const alreadyPicked = picksPerDomain.get(domain) ?? 0;
        if (alreadyPicked >= 2) {
            continue;
        }
        picksPerDomain.set(domain, alreadyPicked + 1);
        picked.push(item);
        if (picked.length >= limit) {
            break;
        }
    }
    return picked;
}
/**
 * Selects the most relevant crawled pages, allowing at most two per root
 * domain.
 *
 * The input array is not modified. The size check runs after each pick, so one
 * page is still returned when `limit` is below 1 and pages are available.
 *
 * @param {object[]} pages - Crawled pages carrying `url` and `relevanceScore`.
 * @param {number} limit - Maximum number of pages to return.
 * @returns {object[]} The chosen pages, highest relevance first.
 */
function pickTopPages(pages, limit) {
    const byRelevance = [...pages];
    byRelevance.sort((left, right) => right.relevanceScore - left.relevanceScore);
    const picksPerDomain = new Map();
    const chosen = [];
    for (const candidate of byRelevance) {
        const domain = getRootDomain(getHostname(candidate.url));
        const alreadyPicked = picksPerDomain.get(domain) ?? 0;
        if (alreadyPicked >= 2) {
            continue;
        }
        picksPerDomain.set(domain, alreadyPicked + 1);
        chosen.push(candidate);
        if (chosen.length >= limit) {
            break;
        }
    }
    return chosen;
}
/**
 * Extracts a page's links and re-scores them for the current query.
 *
 * On top of the score from extractAnchors, each link gains 2 per token hit in
 * its text, 1 per token hit in its URL, 2 when text or URL mentions a
 * research-like keyword (docs, guide, paper, ...), and the domain trust score
 * once more.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL used to resolve relative links.
 * @param {string[]} queryTokens - Tokens of the research query.
 * @param {number} maxLinks - Maximum number of links to consider.
 * @returns {{url: string, text: string, score: number}[]} Links sorted by descending score.
 */
function buildLinkCandidates(html, baseUrl, queryTokens, maxLinks) {
    const researchKeywords = /(docs|guide|help|support|manual|paper|study|report|blog|news|research|about|faq)/i;
    const rescored = [];
    for (const link of extractAnchors(html, baseUrl, maxLinks)) {
        const textHits = scoreText(link.text, queryTokens) * 2;
        const urlHits = scoreText(link.url, queryTokens);
        const keywordBonus = researchKeywords.test(`${link.text} ${link.url}`) ? 2 : 0;
        const domainBonus = scoreDomain(getHostname(link.url));
        rescored.push({ ...link, score: link.score + textHits + urlHits + keywordBonus + domainBonus });
    }
    rescored.sort((left, right) => right.score - left.score);
    return rescored;
}
/**
 * Builds a single-line excerpt: whitespace is normalized, then the text is cut
 * to `maxChars` with an ellipsis when it is too long.
 *
 * @param {string} text - Source text.
 * @param {number} maxChars - Maximum excerpt length.
 * @returns {string} The excerpt.
 */
function buildExcerpt(text, maxChars) {
    const singleLine = normalizeWhitespace(text);
    return trimString(singleLine, maxChars);
}
/**
 * Summarizes page text by joining its best-matching sentences; when no
 * sentence is selected, the first 700 characters of the text are used instead.
 *
 * @param {string} pageText - Plain text of the page.
 * @param {string[]} queryTokens - Tokens of the research query.
 * @param {number} [maxSentences=3] - Maximum number of sentences to join.
 * @returns {string} The summary.
 */
function summarizePage(pageText, queryTokens, maxSentences = 3) {
    const bestSentences = extractUsefulSentences(pageText, queryTokens, maxSentences);
    return bestSentences.length > 0 ? bestSentences.join(" ") : trimString(pageText, 700);
}
/**
 * Fetches one page and turns it into a scored page record plus its ranked
 * outgoing links.
 *
 * The record's URL is the page's canonical URL when one is declared and not
 * blocked, otherwise the requested URL; both are normalized. Relevance is
 * twice the token hits in title, description and content, plus the domain
 * trust score, plus up to 4 points for length (one per 250 words).
 *
 * @param {string} url - Page to fetch.
 * @param {number} depth - Link depth recorded on the page.
 * @param {string[]} queryTokens - Tokens of the research query.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS_PER_PAGE] - Cap on kept page text.
 * @param {number} [maxLinkCount=MAX_LINKS_PER_PAGE] - Cap on kept links.
 * @returns {Promise<{page: object, links: object[]}>} The page record and its link candidates.
 * @throws {Error} Whatever fetchText throws for this URL.
 */
async function crawlUrl(url, depth, queryTokens, maxChars = DEFAULT_MAX_CHARS_PER_PAGE, maxLinkCount = MAX_LINKS_PER_PAGE) {
    const fetched = await fetchText(url);
    const html = fetched.text;
    const title = extractTitle(html) || getHostname(url) || url;
    const description = extractMetaDescription(html);
    const canonical = extractCanonical(html);
    const content = trimString(stripTags(html), maxChars);
    let finalUrl;
    if (canonical && !isBlockedUrl(canonical)) {
        finalUrl = normalizeUrl(resolveUrl(canonical, url) ?? url);
    }
    else {
        finalUrl = normalizeUrl(url);
    }
    const links = buildLinkCandidates(html, finalUrl, queryTokens, maxLinkCount);
    const sourceHost = getHostname(finalUrl);
    const sourceType = sourceTypeForHost(sourceHost);
    const excerpt = buildExcerpt(description || summarizePage(content, queryTokens), 700);
    // Empty content splits into a single empty string, which the filter removes.
    const wordCount = content.split(/\s+/).filter(Boolean).length;
    const tokenHits = scoreText(`${title}\n${description}\n${content}`, queryTokens);
    const relevanceScore = tokenHits * 2 + scoreDomain(sourceHost) + Math.min(4, Math.floor(wordCount / 250));
    const page = {
        url: finalUrl,
        title,
        description,
        content,
        wordCount,
        links: links.map((link) => link.url),
        excerpt,
        relevanceScore,
        depth,
        sourceType,
    };
    return { page, links };
}
/**
 * Builds the prompt that asks the model for follow-up queries, gaps and
 * contradictions as JSON.
 *
 * Up to six pages are previewed, each with a snippet of at most 280
 * characters. Sections are separated by blank lines; the focus line is left
 * out when no focus is given.
 *
 * @param {string} query - Research question.
 * @param {string} [focus] - Optional focus area.
 * @param {object[]} pages - Crawled pages, best first.
 * @param {number} maxQueries - Upper bound on follow-up queries named in the prompt.
 * @returns {string} The planner prompt.
 */
function buildPlannerPrompt(query, focus, pages, maxQueries) {
    const describeSource = (page, index) => {
        const snippet = trimString(page.excerpt || page.description || page.content, 280);
        const lines = [`Source ${index + 1}`, `Title: ${page.title}`, `URL: ${page.url}`, `Type: ${page.sourceType}`, `Snippet: ${snippet}`];
        return lines.join("\n");
    };
    const preview = pages.slice(0, 6).map((page, index) => describeSource(page, index)).join("\n\n");
    const sections = [
        `You are a local deep-research planner.`,
        `Task: ${query}`,
        focus ? `Focus: ${focus}` : "",
        `You must return ONLY valid JSON with this shape:`,
        `{ "followUpQueries": string[], "gaps": string[], "contradictions": string[] }`,
        `Limit followUpQueries to at most ${maxQueries}.`,
        `Prefer queries that test evidence, fill missing details, or inspect authoritative sources.`,
        `Avoid social media, video sites, and shallow listicle search terms.`,
        `Evidence packet:`,
        preview || "(no sources yet)",
    ];
    return sections.filter(Boolean).join("\n\n");
}
/**
 * Builds the prompt that asks the model to write the final markdown report.
 *
 * Up to ten pages are included as numbered sources (content cut to 1800
 * characters) and up to twelve search results as a search trail (snippets cut
 * to 220 characters). Sections are separated by blank lines; the focus line is
 * left out when no focus is given.
 *
 * @param {string} query - Research question.
 * @param {string} [focus] - Optional focus area.
 * @param {object[]} pages - Crawled pages, best first.
 * @param {object[]} searchResults - Ranked search results.
 * @returns {string} The synthesis prompt.
 */
function buildSynthesisPrompt(query, focus, pages, searchResults) {
    const describeSource = (page, index) => {
        const lines = [`Source ${index + 1}`, `Title: ${page.title}`, `URL: ${page.url}`, `Type: ${page.sourceType}`, `Excerpt: ${page.excerpt}`, `Relevant sentences: ${trimString(page.content, 1800)}`];
        return lines.join("\n");
    };
    const describeResult = (result, index) => `${index + 1}. ${result.title}\n ${result.url}\n ${trimString(result.snippet, 220)}`;
    const sources = pages.slice(0, 10).map((page, index) => describeSource(page, index)).join("\n\n");
    const trail = searchResults.slice(0, 12).map((result, index) => describeResult(result, index)).join("\n");
    const sections = [
        `You are a careful deep-research analyst.`,
        `Question: ${query}`,
        focus ? `Focus: ${focus}` : "",
        `Write a concise markdown report using only the evidence packet below.`,
        `Be explicit about uncertainty and conflicts.`,
        `Every important claim should be tied to one or more source numbers like [1] or [1][3].`,
        `Return sections in this order:`,
        `# Answer`,
        `# Key findings`,
        `# Conflicts / caveats`,
        `# Sources`,
        `Evidence packet:`,
        sources || "(no crawled sources)",
        `Search trail:`,
        trail || "(no search trail)",
    ];
    return sections.filter(Boolean).join("\n\n");
}
/**
 * Builds a markdown report without using a model.
 *
 * The report lists up to ten sources, derives key findings from the first six,
 * lists contradictions and gaps (or a default line for each when there are
 * none), and ends with a search trail of up to twelve results.
 *
 * @param {string} query - Research question.
 * @param {string} [focus] - Optional focus area mentioned in the answer line.
 * @param {object[]} pages - Crawled pages, best first.
 * @param {object[]} searchResults - Ranked search results.
 * @param {string[]} [gaps=[]] - Known evidence gaps.
 * @param {string[]} [contradictions=[]] - Known contradictions.
 * @returns {string} The markdown report.
 */
function buildFallbackReport(query, focus, pages, searchResults, gaps = [], contradictions = []) {
    const topPages = pages.slice(0, 10);
    const preview = (page, maxChars) => trimString(page.excerpt || page.description || page.content, maxChars);
    const asBullets = (items) => items.map((item) => `- ${item}`);
    let sourceLines = "No pages could be crawled.";
    if (topPages.length) {
        sourceLines = topPages.map((page, index) => `${index + 1}. ${page.title} — ${page.url}\n ${preview(page, 220)}`).join("\n");
    }
    const searchTrail = searchResults.slice(0, 12).map((r, index) => `${index + 1}. ${r.title} — ${r.url}`);
    const keyFindings = topPages.slice(0, 6).map((page, index) => `- [${index + 1}] ${page.title}: ${preview(page, 180)}`);
    const focusNote = focus ? ` for the focus area "${focus}"` : "";
    const report = [];
    report.push(`# Answer`);
    report.push(`I gathered ${topPages.length} crawled sources${focusNote}. This is a best-effort local synthesis for: ${query}.`);
    report.push(``, `# Key findings`);
    report.push(...(keyFindings.length ? keyFindings : ["- No strong findings extracted yet."]));
    report.push(``, `# Conflicts / caveats`);
    report.push(...(contradictions.length ? asBullets(contradictions) : ["- No clear contradictions were detected automatically."]));
    report.push(...(gaps.length ? asBullets(gaps) : ["- The search may need more targeted follow-up queries."]));
    report.push(``, `# Sources`, sourceLines);
    report.push(``, `# Search trail`);
    report.push(...(searchTrail.length ? searchTrail : ["No search results were returned."]));
    return report.join("\n");
}
/**
 * Parses JSON out of model output that may be wrapped in extra text.
 *
 * Attempts, in order: the contents of a ```json fenced block (or the whole
 * trimmed text when there is none), then the span from the first "{" to the
 * last "}" of that candidate.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed value, or null when nothing parses.
 */
function parseJsonLoose(text) {
    const attempt = (source) => {
        try {
            return { ok: true, value: JSON.parse(source) };
        }
        catch {
            return { ok: false, value: null };
        }
    };
    const body = text.trim();
    const fence = /```json\s*([\s\S]*?)```/i.exec(body);
    const candidate = fence ? fence[1].trim() : body;
    const direct = attempt(candidate);
    if (direct.ok) {
        return direct.value;
    }
    const open = candidate.indexOf("{");
    const close = candidate.lastIndexOf("}");
    if (open === -1 || close === -1 || close <= open) {
        return null;
    }
    const embedded = attempt(candidate.slice(open, close + 1));
    return embedded.ok ? embedded.value : null;
}
/**
 * Finds a model that is currently loaded in LM Studio.
 *
 * With a `modelId`, only a loaded model with exactly that identifier is
 * accepted. Without one, the first loaded model is used, provided it has an
 * identifier.
 *
 * @param {string} [modelId] - Identifier of the wanted model.
 * @returns {Promise<{identifier: string, model: object}|null>} The model
 *   handle, or null when no suitable model is loaded.
 */
async function getLoadedModel(modelId) {
    const loadedModels = await getClient().llm.listLoaded();
    if (loadedModels.length === 0) {
        return null;
    }
    if (!modelId) {
        const first = loadedModels[0];
        return first?.identifier ? { identifier: first.identifier, model: first } : null;
    }
    const requested = loadedModels.find((m) => m.identifier === modelId);
    if (!requested) {
        return null;
    }
    return { identifier: requested.identifier ?? modelId, model: requested };
}
/**
 * Runs a text completion on a loaded model and collects the streamed output.
 *
 * Never rejects: when no model is available, or the lookup or the completion
 * fails, both result fields are null.
 *
 * @param {string} [modelId] - Identifier of the model to use; any loaded model when omitted.
 * @param {string} prompt - Prompt text.
 * @param {number} maxTokens - Token budget passed to the model.
 * @param {number} temperature - Sampling temperature passed to the model.
 * @returns {Promise<{text: string|null, modelUsed: string|null}>} Trimmed
 *   completion text and the identifier of the model that produced it.
 */
async function completeWithModel(modelId, prompt, maxTokens, temperature) {
    const noResult = () => ({ text: null, modelUsed: null });
    let loaded;
    try {
        loaded = await getLoadedModel(modelId);
    }
    catch {
        loaded = null;
    }
    if (!loaded) {
        return noResult();
    }
    try {
        const stream = await loaded.model.complete(prompt, { maxTokens, temperature });
        const pieces = [];
        for await (const chunk of stream) {
            pieces.push(chunk.content ?? "");
        }
        return { text: pieces.join("").trim(), modelUsed: loaded.identifier };
    }
    catch {
        return noResult();
    }
}
/**
 * Produces follow-up search queries without a model.
 *
 * From each of the first four pages it derives a query from up to four title
 * words of at least four characters, and a `site:` query for the page's host
 * unless the host is blocked. It then adds the focus query, generic suffix
 * queries when the query has any tokens, and more critical suffixes after the
 * first round. At most eight distinct queries are returned, in insertion order.
 *
 * @param {string} query - Research question.
 * @param {string} [focus] - Optional focus area.
 * @param {object[]} pages - Crawled pages, best first.
 * @param {number} round - 1-based number of the round just completed.
 * @returns {string[]} Follow-up queries.
 */
function makeHeuristicFollowUps(query, focus, pages, round) {
    const suggestions = new Set();
    const hasTokens = tokenizeQuery(query).length > 0;
    pages.slice(0, 4).forEach((page) => {
        const titleWords = page.title
            .split(/[^a-z0-9]+/i)
            .map((word) => word.trim())
            .filter((word) => word.length >= 4)
            .slice(0, 4);
        const keyBits = titleWords.join(" ");
        if (keyBits) {
            suggestions.add(`${query} ${keyBits}`.trim());
        }
        const host = getHostname(page.url);
        const hostIsBlocked = BLOCKED_HOSTS.some((blocked) => host === blocked || host.endsWith(`.${blocked}`));
        if (host && !hostIsBlocked) {
            suggestions.add(`${query} site:${host}`);
        }
    });
    if (focus) {
        suggestions.add(`${query} ${focus}`);
    }
    if (hasTokens) {
        for (const suffix of ["official", "documentation", "key facts"]) {
            suggestions.add(`${query} ${suffix}`);
        }
    }
    if (round > 1) {
        for (const suffix of ["controversy", "analysis", "evidence"]) {
            suggestions.add(`${query} ${suffix}`);
        }
    }
    return [...suggestions].slice(0, 8);
}
/**
 * Runs the whole research workflow: search, crawl, plan follow-ups, then
 * synthesize and audit a report.
 *
 * Each round issues the current query frontier, queues the ranked results,
 * crawls the queue until it is empty or the page budget is reached, and then
 * derives the next frontier from the planner model or, failing that, from
 * heuristics. After the last round a model writes the report and audits the
 * evidence; when no model output is available a templated fallback report is
 * used instead.
 *
 * @param {object} params - Tool parameters. Reads query, focus, maxRounds,
 *   maxSearchesPerRound, maxResultsPerSearch, maxPages, maxDepth,
 *   maxCharsPerPage, maxTokens, temperature and modelId.
 * @param {object} ctx - Tool call context. Uses ctx.status(message),
 *   ctx.warn(message) and ctx.signal.aborted.
 * @returns {Promise<object>} An object with query, focus, modelUsed, rounds,
 *   searchQueries, searchResults, sources, contradictions, gaps and
 *   reportMarkdown.
 * @throws {Error} "Research was aborted" when ctx.signal is aborted at one of
 *   the checkpoints.
 */
async function runDeepResearch(params, ctx) {
// Normalize inputs; every numeric option is clamped to the range noted on its line.
const query = normalizeWhitespace(params.query);
const focus = params.focus?.trim() || undefined;
const maxRounds = clampInt(params.maxRounds, DEFAULT_MAX_ROUNDS, 1, 4);
const maxSearchesPerRound = clampInt(params.maxSearchesPerRound, DEFAULT_MAX_SEARCHES_PER_ROUND, 1, 8);
const maxResultsPerSearch = clampInt(params.maxResultsPerSearch, DEFAULT_MAX_RESULTS_PER_SEARCH, 1, 10);
const maxPages = clampInt(params.maxPages, DEFAULT_MAX_PAGES, 1, 24);
const maxDepth = clampInt(params.maxDepth, DEFAULT_MAX_DEPTH, 1, 3);
const maxCharsPerPage = clampInt(params.maxCharsPerPage, DEFAULT_MAX_CHARS_PER_PAGE, 3000, 40000);
const maxTokens = clampInt(params.maxTokens, 650, 64, 1500);
const temperature = clampFloat(params.temperature, 0.2, 0, 2);
// Relevance tokens come from the query and the focus together.
const queryTokens = tokenizeQuery(`${query} ${focus ?? ""}`);
// State shared across rounds.
const issuedQueries = new Set(); // lower-cased queries already searched
const visited = new Set(); // normalized URLs already attempted
const allSearchResults = [];
const allPages = [];
const queue = []; // crawl queue of { url, depth, score }
let frontier = buildBaseSearchQueries(query, focus).slice(0, maxSearchesPerRound);
ctx.status(`Planning research for ${query}`);
for (let round = 1; round <= maxRounds; round += 1) {
if (ctx.signal.aborted)
throw new Error("Research was aborted");
// Drop blank queries and ones already issued (compared case-insensitively).
const roundQueries = frontier.map((item) => item.trim()).filter((item) => item && !issuedQueries.has(item.toLowerCase()));
roundQueries.forEach((item) => issuedQueries.add(item.toLowerCase()));
if (roundQueries.length === 0)
break;
ctx.status(`Round ${round}/${maxRounds}: searching ${roundQueries.length} queries`);
// Search phase: ranked results are recorded and queued at depth 0.
for (const searchQuery of roundQueries.slice(0, maxSearchesPerRound)) {
if (ctx.signal.aborted)
throw new Error("Research was aborted");
ctx.status(`Searching: ${searchQuery}`);
let results = await searchDuckDuckGo(searchQuery, maxResultsPerSearch);
results = rankSearchResults(results, `${query} ${focus ?? ""}`, maxResultsPerSearch);
allSearchResults.push(...results);
for (const result of results)
queue.push({ url: normalizeUrl(result.url), depth: 0, score: scoreSearchResult(result, query) });
}
// Crawl phase: best-scored entries first.
queue.sort((a, b) => b.score - a.score);
// Links found while crawling are held back and only join the queue after this loop.
const nextQueue = [];
while (queue.length > 0) {
// Stop at the page budget; anything still queued stays there for a later round.
if (allPages.length >= maxPages)
break;
if (ctx.signal.aborted)
throw new Error("Research was aborted");
const current = queue.shift();
const url = normalizeUrl(current.url);
if (!url || visited.has(url) || isBlockedUrl(url))
continue;
if (current.depth > maxDepth)
continue;
// Marked before fetching, so a URL that fails is not retried.
visited.add(url);
ctx.status(`Crawling ${allPages.length + 1}/${maxPages}: ${getHostname(url) || url}`);
try {
const { page, links } = await crawlUrl(url, current.depth, queryTokens, maxCharsPerPage, MAX_LINKS_PER_PAGE);
// NOTE(review): crawlUrl already sets relevanceScore with this same formula; this looks redundant.
page.relevanceScore = scoreText(`${page.title}\n${page.description}\n${page.content}`, queryTokens) * 2 + scoreDomain(getHostname(page.url)) + Math.min(4, Math.floor(page.wordCount / 250));
allPages.push(page);
if (current.depth < maxDepth) {
for (const link of links) {
if (visited.has(link.url) || isBlockedUrl(link.url))
continue;
nextQueue.push({ url: link.url, depth: current.depth + 1, score: link.score });
}
}
}
catch (error) {
// A page that fails to crawl is reported as a warning and skipped.
ctx.warn(`Could not crawl ${url}: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Discovered links become candidates for the next round's crawl.
queue.push(...nextQueue.sort((a, b) => b.score - a.score));
queue.sort((a, b) => b.score - a.score);
// Planning phase: ask the model for follow-up queries, else fall back to heuristics.
const rankedPages = pickTopPages(allPages, Math.min(maxPages, 10));
// modelInfo is only used as an availability check; completeWithModel looks the model up again.
const modelInfo = await getLoadedModel(params.modelId).catch(() => null);
let followUps = [];
if (modelInfo) {
const plannerPrompt = buildPlannerPrompt(query, focus, rankedPages, 8);
const planner = await completeWithModel(params.modelId, plannerPrompt, 320, 0.15);
const parsed = planner.text ? parseJsonLoose(planner.text) : null;
if (parsed && Array.isArray(parsed.followUpQueries))
followUps = parsed.followUpQueries.map((item) => String(item).trim()).filter(Boolean);
}
if (followUps.length === 0)
followUps = makeHeuristicFollowUps(query, focus, rankedPages, round);
frontier = uniqueStrings(followUps).filter((item) => !issuedQueries.has(item.toLowerCase())).slice(0, maxSearchesPerRound);
if (frontier.length === 0)
break;
}
// Final selection of sources and search trail.
const finalPages = pickTopPages(allPages, maxPages);
const uniqueSearchResults = rankSearchResults(allSearchResults, `${query} ${focus ?? ""}`, Math.min(allSearchResults.length, maxPages * 2));
let contradictions = [];
let gaps = [];
let reportMarkdown = "";
let modelUsed = null;
// Synthesis: use the model's report when there is one, otherwise the templated fallback.
const synthesisPrompt = buildSynthesisPrompt(query, focus, finalPages, uniqueSearchResults);
const synthesis = await completeWithModel(params.modelId, synthesisPrompt, maxTokens, temperature);
if (synthesis.text) {
reportMarkdown = synthesis.text;
modelUsed = synthesis.modelUsed;
}
else {
reportMarkdown = buildFallbackReport(query, focus, finalPages, uniqueSearchResults);
}
// Audit: a second model pass that lists contradictions and gaps as JSON.
const auditEvidence = finalPages.slice(0, 8).map((page) => `${page.title}: ${trimString(page.excerpt || page.description || page.content, 240)}`).join("\n");
const auditPrompt = [`You are auditing a research packet for missing evidence, contradictions, and reliability concerns.`, `Question: ${query}`, focus ? `Focus: ${focus}` : "", `Evidence:`, auditEvidence || "(none)", `Return ONLY valid JSON with keys contradictions and gaps, both arrays of concise strings.`].filter(Boolean).join("\n\n");
const audit = await completeWithModel(params.modelId, auditPrompt, 220, 0.1);
const parsedAudit = audit.text ? parseJsonLoose(audit.text) : null;
if (parsedAudit?.contradictions && Array.isArray(parsedAudit.contradictions))
contradictions = parsedAudit.contradictions.map((item) => String(item).trim()).filter(Boolean).slice(0, 8);
if (parsedAudit?.gaps && Array.isArray(parsedAudit.gaps))
gaps = parsedAudit.gaps.map((item) => String(item).trim()).filter(Boolean).slice(0, 8);
// If the report is still the plain fallback, rebuild it so it includes the audit findings.
if (!reportMarkdown || reportMarkdown === buildFallbackReport(query, focus, finalPages, uniqueSearchResults))
reportMarkdown = buildFallbackReport(query, focus, finalPages, uniqueSearchResults, gaps, contradictions);
if (audit.modelUsed && !modelUsed)
modelUsed = audit.modelUsed;
const sources = finalPages.map((page, index) => ({ rank: index + 1, title: page.title, url: page.url, domain: getHostname(page.url), sourceType: page.sourceType, excerpt: page.excerpt, wordCount: page.wordCount, relevanceScore: page.relevanceScore, depth: page.depth }));
// NOTE(review): `rounds` is the configured maximum, not the number of rounds actually run — confirm this is intended.
return { query, focus: focus ?? null, modelUsed, rounds: maxRounds, searchQueries: Array.from(issuedQueries), searchResults: uniqueSearchResults, sources, contradictions, gaps, reportMarkdown };
}
/**
 * Tool definition exposing runDeepResearch to LM Studio.
 *
 * The numeric bounds and defaults in the schema match the clamps applied
 * inside runDeepResearch.
 */
const deepResearchTool = (0, sdk_1.tool)({
    name: "deepResearch",
    description: "Run a local autonomous deep-research workflow: it plans searches, crawls pages recursively, filters social sites, checks for gaps and contradictions, and returns a synthesized markdown report. No paid APIs are used.",
    parameters: (() => {
        // Integer option restricted to [min, max] with a default value.
        const boundedInt = (min, max, fallback) => zod_1.z.number().int().min(min).max(max).default(fallback);
        return {
            query: zod_1.z.string().min(1),
            focus: zod_1.z.string().optional(),
            maxRounds: boundedInt(1, 4, DEFAULT_MAX_ROUNDS),
            maxSearchesPerRound: boundedInt(1, 8, DEFAULT_MAX_SEARCHES_PER_ROUND),
            maxResultsPerSearch: boundedInt(1, 10, DEFAULT_MAX_RESULTS_PER_SEARCH),
            maxPages: boundedInt(1, 24, DEFAULT_MAX_PAGES),
            maxDepth: boundedInt(1, 3, DEFAULT_MAX_DEPTH),
            maxCharsPerPage: boundedInt(3000, 40000, DEFAULT_MAX_CHARS_PER_PAGE),
            modelId: zod_1.z.string().optional(),
            maxTokens: boundedInt(64, 1500, 650),
            temperature: zod_1.z.number().min(0).max(2).default(0.2)
        };
    })(),
    implementation: async (params, ctx) => runDeepResearch(params, ctx)
});
/**
 * Plugin entry point: registers a tools provider that offers the deepResearch
 * tool.
 *
 * @param {object} pluginContext - Plugin context; its withToolsProvider method
 *   is called with an async provider function.
 * @returns {Promise<void>}
 */
async function main(pluginContext) {
    const provideTools = async () => [deepResearchTool];
    pluginContext.withToolsProvider(provideTools);
}
//# sourceMappingURL=index.js.map