// src/index.ts
// @ts-nocheck
import { LMStudioClient, tool, type PluginContext } from "@lmstudio/sdk";
import { z } from "zod";
/** One hit parsed from a DuckDuckGo results page. */
type SearchResult = {
/** Result title with markup stripped and whitespace collapsed. */
title: string;
/** Target URL after unwrapping the DuckDuckGo redirect link. */
url: string;
/** Hostname (without a leading "www.") followed by the path; the raw URL when it cannot be parsed. */
displayedUrl: string;
/** Snippet text with markup stripped, or an empty string when none was found. */
snippet: string;
/** Which parser produced the hit: "duckduckgo" or "duckduckgo-lite". */
source: string;
};
/** An outgoing link found on a crawled page, with a heuristic priority. */
type LinkCandidate = {
/** Absolute, normalized http(s) URL of the link target. */
url: string;
/** Anchor text with markup stripped and whitespace collapsed (may be empty). */
text: string;
/** Heuristic ranking score; candidates are sorted with the highest score first. */
score: number;
};
/** A crawled page together with the text and metadata extracted from it. */
type ResearchPage = {
/** Normalized URL of the page: its declared canonical URL when usable, otherwise the requested URL. */
url: string;
/** Page title; falls back to the hostname or the URL when no title is found. */
title: string;
/** Meta description (or og:description); empty when the page declares none. */
description: string;
/** Plain text extracted from the HTML, trimmed to the per-page character limit. */
content: string;
/** Number of whitespace-separated words in `content`. */
wordCount: number;
/** URLs of the ranked outgoing link candidates found on the page. */
links: string[];
/** Short preview (at most 700 characters) built from the description or the most relevant sentences. */
excerpt: string;
/** Heuristic relevance: query-token hits, a domain bonus and a length bonus. */
relevanceScore: number;
/** Crawl depth at which the page was fetched; search results start at 0. */
depth: number;
/** Coarse source category: "blocked", "authoritative", "reference" or "web". */
sourceType: string;
};
/**
 * Input accepted by the deep-research workflow. Numeric options are clamped
 * to the stated ranges by runDeepResearch; omitted options use the defaults.
 */
type DeepResearchParams = {
/** The research question. */
query: string;
/** Optional aspect of the question to emphasise in searches and prompts. */
focus?: string;
/** Number of search/crawl rounds, 1-4. */
maxRounds?: number;
/** Search queries issued per round, 1-8. */
maxSearchesPerRound?: number;
/** Results kept per search query, 1-10. */
maxResultsPerSearch?: number;
/** Total pages to crawl, 1-24. */
maxPages?: number;
/** Maximum link-following depth, 1-3. */
maxDepth?: number;
/** Characters of extracted text kept per page, 3000-40000. */
maxCharsPerPage?: number;
/** Identifier of the loaded model to use; the first loaded model is used when omitted. */
modelId?: string;
/** Token budget for the synthesis completion, 64-1500 (default 650). */
maxTokens?: number;
/** Sampling temperature for the synthesis completion, 0-2 (default 0.2). */
temperature?: number;
};
// Default per-request timeout used by fetchText, in milliseconds.
const DEFAULT_TIMEOUT_MS = 15000;
// Defaults for the corresponding DeepResearchParams options.
const DEFAULT_MAX_ROUNDS = 2;
const DEFAULT_MAX_SEARCHES_PER_ROUND = 4;
const DEFAULT_MAX_RESULTS_PER_SEARCH = 6;
const DEFAULT_MAX_PAGES = 12;
const DEFAULT_MAX_DEPTH = 2;
const DEFAULT_MAX_CHARS_PER_PAGE = 16000;
// Maximum number of outgoing link candidates kept per crawled page.
const MAX_LINKS_PER_PAGE = 12;
// User-Agent header sent with every HTTP request.
const USER_AGENT = "Mozilla/5.0 (compatible; LMStudioDeepResearch/2.0; +https://lmstudio.ai)";
// Hosts that are never searched or crawled; a host matches when it equals an entry or is a subdomain of it.
const BLOCKED_HOSTS = ["facebook.com", "instagram.com", "x.com", "twitter.com", "tiktok.com", "reddit.com", "pinterest.com", "linkedin.com", "snapchat.com", "discord.com", "discord.gg", "tumblr.com", "quora.com", "fandom.com", "youtube.com", "youtu.be", "twitch.tv", "onlyfans.com"];
// Substrings that, when present anywhere in a lower-cased URL, cause the URL to be skipped.
const BLOCKED_URL_PARTS = ["/share", "/sharer", "/intent/", "/status/", "/posts/", "/reels/", "/shorts/", "/video/", "/watch?", "/watch/", "/tiktok.com/", "/redd.it/"];
// Navigation/legal phrases used to down-rank links and to drop boilerplate text lines.
const BOILERPLATE_LINK_TEXT = ["home", "menu", "log in", "login", "sign in", "sign up", "subscribe", "newsletter", "privacy", "terms", "cookies", "cookie policy", "accept cookies", "contact", "about us", "about", "sitemap", "search", "share", "follow", "read more", "learn more"];
// Words ignored by tokenizeQuery when extracting query keywords.
const STOP_WORDS = new Set(["the", "and", "for", "with", "that", "this", "from", "into", "about", "what", "when", "where", "which", "who", "how", "why", "can", "could", "would", "should", "please", "need", "want", "best", "latest", "current", "new", "old", "vs", "via", "of", "to", "in", "on", "by", "as", "is", "are", "be", "it", "or", "an", "a"]);
/** Shared SDK client; stays null until the first call to getClient. */
let client: LMStudioClient | null = null;

/** Returns the shared LM Studio client, constructing it on first use. */
function getClient(): LMStudioClient {
  if (client) return client;
  client = new LMStudioClient();
  return client;
}
/**
 * Coerces `value` to an integer and clamps it to [min, max].
 *
 * Finite numbers are truncated toward zero; strings are accepted only when
 * they are plain (optionally negative) digit runs. Anything else yields
 * `fallback`, which is clamped as well.
 */
function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  const pick = (): number => {
    if (typeof value === "number") {
      return Number.isFinite(value) ? Math.trunc(value) : fallback;
    }
    if (typeof value === "string" && /^-?\d+$/.test(value.trim())) {
      return Math.trunc(Number(value));
    }
    return fallback;
  };
  const upperBounded = Math.min(max, pick());
  return Math.max(min, upperBounded);
}
/**
 * Coerces `value` to a number and clamps it to [min, max].
 *
 * Finite numbers are used as-is; strings must look like a plain decimal
 * ("12", "-0.5"). Anything else yields `fallback`, which is clamped too.
 */
function clampFloat(value: unknown, fallback: number, min: number, max: number): number {
  const pick = (): number => {
    if (typeof value === "number") {
      return Number.isFinite(value) ? value : fallback;
    }
    if (typeof value === "string" && /^-?\d+(?:\.\d+)?$/.test(value.trim())) {
      return Number(value);
    }
    return fallback;
  };
  const upperBounded = Math.min(max, pick());
  return Math.max(min, upperBounded);
}
/** Collapses every run of whitespace to a single space and trims both ends. */
function normalizeWhitespace(value: string): string {
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Trims `value` and, when it is longer than `maxChars`, cuts it so that the
 * result (including the trailing ellipsis character) fits in `maxChars`.
 */
function trimString(value: string, maxChars: number): string {
  const clean = value.trim();
  const fits = clean.length <= maxChars;
  if (!fits) {
    // Reserve one character for the ellipsis; never slice with a negative end.
    const keep = Math.max(0, maxChars - 1);
    const head = clean.slice(0, keep).trimEnd();
    return head + "…";
  }
  return clean;
}
/**
 * Decodes the HTML entities that commonly appear in scraped text.
 *
 * Handles the named entities nbsp, amp, quot, apos, lt and gt
 * (case-insensitively) plus decimal and hexadecimal numeric references.
 * Decoding happens in a single pass, so an escaped ampersand followed by
 * "lt;" yields the literal entity text rather than being decoded twice.
 * Numeric references outside the Unicode range are left untouched.
 *
 * @param value Text that may contain HTML entities.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  const named: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">",
  };
  const entityPattern = /&(?:#(\d+)|#x([0-9a-f]+)|(nbsp|amp|quot|apos|lt|gt));/gi;
  return value.replace(entityPattern, (entity: string, dec?: string, hex?: string, name?: string) => {
    if (name !== undefined) return named[name.toLowerCase()];
    const codePoint = dec !== undefined ? Number(dec) : parseInt(hex ?? "", 16);
    // fromCodePoint throws a RangeError for out-of-range values; keep the raw text instead.
    if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return entity;
    return String.fromCodePoint(codePoint);
  });
}
/**
 * Returns the lower-cased hostname of `url` without a leading "www.",
 * or an empty string when `url` cannot be parsed.
 */
function getHostname(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  const withoutWww = parsed.hostname.replace(/^www\./i, "");
  return withoutWww.toLowerCase();
}
/**
 * Returns the last two dot-separated labels of `hostname`. Hostnames with
 * two or fewer non-empty labels are returned unchanged.
 */
function getRootDomain(hostname: string): string {
  const labels = hostname.split(".").filter((label) => label.length > 0);
  const count = labels.length;
  return count > 2 ? `${labels[count - 2]}.${labels[count - 1]}` : hostname;
}
/**
 * Canonicalises a URL for de-duplication: drops the fragment and any
 * trailing slashes on a non-root path. Input that cannot be parsed as a
 * URL is returned with its whitespace normalised instead.
 */
function normalizeUrl(url: string): string {
  try {
    const target = new URL(url);
    target.hash = "";
    const path = target.pathname;
    const hasTrailingSlash = path !== "/" && path.endsWith("/");
    if (hasTrailingSlash) {
      target.pathname = path.replace(/\/+$/, "");
      if (!target.pathname) {
        target.pathname = "/";
      }
    }
    return target.toString();
  } catch {
    return normalizeWhitespace(url);
  }
}
/**
 * Resolves `raw` against `baseUrl`.
 *
 * @returns The absolute URL, or null when `raw` is blank, uses a
 * javascript/mailto/tel/data scheme, or cannot be resolved.
 */
function resolveUrl(raw: string, baseUrl: string): string | null {
  const candidate = raw.trim();
  const unusable = !candidate || /^(javascript|mailto|tel|data):/i.test(candidate);
  if (unusable) {
    return null;
  }
  try {
    const absolute = new URL(candidate, baseUrl);
    return absolute.toString();
  } catch {
    return null;
  }
}
/** True when the URL's host equals a blocked host or is a subdomain of one. */
function isBlockedHost(url: string): boolean {
  const host = getHostname(url);
  for (const blocked of BLOCKED_HOSTS) {
    if (host === blocked) return true;
    if (host.endsWith(`.${blocked}`)) return true;
  }
  return false;
}
/**
 * True when the URL should never be searched or crawled: either its host is
 * blocked or its lower-cased text contains one of the blocked URL fragments.
 */
function isBlockedUrl(url: string): boolean {
  const haystack = url.toLowerCase();
  if (isBlockedHost(url)) {
    return true;
  }
  for (const fragment of BLOCKED_URL_PARTS) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}
/**
 * Gives a host a trust bonus: 8 for .gov/.edu/.ac.uk, 7 for a few named
 * research bodies, 5 for Wikipedia, 4 for docs./developer. hosts, 1 for
 * everything else and 0 for an empty host. The first matching rule wins.
 */
function scoreDomain(host: string): number {
  if (!host) return 0;
  const name = host.toLowerCase();
  const endsWithAny = (suffixes: string[]): boolean => suffixes.some((suffix) => name.endsWith(suffix));
  const containsAny = (needles: string[]): boolean => needles.some((needle) => name.includes(needle));

  if (endsWithAny([".gov", ".edu", ".ac.uk"])) return 8;
  if (containsAny(["nih.gov", "who.int", "arxiv.org"])) return 7;
  if (containsAny(["wikipedia.org"])) return 5;
  if (containsAny(["docs.", "developer."])) return 4;
  return 1;
}
/**
 * Classifies a host as "blocked", "authoritative" (.gov, .edu or nih.gov),
 * "reference" (Wikipedia) or plain "web". The first matching rule wins.
 */
function sourceTypeForHost(host: string): string {
  const name = host.toLowerCase();
  const onBlockList = BLOCKED_HOSTS.some((blocked) => name === blocked || name.endsWith(`.${blocked}`));
  if (onBlockList) {
    return "blocked";
  }
  const isAuthoritative = name.endsWith(".gov") || name.endsWith(".edu") || name.includes("nih.gov");
  if (isAuthoritative) {
    return "authoritative";
  }
  return name.includes("wikipedia.org") ? "reference" : "web";
}
/**
 * Extracts the distinct keywords of a query: lower-cased runs of ASCII
 * letters/digits that are at least three characters long and not stop
 * words, in order of first appearance.
 */
function tokenizeQuery(query: string): string[] {
  const keywords = new Set<string>();
  for (const word of query.toLowerCase().split(/[^a-z0-9]+/i)) {
    if (word.length < 3 || STOP_WORDS.has(word)) continue;
    keywords.add(word);
  }
  return Array.from(keywords);
}
/**
 * Counts non-overlapping occurrences of `needle` in `haystack`, scanning
 * left to right. An empty needle counts as zero occurrences.
 */
function countOccurrences(haystack: string, needle: string): number {
  if (!needle) return 0;
  let hits = 0;
  for (
    let at = haystack.indexOf(needle, 0);
    at !== -1;
    at = haystack.indexOf(needle, at + needle.length)
  ) {
    hits += 1;
  }
  return hits;
}
/**
 * Sums, over all tokens, how many times each token occurs in the
 * lower-cased text. Tokens are matched as plain substrings.
 */
function scoreText(text: string, tokens: string[]): number {
  const haystack = text.toLowerCase();
  return tokens.reduce((total, token) => total + countOccurrences(haystack, token), 0);
}
/**
 * Decides whether a line of extracted page text is navigation/legal
 * boilerplate rather than content.
 *
 * A line is boilerplate when it is at most two characters long, when it is
 * exactly one of the boilerplate phrases, or when it is short (under 90
 * characters) and either contains/starts with a boilerplate phrase or
 * mentions cookie/privacy/terms/subscribe/login.
 *
 * Phrase matching is restricted to short lines on purpose: phrases such as
 * "about", "search" or "home" occur in ordinary prose, so applying the test
 * to whole paragraphs would discard real content.
 *
 * @param line A single whitespace-normalised line of text.
 * @returns True when the line should be dropped.
 */
function isBoilerplateLine(line: string): boolean {
  const shortLineLimit = 90;
  const lower = line.toLowerCase();
  if (lower.length <= 2) return true;
  if (BOILERPLATE_LINK_TEXT.includes(lower)) return true;
  // Longer lines are treated as body text even if they mention a boilerplate phrase.
  if (lower.length >= shortLineLimit) return false;
  const mentionsPhrase = BOILERPLATE_LINK_TEXT.some(
    (part) => lower.includes(` ${part}`) || lower.startsWith(`${part} `),
  );
  if (mentionsPhrase) return true;
  return /(cookie|privacy|terms|subscribe|login)/i.test(lower);
}
/**
 * Converts an HTML fragment to plain text.
 *
 * Comments and non-content elements are removed, block-level tags become
 * line breaks, list items get a bullet, remaining tags are dropped and
 * entities are decoded. Each resulting line is whitespace-normalised;
 * empty lines, boilerplate lines and case-insensitive duplicates are
 * discarded.
 */
function stripTags(html: string): string {
  // Comments and elements whose contents are replaced wholesale by a space.
  const removedWholesale: RegExp[] = [
    /<!--[\s\S]*?-->/g,
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<noscript[\s\S]*?<\/noscript>/gi,
    /<svg[\s\S]*?<\/svg>/gi,
    /<iframe[\s\S]*?<\/iframe>/gi,
    /<nav[\s\S]*?<\/nav>/gi,
    /<footer[\s\S]*?<\/footer>/gi,
    /<header[\s\S]*?<\/header>/gi,
    /<form[\s\S]*?<\/form>/gi,
    /<aside[\s\S]*?<\/aside>/gi,
  ];
  // Applied in order: structural tags first, then every remaining tag.
  const tagRewrites: Array<[RegExp, string]> = [
    [/<br\s*\/?>/gi, "\n"],
    [/<\/(p|div|li|section|article|tr|table|blockquote|h[1-6])>/gi, "\n"],
    [/<li\b[^>]*>/gi, "• "],
    [/<h[1-6]\b[^>]*>/gi, "\n"],
    [/<[^>]+>/g, " "],
  ];

  let text = html;
  for (const pattern of removedWholesale) {
    text = text.replace(pattern, " ");
  }
  for (const [pattern, replacement] of tagRewrites) {
    text = text.replace(pattern, replacement);
  }
  text = decodeHtmlEntities(text);

  const emitted = new Set<string>();
  const kept: string[] = [];
  for (const rawLine of text.replace(/\r/g, "").replace(/\u00a0/g, " ").split("\n")) {
    const line = normalizeWhitespace(rawLine);
    if (line.length === 0 || isBoilerplateLine(line)) continue;
    const key = line.toLowerCase();
    if (emitted.has(key)) continue;
    emitted.add(key);
    kept.push(line);
  }
  return kept.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}
/**
 * Returns the page's meta description, falling back to og:description.
 * Yields an empty string when neither tag is found.
 */
function extractMetaDescription(html: string): string {
  const patterns: RegExp[] = [
    /<meta[^>]+name=["']description["'][^>]*content=["']([^"']+)["'][^>]*>/i,
    /<meta[^>]+property=["']og:description["'][^>]*content=["']([^"']+)["'][^>]*>/i,
  ];
  for (const pattern of patterns) {
    const found = html.match(pattern);
    if (found) {
      return normalizeWhitespace(stripTags(found[1]));
    }
  }
  return "";
}
/**
 * Returns the text of the page's <title> element, falling back to the
 * og:title meta tag. Yields an empty string when neither is present.
 */
function extractTitle(html: string): string {
  const fromTitleTag = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (fromTitleTag !== null) {
    return normalizeWhitespace(stripTags(fromTitleTag[1]));
  }
  const fromOpenGraph = /<meta[^>]+property=["']og:title["'][^>]*content=["']([^"']+)["'][^>]*>/i.exec(html);
  if (fromOpenGraph === null) {
    return "";
  }
  return normalizeWhitespace(stripTags(fromOpenGraph[1]));
}
/**
 * Returns the trimmed href of the page's canonical link tag, or an empty
 * string when no such tag is matched. The tag is only recognised when its
 * rel attribute appears before its href attribute.
 */
function extractCanonical(html: string): string {
  const found = /<link[^>]+rel=["']canonical["'][^>]*href=["']([^"']+)["'][^>]*>/i.exec(html);
  if (found === null) {
    return "";
  }
  return found[1].trim();
}
/**
 * Collects outgoing http(s) links from anchor tags in `html`.
 *
 * Links are resolved against `baseUrl`, normalised, de-duplicated and
 * filtered through the block lists. Each link gets a base score: a domain
 * bonus, +4 when it shares the base URL's root domain, and -3 when its text
 * is empty or pure boilerplate. Scanning stops after `limit * 2` links have
 * been collected; the `limit` best-scoring ones are returned.
 */
function extractAnchors(html: string, baseUrl: string, limit: number): LinkCandidate[] {
  const collected: LinkCandidate[] = [];
  const known = new Set<string>();
  const scanCap = limit * 2;
  const baseRoot = getRootDomain(getHostname(baseUrl));

  for (const found of html.matchAll(/<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi)) {
    const absolute = resolveUrl(found[1], baseUrl);
    if (!absolute) continue;

    const link = normalizeUrl(absolute);
    const usable = !known.has(link) && /^https?:/i.test(link) && !isBlockedUrl(link);
    if (!usable) continue;

    const label = normalizeWhitespace(stripTags(found[2]));
    const host = getHostname(link);
    const looksLikeBoilerplate = !label || BOILERPLATE_LINK_TEXT.some((part) => label.toLowerCase() === part);
    const sameSite = getRootDomain(host) === baseRoot;
    const score = (looksLikeBoilerplate ? -3 : 0) + scoreDomain(host) + (sameSite ? 4 : 0);

    collected.push({ url: link, text: label, score });
    known.add(link);
    if (collected.length >= scanCap) break;
  }

  collected.sort((a, b) => b.score - a.score);
  return collected.slice(0, limit);
}
/**
 * Picks the sentences of `text` that are most relevant to the query tokens.
 *
 * Only sentences containing at least one token are considered. They are
 * ranked by token hits plus a small length bonus (capped at 3), and the
 * best `maxSentences` are returned, each trimmed to 300 characters.
 *
 * The relevance filter runs before the length bonus is added. The bonus is
 * positive for every non-empty sentence, so filtering on the combined score
 * would let sentences with no token hits through and callers could never
 * detect that nothing relevant was found.
 *
 * @param text Plain text to split into sentences.
 * @param tokens Query keywords (expected lower-case).
 * @param maxSentences Maximum number of sentences to return.
 * @returns Relevant sentences, best first; empty when none match.
 */
function extractUsefulSentences(text: string, tokens: string[], maxSentences = 4): string[] {
  return normalizeWhitespace(text)
    .split(/(?<=[.!?])\s+/)
    .map((part) => part.trim())
    .filter(Boolean)
    .map((sentence) => ({ sentence, hits: scoreText(sentence, tokens) }))
    .filter((item) => item.hits > 0)
    .map((item) => ({ sentence: item.sentence, score: item.hits + Math.min(3, item.sentence.length / 120) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, maxSentences)
    .map((item) => trimString(item.sentence, 300));
}
/**
 * Downloads `url` as text, following redirects and aborting after
 * `timeoutMs` milliseconds.
 *
 * @returns The response body and its lower-cased content type.
 * @throws Error when the response status is not OK, when the content type
 * is not HTML, XHTML or plain text, or when the request is aborted.
 */
async function fetchText(url: string, timeoutMs = DEFAULT_TIMEOUT_MS): Promise<{ text: string; contentType: string }> {
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(new Error("Request timed out")), timeoutMs);
  const requestHeaders = {
    "user-agent": USER_AGENT,
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  };
  try {
    const response = await fetch(url, { signal: aborter.signal, redirect: "follow", headers: requestHeaders });
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status} ${response.statusText}`);
    }
    const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
    const readable = ["text/html", "application/xhtml", "text/plain"].some((kind) => contentType.includes(kind));
    if (!readable) {
      throw new Error(`Unsupported content type: ${contentType || "unknown"}`);
    }
    const text = await response.text();
    return { text, contentType };
  } finally {
    // Always cancel the timer so it cannot fire after the request settles.
    clearTimeout(timer);
  }
}
/**
 * Unwraps a DuckDuckGo result link.
 *
 * DuckDuckGo result anchors point at a redirect URL that carries the real
 * target in the `uddg` query parameter. When that parameter is present its
 * value is returned; otherwise the link itself (resolved against
 * duckduckgo.com) is returned. Input that cannot be parsed is returned
 * unchanged.
 *
 * `URLSearchParams.get` already percent-decodes the value, so it must not
 * be decoded a second time: doing so corrupts escapes inside the target
 * URL and throws on a literal "%".
 *
 * @param href The href attribute of a result anchor.
 * @returns The target URL.
 */
function decodeDuckDuckGoUrl(href: string): string {
  try {
    const parsed = new URL(href, "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    return target ? target : parsed.toString();
  } catch {
    return href;
  }
}
/**
 * Parses the HTML of DuckDuckGo's "html" endpoint into search results.
 *
 * The page is split on result containers; each container must provide a
 * result link with a non-empty title and a URL that is not blocked. The
 * snippet is optional and defaults to an empty string.
 */
function parseDuckDuckGoResults(html: string): SearchResult[] {
  // Shows "host/path" without a leading "www."; unparseable URLs are shown as-is.
  const toDisplayedUrl = (target: string): string => {
    try {
      const parsed = new URL(target);
      return `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
    } catch {
      return target;
    }
  };

  const hits: SearchResult[] = [];
  const [, ...blocks] = html.split(/<div class="result\b/gi);
  for (const block of blocks) {
    const link =
      block.match(/<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i) ??
      block.match(/<a[^>]*href="([^"]+)"[^>]*class="[^"]*result__a[^"]*"[^>]*>([\s\S]*?)<\/a>/i);
    if (!link) continue;

    const url = decodeDuckDuckGoUrl(link[1]);
    const title = normalizeWhitespace(stripTags(link[2]));
    if (!url || !title || isBlockedUrl(url)) continue;

    const snippetHit =
      block.match(/class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/(?:a|div|span)>/i) ??
      block.match(/class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/div>/i);
    const snippet = snippetHit ? normalizeWhitespace(stripTags(snippetHit[1])) : "";

    hits.push({ title, url, displayedUrl: toDisplayedUrl(url), snippet, source: "duckduckgo" });
  }
  return hits;
}
/**
 * Parses the HTML of DuckDuckGo's "lite" endpoint into search results.
 *
 * Every rel="nofollow" anchor with a non-empty title and a URL that is not
 * blocked becomes a result. The lite page yields no snippets, so `snippet`
 * is always empty.
 */
function parseDuckDuckGoLiteResults(html: string): SearchResult[] {
  // Shows "host/path" without a leading "www."; unparseable URLs are shown as-is.
  const toDisplayedUrl = (target: string): string => {
    try {
      const parsed = new URL(target);
      return `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
    } catch {
      return target;
    }
  };

  const hits: SearchResult[] = [];
  for (const anchor of html.matchAll(/<a[^>]*rel="nofollow"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi)) {
    const url = decodeDuckDuckGoUrl(anchor[1]);
    const title = normalizeWhitespace(stripTags(anchor[2]));
    const usable = Boolean(url) && Boolean(title) && !isBlockedUrl(url);
    if (!usable) continue;
    hits.push({ title, url, displayedUrl: toDisplayedUrl(url), snippet: "", source: "duckduckgo-lite" });
  }
  return hits;
}
/**
 * Searches DuckDuckGo for `query` and returns at most `limit` results.
 *
 * The HTML endpoint is tried first and the lite endpoint second; the first
 * endpoint that yields any results wins. Failures of an endpoint are
 * ignored on purpose so the next one can be tried; when both fail or find
 * nothing the result is an empty list.
 */
async function searchDuckDuckGo(query: string, limit: number): Promise<SearchResult[]> {
  const encoded = encodeURIComponent(query);
  const endpoints: Array<{ url: string; parse: (html: string) => SearchResult[] }> = [
    { url: `https://html.duckduckgo.com/html/?q=${encoded}`, parse: parseDuckDuckGoResults },
    { url: `https://lite.duckduckgo.com/lite/?q=${encoded}`, parse: parseDuckDuckGoLiteResults },
  ];
  for (const endpoint of endpoints) {
    try {
      const { text } = await fetchText(endpoint.url);
      const hits = endpoint.parse(text);
      if (hits.length > 0) {
        return hits.slice(0, limit);
      }
    } catch {
      // Best effort: fall through to the next endpoint.
    }
  }
  return [];
}
/**
 * Removes items whose normalised URL has already been seen, keeping the
 * first occurrence and the original order.
 */
function uniqueByUrl<T extends { url: string }>(items: T[]): T[] {
  const known = new Set<string>();
  return items.filter((item) => {
    const key = normalizeUrl(item.url);
    if (known.has(key)) {
      return false;
    }
    known.add(key);
    return true;
  });
}
/** Trims every string, drops empty ones and removes duplicates, preserving first-seen order. */
function uniqueStrings(items: string[]): string[] {
  const distinct = new Set<string>();
  for (const item of items) {
    const trimmed = item.trim();
    if (trimmed) {
      distinct.add(trimmed);
    }
  }
  return [...distinct];
}
/**
 * Builds the initial list of search queries for a research question.
 *
 * Order: the query combined with the focus (when given), the query itself,
 * the query with a series of fixed suffixes, the first four keywords (when
 * that differs from the lower-cased query) and finally the first three
 * keywords as a quoted phrase. Duplicates are removed.
 */
function buildBaseSearchQueries(query: string, focus?: string): string[] {
  const cleanQuery = normalizeWhitespace(query);
  const keywords = tokenizeQuery(cleanQuery);
  const suffixes = ["official", "documentation", "research", "analysis", "review", "key facts"];

  const queries: string[] = [];
  if (focus?.trim()) {
    queries.push(`${cleanQuery} ${focus.trim()}`);
  }
  queries.push(cleanQuery);
  for (const suffix of suffixes) {
    queries.push(`${cleanQuery} ${suffix}`);
  }

  const rootPhrase = keywords.slice(0, Math.min(4, keywords.length)).join(" ");
  if (rootPhrase && rootPhrase !== cleanQuery.toLowerCase()) {
    queries.push(rootPhrase);
  }
  if (keywords.length >= 3) {
    queries.push(`"${keywords.slice(0, 3).join(" ")}"`);
  }
  return uniqueStrings(queries);
}
/**
 * Scores a search result against the query: keyword hits in the title
 * (x6) and snippet (x3), a domain bonus, and up to 3 points for the length
 * of the snippet (or of the title when there is no snippet).
 */
function scoreSearchResult(result: SearchResult, query: string): number {
  const keywords = tokenizeQuery(query);
  const measuredLength = result.snippet.length || result.title.length;

  let total = 6 * scoreText(result.title, keywords);
  total += 3 * scoreText(result.snippet, keywords);
  total += scoreDomain(getHostname(result.url));
  total += Math.min(3, Math.floor(measuredLength / 80));
  return total;
}
/**
 * Ranks search results by relevance to `query`.
 *
 * Results are de-duplicated by URL, blocked URLs are removed, and the rest
 * are sorted by score (highest first). At most two results per root domain
 * are kept, and collection stops once `limit` results have been gathered.
 */
function rankSearchResults(results: SearchResult[], query: string, limit: number): SearchResult[] {
  const scored = uniqueByUrl(results)
    .filter((item) => !isBlockedUrl(item.url))
    .map((item) => ({ item, score: scoreSearchResult(item, query) }));
  scored.sort((a, b) => b.score - a.score);

  const perDomain = new Map<string, number>();
  const picked: SearchResult[] = [];
  for (const { item } of scored) {
    const domain = getRootDomain(getHostname(item.url));
    const used = perDomain.get(domain) ?? 0;
    if (used < 2) {
      perDomain.set(domain, used + 1);
      picked.push(item);
      if (picked.length >= limit) break;
    }
  }
  return picked;
}
/**
 * Selects the most relevant pages without mutating `pages`.
 *
 * Pages are considered in descending relevance order; at most two pages
 * per root domain are kept, and collection stops once `limit` pages have
 * been gathered.
 */
function pickTopPages(pages: ResearchPage[], limit: number): ResearchPage[] {
  const byRelevance = pages.slice().sort((a, b) => b.relevanceScore - a.relevanceScore);
  const perDomain = new Map<string, number>();
  const picked: ResearchPage[] = [];
  for (const candidate of byRelevance) {
    const domain = getRootDomain(getHostname(candidate.url));
    const used = perDomain.get(domain) ?? 0;
    if (used < 2) {
      perDomain.set(domain, used + 1);
      picked.push(candidate);
      if (picked.length >= limit) break;
    }
  }
  return picked;
}
/**
 * Extracts up to `maxLinks` outgoing links and re-scores them for the query.
 *
 * On top of the base score from extractAnchors each link gains: twice the
 * keyword hits in its text, the keyword hits in its URL, +2 when text or
 * URL mentions a documentation/research style word, and the domain bonus
 * of its host. The result is sorted best-first.
 */
function buildLinkCandidates(html: string, baseUrl: string, queryTokens: string[], maxLinks: number): LinkCandidate[] {
  const informativeWords = /(docs|guide|help|support|manual|paper|study|report|blog|news|research|about|faq)/i;
  const rescored: LinkCandidate[] = [];
  for (const link of extractAnchors(html, baseUrl, maxLinks)) {
    const textHits = scoreText(link.text, queryTokens) * 2;
    const urlHits = scoreText(link.url, queryTokens);
    const informativeBonus = informativeWords.test(link.text + " " + link.url) ? 2 : 0;
    const relevance = link.score + textHits + urlHits + informativeBonus;
    rescored.push({ ...link, score: relevance + scoreDomain(getHostname(link.url)) });
  }
  rescored.sort((a, b) => b.score - a.score);
  return rescored;
}
/** Collapses whitespace in `text` and shortens it to at most `maxChars` characters. */
function buildExcerpt(text: string, maxChars: number): string {
  const singleLine = normalizeWhitespace(text);
  return trimString(singleLine, maxChars);
}
/**
 * Summarises page text as the sentences picked by extractUsefulSentences,
 * joined by spaces. When no sentence is picked, the first 700 characters
 * of the text are used instead.
 */
function summarizePage(pageText: string, queryTokens: string[], maxSentences = 3): string {
  const picked = extractUsefulSentences(pageText, queryTokens, maxSentences);
  return picked.length > 0 ? picked.join(" ") : trimString(pageText, 700);
}
/**
 * Fetches one page and extracts everything the research loop needs from it.
 *
 * The page's URL is its canonical URL when one is declared, is not blocked
 * and can be resolved; otherwise it is the requested URL. Relevance is
 * twice the keyword hits in title, description and content, plus the
 * domain bonus, plus one point per 250 words (capped at 4).
 *
 * @returns The extracted page and its ranked outgoing link candidates.
 * @throws Whatever fetchText throws for unreachable or unsupported pages.
 */
async function crawlUrl(url: string, depth: number, queryTokens: string[], maxChars = DEFAULT_MAX_CHARS_PER_PAGE, maxLinkCount = MAX_LINKS_PER_PAGE): Promise<{ page: ResearchPage; links: LinkCandidate[] }> {
  const fetched = await fetchText(url);
  const html = fetched.text;

  const title = extractTitle(html) || getHostname(url) || url;
  const description = extractMetaDescription(html);
  const canonical = extractCanonical(html);
  const content = trimString(stripTags(html), maxChars);

  let target = url;
  if (canonical && !isBlockedUrl(canonical)) {
    target = resolveUrl(canonical, url) ?? url;
  }
  const finalUrl = normalizeUrl(target);

  const links = buildLinkCandidates(html, finalUrl, queryTokens, maxLinkCount);
  const sourceHost = getHostname(finalUrl);
  const wordCount = content.split(/\s+/).filter(Boolean).length;
  const keywordHits = scoreText(`${title}\n${description}\n${content}`, queryTokens);

  const page: ResearchPage = {
    url: finalUrl,
    title,
    description,
    content,
    wordCount,
    links: links.map((link) => link.url),
    excerpt: buildExcerpt(description || summarizePage(content, queryTokens), 700),
    relevanceScore: keywordHits * 2 + scoreDomain(sourceHost) + Math.min(4, Math.floor(wordCount / 250)),
    depth,
    sourceType: sourceTypeForHost(sourceHost),
  };
  return { page, links };
}
/**
 * Builds the prompt that asks the model to plan follow-up searches.
 *
 * The prompt states the task (and focus, when given), demands a JSON-only
 * answer of a fixed shape, caps the number of follow-up queries at
 * `maxQueries`, and ends with a preview of up to six sources.
 */
function buildPlannerPrompt(query: string, focus: string | undefined, pages: ResearchPage[], maxQueries: number): string {
  const describeSource = (page: ResearchPage, index: number): string => {
    const snippet = trimString(page.excerpt || page.description || page.content, 280);
    return [
      `Source ${index + 1}`,
      `Title: ${page.title}`,
      `URL: ${page.url}`,
      `Type: ${page.sourceType}`,
      `Snippet: ${snippet}`,
    ].join("\n");
  };
  const preview = pages.slice(0, 6).map(describeSource).join("\n\n");

  const sections: string[] = [`You are a local deep-research planner.`, `Task: ${query}`];
  if (focus) {
    sections.push(`Focus: ${focus}`);
  }
  sections.push(
    `You must return ONLY valid JSON with this shape:`,
    `{ "followUpQueries": string[], "gaps": string[], "contradictions": string[] }`,
    `Limit followUpQueries to at most ${maxQueries}.`,
    `Prefer queries that test evidence, fill missing details, or inspect authoritative sources.`,
    `Avoid social media, video sites, and shallow listicle search terms.`,
    `Evidence packet:`,
    preview || "(no sources yet)",
  );
  return sections.filter(Boolean).join("\n\n");
}
/**
 * Builds the prompt that asks the model to write the final report.
 *
 * The prompt states the question (and focus, when given), prescribes the
 * report sections and citation style, and appends an evidence packet of up
 * to ten sources followed by a search trail of up to twelve results.
 */
function buildSynthesisPrompt(query: string, focus: string | undefined, pages: ResearchPage[], searchResults: SearchResult[]): string {
  const describeSource = (page: ResearchPage, index: number): string =>
    [
      `Source ${index + 1}`,
      `Title: ${page.title}`,
      `URL: ${page.url}`,
      `Type: ${page.sourceType}`,
      `Excerpt: ${page.excerpt}`,
      `Relevant sentences: ${trimString(page.content, 1800)}`,
    ].join("\n");
  const describeResult = (result: SearchResult, index: number): string =>
    `${index + 1}. ${result.title}\n ${result.url}\n ${trimString(result.snippet, 220)}`;

  const sources = pages.slice(0, 10).map(describeSource).join("\n\n");
  const trail = searchResults.slice(0, 12).map(describeResult).join("\n");

  const sections: string[] = [`You are a careful deep-research analyst.`, `Question: ${query}`];
  if (focus) {
    sections.push(`Focus: ${focus}`);
  }
  sections.push(
    `Write a concise markdown report using only the evidence packet below.`,
    `Be explicit about uncertainty and conflicts.`,
    `Every important claim should be tied to one or more source numbers like [1] or [1][3].`,
    `Return sections in this order:`,
    `# Answer`,
    `# Key findings`,
    `# Conflicts / caveats`,
    `# Sources`,
    `Evidence packet:`,
    sources || "(no crawled sources)",
    `Search trail:`,
    trail || "(no search trail)",
  );
  return sections.filter(Boolean).join("\n\n");
}
/**
 * Builds a markdown report without a model.
 *
 * Lists up to ten sources, turns the first six into key findings, and
 * reports the supplied contradictions and gaps, with a stock sentence for
 * each when the respective list is empty. Up to twelve search results are
 * appended as a search trail.
 */
function buildFallbackReport(query: string, focus: string | undefined, pages: ResearchPage[], searchResults: SearchResult[], gaps: string[] = [], contradictions: string[] = []): string {
  const topPages = pages.slice(0, 10);
  const preview = (page: ResearchPage, maxChars: number): string =>
    trimString(page.excerpt || page.description || page.content, maxChars);
  const focusNote = focus ? ` for the focus area "${focus}"` : "";

  const report: string[] = [];
  report.push(`# Answer`);
  report.push(`I gathered ${topPages.length} crawled sources${focusNote}. This is a best-effort local synthesis for: ${query}.`);
  report.push(``);

  report.push(`# Key findings`);
  const keyFindings = topPages.slice(0, 6).map((page, index) => `- [${index + 1}] ${page.title}: ${preview(page, 180)}`);
  if (keyFindings.length) {
    report.push(...keyFindings);
  } else {
    report.push("- No strong findings extracted yet.");
  }
  report.push(``);

  report.push(`# Conflicts / caveats`);
  if (contradictions.length) {
    report.push(...contradictions.map((item) => `- ${item}`));
  } else {
    report.push("- No clear contradictions were detected automatically.");
  }
  if (gaps.length) {
    report.push(...gaps.map((item) => `- ${item}`));
  } else {
    report.push("- The search may need more targeted follow-up queries.");
  }
  report.push(``);

  report.push(`# Sources`);
  if (topPages.length) {
    report.push(topPages.map((page, index) => `${index + 1}. ${page.title} — ${page.url}\n ${preview(page, 220)}`).join("\n"));
  } else {
    report.push("No pages could be crawled.");
  }
  report.push(``);

  report.push(`# Search trail`);
  const searchTrail = searchResults.slice(0, 12).map((r, index) => `${index + 1}. ${r.title} — ${r.url}`);
  if (searchTrail.length) {
    report.push(...searchTrail);
  } else {
    report.push("No search results were returned.");
  }
  return report.join("\n");
}
/**
 * Parses JSON from model output that may be wrapped in extra text.
 *
 * The contents of a json code fence are preferred when one is present.
 * If strict parsing fails, the span from the first "{" to the last "}" is
 * tried instead.
 *
 * @returns The parsed value, or null when nothing could be parsed.
 */
function parseJsonLoose(text: string): any | null {
  const attempt = (raw: string): { ok: boolean; value?: any } => {
    try {
      return { ok: true, value: JSON.parse(raw) };
    } catch {
      return { ok: false };
    }
  };

  const trimmed = text.trim();
  const fence = /```json\s*([\s\S]*?)```/i.exec(trimmed);
  const candidate = fence === null ? trimmed : fence[1].trim();

  const strict = attempt(candidate);
  if (strict.ok) {
    return strict.value;
  }

  const open = candidate.indexOf("{");
  const close = candidate.lastIndexOf("}");
  if (open === -1 || close === -1 || close <= open) {
    return null;
  }
  const embedded = attempt(candidate.slice(open, close + 1));
  return embedded.ok ? embedded.value : null;
}
/**
 * Looks up a model that is currently loaded in LM Studio.
 *
 * With a `modelId`, only a loaded model with exactly that identifier is
 * returned. Without one, the first loaded model is used.
 *
 * @returns The model handle and its identifier, or null when no suitable
 * model is loaded.
 */
async function getLoadedModel(modelId?: string): Promise<{ identifier: string; model: any } | null> {
  const models = await getClient().llm.listLoaded();
  if (models.length === 0) {
    return null;
  }
  if (!modelId) {
    const fallback = models[0];
    return fallback?.identifier ? { identifier: fallback.identifier, model: fallback } : null;
  }
  const requested = models.find((m: any) => m.identifier === modelId);
  return requested ? { identifier: requested.identifier ?? modelId, model: requested } : null;
}
/**
 * Runs a text completion on a loaded LM Studio model.
 *
 * Never throws: when no suitable model is loaded or the prediction fails,
 * both fields of the result are null so callers can fall back to
 * heuristics.
 *
 * Awaiting the prediction yields its final result object, which is not an
 * async iterable; the generated text is therefore read from the result's
 * `content` property instead of iterating over it.
 *
 * @param modelId Identifier of the model to use, or undefined for the first loaded model.
 * @param prompt Prompt text to complete.
 * @param maxTokens Maximum number of tokens to generate.
 * @param temperature Sampling temperature.
 * @returns The trimmed completion text and the identifier of the model used.
 */
async function completeWithModel(modelId: string | undefined, prompt: string, maxTokens: number, temperature: number): Promise<{ text: string | null; modelUsed: string | null }> {
  const loaded = await getLoadedModel(modelId).catch(() => null);
  if (!loaded) return { text: null, modelUsed: null };
  try {
    const result: any = await loaded.model.complete(prompt, { maxTokens, temperature });
    // Guard against an unexpected result shape rather than crashing on `.trim()`.
    const text: string = typeof result?.content === "string" ? result.content : "";
    return { text: text.trim(), modelUsed: loaded.identifier };
  } catch {
    return { text: null, modelUsed: null };
  }
}
/**
 * Produces follow-up search queries without a model.
 *
 * For each of the first four pages it adds the query combined with up to
 * four longer title words, and a site-restricted query for the page's host
 * unless that host is blocked. It then adds the focus variant, three stock
 * suffixes when the query has keywords, and three more probing suffixes
 * from the second round on. At most eight distinct queries are returned,
 * in insertion order.
 */
function makeHeuristicFollowUps(query: string, focus: string | undefined, pages: ResearchPage[], round: number): string[] {
  const suggestions = new Set<string>();
  const addSuffixed = (suffixes: string[]): void => {
    for (const suffix of suffixes) suggestions.add(`${query} ${suffix}`);
  };

  for (const page of pages.slice(0, 4)) {
    const titleWords = page.title
      .split(/[^a-z0-9]+/i)
      .map((word) => word.trim())
      .filter((word) => word.length >= 4)
      .slice(0, 4);
    const keyBits = titleWords.join(" ");
    if (keyBits) {
      suggestions.add(`${query} ${keyBits}`.trim());
    }

    const host = getHostname(page.url);
    const hostBlocked = BLOCKED_HOSTS.some((blocked) => host === blocked || host.endsWith(`.${blocked}`));
    if (host && !hostBlocked) {
      suggestions.add(`${query} site:${host}`);
    }
  }

  if (focus) {
    suggestions.add(`${query} ${focus}`);
  }
  if (tokenizeQuery(query).length > 0) {
    addSuffixed(["official", "documentation", "key facts"]);
  }
  if (round > 1) {
    addSuffixed(["controversy", "analysis", "evidence"]);
  }
  return [...suggestions].slice(0, 8);
}
/**
 * Runs the complete research workflow: search, crawl, plan follow-ups,
 * synthesise a report and audit the evidence.
 *
 * Each round issues the pending queries, ranks the hits and crawls queued
 * URLs (best score first) until the page budget is reached. Links found on
 * crawled pages are queued for the next round. Follow-up queries come from
 * the loaded model when one is available, otherwise from heuristics. After
 * the last round a report is written by the model, or by the fallback
 * builder when no model output is available.
 *
 * Pages are de-duplicated by their final URL as well as by the requested
 * URL, because several requested URLs can declare the same canonical URL
 * and would otherwise appear as separate sources.
 *
 * @param params Research options; numeric values are clamped to their documented ranges.
 * @param ctx Callbacks for progress and warnings, and the abort signal to observe.
 * @returns The report plus the queries, search results, sources, contradictions and gaps behind it.
 * @throws Error("Research was aborted") when the signal is aborted between steps.
 */
async function runDeepResearch(params: DeepResearchParams, ctx: { status: (text: string) => void; warn: (text: string) => void; signal: AbortSignal }): Promise<any> {
  const query = normalizeWhitespace(params.query);
  const focus = params.focus?.trim() || undefined;
  // Clamp every limit here as well so direct callers cannot bypass the bounds.
  const maxRounds = clampInt(params.maxRounds, DEFAULT_MAX_ROUNDS, 1, 4);
  const maxSearchesPerRound = clampInt(params.maxSearchesPerRound, DEFAULT_MAX_SEARCHES_PER_ROUND, 1, 8);
  const maxResultsPerSearch = clampInt(params.maxResultsPerSearch, DEFAULT_MAX_RESULTS_PER_SEARCH, 1, 10);
  const maxPages = clampInt(params.maxPages, DEFAULT_MAX_PAGES, 1, 24);
  const maxDepth = clampInt(params.maxDepth, DEFAULT_MAX_DEPTH, 1, 3);
  const maxCharsPerPage = clampInt(params.maxCharsPerPage, DEFAULT_MAX_CHARS_PER_PAGE, 3000, 40000);
  const maxTokens = clampInt(params.maxTokens, 650, 64, 1500);
  const temperature = clampFloat(params.temperature, 0.2, 0, 2);
  const rankingQuery = `${query} ${focus ?? ""}`;
  const queryTokens = tokenizeQuery(rankingQuery);

  const issuedQueries = new Set<string>();
  // URLs that were requested or that a crawled page resolved to.
  const visited = new Set<string>();
  // Final URLs of the pages already stored in allPages.
  const collectedUrls = new Set<string>();
  const allSearchResults: SearchResult[] = [];
  const allPages: ResearchPage[] = [];
  const queue: { url: string; depth: number; score: number }[] = [];
  let frontier = buildBaseSearchQueries(query, focus).slice(0, maxSearchesPerRound);

  const throwIfAborted = (): void => {
    if (ctx.signal.aborted) throw new Error("Research was aborted");
  };

  ctx.status(`Planning research for ${query}`);
  for (let round = 1; round <= maxRounds; round += 1) {
    throwIfAborted();
    const roundQueries = frontier.map((item) => item.trim()).filter((item) => item && !issuedQueries.has(item.toLowerCase()));
    roundQueries.forEach((item) => issuedQueries.add(item.toLowerCase()));
    if (roundQueries.length === 0) break;

    // Search phase: every hit becomes a depth-0 crawl candidate.
    ctx.status(`Round ${round}/${maxRounds}: searching ${roundQueries.length} queries`);
    for (const searchQuery of roundQueries.slice(0, maxSearchesPerRound)) {
      throwIfAborted();
      ctx.status(`Searching: ${searchQuery}`);
      const found = await searchDuckDuckGo(searchQuery, maxResultsPerSearch);
      const results = rankSearchResults(found, rankingQuery, maxResultsPerSearch);
      allSearchResults.push(...results);
      for (const result of results) {
        queue.push({ url: normalizeUrl(result.url), depth: 0, score: scoreSearchResult(result, query) });
      }
    }

    // Crawl phase: best candidates first; discovered links wait for the next round.
    queue.sort((a, b) => b.score - a.score);
    const nextQueue: typeof queue = [];
    while (queue.length > 0) {
      if (allPages.length >= maxPages) break;
      throwIfAborted();
      const current = queue.shift()!;
      const url = normalizeUrl(current.url);
      if (!url || visited.has(url) || isBlockedUrl(url)) continue;
      if (current.depth > maxDepth) continue;
      visited.add(url);
      ctx.status(`Crawling ${allPages.length + 1}/${maxPages}: ${getHostname(url) || url}`);
      try {
        const { page, links } = await crawlUrl(url, current.depth, queryTokens, maxCharsPerPage, MAX_LINKS_PER_PAGE);
        // The page may report a canonical URL that differs from the one requested.
        visited.add(page.url);
        if (collectedUrls.has(page.url)) continue;
        collectedUrls.add(page.url);
        page.relevanceScore = scoreText(`${page.title}\n${page.description}\n${page.content}`, queryTokens) * 2 + scoreDomain(getHostname(page.url)) + Math.min(4, Math.floor(page.wordCount / 250));
        allPages.push(page);
        if (current.depth < maxDepth) {
          for (const link of links) {
            if (visited.has(link.url) || isBlockedUrl(link.url)) continue;
            nextQueue.push({ url: link.url, depth: current.depth + 1, score: link.score });
          }
        }
      } catch (error) {
        ctx.warn(`Could not crawl ${url}: ${error instanceof Error ? error.message : String(error)}`);
      }
    }
    queue.push(...nextQueue);
    queue.sort((a, b) => b.score - a.score);

    // Planning phase: ask the model for follow-ups, else fall back to heuristics.
    const rankedPages = pickTopPages(allPages, Math.min(maxPages, 10));
    const modelInfo = await getLoadedModel(params.modelId).catch(() => null);
    let followUps: string[] = [];
    if (modelInfo) {
      const plannerPrompt = buildPlannerPrompt(query, focus, rankedPages, 8);
      const planner = await completeWithModel(params.modelId, plannerPrompt, 320, 0.15);
      const parsed = planner.text ? parseJsonLoose(planner.text) : null;
      if (parsed && Array.isArray(parsed.followUpQueries)) {
        followUps = parsed.followUpQueries.map((item: any) => String(item).trim()).filter(Boolean);
      }
    }
    if (followUps.length === 0) followUps = makeHeuristicFollowUps(query, focus, rankedPages, round);
    frontier = uniqueStrings(followUps).filter((item) => !issuedQueries.has(item.toLowerCase())).slice(0, maxSearchesPerRound);
    if (frontier.length === 0) break;
  }

  const finalPages = pickTopPages(allPages, maxPages);
  const uniqueSearchResults = rankSearchResults(allSearchResults, rankingQuery, Math.min(allSearchResults.length, maxPages * 2));

  const synthesisPrompt = buildSynthesisPrompt(query, focus, finalPages, uniqueSearchResults);
  const synthesis = await completeWithModel(params.modelId, synthesisPrompt, maxTokens, temperature);
  let modelUsed: string | null = synthesis.text ? synthesis.modelUsed : null;

  // Audit phase: ask the model to list contradictions and gaps in the evidence.
  const auditEvidence = finalPages.slice(0, 8).map((page) => `${page.title}: ${trimString(page.excerpt || page.description || page.content, 240)}`).join("\n");
  const auditPrompt = [`You are auditing a research packet for missing evidence, contradictions, and reliability concerns.`, `Question: ${query}`, focus ? `Focus: ${focus}` : "", `Evidence:`, auditEvidence || "(none)", `Return ONLY valid JSON with keys contradictions and gaps, both arrays of concise strings.`].filter(Boolean).join("\n\n");
  const audit = await completeWithModel(params.modelId, auditPrompt, 220, 0.1);
  const parsedAudit = audit.text ? parseJsonLoose(audit.text) : null;
  const toNotes = (value: unknown): string[] =>
    Array.isArray(value) ? value.map((item: any) => String(item).trim()).filter(Boolean).slice(0, 8) : [];
  const contradictions = toNotes(parsedAudit?.contradictions);
  const gaps = toNotes(parsedAudit?.gaps);

  // Without model output the report is built locally and includes the audit findings.
  const reportMarkdown = synthesis.text ? synthesis.text : buildFallbackReport(query, focus, finalPages, uniqueSearchResults, gaps, contradictions);
  if (audit.modelUsed && !modelUsed) modelUsed = audit.modelUsed;

  const sources = finalPages.map((page, index) => ({ rank: index + 1, title: page.title, url: page.url, domain: getHostname(page.url), sourceType: page.sourceType, excerpt: page.excerpt, wordCount: page.wordCount, relevanceScore: page.relevanceScore, depth: page.depth }));
  return { query, focus: focus ?? null, modelUsed, rounds: maxRounds, searchQueries: Array.from(issuedQueries), searchResults: uniqueSearchResults, sources, contradictions, gaps, reportMarkdown };
}
/**
 * Tool definition that exposes runDeepResearch to the model. The schema
 * bounds and defaults match the limits runDeepResearch clamps to.
 */
const deepResearchTool = tool({
  name: "deepResearch",
  description: "Run a local autonomous deep-research workflow: it plans searches, crawls pages recursively, filters social sites, checks for gaps and contradictions, and returns a synthesized markdown report. No paid APIs are used.",
  parameters: {
    // What to research.
    query: z.string().min(1),
    focus: z.string().optional(),
    // Search and crawl budgets.
    maxRounds: z.number().int().min(1).max(4).default(DEFAULT_MAX_ROUNDS),
    maxSearchesPerRound: z.number().int().min(1).max(8).default(DEFAULT_MAX_SEARCHES_PER_ROUND),
    maxResultsPerSearch: z.number().int().min(1).max(10).default(DEFAULT_MAX_RESULTS_PER_SEARCH),
    maxPages: z.number().int().min(1).max(24).default(DEFAULT_MAX_PAGES),
    maxDepth: z.number().int().min(1).max(3).default(DEFAULT_MAX_DEPTH),
    maxCharsPerPage: z.number().int().min(3000).max(40000).default(DEFAULT_MAX_CHARS_PER_PAGE),
    // Model selection and sampling.
    modelId: z.string().optional(),
    maxTokens: z.number().int().min(64).max(1500).default(650),
    temperature: z.number().min(0).max(2).default(0.2),
  },
  implementation: (params: DeepResearchParams, ctx) => runDeepResearch(params, ctx),
});
/** Plugin entry point: registers a tools provider that offers the deep-research tool. */
export async function main(pluginContext: PluginContext) {
  const provideTools = async () => [deepResearchTool];
  pluginContext.withToolsProvider(provideTools);
}