Project Files
dist
index.d.ts
index.d.ts.map
index.js
index.js.map
src
index.ts
manifest.json
package-lock.json
package.json
README.md
tsconfig.json
src / index.ts
// @ts-nocheck

import { LMStudioClient, tool, type PluginContext } from "@lmstudio/sdk";
import { z } from "zod";

/** One hit parsed out of a DuckDuckGo results page. */
type SearchResult = {
  /** Link text of the hit with markup removed and whitespace collapsed. */
  title: string;
  /** Destination URL after unwrapping DuckDuckGo's redirect link. */
  url: string;
  /** Hostname (leading "www." removed) followed by the path; equals `url` when it cannot be parsed. */
  displayedUrl: string;
  /** Snippet text for the hit; the lite-endpoint parser always leaves it empty. */
  snippet: string;
  /** Which parser produced the hit: "duckduckgo" or "duckduckgo-lite". */
  source: string;
};

/** An outgoing link found on a crawled page, with a heuristic priority. */
type LinkCandidate = {
  /** Absolute, normalized http(s) URL of the link target. */
  url: string;
  /** Anchor text with markup removed; may be empty. */
  text: string;
  /** Heuristic priority; candidates are sorted with the highest score first. */
  score: number;
};

/** A crawled page together with the text and metadata extracted from it. */
type ResearchPage = {
  /** Normalized canonical URL when the page declares a usable one, otherwise the normalized requested URL. */
  url: string;
  /** Extracted page title; the crawler falls back to the hostname or the URL. */
  title: string;
  /** Meta description, or an empty string when none was found. */
  description: string;
  /** Tag-stripped page text, truncated to the per-page character limit. */
  content: string;
  /** Number of whitespace-separated words in `content`. */
  wordCount: number;
  /** URLs of the ranked outgoing link candidates. */
  links: string[];
  /** Short summary (at most 700 characters): the description, else the best-matching sentences. */
  excerpt: string;
  /** Heuristic relevance to the query; higher is better. */
  relevanceScore: number;
  /** Crawl depth supplied by the caller (0 for pages reached from search results). */
  depth: number;
  /** Host classification: "blocked", "authoritative", "reference" or "web". */
  sourceType: string;
};

/**
 * Input accepted by the deep-research workflow. Every numeric option is
 * clamped to its supported range inside `runDeepResearch`.
 */
type DeepResearchParams = {
  /** The research question; whitespace is collapsed before use. */
  query: string;
  /** Optional sub-topic that is appended to searches and prompts. */
  focus?: string;
  /** Search/crawl rounds to run (clamped to 1-4). */
  maxRounds?: number;
  /** Search queries issued per round (clamped to 1-8). */
  maxSearchesPerRound?: number;
  /** Results kept per search query (clamped to 1-10). */
  maxResultsPerSearch?: number;
  /** Total pages to crawl (clamped to 1-24). */
  maxPages?: number;
  /** Maximum link depth followed from a search result (clamped to 1-3). */
  maxDepth?: number;
  /** Characters of extracted text kept per page (clamped to 3000-40000). */
  maxCharsPerPage?: number;
  /** Identifier of the loaded model to use; the first loaded model is used when omitted. */
  modelId?: string;
  /** Token budget for the synthesis completion (clamped to 64-1500). */
  maxTokens?: number;
  /** Sampling temperature for the synthesis completion (clamped to 0-2). */
  temperature?: number;
};

// Default per-request timeout for fetchText, in milliseconds.
const DEFAULT_TIMEOUT_MS = 15000;
// Defaults for the tunable research limits (see DeepResearchParams).
const DEFAULT_MAX_ROUNDS = 2;
const DEFAULT_MAX_SEARCHES_PER_ROUND = 4;
const DEFAULT_MAX_RESULTS_PER_SEARCH = 6;
const DEFAULT_MAX_PAGES = 12;
const DEFAULT_MAX_DEPTH = 2;
const DEFAULT_MAX_CHARS_PER_PAGE = 16000;
// Upper bound on outgoing link candidates kept per crawled page.
const MAX_LINKS_PER_PAGE = 12;
// Sent as the user-agent header on every outgoing request.
const USER_AGENT = "Mozilla/5.0 (compatible; LMStudioDeepResearch/2.0; +https://lmstudio.ai)";

// Hosts that are never crawled or reported; subdomains of these hosts are matched as well.
const BLOCKED_HOSTS = ["facebook.com", "instagram.com", "x.com", "twitter.com", "tiktok.com", "reddit.com", "pinterest.com", "linkedin.com", "snapchat.com", "discord.com", "discord.gg", "tumblr.com", "quora.com", "fandom.com", "youtube.com", "youtu.be", "twitch.tv", "onlyfans.com"];
// Lower-case URL substrings that mark a URL as blocked regardless of host.
const BLOCKED_URL_PARTS = ["/share", "/sharer", "/intent/", "/status/", "/posts/", "/reels/", "/shorts/", "/video/", "/watch?", "/watch/", "/tiktok.com/", "/redd.it/"];
// Lower-case phrases treated as navigation/boilerplate when found in link text or text lines.
const BOILERPLATE_LINK_TEXT = ["home", "menu", "log in", "login", "sign in", "sign up", "subscribe", "newsletter", "privacy", "terms", "cookies", "cookie policy", "accept cookies", "contact", "about us", "about", "sitemap", "search", "share", "follow", "read more", "learn more"];
// Words dropped by tokenizeQuery so they do not count towards relevance.
const STOP_WORDS = new Set(["the", "and", "for", "with", "that", "this", "from", "into", "about", "what", "when", "where", "which", "who", "how", "why", "can", "could", "would", "should", "please", "need", "want", "best", "latest", "current", "new", "old", "vs", "via", "of", "to", "in", "on", "by", "as", "is", "are", "be", "it", "or", "an", "a"]);

/** Module-wide LM Studio client; stays null until the first call to getClient. */
let client: LMStudioClient | null = null;

/**
 * Returns the shared LM Studio client, constructing it on first use.
 *
 * @returns The single client instance reused by every caller in this module.
 */
function getClient(): LMStudioClient {
  if (!client) {
    client = new LMStudioClient();
  }
  return client;
}

/**
 * Coerces a loosely typed value to an integer and clamps it to [min, max].
 *
 * @param value - A finite number (truncated towards zero) or a string holding an optionally signed integer.
 * @param fallback - Used when `value` is neither of the accepted forms; it is clamped as well.
 * @param min - Lower bound of the result.
 * @param max - Upper bound of the result.
 * @returns The clamped integer.
 */
function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  const readInput = (): number => {
    if (typeof value === "number" && Number.isFinite(value)) {
      return Math.trunc(value);
    }
    if (typeof value === "string" && /^-?\d+$/.test(value.trim())) {
      return Math.trunc(Number(value));
    }
    return fallback;
  };
  const capped = Math.min(max, readInput());
  return Math.max(min, capped);
}

/**
 * Coerces a loosely typed value to a number and clamps it to [min, max].
 *
 * @param value - A finite number, or a string holding an optionally signed decimal such as "1.25".
 * @param fallback - Used when `value` is neither of the accepted forms; it is clamped as well.
 * @param min - Lower bound of the result.
 * @param max - Upper bound of the result.
 * @returns The clamped number.
 */
function clampFloat(value: unknown, fallback: number, min: number, max: number): number {
  const readInput = (): number => {
    if (typeof value === "number" && Number.isFinite(value)) {
      return value;
    }
    if (typeof value === "string" && /^-?\d+(?:\.\d+)?$/.test(value.trim())) {
      return Number(value);
    }
    return fallback;
  };
  const capped = Math.min(max, readInput());
  return Math.max(min, capped);
}

/**
 * Collapses every run of whitespace into a single space and removes leading
 * and trailing whitespace.
 */
function normalizeWhitespace(value: string): string {
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Trims a string and, when it is longer than `maxChars`, cuts it and appends
 * an ellipsis character.
 *
 * @param value - Text to shorten.
 * @param maxChars - Longest length returned without truncation.
 * @returns The trimmed text, or its first `maxChars - 1` characters (trailing whitespace removed) followed by "…".
 */
function trimString(value: string, maxChars: number): string {
  const text = value.trim();
  if (text.length > maxChars) {
    const keep = Math.max(0, maxChars - 1);
    return text.slice(0, keep).trimEnd() + "…";
  }
  return text;
}

/**
 * Decodes the HTML entities this module cares about: `&nbsp;`, `&amp;`,
 * `&quot;`, `&apos;`, `&lt;`, `&gt;` (case-insensitive) plus decimal and
 * hexadecimal numeric references.
 *
 * Decoding happens in a single pass, so text is unescaped exactly once:
 * `&amp;lt;` becomes the literal text `&lt;`, not `<`. Numeric references are
 * converted with `String.fromCodePoint`, so characters outside the Basic
 * Multilingual Plane (for example emoji) decode correctly.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with recognised entities replaced; unknown named entities
 *   and out-of-range numeric references are left untouched.
 */
function decodeHtmlEntities(value: string): string {
  const namedEntities = new Map<string, string>([
    ["nbsp", " "],
    ["amp", "&"],
    ["quot", '"'],
    ["apos", "'"],
    ["lt", "<"],
    ["gt", ">"],
  ]);
  return value.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity: string, body: string) => {
    if (body.charAt(0) !== "#") {
      return namedEntities.get(body.toLowerCase()) ?? entity;
    }
    const isHex = body.charAt(1).toLowerCase() === "x";
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
    // String.fromCodePoint throws a RangeError outside the Unicode range, so keep such references verbatim.
    if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) return entity;
    return String.fromCodePoint(codePoint);
  });
}

/**
 * Extracts the lower-cased hostname of a URL without a leading "www.".
 *
 * @param url - Absolute URL to inspect.
 * @returns The hostname, or an empty string when `url` cannot be parsed.
 */
function getHostname(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  const withoutWww = parsed.hostname.replace(/^www\./i, "");
  return withoutWww.toLowerCase();
}
/**
 * Common two-label public suffixes. For hosts ending in one of these the
 * registrable domain has three labels (e.g. "bbc.co.uk"), not two.
 * This is a small hand-picked list, not the full Public Suffix List.
 */
const MULTI_PART_PUBLIC_SUFFIXES = new Set([
  "co.uk", "org.uk", "ac.uk", "gov.uk", "me.uk", "ltd.uk", "net.uk", "nhs.uk",
  "com.au", "net.au", "org.au", "edu.au", "gov.au",
  "co.nz", "org.nz", "govt.nz", "ac.nz",
  "co.jp", "ne.jp", "or.jp", "ac.jp", "go.jp",
  "com.br", "gov.br", "org.br",
  "com.cn", "gov.cn", "edu.cn", "org.cn",
  "co.in", "gov.in", "ac.in", "org.in", "net.in",
  "co.za", "org.za", "gov.za", "ac.za",
  "co.kr", "or.kr", "go.kr", "ac.kr",
  "com.mx", "com.sg", "gov.sg", "edu.sg", "com.hk", "com.tw", "com.tr", "com.ar", "co.il", "ac.il",
]);

/**
 * Reduces a hostname to the domain used for per-site grouping.
 *
 * @param hostname - Hostname such as "docs.example.com".
 * @returns The last two labels, or the last three when the host ends in a
 *   known two-label public suffix ("news.bbc.co.uk" -> "bbc.co.uk").
 *   Hostnames with at most two labels and IPv4 literals are returned as-is.
 */
function getRootDomain(hostname: string): string {
  const labels = hostname.split(".").filter(Boolean);
  if (labels.length <= 2) return hostname;
  // An IPv4 address has no registrable domain; grouping by its last two octets would merge unrelated hosts.
  if (labels.length === 4 && labels.every((label) => /^\d{1,3}$/.test(label))) return hostname;
  const lastTwo = labels.slice(-2).join(".").toLowerCase();
  const keep = MULTI_PART_PUBLIC_SUFFIXES.has(lastTwo) ? 3 : 2;
  return labels.slice(-keep).join(".");
}

/**
 * Produces a canonical string form of a URL for de-duplication: the fragment
 * is removed and trailing slashes are stripped from any non-root path.
 *
 * @param url - URL to normalize.
 * @returns The normalized URL, or the whitespace-collapsed input when it is not a parseable URL.
 */
function normalizeUrl(url: string): string {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    return normalizeWhitespace(url);
  }
  target.hash = "";
  const path = target.pathname;
  if (path !== "/" && path.endsWith("/")) {
    target.pathname = path.replace(/\/+$/, "");
    if (!target.pathname) target.pathname = "/";
  }
  return target.toString();
}


/**
 * Resolves a possibly relative href against a base URL.
 *
 * @param raw - The href as found in the page.
 * @param baseUrl - URL of the page the href came from.
 * @returns The absolute URL, or null for empty hrefs, `javascript:`/`mailto:`/`tel:`/`data:` links and anything that fails to parse.
 */
function resolveUrl(raw: string, baseUrl: string): string | null {
  const href = raw.trim();
  const isUsable = href.length > 0 && !/^(javascript|mailto|tel|data):/i.test(href);
  if (!isUsable) {
    return null;
  }
  try {
    const absolute = new URL(href, baseUrl);
    return absolute.toString();
  } catch {
    return null;
  }
}

/**
 * Reports whether a URL's host is one of BLOCKED_HOSTS or a subdomain of one.
 */
function isBlockedHost(url: string): boolean {
  const hostname = getHostname(url);
  for (const entry of BLOCKED_HOSTS) {
    if (hostname === entry || hostname.endsWith(`.${entry}`)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether a URL must be skipped: either its host is blocked or the
 * lower-cased URL contains one of BLOCKED_URL_PARTS.
 */
function isBlockedUrl(url: string): boolean {
  if (isBlockedHost(url)) {
    return true;
  }
  const haystack = url.toLowerCase();
  for (const fragment of BLOCKED_URL_PARTS) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}

/**
 * Assigns a trust bonus to a hostname.
 *
 * @param host - Hostname to rate (case-insensitive).
 * @returns 0 for an empty host, 8 for .gov/.edu/.ac.uk, 7 for hosts containing
 *   nih.gov, who.int or arxiv.org, 5 for Wikipedia, 4 for "docs."/"developer."
 *   hosts and 1 otherwise. Rules are checked in that order.
 */
function scoreDomain(host: string): number {
  if (!host) return 0;
  const name = host.toLowerCase();
  const endsWithAny = (suffixes: string[]): boolean => suffixes.some((suffix) => name.endsWith(suffix));
  const containsAny = (fragments: string[]): boolean => fragments.some((fragment) => name.includes(fragment));

  if (endsWithAny([".gov", ".edu", ".ac.uk"])) return 8;
  if (containsAny(["nih.gov", "who.int", "arxiv.org"])) return 7;
  if (containsAny(["wikipedia.org"])) return 5;
  if (containsAny(["docs.", "developer."])) return 4;
  return 1;
}

/**
 * Classifies a hostname for display in reports and prompts.
 *
 * @param host - Hostname to classify (case-insensitive).
 * @returns "blocked" for BLOCKED_HOSTS and their subdomains, "authoritative"
 *   for .gov/.edu hosts or hosts containing nih.gov, "reference" for
 *   Wikipedia, otherwise "web".
 */
function sourceTypeForHost(host: string): string {
  const name = host.toLowerCase();
  const isBlocked = BLOCKED_HOSTS.some((entry) => name === entry || name.endsWith(`.${entry}`));
  if (isBlocked) {
    return "blocked";
  }
  const isAuthoritative = name.endsWith(".gov") || name.endsWith(".edu") || name.includes("nih.gov");
  if (isAuthoritative) {
    return "authoritative";
  }
  return name.includes("wikipedia.org") ? "reference" : "web";
}

/**
 * Splits a query into unique lower-case keywords.
 *
 * @param query - Free-text query.
 * @returns Alphanumeric tokens of at least three characters that are not in
 *   STOP_WORDS, in order of first appearance.
 */
function tokenizeQuery(query: string): string[] {
  const kept = new Set<string>();
  for (const word of query.toLowerCase().split(/[^a-z0-9]+/i)) {
    if (word.length < 3 || STOP_WORDS.has(word)) continue;
    kept.add(word);
  }
  return [...kept];
}

/**
 * Counts non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right.
 *
 * @returns The number of matches; 0 when `needle` is empty.
 */
function countOccurrences(haystack: string, needle: string): number {
  if (!needle) return 0;
  let total = 0;
  for (
    let at = haystack.indexOf(needle);
    at !== -1;
    at = haystack.indexOf(needle, at + needle.length)
  ) {
    total += 1;
  }
  return total;
}

/**
 * Scores text by how often the given tokens occur in it.
 *
 * @param text - Text to score; compared in lower case.
 * @param tokens - Keywords to look for (expected to be lower case already).
 * @returns The sum of the occurrence counts of every token.
 */
function scoreText(text: string, tokens: string[]): number {
  const haystack = text.toLowerCase();
  return tokens.reduce((total, token) => total + countOccurrences(haystack, token), 0);
}

/**
 * Decides whether a line of extracted text is navigation/legal boilerplate.
 *
 * A line counts as boilerplate when it is at most two characters long, when
 * it equals, starts with ("phrase ") or contains (" phrase") an entry of
 * BOILERPLATE_LINK_TEXT, or when it is shorter than 90 characters and
 * mentions cookie/privacy/terms/subscribe/login.
 */
function isBoilerplateLine(line: string): boolean {
  const text = line.toLowerCase();
  if (!text || text.length <= 2) {
    return true;
  }
  const matchesPhrase = BOILERPLATE_LINK_TEXT.some(
    (phrase) => text === phrase || text.includes(` ${phrase}`) || text.startsWith(`${phrase} `),
  );
  if (matchesPhrase) {
    return true;
  }
  return /(cookie|privacy|terms|subscribe|login)/i.test(text) && text.length < 90;
}

/**
 * Converts an HTML fragment into readable plain text.
 *
 * Comments and non-content elements (script, style, nav, footer, ...) are
 * removed, block-level boundaries become line breaks, remaining tags are
 * dropped and entities are decoded. The resulting lines are whitespace
 * normalized, boilerplate lines are discarded and repeated lines
 * (case-insensitive) are kept only once.
 *
 * @param html - HTML markup to convert.
 * @returns The extracted text, one kept line per row.
 */
function stripTags(html: string): string {
  const droppedElements = ["script", "style", "noscript", "svg", "iframe", "nav", "footer", "header", "form", "aside"];

  let text = html.replace(/<!--[\s\S]*?-->/g, " ");
  for (const tag of droppedElements) {
    text = text.replace(new RegExp(`<${tag}[\\s\\S]*?</${tag}>`, "gi"), " ");
  }
  text = text
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|li|section|article|tr|table|blockquote|h[1-6])>/gi, "\n")
    .replace(/<li\b[^>]*>/gi, "• ")
    .replace(/<h[1-6]\b[^>]*>/gi, "\n")
    .replace(/<[^>]+>/g, " ");
  text = decodeHtmlEntities(text).replace(/\r/g, "").replace(/\u00a0/g, " ");

  const keptLines: string[] = [];
  const seenKeys = new Set<string>();
  for (const rawLine of text.split("\n")) {
    const line = normalizeWhitespace(rawLine);
    if (line.length === 0 || isBoilerplateLine(line)) continue;
    const key = line.toLowerCase();
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    keptLines.push(line);
  }
  return keptLines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}

/**
 * Extracts the page description from `<meta name="description">`, falling
 * back to `<meta property="og:description">`.
 *
 * The content attribute is matched up to its own closing quote, so a
 * double-quoted value containing an apostrophe ("It's ...") is not cut
 * short. The value is cleaned with a plain tag strip and entity decode
 * rather than `stripTags`, whose boilerplate-line filter would blank
 * descriptions that merely mention words such as "about" or "privacy".
 *
 * @param html - Full page markup.
 * @returns The whitespace-normalized description, or "" when none is present.
 */
function extractMetaDescription(html: string): string {
  const match =
    html.match(/<meta[^>]+name=["']description["'][^>]*content=(["'])((?:(?!\1)[\s\S])+)\1[^>]*>/i) ??
    html.match(/<meta[^>]+property=["']og:description["'][^>]*content=(["'])((?:(?!\1)[\s\S])+)\1[^>]*>/i);
  if (!match) return "";
  const withoutTags = match[2].replace(/<[^>]+>/g, " ");
  return normalizeWhitespace(decodeHtmlEntities(withoutTags));
}

/**
 * Extracts the page title from `<title>`, falling back to
 * `<meta property="og:title">` when the title element is missing or empty.
 *
 * Titles are cleaned with a plain tag strip and entity decode. `stripTags`
 * is deliberately not used here: its boilerplate-line filter would turn
 * legitimate titles such as "About TypeScript" or "Privacy law overview"
 * into an empty string.
 *
 * @param html - Full page markup.
 * @returns The whitespace-normalized title, or "" when none is present.
 */
function extractTitle(html: string): string {
  const toText = (fragment: string): string =>
    normalizeWhitespace(decodeHtmlEntities(fragment.replace(/<[^>]+>/g, " ")));

  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const fromTitleTag = titleMatch ? toText(titleMatch[1]) : "";
  if (fromTitleTag) return fromTitleTag;

  // The content value is matched up to its own closing quote so apostrophes inside it are preserved.
  const ogTitle = html.match(/<meta[^>]+property=["']og:title["'][^>]*content=(["'])((?:(?!\1)[\s\S])+)\1[^>]*>/i);
  return ogTitle ? toText(ogTitle[2]) : "";
}

/**
 * Reads the href of the first `<link rel="canonical">` element whose `rel`
 * attribute appears before its `href` attribute.
 *
 * @param html - Full page markup.
 * @returns The trimmed href (possibly relative), or "" when no such link exists.
 */
function extractCanonical(html: string): string {
  const found = /<link[^>]+rel=["']canonical["'][^>]*href=["']([^"']+)["'][^>]*>/i.exec(html);
  if (found === null) {
    return "";
  }
  return found[1].trim();
}

/**
 * Collects outgoing http(s) links from a page and gives each a base score.
 *
 * Each link is resolved against `baseUrl`, normalized and de-duplicated;
 * blocked URLs are skipped. The score is the domain bonus, plus 4 for links
 * on the same root domain as the page, minus 3 when the anchor text is empty
 * or pure boilerplate. Scanning stops once `limit * 2` links were collected.
 *
 * @param html - Page markup.
 * @param baseUrl - URL of the page, used to resolve relative hrefs.
 * @param limit - Maximum number of candidates to return.
 * @returns Up to `limit` candidates, highest score first.
 */
function extractAnchors(html: string, baseUrl: string, limit: number): LinkCandidate[] {
  const collected: LinkCandidate[] = [];
  const known = new Set<string>();
  const scanCap = limit * 2;

  for (const found of html.matchAll(/<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi)) {
    const absolute = resolveUrl(found[1], baseUrl);
    if (!absolute) continue;

    const link = normalizeUrl(absolute);
    if (known.has(link)) continue;
    if (!/^https?:/i.test(link)) continue;
    if (isBlockedUrl(link)) continue;

    const label = normalizeWhitespace(stripTags(found[2]));
    const labelLower = label.toLowerCase();
    const isBoilerplate = !label || BOILERPLATE_LINK_TEXT.some((phrase) => labelLower === phrase);
    const linkHost = getHostname(link);
    const isSameSite = getRootDomain(linkHost) === getRootDomain(getHostname(baseUrl));

    const score = (isBoilerplate ? -3 : 0) + scoreDomain(linkHost) + (isSameSite ? 4 : 0);
    collected.push({ url: link, text: label, score });
    known.add(link);
    if (collected.length >= scanCap) break;
  }

  collected.sort((left, right) => right.score - left.score);
  return collected.slice(0, limit);
}

/**
 * Picks the sentences of a text that best match the query tokens.
 *
 * Each sentence is scored by its token occurrences plus a small length bonus
 * (at most 3); the best ones are returned, each shortened to 300 characters.
 *
 * @param text - Text to split into sentences (on ".", "!" or "?" followed by whitespace).
 * @param tokens - Query keywords.
 * @param maxSentences - Maximum number of sentences to return.
 * @returns The selected sentences, best first.
 */
function extractUsefulSentences(text: string, tokens: string[], maxSentences = 4): string[] {
  const ranked: { sentence: string; score: number }[] = [];
  for (const piece of normalizeWhitespace(text).split(/(?<=[.!?])\s+/)) {
    const sentence = piece.trim();
    if (!sentence) continue;
    const score = scoreText(sentence, tokens) + Math.min(3, sentence.length / 120);
    if (score > 0) ranked.push({ sentence, score });
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, maxSentences).map((entry) => trimString(entry.sentence, 300));
}

/**
 * Downloads a textual resource with a timeout.
 *
 * @param url - Address to request; redirects are followed.
 * @param timeoutMs - Milliseconds after which the request is aborted.
 * @returns The response body and its lower-cased content type.
 * @throws Error when the response status is not OK, when the content type is
 *   not HTML, XHTML or plain text, or when the request fails or is aborted.
 */
async function fetchText(url: string, timeoutMs = DEFAULT_TIMEOUT_MS): Promise<{ text: string; contentType: string }> {
  const abortController = new AbortController();
  const timer = setTimeout(() => abortController.abort(new Error("Request timed out")), timeoutMs);
  const requestHeaders = {
    "user-agent": USER_AGENT,
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  };
  try {
    const response = await fetch(url, { signal: abortController.signal, redirect: "follow", headers: requestHeaders });
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status} ${response.statusText}`);
    }
    const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
    const isSupported = ["text/html", "application/xhtml", "text/plain"].some((kind) => contentType.includes(kind));
    if (!isSupported) {
      throw new Error(`Unsupported content type: ${contentType || "unknown"}`);
    }
    const text = await response.text();
    return { text, contentType };
  } finally {
    // Always cancel the timer so it cannot fire after the request has settled.
    clearTimeout(timer);
  }
}

/**
 * Unwraps a DuckDuckGo redirect link.
 *
 * DuckDuckGo result links carry the real destination in the `uddg` query
 * parameter. `URLSearchParams.get` already percent-decodes the value, so it
 * is returned as-is; decoding it a second time would corrupt destinations
 * that contain escaped characters such as `%25` or `%26`.
 *
 * @param href - Link taken from a results page; may be relative or protocol-relative.
 * @returns The destination URL when `uddg` is present, otherwise the href
 *   resolved against https://duckduckgo.com, or the raw href when it cannot be parsed.
 */
function decodeDuckDuckGoUrl(href: string): string {
  try {
    const parsed = new URL(href, "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    return target ? target : parsed.toString();
  } catch {
    return href;
  }
}
/**
 * Parses the HTML endpoint of DuckDuckGo (html.duckduckgo.com) into results.
 *
 * Titles and snippets are cleaned with a plain tag strip and entity decode.
 * `stripTags` is deliberately not used: its boilerplate-line filter empties
 * any title containing words such as "about", "privacy" or "search", which
 * caused those results to be dropped. The href attribute is entity-decoded
 * before it is unwrapped, because attribute values in HTML escape "&" as
 * "&amp;".
 *
 * @param html - Markup of a results page.
 * @returns One entry per result block that has a link, a title and a non-blocked URL.
 */
function parseDuckDuckGoResults(html: string): SearchResult[] {
  const toText = (fragment: string): string =>
    normalizeWhitespace(decodeHtmlEntities(fragment.replace(/<[^>]+>/g, " ")));

  const results: SearchResult[] = [];
  const blocks = html.split(/<div class="result\b/gi);
  // The text before the first result block is page chrome, hence slice(1).
  for (const block of blocks.slice(1)) {
    // The class and href attributes can appear in either order.
    const linkMatch =
      block.match(/<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i) ??
      block.match(/<a[^>]*href="([^"]+)"[^>]*class="[^"]*result__a[^"]*"[^>]*>([\s\S]*?)<\/a>/i);
    if (!linkMatch) continue;

    const rawUrl = decodeDuckDuckGoUrl(decodeHtmlEntities(linkMatch[1]));
    const title = toText(linkMatch[2]);
    if (!rawUrl || !title || isBlockedUrl(rawUrl)) continue;

    const snippetMatch = block.match(/class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/(?:a|div|span)>/i);
    const snippet = snippetMatch ? toText(snippetMatch[1]) : "";

    let displayedUrl = rawUrl;
    try {
      const parsed = new URL(rawUrl);
      displayedUrl = `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
    } catch {
      // Keep the raw URL for display when it cannot be parsed.
    }
    results.push({ title, url: rawUrl, displayedUrl, snippet, source: "duckduckgo" });
  }
  return results;
}

/**
 * Parses the lite endpoint of DuckDuckGo (lite.duckduckgo.com) into results.
 *
 * Every `<a rel="nofollow" href="...">` element is treated as a result. The
 * title is cleaned with a plain tag strip and entity decode instead of
 * `stripTags`, whose boilerplate-line filter empties titles containing words
 * such as "about" or "privacy" and thereby dropped those results. The href
 * attribute is entity-decoded before it is unwrapped.
 *
 * @param html - Markup of a lite results page.
 * @returns One entry per usable link; snippets are always empty.
 */
function parseDuckDuckGoLiteResults(html: string): SearchResult[] {
  const results: SearchResult[] = [];
  const anchorRegex = /<a[^>]*rel="nofollow"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  let match: RegExpExecArray | null;
  while ((match = anchorRegex.exec(html)) !== null) {
    const rawUrl = decodeDuckDuckGoUrl(decodeHtmlEntities(match[1]));
    const title = normalizeWhitespace(decodeHtmlEntities(match[2].replace(/<[^>]+>/g, " ")));
    if (!rawUrl || !title || isBlockedUrl(rawUrl)) continue;

    let displayedUrl = rawUrl;
    try {
      const parsed = new URL(rawUrl);
      displayedUrl = `${parsed.hostname.replace(/^www\./i, "")}${parsed.pathname}`;
    } catch {
      // Keep the raw URL for display when it cannot be parsed.
    }
    results.push({ title, url: rawUrl, displayedUrl, snippet: "", source: "duckduckgo-lite" });
  }
  return results;
}

/**
 * Runs a web search through DuckDuckGo without an API key.
 *
 * The HTML endpoint is tried first and the lite endpoint second; the first
 * endpoint that yields at least one parsed result wins. Request and parsing
 * failures are swallowed on purpose so one broken endpoint does not abort
 * the research run.
 *
 * @param query - Search terms.
 * @param limit - Maximum number of results to return.
 * @returns Up to `limit` results, or an empty array when both endpoints fail.
 */
async function searchDuckDuckGo(query: string, limit: number): Promise<SearchResult[]> {
  const encodedQuery = encodeURIComponent(query);
  const endpoints: { url: string; parse: (html: string) => SearchResult[] }[] = [
    { url: `https://html.duckduckgo.com/html/?q=${encodedQuery}`, parse: parseDuckDuckGoResults },
    { url: `https://lite.duckduckgo.com/lite/?q=${encodedQuery}`, parse: parseDuckDuckGoLiteResults },
  ];
  for (const endpoint of endpoints) {
    try {
      const { text } = await fetchText(endpoint.url);
      const hits = endpoint.parse(text);
      if (hits.length > 0) {
        return hits.slice(0, limit);
      }
    } catch {
      // Best effort: fall through to the next endpoint.
    }
  }
  return [];
}

/**
 * Removes items whose normalized URL was already seen, keeping the first
 * occurrence and the original order.
 */
function uniqueByUrl<T extends { url: string }>(items: T[]): T[] {
  const knownUrls = new Set<string>();
  return items.filter((entry) => {
    const key = normalizeUrl(entry.url);
    if (knownUrls.has(key)) {
      return false;
    }
    knownUrls.add(key);
    return true;
  });
}

/**
 * Trims every string, drops empty ones and removes exact duplicates while
 * preserving the order of first appearance.
 */
function uniqueStrings(items: string[]): string[] {
  const distinct = new Set<string>();
  for (const raw of items) {
    const cleaned = raw.trim();
    if (cleaned) distinct.add(cleaned);
  }
  return [...distinct];
}
/**
 * Builds the first-round search queries for a research question.
 *
 * The list starts with the query combined with the focus (when given), then
 * the query itself and the query with a fixed set of suffixes, followed by
 * its leading keywords and a quoted three-keyword phrase when available.
 *
 * @param query - The research question.
 * @param focus - Optional sub-topic.
 * @returns Distinct, trimmed query strings in priority order.
 */
function buildBaseSearchQueries(query: string, focus?: string): string[] {
  const base = normalizeWhitespace(query);
  const keywords = tokenizeQuery(base);
  const candidates: string[] = [];

  const trimmedFocus = focus?.trim();
  if (trimmedFocus) candidates.push(`${base} ${trimmedFocus}`);

  candidates.push(base);
  for (const suffix of ["official", "documentation", "research", "analysis", "review", "key facts"]) {
    candidates.push(`${base} ${suffix}`);
  }

  const leadingKeywords = keywords.slice(0, Math.min(4, keywords.length)).join(" ");
  if (leadingKeywords && leadingKeywords !== base.toLowerCase()) candidates.push(leadingKeywords);
  if (keywords.length >= 3) candidates.push(`"${keywords.slice(0, 3).join(" ")}"`);

  return uniqueStrings(candidates);
}

/**
 * Rates a search result against a query.
 *
 * @param result - The result to rate.
 * @param query - Query whose keywords are matched.
 * @returns Keyword hits in the title (x6) and snippet (x3), plus the domain
 *   bonus, plus up to 3 points for the length of the snippet (or of the title
 *   when there is no snippet).
 */
function scoreSearchResult(result: SearchResult, query: string): number {
  const keywords = tokenizeQuery(query);
  const measuredLength = result.snippet.length || result.title.length;
  const parts = [
    scoreText(result.title, keywords) * 6,
    scoreText(result.snippet, keywords) * 3,
    scoreDomain(getHostname(result.url)),
    Math.min(3, Math.floor(measuredLength / 80)),
  ];
  return parts[0] + parts[1] + parts[2] + parts[3];
}

/**
 * De-duplicates, filters and ranks search results.
 *
 * Blocked URLs are removed, the rest is ordered by score (best first) and at
 * most two results per root domain are kept.
 *
 * @param results - Raw results, possibly with duplicates.
 * @param query - Query used for scoring.
 * @param limit - Selection stops once this many results were picked.
 * @returns The selected results in rank order.
 */
function rankSearchResults(results: SearchResult[], query: string, limit: number): SearchResult[] {
  const candidates = uniqueByUrl(results)
    .filter((result) => !isBlockedUrl(result.url))
    .map((result) => ({ item: result, score: scoreSearchResult(result, query) }));
  candidates.sort((left, right) => right.score - left.score);

  const perDomain = new Map<string, number>();
  const picked: SearchResult[] = [];
  for (const { item } of candidates) {
    const domain = getRootDomain(getHostname(item.url));
    const used = perDomain.get(domain) ?? 0;
    if (used < 2) {
      perDomain.set(domain, used + 1);
      picked.push(item);
      if (picked.length >= limit) break;
    }
  }
  return picked;
}

/**
 * Selects the most relevant pages while keeping at most two per root domain.
 *
 * @param pages - Crawled pages; the array is not modified.
 * @param limit - Selection stops once this many pages were picked.
 * @returns The selected pages ordered by descending relevance score.
 */
function pickTopPages(pages: ResearchPage[], limit: number): ResearchPage[] {
  const byRelevance = pages.slice().sort((left, right) => right.relevanceScore - left.relevanceScore);
  const perDomain = new Map<string, number>();
  const picked: ResearchPage[] = [];
  for (const candidate of byRelevance) {
    const domain = getRootDomain(getHostname(candidate.url));
    const used = perDomain.get(domain) ?? 0;
    if (used < 2) {
      perDomain.set(domain, used + 1);
      picked.push(candidate);
      if (picked.length >= limit) break;
    }
  }
  return picked;
}

/**
 * Extracts the outgoing links of a page and re-scores them for the query.
 *
 * On top of the base score from `extractAnchors`, a link gains two points per
 * keyword hit in its text, one per keyword hit in its URL, two when text or
 * URL look like documentation/research material, and the domain bonus of its
 * host. Note that `extractAnchors` has already added the domain bonus once,
 * so it is counted twice in the final score.
 *
 * @param html - Page markup.
 * @param baseUrl - URL of the page.
 * @param queryTokens - Query keywords.
 * @param maxLinks - Maximum number of candidates.
 * @returns The candidates, highest score first.
 */
function buildLinkCandidates(html: string, baseUrl: string, queryTokens: string[], maxLinks: number): LinkCandidate[] {
  const topicHint = /(docs|guide|help|support|manual|paper|study|report|blog|news|research|about|faq)/i;
  const rescored = extractAnchors(html, baseUrl, maxLinks).map((anchor) => {
    const textBonus = scoreText(anchor.text, queryTokens) * 2;
    const urlBonus = scoreText(anchor.url, queryTokens);
    const hintBonus = topicHint.test(anchor.text + " " + anchor.url) ? 2 : 0;
    const domainBonus = scoreDomain(getHostname(anchor.url));
    return { ...anchor, score: anchor.score + textBonus + urlBonus + hintBonus + domainBonus };
  });
  rescored.sort((left, right) => right.score - left.score);
  return rescored;
}

/**
 * Turns text into a single-line excerpt of at most `maxChars` characters.
 */
function buildExcerpt(text: string, maxChars: number): string {
  const singleLine = normalizeWhitespace(text);
  return trimString(singleLine, maxChars);
}
/**
 * Summarizes page text for a query.
 *
 * @param pageText - Extracted page text.
 * @param queryTokens - Query keywords.
 * @param maxSentences - Maximum number of sentences in the summary.
 * @returns The best-matching sentences joined by spaces, or the first 700
 *   characters of the text when no sentence could be selected.
 */
function summarizePage(pageText: string, queryTokens: string[], maxSentences = 3): string {
  const bestSentences = extractUsefulSentences(pageText, queryTokens, maxSentences);
  return bestSentences.length > 0 ? bestSentences.join(" ") : trimString(pageText, 700);
}

/**
 * Downloads one page and extracts everything the research loop needs.
 *
 * @param url - Page to fetch.
 * @param depth - Crawl depth recorded on the resulting page.
 * @param queryTokens - Query keywords used for scoring and summarizing.
 * @param maxChars - Maximum length of the extracted text.
 * @param maxLinkCount - Maximum number of outgoing link candidates.
 * @returns The extracted page and its scored outgoing links.
 * @throws Whatever `fetchText` throws (timeout, HTTP error, unsupported content type).
 */
async function crawlUrl(url: string, depth: number, queryTokens: string[], maxChars = DEFAULT_MAX_CHARS_PER_PAGE, maxLinkCount = MAX_LINKS_PER_PAGE): Promise<{ page: ResearchPage; links: LinkCandidate[] }> {
  const fetched = await fetchText(url);
  const html = fetched.text;

  const title = extractTitle(html) || getHostname(url) || url;
  const description = extractMetaDescription(html);
  const canonical = extractCanonical(html);
  const content = trimString(stripTags(html), maxChars);

  // Prefer the page's declared canonical address unless it points at a blocked URL.
  let finalUrl: string;
  if (canonical && !isBlockedUrl(canonical)) {
    finalUrl = normalizeUrl(resolveUrl(canonical, url) ?? url);
  } else {
    finalUrl = normalizeUrl(url);
  }

  const links = buildLinkCandidates(html, finalUrl, queryTokens, maxLinkCount);
  const sourceHost = getHostname(finalUrl);
  const sourceType = sourceTypeForHost(sourceHost);
  const excerpt = buildExcerpt(description || summarizePage(content, queryTokens), 700);

  const wordCount = content.split(/\s+/).filter(Boolean).length;
  const keywordScore = scoreText(`${title}\n${description}\n${content}`, queryTokens) * 2;
  const lengthBonus = Math.min(4, Math.floor(wordCount / 250));
  const relevanceScore = keywordScore + scoreDomain(sourceHost) + lengthBonus;

  const page: ResearchPage = {
    url: finalUrl,
    title,
    description,
    content,
    wordCount,
    links: links.map((candidate) => candidate.url),
    excerpt,
    relevanceScore,
    depth,
    sourceType,
  };
  return { page, links };
}

/**
 * Builds the prompt that asks the model for follow-up search queries.
 *
 * @param query - The research question.
 * @param focus - Optional sub-topic; its line is omitted when absent.
 * @param pages - Pages gathered so far; the first six are previewed.
 * @param maxQueries - Upper bound on follow-up queries stated in the prompt.
 * @returns The prompt, with sections separated by blank lines.
 */
function buildPlannerPrompt(query: string, focus: string | undefined, pages: ResearchPage[], maxQueries: number): string {
  const describeSource = (page: ResearchPage, index: number): string => {
    const lines = [
      `Source ${index + 1}`,
      `Title: ${page.title}`,
      `URL: ${page.url}`,
      `Type: ${page.sourceType}`,
      `Snippet: ${trimString(page.excerpt || page.description || page.content, 280)}`,
    ];
    return lines.join("\n");
  };
  const preview = pages.slice(0, 6).map(describeSource).join("\n\n");

  const sections: string[] = [];
  sections.push(`You are a local deep-research planner.`);
  sections.push(`Task: ${query}`);
  if (focus) sections.push(`Focus: ${focus}`);
  sections.push(`You must return ONLY valid JSON with this shape:`);
  sections.push(`{ "followUpQueries": string[], "gaps": string[], "contradictions": string[] }`);
  sections.push(`Limit followUpQueries to at most ${maxQueries}.`);
  sections.push(`Prefer queries that test evidence, fill missing details, or inspect authoritative sources.`);
  sections.push(`Avoid social media, video sites, and shallow listicle search terms.`);
  sections.push(`Evidence packet:`);
  sections.push(preview || "(no sources yet)");
  return sections.join("\n\n");
}

/**
 * Builds the prompt that asks the model to write the final report.
 *
 * @param query - The research question.
 * @param focus - Optional sub-topic; its line is omitted when absent.
 * @param pages - Crawled pages; the first ten form the evidence packet.
 * @param searchResults - Search hits; the first twelve form the search trail.
 * @returns The prompt, with sections separated by blank lines.
 */
function buildSynthesisPrompt(query: string, focus: string | undefined, pages: ResearchPage[], searchResults: SearchResult[]): string {
  const describeSource = (page: ResearchPage, index: number): string => {
    const lines = [
      `Source ${index + 1}`,
      `Title: ${page.title}`,
      `URL: ${page.url}`,
      `Type: ${page.sourceType}`,
      `Excerpt: ${page.excerpt}`,
      `Relevant sentences: ${trimString(page.content, 1800)}`,
    ];
    return lines.join("\n");
  };
  const describeHit = (result: SearchResult, index: number): string =>
    `${index + 1}. ${result.title}\n   ${result.url}\n   ${trimString(result.snippet, 220)}`;

  const sources = pages.slice(0, 10).map(describeSource).join("\n\n");
  const trail = searchResults.slice(0, 12).map(describeHit).join("\n");

  const sections: string[] = [];
  sections.push(`You are a careful deep-research analyst.`);
  sections.push(`Question: ${query}`);
  if (focus) sections.push(`Focus: ${focus}`);
  sections.push(`Write a concise markdown report using only the evidence packet below.`);
  sections.push(`Be explicit about uncertainty and conflicts.`);
  sections.push(`Every important claim should be tied to one or more source numbers like [1] or [1][3].`);
  sections.push(`Return sections in this order:`);
  sections.push(`# Answer`);
  sections.push(`# Key findings`);
  sections.push(`# Conflicts / caveats`);
  sections.push(`# Sources`);
  sections.push(`Evidence packet:`);
  sections.push(sources || "(no crawled sources)");
  sections.push(`Search trail:`);
  sections.push(trail || "(no search trail)");
  return sections.join("\n\n");
}

/**
 * Builds a markdown report without a model, used when synthesis is unavailable.
 *
 * @param query - The research question.
 * @param focus - Optional sub-topic mentioned in the answer line.
 * @param pages - Crawled pages; the first ten are listed as sources.
 * @param searchResults - Search hits; the first twelve form the search trail.
 * @param gaps - Known evidence gaps to list under caveats.
 * @param contradictions - Known contradictions to list under caveats.
 * @returns The report with Answer, Key findings, Conflicts / caveats, Sources and Search trail sections.
 */
function buildFallbackReport(query: string, focus: string | undefined, pages: ResearchPage[], searchResults: SearchResult[], gaps: string[] = [], contradictions: string[] = []): string {
  const topPages = pages.slice(0, 10);
  const summaryOf = (page: ResearchPage, maxChars: number): string =>
    trimString(page.excerpt || page.description || page.content, maxChars);
  const asBullet = (item: string): string => `- ${item}`;

  const lines: string[] = [];

  lines.push(`# Answer`);
  lines.push(`I gathered ${topPages.length} crawled sources${focus ? ` for the focus area "${focus}"` : ""}. This is a best-effort local synthesis for: ${query}.`);
  lines.push(``);

  lines.push(`# Key findings`);
  const keyFindings = topPages.slice(0, 6).map((page, index) => `- [${index + 1}] ${page.title}: ${summaryOf(page, 180)}`);
  if (keyFindings.length) lines.push(...keyFindings);
  else lines.push("- No strong findings extracted yet.");
  lines.push(``);

  lines.push(`# Conflicts / caveats`);
  if (contradictions.length) lines.push(...contradictions.map(asBullet));
  else lines.push("- No clear contradictions were detected automatically.");
  if (gaps.length) lines.push(...gaps.map(asBullet));
  else lines.push("- The search may need more targeted follow-up queries.");
  lines.push(``);

  lines.push(`# Sources`);
  if (topPages.length) {
    lines.push(topPages.map((page, index) => `${index + 1}. ${page.title} — ${page.url}\n   ${summaryOf(page, 220)}`).join("\n"));
  } else {
    lines.push("No pages could be crawled.");
  }
  lines.push(``);

  lines.push(`# Search trail`);
  const searchTrail = searchResults.slice(0, 12).map((hit, index) => `${index + 1}. ${hit.title} — ${hit.url}`);
  if (searchTrail.length) lines.push(...searchTrail);
  else lines.push("No search results were returned.");

  return lines.join("\n");
}

/**
 * Parses JSON out of model output that may be wrapped in extra text.
 *
 * The content of a ```json fenced block is used when present, otherwise the
 * whole trimmed text. If that does not parse, the span from the first "{" to
 * the last "}" is tried.
 *
 * @param text - Raw model output.
 * @returns The parsed value, or null when nothing could be parsed.
 */
function parseJsonLoose(text: string): any | null {
  const source = text.trim();
  const fence = /```json\s*([\s\S]*?)```/i.exec(source);
  const payload = fence === null ? source : fence[1].trim();

  try {
    return JSON.parse(payload);
  } catch {
    // Not valid as a whole; try the outermost brace-delimited span below.
  }

  const open = payload.indexOf("{");
  const close = payload.lastIndexOf("}");
  if (open === -1 || close === -1 || close <= open) {
    return null;
  }
  try {
    return JSON.parse(payload.slice(open, close + 1));
  } catch {
    return null;
  }
}

/**
 * Finds a loaded model to run completions on.
 *
 * @param modelId - Identifier of the wanted model; when omitted the first loaded model is used.
 * @returns The model and its identifier, or null when no model is loaded,
 *   the requested identifier is not loaded, or the first model has no identifier.
 */
async function getLoadedModel(modelId?: string): Promise<{ identifier: string; model: any } | null> {
  const models = await getClient().llm.listLoaded();
  if (models.length === 0) {
    return null;
  }
  if (!modelId) {
    const fallback = models[0];
    return fallback?.identifier ? { identifier: fallback.identifier, model: fallback } : null;
  }
  const wanted = models.find((candidate: any) => candidate.identifier === modelId);
  return wanted ? { identifier: wanted.identifier ?? modelId, model: wanted } : null;
}

/**
 * Runs a text completion on a loaded model and returns the generated text.
 *
 * The prediction returned by `model.complete` is consumed as a stream when it
 * is async-iterable. It must not be awaited first: awaiting resolves it to
 * the final result object, which cannot be iterated with `for await`, and the
 * resulting TypeError made every completion fall into the catch branch.
 * When the returned value is not async-iterable it is awaited and its
 * `content` (or the value itself when it is a string) is used.
 *
 * @param modelId - Identifier of the model to use, or undefined for the first loaded model.
 * @param prompt - Prompt text.
 * @param maxTokens - Token limit passed to the model.
 * @param temperature - Sampling temperature passed to the model.
 * @returns The trimmed text and the identifier of the model that produced
 *   it; both are null when no model is available or the completion fails.
 */
async function completeWithModel(modelId: string | undefined, prompt: string, maxTokens: number, temperature: number): Promise<{ text: string | null; modelUsed: string | null }> {
  const loaded = await getLoadedModel(modelId).catch(() => null);
  if (!loaded) return { text: null, modelUsed: null };
  try {
    const prediction: any = loaded.model.complete(prompt, { maxTokens, temperature });
    let text = "";
    if (prediction != null && typeof prediction[Symbol.asyncIterator] === "function") {
      for await (const chunk of prediction) text += chunk?.content ?? "";
    } else {
      const result: any = await prediction;
      text = typeof result === "string" ? result : String(result?.content ?? "");
    }
    return { text: text.trim(), modelUsed: loaded.identifier };
  } catch {
    // Completion is best effort; callers fall back to heuristics when text is null.
    return { text: null, modelUsed: null };
  }
}

/**
 * Produces follow-up search queries without a model.
 *
 * For each of the first four pages it suggests the query combined with words
 * from the page title and a `site:` search on the page's host (unless the
 * host is blocked). Generic suffix queries are appended afterwards.
 *
 * @param query - The research question.
 * @param focus - Optional sub-topic.
 * @param pages - Pages ranked so far.
 * @param round - Current round number; rounds after the first add critical-angle queries.
 * @returns At most eight distinct queries in insertion order.
 */
function makeHeuristicFollowUps(query: string, focus: string | undefined, pages: ResearchPage[], round: number): string[] {
  const suggestions = new Set<string>();
  const hasKeywords = tokenizeQuery(query).length > 0;

  for (const page of pages.slice(0, 4)) {
    const titleWords = page.title
      .split(/[^a-z0-9]+/i)
      .map((word) => word.trim())
      .filter((word) => word.length >= 4)
      .slice(0, 4)
      .join(" ");
    if (titleWords) suggestions.add(`${query} ${titleWords}`.trim());

    const host = getHostname(page.url);
    const isBlocked = BLOCKED_HOSTS.some((entry) => host === entry || host.endsWith(`.${entry}`));
    if (host && !isBlocked) suggestions.add(`${query} site:${host}`);
  }

  if (focus) suggestions.add(`${query} ${focus}`);

  const suffixes: string[] = [];
  if (hasKeywords) suffixes.push("official", "documentation", "key facts");
  if (round > 1) suffixes.push("controversy", "analysis", "evidence");
  for (const suffix of suffixes) suggestions.add(`${query} ${suffix}`);

  return [...suggestions].slice(0, 8);
}

/**
 * Runs the complete research workflow: search, crawl, plan follow-ups,
 * synthesize a report and audit the evidence.
 *
 * Each round issues the current frontier of search queries, crawls the
 * highest-scored queued URLs until `maxPages` is reached, queues the links
 * found on crawled pages for the next round, and then asks the model (or a
 * heuristic) for the next frontier. After the last round the model writes the
 * report and audits the evidence; a locally built report is used when no
 * model output is available.
 *
 * @param params - Query and tuning options; numeric options are clamped to their supported ranges.
 * @param ctx - Progress/warning callbacks and the abort signal of the tool call.
 * @returns An object with the query, focus, model used, configured `rounds`,
 *   `roundsCompleted`, issued search queries, ranked search results, sources,
 *   contradictions, gaps and the markdown report.
 * @throws Error("Research was aborted") when the abort signal fires between steps.
 */
async function runDeepResearch(params: DeepResearchParams, ctx: { status: (text: string) => void; warn: (text: string) => void; signal: AbortSignal }): Promise<any> {
  const query = normalizeWhitespace(params.query);
  const focus = params.focus?.trim() || undefined;
  const maxRounds = clampInt(params.maxRounds, DEFAULT_MAX_ROUNDS, 1, 4);
  const maxSearchesPerRound = clampInt(params.maxSearchesPerRound, DEFAULT_MAX_SEARCHES_PER_ROUND, 1, 8);
  const maxResultsPerSearch = clampInt(params.maxResultsPerSearch, DEFAULT_MAX_RESULTS_PER_SEARCH, 1, 10);
  const maxPages = clampInt(params.maxPages, DEFAULT_MAX_PAGES, 1, 24);
  const maxDepth = clampInt(params.maxDepth, DEFAULT_MAX_DEPTH, 1, 3);
  const maxCharsPerPage = clampInt(params.maxCharsPerPage, DEFAULT_MAX_CHARS_PER_PAGE, 3000, 40000);
  const maxTokens = clampInt(params.maxTokens, 650, 64, 1500);
  const temperature = clampFloat(params.temperature, 0.2, 0, 2);

  const rankingQuery = `${query} ${focus ?? ""}`;
  const queryTokens = tokenizeQuery(rankingQuery);
  const issuedQueries = new Set<string>();
  const visited = new Set<string>();
  const allSearchResults: SearchResult[] = [];
  const allPages: ResearchPage[] = [];
  const queue: { url: string; depth: number; score: number }[] = [];

  const throwIfAborted = (): void => {
    if (ctx.signal.aborted) throw new Error("Research was aborted");
  };
  // Converts a model-supplied JSON value into a clean list of at most `limit` strings.
  const toStringList = (value: unknown, limit: number): string[] =>
    Array.isArray(value) ? value.map((item) => String(item).trim()).filter(Boolean).slice(0, limit) : [];

  let frontier = buildBaseSearchQueries(query, focus).slice(0, maxSearchesPerRound);
  let roundsCompleted = 0;
  ctx.status(`Planning research for ${query}`);

  for (let round = 1; round <= maxRounds; round += 1) {
    throwIfAborted();
    const roundQueries = frontier.map((item) => item.trim()).filter((item) => item && !issuedQueries.has(item.toLowerCase()));
    roundQueries.forEach((item) => issuedQueries.add(item.toLowerCase()));
    if (roundQueries.length === 0) break;

    ctx.status(`Round ${round}/${maxRounds}: searching ${roundQueries.length} queries`);
    for (const searchQuery of roundQueries.slice(0, maxSearchesPerRound)) {
      throwIfAborted();
      ctx.status(`Searching: ${searchQuery}`);
      let results = await searchDuckDuckGo(searchQuery, maxResultsPerSearch);
      results = rankSearchResults(results, rankingQuery, maxResultsPerSearch);
      allSearchResults.push(...results);
      for (const result of results) queue.push({ url: normalizeUrl(result.url), depth: 0, score: scoreSearchResult(result, query) });
    }

    queue.sort((a, b) => b.score - a.score);
    // Links discovered in this round are only crawled in the next round.
    const nextQueue: typeof queue = [];
    while (queue.length > 0) {
      if (allPages.length >= maxPages) break;
      throwIfAborted();
      const current = queue.shift()!;
      const url = normalizeUrl(current.url);
      if (!url || visited.has(url) || isBlockedUrl(url)) continue;
      if (current.depth > maxDepth) continue;
      visited.add(url);
      ctx.status(`Crawling ${allPages.length + 1}/${maxPages}: ${getHostname(url) || url}`);
      try {
        // crawlUrl already computes page.relevanceScore, so it is not recomputed here.
        const { page, links } = await crawlUrl(url, current.depth, queryTokens, maxCharsPerPage, MAX_LINKS_PER_PAGE);
        // crawlUrl reports the canonical address, which may differ from the requested one.
        // Skip the page when that address was already handled so a document is never listed twice.
        if (page.url !== url) {
          if (visited.has(page.url)) continue;
          visited.add(page.url);
        }
        allPages.push(page);
        if (current.depth < maxDepth) {
          for (const link of links) {
            if (visited.has(link.url) || isBlockedUrl(link.url)) continue;
            nextQueue.push({ url: link.url, depth: current.depth + 1, score: link.score });
          }
        }
      } catch (error) {
        ctx.warn(`Could not crawl ${url}: ${error instanceof Error ? error.message : String(error)}`);
      }
    }
    queue.push(...nextQueue);
    queue.sort((a, b) => b.score - a.score);
    roundsCompleted = round;

    // Follow-up queries are only consumed by a later round; planning after the
    // final round would spend a model completion on output that is discarded.
    if (round === maxRounds) break;

    const rankedPages = pickTopPages(allPages, Math.min(maxPages, 10));
    const plannerPrompt = buildPlannerPrompt(query, focus, rankedPages, 8);
    // completeWithModel yields null text when no model is loaded, which selects the heuristic below.
    const planner = await completeWithModel(params.modelId, plannerPrompt, 320, 0.15);
    const parsedPlan = planner.text ? parseJsonLoose(planner.text) : null;
    let followUps = toStringList(parsedPlan?.followUpQueries, Number.MAX_SAFE_INTEGER);
    if (followUps.length === 0) followUps = makeHeuristicFollowUps(query, focus, rankedPages, round);
    frontier = uniqueStrings(followUps).filter((item) => !issuedQueries.has(item.toLowerCase())).slice(0, maxSearchesPerRound);
    if (frontier.length === 0) break;
  }

  const finalPages = pickTopPages(allPages, maxPages);
  const uniqueSearchResults = rankSearchResults(allSearchResults, rankingQuery, Math.min(allSearchResults.length, maxPages * 2));

  const synthesisPrompt = buildSynthesisPrompt(query, focus, finalPages, uniqueSearchResults);
  const synthesis = await completeWithModel(params.modelId, synthesisPrompt, maxTokens, temperature);

  const auditEvidence = finalPages.slice(0, 8).map((page) => `${page.title}: ${trimString(page.excerpt || page.description || page.content, 240)}`).join("\n");
  const auditPrompt = [`You are auditing a research packet for missing evidence, contradictions, and reliability concerns.`, `Question: ${query}`, focus ? `Focus: ${focus}` : "", `Evidence:`, auditEvidence || "(none)", `Return ONLY valid JSON with keys contradictions and gaps, both arrays of concise strings.`].filter(Boolean).join("\n\n");
  const audit = await completeWithModel(params.modelId, auditPrompt, 220, 0.1);
  const parsedAudit = audit.text ? parseJsonLoose(audit.text) : null;
  const contradictions = toStringList(parsedAudit?.contradictions, 8);
  const gaps = toStringList(parsedAudit?.gaps, 8);

  let reportMarkdown: string;
  let modelUsed: string | null = null;
  if (synthesis.text) {
    reportMarkdown = synthesis.text;
    modelUsed = synthesis.modelUsed;
  } else {
    // No model-written report: build the local one, enriched with whatever the audit found.
    reportMarkdown = buildFallbackReport(query, focus, finalPages, uniqueSearchResults, gaps, contradictions);
  }
  if (audit.modelUsed && !modelUsed) modelUsed = audit.modelUsed;

  const sources = finalPages.map((page, index) => ({ rank: index + 1, title: page.title, url: page.url, domain: getHostname(page.url), sourceType: page.sourceType, excerpt: page.excerpt, wordCount: page.wordCount, relevanceScore: page.relevanceScore, depth: page.depth }));
  return { query, focus: focus ?? null, modelUsed, rounds: maxRounds, roundsCompleted, searchQueries: Array.from(issuedQueries), searchResults: uniqueSearchResults, sources, contradictions, gaps, reportMarkdown };
}

/**
 * Tool definition exposed to LM Studio. The parameter schema mirrors the
 * ranges that `runDeepResearch` clamps to, and the implementation simply
 * forwards the validated parameters and the tool context.
 */
const deepResearchTool = tool({
  name: "deepResearch",
  description: "Run a local autonomous deep-research workflow: it plans searches, crawls pages recursively, filters social sites, checks for gaps and contradictions, and returns a synthesized markdown report. No paid APIs are used.",
  parameters: {
    // What to research.
    query: z.string().min(1),
    focus: z.string().optional(),

    // Search and crawl budget.
    maxRounds: z.number().int().min(1).max(4).default(DEFAULT_MAX_ROUNDS),
    maxSearchesPerRound: z.number().int().min(1).max(8).default(DEFAULT_MAX_SEARCHES_PER_ROUND),
    maxResultsPerSearch: z.number().int().min(1).max(10).default(DEFAULT_MAX_RESULTS_PER_SEARCH),
    maxPages: z.number().int().min(1).max(24).default(DEFAULT_MAX_PAGES),
    maxDepth: z.number().int().min(1).max(3).default(DEFAULT_MAX_DEPTH),
    maxCharsPerPage: z.number().int().min(3000).max(40000).default(DEFAULT_MAX_CHARS_PER_PAGE),

    // Model selection and sampling for the synthesis step.
    modelId: z.string().optional(),
    maxTokens: z.number().int().min(64).max(1500).default(650),
    temperature: z.number().min(0).max(2).default(0.2),
  },
  implementation: (params: DeepResearchParams, ctx) => runDeepResearch(params, ctx),
});

/**
 * Plugin entry point: registers a tools provider that exposes the
 * deep-research tool.
 *
 * @param pluginContext - Context handed to the plugin by LM Studio.
 */
export async function main(pluginContext: PluginContext) {
  const provideTools = async () => [deepResearchTool];
  pluginContext.withToolsProvider(provideTools);
}
deep-search

deep-search