// src/duckduckgoClient.ts
import type { SearchHit } from "./types";
/**
 * Error type raised for every failure of the DuckDuckGo backend, so callers
 * can tell these apart from unrelated errors with `instanceof`.
 */
export class DuckDuckGoError extends Error {
  /** Overrides the default `"Error"` label on each instance. */
  override name = "DuckDuckGoError";

  /**
   * @param message - Human-readable description of what went wrong.
   */
  constructor(message: string) {
    super(message);
  }
}
/**
 * Runs a web search against DuckDuckGo's HTML endpoint and returns the parsed hits.
 *
 * The query is sent as a form-encoded POST to `https://html.duckduckgo.com/html/`.
 * One timeout covers the whole exchange: waiting for the response headers and
 * downloading the response body.
 *
 * @param query - Search terms, sent as the `q` form field.
 * @param maxResults - Upper bound on the number of hits returned.
 * @param timeoutMs - Milliseconds to wait before the request is aborted.
 * @returns The hits extracted from the result page by `parseDuckDuckGoHtml`.
 * @throws DuckDuckGoError when the request times out, the network call fails,
 *   the server answers with a non-OK status, or the page matches the anti-bot
 *   heuristics and contains no result links.
 */
export async function duckduckgoSearch(
  query: string,
  maxResults: number,
  timeoutMs: number,
): Promise<SearchHit[]> {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  let html: string;
  try {
    const res = await fetch("https://html.duckduckgo.com/html/", {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "lms-plugin-web-search/1.0 (+https://lmstudio.ai/zexigh/web-search)",
      },
      body: new URLSearchParams({ q: query }).toString(),
      signal: ctrl.signal,
    });
    if (!res.ok) {
      throw new DuckDuckGoError(`DuckDuckGo returned HTTP ${res.status} ${res.statusText}.`);
    }
    // fetch() settles once the headers arrive; the body is streamed afterwards.
    // Reading it here keeps the abort timer armed until the download is done.
    html = await res.text();
  } catch (e: unknown) {
    // Errors raised above on purpose must not be re-wrapped as network failures.
    if (e instanceof DuckDuckGoError) throw e;
    // Only the timer above ever aborts this controller, so an aborted signal means timeout.
    if (ctrl.signal.aborted || (e instanceof Error && e.name === "AbortError")) {
      throw new DuckDuckGoError(`DuckDuckGo did not respond within ${timeoutMs} ms.`);
    }
    throw new DuckDuckGoError(`Could not reach DuckDuckGo: ${e instanceof Error ? e.message : String(e)}`);
  } finally {
    clearTimeout(timer);
  }
  // A page that mentions bot-detection wording and has no result anchors is a block page.
  if (/anomaly|unusual traffic|captcha/i.test(html) && !/class="result__a"/.test(html)) {
    throw new DuckDuckGoError(
      "DuckDuckGo returned an anti-bot page instead of search results. Try again later or switch to the SearXNG backend.",
    );
  }
  return parseDuckDuckGoHtml(html, maxResults);
}
/**
 * Extracts search hits from a DuckDuckGo HTML result page.
 *
 * Each result is located through its title anchor (`class="result__a"`). The
 * link is read from the `href` attribute of that same anchor tag, the title
 * from the anchor's content, and the snippet from the first
 * `class="result__snippet"` element that appears before the next title anchor.
 * Results whose link does not start with `http`, or whose title is empty after
 * tag stripping, are skipped.
 *
 * @param html - Raw HTML of the result page.
 * @param maxResults - Parsing stops once this many hits have been collected.
 * @returns Hits in document order; `source` is the hostname without a leading
 *   `www.`, or an empty string when the URL cannot be parsed.
 */
function parseDuckDuckGoHtml(html: string, maxResults: number): SearchHit[] {
  const results: SearchHit[] = [];
  const marker = 'class="result__a"';
  const snippetMarker = 'class="result__snippet"';
  // The leading whitespace keeps attributes such as data-href from matching.
  const hrefAttr = /\shref="([^"]*)"/;
  // Written with a group so the pattern itself is not an entity literal.
  const escapedAmp = /&(?:amp);/g;
  let pos = 0;
  while (results.length < maxResults) {
    const markerPos = html.indexOf(marker, pos);
    if (markerPos === -1) break;
    // Advance first so every `continue` below moves on to the next anchor.
    pos = markerPos + marker.length;

    // Bounds of the opening tag that carries the marker.
    const tagStart = html.lastIndexOf("<", markerPos);
    const tagEnd = html.indexOf(">", pos);
    if (tagStart === -1 || tagEnd === -1) continue;
    // A ">" between the last "<" and the marker means the marker sits in text, not in a tag.
    if (html.lastIndexOf(">", markerPos) > tagStart) continue;

    // Read href from this tag only; an href elsewhere belongs to another link.
    const hrefMatch = hrefAttr.exec(html.slice(tagStart, tagEnd));
    if (hrefMatch === null) continue;
    // Attribute values carry ampersands in escaped form; undo that before use.
    const rawUrl = (hrefMatch[1] ?? "").replace(escapedAmp, "&");
    const url = rawUrl.includes("uddg=") ? extractDdgUrl(rawUrl) : rawUrl;
    if (!url.startsWith("http")) continue;

    const titleEnd = html.indexOf("</a>", tagEnd + 1);
    const title = titleEnd === -1 ? "" : stripTags(html.slice(tagEnd + 1, titleEnd)).trim();
    if (!title) continue;

    // The snippet must belong to this result: stop looking at the next title anchor.
    const nextMarker = html.indexOf(marker, pos);
    const resultEnd = nextMarker === -1 ? html.length : nextMarker;
    let snippet = "";
    const snippetPos = html.indexOf(snippetMarker, pos);
    if (snippetPos !== -1 && snippetPos < resultEnd) {
      const open = html.indexOf(">", snippetPos + snippetMarker.length);
      const close = open === -1 ? -1 : html.indexOf("</a>", open + 1);
      if (close !== -1) snippet = stripTags(html.slice(open + 1, close)).trim();
    }

    let host = "";
    try {
      host = new URL(url).hostname.replace(/^www\./, "");
    } catch {
      // Unparseable URL: keep the hit but leave its source empty.
      host = "";
    }
    results.push({ title, url, content: snippet, source: host });
  }
  return results;
}
/**
 * Recovers the destination URL from a DuckDuckGo redirect link.
 *
 * The value following the first `uddg=` (up to the next `&` or the end of the
 * string) is percent-decoded, with every `+` treated as a space.
 *
 * @param raw - The link as found in the result page.
 * @returns The decoded destination, or `raw` unchanged when it contains no
 *   `uddg=` or the value is not valid percent-encoding.
 */
function extractDdgUrl(raw: string): string {
  const match = /uddg=([^&]*)/.exec(raw);
  if (match === null) {
    return raw;
  }
  const withSpaces = (match[1] ?? "").split("+").join(" ");
  try {
    return decodeURIComponent(withSpaces);
  } catch {
    return raw;
  }
}
/**
 * Converts an HTML fragment to plain text.
 *
 * First removes everything between `<` and `>` (brackets included), then
 * decodes character references in a single pass: the named references amp,
 * lt, gt, quot, apos and nbsp, plus decimal and hexadecimal numeric
 * references. A non-breaking space becomes a regular space. References that
 * are unknown or denote an invalid code point are left as written.
 *
 * @param s - HTML fragment.
 * @returns The text content with tags removed and references decoded.
 */
function stripTags(s: string): string {
  let text = "";
  let inTag = false;
  for (const c of s) {
    if (c === "<") inTag = true;
    else if (c === ">") inTag = false;
    else if (!inTag) text += c;
  }

  // A Map (not an object literal) so names like "constructor" cannot hit the prototype.
  const named = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
  ]);

  // One pass only: text produced by a replacement is never scanned again, so an
  // escaped ampersand followed by "lt;" yields the literal reference, not "<".
  return text.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g,
    (entity: string, body: string): string => {
      if (!body.startsWith("#")) return named.get(body) ?? entity;
      const isHex = body[1] === "x" || body[1] === "X";
      const code = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      const isSurrogate = code >= 0xd800 && code <= 0xdfff;
      // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates are not text.
      if (code === 0 || code > 0x10ffff || isSurrogate) return entity;
      return code === 0xa0 ? " " : String.fromCodePoint(code);
    },
  );
}