"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.toolsProvider = toolsProvider;
const sdk_1 = require("@lmstudio/sdk");
const zod_1 = require("zod");
const configSchematics_1 = require("./configSchematics");
const engines_1 = require("./engines");
const extract_1 = require("./extract");
const ranking_1 = require("./ranking");
const cache_1 = require("./cache");
const weather_1 = require("./weather");
// Minimum top-result score; anything lower triggers a "results look
// irrelevant" advisory for the model.
const LOW_RELEVANCE_THRESHOLD = 0.35;
/**
 * Produces an advisory note for the model when a ranked result list looks
 * unhelpful.
 *
 * @param {Array<{score: number}>} ranked - Ranked results; only the first
 *   entry's `score` is inspected.
 * @param {string} query - The query text, echoed back inside the note.
 * @returns {string|undefined} A note when the list is empty or the first
 *   result scores below LOW_RELEVANCE_THRESHOLD; otherwise `undefined`.
 */
function relevanceNote(ranked, query) {
    const [best] = ranked;
    // Nothing survived filtering at all.
    if (ranked.length === 0) {
        return `No results passed the noise filter for "${query}". Try rephrasing or a different engine. If this is a weather query, use get_weather instead.`;
    }
    const bestScore = best.score;
    const looksRelevant = !(bestScore < LOW_RELEVANCE_THRESHOLD);
    return looksRelevant
        ? undefined
        : `Top result score is low (${bestScore.toFixed(2)} < ${LOW_RELEVANCE_THRESHOLD}). Snippets may not actually answer "${query}" — consider rephrasing, switching engines, or using a specialized tool (e.g. get_weather for weather).`;
}
// The engine ids are spelled out inside the tool descriptions so the model
// can choose an engine directly, without a list_search_engines round trip.
const ENGINE_HINT = [
    `Available engines: ${[
        "duckduckgo",
        "brave",
        "bing",
        "qwant",
        "ecosia",
        "startpage",
        "metager",
        "wikipedia",
        "arxiv",
        "reddit",
        "stackoverflow",
        "github",
        "google-scholar",
        "devdocs",
    ].join(", ")}.`,
    "Pick wikipedia for encyclopedic summaries, arxiv for papers, github for code/repos, stackoverflow for programming Q&A, reddit for discussion.",
    "For everything else, duckduckgo / brave / bing are good general engines.",
].join(" ");
// Both caches are module-level singletons: one per worker process, rebuilt
// whenever the plugin is reloaded.
// NOTE(review): the constructor arguments are presumably (max entries, TTL in
// ms) — confirm against ./cache.
const FIVE_MINUTES_MS = 5 * 60 * 1000;
const searchCache = new cache_1.TtlLruCache(100, FIVE_MINUTES_MS);
const fullSearchCache = new cache_1.TtlLruCache(50, FIVE_MINUTES_MS);
/**
 * Builds the list of tools this plugin exposes.
 *
 * Reads the per-chat and global plugin configuration from `ctl`, then returns
 * five tools: `list_search_engines`, `web_search` (single- or multi-engine
 * variant depending on the configured mode), `fetch_page`, `web_search_full`
 * and `get_weather`.
 *
 * @param {object} ctl - Controller exposing `getPluginConfig(schematics)`.
 * @returns {Promise<Array>} The tool list, or an empty array when the
 *   `searchEnabled` setting is falsy.
 */
async function toolsProvider(ctl) {
    const config = ctl.getPluginConfig(configSchematics_1.configSchematics);
    const globalConfig = ctl.getPluginConfig(configSchematics_1.globalConfigSchematics);
    // Master kill-switch.
    if (!config.get("searchEnabled"))
        return [];
    const mode = config.get("mode"); // "single" | "multi"
    const topK = config.get("topK");
    const snippetMaxChars = config.get("snippetMaxChars");
    const includeScoreBreakdown = config.get("includeScoreBreakdown");
    // NOTE(review): this value only switches caching on/off (> 0) in this
    // function; the actual entry lifetime is whatever the module-level caches
    // were constructed with. Confirm whether the configured TTL should be
    // honoured.
    const cacheTtlMs = config.get("cacheTtlSec") * 1000;
    const cachingEnabled = cacheTtlMs > 0;
    /** Options passed to a search engine; `max` overrides the configured per-engine limit (clamped to 1..20). */
    function buildEngineOpts(max) {
        const cfgMax = config.get("maxResultsPerEngine");
        const maxResults = Math.max(1, Math.min(20, max ?? cfgMax));
        return {
            userAgent: globalConfig.get("userAgent"),
            timeoutMs: globalConfig.get("requestTimeoutMs"),
            maxResults,
        };
    }
    /** Options passed to the page extractor; `query` is forwarded as-is. */
    function buildFetchOpts(query) {
        return {
            userAgent: globalConfig.get("userAgent"),
            timeoutMs: globalConfig.get("pageFetchTimeoutMs"),
            maxContentLength: globalConfig.get("maxContentLength"),
            query,
        };
    }
    /** Engine ids whose per-engine toggle is truthy in the plugin config. */
    function enabledMultiEngines() {
        return engines_1.ALL_ENGINE_IDS.filter((id) => {
            const key = (0, configSchematics_1.engineToggleKey)(id);
            return Boolean(config.get(key));
        });
    }
    /**
     * Error text for a multi-mode search that ended up with no usable engine.
     * Distinguishes "caller passed only unknown ids" from "nothing enabled".
     */
    function multiModeEngineError(unknownEngines, noneEnabledMessage) {
        return unknownEngines.length > 0
            ? `Unknown engine(s): ${unknownEngines.join(", ")}. Pass valid engine ids via engines: [...].`
            : noneEnabledMessage;
    }
    /**
     * Runs the search in the configured mode.
     *
     * @param {{query: string, engine?: string, engines?: string[], maxPerEngine?: number}} args
     * @param {(message: string) => void} warn - Receives one message per failed engine.
     * @returns {Promise<object>} `{ enginesUsed, unknownEngines, perEngineCounts,
     *   raw, fromCache, hadFailures }`. `hadFailures` is true when at least one
     *   engine threw; such results are never written to the cache so a
     *   transient failure is not replayed on the next identical query.
     */
    async function runSearch(args, warn) {
        const opts = buildEngineOpts(args.maxPerEngine);
        // Single mode.
        if (mode === "single") {
            const id = (args.engine ?? config.get("singleEngine")).toLowerCase();
            if (!(0, engines_1.isEngine)(id)) {
                return {
                    enginesUsed: [],
                    unknownEngines: [id],
                    perEngineCounts: {},
                    raw: [],
                    fromCache: false,
                    hadFailures: false,
                };
            }
            const cacheKey = `s|${id}|${opts.maxResults}|${args.query}`;
            const cached = cachingEnabled ? searchCache.get(cacheKey) : undefined;
            if (cached) {
                return {
                    enginesUsed: [id],
                    unknownEngines: [],
                    perEngineCounts: { [id]: cached.length },
                    raw: cached,
                    fromCache: true,
                    hadFailures: false,
                };
            }
            let results = [];
            let failed = false;
            try {
                results = await engines_1.ENGINES[id](args.query, opts);
            }
            catch (e) {
                failed = true;
                warn(`Engine ${id} failed: ${e?.message ?? e}`);
            }
            // Only successful lookups are cached; a failure must stay retryable.
            if (cachingEnabled && !failed) {
                searchCache.set(cacheKey, results);
            }
            return {
                enginesUsed: [id],
                unknownEngines: [],
                perEngineCounts: { [id]: results.length },
                raw: results,
                fromCache: false,
                hadFailures: failed,
            };
        }
        // Multi mode.
        const requested = args.engines && args.engines.length > 0
            ? args.engines.map((e) => e.toLowerCase())
            : enabledMultiEngines();
        const unknown = [];
        const valid = [];
        // A Set drops repeated ids (keeping first-seen order) so the same
        // engine is never queried twice for one call.
        for (const id of new Set(requested))
            ((0, engines_1.isEngine)(id) ? valid : unknown).push(id);
        if (valid.length === 0) {
            return {
                enginesUsed: [],
                unknownEngines: unknown,
                perEngineCounts: {},
                raw: [],
                fromCache: false,
                hadFailures: false,
            };
        }
        const cacheKey = `m|${valid.slice().sort().join(",")}|${opts.maxResults}|${args.query}`;
        // Multi-mode entries are stored as { raw, perEngineCounts } so a cache
        // hit can report the real per-engine counts.
        const cached = cachingEnabled ? searchCache.get(cacheKey) : undefined;
        if (cached) {
            return {
                enginesUsed: valid,
                unknownEngines: unknown,
                perEngineCounts: { ...cached.perEngineCounts },
                raw: cached.raw,
                fromCache: true,
                hadFailures: false,
            };
        }
        const perEngine = await Promise.all(valid.map(async (id) => {
            try {
                return { id, results: await engines_1.ENGINES[id](args.query, opts), failed: false };
            }
            catch (e) {
                warn(`Engine ${id} failed: ${e?.message ?? e}`);
                return { id, results: [], failed: true };
            }
        }));
        // Round-robin interleave (ranking will re-sort, but this gives a fair
        // starting order before scoring breaks ties).
        const maxLen = Math.max(...perEngine.map((p) => p.results.length), 0);
        const interleaved = [];
        for (let i = 0; i < maxLen; i++) {
            for (const p of perEngine) {
                const r = p.results[i];
                if (r)
                    interleaved.push(r);
            }
        }
        const perEngineCounts = Object.fromEntries(perEngine.map((p) => [p.id, p.results.length]));
        const hadFailures = perEngine.some((p) => p.failed);
        // Partial results (some engine threw) are returned but not cached.
        if (cachingEnabled && !hadFailures)
            searchCache.set(cacheKey, { raw: interleaved, perEngineCounts: { ...perEngineCounts } });
        return {
            enginesUsed: valid,
            unknownEngines: unknown,
            perEngineCounts,
            raw: interleaved,
            fromCache: false,
            hadFailures,
        };
    }
    // ---------- tools ----------
    const listSearchEnginesTool = (0, sdk_1.tool)({
        name: "list_search_engines",
        description: (0, sdk_1.text) `
      Returns the list of supported search engine ids and the user's current
      mode (single/multi). You usually do NOT need to call this — the engine
      ids are: duckduckgo, brave, bing, qwant, ecosia, startpage, metager,
      wikipedia, arxiv, reddit, stackoverflow, github, google-scholar, devdocs.
    `,
        parameters: {},
        implementation: async () => ({
            engines: engines_1.ALL_ENGINE_IDS,
            mode,
            single_engine: config.get("singleEngine"),
            multi_engines: enabledMultiEngines(),
            hint: "Prefer web_search_full over web_search+fetch_page when you need page content. For Wikipedia summaries pass engine: 'wikipedia'.",
        }),
    });
    // ----- single-engine web_search -----
    const singleSearchTool = (0, sdk_1.tool)({
        name: "web_search",
        description: (0, sdk_1.text) `
      Searches the web on a single engine and returns the top-K best results
      after de-duplication and host-trust ranking. Each result has: title,
      snippet (trimmed), url, source, host, score (0–1, higher is better).

      Usage rules:
      • Pass \`engine: 'wikipedia'\` for encyclopedic summaries, 'arxiv' for
        papers, 'github' for code, 'stackoverflow' for programming Q&A.
      • If you need the actual page content (not just snippets), call
        \`web_search_full\` INSTEAD of \`web_search\` + \`fetch_page\`. One call.
      • Do NOT retry the same query verbatim if results are poor — change the
        wording or switch engines.

      ${ENGINE_HINT}
    `,
        parameters: {
            query: zod_1.z.string().min(1).describe("The search query."),
            engine: zod_1.z
                .string()
                .optional()
                .describe("Engine id (see description). Defaults to the user's configured engine."),
            max_results: zod_1.z
                .number()
                .int()
                .min(1)
                .max(25)
                .optional()
                .describe("How many ranked results to return after filtering."),
        },
        implementation: async ({ query, engine, max_results }, { warn }) => {
            // max_results doubles as the raw per-engine request size here
            // (buildEngineOpts clamps it to 20).
            const r = await runSearch({ query, engine, maxPerEngine: max_results }, warn);
            if (r.enginesUsed.length === 0) {
                return {
                    error: `Unknown engine: ${r.unknownEngines[0] ?? engine}`,
                    available: engines_1.ALL_ENGINE_IDS,
                };
            }
            const ranked = (0, ranking_1.rankAndFilter)(r.raw, {
                query,
                topK: max_results ?? topK,
                snippetMaxChars,
                includeScoreBreakdown,
            });
            const note = relevanceNote(ranked, query);
            return {
                query,
                engine: r.enginesUsed[0],
                from_cache: r.fromCache,
                returned: ranked.length,
                dropped_as_noise: r.raw.length - ranked.length,
                results: ranked,
                ...(note ? { note } : {}),
            };
        },
    });
    // ----- multi-engine web_search -----
    const multiSearchTool = (0, sdk_1.tool)({
        name: "web_search",
        description: (0, sdk_1.text) `
      Searches the web across MULTIPLE engines in parallel (the engines the
      user pre-selected) and returns a single de-duplicated, host-trust-ranked
      top-K list. Each result has: title, snippet (trimmed), url, source,
      host, score (0–1).

      Usage rules:
      • If you need page CONTENT, call \`web_search_full\` instead — it does
        the search + page fetch in one call.
      • If the user mentions a specific site (Wikipedia, GitHub, arXiv,
        Reddit, StackOverflow), pass \`engines: ["wikipedia"]\` etc. to
        restrict the search. Don't dump everything if they asked for one source.
      • Don't retry the same query verbatim — change wording or engines.

      ${ENGINE_HINT}
    `,
        parameters: {
            query: zod_1.z.string().min(1).describe("The search query."),
            engines: zod_1.z
                .array(zod_1.z.string())
                .optional()
                .describe("Optional override engine list. Use this to target one site (e.g. ['wikipedia']) when the user asked for it."),
            max_results: zod_1.z
                .number()
                .int()
                .min(1)
                .max(25)
                .optional()
                .describe("How many ranked results to return after filtering."),
            max_results_per_engine: zod_1.z
                .number()
                .int()
                .min(1)
                .max(20)
                .optional()
                .describe("Raw results requested per engine before ranking."),
        },
        implementation: async ({ query, engines, max_results, max_results_per_engine }, { warn }) => {
            const r = await runSearch({ query, engines, maxPerEngine: max_results_per_engine }, warn);
            if (r.enginesUsed.length === 0) {
                return {
                    error: multiModeEngineError(r.unknownEngines, "No engines enabled for Multi mode. Either pass `engines: [...]` explicitly or have the user tick engines in plugin settings."),
                    unknown_engines: r.unknownEngines,
                    available: engines_1.ALL_ENGINE_IDS,
                };
            }
            const ranked = (0, ranking_1.rankAndFilter)(r.raw, {
                query,
                topK: max_results ?? topK,
                snippetMaxChars,
                includeScoreBreakdown,
            });
            const note = relevanceNote(ranked, query);
            return {
                query,
                engines_used: r.enginesUsed,
                unknown_engines: r.unknownEngines,
                per_engine_counts: r.perEngineCounts,
                from_cache: r.fromCache,
                returned: ranked.length,
                dropped_as_noise: r.raw.length - ranked.length,
                results: ranked,
                ...(note ? { note } : {}),
            };
        },
    });
    // Only one of the two web_search variants is exposed, chosen by mode.
    const activeSearchTool = mode === "multi" ? multiSearchTool : singleSearchTool;
    // ----- fetch_page -----
    const fetchPageTool = (0, sdk_1.tool)({
        name: "fetch_page",
        description: (0, sdk_1.text) `
      Fetches a single URL and returns its extracted main text (title + body,
      with ads/nav/scripts stripped). Use this when you ALREADY have a URL
      and only need that one page's content.

      If you want to search AND read top results, use \`web_search_full\`
      instead — it combines both in one call and uses query-aware truncation
      to keep the most relevant paragraphs.
    `,
        parameters: {
            url: zod_1.z.string().url().describe("The full http(s) URL of the page to fetch."),
            query: zod_1.z
                .string()
                .optional()
                .describe("Optional. If provided, when the page is too long the extractor keeps paragraphs containing these query terms instead of just the first N chars."),
            max_content_length: zod_1.z
                .number()
                .int()
                .min(500)
                .max(200000)
                .optional()
                .describe("Hard cap on returned content length. Defaults to plugin setting."),
        },
        implementation: async ({ url, query, max_content_length }) => {
            const opts = buildFetchOpts(query);
            // Per-call cap overrides the global setting when supplied.
            if (max_content_length)
                opts.maxContentLength = max_content_length;
            return await (0, extract_1.fetchAndExtract)(url, opts);
        },
    });
    // ----- web_search_full -----
    const fullSearchTool = (0, sdk_1.tool)({
        name: "web_search_full",
        description: (0, sdk_1.text) `
      ONE-STOP tool: searches the web AND concurrently fetches the top N
      result pages, returning each result with its extracted main text
      attached as \`page.content\`. Page extraction is query-aware — for
      long pages it keeps paragraphs containing your query terms.

      Use this INSTEAD of doing \`web_search\` followed by multiple
      \`fetch_page\` calls. Keep \`fetch_top_n\` small (1–5) to limit context
      cost. Pass \`engines: ["wikipedia"]\` etc. to restrict to one source.

      ${ENGINE_HINT}
    `,
        parameters: {
            query: zod_1.z.string().min(1).describe("The search query."),
            engine: zod_1.z.string().optional().describe("Single-mode engine override."),
            engines: zod_1.z
                .array(zod_1.z.string())
                .optional()
                .describe("Multi-mode engine list override (e.g. ['wikipedia'])."),
            fetch_top_n: zod_1.z
                .number()
                .int()
                .min(0)
                .max(10)
                .optional()
                .describe("How many of the top-ranked results to fetch full content for (0–10)."),
            max_results: zod_1.z
                .number()
                .int()
                .min(1)
                .max(25)
                .optional()
                .describe("How many ranked results to return overall."),
            max_results_per_engine: zod_1.z
                .number()
                .int()
                .min(1)
                .max(20)
                .optional()
                .describe("Raw results requested per engine before ranking."),
        },
        implementation: async ({ query, engine, engines, fetch_top_n, max_results, max_results_per_engine }, { warn }) => {
            // Every argument that can change the output is part of the key.
            const fullKey = `f|${mode}|${engine ?? ""}|${(engines ?? []).slice().sort().join(",")}|${fetch_top_n ?? ""}|${max_results ?? ""}|${max_results_per_engine ?? ""}|${query}`;
            if (cachingEnabled) {
                const hit = fullSearchCache.get(fullKey);
                if (hit)
                    return { ...hit, from_cache: true };
            }
            const r = await runSearch({ query, engine, engines, maxPerEngine: max_results_per_engine }, warn);
            if (r.enginesUsed.length === 0) {
                return {
                    error: mode === "single"
                        ? `Unknown engine: ${r.unknownEngines[0] ?? engine}`
                        : multiModeEngineError(r.unknownEngines, "No engines enabled for Multi mode. Pass engines: [...] or have the user tick engines in settings."),
                    unknown_engines: r.unknownEngines,
                    available: engines_1.ALL_ENGINE_IDS,
                };
            }
            const ranked = (0, ranking_1.rankAndFilter)(r.raw, {
                query,
                topK: max_results ?? topK,
                snippetMaxChars,
                includeScoreBreakdown,
            });
            const topN = Math.max(0, Math.min(10, fetch_top_n ?? config.get("fetchTopN")));
            const concurrency = config.get("fetchConcurrency");
            const fetchOpts = buildFetchOpts(query);
            const toFetch = ranked.slice(0, topN).map((x) => x.url);
            const pages = topN > 0 ? await (0, extract_1.fetchAndExtractMany)(toFetch, fetchOpts, concurrency) : [];
            // Pages are matched back to results by their `url` field.
            const byUrl = new Map(pages.map((p) => [p.url, p]));
            const enriched = ranked.map((res, i) => ({
                ...res,
                page: i < topN ? byUrl.get(res.url) ?? null : null,
            }));
            const note = relevanceNote(ranked, query);
            const out = {
                query,
                mode,
                engines_used: r.enginesUsed,
                unknown_engines: r.unknownEngines,
                per_engine_counts: r.perEngineCounts,
                returned: ranked.length,
                dropped_as_noise: r.raw.length - ranked.length,
                fetched: pages.length,
                results: enriched,
                from_cache: false,
                ...(note ? { note } : {}),
            };
            // Skip caching when a search engine threw, so the next identical
            // call retries instead of replaying a degraded result.
            if (cachingEnabled && !r.hadFailures)
                fullSearchCache.set(fullKey, out);
            return out;
        },
    });
    // ----- get_weather -----
    const weatherTool = (0, sdk_1.tool)({
        name: "get_weather",
        description: (0, sdk_1.text) `
      Returns CURRENT weather conditions and a 3-day forecast for a city or
      location. USE THIS for any weather question — do NOT use web_search for
      weather, because search snippets rarely contain actual temperatures.

      Data source: wttr.in (free, no key). Pass any human location string:
      'Oulu', 'São Paulo, Brazil', 'New York', '94103' (US ZIP), 'London, UK',
      airport codes like 'JFK', etc.
    `,
        parameters: {
            location: zod_1.z
                .string()
                .min(1)
                .describe("City, region, ZIP, or airport code. e.g. 'Oulu' or 'São Paulo, Brazil'."),
            units: zod_1.z
                .enum(["metric", "imperial"])
                .optional()
                .describe("Temperature/wind units. Default: metric (°C, km/h)."),
            lang: zod_1.z
                .string()
                .optional()
                .describe("ISO language code for the weather description (e.g. 'en', 'fi', 'pt')."),
        },
        implementation: async ({ location, units, lang }) => {
            return await (0, weather_1.getWeather)(location, {
                userAgent: globalConfig.get("userAgent"),
                timeoutMs: globalConfig.get("requestTimeoutMs"),
                units,
                lang,
            });
        },
    });
    return [listSearchEnginesTool, activeSearchTool, fetchPageTool, fullSearchTool, weatherTool];
}
//# sourceMappingURL=toolsProvider.js.map
multi-search

multi-search