Project Files

.claude

settings.local.json

src

bing

build-urls.ts

index.ts

parse-results.ts

search-images.ts

cache

image-search-results-payload.ts

index.ts

search-cache-key.ts

ttl-cache.ts

web-search-result.ts

web-search-results-payload.ts

config

auto-sentinel.ts

config-schematics.ts

resolve-config.ts

duckduckgo

build-urls.ts

index.ts

parse-results.ts

search-web.ts

enrichment

enrich-search-results.ts

index.ts

metascraper-helpers.d.ts

metascraper.ts

errors

abort-error.ts

error-message.ts

index.ts

no-results-error.ts

tool-error.ts

unsupported-content-type-error.ts

index.ts

lmstudio-home.ts

markdown-path.ts

url-filename.ts

http

decode.ts

fetch-error.ts

fetch-retry.ts

fetch.ts

impit-client.ts

impit-error.ts

index.ts

parse-content-type.ts

redirects.ts

response-body.ts

ssrf.ts

url-schema.ts

images

download-image.ts

download-images.ts

index.ts

page

fetch-page.ts

fetched-page.ts

index.ts

page-kind.ts

parsers

image-extensions.ts

index.ts

page-images.ts

page-text.ts

pdf-text.ts

renderers

image-results.ts

index.ts

page-result.ts

retrieval

chunks.ts

excerpt.ts

index.ts

relevance.ts

selection.ts

index.ts

safe-search.ts

search-page-parameter.ts

text

escape-markdown.ts

html-to-markdown.ts

html-to-text.ts

index.ts

normalize-blank-lines.ts

normalize-text.ts

timing

index.ts

per-host-rate-limiter.ts

rate-limiter.ts

tools

fetch-images-tool.ts

image-search-tool.ts

visit-website-tool.ts

web-search-tool.ts

index.ts

tools-provider.ts

.gitignore

.prettierrc.json

CLAUDE.md

eslint.config.mjs

knip.json

LICENSE

manifest.json

package-lock.json

package.json

QWEN.md

README.md

thumbnail.png

tsconfig.json

tsdoc.json

src / renderers / page-result.ts

/**
 * Per-kind rendering that turns a `FetchedPage` into the `PageResult` by narrowing on the
 * fetched page's kind. HTML runs a single jsdom parse that yields both the headings and the full
 * readable content; every non-HTML kind (PDF, plain text, JSON) supplies its pre-extracted text.
 * Both paths then route their content through the `retrieval` excerpt builder to bound it to the
 * configured budget. Built on the `parsers` extractors and the `retrieval` excerpt builder; sits a
 * layer above `fetchPage` — acquire, parse, then render.
 */

import { extractHtmlPage } from "../parsers"
import { buildExcerpt } from "../retrieval"

import type { ContentFormat } from "../config/resolve-config"
import type { FetchedPage } from "../page"

/**
 * Rendered result for a fetched page. Only `url`, `kind`, and `mimeType` are always present;
 * each optional field is left off the object entirely (rather than set to `""` or `0`) when it
 * has no value, which keeps the payload compact.
 */
export interface PageResult {
  /** URL that was visited, echoed back. */
  url: string
  /** Classified page kind, typed as the `kind` discriminant of `FetchedPage`. */
  kind: FetchedPage["kind"]
  /** Effective MIME type reported by the server or sniffed from the payload. */
  mimeType: string
  /** Page title when available (HTML `<title>` or PDF metadata `Title`); omitted when empty. */
  title?: string
  /** First `<h1>` of an HTML page; omitted when empty and for non-HTML kinds. */
  h1?: string
  /** First `<h2>` of an HTML page; omitted when empty and for non-HTML kinds. */
  h2?: string
  /** Excerpt of the page content, truncated to the configured character budget; omitted when empty. */
  content?: string
  /**
   * Character count of the full extracted content before truncation or windowing; omitted
   * when it is not greater than zero.
   */
  contentLength?: number
}

/**
 * Options controlling how a fetched page is rendered, shared by both excerpt paths (HTML and
 * pre-extracted text). All three properties are required; `findInPage` carries `undefined`
 * explicitly when there are no search terms.
 */
export interface PageResultOptions {
  /** Character budget for the returned excerpt, forwarded to the excerpt builder. */
  contentLimit: number
  /** Search terms biasing content selection, or `undefined` when there are none. */
  findInPage: string[] | undefined
  /** Output format applied to HTML content; the pre-extracted-text path ignores it. */
  contentFormat: ContentFormat
}

/**
 * Turn a fetched page into its `PageResult`, branching on the page's `kind`.
 *
 * HTML pages are handed to `extractHtmlPage`, which supplies both the headings and the content;
 * every other kind contributes its already-extracted `text` and `title`. Either way the content is
 * passed through `buildExcerpt` with the configured limit before the result is assembled.
 *
 * @param url - URL that was visited.
 * @param page - Fetched and classified page payload.
 * @param options - Excerpt budget, in-page search terms, and HTML output format.
 * @returns The page result; `h1` and `h2` can only be present for HTML pages.
 */
export function renderPageResult(url: string, page: FetchedPage, options: PageResultOptions): PageResult {
  // Non-HTML kinds already carry their text, so they skip HTML extraction (and `contentFormat`).
  if (page.kind !== "html") {
    const { content, totalLength } = buildExcerpt(page.text, options.contentLimit, options.findInPage)

    return assembleResult(url, page.kind, page.mimeType, {
      title: page.title,
      content,
      contentLength: totalLength,
    })
  }

  const extracted = extractHtmlPage(page.html, url, options.contentFormat)
  const { content, totalLength } = buildExcerpt(extracted.content, options.contentLimit, options.findInPage)
  const { title, h1, h2 } = extracted.headings

  return assembleResult(url, page.kind, page.mimeType, {
    title,
    h1,
    h2,
    content,
    contentLength: totalLength,
  })
}

/**
 * Optional fields collected for a page result before empty values are stripped. A field that is
 * absent or `undefined` is treated the same as an empty one.
 */
interface ResultCandidates {
  /** Candidate title; dropped when it is the empty string. */
  title?: string
  /** Candidate first-level heading; dropped when it is the empty string. */
  h1?: string
  /** Candidate second-level heading; dropped when it is the empty string. */
  h2?: string
  /** Candidate content excerpt; dropped when it is the empty string. */
  content?: string
  /** Candidate content length; kept only when greater than zero. */
  contentLength?: number
}

/**
 * Build a `PageResult` from the always-present identity fields plus whichever optional
 * candidates actually carry a value. Empty strings and non-positive content lengths are left off
 * the result so the response stays compact.
 *
 * @param url - URL that was visited.
 * @param kind - Classified page kind.
 * @param mimeType - Effective MIME type.
 * @param candidates - Optional fields; empty or missing ones are not copied to the result.
 * @returns The page result containing only the populated optional fields.
 */
function assembleResult(
  url: string,
  kind: FetchedPage["kind"],
  mimeType: string,
  candidates: ResultCandidates
): PageResult {
  const hasText = (text: string | undefined): text is string => text !== undefined && text.length > 0

  const assembled: PageResult = { url, kind, mimeType }

  // Text fields are copied in a fixed order: title, h1, h2, content.
  const textFields = [
    ["title", candidates.title],
    ["h1", candidates.h1],
    ["h2", candidates.h2],
    ["content", candidates.content],
  ] as const

  for (const [field, text] of textFields) {
    if (!hasText(text)) {
      continue
    }

    assembled[field] = text
  }

  const { contentLength } = candidates

  if (contentLength !== undefined && contentLength > 0) {
    assembled.contentLength = contentLength
  }

  return assembled
}

web-tools