Project Files

.claude

settings.local.json

src

bing

build-urls.ts

index.ts

parse-results.ts

search-images.ts

cache

image-search-results-payload.ts

index.ts

search-cache-key.ts

ttl-cache.ts

web-search-result.ts

web-search-results-payload.ts

config

auto-sentinel.ts

config-schematics.ts

resolve-config.ts

duckduckgo

build-urls.ts

index.ts

parse-results.ts

search-web.ts

enrichment

enrich-search-results.ts

index.ts

metascraper-helpers.d.ts

metascraper.ts

errors

abort-error.ts

error-message.ts

index.ts

no-results-error.ts

tool-error.ts

unsupported-content-type-error.ts

index.ts

lmstudio-home.ts

markdown-path.ts

url-filename.ts

http

decode-bytes.ts

fetch-error.ts

fetch-retry.ts

fetch.ts

impit-client.ts

impit-error.ts

index.ts

parse-content-type.ts

redirects.ts

response-body.ts

ssrf.ts

url-schema.ts

images

download-image.ts

download-images.ts

index.ts

page

fetch-page.ts

fetched-page.ts

index.ts

page-kind.ts

render-visit-result.ts

parsers

image-extensions.ts

index.ts

page-images.ts

page-text.ts

pdf-text.ts

index.ts

safe-search.ts

search-page-parameter.ts

text

escape-markdown.ts

html-to-markdown.ts

html-to-text.ts

index.ts

normalize-blank-lines.ts

normalize-text.ts

timing

index.ts

per-host-rate-limiter.ts

rate-limiter.ts

tools

fetch-images-tool.ts

image-search-tool.ts

visit-website-tool.ts

web-search-tool.ts

index.ts

tools-provider.ts

.gitignore

.prettierrc.json

CLAUDE.md

eslint.config.mjs

knip.json

LICENSE

manifest.json

package-lock.json

package.json

QWEN.md

README.md

thumbnail.png

tsconfig.json

tsdoc.json

src / page / render-visit-result.ts

/**
 * Per-kind rendering that builds the page-visit result by narrowing on the fetched page's
 * kind. HTML runs a single jsdom parse that yields both headings and the format-aware excerpt;
 * every non-HTML kind (PDF, plain text, JSON) feeds its pre-extracted text straight through the
 * text excerpt pipeline.
 */

import { buildTextExcerpt, extractHtmlPage } from "../parsers"

import type { FetchedPage } from "./fetched-page"
import type { ContentFormat } from "../config/resolve-config"

/**
 * Structured result of visiting a page, as returned by `renderVisitResult`.
 *
 * `url`, `kind`, and `mimeType` are always present. Every other field is optional and is left
 * off the object entirely (not set to an empty value) when there is nothing to report, which
 * keeps the payload compact.
 */
export interface VisitWebsiteResult {
  /** URL that was visited, echoed back unchanged. */
  url: string
  /** Classified page kind, taken from the fetched page's `kind` discriminant. */
  kind: FetchedPage["kind"]
  /** Effective MIME type reported by the server or sniffed from the payload. */
  mimeType: string
  /** Page title when available (HTML `<title>` or PDF metadata `Title`); omitted when empty. */
  title?: string
  /** First `<h1>` of an HTML page; omitted when empty and never set for non-HTML kinds. */
  h1?: string
  /** First `<h2>` of an HTML page; omitted when empty and never set for non-HTML kinds. */
  h2?: string
  /** Excerpt of the page content, truncated to the configured character budget; omitted when empty. */
  content?: string
  /** Character count of the full extracted content before truncation or windowing; omitted when zero. */
  contentLength?: number
}

/**
 * Inputs shared by both excerpt paths (HTML and pre-extracted text).
 *
 * `renderVisitResult` forwards all three fields to `extractHtmlPage` for HTML pages, and only
 * `contentLimit` and `findInPage` to `buildTextExcerpt` for every other kind.
 */
export interface ExcerptInputs {
  /** Character budget for the returned excerpt. */
  contentLimit: number
  /** Optional search terms biasing content selection; `undefined` when the caller supplied none. */
  findInPage: string[] | undefined
  /** Output format applied to HTML content; the pre-extracted-text path ignores it. */
  contentFormat: ContentFormat
}

/**
 * Build the Visit Website response for a fetched page, branching on the page's `kind`.
 *
 * Non-HTML pages already carry extracted text, so that text goes directly through
 * `buildTextExcerpt` and only the page's own title is reported. HTML pages are handed to
 * `extractHtmlPage`, which returns both the headings and the excerpt in one call.
 *
 * @param url - URL that was visited.
 * @param page - Fetched and classified page payload.
 * @param inputs - Shared excerpt inputs.
 * @returns The user-facing result with content and (for HTML) headings populated.
 */
export function renderVisitResult(url: string, page: FetchedPage, inputs: ExcerptInputs): VisitWebsiteResult {
  const { contentLimit, findInPage, contentFormat } = inputs

  // Guard clause: everything that is not HTML shares the plain-text excerpt path.
  if (page.kind !== "html") {
    const textExcerpt = buildTextExcerpt(page.text, contentLimit, findInPage)

    return assembleResult(url, page.kind, page.mimeType, {
      title: page.title,
      content: textExcerpt.content,
      contentLength: textExcerpt.totalLength,
    })
  }

  const extracted = extractHtmlPage(page.html, url, contentLimit, findInPage, contentFormat)
  const { title, h1, h2 } = extracted.headings

  return assembleResult(url, page.kind, page.mimeType, {
    title,
    h1,
    h2,
    content: extracted.excerpt.content,
    contentLength: extracted.excerpt.totalLength,
  })
}

/**
 * Optional fields collected for a Visit Website response before empty values are stripped.
 *
 * This is the input shape of `assembleResult`: callers may pass empty strings or a zero length
 * freely, and `assembleResult` decides which candidates make it into the final result.
 */
interface ResultCandidates {
  /** Candidate title; dropped when `undefined` or an empty string. */
  title?: string
  /** Candidate first-level heading; dropped when `undefined` or an empty string. */
  h1?: string
  /** Candidate second-level heading; dropped when `undefined` or an empty string. */
  h2?: string
  /** Candidate content excerpt; dropped when `undefined` or an empty string. */
  content?: string
  /** Candidate content length; dropped when `undefined` or not greater than zero. */
  contentLength?: number
}

/**
 * Build a `VisitWebsiteResult` from the always-present identity fields plus whichever optional
 * candidates carry a value, so empty strings and non-positive lengths never reach the payload.
 *
 * @param url - URL that was visited.
 * @param kind - Classified page kind.
 * @param mimeType - Effective MIME type.
 * @param candidates - Optional fields whose empty values should be elided.
 * @returns The user-facing result with empty/zero fields removed.
 */
function assembleResult(
  url: string,
  kind: FetchedPage["kind"],
  mimeType: string,
  candidates: ResultCandidates
): VisitWebsiteResult {
  const { title, h1, h2, content, contentLength } = candidates
  const hasText = (text: string | undefined): text is string => text !== undefined && text.length > 0

  const assembled: VisitWebsiteResult = { url, kind, mimeType }

  // Assignments stay in title → h1 → h2 → content → contentLength order so the key order of
  // the resulting object is stable.
  if (hasText(title)) {
    assembled.title = title
  }

  if (hasText(h1)) {
    assembled.h1 = h1
  }

  if (hasText(h2)) {
    assembled.h2 = h2
  }

  if (hasText(content)) {
    assembled.content = content
  }

  if (contentLength !== undefined && contentLength > 0) {
    assembled.contentLength = contentLength
  }

  return assembled
}

web-tools