Project Files

.claude

settings.local.json

src

bing

build-urls.ts

index.ts

parse-results.ts

search-images.ts

cache

image-search-results-payload.ts

index.ts

search-cache-key.ts

ttl-cache.ts

web-search-result.ts

web-search-results-payload.ts

config

auto-sentinel.ts

config-schematics.ts

resolve-config.ts

duckduckgo

build-urls.ts

index.ts

parse-results.ts

search-web.ts

enrichment

enrich-search-results.ts

index.ts

metascraper-helpers.d.ts

metascraper.ts

errors

abort-error.ts

error-message.ts

index.ts

no-results-error.ts

tool-error.ts

unsupported-content-type-error.ts

index.ts

lmstudio-home.ts

markdown-path.ts

url-filename.ts

http

decode-bytes.ts

fetch-error.ts

fetch-retry.ts

fetch.ts

impit-client.ts

impit-error.ts

index.ts

parse-content-type.ts

redirects.ts

response-body.ts

ssrf.ts

url-schema.ts

images

download-image.ts

download-images.ts

index.ts

page

fetch-page.ts

fetched-page.ts

index.ts

page-kind.ts

render-visit-result.ts

parsers

image-extensions.ts

index.ts

page-images.ts

page-text.ts

pdf-text.ts

index.ts

safe-search.ts

search-page-parameter.ts

text

escape-markdown.ts

html-to-markdown.ts

html-to-text.ts

index.ts

normalize-blank-lines.ts

normalize-text.ts

timing

index.ts

per-host-rate-limiter.ts

rate-limiter.ts

tools

fetch-images-tool.ts

image-search-tool.ts

visit-website-tool.ts

web-search-tool.ts

index.ts

tools-provider.ts

.gitignore

.prettierrc.json

CLAUDE.md

eslint.config.mjs

knip.json

LICENSE

manifest.json

package-lock.json

package.json

QWEN.md

README.md

thumbnail.png

tsconfig.json

tsdoc.json

src / page / fetched-page.ts

/**
 * Shape of a fetched page after classification and per-kind decoding/extraction.
 *
 * Modelled as a discriminated union so the HTML variant carries no stray `title` field:
 * HTML titles are derived from the document at render time, not captured at fetch time.
 */

import type { PageKind } from "./page-kind"

/**
 * Fetched page whose payload is an HTML document kept as an unparsed string.
 *
 * This variant deliberately declares no `title` field: for HTML the title is derived from
 * the document at render time rather than being captured when the page is fetched.
 */
export interface HtmlFetchedPage {
  /** Union discriminant; always the literal `"html"` (covers HTML and XHTML payloads). */
  kind: "html"
  /** Effective MIME type that was used to classify the payload. */
  mimeType: string
  /** The HTML body exactly as received, with no parsing applied. */
  html: string
}

/**
 * Fetched page for every kind other than HTML. The payload is text that has already been
 * extracted into its final form (PDF body, raw text, or raw JSON), accompanied by any
 * metadata title captured at fetch time.
 */
export interface NonHtmlFetchedPage {
  /** Union discriminant; any `PageKind` member except `"html"`. */
  kind: Exclude<PageKind, "html">
  /** Effective MIME type that was used to classify the payload. */
  mimeType: string
  /**
   * Document-level title when the source format exposes one (for example PDF metadata).
   *
   * NOTE(review): the field is required, so sources without a title presumably supply an
   * empty string — confirm against the code that builds this object.
   */
  title: string
  /** Text payload, already extracted into its final form. */
  text: string
}

/**
 * Discriminated union of the two fetched-page shapes. Consumers narrow via `page.kind`:
 * `"html"` selects {@link HtmlFetchedPage} (raw `html`, no `title`), and every other kind
 * selects {@link NonHtmlFetchedPage} (pre-extracted `text` plus `title`). Both variants
 * expose `mimeType`, so it can be read without narrowing.
 */
export type FetchedPage = HtmlFetchedPage | NonHtmlFetchedPage

web-tools