Project Files

.claude

settings.local.json

src

bing

build-urls.ts

index.ts

parse-results.ts

search-images.ts

cache

image-search-results-payload.ts

index.ts

search-cache-key.ts

ttl-cache.ts

web-search-result.ts

web-search-results-payload.ts

config

auto-sentinel.ts

config-schematics.ts

resolve-config.ts

duckduckgo

build-urls.ts

index.ts

parse-results.ts

search-web.ts

enrichment

enrich-search-results.ts

index.ts

metascraper-helpers.d.ts

metascraper.ts

errors

abort-error.ts

error-message.ts

index.ts

no-results-error.ts

tool-error.ts

unsupported-content-type-error.ts

index.ts

lmstudio-home.ts

markdown-path.ts

url-filename.ts

http

decode.ts

fetch-error.ts

fetch-retry.ts

fetch.ts

impit-client.ts

impit-error.ts

index.ts

parse-content-type.ts

redirects.ts

response-body.ts

ssrf.ts

url-schema.ts

images

download-image.ts

download-images.ts

index.ts

page

fetch-page.ts

fetched-page.ts

index.ts

page-kind.ts

parsers

image-extensions.ts

index.ts

page-images.ts

page-text.ts

pdf-text.ts

renderers

image-results.ts

index.ts

page-result.ts

retrieval

chunks.ts

excerpt.ts

index.ts

relevance.ts

selection.ts

index.ts

safe-search.ts

search-page-parameter.ts

text

escape-markdown.ts

html-to-markdown.ts

html-to-text.ts

index.ts

normalize-blank-lines.ts

normalize-text.ts

timing

index.ts

per-host-rate-limiter.ts

rate-limiter.ts

tools

fetch-images-tool.ts

image-search-tool.ts

visit-website-tool.ts

web-search-tool.ts

index.ts

tools-provider.ts

.gitignore

.prettierrc.json

CLAUDE.md

eslint.config.mjs

knip.json

LICENSE

manifest.json

package-lock.json

package.json

QWEN.md

README.md

thumbnail.png

tsconfig.json

tsdoc.json

src / retrieval / selection.ts

/**
 * Select which ranked chunks to include within a character budget. Grows the selection outward from
 * the best-matching chunks through their neighbours until the budget is filled, then returns the
 * chosen chunk indices in source order; assembling them into text is the caller's concern. The
 * join-separator width is factored into the budget so the assembled text fits the same limit.
 */

/**
 * Choose which chunks to include for a character budget and report them in source order.
 *
 * The actual choice is delegated to {@link growSelection}, which walks outward from the ranked
 * matches; this entry point only short-circuits the trivial cases and orders the outcome. Because
 * the first candidate is always admitted by the growth step, a single oversized match still
 * produces a one-element result rather than an empty one.
 *
 * @param rankedIndices - Chunk indices ordered from best to worst match.
 * @param chunks - Text chunks, indexed in source order.
 * @param limit - Total character budget the assembled chunks must fit within.
 * @param separatorLength - Character width of the separator the caller joins the chunks with.
 * @returns The selected chunk indices sorted ascending; empty when there are no ranked indices or
 *   the budget is zero or negative.
 */
export function selectChunks(
  rankedIndices: number[],
  chunks: string[],
  limit: number,
  separatorLength: number
): number[] {
  // Nothing to spend, or nothing to spend it on.
  if (limit <= 0 || rankedIndices.length === 0) {
    return []
  }

  const chosen = growSelection(rankedIndices, chunks, limit, separatorLength)

  // The set iterates in admission order; callers want document order.
  return Array.from(chosen).toSorted((first, second) => first - second)
}

/**
 * Fill a character budget by walking candidates in the order {@link prioritizedCandidates} emits
 * them. Each new candidate is charged its own length plus one separator (the very first admitted
 * chunk pays no separator). A candidate whose cost would push the running total past the budget is
 * skipped rather than ending the walk, so smaller chunks further out can still be admitted. The
 * first candidate is always admitted, even when it alone exceeds the budget.
 *
 * @param rankedIndices - Chunk indices ordered from best to worst fuzzy match.
 * @param chunks - Text chunks, indexed in source order.
 * @param limit - Character budget to fill, accounting for separators between chunks.
 * @param separatorLength - Character width of the separator that will join adjacent chunks.
 * @returns Set of admitted chunk indices, in admission order rather than source order.
 */
function growSelection(rankedIndices: number[], chunks: string[], limit: number, separatorLength: number): Set<number> {
  const admitted = new Set<number>()
  let spent = 0

  for (const index of prioritizedCandidates(rankedIndices, chunks.length)) {
    // Budget is used up: stop pulling further candidates from the generator.
    if (spent >= limit) {
      break
    }

    // The candidate stream repeats indices; only consider each admitted one once.
    if (admitted.has(index)) {
      continue
    }

    const isFirst = admitted.size === 0
    const joinCost = isFirst ? 0 : separatorLength
    const spentIfAdmitted = spent + joinCost + chunks[index].length

    // Only the first chunk may overshoot; any later one that does is passed over.
    if (!isFirst && spentIfAdmitted > limit) {
      continue
    }

    admitted.add(index)
    spent = spentIfAdmitted
  }

  return admitted
}

/**
 * Lazily produce chunk indices in the order they should be considered for inclusion. The ranked
 * matches come first, exactly as given. After that the sequence widens ring by ring: for radius 1,
 * then 2, and so on, every match contributes its left neighbour followed by its right neighbour at
 * that distance, omitting positions below 0 or above the last chunk index. Widening stops once the
 * radius exceeds the greatest distance from any match to either end of the document.
 *
 * The same index can appear more than once (for example when two matches share a neighbour);
 * filtering repeats is left to the consumer. Being a generator, nothing past the point where the
 * consumer stops iterating is computed.
 *
 * @param rankedIndices - Chunk indices ordered from best to worst fuzzy match.
 * @param chunkCount - Total number of chunks in the source document.
 * @yields Candidate chunk indices in emission order.
 */
function* prioritizedCandidates(rankedIndices: number[], chunkCount: number): Generator<number> {
  const finalIndex = chunkCount - 1

  // Ring zero: the matches themselves, best first.
  for (const match of rankedIndices) {
    yield match
  }

  // Farthest any match sits from a document edge; no ring beyond this can yield anything.
  let furthest = 0

  for (const match of rankedIndices) {
    const span = Math.max(match, finalIndex - match)

    furthest = span > furthest ? span : furthest
  }

  let radius = 1

  while (radius <= furthest) {
    for (const center of rankedIndices) {
      const below = center - radius

      if (below >= 0) {
        yield below
      }

      const above = center + radius

      if (above <= finalIndex) {
        yield above
      }
    }

    radius += 1
  }
}

web-tools