Project Files
src / sources / remoteImageResolver.ts
import { resolveUrl } from "./http.js";
/**
 * One image reference discovered in a chunk of document text by
 * `parseRemoteImageRefsFromChunk`.
 */
export interface RemoteImageCandidate {
/** Image URL: either an absolute `http(s)` URL taken verbatim or the result of resolving the reference against the chunk's base URL. */
url: string;
/** Alternative text taken from the Markdown `![alt](...)` part or the HTML `alt` attribute, when one was present. */
altText?: string;
/** Copied from the chunk metadata's `documentPath` when that value is a string. */
sourceId?: string;
/** Copied from the chunk metadata's `sourceKind` when that value is a string. */
sourceKind?: string;
}
/**
 * Extracts remote image references from a chunk of document text.
 *
 * Two syntaxes are recognised: Markdown images (`![alt](url "title")`, with the
 * URL optionally wrapped in `<...>`) and HTML `<img>` tags, whose `src` and
 * `alt` attributes are read with `attr`. Every reference goes through the same
 * normalisation:
 *
 * - surrounding whitespace is trimmed; empty references are dropped;
 * - `data:` URIs are dropped, since they are inline payloads rather than
 *   remote images;
 * - absolute `http(s)` URLs are used verbatim;
 * - anything else is resolved against `metadata.baseUrl` with `resolveUrl`,
 *   and is dropped when no base URL is available or resolution yields nothing.
 *
 * @param chunkContent - Text to scan.
 * @param metadata - Optional chunk metadata. Only string-valued `baseUrl`,
 *   `documentPath` (copied to `sourceId`) and `sourceKind` are read.
 * @returns Candidates de-duplicated by final URL, in discovery order: all
 *   Markdown matches first, then HTML matches.
 */
export function parseRemoteImageRefsFromChunk(
  chunkContent: string,
  metadata: Record<string, unknown> | undefined
): RemoteImageCandidate[] {
  const refs: RemoteImageCandidate[] = [];
  const seen = new Set<string>();
  const baseUrl = typeof metadata?.baseUrl === "string" ? metadata.baseUrl : undefined;
  const sourceId = typeof metadata?.documentPath === "string" ? metadata.documentPath : undefined;
  const sourceKind = typeof metadata?.sourceKind === "string" ? metadata.sourceKind : undefined;

  // Single normalisation path so Markdown and HTML references are treated
  // identically (trimming, data: filtering, resolution, de-duplication).
  function add(reference: string, alt: string | null | undefined): void {
    const ref = reference.trim();
    if (!ref || /^data:/i.test(ref)) return;

    let url: string | null;
    if (/^https?:\/\//i.test(ref)) url = ref;
    else if (baseUrl) url = resolveUrl(ref, baseUrl);
    else url = null;

    if (!url || seen.has(url)) return;
    seen.add(url);
    refs.push({ url, altText: alt?.trim() || undefined, sourceId, sourceKind });
  }

  // Group 1: alt text. Group 2: destination as written. Group 3: destination
  // without the angle brackets when the `<...>` form was used.
  const markdownImage = /!\[([^\]]*)\]\((<([^>]+)>|[^)\s]+)(?:\s+"[^"]*")?\)/g;
  let md: RegExpExecArray | null;
  while ((md = markdownImage.exec(chunkContent)) !== null) {
    const target = (md[3] ?? md[2] ?? "").replace(/^<|>$/g, "");
    add(target, md[1]);
  }

  const htmlImage = /<img\b[^>]*>/gi;
  let img: RegExpExecArray | null;
  while ((img = htmlImage.exec(chunkContent)) !== null) {
    const tag = img[0];
    const src = attr(tag, "src");
    if (src) add(src, attr(tag, "alt"));
  }

  return refs;
}
/**
 * Reads the value of one attribute from the text of a single HTML start tag.
 *
 * The tag is tokenised attribute by attribute, so a name is only matched as a
 * whole attribute name (`src` does not match `data-src`) and never inside
 * another attribute's value. Double-quoted, single-quoted and unquoted values
 * are supported, as is whitespace around `=`. Name comparison is
 * case-insensitive.
 *
 * @param tag - Text of one start tag, e.g. `<img src="a.png" alt="A">`.
 * @param name - Attribute name to look up.
 * @returns The raw attribute value (no entity decoding), or `null` when the
 *   attribute is absent, has no value, or has an empty value. When the
 *   attribute is repeated, the first occurrence decides the result.
 */
function attr(tag: string, name: string): string | null {
  const wanted = name.toLowerCase();
  // Drop the leading "<tagname" so the tag name is not tokenised as an attribute.
  const attributes = tag.replace(/^<[^\s\/>]*/, "");
  // Group 1: name. Groups 2-4: double-quoted, single-quoted, unquoted value.
  const attribute = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  let match: RegExpExecArray | null;
  while ((match = attribute.exec(attributes)) !== null) {
    if (match[1]?.toLowerCase() !== wanted) continue;
    const value = match[2] ?? match[3] ?? match[4] ?? "";
    return value === "" ? null : value;
  }
  return null;
}