Project Files
src / fetch.ts
import * as fs from "node:fs";
import * as fsp from "node:fs/promises";
import * as path from "node:path";
import { URL } from "node:url";
import { assertUrlSafeForFetch, type FetchPolicy } from "./security";
/** Size, time and redirect limits applied by `downloadUrlToPath`. */
export type FetchLimits = {
/** The download is aborted with an error once more than this many body bytes have been received. */
maxDownloadBytes: number;
/** Time budget in milliseconds for each `fetch` call (one per redirect hop) to resolve with a response. */
connectTimeoutMs: number;
/** Time budget in milliseconds for reading the response body of the final hop. */
readTimeoutMs: number;
/** Maximum number of redirect responses that are followed; one more redirect than this is an error. */
maxRedirects: number;
};
/**
 * Derives an abort signal that fires when either `parent` aborts or `ms`
 * milliseconds elapse, whichever happens first.
 *
 * @param parent Signal whose abort (and abort reason) is forwarded to the derived signal.
 * @param ms Delay in milliseconds after which the derived signal aborts with `Error("timeout")`.
 * @returns The derived `signal`, plus a `cancel` function that stops the timer and
 *          detaches the listener from `parent`. `cancel` does not abort the signal.
 */
function mergeAbort(parent: AbortSignal, ms: number): { signal: AbortSignal; cancel: () => void } {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort(new Error("timeout"));
  }, ms);

  function forwardParentAbort(): void {
    controller.abort(parent.reason);
  }

  if (!parent.aborted) {
    parent.addEventListener("abort", forwardParentAbort, { once: true });
  } else {
    // The parent is already aborted: no timer is needed, mirror its state right away.
    clearTimeout(timer);
    forwardParentAbort();
  }

  function cancel(): void {
    clearTimeout(timer);
    parent.removeEventListener("abort", forwardParentAbort);
  }

  return { signal: controller.signal, cancel };
}
/**
 * Cancels the body of a response that will not be read, so the underlying
 * connection is not held open by an unconsumed stream. Failures are ignored:
 * this is best-effort cleanup on a path that is about to retry or throw anyway.
 */
async function discardResponseBody(resp: Response): Promise<void> {
  try {
    await resp.body?.cancel();
  } catch {
    // Best effort only; the caller reports the real outcome.
  }
}

/**
 * Streams `body` into a freshly created file at `destAbs`.
 *
 * The write stream is closed on every path. If anything fails (abort, size
 * limit, network error, file-system error) the partially written file is
 * removed after the stream has been closed, and the original error is rethrown.
 *
 * @param body Response body stream to consume.
 * @param destAbs Path of the file to create.
 * @param signal When this aborts, a pending read is interrupted and the abort reason is thrown.
 * @param maxBytes Transfer fails once more than this many bytes have been received.
 * @returns Number of bytes written to `destAbs`.
 */
async function streamBodyToFile(
  body: NonNullable<Response["body"]>,
  destAbs: string,
  signal: AbortSignal,
  maxBytes: number,
): Promise<number> {
  const out = fs.createWriteStream(destAbs);
  // Without a listener, a failed open/write would be raised as an uncaught
  // 'error' event. Remember the first error so it can be rethrown below.
  const sink: { error: Error | undefined } = { error: undefined };
  out.on("error", (err: Error) => {
    if (!sink.error) sink.error = err;
  });

  const reader = body.getReader();
  // Polling `signal.aborted` between chunks cannot interrupt a read that never
  // settles; cancelling the reader makes the pending `read()` resolve.
  const onAbort = (): void => {
    void reader.cancel(signal.reason).catch(() => undefined);
  };
  signal.addEventListener("abort", onAbort, { once: true });

  let total = 0;
  try {
    try {
      while (true) {
        if (signal.aborted) throw signal.reason;
        const { done, value } = await reader.read();
        // A cancelled reader reports `done: true`; check the signal first so an
        // aborted transfer is never mistaken for a complete one.
        if (signal.aborted) throw signal.reason;
        if (done) break;
        if (!value) continue;
        total += value.length;
        if (total > maxBytes) {
          throw new Error(`Download exceeded max bytes (${maxBytes})`);
        }
        await new Promise<void>((resolve, reject) => {
          out.write(value, (err) => (err ? reject(err) : resolve()));
        });
      }
    } finally {
      signal.removeEventListener("abort", onAbort);
      await new Promise<void>((resolve, reject) => out.close((e) => (e ? reject(e) : resolve())));
    }
    // Covers failures that no write callback reported (e.g. open failed and the body was empty).
    if (sink.error) throw sink.error;
    return total;
  } catch (err) {
    // Stop the network transfer, then drop the partial file (already closed above).
    try {
      await reader.cancel();
    } catch {
      // The stream may already be errored or cancelled; nothing more to release.
    }
    await fsp.rm(destAbs, { force: true });
    throw err;
  }
}

/**
 * Downloads `urlStr` to the file `destAbs`.
 *
 * Redirects are followed manually (up to `limits.maxRedirects`) so that every
 * hop is validated with `assertUrlSafeForFetch` before it is requested. The
 * parent directory of `destAbs` is created if needed.
 *
 * Time limits: each request gets `limits.connectTimeoutMs` to produce a
 * response, the body read gets `limits.readTimeoutMs`, and the whole call is
 * capped at `connectTimeoutMs + readTimeoutMs + 5000` ms. A stalled body read
 * is interrupted when its limit (or the overall limit) expires.
 *
 * On redirect errors, non-OK statuses and any failure while receiving the
 * body, the file at `destAbs` is removed before the error is thrown.
 *
 * @param urlStr URL to download.
 * @param destAbs Destination file path.
 * @param policy Policy passed to `assertUrlSafeForFetch` for every URL requested.
 * @param limits Size, time and redirect limits.
 * @returns `final_url`, `status_code`, `content_type`, `bytes_written` and `redirects_followed`.
 * @throws Error on too many redirects, a redirect without `Location`, a non-OK
 *         status, a missing body, an exceeded size limit, or a timeout; also
 *         rethrows errors from URL validation, `fetch` and the file system.
 */
export async function downloadUrlToPath(
  urlStr: string,
  destAbs: string,
  policy: FetchPolicy,
  limits: FetchLimits,
): Promise<Record<string, unknown>> {
  // Validate before touching the file system so a rejected URL creates no directories.
  await assertUrlSafeForFetch(urlStr, policy);
  await fsp.mkdir(path.dirname(destAbs), { recursive: true });

  let current = urlStr;
  let redirects = 0;
  const overall = new AbortController();
  const overallTimer = setTimeout(
    () => overall.abort(new Error("overall fetch timeout")),
    limits.connectTimeoutMs + limits.readTimeoutMs + 5_000,
  );

  try {
    while (true) {
      // Re-validated on every hop: a redirect target is untrusted input.
      await assertUrlSafeForFetch(current, policy);

      const connectCtl = mergeAbort(overall.signal, limits.connectTimeoutMs);
      let resp: Response;
      try {
        resp = await fetch(current, {
          method: "GET",
          redirect: "manual",
          signal: connectCtl.signal,
          headers: { "User-Agent": "LMEXIF-Sniffer/0.1" },
        });
      } finally {
        connectCtl.cancel();
      }

      if ([301, 302, 303, 307, 308].includes(resp.status)) {
        await discardResponseBody(resp);
        redirects += 1;
        if (redirects > limits.maxRedirects) {
          await fsp.rm(destAbs, { force: true });
          throw new Error("Too many HTTP redirects");
        }
        const loc = resp.headers.get("location");
        if (!loc) {
          await fsp.rm(destAbs, { force: true });
          throw new Error("Redirect without Location header");
        }
        // Fall back to the requested URL as base if the response carries no URL.
        current = new URL(loc.trim(), resp.url || current).toString();
        continue;
      }

      if (!resp.ok) {
        await discardResponseBody(resp);
        await fsp.rm(destAbs, { force: true });
        throw new Error(`HTTP ${resp.status}`);
      }

      const body = resp.body;
      if (!body) {
        throw new Error("Response has no body");
      }

      const readCtl = mergeAbort(overall.signal, limits.readTimeoutMs);
      let total: number;
      try {
        total = await streamBodyToFile(body, destAbs, readCtl.signal, limits.maxDownloadBytes);
      } finally {
        readCtl.cancel();
      }

      return {
        final_url: resp.url,
        status_code: resp.status,
        content_type: resp.headers.get("content-type"),
        bytes_written: total,
        redirects_followed: redirects,
      };
    }
  } finally {
    clearTimeout(overallTimer);
  }
}