Forked from npacker/web-tools
Project Files
src / images / download-images.ts
/**
* Concurrent multi-URL image download, preserving input order in the result array.
*/
import { downloadImage } from "./download-image"
import type { RetryOptions } from "../http"
import type { RateLimiter } from "../timing"
import type { Impit } from "impit"
import type { Options as PRetryOptions } from "p-retry"
/**
* Options controlling placement of the downloaded batch and the per-image size cap.
*
* Not exported: callers of `downloadImages` supply a structurally matching object. Every field is
* forwarded unchanged to `downloadImage` for each URL in the batch.
*/
interface DownloadImagesOptions {
/** Directory into which downloaded files are written. */
workingDirectory: string
/** Epoch-millisecond timestamp used as the filename prefix for every file in the batch. */
timestamp: number
/** Hard upper bound on each image payload, in bytes. */
maxBytes: number
}
/**
* Contextual hooks provided by the caller for logging and cancellation.
*
* Within this module only `limiter` is used directly (each download is submitted through
* `limiter.schedule`); the whole object is then handed to `downloadImage`, which presumably
* consumes the remaining members.
*/
export interface DownloadImagesContext {
/** Logger used to surface non-fatal download failures. */
warn: (message: string) => void
/** Signal used to abort the in-flight downloads. */
signal: AbortSignal
/** Limiter capping the number of downloads in flight concurrently. */
limiter: RateLimiter
/** Retry policy applied to transient download failures. Optional. */
retry?: RetryOptions
/** Observer invoked after each failed attempt, before the backoff sleep. Optional. */
onFailedAttempt?: PRetryOptions["onFailedAttempt"]
}
/**
* Per-URL outcome reported by `downloadImages`.
*
* Discriminated on `ok`: check it before reading `localPath` or `url`, because each of those exists
* on only one variant. The failure variant is produced when `downloadImage` resolves to `undefined`.
*/
export type DownloadedImage =
| {
/** Discriminant marking a successful download. */
ok: true
/** Markdown-ready local path of the saved image. */
localPath: string
}
| {
/** Discriminant marking a failed download. */
ok: false
/** Original remote URL that could not be fetched. */
url: string
}
/**
 * Download every URL in the batch through the caller's limiter, preserving the input order in the
 * result array. Inputs must be absolute http(s) URLs.
 *
 * Every URL is submitted to `context.limiter.schedule` up front; when each task actually starts is
 * up to the limiter. A download that `downloadOne` reports as failed yields an `{ ok: false }`
 * entry and does not reject the batch. If any scheduled task rejects, however, the returned
 * promise rejects with that reason (`Promise.all` semantics).
 *
 * @param urls - URLs to download. The array is only read, never mutated, so readonly arrays and
 *   tuples are accepted as well.
 * @param impit - Shared HTTP client used for the downloads.
 * @param options - Options controlling file placement and naming.
 * @param context - Logging and cancellation hooks provided by the caller.
 * @returns A parallel array of per-URL outcomes: entry `i` corresponds to `urls[i]`.
 */
export async function downloadImages(
  urls: readonly string[],
  impit: Impit,
  options: DownloadImagesOptions,
  context: DownloadImagesContext
): Promise<DownloadedImage[]> {
  return Promise.all(
    // The async wrappers are deliberate: a synchronous throw from `schedule` becomes a rejection
    // of that slot's promise instead of aborting the `map` half-way through the batch.
    urls.map(async (url, position) =>
      context.limiter.schedule(async () => downloadOne(url, position, impit, options, context))
    )
  )
}
/**
 * Fetch the image for one batch slot via `downloadImage` and wrap the result as a
 * `DownloadedImage`.
 *
 * Rejections from `downloadImage` are not caught here; they propagate to the caller.
 *
 * @param url - URL for this slot.
 * @param position - Zero-based index of this slot within the batch.
 * @param impit - Shared HTTP client used for the download.
 * @param options - Options controlling file placement and naming.
 * @param context - Logging and cancellation hooks provided by the caller.
 * @returns `{ ok: true, localPath }` when `downloadImage` resolves to a path, otherwise
 *   `{ ok: false, url }`.
 */
async function downloadOne(
  url: string,
  position: number,
  impit: Impit,
  options: DownloadImagesOptions,
  context: DownloadImagesContext
): Promise<DownloadedImage> {
  const request = {
    workingDirectory: options.workingDirectory,
    timestamp: options.timestamp,
    // `downloadImage` is given a one-based index, while `position` is zero-based.
    index: position + 1,
    maxBytes: options.maxBytes,
  }
  const savedPath = await downloadImage(url, impit, request, context)

  // `undefined` is the failure signal; report the original URL in that case.
  return savedPath === undefined ? { ok: false, url } : { ok: true, localPath: savedPath }
}