// Project Files
// src/sources/adapters/githubMarkdownSourceAdapter.ts
import path from "path";
import type { SourceAdapter, SourceAdapterContext, SourceDocument } from "../types.js";
import { fetchTextWithLimits } from "../http.js";
/**
 * Coordinates of a location inside a GitHub repository, as produced by
 * `parseGithubTarget` from a user-supplied source string.
 */
interface GithubTarget {
/** Owner segment of the repository coordinates (`<owner>/<repo>`). */
owner: string;
/** Repository name segment of the coordinates. */
repo: string;
/** Ref segment taken from the source string; shorthand sources get "main". */
ref: string;
/**
 * Repository-relative file path. Optional: when it is absent the adapter's
 * `load` enumerates markdown files itself instead of fetching a single file.
 */
path?: string;
}
/**
 * Source adapter that loads markdown files from a GitHub repository.
 *
 * A source string is first resolved with `parseGithubTarget`. When it names a
 * markdown file, that single file is fetched; when it names any other file
 * path, nothing is loaded; when it names no path at all, markdown files are
 * discovered in the repository root and in `docs/` and fetched one by one.
 */
export class GithubMarkdownSourceAdapter implements SourceAdapter {
  /**
   * Reports whether the trimmed `source` parses as a GitHub target.
   */
  canHandle(source: string): boolean {
    const target = parseGithubTarget(source.trim());
    return target !== null;
  }

  /**
   * Loads the document(s) described by `source`.
   *
   * @param source  Source string; surrounding whitespace is ignored.
   * @param context Fetch limits, page cap and optional GitHub token.
   * @returns An empty array for unparseable sources and for explicit
   *          non-markdown paths; one document for an explicit markdown path;
   *          otherwise up to `context.maxPages` discovered documents.
   * @throws Whatever `loadMarkdownFile` throws when an explicitly named file
   *         cannot be fetched. Failures of discovered files are only logged.
   */
  async load(source: string, context: SourceAdapterContext): Promise<SourceDocument[]> {
    const target = parseGithubTarget(source.trim());
    if (!target) return [];

    const explicitPath = target.path;
    if (explicitPath) {
      // An explicit path is honoured only if it is a markdown file.
      if (!isMarkdownPath(explicitPath)) return [];
      const single = await this.loadMarkdownFile({ ...target, path: explicitPath }, context);
      return [single];
    }

    const discovered = await this.enumerateMarkdownPaths(target, context);
    const selected = discovered.slice(0, context.maxPages);

    // Files are fetched sequentially; one failing file must not abort the rest.
    const loaded: SourceDocument[] = [];
    for (const candidate of selected) {
      try {
        const doc = await this.loadMarkdownFile({ ...target, path: candidate }, context);
        loaded.push(doc);
      } catch (err) {
        console.warn(`[sources/github] failed to load ${target.owner}/${target.repo}/${candidate}:`, String(err));
      }
    }
    return loaded;
  }

  /**
   * Lists markdown file paths found directly in the repository root and in
   * `docs/` via the GitHub contents API (no recursion into sub-directories).
   *
   * Listing errors are logged and skipped. If nothing is found at all, the
   * result falls back to the single guess `README.md`.
   */
  private async enumerateMarkdownPaths(target: GithubTarget, context: SourceAdapterContext): Promise<string[]> {
    const headers = githubHeaders(context);
    // A Set keeps first-seen order and drops duplicates across both listings.
    const found = new Set<string>();

    for (const root of ["", "docs"]) {
      const apiUrl = `https://api.github.com/repos/${target.owner}/${target.repo}/contents/${root}?ref=${encodeURIComponent(target.ref)}`;
      try {
        const response = await fetchTextWithLimits(apiUrl, {
          timeoutMs: context.fetchTimeoutMs,
          maxBytes: context.maxBytes,
          headers,
        });
        const payload = JSON.parse(response.text);
        // The payload may be a single object instead of an array; normalise it.
        const listing = Array.isArray(payload) ? payload : [payload];
        for (const item of listing) {
          const itemPath = item?.path;
          const isNamedFile = item?.type === "file" && typeof itemPath === "string";
          if (isNamedFile && isMarkdownPath(itemPath)) {
            found.add(itemPath);
          }
        }
      } catch (err) {
        console.warn(`[sources/github] failed to enumerate ${apiUrl}:`, String(err));
      }
    }

    return found.size === 0 ? ["README.md"] : Array.from(found);
  }

  /**
   * Fetches one markdown file from raw.githubusercontent.com and wraps it in
   * a `SourceDocument`.
   *
   * @throws Whatever `fetchTextWithLimits` throws for the raw URL.
   */
  private async loadMarkdownFile(target: GithubTarget & { path: string }, context: SourceAdapterContext): Promise<SourceDocument> {
    const { owner, repo, ref, path: filePath } = target;

    const response = await fetchTextWithLimits(rawGithubUrl(target), {
      timeoutMs: context.fetchTimeoutMs,
      maxBytes: context.maxBytes,
      headers: githubHeaders(context),
    });

    // Everything up to and including the last "/" of the final URL serves as
    // the base URL of the document.
    const { finalUrl } = response;
    const directoryEnd = finalUrl.lastIndexOf("/") + 1;

    return {
      sourceId: `github://${owner}/${repo}/${ref}/${filePath}`,
      sourceKind: "github",
      canonicalUrl: githubBlobUrl(target),
      // File name without its extension.
      title: path.basename(filePath, path.extname(filePath)),
      rawContent: response.text,
      rawContentType: "markdown",
      baseUrl: finalUrl.slice(0, directoryEnd),
      fetchedAt: new Date().toISOString(),
      version: response.etag ?? response.lastModified,
      metadata: {
        owner,
        repo,
        ref,
        path: filePath,
        rawUrl: finalUrl,
      },
    };
  }
}
/**
 * Parses a source string into GitHub repository coordinates.
 *
 * Recognised forms, tried in this order:
 *  1. `github://owner/repo/ref[/path]`
 *  2. `https://github.com/owner/repo/blob/ref/path`
 *  3. `https://raw.githubusercontent.com/owner/repo/ref/path`
 *  4. `owner/repo[/path]` shorthand, where the ref is taken to be "main"
 *
 * For the two https forms a trailing `?query` and/or `#fragment` (for example
 * `README.md#usage` or `README.md?plain=1`) is removed from the path, so that
 * links copied from the browser still resolve to the file itself.
 *
 * The ref is always a single path segment; refs that contain "/" are not
 * distinguishable from the file path and are therefore not supported.
 *
 * @param source Source string; callers are expected to trim it first.
 * @returns The parsed target, or `null` when no form matches.
 */
function parseGithubTarget(source: string): GithubTarget | null {
  // In a URL a literal "?" or "#" starts the query or fragment, so neither
  // belongs to the file path. If stripping would leave nothing, the captured
  // text is kept unchanged.
  const stripUrlSuffix = (urlPath: string): string => {
    const cleaned = urlPath.replace(/[?#].*$/, "");
    return cleaned.length > 0 ? cleaned : urlPath;
  };

  const githubScheme = /^github:\/\/([^/]+)\/([^/]+)\/([^/]+)(?:\/(.+))?$/i.exec(source);
  if (githubScheme) {
    return {
      owner: githubScheme[1],
      repo: githubScheme[2],
      ref: githubScheme[3],
      path: githubScheme[4],
    };
  }

  const githubBlob = /^https:\/\/github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/i.exec(source);
  if (githubBlob) {
    return {
      owner: githubBlob[1],
      repo: githubBlob[2],
      ref: githubBlob[3],
      path: stripUrlSuffix(githubBlob[4]),
    };
  }

  const raw = /^https:\/\/raw\.githubusercontent\.com\/([^/]+)\/([^/]+)\/([^/]+)\/(.+)$/i.exec(source);
  if (raw) {
    return {
      owner: raw[1],
      repo: raw[2],
      ref: raw[3],
      path: stripUrlSuffix(raw[4]),
    };
  }

  // Shorthand carries no ref, so "main" is assumed.
  const shorthand = /^([A-Za-z0-9_.-]+)\/([A-Za-z0-9_.-]+)(?:\/(.+))?$/.exec(source);
  if (shorthand) {
    return {
      owner: shorthand[1],
      repo: shorthand[2],
      ref: "main",
      path: shorthand[3],
    };
  }

  return null;
}
/**
 * Builds the raw.githubusercontent.com URL for a file.
 * The target's segments are inserted as-is, without any escaping.
 */
function rawGithubUrl(target: GithubTarget & { path: string }): string {
  const { owner, repo, ref, path: filePath } = target;
  const repoRoot = `https://raw.githubusercontent.com/${owner}/${repo}`;
  return `${repoRoot}/${ref}/${filePath}`;
}
/**
 * Builds the github.com "blob" page URL for a file.
 * The target's segments are inserted as-is, without any escaping.
 */
function githubBlobUrl(target: GithubTarget & { path: string }): string {
  const { owner, repo, ref, path: filePath } = target;
  const repoRoot = `https://github.com/${owner}/${repo}`;
  return `${repoRoot}/blob/${ref}/${filePath}`;
}
/**
 * Tells whether a path ends in a markdown extension (`.md` or `.markdown`),
 * compared case-insensitively.
 */
function isMarkdownPath(filePath: string): boolean {
  const markdownExtension = /\.(md|markdown)$/i;
  return markdownExtension.exec(filePath) !== null;
}
/**
 * Builds the HTTP headers sent with every GitHub request.
 *
 * `Accept` and `User-Agent` are always present. A bearer `Authorization`
 * header is added only when `context.githubToken` is non-blank after trimming.
 */
function githubHeaders(context: SourceAdapterContext): Record<string, string> {
  const token = context.githubToken?.trim();
  const base: Record<string, string> = {
    "Accept": "application/vnd.github+json, text/plain;q=0.9, */*;q=0.8",
    "User-Agent": "ceveyne-user-docs",
  };
  return token ? { ...base, Authorization: `Bearer ${token}` } : base;
}