// src/wikiLint.ts
import * as path from "node:path";
import * as fs from "node:fs/promises";
import { WikiLayout, ensureWikiInitialized, pathExists } from "./wikiPaths";
import { listPageEntries } from "./wikiCore";
/**
 * Result of a full wiki lint pass. All findings refer to pages by their
 * file name (e.g. "Foo.md") within the wiki's pages directory.
 */
export interface LintReport {
  /** Total number of page entries scanned. */
  pages_scanned: number;
  /** Whether the index file exists at the layout's index path. */
  index_present: boolean;
  /** Page files that exist but are not referenced by the index. */
  missing_in_index: string[];
  /** Files referenced by the index that do not exist as pages. */
  index_only: string[];
  /** Intra-wiki links whose resolved target is not an existing page. */
  broken_links: Array<{ in_page: string; target: string }>;
  /** Pages with no incoming links from any other page (index links excluded). */
  orphans: string[];
  /** Groups of page files whose names collide when lowercased. */
  duplicate_names: Array<{ canonical: string; files: string[] }>;
}
// Matches markdown links `[text](target)` (capture group 1) and wiki-style
// links `[[target]]` (capture group 2). Declared with the `g` flag and shared
// across calls, so callers must reset `lastIndex` before each fresh scan.
const LINK_RE = /\[[^\]]*\]\(([^)\s]+)\)|\[\[([^\]]+)\]\]/g;
/**
 * Lint a wiki directory layout and report structural problems:
 * pages missing from the index, stale index entries, broken intra-wiki
 * links, orphan pages (no incoming cross-references), and page names
 * that collide case-insensitively.
 *
 * @param layout - resolved wiki layout (pages directory, index path).
 * @returns a LintReport; list fields are sorted for deterministic output.
 */
export async function lintWiki(layout: WikiLayout): Promise<LintReport> {
  await ensureWikiInitialized(layout);
  const pages = await listPageEntries(layout);
  const pageFiles = new Set(pages.map((p) => p.file));
  const pageBases = new Set(pages.map((p) => p.name.toLowerCase()));

  // Collect every `(pages/<file>)` reference from the index, if present.
  const indexPresent = await pathExists(layout.indexPath);
  const inIndex = new Set<string>();
  if (indexPresent) {
    const idx = await fs.readFile(layout.indexPath, "utf-8");
    for (const r of idx.matchAll(/\(pages\/([^)\s]+)\)/g)) inIndex.add(r[1]);
  }

  // Cross-check pages against the index in both directions.
  const missingInIndex: string[] = [];
  const indexOnly: string[] = [];
  for (const p of pages) {
    if (!inIndex.has(p.file)) missingInIndex.push(p.file);
  }
  for (const f of inIndex) {
    if (!pageFiles.has(f)) indexOnly.push(f);
  }

  // Incoming links count ONLY actual cross-references between pages.
  // The auto-rebuilt index.md lists every page by construction, so counting
  // it would make orphan detection trivial (no page is ever orphan).
  const incomingLinks = new Map<string, number>();
  for (const p of pages) incomingLinks.set(p.file, 0);

  const brokenLinks: LintReport["broken_links"] = [];
  for (const p of pages) {
    const abs = path.join(layout.pagesDir, p.file);
    let content: string;
    try {
      content = await fs.readFile(abs, "utf-8");
    } catch {
      // Page vanished between listing and reading; skip it.
      continue;
    }
    let m;
    LINK_RE.lastIndex = 0; // shared global regex: reset before each scan
    while ((m = LINK_RE.exec(content)) !== null) {
      const target = (m[1] ?? m[2] ?? "").trim();
      if (!target) continue;
      if (target.startsWith("#")) continue; // in-page anchor only
      if (/^[a-z][a-z0-9+.-]*:/i.test(target)) continue; // external URL (http:, mailto:, ...)
      const targetPath = target.split("#")[0].split("?")[0];
      // Markdown links must point at a .md file; wiki links always count.
      if (!targetPath.toLowerCase().endsWith(".md") && !m[2]) continue;
      let candidate: string;
      if (m[2]) {
        // Wiki link: strip `|alias` and `#anchor` suffixes before
        // normalizing to `<name>.md`. Previously `[[Page|alias]]` and
        // `[[Page#section]]` were looked up verbatim, so every aliased or
        // anchored wiki link was falsely reported as broken (and its
        // target never credited with an incoming link).
        const wikiName = m[2].split("|")[0].split("#")[0].trim();
        if (!wikiName) continue;
        candidate = `${wikiName.replace(/\.md$/i, "")}.md`;
      } else {
        candidate = path.basename(targetPath);
      }
      if (pageFiles.has(candidate)) {
        incomingLinks.set(candidate, (incomingLinks.get(candidate) ?? 0) + 1);
      } else {
        brokenLinks.push({ in_page: p.file, target: candidate });
      }
    }
  }

  const orphans: string[] = [];
  for (const [file, n] of incomingLinks) {
    if (n === 0) orphans.push(file);
  }

  // Group pages by lowercased name to flag duplicates that would collide
  // on case-insensitive filesystems.
  const lowerToFiles = new Map<string, string[]>();
  for (const p of pages) {
    const k = p.name.toLowerCase();
    const arr = lowerToFiles.get(k) ?? [];
    arr.push(p.file);
    lowerToFiles.set(k, arr);
  }
  const duplicates: LintReport["duplicate_names"] = [];
  for (const [k, files] of lowerToFiles) {
    if (files.length > 1) duplicates.push({ canonical: k, files });
  }

  void pageBases; // kept for potential future case-insensitive checks

  return {
    pages_scanned: pages.length,
    index_present: indexPresent,
    missing_in_index: missingInIndex.sort(),
    index_only: indexOnly.sort(),
    // Sort for deterministic output, consistent with the other list fields.
    broken_links: brokenLinks.sort((a, b) =>
      a.in_page === b.in_page
        ? a.target.localeCompare(b.target)
        : a.in_page.localeCompare(b.in_page),
    ),
    orphans: orphans.sort(),
    duplicate_names: duplicates,
  };
}