// src/wikiLint.ts
import * as path from "node:path";
import * as fs from "node:fs/promises";
import { WikiLayout, ensureWikiInitialized, pathExists } from "./wikiPaths";
import { listPageEntries } from "./wikiCore";
/**
 * Result of a full wiki lint pass. All findings refer to pages by their
 * file name (e.g. "Foo.md") within the wiki's pages directory.
 */
export interface LintReport {
  /** Total number of page entries scanned. */
  pages_scanned: number;
  /** Whether the index file exists at the layout's index path. */
  index_present: boolean;
  /** Page files that exist but are not referenced by the index. */
  missing_in_index: string[];
  /** Files referenced by the index that do not exist as pages. */
  index_only: string[];
  /** Intra-wiki links whose resolved target is not an existing page. */
  broken_links: Array<{ in_page: string; target: string }>;
  /** Pages with no incoming links from any other page (index links excluded). */
  orphans: string[];
  /** Groups of page files whose names collide when lowercased. */
  duplicate_names: Array<{ canonical: string; files: string[] }>;
}
// Matches markdown links `[text](target)` (capture group 1) and wiki-style
// links `[[target]]` (capture group 2). Declared with the `g` flag and shared
// across calls, so callers must reset `lastIndex` before each fresh scan.
const LINK_RE = /\[[^\]]*\]\(([^)\s]+)\)|\[\[([^\]]+)\]\]/g;
/**
 * Lint a wiki directory layout and report structural problems:
 * pages missing from the index, stale index entries, broken intra-wiki
 * links, orphan pages (no incoming cross-references), and page names
 * that collide case-insensitively.
 *
 * @param layout - resolved wiki layout (pages directory, index path).
 * @returns a LintReport; list fields are sorted for deterministic output.
 */
export async function lintWiki(layout: WikiLayout): Promise<LintReport> {
  await ensureWikiInitialized(layout);
  const pages = await listPageEntries(layout);
  const pageFiles = new Set(pages.map((p) => p.file));
  const pageBases = new Set(pages.map((p) => p.name.toLowerCase()));

  // Collect every `(pages/<file>)` reference from the index, if present.
  const indexPresent = await pathExists(layout.indexPath);
  const inIndex = new Set<string>();
  if (indexPresent) {
    const idx = await fs.readFile(layout.indexPath, "utf-8");
    for (const r of idx.matchAll(/\(pages\/([^)\s]+)\)/g)) inIndex.add(r[1]);
  }

  // Cross-check pages against the index in both directions.
  const missingInIndex: string[] = [];
  const indexOnly: string[] = [];
  for (const p of pages) {
    if (!inIndex.has(p.file)) missingInIndex.push(p.file);
  }
  for (const f of inIndex) {
    if (!pageFiles.has(f)) indexOnly.push(f);
  }

  // Incoming links count ONLY actual cross-references between pages.
  // The auto-rebuilt index.md lists every page by construction, so counting
  // it would make orphan detection trivial (no page is ever orphan).
  const incomingLinks = new Map<string, number>();
  for (const p of pages) incomingLinks.set(p.file, 0);

  const brokenLinks: LintReport["broken_links"] = [];
  for (const p of pages) {
    const abs = path.join(layout.pagesDir, p.file);
    let content: string;
    try {
      content = await fs.readFile(abs, "utf-8");
    } catch {
      // Page vanished between listing and reading; skip it.
      continue;
    }
    let m;
    LINK_RE.lastIndex = 0; // shared global regex: reset before each scan
    while ((m = LINK_RE.exec(content)) !== null) {
      const target = (m[1] ?? m[2] ?? "").trim();
      if (!target) continue;
      if (target.startsWith("#")) continue; // in-page anchor only
      if (/^[a-z][a-z0-9+.-]*:/i.test(target)) continue; // external URL (http:, mailto:, ...)
      const targetPath = target.split("#")[0].split("?")[0];
      // Markdown links must point at a .md file; wiki links always count.
      if (!targetPath.toLowerCase().endsWith(".md") && !m[2]) continue;
      let candidate: string;
      if (m[2]) {
        // Wiki link: strip `|alias` and `#anchor` suffixes before
        // normalizing to `<name>.md`. Previously `[[Page|alias]]` and
        // `[[Page#section]]` were looked up verbatim, so every aliased or
        // anchored wiki link was falsely reported as broken (and its
        // target never credited with an incoming link).
        const wikiName = m[2].split("|")[0].split("#")[0].trim();
        if (!wikiName) continue;
        candidate = `${wikiName.replace(/\.md$/i, "")}.md`;
      } else {
        candidate = path.basename(targetPath);
      }
      if (pageFiles.has(candidate)) {
        incomingLinks.set(candidate, (incomingLinks.get(candidate) ?? 0) + 1);
      } else {
        brokenLinks.push({ in_page: p.file, target: candidate });
      }
    }
  }

  const orphans: string[] = [];
  for (const [file, n] of incomingLinks) {
    if (n === 0) orphans.push(file);
  }

  // Group pages by lowercased name to flag duplicates that would collide
  // on case-insensitive filesystems.
  const lowerToFiles = new Map<string, string[]>();
  for (const p of pages) {
    const k = p.name.toLowerCase();
    const arr = lowerToFiles.get(k) ?? [];
    arr.push(p.file);
    lowerToFiles.set(k, arr);
  }
  const duplicates: LintReport["duplicate_names"] = [];
  for (const [k, files] of lowerToFiles) {
    if (files.length > 1) duplicates.push({ canonical: k, files });
  }

  void pageBases; // kept for potential future case-insensitive checks

  return {
    pages_scanned: pages.length,
    index_present: indexPresent,
    missing_in_index: missingInIndex.sort(),
    index_only: indexOnly.sort(),
    // Sort for deterministic output, consistent with the other list fields.
    broken_links: brokenLinks.sort((a, b) =>
      a.in_page === b.in_page
        ? a.target.localeCompare(b.target)
        : a.in_page.localeCompare(b.in_page),
    ),
    orphans: orphans.sort(),
    duplicate_names: duplicates,
  };
}