import * as https from "node:https";
import * as http from "node:http";
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join, dirname } from "node:path";
import { complete, type ChatMessage } from "./llm";
import { NOTIONS, canonicalNotion, normalizeNotion } from "./data/notions";
import { norm, lev, MATCH_THRESHOLD } from "./verifier";
import { extractJson } from "./json-utils";
import type { SessionConfig } from "./session";
import type { Citation } from "./corpus";

// ----- Source whitelist (Phase 1: Wikisource only) -----
// Future additions: classiques.uqac.ca, gallica.bnf.fr...
// Endpoint of the French Wikisource MediaWiki API used by the search/parse helpers below.
const WIKISOURCE_API = "https://fr.wikisource.org/w/api.php";

// ----- HTTP helper natif (pas de dépendance mAIfetch) -----

// S1 — SSRF guard. Before 2026-05, /api/corpus/<id>/add-source accepted any
// URL supplied by a LAN user and handed it to httpsGet without validation.
// With the default bind on 0.0.0.0, a neighbour could scan internal services
// (192.168.x.x, 127.0.0.1, 169.254…) by using the plugin as an SSRF proxy.
// Literal private / loopback / link-local / multicast IPs are filtered here
// before connecting AND on every redirect. (Public DNS names that resolve to
// a private address still get through: that is a trade-off — the name would
// have to be resolved and the result validated before connecting, which
// would turn this API async.)

/**
 * Returns a human-readable reason when the IPv4 address whose first two
 * octets are `a.b` belongs to a range that must never be fetched, else null.
 */
function blockedIpv4Range(a: number, b: number): string | null {
  if (a === 10) return "RFC1918 10/8";
  if (a === 127) return "loopback 127/8";
  if (a === 0) return "0.0.0.0/8";
  if (a === 169 && b === 254) return "link-local 169.254/16";
  if (a === 172 && b >= 16 && b <= 31) return "RFC1918 172.16/12";
  if (a === 192 && b === 168) return "RFC1918 192.168/16";
  if (a >= 224 && a <= 239) return "multicast 224/4";
  if (a >= 240) return "reserved 240/4";
  return null;
}

/**
 * Decides whether `host` (a URL hostname; IPv6 literals are expected in their
 * bracketed form, as produced by `URL#hostname`) is a forbidden target.
 *
 * @param host Hostname to check.
 * @returns A short label naming the blocked range, or null when the host is allowed.
 */
function isBlockedHost(host: string): string | null {
  // A single trailing dot denotes the DNS root ("localhost." is still localhost).
  const h = host.toLowerCase().replace(/\.$/, "");
  // Special hostnames
  if (h === "localhost" || h.endsWith(".localhost") || h.endsWith(".local")) return "loopback";
  // Literal IPv4?
  const v4 = h.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
  if (v4) return blockedIpv4Range(Number(v4[1]), Number(v4[2]));
  // Literal IPv6?
  if (h.startsWith("[") && h.endsWith("]")) {
    const ip = h.slice(1, -1);
    if (ip === "::1" || ip === "::") return "IPv6 loopback";
    // IPv4-mapped addresses reach the same IPv4 targets. The URL parser
    // serialises "::ffff:127.0.0.1" as "::ffff:7f00:1", so both the dotted
    // and the two-hex-group spellings are recognised.
    const mapped = ip.match(/^::ffff:(?:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}|([0-9a-f]{1,4}):[0-9a-f]{1,4})$/);
    if (mapped) {
      let a: number;
      let b: number;
      if (mapped[3] !== undefined) {
        const high = parseInt(mapped[3], 16); // first 16 bits = first two octets
        a = high >> 8;
        b = high & 0xff;
      } else {
        a = Number(mapped[1]);
        b = Number(mapped[2]);
      }
      const why = blockedIpv4Range(a, b);
      if (why) return `IPv4-mapped ${why}`;
    }
    if (ip.startsWith("fc") || ip.startsWith("fd")) return "IPv6 ULA";
    // fe80::/10 spans first groups fe80 through febf.
    if (/^fe[89ab][0-9a-f]:/.test(ip)) return "IPv6 link-local";
  }
  return null;
}

/**
 * Issues a GET with the native http/https clients and buffers the response
 * body as UTF-8 text.
 *
 * Every URL — the initial one and each redirect target — is parsed, limited
 * to http/https and checked with isBlockedHost before a connection is made.
 *
 * @param url           Absolute URL to fetch.
 * @param timeoutMs     Socket timeout passed to `req.setTimeout`; on expiry the request is destroyed with Error("timeout").
 * @param redirectsLeft Remaining 3xx hops to follow. When it reaches 0 the 3xx response itself is returned.
 * @returns The status code, the body, and the URL of the request that produced the response.
 * Rejects with an Error for an invalid URL, an unsupported protocol, a blocked
 * host, a network/stream failure, a timeout, or an oversized body.
 */
function httpsGet(url: string, timeoutMs = 15000, redirectsLeft = 3): Promise<{ status: number; body: string; finalUrl: string }> {
  // Cap on the buffered body (in UTF-16 code units): the URL may come from an
  // untrusted user, so an endless or huge response must not exhaust memory.
  const maxBodyChars = 10 * 1024 * 1024;
  return new Promise((resolve, reject) => {
    let parsed: URL;
    try { parsed = new URL(url); } catch { return reject(new Error("URL invalide")); }
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
      return reject(new Error("Protocole non supporté"));
    }
    const blocked = isBlockedHost(parsed.hostname);
    if (blocked) return reject(new Error(`Host bloqué (${blocked}) — SSRF prévenu`));

    const client = parsed.protocol === "https:" ? https : http;
    const req = client.get(url, {
      headers: {
        "User-Agent": "livre-heros-bac/0.1 (LM Studio plugin pédagogique)",
        "Accept": "application/json, text/html, */*",
        "Accept-Language": "fr-FR,fr;q=0.9,en;q=0.5",
      },
    }, (res) => {
      const status = res.statusCode ?? 0;
      const loc = res.headers.location;
      if (status >= 300 && status < 400 && loc && redirectsLeft > 0) {
        res.resume(); // drain the redirect body so the socket can be released
        // Resolve against the current URL: this handles absolute and relative
        // targets alike. A throw here would escape the promise (we are inside
        // an event callback), so a malformed Location is turned into a rejection.
        let next: string;
        try { next = new URL(loc, url).toString(); }
        catch { reject(new Error("URL invalide")); return; }
        httpsGet(next, timeoutMs, redirectsLeft - 1).then(resolve, reject);
        return;
      }
      let body = "";
      res.setEncoding("utf8");
      res.on("data", (chunk: string) => {
        body += chunk;
        if (body.length > maxBodyChars) {
          const tooBig = new Error("Réponse trop volumineuse");
          reject(tooBig);
          req.destroy(tooBig);
        }
      });
      // A connection dropped mid-body never emits "end"; forward the stream
      // error so the promise settles instead of staying pending.
      res.on("error", reject);
      res.on("end", () => resolve({ status, body, finalUrl: url }));
    });
    req.setTimeout(timeoutMs, () => { req.destroy(new Error("timeout")); });
    req.on("error", reject);
  });
}

// ----- Recherche Wikisource via API MediaWiki -----

/** One search result as produced by searchWikisource. */
export type WikisourceHit = {
  pageid: number; // page id copied from the API search entry
  title: string; // page title copied from the API search entry
  url: string; // fr.wikisource.org/wiki/ link built from the title (spaces → underscores, then percent-encoded)
  snippet: string; // search excerpt after stripHtml: tags removed, entities decoded, whitespace collapsed
};

/**
 * Runs a `list=search` query against the French Wikisource MediaWiki API.
 *
 * @param query Free-text search string (percent-encoded before being sent).
 * @param limit Value sent as `srlimit`.
 * @returns One hit per API result, with a cleaned snippet and a wiki URL derived from the title.
 * @throws Error when the API does not answer HTTP 200 (JSON.parse errors also propagate).
 */
export async function searchWikisource(query: string, limit = 10): Promise<WikisourceHit[]> {
  const endpoint =
    WIKISOURCE_API +
    "?action=query&list=search" +
    `&srsearch=${encodeURIComponent(query)}` +
    `&format=json&srlimit=${limit}&srprop=snippet`;

  const response = await httpsGet(endpoint);
  if (response.status !== 200) {
    throw new Error(`Wikisource search HTTP ${response.status}`);
  }

  const payload = JSON.parse(response.body) as {
    query?: { search?: Array<{ pageid: number; title: string; snippet: string }> };
  };

  const results: WikisourceHit[] = [];
  for (const entry of payload.query?.search ?? []) {
    const cleanedSnippet = stripHtml(entry.snippet);
    const wikiPath = encodeURIComponent(entry.title.replace(/ /g, "_"));
    results.push({
      pageid: entry.pageid,
      title: entry.title,
      snippet: cleanedSnippet,
      url: `https://fr.wikisource.org/wiki/${wikiPath}`,
    });
  }
  return results;
}

// ----- Récupération du texte d'une page Wikisource -----

/**
 * Downloads the rendered HTML of a Wikisource page through `action=parse`
 * and converts it to plain text, one paragraph per block separated by a
 * blank line.
 *
 * @param title Page title as known to Wikisource.
 * @returns The title reported by the API, a wiki URL built from the requested title, and the text.
 * @throws Error on a non-200 answer or when the response carries no parsed text ("page introuvable").
 */
export async function fetchWikisourcePage(title: string): Promise<{ title: string; url: string; text: string }> {
  const apiUrl = `${WIKISOURCE_API}?action=parse&page=${encodeURIComponent(title)}&format=json&prop=text&disabletoc=1&disableeditsection=1`;
  const response = await httpsGet(apiUrl);
  if (response.status !== 200) {
    throw new Error(`Wikisource parse HTTP ${response.status}`);
  }

  const payload = JSON.parse(response.body) as { parse?: { title: string; text?: { "*": string } } };
  const page = payload.parse;
  if (!page?.text) {
    throw new Error("page introuvable");
  }

  // htmlToParagraphs keeps paragraph breaks, which are re-materialised here
  // as blank lines.
  const paragraphs = htmlToParagraphs(page.text["*"]);
  const wikiPath = encodeURIComponent(title.replace(/ /g, "_"));
  return {
    title: page.title,
    url: `https://fr.wikisource.org/wiki/${wikiPath}`,
    text: paragraphs.join("\n\n"),
  };
}

// Named character references decoded by stripHtml.
const STRIP_HTML_NAMED_ENTITIES = new Map<string, string>([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Reduces an HTML fragment to one line of plain text: tags are dropped,
 * character references are decoded, and every run of whitespace becomes a
 * single space.
 *
 * References are decoded in a single pass so already-escaped text is not
 * decoded twice ("&amp;lt;" yields "&lt;", not "<"). Decimal and hexadecimal
 * numeric references (e.g. "&#039;", "&#x27;") are supported; unknown or
 * invalid references are left untouched.
 *
 * @param s HTML fragment.
 * @returns Trimmed plain text.
 */
function stripHtml(s: string): string {
  return s
    .replace(/<\/?[^>]+>/g, "")
    .replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z]+);/g, (whole: string, ref: string): string => {
      if (!ref.startsWith("#")) {
        return STRIP_HTML_NAMED_ENTITIES.get(ref) ?? whole;
      }
      const isHex = ref.charAt(1) === "x" || ref.charAt(1) === "X";
      const code = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // Reject values String.fromCodePoint cannot represent meaningfully
      // (NUL, out of range, lone surrogates).
      const valid = Number.isInteger(code) && code > 0 && code <= 0x10ffff && !(code >= 0xd800 && code <= 0xdfff);
      return valid ? String.fromCodePoint(code) : whole;
    })
    .replace(/\s+/g, " ") // also collapses U+00A0 produced by "&#160;"
    .trim();
}

// Turns the HTML of a page into readable paragraphs while keeping block
// boundaries (useful both for the LLM in extractCandidates and for the
// sliding-window scan in verifyQuoteAtUrl).
function htmlToParagraphs(html: string): string[] {
  // Elements whose whole content is noise for quotation purposes, plus comments.
  const noise: RegExp[] = [
    /<style[\s\S]*?<\/style>/gi,
    /<script[\s\S]*?<\/script>/gi,
    /<sup[\s\S]*?<\/sup>/gi,
    /<table[\s\S]*?<\/table>/gi,
    /<!--[\s\S]*?-->/g,
  ];
  let cleaned = html;
  for (const pattern of noise) {
    cleaned = cleaned.replace(pattern, "");
  }
  cleaned = cleaned.replace(/<br\s*\/?>/gi, "\n");

  // Split on closing block tags, then keep only chunks long enough to be
  // real text (short ones are numbering, links, …).
  const paragraphs: string[] = [];
  for (const piece of cleaned.split(/<\/(?:p|div|li|h[1-6])>/i)) {
    const plain = stripHtml(piece);
    if (plain.length > 30) {
      paragraphs.push(plain);
    }
  }
  return paragraphs;
}

// ----- Extraction de citations candidates via LLM -----

/** A quotation proposed by the LLM in extractCandidates (not yet verified against any source). */
export type Candidate = {
  texte_fr: string; // quotation text, trimmed; extractCandidates only keeps 15–400 characters
  auteur: string; // author as returned by the model, trimmed; may be an empty string
  oeuvre: string; // work title as returned by the model, trimmed; may be an empty string
  partie?: string; // optional part/section; undefined when the model gives none
  contexte: string; // context sentence (the prompt asks for the preceding sentence); may be empty
};

/**
 * Asks the LLM to pick quotable sentences matching a theme out of a page.
 *
 * Only the first 8000 characters of `pageText` are sent. The model's answer
 * is parsed with extractJson; malformed output yields an empty list rather
 * than an error.
 *
 * @param args.pageTitle     Title shown to the model (used to guess author/work).
 * @param args.pageText      Plain text of the page.
 * @param args.query         Theme the user is looking for.
 * @param args.cfg           Session configuration forwarded to `complete` (temperature forced to 0).
 * @param args.maxCandidates Upper bound on returned candidates (default 5).
 * @returns At most `maxCandidates` candidates whose text is 15–400 characters long.
 */
export async function extractCandidates(args: {
  pageTitle: string;
  pageText: string;
  query: string;
  cfg: SessionConfig;
  maxCandidates?: number;
}): Promise<Candidate[]> {
  const max = args.maxCandidates ?? 5;
  // Truncate so the context window is not blown. 8000 chars ≈ ~2k tokens.
  const truncated = args.pageText.slice(0, 8000);
  const messages: ChatMessage[] = [
    {
      role: "system",
      content:
        "Tu es un assistant qui extrait des citations philosophiques pertinentes " +
        "depuis un extrait d'œuvre. Tu réponds UNIQUEMENT en JSON valide.",
    },
    {
      role: "user",
      content:
        `Page : « ${args.pageTitle} »\n` +
        `Requête utilisateur (thème recherché) : « ${args.query} »\n\n` +
        `Extrait :\n---\n${truncated}\n---\n\n` +
        `Extrais jusqu'à ${max} citations marquantes de cet extrait qui correspondent au thème.\n` +
        `Critères : phrases courtes (15-300 caractères), citables verbatim, pertinentes philosophiquement.\n` +
        `Pour chaque citation, devine l'auteur et l'œuvre à partir du titre de page si possible.\n\n` +
        `Réponds en JSON strict :\n` +
        `{"candidates": [\n` +
        `  {"texte_fr": "...", "auteur": "...", "oeuvre": "...", "partie": "...", "contexte": "<phrase de contexte précédente>"},\n` +
        `  ...\n` +
        `]}\n` +
        `Si rien de pertinent : {"candidates": []}.`,
    },
  ];
  const raw = await complete(messages, { ...args.cfg, temperature: 0 });
  const json = extractJson(raw);
  if (!json) return [];
  const list: unknown[] = Array.isArray(json.candidates) ? json.candidates : [];

  const kept: Candidate[] = [];
  for (const entry of list) {
    // The model was told to return at most `max` items; enforce it here too.
    if (kept.length >= max) break;
    // Model output is untrusted: skip null / scalar entries instead of
    // crashing on a property access.
    if (typeof entry !== "object" || entry === null) continue;
    const obj = entry as Record<string, unknown>;
    const texte_fr = String(obj.texte_fr ?? "").trim();
    // Slightly wider than the 15–300 range requested in the prompt.
    if (texte_fr.length < 15 || texte_fr.length > 400) continue;
    kept.push({
      texte_fr,
      auteur: String(obj.auteur ?? "").trim(),
      oeuvre: String(obj.oeuvre ?? "").trim(),
      partie: obj.partie ? String(obj.partie).trim() : undefined,
      contexte: String(obj.contexte ?? "").trim(),
    });
  }
  return kept;
}

// ----- Détermination des notions pertinentes (LLM) -----

/**
 * Asks the LLM which programme notions a quotation relates to.
 *
 * Each label returned by the model goes through canonicalNotion; labels it
 * does not recognise are dropped, the others are converted with
 * normalizeNotion and de-duplicated in first-seen order.
 *
 * @returns The normalised notion slugs, or an empty array when the model's answer is unusable.
 */
export async function classifyCitationNotions(args: {
  texte_fr: string;
  auteur: string;
  oeuvre: string;
  cfg: SessionConfig;
}): Promise<string[]> {
  const systemPrompt =
    "Tu classes une citation philosophique selon les notions du programme de terminale. " +
    "Tu réponds UNIQUEMENT en JSON valide.";
  const userPrompt = [
    `Citation : « ${args.texte_fr} »`,
    `Auteur : ${args.auteur}`,
    `Œuvre : ${args.oeuvre}`,
    "",
    `Indique les notions du programme auxquelles cette citation se rattache (1 à 3 max).`,
    `Notions disponibles : ${NOTIONS.join(", ")}`,
    "",
    `JSON strict : {"notions": ["<libellé exact>", "..."]}`,
  ].join("\n");
  const messages: ChatMessage[] = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ];

  const answer = await complete(messages, { ...args.cfg, temperature: 0 });
  const parsed = extractJson(answer);
  if (!parsed || !Array.isArray(parsed.notions)) {
    return [];
  }

  const unique = new Set<string>();
  parsed.notions.forEach((label: unknown) => {
    const canon = canonicalNotion(String(label));
    if (!canon) return; // label not recognised
    unique.add(normalizeNotion(canon));
  });
  return Array.from(unique);
}

// ----- Storage user corpus -----

/** A Citation stored in the user's JSONL corpus file. */
export type UserCitation = Citation & {
  source: "user"; // literal tag set by addUserCitation
  addedAt: number; // Date.now() value taken when the citation was created
  deletedAt?: number; // Date.now() value written by deleteUserCitation; absent while the citation is live
};

// Path of the user corpus: <home>/.livre-heros-bac/user-corpus.jsonl (one JSON record per line).
const USER_CORPUS_FILE = join(homedir(), ".livre-heros-bac", "user-corpus.jsonl");

/**
 * Creates the directory that holds the corpus file if needed (the file itself
 * is not created) and returns the corpus file path.
 */
function ensureCorpusFile(): string {
  const parentDir = dirname(USER_CORPUS_FILE);
  mkdirSync(parentDir, { recursive: true });
  return USER_CORPUS_FILE;
}

/** Public accessor for the corpus file path; ensures its parent directory exists as a side effect. */
export function getUserCorpusFile(): string {
  const corpusPath = ensureCorpusFile();
  return corpusPath;
}

/**
 * Loads the user corpus, including tombstoned entries.
 *
 * The file is a log: when several lines share an id, the last one wins.
 * Blank lines and lines that fail to parse are ignored.
 *
 * @returns One record per id, in order of first appearance of each id.
 */
export function readUserCorpus(): UserCitation[] {
  const corpusPath = ensureCorpusFile();
  if (!existsSync(corpusPath)) {
    return [];
  }

  const latestById = new Map<string, UserCitation>();
  const lines = readFileSync(corpusPath, "utf8").split("\n");
  lines
    .filter((entry) => entry.trim() !== "")
    .forEach((entry) => {
      try {
        const record = JSON.parse(entry) as UserCitation;
        latestById.set(record.id, record);
      } catch {
        /* unreadable line: skip */
      }
    });
  return Array.from(latestById.values());
}

/** Returns the user citations that carry no (truthy) `deletedAt` marker. */
export function listUserCitations(): UserCitation[] {
  const live: UserCitation[] = [];
  for (const citation of readUserCorpus()) {
    if (citation.deletedAt) continue;
    live.push(citation);
  }
  return live;
}

/**
 * Builds a new user citation and appends it to the corpus file as one JSON line.
 *
 * The id has the shape "user-<timestamp base36>-<random base36 fragment>".
 *
 * @returns The record that was written.
 */
export function addUserCitation(args: {
  texte_fr: string;
  auteur: string;
  oeuvre: string;
  partie?: string;
  reference?: string;
  notions: string[];
  sources: Array<{ url: string; fetched: string; kind: string; extract?: string }>;
}): UserCitation {
  const { auteur, oeuvre, partie, reference, texte_fr, notions, sources } = args;
  const auteur_slug = slugify(auteur);
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 6);

  // Key order is kept stable because it is the order written to the file.
  const record: UserCitation = {
    id: `user-${timePart}-${randomPart}`,
    auteur,
    auteur_slug,
    oeuvre,
    partie,
    reference,
    texte_fr,
    notions,
    sources,
    source: "user",
    addedAt: Date.now(),
  };

  const target = ensureCorpusFile();
  appendFileSync(target, `${JSON.stringify(record)}\n`);
  return record;
}

/**
 * Soft-deletes a citation by appending a copy of it stamped with `deletedAt`.
 *
 * @returns false when no record with this id exists in the corpus, true otherwise.
 */
export function deleteUserCitation(id: string): boolean {
  for (const citation of readUserCorpus()) {
    if (citation.id !== id) continue;
    const tombstone: UserCitation = { ...citation, deletedAt: Date.now() };
    const target = ensureCorpusFile();
    appendFileSync(target, `${JSON.stringify(tombstone)}\n`);
    return true;
  }
  return false;
}

/**
 * Appends a new version of a citation whose `sources` list has `source` added at the end.
 *
 * @returns The updated record, or null when the id is unknown.
 */
export function addSourceToCitation(id: string, source: { url: string; fetched: string; kind: string; extract?: string }): UserCitation | null {
  const match = readUserCorpus().find((candidate) => candidate.id === id);
  if (match === undefined) {
    return null;
  }
  const previous = match.sources ?? [];
  const next: UserCitation = { ...match, sources: [...previous, source] };
  const target = ensureCorpusFile();
  appendFileSync(target, `${JSON.stringify(next)}\n`);
  return next;
}

/**
 * Builds an ASCII slug: lower-cases, strips diacritics, replaces every run of
 * characters outside [a-z0-9] with a single "-", and trims leading/trailing "-".
 *
 * @param s Arbitrary label (e.g. an author name).
 * @returns The slug; empty when `s` contains no ASCII letter or digit after folding.
 */
function slugify(s: string): string {
  return s
    .toLowerCase()
    .normalize("NFD")
    // Combining Diacritical Marks block, written with escapes so the range
    // stays visible and survives editors / Unicode normalisation of the source.
    .replace(/[\u0300-\u036f]/g, "")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "");
}

// ----- Vérification d'une 2e source externe -----

/** Outcome of verifyQuoteAtUrl. */
export type VerifySourceResult =
  // Match: bestDist is the best lev() score found (<= MATCH_THRESHOLD), matchedSnippet an excerpt of the page text.
  | { ok: true; bestDist: number; matchedSnippet: string }
  // Failure: reason is a user-facing message; bestDist / bestSnippet are only set when the text scan actually ran.
  | { ok: false; reason: string; bestDist?: number; bestSnippet?: string };

// MATCH_THRESHOLD est importé depuis verifier.ts pour garantir que le
// narrateur et le vérificateur de 2e source utilisent la même barre.

/**
 * Checks whether `quote` can be found, approximately, in the page at `url`.
 *
 * The page is fetched with httpsGet, reduced to text, and both the text and
 * the quote are passed through `norm`. The quote is then compared with
 * same-length windows of the page using `lev`, and the best score is compared
 * with MATCH_THRESHOLD. (`lev` is treated here as returning a non-negative
 * score where 0 means identical and 1 is the "no match" ceiling — that is how
 * the original scan initialised and tested it.)
 *
 * @param args.url            http(s) URL of the candidate second source.
 * @param args.quote          Quotation to look for.
 * @param args.forbiddenHosts Hosts that do not count as independent (e.g. the domain of the 1st source).
 *                            Checked against the requested URL and against the URL reached after redirects.
 * @returns `ok: true` with the best score and an excerpt, or `ok: false` with a user-facing reason.
 *          Never rejects for fetch errors: they are reported as `ok: false`.
 */
export async function verifyQuoteAtUrl(args: {
  url: string;
  quote: string;
  forbiddenHosts?: string[]; // hosts to exclude (e.g. domain of the 1st source)
}): Promise<VerifySourceResult> {
  let parsed: URL;
  try { parsed = new URL(args.url); }
  catch { return { ok: false, reason: "URL invalide." }; }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return { ok: false, reason: "URL doit être http ou https." };
  }

  // Hosts are compared lower-cased and without a leading "www.".
  const hostKey = (h: string): string => h.toLowerCase().replace(/^www\./, "");
  const forbidden = (args.forbiddenHosts ?? []).map(hostKey);
  const host = hostKey(parsed.host);
  if (forbidden.includes(host)) {
    return { ok: false, reason: `Cette source est sur le même domaine (${host}) que la 1ère source. Trouve une URL sur un autre site pour garantir l'indépendance.` };
  }

  let fetched: { status: number; body: string; finalUrl: string };
  try { fetched = await httpsGet(args.url, 15000); }
  catch (e) { return { ok: false, reason: `Impossible de récupérer la page : ${e instanceof Error ? e.message : String(e)}` }; }
  if (fetched.status !== 200) return { ok: false, reason: `La page a répondu HTTP ${fetched.status}.` };

  // A URL on another site that merely redirects to the first source's domain
  // is not an independent source either.
  if (forbidden.length > 0 && fetched.finalUrl !== args.url) {
    let finalHost: string | null = null;
    try { finalHost = hostKey(new URL(fetched.finalUrl).host); } catch { finalHost = null; }
    if (finalHost !== null && forbidden.includes(finalHost)) {
      return { ok: false, reason: `Cette URL redirige vers le même domaine (${finalHost}) que la 1ère source. Trouve une URL sur un autre site pour garantir l'indépendance.` };
    }
  }

  // Plain-text extraction from HTML
  let text = fetched.body;
  if (/<html|<body|<p|<div/i.test(text)) {
    // Looks like HTML: extract through htmlToParagraphs
    text = htmlToParagraphs(text).join("\n");
  }
  if (text.length < 50) return { ok: false, reason: "Page trop courte ou vide après extraction." };

  const normQuote = norm(args.quote);
  const qLen = normQuote.length;
  if (qLen < 15) return { ok: false, reason: "Citation trop courte pour vérification." };

  const normText = norm(text);
  let bestDist = 1;
  let bestStart = -1;

  // Fast path: a verbatim occurrence makes the expensive window scan unnecessary.
  const exactAt = normText.indexOf(normQuote);
  if (exactAt >= 0) {
    const d = lev(normQuote, normText.slice(exactAt, exactAt + qLen));
    if (d < bestDist) { bestDist = d; bestStart = exactAt; }
  }

  if (bestDist !== 0) {
    // Coarse scan: windows the size of the quote, advanced by ~10% of its length.
    const step = Math.max(1, Math.floor(qLen / 10));
    for (let i = 0; i <= normText.length - qLen; i += step) {
      const d = lev(normQuote, normText.slice(i, i + qLen));
      if (d < bestDist) {
        bestDist = d;
        bestStart = i;
        if (bestDist === 0) break;
      }
    }
    // Refinement: character-by-character around the best coarse position.
    if (bestStart >= 0 && bestDist !== 0) {
      const refineRange = Math.floor(qLen / 2);
      const from = Math.max(0, bestStart - refineRange);
      const to = Math.min(normText.length - qLen, bestStart + refineRange);
      for (let i = from; i <= to; i++) {
        const d = lev(normQuote, normText.slice(i, i + qLen));
        if (d < bestDist) { bestDist = d; bestStart = i; }
      }
    }
  }

  // bestStart is an offset into normText, not into text. When norm changes the
  // length, the two drift apart, so the offset and length are rescaled by the
  // length ratio before slicing the readable text. This is an approximation:
  // NOTE(review) an exact mapping would need norm to expose an index map.
  let matchedSnippet = "";
  if (bestStart >= 0) {
    const ratio = normText.length > 0 ? text.length / normText.length : 1;
    const rawStart = Math.floor(bestStart * ratio);
    const rawLen = Math.ceil(qLen * ratio);
    matchedSnippet = text.slice(Math.max(0, rawStart - 20), rawStart + rawLen + 40).slice(0, 200);
  }

  if (bestDist <= MATCH_THRESHOLD) {
    return { ok: true, bestDist, matchedSnippet };
  }
  return {
    ok: false,
    reason: `La citation n'a pas été trouvée sur cette page (distance ${bestDist.toFixed(2)}, seuil ${MATCH_THRESHOLD}).`,
    bestDist,
    bestSnippet: matchedSnippet,
  };
}

// ----- Stats par notion -----

/** Per-notion coverage figures produced by computeNotionStats. */
export type NotionStats = {
  notion: string; // canonical label ("La liberté")
  slug: string;   // "liberte"
  activeCount: number;   // citations with >= 2 sources
  pendingCount: number;  // citations with < 2 sources
  level: "faible" | "moyen" | "riche"; // "riche" from 6 active citations, "moyen" from 3, otherwise "faible"
};

/**
 * Computes, for every notion in NOTIONS, how many of the given citations are
 * tagged with it, split by number of sources.
 *
 * A citation counts as "active" with two or more sources and as "pending"
 * otherwise (a missing or non-array `sources` counts as zero). The level is
 * derived from the active count only.
 *
 * @param allCitations Citations to aggregate.
 * @returns One entry per notion, in NOTIONS order.
 */
export function computeNotionStats(allCitations: Citation[]): NotionStats[] {
  const stats: NotionStats[] = [];
  NOTIONS.forEach((notion) => {
    const slug = normalizeNotion(notion);
    const tagged = allCitations.filter((c) => c.notions && c.notions.includes(slug));

    let activeCount = 0;
    let pendingCount = 0;
    for (const citation of tagged) {
      const sourceCount = Array.isArray(citation.sources) ? citation.sources.length : 0;
      if (sourceCount < 2) {
        pendingCount += 1;
      } else {
        activeCount += 1;
      }
    }

    let level: NotionStats["level"] = "faible";
    if (activeCount >= 6) {
      level = "riche";
    } else if (activeCount >= 3) {
      level = "moyen";
    }

    stats.push({ notion, slug, activeCount, pendingCount, level });
  });
  return stats;
}

// extractJson est mutualisé dans src/json-utils.ts (partagé avec sujets.ts).