// Project file: src/detectors.test.ts
// NIR fixtures derived from MIT-licensed reference validators:
// - betagouv/nir_validate (© SGMAP, MIT)
// - aymericbouzy/french-ssn (© Aymeric Bouzy, MIT)
import { test } from "node:test";
import { strict as assert } from "node:assert";
import { luhn, nirChecksum, ibanCheck, detectAll } from "./detectors.ts";
// Each listed test number must pass the Luhn check.
test("luhn: valid card numbers", () => {
  const validNumbers = [
    "4111111111111111", // Visa test
    "5555555555554444", // Mastercard test
    "378282246310005", // Amex test (15 digits)
    "30569309025904", // Diners (14)
  ];
  for (const pan of validNumbers) {
    assert.equal(luhn(pan), true);
  }
});
// Each listed 16-digit number must be rejected by the Luhn check.
test("luhn: invalid card numbers", () => {
  const invalidNumbers = [
    "4111111111111112",
    "0000000000000001",
    "1234567890123456",
  ];
  for (const pan of invalidNumbers) {
    assert.equal(luhn(pan), false);
  }
});
// Inputs outside the accepted length window are rejected.
test("luhn: length bounds", () => {
  const tooShort = "411111111111"; // 12 digits — too short
  const tooLong = "41111111111111111111"; // 20 — too long
  for (const outOfBounds of [tooShort, tooLong]) {
    assert.equal(luhn(outOfBounds), false);
  }
});
// All-digit NIR bodies checked against a matching key and a wrong key.
test("nirChecksum: nominal cases", () => {
  const fixtures = [
    // Calvados (dept 14), from SGMAP test suite.
    { body: "2550814168025", key: "38", expected: true },
    { body: "2550814168025", key: "39", expected: false },
    // From french-ssn test suite.
    { body: "2890478342163", key: "49", expected: true },
  ];
  for (const { body, key, expected } of fixtures) {
    assert.equal(nirChecksum(body, key), expected);
  }
});
// Bodies carrying the Corsican "2A" department code, in both letter cases.
test("nirChecksum: Corsica 2A", () => {
  const fixtures = [
    // SGMAP: 255082A16802597 → body 255082A168025, key 97.
    { body: "255082A168025", key: "97" },
    // french-ssn: 289042A34216390 → body 289042A342163, key 90.
    { body: "289042A342163", key: "90" },
    // Lower-case letter accepted (we use /i).
    { body: "289042a342163", key: "90" },
  ];
  for (const { body, key } of fixtures) {
    assert.equal(nirChecksum(body, key), true);
  }
});
// Bodies carrying the Corsican "2B" department code, plus one wrong key.
test("nirChecksum: Corsica 2B", () => {
  // french-ssn: 289042B34216320 → body 289042B342163, key 20.
  const upperBody = "289042B342163";
  const lowerBody = "289042b342163";

  assert.equal(nirChecksum(upperBody, "20"), true);
  assert.equal(nirChecksum(lowerBody, "20"), true);
  assert.equal(nirChecksum(upperBody, "21"), false);
});
// Keys at the edge of the key range: 97, 01 and 02.
test("nirChecksum: extreme keys (97 and 01)", () => {
  // french-ssn: key wraps at 97, 01, 02.
  const bodyToKey = [
    ["2890478342212", "97"],
    ["2890478342211", "01"],
    ["2890478342210", "02"],
  ];
  for (const [body, key] of bodyToKey) {
    assert.equal(nirChecksum(body, key), true);
  }
});
// Structurally invalid bodies or keys must be rejected.
test("nirChecksum: malformed input", () => {
  const malformed = [
    { body: "289047834216", key: "49" }, // 12 chars
    { body: "28904783421634", key: "49" }, // 14 chars
    { body: "2890478342163", key: "4" }, // 1-digit key
    { body: "28X0478342163", key: "49" }, // stray letter outside dept slot
    { body: "2890478342163", key: "XX" }, // non-numeric key
  ];
  for (const { body, key } of malformed) {
    assert.equal(nirChecksum(body, key), false);
  }
});
// Specimen IBANs in several surface forms must all validate.
test("ibanCheck: valid IBANs", () => {
  const accepted = [
    // FR specimen from ECBS.
    "FR1420041010050500013M02606",
    // Lowercase variant — ibanCheck normalises internally.
    "fr1420041010050500013m02606",
    // With spaces.
    "FR14 2004 1010 0505 0001 3M02 606",
    // German specimen.
    "DE89370400440532013000",
  ];
  for (const iban of accepted) {
    assert.equal(ibanCheck(iban), true);
  }
});
// Wrong check digits, truncated input and junk must all be rejected.
test("ibanCheck: invalid IBANs", () => {
  const rejected = [
    "FR1420041010050500013M02607", // bad check
    "FR14", // too short
    "ZZ00ABCD", // junk
  ];
  for (const candidate of rejected) {
    assert.equal(ibanCheck(candidate), false);
  }
});
// A sentence containing one email address yields exactly one EMAIL span.
test("detectAll: finds an email", () => {
  const found = detectAll("Contact: alice@example.com please.");
  assert.equal(found.length, 1);

  const [only] = found;
  assert.equal(only.type, "EMAIL");
  assert.equal(only.value, "alice@example.com");
});
// Every listed way of writing the same French number must yield a TEL span.
test("detectAll: French phone formats", () => {
  const formats = [
    "06 12 34 56 78",
    "0612345678",
    "06.12.34.56.78",
    "06-12-34-56-78",
    "+33 6 12 34 56 78",
    "+33612345678",
    "0033 6 12 34 56 78",
    "+33 (0)6 12 34 56 78",
    "+33(0)612345678",
    "0033 (0) 6 12 34 56 78",
  ];
  formats.forEach((phone) => {
    const sentence = `Appelle-moi au ${phone}.`;
    const hit = detectAll(sentence).find((span) => span.type === "TEL");
    assert.ok(hit, `should detect TEL in: ${phone}`);
  });
});
// Digit runs that are not French phone numbers must not yield TEL spans.
test("detectAll: phone does not match too-short or wrong-prefix", () => {
  const telSpansIn = (text: string) =>
    detectAll(text).filter((span) => span.type === "TEL");

  // Starts with 1 (invalid French mobile/landline prefix).
  const wrongPrefix = telSpansIn("Numéro bizarre: 1612345678");
  assert.equal(wrongPrefix.length, 0);

  // Only 9 digits.
  const tooShort = telSpansIn("Code: 061234567");
  assert.equal(tooShort.length, 0);
});
// A space-separated NIR with department 2A inside prose must yield a NIR span.
test("detectAll: NIR Corsica detected in realistic text", () => {
  const spans = detectAll("NIR: 2 89 04 2A 342 163 90 — vérifier.");
  const corsican = spans.find((span) => span.type === "NIR");
  assert.ok(corsican, "should detect Corsican NIR");
});
// A space-grouped card number is detected; stripping separators from the
// detected value gives back the bare 16-digit number.
test("detectAll: card with formatting passes Luhn", () => {
  const card = detectAll("CB: 4111 1111 1111 1111 expire 12/27").find(
    (span) => span.type === "CB",
  );
  assert.ok(card);

  const digitsOnly = card!.value.replace(/[ -]/g, "");
  assert.equal(digitsOnly, "4111111111111111");
});
// A 16-digit reference that fails Luhn must not yield a CB span.
test("detectAll: random 16-digit run that fails Luhn is rejected", () => {
  const cardSpans = detectAll("Référence interne: 1234567890123456").filter(
    (span) => span.type === "CB",
  );
  assert.equal(cardSpans.length, 0);
});
test("detectAll: 16-digit run starting outside BIN range is rejected even if Luhn-valid", () => {
  // "9999999999999995" is a Luhn-valid 16-digit string but no real card starts
  // with 9 — the BIN-prefix [3-6] gate must reject it.
  // Luhn check by hand: each of the fifteen 9s contributes 9 to the sum,
  // whether it is doubled (9*2=18 → 1+8=9) or left as is, and the trailing
  // check digit 5 is added unchanged: 15*9 + 5 = 140, a multiple of 10.
  const cardSpans = detectAll("Ref: 9999999999999995").filter(
    (span) => span.type === "CB",
  );
  assert.equal(cardSpans.length, 0);
});
// One test number per brand; each must yield exactly one CB span.
test("detectAll: all major card brands still detected after BIN tightening", () => {
  const brandNumbers = [
    "4111111111111111", // Visa
    "5555555555554444", // Mastercard
    "378282246310005", // Amex (15 digits, starts with 3)
    "6011111111111117", // Discover (starts with 6)
  ];
  brandNumbers.forEach((pan) => {
    const cardSpans = detectAll(`Carte: ${pan}`).filter((span) => span.type === "CB");
    assert.equal(cardSpans.length, 1, `expected to detect ${pan}`);
  });
});
// A lowercase, space-grouped IBAN inside a sentence must yield an IBAN span.
test("detectAll: IBAN lowercase in prose is detected", () => {
  const spans = detectAll("Mon iban : fr14 2004 1010 0505 0001 3m02 606 merci");
  const match = spans.find((span) => span.type === "IBAN");
  assert.ok(match, "should detect lowercase IBAN");
});
test("detectAll: phone regex does not match inside a longer digit run", () => {
  // Regression: TEL_RE used to match "0850359840" inside the card-like run
  // "6210850359840497", causing 5000+ false positives on AI4Privacy FR.
  const phoneSpans = detectAll("Carte: 6210850359840497 et ID: L1506614655112").filter(
    (span) => span.type === "TEL",
  );
  assert.equal(phoneSpans.length, 0, "no TEL inside long digit identifiers");
});
test("detectAll: phone regex does not match inside a dot-separated AVS / identifier", () => {
  // Regression: TEL_RE matched "0405.6369.62" inside the Swiss AVS
  // "756.0405.6369.62" because the leading `.` separator slipped past
  // the original `(?<!\d)` boundary.
  const identifiers = [
    "AVS: 756.0405.6369.62",
    "Réf interne 123.0612345678.456",
    "ID L1506614655112",
  ];
  identifiers.forEach((sample) => {
    const phoneSpans = detectAll(sample).filter((span) => span.type === "TEL");
    assert.equal(phoneSpans.length, 0, `no spurious TEL in: ${sample}`);
  });
});
// Punctuation touching the number must not prevent detection: each sample
// must yield exactly one TEL span.
test("detectAll: phone still matches when adjacent to non-digit punctuation", () => {
  const samples = [
    "Tel:0612345678,",
    "(06.12.34.56.78)",
    "[0612345678]",
    "tel=+33612345678&",
  ];
  samples.forEach((sample) => {
    const phoneSpans = detectAll(sample).filter((span) => span.type === "TEL");
    assert.equal(phoneSpans.length, 1, `expected TEL in: ${sample}`);
  });
});
// Without options, the +49 / +44 / +1 numbers must yield no TEL span.
test("detectAll: international phones not detected by default", () => {
  const foreignNumbers = "DE: +49 30 12345678 UK: +44 20 7946 0958 US: +1 555 123 4567";
  const phoneSpans = detectAll(foreignNumbers).filter((span) => span.type === "TEL");
  assert.equal(phoneSpans.length, 0);
});
// With `international: true`, each of the three foreign numbers yields a TEL span.
test("detectAll: international phones detected with option", () => {
  const foreignNumbers = "DE: +49 30 12345678 UK: +44 20 7946 0958 US: +1 555 123 4567";
  const allSpans = detectAll(foreignNumbers, { international: true });
  const phoneSpans = allSpans.filter((span) => span.type === "TEL");
  assert.equal(phoneSpans.length, 3);
});
test("detectAll: international option preserves French detection", () => {
  // detectAll may produce overlapping spans (FR + INT for the same +33 number);
  // dedup happens at the anonymizer layer. Here we just verify each format is
  // covered by at least one candidate.
  const mixed = "FR: +33 6 12 34 56 78 DE: +49 30 12345678 domestic: 0612345678";
  const phoneSpans = detectAll(mixed, { international: true }).filter(
    (span) => span.type === "TEL",
  );

  const hasFrenchIntl = phoneSpans.some((span) => span.value.startsWith("+33"));
  assert.ok(hasFrenchIntl, "FR international form");

  const hasGermanIntl = phoneSpans.some((span) => span.value.startsWith("+49"));
  assert.ok(hasGermanIntl, "DE international form");

  const hasDomestic = phoneSpans.some((span) => span.value === "0612345678");
  assert.ok(hasDomestic, "FR domestic form");
});
test("detectAll: international regex does not slice inside long digit runs", () => {
  // Regression equivalent to the AVS bug, but for international form.
  const embedded = "Ref: 99+44.20.7946.0958.123 should not match a phone.";
  const allSpans = detectAll(embedded, { international: true });
  const phoneSpans = allSpans.filter((span) => span.type === "TEL");
  assert.equal(phoneSpans.length, 0);
});
test("detectAll: invalid IBAN format is not redacted", () => {
  // Looks like an IBAN by length but mod-97 fails.
  const ibanSpans = detectAll("Ref: FR9999999999999999999999999").filter(
    (span) => span.type === "IBAN",
  );
  assert.equal(ibanSpans.length, 0);
});
// Each passport-labelled sample must yield exactly one IDDOC span.
test("detectAll: passport number is detected when passport-labeled", () => {
  const labelled = [
    "Passeport n° 19FH84235",
    "passeport numéro 19FH84235",
    "Passport # AB123456",
  ];
  labelled.forEach((sample) => {
    const docSpans = detectAll(sample).filter((span) => span.type === "IDDOC");
    assert.equal(docSpans.length, 1, `expected passport ID in: ${sample}`);
  });
});
// The same token shape without a passport label must yield no IDDOC span.
test("detectAll: passport-like token without context is not redacted", () => {
  const spans = detectAll("Référence interne 19FH84235 à vérifier.");
  const docSpans = spans.filter((span) => span.type === "IDDOC");
  assert.equal(docSpans.length, 0);
});
// An "Adresse :" label yields one ADDRESS span whose value stops before the
// sentence-ending period.
test("detectAll: labelled French address is detected", () => {
  const spans = detectAll("Adresse : 27 rue de la République, 69002 Lyon. Contact : a@b.fr");
  const addressSpans = spans.filter((span) => span.type === "ADDRESS");
  assert.equal(addressSpans.length, 1);

  const [address] = addressSpans;
  assert.equal(address.value, "27 rue de la République, 69002 Lyon");
});
// A street mentioned in running prose, with no address label, yields no ADDRESS span.
test("detectAll: address detector does not redact unlabeled street-like prose", () => {
  const spans = detectAll("Le magasin au 27 rue de la République est fermé.");
  const addressSpans = spans.filter((span) => span.type === "ADDRESS");
  assert.equal(addressSpans.length, 0);
});