// Project Files
// eval / demo.ts
// Live demo — runs the same French paragraph through the anonymizer under
// four configurations, from regex-only to regex + full ML coverage. Prints
// each result with ANSI highlights so the contribution of each layer is
// obvious.
//
// Run: `npm run demo`
// First call downloads ~280 MB of model weights to the HF cache.
// Subsequent runs reuse the cache (sub-second model load).
import { anonymize } from "../src/anonymizer.ts";
import { detectWithModel, type PiiModelOptions } from "../src/piiModel.ts";
// ANSI helpers — colorize pseudonyms so the changes between scenarios pop.
// SGR escape sequences for the text styles and colours printed by the demo.
const C = (() => {
  // Builds the escape sequence for one SGR parameter code.
  const sgr = (code: number): string => `\x1b[${code}m`;
  return {
    reset: sgr(0),
    bold: sgr(1),
    dim: sgr(2),
    green: sgr(32),
    cyan: sgr(36),
    yellow: sgr(33),
    magenta: sgr(35),
    red: sgr(31),
    blue: sgr(34),
  };
})();
// Highlight colour for each pseudonym type. Types that share a colour are
// listed together; insertion order matches the listing order below.
const COLOR_BY_TYPE: Record<string, string> = (() => {
  const groups: Array<[string, string[]]> = [
    [C.cyan, ["EMAIL", "TEL"]],
    [C.yellow, ["CB", "NIR", "IBAN"]],
    [C.green, ["NOM"]],
    [C.magenta, ["CUSTOM"]],
    [C.blue, ["ADDRESS"]],
    [C.red, ["DATE", "IDDOC"]],
  ];
  const table: Record<string, string> = {};
  for (const [color, types] of groups) {
    for (const type of types) {
      table[type] = color;
    }
  }
  return table;
})();
/**
 * Wraps every `[TYPE_N]` pseudonym found in `text` in ANSI escape codes.
 *
 * The colour comes from COLOR_BY_TYPE; a type with no entry there is shown
 * in bold only. Each highlighted pseudonym is followed by a reset sequence.
 *
 * @param text Text that may contain `[TYPE_N]` pseudonyms.
 * @returns `text` with every pseudonym wrapped in colour/bold/reset codes.
 */
function colorize(text: string): string {
  return text.replace(/\[(\w+)_\d+\]/g, (match: string, type: string) => {
    // `\w+` can capture names such as "constructor" or "__proto__". Only own
    // keys of the table count, so an inherited Object.prototype member is
    // never interpolated into the output.
    const isKnown = Object.prototype.hasOwnProperty.call(COLOR_BY_TYPE, type);
    const color = (isKnown ? COLOR_BY_TYPE[type] : undefined) ?? C.bold;
    return `${color}${C.bold}${match}${C.reset}`;
  });
}
// A deliberately diverse French paragraph: it mixes the formats our regex
// already covers (email/phone/IBAN/NIR/CB) with the unstructured PII the
// model is supposed to add (names, addresses, dates, ID document numbers).
// Six sentences glued into one line, each separated from the next by a
// single space.
const SAMPLE =
  "Compte-rendu — Marie Dupont, née le 14/03/1985 à Lille." +
  " " +
  "Adresse : 27 rue de la République, 69002 Lyon." +
  " " +
  "Contact : marie.dupont@example.fr, +33 6 12 34 56 78." +
  " " +
  "Passeport n° 19FH84235, NIR 2 89 04 78 342 163 49." +
  " " +
  "Virement reçu sur FR14 2004 1010 0505 0001 3M02 606 ; carte 4111 1111 1111 1111." +
  " " +
  "Le dossier a été confirmé par Pierre Martin lors de la réunion du 12/05/2025.";
/** One demo configuration: what to print for it and which model options to run it with. */
interface Scenario {
  /** Heading printed before the scenario's output. */
  name: string;
  /** One-line explanation printed under the heading. */
  description: string;
  /** Options handed to detectWithModel for this scenario. */
  modelOptions: PiiModelOptions;
}
// The four demo configurations. Each option set extends the previous one, so
// the sets are built cumulatively; every scenario still gets its own object.
const SCENARIOS: Scenario[] = (() => {
  const regexOnly: PiiModelOptions = {};
  const withNames: PiiModelOptions = { ...regexOnly, detectNames: true };
  const withAddresses: PiiModelOptions = { ...withNames, detectAddresses: true };
  const withEverything: PiiModelOptions = {
    ...withAddresses,
    detectDates: true,
    detectIdDocs: true,
  };

  return [
    {
      name: "1. Regex only (baseline)",
      description: "Plugin in its default config — no ML model loaded.",
      modelOptions: regexOnly,
    },
    {
      name: "2. Regex + names",
      description: "detectPiiWithModel ON, modelDetectNames ON. Pseudonyms for names appear.",
      modelOptions: withNames,
    },
    {
      name: "3. Regex + names + addresses",
      description: "Adds modelDetectAddresses. Streets, cities, building numbers, zipcodes get [ADDRESS_N].",
      modelOptions: withAddresses,
    },
    {
      name: "4. Regex + all ML categories",
      description: "All four ML sub-flags ON. Dates and ID documents now redacted too.",
      modelOptions: withEverything,
    },
  ];
})();
/**
 * Renders per-type counts as space-separated `KEY=value` pairs, ordered by
 * key with `localeCompare`.
 *
 * @param counts Count per key.
 * @returns The joined pairs, or "(nothing redacted)" when `counts` has no keys.
 */
function fmtCounts(counts: Record<string, number>): string {
  const labels = Object.keys(counts);
  if (labels.length < 1) {
    return "(nothing redacted)";
  }

  labels.sort((left, right) => left.localeCompare(right));

  const pairs: string[] = [];
  for (const label of labels) {
    pairs.push(`${label}=${counts[label]}`);
  }
  return pairs.join(" ");
}
/**
 * Runs every entry of SCENARIOS against SAMPLE and prints the results.
 *
 * For each scenario it awaits `detectWithModel(SAMPLE, modelOptions)`, passes
 * the returned spans to `anonymize`, and prints the two timings, the counts,
 * the colourized anonymized text and the pseudonym → original mapping.
 * Scenarios run one after another, so each timing covers a single scenario.
 *
 * @returns A promise that resolves once all scenarios have been printed.
 */
async function main(): Promise<void> {
  // Mapping keys are expected to have the `[TYPE_N]` shape that colorize()
  // matches; the brackets are optional here.
  const pseudonymShape = /^\[?(\w+)_\d+\]?$/;

  console.log(`${C.bold}== anonymize plugin — live demo ==${C.reset}`);
  console.log(`${C.dim}Sample text (${SAMPLE.length} chars):${C.reset}`);
  console.log(SAMPLE);
  console.log("");

  for (const scenario of SCENARIOS) {
    console.log(`${C.bold}${scenario.name}${C.reset}`);
    console.log(`${C.dim}${scenario.description}${C.reset}`);

    // Detection and redaction are timed separately.
    const t0 = Date.now();
    const modelSpans = await detectWithModel(SAMPLE, scenario.modelOptions);
    const tModel = Date.now() - t0;

    const t1 = Date.now();
    const result = anonymize(SAMPLE, { modelSpans });
    const tRedact = Date.now() - t1;

    console.log(`${C.dim}Timing: model=${tModel} ms, redact=${tRedact} ms${C.reset}`);
    console.log(`${C.dim}Counts: ${fmtCounts(result.counts)}${C.reset}`);
    console.log("");
    console.log(colorize(result.anonymized));
    console.log("");

    const mappingEntries = Object.entries(result.mapping);
    if (mappingEntries.length > 0) {
      console.log(`${C.dim}Mapping:${C.reset}`);
      for (const [pseudo, original] of mappingEntries) {
        // Take the type from the `[TYPE_N]` shape so a type containing digits
        // or underscores is looked up under its real name. Keys that do not
        // have that shape fall back to stripping brackets, digits and
        // underscores.
        const type =
          pseudonymShape.exec(pseudo)?.[1] ?? pseudo.replace(/[\[\]_\d]/g, "");
        // Own keys only: a type named like an Object.prototype member gets no
        // colour rather than an inherited value.
        const isKnown = Object.prototype.hasOwnProperty.call(COLOR_BY_TYPE, type);
        const c = (isKnown ? COLOR_BY_TYPE[type] : undefined) ?? "";
        console.log(` ${c}${pseudo}${C.reset} → ${original}`);
      }
    }

    console.log(`${C.dim}${"─".repeat(72)}${C.reset}\n`);
  }

  console.log(`${C.dim}Tip: scenarios 3-4 reuse the model loaded in scenario 2 — note the timing drop.${C.reset}`);
}
// Entry point: log any rejection from main() and exit with status 1.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});