// Project Files
// src/piiModel.ts
// PII-NER client — talks to a standalone Node service at PII_SERVICE_URL
// (default http://127.0.0.1:7878) which runs the XLM-RoBERTa NER model.
//
// Rationale: LM Studio's app process has strict macOS Library Validation and
// refuses to load non-Apple native binaries (onnxruntime-node, sharp). We
// side-step that by running the model in a separate process. The plugin
// auto-spawns that process on first use, via the SYSTEM `node` (not the
// bundled LM Studio node — that one is also signed strict and would fail).
//
// If the service can't be started (no system node found, script missing),
// `detectWithModel()` throws and the redaction falls back to regex + LLM-
// supplied names.
import * as fs from "node:fs";
import * as fsp from "node:fs/promises";
import * as os from "node:os";
import * as path from "node:path";
import { spawn } from "node:child_process";
import type { Span, SpanType } from "./detectors";
// ─── Logging ─────────────────────────────────────────────────────────────
// Plugin-side debug log: structured JSON events, one per line. Service-side
// stdout/stderr is piped to a separate file (see ensurePiiService).
//
// Path strategy:
// 1. Walk up from __dirname to find LM Studio's data root (the dir
// containing `extensions/`). This works on macOS, Linux, AND Windows
// regardless of where LM Studio stores its data.
// 2. Fall back to ~/.lmstudio (common convention) if walk-up fails
// (e.g. running from the source tree via test.js).
// 3. Then os.tmpdir(), then os.homedir() as last resorts.
/**
 * Locates LM Studio's data root by climbing upward from this module's
 * directory.
 *
 * The data root is taken to be the parent of the nearest directory on that
 * path (this module's own directory included) whose name is exactly
 * `extensions`.
 *
 * @returns The data-root path, or `null` when the filesystem root is reached
 *   or ten levels have been inspected without a match.
 */
function findLmStudioDataRoot(): string | null {
  const MAX_LEVELS = 10;
  let current = __dirname;
  let remaining = MAX_LEVELS;
  while (remaining-- > 0) {
    const above = path.dirname(current);
    if (path.basename(current) === "extensions") return above;
    // dirname() is a fixed point at the filesystem root: nothing left to climb.
    if (above === current) return null;
    current = above;
  }
  return null;
}
/**
 * Picks the directory that log files are written to.
 *
 * Candidates are tried in order of preference: the LM Studio data root (when
 * it can be located), `~/.lmstudio`, the OS temp directory, and finally the
 * home directory. A candidate is accepted once it can be created (if missing)
 * and passes a write-access check.
 *
 * @returns The first usable candidate, or the OS temp directory when none
 *   qualifies.
 */
function resolveLogDir(): string {
  const dataRoot = findLmStudioDataRoot();
  const preferenceOrder: string[] = [
    ...(dataRoot ? [dataRoot] : []),
    path.join(os.homedir(), ".lmstudio"),
    os.tmpdir(),
    os.homedir(),
  ];
  const isUsable = (dir: string): boolean => {
    try {
      fs.mkdirSync(dir, { recursive: true });
      fs.accessSync(dir, fs.constants.W_OK);
      return true;
    } catch {
      // Not creatable or not writable: move on to the next candidate.
      return false;
    }
  };
  return preferenceOrder.find(isUsable) ?? os.tmpdir();
}
// Resolved once at module load; both log files live in the same directory.
const LOG_DIR = resolveLogDir();
const DEBUG_LOG = path.join(LOG_DIR, "anonymize-debug.log");
const SERVICE_LOG = path.join(LOG_DIR, "anonymize-pii-service.log");
// Boot record: note the resolved paths in the debug log and echo them on
// stdout, so the real location is discoverable even if later dbg() calls
// silently fail. A write failure here is reported on stderr instead.
try {
  const logPaths = `log paths: debug=${DEBUG_LOG} service=${SERVICE_LOG}`;
  fs.appendFileSync(DEBUG_LOG, `[${new Date().toISOString()}] [boot] ${logPaths}\n`);
  // LM Studio's plugin host may capture stdout.
  console.log(`[anonymize] ${logPaths}`);
} catch (e) {
  console.error(`[anonymize] cannot write logs to ${LOG_DIR}: ${e}`);
}
/**
 * Appends one timestamped event line to the plugin debug log.
 *
 * @param scope   Short event label, written in square brackets.
 * @param payload Optional data, serialized with JSON.stringify; omitted from
 *   the line when `undefined`.
 */
function dbg(scope: string, payload?: unknown): void {
  try {
    const stamp = new Date().toISOString();
    const body = payload === undefined ? "" : JSON.stringify(payload);
    fs.appendFileSync(DEBUG_LOG, `[${stamp}] [${scope}] ${body}\n`);
  } catch {
    // Logging is best-effort: serialization or write failures are dropped
    // so they can never break the tool.
  }
}
/**
 * Reports where this module writes its logs.
 *
 * @returns `plugin`: the plugin-side debug log path; `service`: the file that
 *   receives the spawned service's stdout/stderr.
 */
export function getDebugLogPaths(): { plugin: string; service: string } {
  const plugin = DEBUG_LOG;
  const service = SERVICE_LOG;
  return { plugin, service };
}
/**
 * Options accepted by `detectWithModel()`. Every field is optional.
 */
export type PiiModelOptions = {
// Entity-category switches, forwarded as-is in the /detect request body.
// If none of the four is truthy, detectWithModel() returns [] without
// contacting (or spawning) the service.
detectNames?: boolean;
detectAddresses?: boolean;
detectDates?: boolean;
detectIdDocs?: boolean;
// Forwarded unchanged to the service; its interpretation is decided
// service-side (presumably a minimum confidence score — TODO confirm).
threshold?: number;
// Receives human-readable status lines while the service is being spawned,
// polled for readiness, and queried.
onProgress?: (message: string) => void;
// Override the auto-detected system node path. Plumbed from
// configSchematics.piiServiceNodePath.
nodeBinaryPath?: string;
};
// Base URL of the PII service; the PII_SERVICE_URL environment variable
// overrides the loopback default.
const PII_SERVICE_URL = process.env.PII_SERVICE_URL ?? "http://127.0.0.1:7878";
// A single /detect request is aborted after this long.
const REQUEST_TIMEOUT_MS = 60 * 1000;
// Upper bound on how long waitForReady() keeps polling /health.
const SPAWN_TOTAL_TIMEOUT_MS = 120 * 1000; // up to 2 min for model load
// Pause between consecutive /health polls in waitForReady().
const HEALTH_POLL_INTERVAL_MS = 2000;
// In-flight spawn attempt, shared so that concurrent ensurePiiService() calls
// await the same spawn; reset to null once that attempt settles.
let _spawnPromise: Promise<void> | null = null;
/**
 * Probes the service's `/health` endpoint with a 1.5 s budget.
 *
 * Never throws: any failure (connection refused, timeout, non-2xx status,
 * unparsable body) is reported as `{ ok: false, ready: false }`.
 *
 * @returns `ok`: the service answered and reported `ok`; `ready`: it reported
 *   `model_ready`; `lastStatus`: the service's `last_status` when it is a
 *   string, otherwise `undefined`.
 */
async function getHealth(): Promise<{ ok: boolean; ready: boolean; lastStatus?: string }> {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), 1500);
  try {
    const res = await fetch(`${PII_SERVICE_URL}/health`, { signal: ctrl.signal });
    if (!res.ok) return { ok: false, ready: false };
    // The timer is still armed here, so a stalled body read is bounded too.
    const data = (await res.json()) as { ok?: unknown; model_ready?: unknown; last_status?: unknown };
    return {
      ok: !!data.ok,
      ready: !!data.model_ready,
      // Only forward a status that really is text; callers interpolate it.
      lastStatus: typeof data.last_status === "string" ? data.last_status : undefined,
    };
  } catch {
    return { ok: false, ready: false };
  } finally {
    // Clear on every path: previously a rejected fetch left the timer pending.
    clearTimeout(timer);
  }
}
/**
 * Resolves the path of a system `node` binary to run the PII service with.
 *
 * Resolution order:
 *   1. `override`, when it is non-blank (surrounding whitespace is ignored);
 *   2. well-known install locations (`/opt/homebrew/bin`, `/usr/local/bin`,
 *      `/usr/bin`);
 *   3. nvm-managed installs under `~/.nvm/versions/node`, newest version first.
 *
 * Only existence is checked (default `fsp.access` mode).
 *
 * @param override Explicit path to a node binary; blank/undefined means
 *   auto-detect.
 * @returns The first candidate path that exists.
 * @throws Error when `override` is given but does not exist, or when no
 *   candidate exists.
 */
async function findSystemNode(override?: string): Promise<string> {
  // Use the trimmed value for the actual check too; previously only the
  // emptiness test was trimmed, so a padded path was rejected.
  const explicit = override?.trim();
  if (explicit) {
    try {
      await fsp.access(explicit);
      return explicit;
    } catch {
      throw new Error(
        `Configured nodeBinaryPath does not exist: ${explicit}. Clear the field to fall back to auto-detection.`,
      );
    }
  }
  const candidates: string[] = [
    "/opt/homebrew/bin/node",
    "/usr/local/bin/node",
    "/usr/bin/node",
  ];
  // Numeric [major, minor, patch] of an nvm directory name such as "v20.11.0";
  // empty when the name is not version-shaped.
  const versionParts = (name: string): number[] => {
    const m = /^v?(\d+)\.(\d+)\.(\d+)/.exec(name);
    return m ? m.slice(1).map(Number) : [];
  };
  // Newest version first. A plain string sort would rank "v8.x" above
  // "v20.x"; names that are not versions sort last.
  const newestFirst = (a: string, b: string): number => {
    const pa = versionParts(a);
    const pb = versionParts(b);
    for (let i = 0; i < 3; i++) {
      const diff = (pb[i] ?? -1) - (pa[i] ?? -1);
      if (diff !== 0) return diff;
    }
    // Deterministic tie-break: reverse lexicographic, as before.
    return a < b ? 1 : a > b ? -1 : 0;
  };
  try {
    const nvmDir = path.join(os.homedir(), ".nvm", "versions", "node");
    const versions = (await fsp.readdir(nvmDir)).sort(newestFirst);
    for (const v of versions) candidates.push(path.join(nvmDir, v, "bin", "node"));
  } catch {
    /* no nvm */
  }
  for (const c of candidates) {
    try {
      await fsp.access(c);
      return c;
    } catch {
      /* try next */
    }
  }
  throw new Error(
    `Cannot find a system 'node' binary. Tried: ${candidates.join(", ")}. ` +
      `Set 'piiServiceNodePath' in the plugin settings to the absolute path of your node binary (e.g. ` +
      `\`which node\` in a terminal).`,
  );
}
/**
 * Finds the compiled service entry point, `scripts/build/pii-service.js`.
 *
 * The script is looked for relative to this module's directory and each of
 * its next four ancestors, nearest first, because the module may be loaded
 * from a compiled location or from `src/` depending on how LM Studio loads
 * the plugin.
 *
 * @returns The first candidate path that exists.
 * @throws Error listing every searched path when none exists.
 */
async function findServiceScript(): Promise<string> {
  const searched: string[] = [];
  for (let level = 0, base = __dirname; level < 5; level++, base = path.dirname(base)) {
    searched.push(path.join(base, "scripts", "build", "pii-service.js"));
  }
  const hit = searched.find((candidate) => fs.existsSync(candidate));
  if (hit !== undefined) return hit;
  throw new Error(
    `pii-service.js not found near plugin install. Searched: ${searched.join(", ")}. ` +
      `Run \`npx tsc -p scripts/\` in the plugin source folder before installing.`,
  );
}
/**
 * Makes sure the PII service is up and its model is loaded.
 *
 * - Healthy and ready: returns immediately.
 * - Healthy but still loading: waits for readiness.
 * - Not reachable: spawns the service as a detached child of the system
 *   `node`, then waits for readiness. Concurrent callers share one spawn
 *   attempt through `_spawnPromise`.
 *
 * @param opts Supplies `nodeBinaryPath` (node override) and `onProgress`.
 * @throws Error when node or the service script cannot be found, when the
 *   child process fails to start, or when readiness is not reached in time.
 */
async function ensurePiiService(opts: PiiModelOptions): Promise<void> {
  const h = await getHealth();
  if (h.ok && h.ready) return;
  // ok but not ready → service is running but model still loading, just wait
  if (h.ok && !h.ready) return waitForReady(opts);
  // Service not running → spawn it (deduped across concurrent calls)
  if (_spawnPromise) return _spawnPromise;
  _spawnPromise = (async () => {
    const node = await findSystemNode(opts.nodeBinaryPath);
    const script = await findServiceScript();
    dbg("spawn.starting", { node, script, logFile: SERVICE_LOG });
    opts.onProgress?.(`Spawning pii-service: ${node}`);
    // Send the child's stdout/stderr to a tail-able log file so the service's
    // console output is preserved. 'a' = append; the file accumulates across
    // spawns. One descriptor serves both streams.
    const logFd = fs.openSync(SERVICE_LOG, "a");
    let started: Promise<void> | undefined;
    try {
      fs.appendFileSync(SERVICE_LOG, `\n--- spawn ${new Date().toISOString()} ---\n`);
      const child = spawn(node, [script], {
        detached: true,
        stdio: ["ignore", logFd, logFd],
        env: { ...process.env, PII_SERVICE_URL: undefined },
      });
      // Listeners must be attached in the same tick as spawn(). Without an
      // 'error' listener a failed launch (ENOENT, EACCES, …) is raised as an
      // unhandled 'error' event instead of a rejection the caller can handle.
      started = new Promise<void>((resolve, reject) => {
        child.once("spawn", () => resolve());
        child.on("error", (e: Error) => {
          dbg("spawn.error", { node, script, message: e.message });
          reject(new Error(`Failed to start pii-service with ${node}: ${e.message}`));
        });
      });
      child.unref();
      dbg("spawn.pid", { pid: child.pid });
    } finally {
      // The child holds its own duplicate of the descriptor once spawn() has
      // returned; keeping the parent's copy open would leak one per spawn.
      fs.closeSync(logFd);
    }
    // Fail fast on a launch error rather than polling /health for the full
    // readiness timeout.
    await started;
    await waitForReady(opts);
  })().finally(() => {
    _spawnPromise = null;
  });
  return _spawnPromise;
}
/**
 * Polls `/health` until the service reports its model as ready.
 *
 * While waiting, progress is reported through `opts.onProgress`: the
 * service's own status text whenever it differs from the last one forwarded,
 * or a generic "waiting" tick when the service reports no status. Nothing is
 * emitted for a poll whose status is unchanged.
 *
 * @param opts Only `onProgress` is used.
 * @throws Error when readiness is not reached within SPAWN_TOTAL_TIMEOUT_MS.
 */
async function waitForReady(opts: PiiModelOptions): Promise<void> {
  const startedAt = Date.now();
  const elapsedSeconds = (): number => Math.round((Date.now() - startedAt) / 1000);
  const pause = (ms: number): Promise<void> =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));
  let previousStatus = "";
  while (Date.now() - startedAt < SPAWN_TOTAL_TIMEOUT_MS) {
    const health = await getHealth();
    if (health.ok && health.ready) {
      opts.onProgress?.(`pii-service ready (${elapsedSeconds()}s)`);
      return;
    }
    const elapsedSec = elapsedSeconds();
    const status = health.lastStatus;
    if (!status) {
      // No status from the service: emit a plain heartbeat.
      opts.onProgress?.(`Waiting for pii-service… (${elapsedSec}s)`);
    } else if (status !== previousStatus) {
      // Relay the service's live status, but only when it has changed.
      opts.onProgress?.(`[pii-service ${elapsedSec}s] ${status}`);
      previousStatus = status;
    }
    await pause(HEALTH_POLL_INTERVAL_MS);
  }
  throw new Error(
    `pii-service didn't become ready after ${SPAWN_TOTAL_TIMEOUT_MS / 1000}s. ` +
      `Check that ${PII_SERVICE_URL}/health returns model_ready=true.`,
  );
}
/**
 * Runs model-based PII detection on `text` through the PII service.
 *
 * Returns `[]` immediately, without touching the service, when none of the
 * four `detect*` options is truthy. Otherwise the service is started if
 * needed and the text is POSTed to `/detect`.
 *
 * @param text    Text to analyse.
 * @param options Detection switches, threshold, progress callback and node
 *   override; see `PiiModelOptions`.
 * @returns The `spans` array from the service response. Only "is an array" is
 *   checked; the elements are not validated against `Span`.
 * @throws Error when the service cannot be started or reached, the request
 *   exceeds REQUEST_TIMEOUT_MS, the service answers with a non-2xx status, or
 *   the response has no `spans` array.
 */
export async function detectWithModel(
  text: string,
  options: PiiModelOptions = {},
): Promise<Span[]> {
  const anyEnabled =
    options.detectNames || options.detectAddresses ||
    options.detectDates || options.detectIdDocs;
  if (!anyEnabled) return [];
  dbg("detectWithModel.enter", {
    textLength: text.length,
    detectNames: options.detectNames,
    detectAddresses: options.detectAddresses,
    detectDates: options.detectDates,
    detectIdDocs: options.detectIdDocs,
  });
  await ensurePiiService(options);
  options.onProgress?.(`Querying pii-service (${text.length} chars)…`);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
  try {
    const res = await fetch(`${PII_SERVICE_URL}/detect`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        text,
        options: {
          detectNames: options.detectNames,
          detectAddresses: options.detectAddresses,
          detectDates: options.detectDates,
          detectIdDocs: options.detectIdDocs,
          threshold: options.threshold,
        },
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const body = await res.text().catch(() => "");
      dbg("detect.http_error", { status: res.status, body: body.slice(0, 200) });
      throw new Error(`pii-service HTTP ${res.status}: ${body.slice(0, 200)}`);
    }
    // The body may legally be JSON `null` or a primitive; optional chaining
    // routes those to the malformed-response error instead of a TypeError.
    const data = (await res.json()) as { spans?: unknown; ms?: number } | null;
    const spans = data?.spans;
    if (!Array.isArray(spans)) {
      dbg("detect.malformed_response", { data });
      throw new Error("pii-service: malformed response");
    }
    dbg("detect.success", { spans: spans.length, ms: data?.ms });
    options.onProgress?.(`PII detection done — ${spans.length} span(s) in ${data?.ms ?? "?"} ms.`);
    return spans as Span[];
  } catch (e: unknown) {
    // The signal is only ever aborted by the timer above, so an aborted
    // signal means a timeout; report that rather than a bare AbortError.
    if (controller.signal.aborted) {
      dbg("detect.timeout", { timeoutMs: REQUEST_TIMEOUT_MS, textLength: text.length });
      throw new Error(`pii-service request timed out after ${REQUEST_TIMEOUT_MS / 1000}s`);
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}