Project Files
src / tests / parseDocument.test.ts
import { test } from "node:test";
import * as assert from "node:assert/strict";
import * as path from "path";
import { parseDocument } from "../parsers/documentParser";
// Absolute path of the `test-fixtures` directory, resolved two levels up from
// `__dirname`. Every test below joins its sample file name onto this base.
const FIXTURE_DIR = path.resolve(__dirname, "../../test-fixtures");
/**
 * HTML fixture check: parsing `sample.html` must report success, and the
 * extracted text must contain "Hello There" while containing no "console.log".
 */
test("parseDocument extracts clean text from HTML files", async () => {
  const result = await parseDocument(path.join(FIXTURE_DIR, "sample.html"));

  assert.equal(result.success, true, `Expected success but got ${result.success ? "success" : result.reason}`);

  // The assertion above already throws when parsing failed.
  // NOTE(review): this branch is presumably only here so TypeScript narrows
  // `result` before `document` is read — confirm against parseDocument's return type.
  if (result.success) {
    const { text } = result.document;
    assert.ok(text.includes("Hello There"));
    assert.ok(!text.includes("console.log"));
  }
});
/**
 * Markdown fixture check: parsing `sample.md` must report success, and the
 * extracted text must keep "Sample Markdown Title" while containing neither
 * "const block" nor the raw link target "https://example.com".
 */
test("parseDocument flattens Markdown formatting", async () => {
  const fixturePath = path.join(FIXTURE_DIR, "sample.md");
  const result = await parseDocument(fixturePath);

  assert.equal(result.success, true, `Expected success but got ${result.success ? "success" : result.reason}`);

  // The assertion above already throws when parsing failed.
  // NOTE(review): this branch is presumably only here so TypeScript narrows
  // `result` before `document` is read — confirm against parseDocument's return type.
  if (result.success) {
    const { text } = result.document;
    assert.ok(text.includes("Sample Markdown Title"));
    assert.ok(!text.includes("const block"));
    assert.ok(!text.includes("https://example.com"), "Markdown links should drop raw URLs");
  }
});
/**
 * Plain-text fixture check: parsing `sample.txt` must report success, the
 * extracted text must still contain a blank line ("\n\n"), and it must begin
 * with "This is a plain text document.".
 */
test("parseDocument preserves paragraph spacing for plain text", async () => {
  const fixturePath = path.join(FIXTURE_DIR, "sample.txt");
  const result = await parseDocument(fixturePath);

  assert.equal(result.success, true, `Expected success but got ${result.success ? "success" : result.reason}`);

  // The assertion above already throws when parsing failed.
  // NOTE(review): this branch is presumably only here so TypeScript narrows
  // `result` before `document` is read — confirm against parseDocument's return type.
  if (result.success) {
    const { text } = result.document;
    assert.ok(text.includes("\n\n"), "Plain text should preserve blank lines between paragraphs");
    assert.ok(text.startsWith("This is a plain text document."));
  }
});