Project Files

.github

workflows

build-and-release.yml

.turbo

turbo-build.log

docs

assets

settings.png

01_PRD.md

02_ARCHITECTURE.md

03_IMPLEMENTATION_STRATEGY.md

skills

manuscript_audit

SKILL.md

zotero

SKILL.md

tests

embedder.test.ts

modelResolution.test.ts

obsidianTools.test.ts

registry.test.ts

tracker.test.ts

zoteroTools.test.ts

.gitignore

CHANGELOG.md

DEVELOPING.md

esbuild.config.js

LICENSE

manifest.json

package-lock.json

package.json

PRD.md

README.md

tsconfig.json

tests / embedder.test.ts

import test from 'node:test';
import assert from 'node:assert';
import { EmbeddingPipeline } from '../src/ingestion/embedder';

/**
 * Suite for EmbeddingPipeline. The pipeline is given a stubbed client, so every
 * embedding request resolves locally to a fixed three-component vector.
 */
test('EmbeddingPipeline', async (t) => {
  // Build a fresh vector on each call so no two results share an array instance.
  const makeVector = (): number[] => [0.1, 0.2, 0.3];

  // Stub embedder: an array input yields one result per element,
  // a single string yields a single result object.
  const stubEmbed = async (textOrArray: string | string[]) =>
    Array.isArray(textOrArray)
      ? textOrArray.map(() => ({ embedding: makeVector() }))
      : { embedding: makeVector() };

  // Mirrors the client shape the tests rely on: embedding.model() resolves to
  // an object exposing embed().
  const mockClient = {
    embedding: {
      model: async () => ({ embed: stubEmbed })
    }
  };

  await t.test('generateEmbedding returns vector array', async () => {
    const embedder = new EmbeddingPipeline(mockClient);
    const vector = await embedder.generateEmbedding("Test string");

    assert.deepStrictEqual(vector, [0.1, 0.2, 0.3]);
  });

  await t.test('processDocument chunks and batches correctly', async () => {
    // "word " is 5 characters, repeated 500 times => 2500 characters of input.
    const documentBody = "word ".repeat(500);
    // Each batch handed to the callback is recorded here in arrival order.
    const receivedBatches: any[] = [];
    const embedder = new EmbeddingPipeline(mockClient);

    const chunkCount = await embedder.processDocument(
      'obsidian',
      'path/to/file.md',
      'Test Doc',
      documentBody,
      [],
      async (batch) => {
        receivedBatches.push(batch);
      }
    );

    // NOTE(review): the original author states chunkText defaults to 1000 chars
    // with 200 overlap, so 2500 chars should give roughly 3-4 chunks; that
    // default is not visible from this file, hence only "more than one" is checked.
    const producedSeveralChunks = chunkCount > 1;
    const sawAtLeastOneBatch = receivedBatches.length > 0;
    assert.strictEqual(producedSeveralChunks, true, 'Should produce multiple chunks');
    assert.strictEqual(sawAtLeastOneBatch, true, 'Should process at least one batch');

    // Inspect the first chunk of the first batch for the expected fields.
    const leadChunk = receivedBatches[0][0];
    const hasTitleHeader = leadChunk.text.startsWith('Source: Test Doc\n\n');
    assert.strictEqual(leadChunk.source, 'obsidian');
    assert.strictEqual(leadChunk.path, 'path/to/file.md');
    assert.deepStrictEqual(leadChunk.vector, [0.1, 0.2, 0.3]);
    assert.strictEqual(hasTitleHeader, true);
  });
});

omnimind