// src/chunker.ts
import { Chunk } from "./types";
import { createHash } from "node:crypto";
/**
 * Splits an artifact's text into line-aligned chunks bounded by a character
 * budget, repeating some trailing lines of each chunk at the start of the next
 * one so neighbouring chunks overlap.
 */
export class Chunker {
  /**
   * @param maxChunkSize Character budget per chunk. Every line is counted as
   *   its length plus one (for the newline). A single line longer than the
   *   budget is still emitted, as a chunk of its own.
   * @param overlap Maximum number of trailing lines of a chunk that are
   *   repeated at the start of the following chunk.
   */
  constructor(
    private readonly maxChunkSize: number = 1000,
    private readonly overlap: number = 50,
  ) {}

  /**
   * Chunks `content` on "\n" boundaries.
   *
   * Each returned chunk satisfies
   * `content.split("\n").slice(startLine, endLine + 1).join("\n") === chunk.content`
   * (line numbers are 0-based and inclusive), and both `startLine` and
   * `endLine` strictly increase from one chunk to the next.
   *
   * @param artifactId Identifier of the artifact the text belongs to; it is
   *   copied onto every chunk and is part of the chunk id.
   * @param content Full text to split.
   * @returns The chunks in document order. Empty `content` yields a single
   *   chunk with empty content covering line 0.
   */
  chunk(artifactId: string, content: string): Chunk[] {
    const chunks: Chunk[] = [];
    const lines = content.split("\n");
    let currentChunk: string[] = [];
    let currentSize = 0;
    let startLine = 0;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const lineSize = line.length + 1;

      // Flush before adding a line that would blow the budget. The length
      // check lets an oversized line through when the chunk is still empty.
      if (currentSize + lineSize > this.maxChunkSize && currentChunk.length > 0) {
        chunks.push(this.makeChunk(artifactId, currentChunk, startLine, i - 1));

        currentChunk = this.overlapTail(currentChunk, lineSize);
        currentSize = currentChunk.reduce((sum, l) => sum + l.length + 1, 0);
        // The carried lines are the ones immediately preceding line i.
        startLine = i - currentChunk.length;
      }

      currentChunk.push(line);
      currentSize += lineSize;
    }

    if (currentChunk.length > 0) {
      chunks.push(this.makeChunk(artifactId, currentChunk, startLine, lines.length - 1));
    }
    return chunks;
  }

  /**
   * Picks the trailing lines of a just-emitted chunk to repeat in the next one.
   *
   * The tail is limited three ways: at most `overlap` lines; strictly fewer
   * lines than `previous` holds, so the next chunk always starts later than
   * the previous one; and small enough that the tail plus the incoming line
   * (`nextLineSize`) stays within `maxChunkSize`.
   */
  private overlapTail(previous: string[], nextLineSize: number): string[] {
    // A negative, fractional or NaN overlap degrades to "carry fewer/no lines".
    const maxLines = Math.min(Math.max(0, Math.floor(this.overlap)), previous.length - 1);
    let size = nextLineSize;
    let count = 0;
    while (count < maxLines) {
      const candidateSize = previous[previous.length - 1 - count].length + 1;
      if (size + candidateSize > this.maxChunkSize) {
        break;
      }
      size += candidateSize;
      count++;
    }
    return previous.slice(previous.length - count);
  }

  /**
   * Builds a chunk record from the given lines.
   *
   * The id is "chunk_" followed by the first 12 hex characters of the SHA-256
   * of `artifactId:startLine:endLine`; the chunk text itself is not hashed, so
   * the id depends only on the artifact id and the line range.
   */
  private makeChunk(artifactId: string, lines: string[], startLine: number, endLine: number): Chunk {
    const content = lines.join("\n");
    const hash = createHash("sha256")
      .update(`${artifactId}:${startLine}:${endLine}`)
      .digest("hex")
      .slice(0, 12);
    return {
      id: `chunk_${hash}`,
      artifactId,
      content,
      startLine,
      endLine,
    };
  }
}