Forked from altra/research
src / db.ts
import Database from "better-sqlite3";
import { join } from "path";
import { homedir } from "os";
import { mkdirSync } from "fs";
export interface DbEntity {
id: number;
name: string;
type: string;
tags: string;
createdAt: string;
updatedAt: string;
}
export function openDb(dataPath: string): Database.Database {
const raw = dataPath.trim() || join(homedir(), "research-data");
const dir = raw.startsWith("~/") ? join(homedir(), raw.slice(2)) : raw;
mkdirSync(dir, { recursive: true });
const db = new Database(join(dir, "research.db"));
db.pragma("journal_mode = WAL");
db.exec(`
CREATE TABLE IF NOT EXISTS entities (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
type TEXT NOT NULL,
tags TEXT DEFAULT '[]',
createdAt TEXT NOT NULL,
updatedAt TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS claims (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entityId INTEGER REFERENCES entities(id),
claim TEXT NOT NULL,
type TEXT DEFAULT 'factual',
confidence REAL DEFAULT 0.5,
sourceUrl TEXT DEFAULT '',
verbatimQuote TEXT DEFAULT '',
corroborated INTEGER DEFAULT 0,
contradicted INTEGER DEFAULT 0,
createdAt TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT UNIQUE NOT NULL,
domain TEXT DEFAULT '',
score INTEGER DEFAULT 0,
verdict TEXT DEFAULT 'unknown',
signals TEXT DEFAULT '{}',
fetchedAt TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS timeline_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entityId INTEGER NOT NULL REFERENCES entities(id),
event TEXT NOT NULL,
date TEXT DEFAULT '',
sourceUrl TEXT DEFAULT ''
);
CREATE TABLE IF NOT EXISTS reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
topic TEXT NOT NULL,
format TEXT DEFAULT 'briefing',
entityIds TEXT DEFAULT '[]',
sourceUrls TEXT DEFAULT '[]',
createdAt TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
CREATE INDEX IF NOT EXISTS idx_claims_entityId ON claims(entityId);
CREATE INDEX IF NOT EXISTS idx_claims_sourceUrl ON claims(sourceUrl);
CREATE INDEX IF NOT EXISTS idx_reports_topic ON reports(topic);
CREATE INDEX IF NOT EXISTS idx_timeline_entityId ON timeline_events(entityId);
`);
return db;
}
export function upsertEntity(
db: Database.Database,
name: string,
type: string,
tags: string[],
): number {
const now = new Date().toISOString();
const existing = db.prepare("SELECT id FROM entities WHERE name = ?").get(name) as { id: number } | undefined;
if (existing) {
db.prepare("UPDATE entities SET type = ?, tags = ?, updatedAt = ? WHERE id = ?")
.run(type, JSON.stringify(tags), now, existing.id);
return existing.id;
}
const r = db.prepare(
"INSERT INTO entities (name, type, tags, createdAt, updatedAt) VALUES (?, ?, ?, ?, ?)"
).run(name, type, JSON.stringify(tags), now, now);
return r.lastInsertRowid as number;
}
export function insertClaim(
db: Database.Database,
entityId: number | null,
claim: string,
type: string,
confidence: number,
sourceUrl: string,
verbatimQuote: string,
): void {
db.prepare(
"INSERT INTO claims (entityId, claim, type, confidence, sourceUrl, verbatimQuote, createdAt) VALUES (?, ?, ?, ?, ?, ?, ?)"
).run(entityId, claim, type, confidence, sourceUrl, verbatimQuote, new Date().toISOString());
}
export function upsertSource(
db: Database.Database,
url: string,
domain: string,
score: number,
verdict: string,
signals: object,
): void {
db.prepare(`
INSERT INTO sources (url, domain, score, verdict, signals, fetchedAt)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(url) DO UPDATE SET
score = excluded.score,
verdict = excluded.verdict,
signals = excluded.signals,
fetchedAt = excluded.fetchedAt
`).run(url, domain, score, verdict, JSON.stringify(signals), new Date().toISOString());
}
export function insertTimelineEvent(
db: Database.Database,
entityId: number,
event: string,
date: string,
sourceUrl: string,
): void {
db.prepare(
"INSERT INTO timeline_events (entityId, event, date, sourceUrl) VALUES (?, ?, ?, ?)"
).run(entityId, event, date, sourceUrl);
}
export function insertReport(
db: Database.Database,
topic: string,
format: string,
entityIds: number[],
sourceUrls: string[],
): number {
const r = db.prepare(
"INSERT INTO reports (topic, format, entityIds, sourceUrls, createdAt) VALUES (?, ?, ?, ?, ?)"
).run(topic, format, JSON.stringify(entityIds), JSON.stringify(sourceUrls), new Date().toISOString());
return r.lastInsertRowid as number;
}
export function listReports(
db: Database.Database,
topicFilter?: string,
limit = 20,
): Array<{ id: number; topic: string; format: string; createdAt: string; entityCount: number; sourceCount: number }> {
const sql = topicFilter
? "SELECT id, topic, format, createdAt, json_array_length(entityIds) as entityCount, json_array_length(sourceUrls) as sourceCount FROM reports WHERE topic LIKE ? ORDER BY createdAt DESC LIMIT ?"
: "SELECT id, topic, format, createdAt, json_array_length(entityIds) as entityCount, json_array_length(sourceUrls) as sourceCount FROM reports ORDER BY createdAt DESC LIMIT ?";
const params = topicFilter ? [`%${topicFilter}%`, limit] : [limit];
return db.prepare(sql).all(...params) as Array<{ id: number; topic: string; format: string; createdAt: string; entityCount: number; sourceCount: number }>;
}
export function getEntityByName(db: Database.Database, name: string): DbEntity | undefined {
return db.prepare("SELECT * FROM entities WHERE name = ?").get(name) as DbEntity | undefined;
}
export function getEntityIdsByNames(db: Database.Database, names: string[]): number[] {
if (names.length === 0) return [];
const placeholders = names.map(() => "?").join(", ");
const rows = db.prepare(`SELECT id FROM entities WHERE name IN (${placeholders})`).all(...names) as Array<{ id: number }>;
return rows.map(r => r.id);
}