Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "traul",
"version": "0.1.0",
"version": "0.2.0",
"description": "Personal Intelligence Engine — watches communication streams, identifies patterns, surfaces actionable insights",
"license": "AGPL-3.0-only",
"repository": {
Expand Down
13 changes: 13 additions & 0 deletions skill.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,19 @@ Structured overview with three sections:
2. **Stats** — total messages, channels, contacts, active signals
3. **Volume** — last 7 days message bar chart

### `traul reset`

Reset a data layer to force regeneration. Useful when you need to re-sync, re-chunk, or re-embed data.

| Subcommand | Description |
|------------|-------------|
| `traul reset sync [--source <source>]` | Clear sync cursors; full refetch on next sync. Optional `--source` flag filters to a specific connector (e.g., `markdown`, `slack`). |
| `traul reset chunks` | Delete all chunks and embeddings; rechunk on next sync. |
| `traul reset embed` | Drop and recreate vector tables; re-embed with `traul embed`. |
| `traul reset all` | Reset everything: sync cursors + chunks + embeddings. |

**Auto-migration:** Traul automatically detects version changes on startup. If the chunking algorithm or embedding model/dimensions change between versions, the affected data layers are reset automatically, so no manual `traul reset` is needed after upgrading. The cleared data is regenerated by the next `traul sync` (chunks) and `traul embed` (embeddings).

### Global Options

| Option | Description |
Expand Down
36 changes: 36 additions & 0 deletions src/commands/reset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import type { TraulDB } from "../db/database";
import { EMBED_DIMS } from "../lib/embeddings";

/** Data layers that `traul reset` can clear; `all` selects every layer. */
type Layer = "sync" | "chunks" | "embed" | "all";

const VALID_LAYERS: Layer[] = ["sync", "chunks", "embed", "all"];

/**
 * Reset one data layer so that later commands regenerate it.
 *
 * - `sync`   clears sync cursors (only those of `options.source` when given).
 * - `chunks` deletes chunks and also resets embeddings.
 * - `embed`  resets embeddings.
 * - `all`    performs all of the above.
 *
 * @param db - Database handle the reset is applied to.
 * @param layer - Layer name as supplied by the caller; validated here.
 * @param options - `source` narrows the sync-cursor reset to one connector.
 *   It has no effect on the chunk and embedding resets.
 * @throws Error when `layer` is not one of the valid layers, or when a reset
 *   that touches sync cursors is given a blank `source`.
 */
export function runReset(
  db: TraulDB,
  layer: string,
  options: { source?: string }
): void {
  if (!VALID_LAYERS.includes(layer as Layer)) {
    throw new Error(`Unknown layer: ${layer}. Valid layers: ${VALID_LAYERS.join(", ")}`);
  }

  const doSync = layer === "sync" || layer === "all";
  const doChunks = layer === "chunks" || layer === "all";
  const doEmbed = layer === "embed" || layer === "all" || layer === "chunks";

  const { source } = options;

  // A blank filter (e.g. `--source "$UNSET_VAR"`) would be treated as "no
  // filter" further down and wipe the cursors of every connector. Reject it
  // up front, before any data has been touched.
  if (doSync && source != null && source.trim() === "") {
    throw new Error("--source must not be empty; omit it to reset sync cursors for every source.");
  }

  if (doSync) {
    db.resetSyncCursors(source);
    const scope = source ? `${source} sync cursors` : "all sync cursors";
    console.log(`Reset ${scope}. Run 'traul sync' to refetch.`);
  }

  if (doChunks) {
    db.resetChunks();
    console.log("Reset all chunks. They will be regenerated on next 'traul sync' or 'traul embed'.");
  }

  if (doEmbed) {
    db.resetEmbeddings(EMBED_DIMS);
    console.log("Reset all embeddings. Run 'traul embed' to regenerate.");
  }
}
29 changes: 29 additions & 0 deletions src/db/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,35 @@ export class TraulDB {
this.db.run("DELETE FROM sync_cursors WHERE source = ? AND key = ?", [source, key]);
}

/**
 * Delete rows from `sync_cursors`.
 *
 * @param source - When truthy, only cursors whose `source` column equals this
 *   value are deleted. When omitted (or an empty string), every cursor is
 *   deleted.
 */
resetSyncCursors(source?: string): void {
  if (!source) {
    // No filter supplied: clear the whole table.
    this.db.run("DELETE FROM sync_cursors");
    return;
  }

  this.db.run("DELETE FROM sync_cursors WHERE source = ?", [source]);
}

/**
 * Delete every row from `vec_chunks` and then from `chunks`.
 * Other tables are not touched by this method.
 */
resetChunks(): void {
  // Executed in this order: vec_chunks first, then chunks.
  const statements = ["DELETE FROM vec_chunks", "DELETE FROM chunks"];
  for (const sql of statements) {
    this.db.run(sql);
  }
}

/**
 * Read a value from the `traul_meta` key/value table.
 *
 * @param key - Meta key to look up.
 * @returns The stored value, or `null` when there is no row for `key`.
 */
getMeta(key: string): string | null {
  const lookup = this.db.query<{ value: string }, [string]>(
    "SELECT value FROM traul_meta WHERE key = ?"
  );
  const match = lookup.get(key);
  if (!match) {
    return null;
  }
  return match.value ?? null;
}

/**
 * Insert or overwrite a value in the `traul_meta` key/value table.
 *
 * @param key - Meta key to write.
 * @param value - Value stored under `key`; replaces any existing value.
 */
setMeta(key: string, value: string): void {
  // Upsert: a conflicting key has its value replaced by the new one.
  const upsertSql =
    "INSERT INTO traul_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value";
  this.db.run(upsertSql, [key, value]);
}

/** Close the underlying database handle. */
close(): void {
this.db.close();
}
Expand Down
57 changes: 57 additions & 0 deletions src/db/migrations.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import type { TraulDB } from "./database";
import { CHUNKER_VERSION } from "../lib/chunker";
import { EMBED_MODEL, EMBED_DIMS } from "../lib/embeddings";
import * as log from "../lib/logger";

/** Reports which data layers a `runMigrations` call reset. */
export interface MigrationResult {
  chunksReset: boolean;
  embeddingsReset: boolean;
  syncCursorsReset: boolean;
}

/**
 * Compare the chunker version and embedding model/dimensions recorded in the
 * database with the values compiled into this build, reset the data layers
 * that are out of date, and record the current values.
 *
 * When no value has been recorded yet, nothing is reset; the current values
 * are simply stored.
 *
 * @param db - Database whose meta table and data layers are inspected.
 * @returns Flags describing which layers were reset by this call.
 */
export function runMigrations(db: TraulDB): MigrationResult {
  const result: MigrationResult = {
    chunksReset: false,
    embeddingsReset: false,
    syncCursorsReset: false,
  };

  const storedChunkerVersion = db.getMeta("chunker_version");
  const storedEmbedModel = db.getMeta("embed_model");
  const storedEmbedDims = db.getMeta("embed_dims");

  // Meta values are stored as text, so compare dimensions as strings.
  const currentDims = String(EMBED_DIMS);

  // Chunker version change → reset chunks + embeddings + markdown cursors
  if (storedChunkerVersion !== null && storedChunkerVersion !== CHUNKER_VERSION) {
    log.info(`Chunker updated (v${storedChunkerVersion} → v${CHUNKER_VERSION}), rechunking on next sync...`);
    db.resetChunks();
    db.resetEmbeddings(EMBED_DIMS);
    db.resetSyncCursors("markdown");
    result.chunksReset = true;
    result.embeddingsReset = true;
    result.syncCursorsReset = true;
  }

  // Embed model or dims change → reset embeddings only.
  // Skipped when the chunker branch above already reset them.
  if (
    !result.embeddingsReset &&
    storedEmbedModel !== null &&
    (storedEmbedModel !== EMBED_MODEL || storedEmbedDims !== currentDims)
  ) {
    const reason =
      storedEmbedModel !== EMBED_MODEL
        ? `model changed (${storedEmbedModel} → ${EMBED_MODEL})`
        : `dimensions changed (${storedEmbedDims} → ${currentDims})`;
    log.info(`Embedding ${reason}, re-embed with 'traul embed'...`);
    db.resetEmbeddings(EMBED_DIMS);
    result.embeddingsReset = true;
  }

  // Record the current values, but only those that differ from what is
  // stored. This function runs on every startup; in the common "nothing
  // changed" case it now performs no database writes at all.
  if (storedChunkerVersion !== CHUNKER_VERSION) {
    db.setMeta("chunker_version", CHUNKER_VERSION);
  }
  if (storedEmbedModel !== EMBED_MODEL) {
    db.setMeta("embed_model", EMBED_MODEL);
  }
  if (storedEmbedDims !== currentDims) {
    db.setMeta("embed_dims", currentDims);
  }

  return result;
}
5 changes: 5 additions & 0 deletions src/db/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ const SCHEMA_SQL = `
INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES ('delete', old.id, old.content);
INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);
END;

CREATE TABLE IF NOT EXISTS traul_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`;

export function initializeDatabase(path: string): Database {
Expand Down
23 changes: 17 additions & 6 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ import { runWhatsAppAuth } from "./commands/whatsapp-auth";
import { runDaemonStart, runDaemonStop, runDaemonStatus } from "./commands/daemon";
import { runSql, runSchema } from "./commands/sql";
import { runGet } from "./commands/get";
import { runReset } from "./commands/reset";
import { runMigrations } from "./db/migrations";

// Startup: load configuration and open the database handle used by the
// command actions registered below.
const config = loadConfig();
ensureDbDir(config.database.path);
const db = new TraulDB(config.database.path);
// Runs at module load, before the CLI program is built, so the stored
// chunker/embedding versions are checked on every invocation.
runMigrations(db);

const program = new Command();

program
.name("traul")
.description("Traul — Personal Intelligence Engine")
.version("0.1.0")
.version("0.2.0")
.option("-v, --verbose", "enable verbose output")
.hook("preAction", () => {
if (program.opts().verbose) {
Expand Down Expand Up @@ -137,14 +140,22 @@ program
db.close();
});

// `traul reset <layer> [--source <source>]`: clear a data layer so it is
// regenerated by later commands.
program
  .command("reset")
  .description("Reset a data layer (sync, chunks, embed, all)")
  .argument("<layer>", "layer to reset: sync, chunks, embed, all")
  .option("-s, --source <source>", "filter by source (for sync layer)")
  .action(async (layer: string, options) => {
    try {
      runReset(db, layer, options);
    } finally {
      // runReset throws on an unknown layer; release the database handle on
      // that path too instead of leaving it open.
      db.close();
    }
  });

program
.command("reset-embed")
.description("Drop all embeddings and recreate vec tables (run 'embed' after to regenerate)")
.description("(deprecated: use 'traul reset embed') Drop all embeddings")
.action(async () => {
const { EMBED_DIMS } = await import("./lib/embeddings");
console.log(`Resetting vec tables to ${EMBED_DIMS} dimensions...`);
db.resetEmbeddings(EMBED_DIMS);
console.log("Done. Run 'traul embed' to regenerate embeddings.");
console.log("Note: 'reset-embed' is deprecated, use 'traul reset embed' instead.");
runReset(db, "embed", {});
db.close();
});

Expand Down
1 change: 1 addition & 0 deletions src/lib/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export interface Chunk {
const DEFAULT_CHUNK_SIZE = 1500;
const DEFAULT_OVERLAP = 200;
// Default `threshold` for shouldChunk(): it returns true for text whose
// length exceeds this value.
export const CHUNK_THRESHOLD = 2000;
// Recorded in the database by runMigrations(). When the recorded value differs
// from this one, chunks, embeddings and markdown sync cursors are reset on
// startup.
export const CHUNKER_VERSION = "1";

export function shouldChunk(text: string, threshold: number = CHUNK_THRESHOLD): boolean {
return text.length > threshold;
Expand Down
75 changes: 75 additions & 0 deletions test/commands/reset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { describe, it, expect, beforeEach } from "bun:test";
import { TraulDB } from "../../src/db/database";
import { runReset } from "../../src/commands/reset";

// Tests for runReset(): each case runs against a fresh in-memory database
// seeded with two messages, one chunk and two sync cursors.
describe("runReset", () => {
let db: TraulDB;

beforeEach(() => {
db = new TraulDB(":memory:");
// Seed data
db.upsertMessage({
source: "slack",
source_id: "C1:1",
channel_name: "eng",
author_name: "bob",
content: "hello",
sent_at: 1700000000,
});
db.upsertMessage({
source: "markdown",
source_id: "md:abc",
channel_name: "notes",
author_name: "doc",
content: "x".repeat(3000),
sent_at: 1700000001,
});
// Look up the markdown message's row id so a chunk can be attached to it.
const msg = db.db
.query<{ id: number }, [string]>("SELECT id FROM messages WHERE source_id = ?")
.get("md:abc");
db.replaceChunks(msg!.id, [
{ index: 0, content: "chunk 0", embeddingInput: "chunk 0" },
]);
// One cursor per source so the --source filter can be told apart from a full reset.
db.setSyncCursor("slack", "channel:C1", "ts1");
db.setSyncCursor("markdown", "file:a.md", "hash1");
});

it("reset sync clears all cursors", () => {
runReset(db, "sync", {});
expect(db.getSyncCursor("slack", "channel:C1")).toBeNull();
expect(db.getSyncCursor("markdown", "file:a.md")).toBeNull();
});

it("reset sync with --source filters by source", () => {
runReset(db, "sync", { source: "markdown" });
expect(db.getSyncCursor("markdown", "file:a.md")).toBeNull();
expect(db.getSyncCursor("slack", "channel:C1")).toBe("ts1");
});

it("reset chunks deletes chunks and resets embeddings", () => {
runReset(db, "chunks", {});
expect(db.getChunkEmbeddingStats().total_chunks).toBe(0);
// NOTE(review): beforeEach does not visibly insert any embeddings, so this
// assertion may hold even without a reset — confirm.
expect(db.getEmbeddingStats().embedded_messages).toBe(0);
});

it("reset embed drops vec tables", () => {
runReset(db, "embed", {});
// NOTE(review): same concern as above — no embeddings appear to be seeded.
expect(db.getEmbeddingStats().embedded_messages).toBe(0);
});

it("reset all clears everything", () => {
runReset(db, "all", {});
expect(db.getSyncCursor("slack", "channel:C1")).toBeNull();
expect(db.getChunkEmbeddingStats().total_chunks).toBe(0);
expect(db.getEmbeddingStats().embedded_messages).toBe(0);
});

it("preserves messages on all reset layers", () => {
runReset(db, "all", {});
// Both seeded messages must still be present after the widest reset.
expect(db.getStats().total_messages).toBe(2);
});

it("throws on invalid layer", () => {
expect(() => runReset(db, "invalid", {})).toThrow("Unknown layer");
});
});
87 changes: 87 additions & 0 deletions test/db/database.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,93 @@ describe("TraulDB", () => {
});
});

// resetSyncCursors(): with a source only that source's cursors are removed;
// without one every cursor is removed.
describe("resetSyncCursors", () => {
it("clears all cursors for a source", () => {
db.setSyncCursor("markdown", "file:a.md", "hash1");
db.setSyncCursor("markdown", "file:b.md", "hash2");
db.setSyncCursor("slack", "channel:C1", "ts1");

db.resetSyncCursors("markdown");

expect(db.getSyncCursor("markdown", "file:a.md")).toBeNull();
expect(db.getSyncCursor("markdown", "file:b.md")).toBeNull();
// The cursor of the other source must survive.
expect(db.getSyncCursor("slack", "channel:C1")).toBe("ts1");
});

it("clears all cursors when no source given", () => {
db.setSyncCursor("markdown", "file:a.md", "hash1");
db.setSyncCursor("slack", "channel:C1", "ts1");

db.resetSyncCursors();

expect(db.getSyncCursor("markdown", "file:a.md")).toBeNull();
expect(db.getSyncCursor("slack", "channel:C1")).toBeNull();
});
});

// resetChunks(): removes chunk rows while leaving messages in place.
describe("resetChunks", () => {
// NOTE(review): despite the title, only the chunk count is asserted below;
// no embedding rows are seeded or checked in this test.
it("deletes all chunks and their embeddings", () => {
db.upsertMessage({
source: "markdown",
source_id: "md:abc",
channel_name: "notes",
author_name: "doc",
content: "x".repeat(3000),
sent_at: 1700000000,
});

// Look up the message's row id so chunks can be attached to it.
const msg = db.db
.query<{ id: number }, [string]>("SELECT id FROM messages WHERE source_id = ?")
.get("md:abc");

db.replaceChunks(msg!.id, [
{ index: 0, content: "chunk 0", embeddingInput: "chunk 0" },
{ index: 1, content: "chunk 1", embeddingInput: "chunk 1" },
]);

// Sanity check: both chunks exist before the reset.
const chunksBefore = db.getChunkEmbeddingStats();
expect(chunksBefore.total_chunks).toBe(2);

db.resetChunks();

const chunksAfter = db.getChunkEmbeddingStats();
expect(chunksAfter.total_chunks).toBe(0);
});

it("does not delete messages", () => {
db.upsertMessage({
source: "markdown",
source_id: "md:abc",
channel_name: "notes",
author_name: "doc",
content: "some content",
sent_at: 1700000000,
});

db.resetChunks();

const stats = db.getStats();
expect(stats.total_messages).toBe(1);
});
});

// getMeta()/setMeta(): key/value storage with null for unknown keys and
// overwrite-on-write semantics.
describe("meta", () => {
it("returns null for missing key", () => {
expect(db.getMeta("nonexistent")).toBeNull();
});

it("stores and retrieves a value", () => {
db.setMeta("chunker_version", "1");
expect(db.getMeta("chunker_version")).toBe("1");
});

it("overwrites existing value", () => {
db.setMeta("chunker_version", "1");
// A second write to the same key must replace the first value.
db.setMeta("chunker_version", "2");
expect(db.getMeta("chunker_version")).toBe("2");
});
});

describe("stats", () => {
it("returns correct counts", () => {
db.upsertMessage({
Expand Down
Loading
Loading