diff --git a/.gitignore b/.gitignore index 9308a4b..f26d29b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Node.js +node_modules/ + ## Core latex/pdflatex auxiliary files: *.aux *.lof diff --git a/apps/indexer/.gitignore b/apps/indexer/.gitignore new file mode 100644 index 0000000..de4d1f0 --- /dev/null +++ b/apps/indexer/.gitignore @@ -0,0 +1,2 @@ +dist +node_modules diff --git a/apps/indexer/package-lock.json b/apps/indexer/package-lock.json new file mode 100644 index 0000000..57e0595 --- /dev/null +++ b/apps/indexer/package-lock.json @@ -0,0 +1,48 @@ +{ + "name": "indexer", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "indexer", + "version": "1.0.0", + "license": "ISC", + "devDependencies": { + "@types/node": "^25.3.5", + "typescript": "^5.9.3" + } + }, + "node_modules/@types/node": { + "version": "25.3.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.5.tgz", + "integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.18.0" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/apps/indexer/package.json b/apps/indexer/package.json new file mode 100644 index 0000000..1020d10 --- /dev/null +++ 
b/apps/indexer/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "indexer",
+  "version": "1.0.0",
+  "description": "",
+  "main": "dist/index.js",
+  "scripts": {
+    "build": "tsc",
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "type": "commonjs",
+  "devDependencies": {
+    "@types/node": "^25.3.5",
+    "typescript": "^5.9.3"
+  }
+}
diff --git a/apps/indexer/src/IndexQueue.ts b/apps/indexer/src/IndexQueue.ts
new file mode 100644
index 0000000..8de3026
--- /dev/null
+++ b/apps/indexer/src/IndexQueue.ts
@@ -0,0 +1,48 @@
+import { IndexJob } from "./types"
+
+/**
+ * Simple sequential job queue for indexing tasks.
+ * Ensures indexing operations run in order, one at a time.
+ */
+export class IndexQueue {
+  private queue: IndexJob[] = []
+  private processing: Promise<void> | null = null
+
+  /** Append a job to the end of the queue; this does not start processing. */
+  enqueue(job: IndexJob): void {
+    this.queue.push(job)
+  }
+
+  /**
+   * Drain the queue, awaiting `handler` for each job in FIFO order.
+   *
+   * If a run is already in progress its promise is returned, and that run
+   * also picks up the jobs enqueued so far. If `handler` fails, the returned
+   * promise rejects and the remaining jobs stay queued for the next call.
+   */
+  process(handler: (job: IndexJob) => Promise<void>): Promise<void> {
+    if (this.processing) return this.processing
+
+    let finished = false
+    const run = (async () => {
+      try {
+        let job: IndexJob | undefined
+        while ((job = this.queue.shift()) !== undefined) {
+          await handler(job)
+        }
+      } finally {
+        // Clear the marker in the same synchronous step as the final
+        // emptiness check. Clearing it in a later `.finally()` callback
+        // leaves a window in which a newly enqueued job is handed the
+        // already finished run and is never processed.
+        finished = true
+        this.processing = null
+      }
+    })()
+
+    // Do not record a run that already completed synchronously (empty
+    // queue, or a handler that threw before returning a promise).
+    if (!finished) this.processing = run
+    return run
+  }
+}
diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts
new file mode 100644
index 0000000..65ef271
--- /dev/null
+++ b/apps/indexer/src/IndexingEngine.ts
@@ -0,0 +1,106 @@
+import { VaultAdapter } from "./adapters/VaultAdapter"
+import { EmbeddingAdapter } from "./adapters/EmbeddingAdapter"
+import { IndexStore } from "./adapters/IndexStore"
+import { NoteChunker } from "./NoteChunker"
+import { IndexQueue } from "./IndexQueue"
+import { IndexResult, IndexJob } from "./types"
+
+/**
+ * Coordinates the incremental indexing pipeline.
+ */
+export class IndexingEngine {
+  private vault: VaultAdapter
+  private embedder: EmbeddingAdapter
+  private store: IndexStore
+  private chunker: NoteChunker
+  private queue: IndexQueue
+
+  constructor(
+    vault: VaultAdapter,
+    embedder: EmbeddingAdapter,
+    store: IndexStore
+  ) {
+    this.vault = vault
+    this.embedder = embedder
+    this.store = store
+    this.chunker = new NoteChunker()
+    this.queue = new IndexQueue()
+  }
+
+  /**
+   * Enqueue an "update" job for the note and return the promise of the queue run that handles it.
+   */
+  scheduleUpdate(notePath: string): Promise<void> {
+    const job: IndexJob = {
+      type: "update",
+      notePath,
+    }
+
+    this.queue.enqueue(job)
+
+    return this.queue.process(this.processJob.bind(this))
+  }
+
+  /**
+   * Enqueue a "delete" job for the note and return the promise of the queue run that handles it.
+   */
+  scheduleDelete(notePath: string): Promise<void> {
+    const job: IndexJob = {
+      type: "delete",
+      notePath,
+    }
+
+    this.queue.enqueue(job)
+
+    return this.queue.process(this.processJob.bind(this))
+  }
+
+  /**
+   * Queue handler: dispatch a job to indexNote or removeNote according to its type.
+   */
+  private async processJob(job: IndexJob): Promise<void> {
+    if (job.type === "update") {
+      await this.indexNote(job.notePath)
+    }
+
+    if (job.type === "delete") {
+      await this.removeNote(job.notePath)
+    }
+  }
+
+  /**
+   * Read, chunk, embed and store one note; throws if the embedder returns the wrong number of vectors.
+   */
+  private async indexNote(notePath: string): Promise<IndexResult> {
+    const markdown = await this.vault.readNote(notePath)
+
+    const chunks = this.chunker.split(notePath, markdown)
+
+    const chunkTexts = chunks.map((c) => c.text)
+
+    const embeddings = await this.embedder.embed(chunkTexts)
+
+    if (embeddings.length !== chunks.length) {
+      throw new Error(
+        `Embedding adapter returned ${embeddings.length} embeddings for ${chunks.length} chunks`
+      )
+    }
+
+    const result: IndexResult = {
+      notePath,
+      chunks,
+      embeddings,
+    }
+
+    await this.store.saveChunks(notePath, chunks, embeddings)
+
+    return result
+  }
+
+  /**
+   * Remove a note from the index.
+   */
+  private async removeNote(notePath: string): Promise<void> {
+    await this.store.deleteNote(notePath)
+  }
+}
diff --git a/apps/indexer/src/NoteChunker.ts b/apps/indexer/src/NoteChunker.ts
new file mode 100644
index 0000000..5da928f
--- /dev/null
+++ b/apps/indexer/src/NoteChunker.ts
@@ -0,0 +1,38 @@
+import type { NoteChunk } from "./types"
+import crypto from "crypto"
+
+/**
+ * Splits markdown notes into chunks.
+ * Current implementation is simple paragraph-based splitting.
+ */
+export class NoteChunker {
+  /**
+   * Split `markdown` on blank lines into trimmed, non-empty paragraphs.
+   *
+   * Each chunk id is the SHA-1 hex digest of the note path, the paragraph
+   * index and the paragraph text, so editing or moving a paragraph changes
+   * its id.
+   */
+  split(notePath: string, markdown: string): NoteChunk[] {
+    const paragraphs = markdown
+      .split(/\n\s*\n/)
+      .map((p) => p.trim())
+      .filter((p) => p.length > 0)
+
+    const chunks: NoteChunk[] = paragraphs.map((text, index) => {
+      const id = crypto
+        .createHash("sha1")
+        .update(`${notePath}\0${index}\0${text}`)
+        .digest("hex")
+
+      return {
+        id,
+        notePath,
+        text,
+        position: index,
+      }
+    })
+
+    return chunks
+  }
+}
diff --git a/apps/indexer/src/adapters/EmbeddingAdapter.ts b/apps/indexer/src/adapters/EmbeddingAdapter.ts
new file mode 100644
index 0000000..8b304db
--- /dev/null
+++ b/apps/indexer/src/adapters/EmbeddingAdapter.ts
@@ -0,0 +1,13 @@
+/**
+ * Adapter interface for embedding generation.
+ * Allows plugging different embedding models.
+ */
+export interface EmbeddingAdapter {
+  /**
+   * Generate embeddings for chunks.
+   *
+   * IndexingEngine rejects a result whose length differs from the number
+   * of input strings, so return exactly one vector per input.
+   */
+  embed(chunks: string[]): Promise<number[][]>
+}
diff --git a/apps/indexer/src/adapters/IndexStore.ts b/apps/indexer/src/adapters/IndexStore.ts
new file mode 100644
index 0000000..4c08063
--- /dev/null
+++ b/apps/indexer/src/adapters/IndexStore.ts
@@ -0,0 +1,24 @@
+import { NoteChunk } from "../types"
+
+/**
+ * Storage abstraction for indexed notes.
+ * Allows plugging SQLite / vector DB / other stores.
+ */
+export interface IndexStore {
+  /**
+   * Atomically replace all indexed chunks and embeddings for the given notePath.
+   *
+   * Implementations must remove any previously stored chunks that no longer
+   * exist after a note edit.
+   */
+  saveChunks(
+    notePath: string,
+    chunks: NoteChunk[],
+    embeddings: number[][]
+  ): Promise<void>
+
+  /**
+   * Remove all chunks belonging to a note.
+   */
+  deleteNote(notePath: string): Promise<void>
+}
diff --git a/apps/indexer/src/adapters/VaultAdapter.ts b/apps/indexer/src/adapters/VaultAdapter.ts
new file mode 100644
index 0000000..5664a24
--- /dev/null
+++ b/apps/indexer/src/adapters/VaultAdapter.ts
@@ -0,0 +1,16 @@
+/**
+ * Adapter interface for reading notes from the vault.
+ * This keeps the indexing engine independent of the
+ * underlying filesystem implementation.
+ */
+export interface VaultAdapter {
+  /**
+   * Read the contents of a note.
+   */
+  readNote(notePath: string): Promise<string>
+
+  /**
+   * List all notes in the vault.
+   */
+  listNotes(): Promise<string[]>
+}
diff --git a/apps/indexer/src/demo/DemoRunner.ts b/apps/indexer/src/demo/DemoRunner.ts
new file mode 100644
index 0000000..73d13e7
--- /dev/null
+++ b/apps/indexer/src/demo/DemoRunner.ts
@@ -0,0 +1,59 @@
+import { IndexingEngine } from "../IndexingEngine"
+import { VaultAdapter } from "../adapters/VaultAdapter"
+import { EmbeddingAdapter } from "../adapters/EmbeddingAdapter"
+import { IndexStore } from "../adapters/IndexStore"
+import { NoteChunk } from "../types"
+
+/**
+ * Simple in-memory demo implementations
+ */
+
+class DemoVault implements VaultAdapter {
+  async readNote(notePath: string): Promise<string> {
+    return `
+# Example Note
+
+This is the first paragraph.
+
+This is another paragraph about Smart Notes.
+`
+  }
+
+  async listNotes(): Promise<string[]> {
+    return ["demo.md"]
+  }
+}
+
+class DemoEmbedder implements EmbeddingAdapter {
+  async embed(chunks: string[]): Promise<number[][]> {
+    return chunks.map(() => [Math.random(), Math.random(), Math.random()]) // placeholder: random 3-number vector per chunk
+  }
+}
+
+class DemoStore implements IndexStore {
+  async saveChunks(
+    notePath: string,
+    chunks: NoteChunk[],
+    embeddings: number[][]
+  ): Promise<void> {
+    console.log("Indexed note:", notePath)
+    console.log("Chunks:", chunks.length)
+    console.log("Embeddings:", embeddings.length)
+  }
+
+  async deleteNote(notePath: string): Promise<void> {
+    console.log("Deleted note:", notePath)
+  }
+}
+
+async function runDemo() {
+  const engine = new IndexingEngine(
+    new DemoVault(),
+    new DemoEmbedder(),
+    new DemoStore()
+  )
+
+  await engine.scheduleUpdate("demo.md") // wait for the queue run so an indexing failure rejects runDemo
+}
+
+runDemo().catch((error: unknown) => console.error("Demo failed:", error))
diff --git a/apps/indexer/src/index.ts b/apps/indexer/src/index.ts
new file mode 100644
index 0000000..fca5706
--- /dev/null
+++ b/apps/indexer/src/index.ts
@@ -0,0 +1,8 @@
+export * from "./types"
+export * from "./IndexingEngine"
+export * from "./IndexQueue"
+export * from "./NoteChunker"
+
+export * from "./adapters/VaultAdapter"
+export * from "./adapters/EmbeddingAdapter"
+export * from "./adapters/IndexStore"
diff --git a/apps/indexer/src/types.ts b/apps/indexer/src/types.ts
new file mode 100644
index 0000000..fc4d293
--- /dev/null
+++ b/apps/indexer/src/types.ts
@@ -0,0 +1,26 @@
+/**
+ * Represents a semantic chunk extracted from a note.
+ */
+export type NoteChunk = {
+  id: string
+  notePath: string
+  text: string
+  position: number
+}
+
+/**
+ * Job sent to the indexing queue.
+ */
+export type IndexJob = {
+  type: "update" | "delete"
+  notePath: string
+}
+
+/**
+ * Result produced by the indexing pipeline.
+ */
+export type IndexResult = {
+  notePath: string // path of the note that was indexed
+  chunks: NoteChunk[] // chunks produced from the note's markdown
+  embeddings: number[][] // one vector per chunk; IndexingEngine.indexNote throws on a length mismatch
+}
diff --git a/apps/indexer/tsconfig.json b/apps/indexer/tsconfig.json
new file mode 100644
index 0000000..39c5bd9
--- /dev/null
+++ b/apps/indexer/tsconfig.json
@@ -0,0 +1,12 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "CommonJS",
+    "rootDir": "src",
+    "outDir": "dist",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true
+  },
+  "include": ["src"]
+}