# Directory Structure

```
├── .env.example
├── .eslintrc.js
├── .gitignore
├── .prettierrc
├── LICENSE
├── nodemon.json
├── package-lock.json
├── package.json
├── README.md
├── requirements.txt
├── scripts
│   ├── setup.js
│   └── start.js
├── src
│   ├── index.ts
│   ├── lib
│   │   ├── chunking-util.ts
│   │   ├── context-manager.ts
│   │   ├── embedding-service.ts
│   │   ├── errors.ts
│   │   ├── http-server.ts
│   │   ├── logger.ts
│   │   ├── namespace-manager.ts
│   │   ├── search-module.ts
│   │   ├── sqlite-store.ts
│   │   ├── tool-definitions.ts
│   │   └── tool-implementations.ts
│   └── python
│       └── embedding_service.py
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------

```
# Path to SQLite database file
DB_PATH=./data/context.db

PORT=3000

# Use HTTP SSE or Stdio
USE_HTTP_SSE=true

# Logging Configuration: debug, info, warn, error
LOG_LEVEL=info
```

--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------

```
{
  "singleQuote": true,
  "trailingComma": "es5",
  "printWidth": 100,
  "tabWidth": 2,
  "bracketSpacing": true,
  "bracketSameLine": false,
  "semi": true,
  "useTabs": false,
  "proseWrap": "always"
}
```

--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------

```javascript
module.exports = {
  env: {
    browser: true,
    es2021: true,
  },
  plugins: ['simple-import-sort', 'prettier', 'n', 'promise'],
  extends: 'standard-with-typescript',
  overrides: [
    {
      env: {
        node: true,
      },
      files: ['.eslintrc.{js,cjs}'],
      parserOptions: {
        sourceType: 'script',
      },
    },
  ],
  parserOptions: {
    ecmaVersion: 'latest',
    sourceType: 'module',
  },
  rules: {
    '@typescript-eslint/no-explicit-any': 'off',
    '@typescript-eslint/explicit-module-boundary-types': 'off',
    '@typescript-eslint/no-unused-vars': 'error',
    'simple-import-sort/imports': 'warn',
    'simple-import-sort/exports': 'warn',
    'no-async-promise-executor': 'off',
    'prefer-arrow-callback': 'error',
    'no-prototype-builtins': 'off',
    'prefer-const': 'error',
    'no-var': 'error',
    'prefer-template': 'error',
    'no-useless-escape': 'off',
    indent: 'off',
    'no-use-before-define': 'off',
    '@typescript-eslint/indent': 'off',
    '@typescript-eslint/restrict-template-expressions': 'off',
    '@typescript-eslint/prefer-nullish-coalescing': 'off',
    '@typescript-eslint/semi': 'off',
    '@typescript-eslint/explicit-function-return-type': 'off',
    '@typescript-eslint/space-before-function-paren': 'off',
    '@typescript-eslint/no-floating-promises': 'off',
    '@typescript-eslint/strict-boolean-expressions': 'off',
    '@typescript-eslint/comma-dangle': 'off',
    '@typescript-eslint/member-delimiter-style': 'off',
  },
};
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# vuepress v2.x temp and cache directory
.temp
.cache

# Docusaurus cache and generated files
.docusaurus

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# misc
.idea
venv
/data/
src/python/download_model.py
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# Simple Memory Extension MCP Server

An MCP server that extends the context window / memory of agents. Useful when coding big
features or vibe coding, when you need to store and recall progress, key moments, changes, or
anything else worth remembering. Simply ask the agent to store and recall memories whenever you
need, or let the agent fully manage its own memory (through Cursor rules, for example) however
it sees fit.

## Usage

### Starting the Server

```bash
npm install
npm start
```

### Available Tools

#### Context Item Management

- `store_context_item` - Store a value with key in namespace
- `retrieve_context_item_by_key` - Get value by key
- `update_context_item` - Update the value of an existing key
- `delete_context_item` - Delete key-value pair

#### Namespace Management

- `create_namespace` - Create new namespace
- `delete_namespace` - Delete namespace and all contents
- `list_namespaces` - List all namespaces
- `list_context_item_keys` - List keys in a namespace

#### Semantic Search

- `retrieve_context_items_by_semantic_search` - Find items by meaning

### Semantic Search Implementation

1. Query converted to vector using E5 model
2. Text automatically split into chunks for better matching
3. Cosine similarity calculated between query and stored chunks
4. Results filtered by threshold and sorted by similarity
5. Top matches returned with full item values

## Development

```bash
# Dev server
npm run dev

# Format code
npm run format
```

## .env

```
# Path to SQLite database file
DB_PATH=./data/context.db

PORT=3000

# Use HTTP SSE or Stdio
USE_HTTP_SSE=true

# Logging Configuration: debug, info, warn, error
LOG_LEVEL=info
```

## Semantic Search

This project includes semantic search capabilities using the E5 embedding model from Hugging Face.
This allows you to find context items based on their meaning rather than just exact key matches.

### Setup

The semantic search feature requires Python dependencies, but these *should be* automatically
installed when you run: `npm run start`

### Embedding Model

We use the
[intfloat/multilingual-e5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct)
model.

### Notes

Developed mostly while vibe coding, so don't expect much :D. But it works, and I found it
helpful, so here it is. Feel free to contribute or suggest improvements.
```

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

```
torch>=2.0.0
transformers>=4.30.0
sentencepiece>=0.1.99
```

--------------------------------------------------------------------------------
/nodemon.json:
--------------------------------------------------------------------------------

```json
{
  "watch": ["src"],
  "ext": "ts,json",
  "ignore": ["src/**/*.spec.ts"],
  "exec": "node --loader ts-node/esm src/index.ts"
}
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "simple-memory-extension-mcp-server",
  "version": "1.0.0",
  "description": "An MCP server providing a persistent key-value memory store with semantic search for your agents",
  "main": "dist/index.js",
  "bin": {
    "enhanced-kv-mcp-server": "./dist/index.js"
  },
  "type": "module",
  "scripts": {
    "build": "tsc",
    "start": "node scripts/start.js",
    "setup": "node scripts/setup.js",
    "dev": "nodemon",
    "format": "prettier --write \"src/**/*.ts\""
  },
  "keywords": [
    "mcp",
    "key-value",
    "sqlite",
    "persistence",
    "namespace",
    "agents",
    "llm",
    "model-context-protocol",
    "semantic-search"
  ],
  "author": "gmacev",
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.4.1",
    "dotenv": "^16.4.5",
    "sqlite3": "^5.1.7",
    "zod": "^3.22.4"
  },
  "devDependencies": {
    "@types/node": "^22.13.9",
    "@types/sqlite3": "^3.1.11",
    "@typescript-eslint/eslint-plugin": "^6.21.0",
    "@typescript-eslint/parser": "^6.21.0",
    "eslint": "^8.57.1",
    "eslint-config-prettier": "^9.1.0",
    "eslint-config-standard-with-typescript": "^39.1.1",
    "eslint-plugin-import": "^2.31.0",
    "eslint-plugin-n": "^16.6.2",
    "eslint-plugin-prettier": "^5.2.3",
    "eslint-plugin-promise": "^6.6.0",
    "eslint-plugin-simple-import-sort": "^10.0.0",
    "nodemon": "^3.0.1",
    "prettier": "^3.5.3",
    "ts-node": "^10.9.2",
    "typescript": "^5.8.2"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "license": "MIT"
}
```

--------------------------------------------------------------------------------
/src/lib/errors.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Custom error classes for better error classification and handling
 */

/**
 * Base error class for all MCP server errors
 */
export class McpServerError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'McpServerError';
    // Ensure proper prototype chain for instanceof checks
    Object.setPrototypeOf(this, McpServerError.prototype);
  }
}

/**
 * Database-related errors
 */
export class DatabaseError extends McpServerError {
  constructor(
    message: string,
    public readonly cause?: Error
  ) {
    super(message);
    this.name = 'DatabaseError';
    Object.setPrototypeOf(this, DatabaseError.prototype);
  }
}

/**
 * Validation-related errors
 */
export class ValidationError extends McpServerError {
constructor(message: string) { super(message); this.name = 'ValidationError'; Object.setPrototypeOf(this, ValidationError.prototype); } } /** * Error for invalid namespace names */ export class InvalidNamespaceError extends ValidationError { constructor(message: string) { super(message); this.name = 'InvalidNamespaceError'; Object.setPrototypeOf(this, InvalidNamespaceError.prototype); } } /** * Error for invalid key names */ export class InvalidKeyError extends ValidationError { constructor(message: string) { super(message); this.name = 'InvalidKeyError'; Object.setPrototypeOf(this, InvalidKeyError.prototype); } } /** * Helper function to convert unknown errors to typed errors * @param error The original error * @param defaultMessage Default message if error is not an Error object * @returns A properly typed error */ export function normalizeError(error: unknown, defaultMessage = 'Unknown error'): Error { if (error instanceof Error) { return error; } if (typeof error === 'string') { return new Error(error); } return new Error(defaultMessage); } ``` -------------------------------------------------------------------------------- /src/lib/namespace-manager.ts: -------------------------------------------------------------------------------- ```typescript import { SQLiteDataStore } from './sqlite-store.js'; import { InvalidNamespaceError } from './errors.js'; import { logger } from './logger.js'; /** * Creates a new namespace * @param db Database connection * @param namespace Name of the namespace to create * @returns Promise that resolves to true if a new namespace was created, false if it already existed */ export async function createNamespace(db: SQLiteDataStore, namespace: string): Promise<boolean> { if (!namespace) { logger.error('Cannot create namespace: namespace is required'); throw new InvalidNamespaceError('Namespace is required'); } logger.debug('Creating namespace', { namespace }); const created = await db.createNamespace(namespace); if (created) { logger.info('Created new namespace', { namespace }); } else { logger.debug('Namespace already exists', { namespace }); } return created; } /** * Deletes a namespace and all items within it * @param db Database connection * @param namespace Name of the namespace to delete * @returns Promise that resolves to true if the namespace was deleted, false otherwise */ export async function deleteNamespace(db: SQLiteDataStore, namespace: string): Promise<boolean> { if (!namespace) { logger.error('Cannot delete namespace: namespace is required'); throw new InvalidNamespaceError('Namespace is required'); } logger.debug('Deleting namespace', { namespace }); const deleted = await db.deleteNamespace(namespace); if (deleted) { logger.info('Deleted namespace', { namespace }); } else { logger.debug('Namespace does not exist, nothing to delete', { namespace }); } return deleted; } /** * Lists all namespaces * @param db Database connection * @returns Array of namespace names */ export async function listNamespaces(db: SQLiteDataStore): Promise<string[]> { logger.debug('Listing namespaces'); return await db.listAllNamespaces(); } ``` -------------------------------------------------------------------------------- /src/lib/context-manager.ts: -------------------------------------------------------------------------------- ```typescript import { SQLiteDataStore } from './sqlite-store.js'; import { InvalidNamespaceError, InvalidKeyError } from './errors.js'; import { logger } from './logger.js'; /** * Stores a context item * @param db Database connection * 
@param namespace Namespace for the context item * @param key Key for the context item * @param value Value to store * @returns Promise that resolves when the operation is complete */ export async function storeContextItem( db: SQLiteDataStore, namespace: string, key: string, value: any ): Promise<void> { if (!namespace || namespace.trim() === '') { logger.warn('Attempted to store context item with empty namespace', { key }); throw new InvalidNamespaceError('Namespace cannot be empty'); } if (!key || key.trim() === '') { logger.warn('Attempted to store context item with empty key', { namespace }); throw new InvalidKeyError('Key cannot be empty'); } logger.debug('Storing context item', { namespace, key }); // Store the item in the database await db.storeDataItem(namespace, key, value); } /** * Retrieves a context item by its key * @param db Database connection * @param namespace Namespace for the context item * @param key Key for the context item * @returns The context item with timestamps or null if not found */ export async function retrieveContextItemByKey( db: SQLiteDataStore, namespace: string, key: string ): Promise<{ value: any; created_at: string; updated_at: string } | null> { if (!namespace || namespace.trim() === '') { logger.warn('Attempted to retrieve context item with empty namespace', { key }); throw new InvalidNamespaceError('Namespace cannot be empty'); } if (!key || key.trim() === '') { logger.warn('Attempted to retrieve context item with empty key', { namespace }); throw new InvalidKeyError('Key cannot be empty'); } logger.debug('Retrieving context item by key', { namespace, key }); const item = await db.retrieveDataItem(namespace, key); if (!item) { logger.debug('Context item not found', { namespace, key }); return null; } logger.debug('Context item retrieved successfully', { namespace, key }); return { value: item.value, created_at: item.created_at, updated_at: item.updated_at, }; } /** * Delete a context item * @param db Database connection * @param namespace Namespace to delete from * @param key Key to delete * @returns True if item was deleted, false if it did not exist */ export async function deleteContextItem( db: SQLiteDataStore, namespace: string, key: string ): Promise<boolean> { if (!namespace || namespace.trim() === '') { logger.warn('Attempted to delete context item with empty namespace', { key }); throw new InvalidNamespaceError('Namespace cannot be empty'); } if (!key || key.trim() === '') { logger.warn('Attempted to delete context item with empty key', { namespace }); throw new InvalidKeyError('Key cannot be empty'); } logger.debug('Deleting context item', { namespace, key }); const deleted = await db.deleteDataItem(namespace, key); logger.debug('Context item deletion result', { namespace, key, deleted }); return deleted; } /** * List context item keys in a namespace * @param db Database instance * @param namespace Namespace to list items from * @returns Array of key objects for all items in the namespace */ export async function listContextItemKeys( db: SQLiteDataStore, namespace: string ): Promise< Array<{ key: string; created_at: string; updated_at: string; }> > { if (!namespace || namespace.trim() === '') { logger.warn('Attempted to list context item keys with empty namespace'); throw new InvalidNamespaceError('Namespace cannot be empty'); } logger.debug('Listing context item keys', { namespace }); const keys = await db.listContextItemKeys(namespace); logger.debug('Listed context item keys', { namespace, count: keys.length }); return keys; } ``` 
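
A minimal usage sketch of the helpers above (not part of the repository). The `SQLiteDataStore`
options mirror how `src/index.ts` constructs the store; the namespace and key names are
illustrative, and top-level `await` works because the package is an ES module:

```typescript
import { SQLiteDataStore } from './sqlite-store.js';
import {
  storeContextItem,
  retrieveContextItemByKey,
  deleteContextItem,
} from './context-manager.js';

// Open (or create) the SQLite-backed store, as src/index.ts does.
const db = new SQLiteDataStore({ dbPath: './data/context.db', enableChunking: true });

// Store a value, read it back with its timestamps, then delete it.
await storeContextItem(db, 'task_data', 'current_task_id', '42');
const item = await retrieveContextItemByKey(db, 'task_data', 'current_task_id');
console.log(item?.value, item?.created_at, item?.updated_at);
await deleteContextItem(db, 'task_data', 'current_task_id');
```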
-------------------------------------------------------------------------------- /src/lib/logger.ts: -------------------------------------------------------------------------------- ```typescript /** * Structured logging utility for the MCP server * Provides consistent logging format across the application */ // Log levels export enum LogLevel { DEBUG = 'debug', INFO = 'info', WARN = 'warn', ERROR = 'error', } // Log entry structure export interface LogEntry { timestamp: string; level: LogLevel; message: string; context?: Record<string, any>; error?: Error | unknown; } /** * Logger class for structured logging */ export class Logger { private static instance: Logger; private logLevel: LogLevel = LogLevel.INFO; private constructor() {} /** * Get the singleton logger instance */ public static getInstance(): Logger { if (!Logger.instance) { Logger.instance = new Logger(); } return Logger.instance; } /** * Set the minimum log level * @param level Minimum log level to display */ public setLogLevel(level: LogLevel): void { this.logLevel = level; } /** * Log a debug message * @param message Log message * @param context Optional context object */ public debug(message: string, context?: Record<string, any>): void { this.log(LogLevel.DEBUG, message, context); } /** * Log an info message * @param message Log message * @param context Optional context object */ public info(message: string, context?: Record<string, any>): void { this.log(LogLevel.INFO, message, context); } /** * Log a warning message * @param message Log message * @param context Optional context object */ public warn(message: string, context?: Record<string, any>): void { this.log(LogLevel.WARN, message, context); } /** * Log an error message * @param message Log message * @param error Error object * @param context Optional context object */ public error(message: string, error?: Error | unknown, context?: Record<string, any>): void { this.log(LogLevel.ERROR, message, context, error); } /** * Internal logging method * @param level Log level * @param message Log message * @param context Optional context object * @param error Optional error object */ private log( level: LogLevel, message: string, context?: Record<string, any>, error?: Error | unknown ): void { // Skip logging if level is below configured level if (!this.shouldLog(level)) { return; } const entry: LogEntry = { timestamp: new Date().toISOString(), level, message, context, error, }; // Format and output the log entry this.output(entry); } /** * Check if a log level should be displayed * @param level Log level to check * @returns True if the log should be displayed */ private shouldLog(level: LogLevel): boolean { const levels = [LogLevel.DEBUG, LogLevel.INFO, LogLevel.WARN, LogLevel.ERROR]; const configuredIndex = levels.indexOf(this.logLevel); const messageIndex = levels.indexOf(level); return messageIndex >= configuredIndex; } /** * Output a log entry * @param entry Log entry to output */ private output(entry: LogEntry): void { // Format the log entry let output = `[${entry.timestamp}] [${entry.level.toUpperCase()}] ${entry.message}`; // Add context if available if (entry.context && Object.keys(entry.context).length > 0) { output += ` | Context: ${JSON.stringify(entry.context)}`; } // Add error details if available if (entry.error) { if (entry.error instanceof Error) { output += ` | Error: ${entry.error.message}`; if (entry.error.stack) { output += `\n${entry.error.stack}`; } } else { output += ` | Error: ${String(entry.error)}`; } } // Output to console based on log level 
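    // Note: when this server runs over the stdio transport (see src/index.ts),
    // stdout carries the MCP protocol stream, so console.log for debug/info
    // entries can interleave with protocol messages; routing all log output
    // through console.error (stderr) would avoid that.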
switch (entry.level) { case LogLevel.DEBUG: case LogLevel.INFO: console.log(output); break; case LogLevel.WARN: console.warn(output); break; case LogLevel.ERROR: console.error(output); break; } } } // Export a singleton instance export const logger = Logger.getInstance(); ``` -------------------------------------------------------------------------------- /scripts/start.js: -------------------------------------------------------------------------------- ```javascript #!/usr/bin/env node /** * This script orchestrates the complete startup process: * 1. Checks if Python setup is needed (first run) * 2. Builds the TypeScript code * 3. Sets up Python environment and pre-downloads model if needed * 4. Starts the server */ import { spawn } from 'child_process'; import path from 'path'; import fs from 'fs'; import os from 'os'; import { fileURLToPath } from 'url'; // Get the directory name in ES modules const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); // Important paths const VENV_DIR = path.join(__dirname, '..', 'venv'); const PYTHON_SCRIPT_DIR = path.join(__dirname, '..', 'src', 'python'); const EMBEDDING_SCRIPT = path.join(PYTHON_SCRIPT_DIR, 'embedding_service.py'); const SERVER_PATH = path.join(__dirname, '..', 'dist', 'index.js'); // Colors for console output const colors = { reset: '\x1b[0m', green: '\x1b[32m', yellow: '\x1b[33m', blue: '\x1b[34m', red: '\x1b[31m', }; /** * Logs a message with color */ function log(message, color = colors.reset) { console.log(`${color}${message}${colors.reset}`); } /** * Runs a command and returns a promise */ function runCommand(command, args, options = {}) { return new Promise((resolve, reject) => { log(`Running: ${command} ${args.join(' ')}`, colors.blue); const proc = spawn(command, args, { stdio: 'inherit', shell: true, ...options }); proc.on('close', (code) => { if (code === 0) { resolve(); } else { reject(new Error(`Command failed with exit code ${code}`)); } }); proc.on('error', (err) => { reject(err); }); }); } /** * Checks if setup is needed */ function isSetupNeeded() { // Check if Python virtual environment exists const venvPython = os.platform() === 'win32' ? 
path.join(VENV_DIR, 'Scripts', 'python.exe') : path.join(VENV_DIR, 'bin', 'python'); const venvExists = fs.existsSync(venvPython); // Also make sure the embedding script exists const scriptExists = fs.existsSync(EMBEDDING_SCRIPT); // If either doesn't exist, setup is needed const setupNeeded = !venvExists || !scriptExists; if (setupNeeded) { log('First-time setup is needed', colors.yellow); } else { log('Setup already completed, skipping setup phase', colors.green); } return setupNeeded; } /** * Builds the TypeScript code */ async function buildTypeScript() { log('Building TypeScript code...', colors.blue); try { // On Windows use npm directly with shell: true to handle the npm batch file await runCommand('npm', ['run', 'build']); log('TypeScript build completed successfully.', colors.green); } catch (error) { log(`TypeScript build failed: ${error.message}`, colors.red); throw error; } } /** * Runs the Python setup if needed */ async function setupIfNeeded() { // Check if setup is needed if (!isSetupNeeded()) { return; } log('Setting up Python environment...', colors.blue); try { const setupScript = path.join(__dirname, 'setup.js'); // Make setup script executable on Unix systems if (process.platform !== 'win32') { try { fs.chmodSync(setupScript, '755'); } catch (error) { // Ignore errors here, the script will still run with Node } } // Use node with shell: true to handle scripts on Windows await runCommand('node', [setupScript]); log('Python setup completed successfully.', colors.green); } catch (error) { log(`Python setup failed: ${error.message}`, colors.red); throw error; } } /** * Starts the server */ async function startServer() { log('Starting server...', colors.blue); try { // Use node with shell: true to run the built server await runCommand('node', [SERVER_PATH]); } catch (error) { log(`Server failed to start: ${error.message}`, colors.red); throw error; } } /** * Main function */ async function main() { log('Starting launch sequence...', colors.yellow); try { // Step 1: Build TypeScript await buildTypeScript(); // Step 2: Setup Python if needed await setupIfNeeded(); // Step 3: Start server await startServer(); } catch (error) { log(`Launch sequence failed: ${error.message}`, colors.red); process.exit(1); } } // Run the main function main(); ``` -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- ```typescript #!/usr/bin/env node import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import * as dotenv from 'dotenv'; import { SQLiteDataStore } from './lib/sqlite-store.js'; import { registerTools } from './lib/tool-implementations.js'; import { logger, LogLevel } from './lib/logger.js'; import { setupHttpServer, closeAllTransports } from './lib/http-server.js'; import http from 'http'; import path from 'path'; // Load environment variables from .env file dotenv.config(); // Configure logging level from environment variable const configuredLogLevel = process.env.LOG_LEVEL || 'info'; if (configuredLogLevel) { switch (configuredLogLevel.toLowerCase()) { case 'debug': logger.setLogLevel(LogLevel.DEBUG); break; case 'info': logger.setLogLevel(LogLevel.INFO); break; case 'warn': logger.setLogLevel(LogLevel.WARN); break; case 'error': logger.setLogLevel(LogLevel.ERROR); break; default: logger.warn(`Unknown log level: ${configuredLogLevel}, using default`); } } /** * 
Main entry point for the server */ async function main() { let db: SQLiteDataStore; let server: McpServer; try { logger.info('Initializing simple-memory-extension-mcp-server...'); // Get database path from environment variable or use default path const dbPath = process.env.DB_PATH || path.join(process.cwd(), 'data', 'context.db'); logger.info(`Using database at: ${dbPath}`); // Initialize the SQLite database try { db = new SQLiteDataStore({ dbPath: dbPath, enableChunking: true, }); logger.info('Database initialized successfully'); } catch (dbError) { logger.error('Failed to initialize database', dbError); process.exit(1); } // Create an MCP server server = new McpServer({ name: 'simple-memory-extension-mcp-server', version: '1.0.0', }); // Register all tools with the server using schemas from tool-definitions.ts registerTools(server, db); logger.info('All tools registered with the server'); // Check if we should use HTTP server const useHttpSSE = process.env.USE_HTTP_SSE === 'true'; const port = parseInt(process.env.PORT || '3000', 10); if (useHttpSSE) { // Set up HTTP server SSE transport const httpServer = setupHttpServer(server, port); // Setup graceful shutdown for HTTP server setupGracefulShutdown(db, httpServer); } else { // Use standard stdio transport const transport = new StdioServerTransport(); await server.connect(transport); logger.info('Server ready to receive requests via stdio'); // Setup graceful shutdown setupGracefulShutdown(db); } } catch (error) { logger.error('Error initializing server', error); process.exit(1); } } /** * Set up graceful shutdown to properly close database connections */ function setupGracefulShutdown(db: SQLiteDataStore, httpServer?: http.Server) { const shutdown = async () => { logger.info('Shutting down server...'); // Set a timeout to force exit if graceful shutdown takes too long const forceExitTimeout = setTimeout(() => { logger.error('Forced shutdown after timeout'); process.exit(1); }, 5000); // Force exit after 5 seconds try { // Close all active SSE transports await closeAllTransports(); if (httpServer) { await new Promise<void>((resolve) => { httpServer.close(() => { logger.info('HTTP server closed'); resolve(); }); }); } if (db) { // Get underlying DB and close it const sqliteDb = await db.getDb(); if (sqliteDb) { await new Promise<void>((resolve) => { sqliteDb.close(() => { logger.info('Database connection closed'); resolve(); }); }); } } logger.info('Server shutdown complete'); clearTimeout(forceExitTimeout); process.exit(0); } catch (error) { logger.error('Error during shutdown', error); clearTimeout(forceExitTimeout); process.exit(1); } }; // Register shutdown handlers process.on('SIGINT', shutdown); process.on('SIGTERM', shutdown); process.on('unhandledRejection', (reason, promise) => { logger.error('Unhandled Promise rejection', { reason, promise }); }); } // Start the server main().catch((error) => { logger.error('Unhandled error in main function', error); process.exit(1); }); ``` -------------------------------------------------------------------------------- /src/python/embedding_service.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Embedding service using the E5 model from Hugging Face. This script provides a bridge between Node.js and the E5 model. 
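
Protocol, as implemented in main() below: each line read from stdin is parsed
as one JSON command, each response is written to stdout as one JSON line, and
status messages go to stderr.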
""" import sys import json import torch import torch.nn.functional as F from transformers import AutoTokenizer, AutoModel # Initialize model MODEL_NAME = "intfloat/multilingual-e5-large-instruct" tokenizer = None model = None def initialize_model(): """Initialize the model and tokenizer.""" global tokenizer, model if tokenizer is None or model is None: print("Initializing E5 model...", file=sys.stderr) tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModel.from_pretrained(MODEL_NAME) print(f"Model initialized: {MODEL_NAME}", file=sys.stderr) def average_pool(last_hidden_states, attention_mask): """Average pooling function for the model output.""" last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] def format_text_for_embedding(text, is_query=False): """ Format text for embedding based on whether it's a query or passage. For queries, we add an instruction prefix. For passages, we use the text as is. """ if is_query: # For queries, add instruction task_description = "Given a web search query, retrieve relevant passages that answer the query" return f'Instruct: {task_description}\nQuery: {text}' else: # For passages, use as is return text def generate_embedding(text, is_query=False): """Generate embedding for a single text.""" initialize_model() # Format text based on type input_text = format_text_for_embedding(text, is_query) # Tokenize encoded_input = tokenizer( input_text, max_length=512, padding=True, truncation=True, return_tensors='pt' ) # Generate embedding with torch.no_grad(): model_output = model(**encoded_input) # Pool and normalize embedding = average_pool(model_output.last_hidden_state, encoded_input['attention_mask']) embedding = F.normalize(embedding, p=2, dim=1) # Convert to list return embedding[0].tolist() def generate_embeddings(texts, is_query=False): """Generate embeddings for multiple texts.""" initialize_model() if not texts or not isinstance(texts, list): raise ValueError("Texts must be a non-empty list of strings") # Format each text based on type input_texts = [format_text_for_embedding(text, is_query) for text in texts] # Tokenize encoded_input = tokenizer( input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt' ) # Generate embeddings with torch.no_grad(): model_output = model(**encoded_input) # Pool and normalize embeddings = average_pool(model_output.last_hidden_state, encoded_input['attention_mask']) embeddings = F.normalize(embeddings, p=2, dim=1) # Convert to list return embeddings.tolist() def process_command(command_json): """Process a command from Node.js.""" try: command = command_json.get("command") if command == "initialize": initialize_model() return {"status": "initialized"} elif command == "generate_embedding": text = command_json.get("text") is_query = command_json.get("is_query", False) if not text: return {"error": "No text provided"} embedding = generate_embedding(text, is_query) return {"embedding": embedding} elif command == "generate_embeddings": texts = command_json.get("texts") is_query = command_json.get("is_query", False) if not texts or not isinstance(texts, list): return {"error": "No texts provided or invalid format"} embeddings = generate_embeddings(texts, is_query) return {"embeddings": embeddings} else: return {"error": f"Unknown command: {command}"} except Exception as e: return {"error": str(e)} def main(): """Main function to process commands from stdin.""" print("E5 Embedding Service 
started", file=sys.stderr) initialize_model() for line in sys.stdin: try: command_json = json.loads(line) result = process_command(command_json) print(json.dumps(result), flush=True) except json.JSONDecodeError: print(json.dumps({"error": "Invalid JSON"}), flush=True) except Exception as e: print(json.dumps({"error": str(e)}), flush=True) if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /src/lib/tool-definitions.ts: -------------------------------------------------------------------------------- ```typescript import { z } from 'zod'; // Define Zod schemas for tool parameters const namespaceSchema = z .string() .min(1, 'Namespace must not be empty') .describe( 'A unique identifier for a collection of related key-value pairs. Use logical naming patterns like "user_preferences", "conversation_context", or "task_data" to organize information.' ); const keySchema = z .string() .min(1, 'Key must not be empty') .describe( 'A unique identifier for a value within a namespace. Use descriptive keys that indicate the data\'s purpose, like "last_search_query", "user_location", or "current_task_id".' ); const nSchema = z .number() .int('Number of turns must be an integer') .positive('Number of turns must be greater than 0') .describe( 'A positive integer specifying the quantity of items to retrieve. Use smaller values for recent context, larger values for more comprehensive history.' ); // Define tool schemas export const toolSchemas = { // Context Item Management retrieve_context_item_by_key: { name: 'retrieve_context_item_by_key', description: 'Retrieves a stored value by its key from a specific namespace. Although values are stored as strings, they are automatically parsed and returned with their original data types. For example, JSON objects/arrays will be returned as structured data, numbers as numeric values, booleans as true/false, and null as null.', schema: z.object({ namespace: namespaceSchema, key: keySchema, }), }, store_context_item: { name: 'store_context_item', description: 'Stores a NEW value associated with a key in a specified namespace. IMPORTANT: All values must be passed as strings, but the system will intelligently parse them to preserve their data types. This tool will fail if the key already exists - use update_context_item for modifying existing items.', schema: z.object({ namespace: namespaceSchema, key: keySchema, value: z .string() .describe( 'The value to store as a string. Examples: "{\"name\":\"John\"}" for objects, "[1,2,3]" for arrays, "42.5" for numbers, "true"/"false" for booleans, "null" for null. The system will automatically detect and preserve the appropriate data type when retrieving.' ), }), }, update_context_item: { name: 'update_context_item', description: 'Updates an EXISTING value associated with a key in a specified namespace. This tool will fail if the key does not exist - use store_context_item for creating new items. All values must be passed as strings but will be intelligently parsed to preserve data types.', schema: z.object({ namespace: namespaceSchema, key: keySchema, value: z .string() .describe( 'The new value to store as a string. Examples: "{\"name\":\"John\"}" for objects, "[1,2,3]" for arrays, "42.5" for numbers, "true"/"false" for booleans, "null" for null. The system will automatically detect and preserve the appropriate data type when retrieving.' ), }), }, delete_context_item: { name: 'delete_context_item', description: 'Deletes a key-value pair from a namespace. 
Use this to remove data that is no longer needed or to clean up temporary storage.', schema: z.object({ namespace: namespaceSchema, key: keySchema, }), }, // Namespace Management create_namespace: { name: 'create_namespace', description: 'Creates a new namespace for storing key-value pairs. Use this before storing items in a new namespace. If the namespace already exists, this is a no-op and returns success.', schema: z.object({ namespace: namespaceSchema, }), }, delete_namespace: { name: 'delete_namespace', description: 'Deletes an entire namespace and all key-value pairs within it. Use this for cleanup when all data in a namespace is no longer needed.', schema: z.object({ namespace: namespaceSchema, }), }, list_namespaces: { name: 'list_namespaces', description: 'Lists all available namespaces. Use this to discover what namespaces exist before retrieving or storing data.', schema: z.object({}), }, // Semantic search retrieve_context_items_by_semantic_search: { name: 'retrieve_context_items_by_semantic_search', description: 'Retrieves context items using semantic search based on query relevance', schema: z.object({ namespace: namespaceSchema, query: z.string().describe('The semantic query to search for'), similarity_threshold: z .number() .optional() .describe('Minimum similarity score (0-1) to include in results'), limit: z.number().optional().describe('Maximum number of results to return'), }), }, // Context Item Keys Listing list_context_item_keys: { name: 'list_context_item_keys', description: 'Lists keys and timestamps for all items in a namespace without retrieving their values. Use this to discover what data is available before retrieving specific items.', schema: z.object({ namespace: namespaceSchema, }), }, }; ``` -------------------------------------------------------------------------------- /src/lib/search-module.ts: -------------------------------------------------------------------------------- ```typescript import { SQLiteDataStore } from './sqlite-store.js'; import { ContextItem } from './sqlite-store.js'; import { logger } from './logger.js'; import { EmbeddingService } from './embedding-service.js'; // Singleton embedding service instance let embeddingService: EmbeddingService | null = null; /** * Gets or creates the embedding service */ async function getEmbeddingService(): Promise<EmbeddingService> { if (!embeddingService) { embeddingService = new EmbeddingService(); await embeddingService.initialize(); } return embeddingService; } /** * Retrieves context items using semantic search * @param db Database connection * @param namespace Namespace to search in * @param query The semantic query text * @param options Additional search options * @returns Array of matching context items with similarity scores */ export async function retrieveBySemantic( db: SQLiteDataStore, namespace: string, query: string, options: { limit?: number; threshold?: number; tags?: string[]; } = {} ): Promise<Array<ItemWithEmbedding & { similarity: number }>> { if (!namespace || namespace.trim() === '') { throw new Error('Namespace cannot be empty'); } if (!query || query.trim() === '') { throw new Error('Query cannot be empty'); } logger.debug('Performing semantic search', { namespace, query, options }); try { // Generate embedding for the query const queryEmbedding = await generateEmbedding(query); // Get all items from the namespace with optional tag filtering const items = await getNamespaceItems(db, namespace); // For items with embeddings, calculate similarity and sort const results = await 
calculateSimilarities(items, queryEmbedding);

    // Filter by threshold and limit results
    return results
      .filter((item) => item.similarity >= (options.threshold || 0.7))
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, options.limit || 10);
  } catch (error: any) {
    logger.error('Error in semantic search', error, { namespace, query });
    throw new Error(`Failed to perform semantic search: ${error.message}`);
  }
}

/**
 * Generates an embedding for a search query using the E5 model
 */
async function generateEmbedding(text: string): Promise<number[]> {
  try {
    logger.debug('Generating embedding for text', { textLength: text.length });
    const service = await getEmbeddingService();
    // Flag the text as a query so the Python service applies the E5
    // instruction prefix (passages are embedded without it).
    return service.generateEmbedding(text, { isQuery: true });
  } catch (error: any) {
    logger.error('Error generating embedding', error);
    // Fallback to random vector if embedding generation fails; warn loudly,
    // since random vectors make the similarity scores for this query meaningless
    logger.warn('Using fallback random embedding');
    return Array.from({ length: 1024 }, () => Math.random() - 0.5);
  }
}

/**
 * Retrieve all items from a namespace. Note: the `tags` option accepted by
 * retrieveBySemantic is not yet applied as a filter here.
 */
async function getNamespaceItems(db: SQLiteDataStore, namespace: string): Promise<ContextItem[]> {
  logger.debug(`Getting all items in namespace: ${namespace}`);
  return await db.retrieveAllItemsInNamespace(namespace);
}

// Define a type for items with parsed embeddings
interface ItemWithEmbedding extends Omit<ContextItem, 'embedding'> {
  embedding: number[];
}

/**
 * Calculate similarity between query embedding and items
 */
async function calculateSimilarities(
  items: ContextItem[],
  queryEmbedding: number[]
): Promise<Array<ItemWithEmbedding & { similarity: number }>> {
  logger.debug('Calculating similarities', { itemCount: items.length });

  // Generate embeddings for all items
  const itemsWithEmbeddings = await generateEmbeddingsForItems(items);

  // Calculate cosine similarity for each item
  return itemsWithEmbeddings.map((item) => ({
    ...item,
    similarity: calculateCosineSimilarity(queryEmbedding, item.embedding),
  }));
}

/**
 * Generates embeddings for a list of items
 */
async function generateEmbeddingsForItems(items: ContextItem[]): Promise<ItemWithEmbedding[]> {
  logger.debug(`Generating embeddings for ${items.length} items`);
  const service = await getEmbeddingService();
  const results: ItemWithEmbedding[] = [];

  for (const item of items) {
    try {
      const text = extractTextFromItem(item);
      const embedding = await service.generateEmbedding(text);
      results.push({
        ...item,
        embedding,
      });
    } catch (error: any) {
      logger.error('Failed to generate embedding for item', {
        namespace: item.namespace,
        key: item.key,
        error: error.message,
      });
      // Skip items that fail embedding generation
      continue;
    }
  }

  return results;
}

/**
 * Extract text representation from an item
 */
function extractTextFromItem(item: ContextItem): string {
  // Convert item value to text for embedding
  if (typeof item.value === 'string') {
    return item.value;
  }

  if (typeof item.value === 'object') {
    // For objects, concatenate string values
    return Object.entries(item.value)
      .filter(([_, v]) => typeof v === 'string')
      .map(([k, v]) => `${k}: ${v}`)
      .join('\n');
  }

  return `${item.key}: ${JSON.stringify(item.value)}`;
}

/**
 * Calculate cosine similarity between two embeddings
 */
function calculateCosineSimilarity(vec1: number[], vec2: number[]): number {
  if (vec1.length !== vec2.length) {
    throw new Error('Vectors must have the same dimensions');
  }

  const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
  const norm1 = Math.sqrt(vec1.reduce((sum, val) => sum + val * val, 0));
  const norm2 = Math.sqrt(vec2.reduce((sum, val) => sum + val * val,
0)); if (norm1 === 0 || norm2 === 0) { return 0; // Avoid division by zero } return dotProduct / (norm1 * norm2); } ``` -------------------------------------------------------------------------------- /src/lib/chunking-util.ts: -------------------------------------------------------------------------------- ```typescript import { logger } from './logger.js'; /** * Configuration for text chunking */ export interface ChunkingConfig { // Maximum tokens per chunk (default matches E5 model constraints) maxTokens: number; // Tokens to overlap between chunks for context preservation overlapTokens: number; // Rough estimation of characters per token (varies by language) charsPerToken: number; } // Default configuration optimized for the E5 model export const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = { maxTokens: 400, // Target 400 tokens to stay safely below 512 limit overlapTokens: 50, // ~12.5% overlap to maintain context charsPerToken: 4, // Rough estimate - varies by language }; /** * Estimates token count from character count * This is a rough approximation - actual tokenization depends on model and language */ export function estimateTokenCount( text: string, charsPerToken: number = DEFAULT_CHUNKING_CONFIG.charsPerToken ): number { return Math.ceil(text.length / charsPerToken); } /** * Chunk text by semantic boundaries like paragraphs and sentences * Tries to respect natural text boundaries while staying within token limits */ export function chunkTextBySemanticBoundaries( text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG ): string[] { logger.debug(`Chunking text of length ${text.length} characters`); // If text is already small enough, return as-is if (estimateTokenCount(text, config.charsPerToken) <= config.maxTokens) { logger.debug('Text fits in a single chunk, no chunking needed'); return [text]; } const chunks: string[] = []; // First split by double newlines (paragraphs) const paragraphs = text.split(/\n\s*\n/); logger.debug(`Split into ${paragraphs.length} paragraphs`); let currentChunk = ''; let currentTokenCount = 0; // Process paragraph by paragraph for (let i = 0; i < paragraphs.length; i++) { const para = paragraphs[i]; const paraTokens = estimateTokenCount(para, config.charsPerToken); // If this paragraph alone exceeds max tokens, split it into sentences if (paraTokens > config.maxTokens) { logger.debug(`Large paragraph found (est. ${paraTokens} tokens), splitting into sentences`); // If we have accumulated content in current chunk, save it first if (currentChunk) { chunks.push(currentChunk); currentChunk = ''; currentTokenCount = 0; } // Split large paragraph into sentences and process them const sentences = para.split(/(?<=[.!?])\s+/); let sentenceChunk = ''; let sentenceTokenCount = 0; for (const sentence of sentences) { const sentenceTokens = estimateTokenCount(sentence, config.charsPerToken); // If single sentence exceeds limit, we have to split it by character count if (sentenceTokens > config.maxTokens) { logger.debug( `Very long sentence found (est. 
${sentenceTokens} tokens), splitting by character count` ); // Save any accumulated content first if (sentenceChunk) { chunks.push(sentenceChunk); sentenceChunk = ''; sentenceTokenCount = 0; } // Force split the long sentence into multiple chunks const maxChars = config.maxTokens * config.charsPerToken; for (let j = 0; j < sentence.length; j += maxChars) { const subChunk = sentence.substring(j, j + maxChars); chunks.push(subChunk); } } // If adding this sentence exceeds limit, save current and start new else if (sentenceTokenCount + sentenceTokens > config.maxTokens) { chunks.push(sentenceChunk); // Start new chunk with overlap if possible if (sentenceChunk && config.overlapTokens > 0) { // Extract last N tokens worth of text as overlap const overlapChars = config.overlapTokens * config.charsPerToken; const overlapText = sentenceChunk.substring( Math.max(0, sentenceChunk.length - overlapChars) ); sentenceChunk = overlapText + ' ' + sentence; sentenceTokenCount = estimateTokenCount(sentenceChunk, config.charsPerToken); } else { sentenceChunk = sentence; sentenceTokenCount = sentenceTokens; } } // Otherwise add to current sentence chunk else { sentenceChunk = sentenceChunk ? `${sentenceChunk} ${sentence}` : sentence; sentenceTokenCount += sentenceTokens; } } // Add the last sentence chunk if not empty if (sentenceChunk) { chunks.push(sentenceChunk); } } // If adding this paragraph would exceed the token limit else if (currentTokenCount + paraTokens > config.maxTokens) { // Save current chunk chunks.push(currentChunk); // Start new chunk with overlap if possible if (currentChunk && config.overlapTokens > 0) { // Extract last N tokens worth of text as overlap const overlapChars = config.overlapTokens * config.charsPerToken; const overlapText = currentChunk.substring(Math.max(0, currentChunk.length - overlapChars)); currentChunk = overlapText + '\n\n' + para; currentTokenCount = estimateTokenCount(currentChunk, config.charsPerToken); } else { currentChunk = para; currentTokenCount = paraTokens; } } // Otherwise add to current chunk else { if (currentChunk) { currentChunk += '\n\n' + para; } else { currentChunk = para; } currentTokenCount += paraTokens; } } // Add the last chunk if not empty if (currentChunk) { chunks.push(currentChunk); } logger.debug(`Text chunked into ${chunks.length} semantic chunks`); return chunks; } ``` -------------------------------------------------------------------------------- /scripts/setup.js: -------------------------------------------------------------------------------- ```javascript #!/usr/bin/env node /** * This script automates the setup process for the semantic search feature: * - Creates a Python virtual environment (if needed) * - Installs Python dependencies * - Pre-downloads the E5 model to avoid delays on first use */ import { spawn, execSync } from 'child_process'; import fs from 'fs'; import path from 'path'; import os from 'os'; import { fileURLToPath } from 'url'; // Get the directory name in ES modules const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const PYTHON_COMMAND = os.platform() === 'win32' ? 
'python' : 'python3'; const VENV_DIR = path.join(__dirname, '..', 'venv'); const REQUIREMENTS_FILE = path.join(__dirname, '..', 'requirements.txt'); const PYTHON_SCRIPT_DIR = path.join(__dirname, '..', 'src', 'python'); const MODEL_DIR = path.join(os.homedir(), '.cache', 'huggingface', 'transformers'); // Ensure the Python script directory exists if (!fs.existsSync(PYTHON_SCRIPT_DIR)) { fs.mkdirSync(PYTHON_SCRIPT_DIR, { recursive: true }); } // Colors for console output const colors = { reset: '\x1b[0m', green: '\x1b[32m', yellow: '\x1b[33m', blue: '\x1b[34m', red: '\x1b[31m', }; /** * Logs a message with color */ function log(message, color = colors.reset) { console.log(`${color}${message}${colors.reset}`); } /** * Checks if a command exists in PATH */ function commandExists(command) { try { const devNull = os.platform() === 'win32' ? 'NUL' : '/dev/null'; execSync(`${command} --version`, { stdio: 'ignore' }); return true; } catch (e) { return false; } } /** * Checks if virtual environment exists */ function venvExists() { const venvPython = os.platform() === 'win32' ? path.join(VENV_DIR, 'Scripts', 'python.exe') : path.join(VENV_DIR, 'bin', 'python'); return fs.existsSync(venvPython); } /** * Creates a Python virtual environment */ async function createVirtualEnv() { if (venvExists()) { log('Python virtual environment already exists.', colors.green); return; } log('Creating Python virtual environment...', colors.blue); return new Promise((resolve, reject) => { const proc = spawn(PYTHON_COMMAND, ['-m', 'venv', VENV_DIR]); proc.on('close', (code) => { if (code === 0) { log('Python virtual environment created successfully.', colors.green); resolve(); } else { log(`Failed to create virtual environment (exit code ${code}).`, colors.red); reject(new Error(`Failed to create virtual environment (exit code ${code})`)); } }); proc.on('error', (err) => { log(`Error creating virtual environment: ${err.message}`, colors.red); reject(err); }); }); } /** * Installs Python dependencies */ async function installDependencies() { const pipCmd = os.platform() === 'win32' ? path.join(VENV_DIR, 'Scripts', 'pip.exe') : path.join(VENV_DIR, 'bin', 'pip'); log('Installing Python dependencies...', colors.blue); return new Promise((resolve, reject) => { const proc = spawn(pipCmd, ['install', '-r', REQUIREMENTS_FILE]); proc.stdout.on('data', (data) => { process.stdout.write(data.toString()); }); proc.stderr.on('data', (data) => { process.stderr.write(data.toString()); }); proc.on('close', (code) => { if (code === 0) { log('Python dependencies installed successfully.', colors.green); resolve(); } else { log(`Failed to install dependencies (exit code ${code}).`, colors.red); reject(new Error(`Failed to install dependencies (exit code ${code})`)); } }); proc.on('error', (err) => { log(`Error installing dependencies: ${err.message}`, colors.red); reject(err); }); }); } /** * Pre-downloads the E5 model */ async function preDownloadModel() { const pythonCmd = os.platform() === 'win32' ? 
path.join(VENV_DIR, 'Scripts', 'python.exe') : path.join(VENV_DIR, 'bin', 'python'); const downloadScriptPath = path.join(PYTHON_SCRIPT_DIR, 'download_model.py'); // Check if the download script already exists if (!fs.existsSync(downloadScriptPath)) { // Create a simple script to download the model const downloadScript = ` import os from transformers import AutoTokenizer, AutoModel # Set the model name MODEL_NAME = "intfloat/multilingual-e5-large-instruct" print(f"Pre-downloading model: {MODEL_NAME}") print("This might take a few minutes...") # Download the model and tokenizer tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModel.from_pretrained(MODEL_NAME) print(f"Model downloaded and cached at: {os.path.expanduser('~/.cache/huggingface/transformers')}") print("Setup completed successfully!") `; fs.writeFileSync(downloadScriptPath, downloadScript); } log('Pre-downloading the E5 model (this may take a few minutes)...', colors.blue); return new Promise((resolve, reject) => { const proc = spawn(pythonCmd, [downloadScriptPath]); proc.stdout.on('data', (data) => { process.stdout.write(data.toString()); }); proc.stderr.on('data', (data) => { process.stderr.write(data.toString()); }); proc.on('close', (code) => { if (code === 0) { log('E5 model downloaded successfully.', colors.green); resolve(); } else { log(`Failed to download model (exit code ${code}).`, colors.red); // Don't reject since this is not critical - the model will be downloaded on first use resolve(); } }); proc.on('error', (err) => { log(`Error downloading model: ${err.message}`, colors.red); // Don't reject since this is not critical - the model will be downloaded on first use resolve(); }); }); } /** * Main function */ async function main() { log('Starting setup for semantic search...', colors.blue); // Check if Python is installed if (!commandExists(PYTHON_COMMAND)) { log(`${PYTHON_COMMAND} not found. Please install Python 3.8 or later.`, colors.red); process.exit(1); } try { // Create virtual environment await createVirtualEnv(); // Install dependencies await installDependencies(); // Pre-download model (optional) await preDownloadModel(); log('Setup completed successfully!', colors.green); } catch (error) { log(`Setup failed: ${error.message}`, colors.red); process.exit(1); } } // Run the main function main(); ``` -------------------------------------------------------------------------------- /src/lib/embedding-service.ts: -------------------------------------------------------------------------------- ```typescript import { spawn } from 'child_process'; import path from 'path'; import os from 'os'; import { logger } from './logger.js'; /** * Service for generating embeddings using the E5 model */ export class EmbeddingService { private pythonProcess: any; private initialized: boolean = false; private initializationPromise: Promise<void> | null = null; private readonly pythonScriptPath: string; private readonly pythonCommand: string; private readonly taskDescription: string; /** * Creates a new embedding service * @param options Configuration options */ constructor( options: { pythonScriptPath?: string; taskDescription?: string; useVenv?: boolean; } = {} ) { this.pythonScriptPath = options.pythonScriptPath || path.join(process.cwd(), 'src', 'python', 'embedding_service.py'); this.taskDescription = options.taskDescription || 'Given a document, find semantically similar documents'; // Determine which Python executable to use based on the useVenv option const useVenv = options.useVenv !== undefined ? 
options.useVenv : true; if (useVenv) { const venvDir = path.join(process.cwd(), 'venv'); this.pythonCommand = os.platform() === 'win32' ? path.join(venvDir, 'Scripts', 'python.exe') : path.join(venvDir, 'bin', 'python'); } else { this.pythonCommand = os.platform() === 'win32' ? 'python' : 'python3'; } } /** * Initializes the embedding service */ async initialize(): Promise<void> { if (this.initialized) { return; } if (this.initializationPromise) { return this.initializationPromise; } this.initializationPromise = new Promise<void>((resolve, reject) => { try { logger.debug('Initializing embedding service', { scriptPath: this.pythonScriptPath, pythonCommand: this.pythonCommand, }); // Spawn Python process this.pythonProcess = spawn(this.pythonCommand, [this.pythonScriptPath]); // Handle process exit this.pythonProcess.on('exit', (code: number) => { logger.error('Embedding service process exited', { code }); this.initialized = false; this.pythonProcess = null; }); // Handle process errors this.pythonProcess.on('error', (error: Error) => { logger.error('Error in embedding service process', error); reject(error); }); // Log stderr output this.pythonProcess.stderr.on('data', (data: Buffer) => { logger.debug('Embedding service stderr:', { message: data.toString().trim() }); }); // Initialize the model this.sendCommand({ command: 'initialize' }) .then(() => { this.initialized = true; logger.debug('Embedding service initialized successfully'); resolve(); }) .catch((error) => { logger.error('Failed to initialize embedding service', error); reject(error); }); } catch (error) { logger.error('Error initializing embedding service', error); reject(error); } }); return this.initializationPromise; } /** * Generates an embedding for a single text * @param text The text to generate an embedding for * @returns The embedding vector */ async generateEmbedding(text: string): Promise<number[]> { if (!this.initialized) { await this.initialize(); } const result = await this.sendCommand({ command: 'generate_embedding', text, task: this.taskDescription, }); if (result.error) { throw new Error(`Embedding generation failed: ${result.error}`); } return result.embedding; } /** * Generate embeddings for multiple texts * @param texts Array of texts to generate embeddings for * @param options Options for embedding generation * @returns Promise that resolves to an array of embedding vectors */ async generateEmbeddings( texts: string[], options?: { input_type?: 'query' | 'passage' } ): Promise<number[][]> { if (!this.initialized) { await this.initialize(); } if (!texts || texts.length === 0) { throw new Error('No texts provided for embedding generation'); } logger.debug(`Generating embeddings for ${texts.length} texts`); try { const command = { command: 'generate_embeddings', texts: texts, is_query: options?.input_type === 'query', }; const result = await this.sendCommand(command); if (result.error) { throw new Error(`Batch embedding generation failed: ${result.error}`); } if (!result.embeddings || !Array.isArray(result.embeddings)) { throw new Error('Invalid response from embedding service'); } return result.embeddings; } catch (error: any) { logger.error('Error generating embeddings', error); throw new Error(`Batch embedding generation failed: ${error.message}`); } } /** * Sends a command to the Python process * @param command The command to send * @returns The result from the Python process */ private sendCommand(command: any): Promise<any> { return new Promise((resolve, reject) => { if (!this.pythonProcess) { reject(new 
Error('Python process not initialized'));
        return;
      }

      // Buffer to accumulate response chunks
      let responseBuffer = '';

      // Set up data handler that accumulates chunks
      const responseHandler = (data: Buffer) => {
        const chunk = data.toString();
        responseBuffer += chunk;

        try {
          // Try to parse the accumulated buffer
          const response = JSON.parse(responseBuffer);

          // Parsing succeeded: clean up the listener and the timeout, then resolve
          clearTimeout(timeout);
          this.pythonProcess.stdout.removeListener('data', responseHandler);
          resolve(response);
        } catch (error) {
          // Incomplete JSON, continue accumulating.
          // This is expected for large responses split across chunks;
          // we keep collecting until the buffer parses as valid JSON.
        }
      };

      // Handle stream errors
      const errorHandler = (error: Error) => {
        clearTimeout(timeout);
        this.pythonProcess.stdout.removeListener('data', responseHandler);
        reject(error);
      };

      // Set up event listeners
      this.pythonProcess.stdout.on('data', responseHandler);
      this.pythonProcess.stdout.once('error', errorHandler);

      // Send command to Python process
      this.pythonProcess.stdin.write(JSON.stringify(command) + '\n');

      // Set a reasonable timeout (30 seconds); cleared by the handlers above so
      // it cannot fire after the promise has already settled
      const timeout = setTimeout(() => {
        this.pythonProcess?.stdout.removeListener('data', responseHandler);
        reject(new Error('Timeout waiting for embedding service response'));
      }, 30000);
    });
  }

  /**
   * Closes the embedding service
   */
  async close(): Promise<void> {
    if (this.pythonProcess) {
      this.pythonProcess.kill();
      this.pythonProcess = null;
      this.initialized = false;
      this.initializationPromise = null;
      logger.debug('Embedding service closed');
    }
  }
}
```

--------------------------------------------------------------------------------
/src/lib/http-server.ts:
--------------------------------------------------------------------------------

```typescript
import http from 'http';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { logger } from './logger.js';
import { URL } from 'url';

// Store active SSE transports by session ID
const activeTransports = new Map<string, SSEServerTransport>();

// Store heartbeat intervals by session ID
const heartbeatIntervals = new Map<string, NodeJS.Timeout>();

/**
 * Get transport by session ID - useful for reconnection
 */
export function getTransport(sessionId: string): SSEServerTransport | undefined {
  return activeTransports.get(sessionId);
}

/**
 * Set up an HTTP server with SSE transport for the MCP server
 */
export function setupHttpServer(server: McpServer, port: number): http.Server {
  // Create HTTP server
  const httpServer = http.createServer((req, res) => {
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // Parse URL
    const reqUrl = new URL(req.url || '/', `http://localhost:${port}`);
    const path = reqUrl.pathname;

    // Health check endpoint
    if (path === '/health' && req.method === 'GET') {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ status: 'ok' }));
      return;
    }

    // SSE endpoint
    if (path === '/sse' && req.method === 'GET') {
      logger.info('New SSE connection established');

      // Prevent timeout for long-running connections
      req.socket.setTimeout(0);
      req.socket.setKeepAlive(true);
      req.socket.setMaxListeners(20);

      // Check if there's a session ID for reconnection
      const urlParams = new URLSearchParams(reqUrl.search);
      const
existingSessionId = urlParams.get('sessionId'); // Create SSE transport with a fixed endpoint path const sseTransport = new SSEServerTransport('message', res); // If reconnecting with an existing session ID, store it if (existingSessionId) { logger.debug(`Attempting to reconnect with existing session ID: ${existingSessionId}`); } // Store the transport in our map const sessionId = sseTransport.sessionId; activeTransports.set(sessionId, sseTransport); logger.debug(`Created new SSE transport with session ID: ${sessionId}`); // Set up heartbeat to keep connection alive // This is critical for preventing connection timeouts and ensuring we don't lose message correlation // The heartbeat sends an empty comment every 30 seconds to keep the connection open const heartbeatInterval = setInterval(() => { if (!res.writableEnded) { try { // Send a comment as heartbeat res.write(`:heartbeat\n\n`); logger.debug(`Sent heartbeat to session ${sessionId}`); } catch (err) { logger.error(`Failed to send heartbeat to session ${sessionId}`, err); clearInterval(heartbeatInterval); activeTransports.delete(sessionId); } } else { // Connection already ended clearInterval(heartbeatInterval); activeTransports.delete(sessionId); } }, 30000); // Store the heartbeat interval heartbeatIntervals.set(sessionId, heartbeatInterval); // Connect server to transport server.connect(sseTransport).catch((error) => { logger.error('Failed to connect to SSE transport', error); clearInterval(heartbeatIntervals.get(sessionId)!); heartbeatIntervals.delete(sessionId); activeTransports.delete(sessionId); }); // Handle connection close req.on('close', () => { logger.info(`SSE connection closed for session: ${sessionId}`); // Clean up resources const interval = heartbeatIntervals.get(sessionId); if (interval) { clearInterval(interval); heartbeatIntervals.delete(sessionId); } activeTransports.delete(sessionId); sseTransport.close().catch((error) => { logger.error('Error closing SSE transport', error); }); }); return; } // Message endpoint if (path === '/message' && req.method === 'POST') { const urlParams = new URLSearchParams(reqUrl.search); const sessionId = urlParams.get('sessionId'); if (!sessionId) { res.writeHead(400, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Missing sessionId parameter' })); return; } const transport = activeTransports.get(sessionId); if (!transport) { res.writeHead(404, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Session not found or expired', reconnect: true })); return; } // Pass the request to the SSE transport's handlePostMessage method try { transport.handlePostMessage(req, res); } catch (error) { logger.error(`Error handling message for session ${sessionId}`, error); if (!res.headersSent) { res.writeHead(500, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Internal server error', details: error instanceof Error ? 
error.message : String(error) })); } } return; } // Not found res.writeHead(404, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Not found' })); }); // Start HTTP server httpServer.listen(port, () => { logger.info(`HTTP server listening on port ${port}`); logger.info('Server ready to receive requests via HTTP'); logger.info(`SSE endpoint: http://localhost:${port}/sse`); logger.info(`Message endpoint: http://localhost:${port}/message?sessionId=<SESSION_ID>`); }); return httpServer; } /** * Close all active SSE transports */ export async function closeAllTransports(): Promise<void> { logger.info(`Closing ${activeTransports.size} active SSE transports`); // Clear all heartbeat intervals for (const [sessionId, interval] of heartbeatIntervals.entries()) { clearInterval(interval); heartbeatIntervals.delete(sessionId); } // Use Promise.allSettled to ensure we attempt to close all transports // even if some fail const closePromises = Array.from(activeTransports.entries()).map( async ([sessionId, transport]) => { try { logger.debug(`Closing SSE transport for session: ${sessionId}`); await Promise.race([ transport.close(), // Add a timeout to prevent hanging new Promise((_, reject) => setTimeout(() => reject(new Error('Transport close timeout')), 1000) ), ]); return { sessionId, success: true }; } catch (error) { logger.error(`Error closing SSE transport for session ${sessionId}`, error); return { sessionId, success: false }; } } ); await Promise.allSettled(closePromises); // Clear the map regardless of success/failure activeTransports.clear(); logger.info('All SSE transports closed or timed out'); } ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { /* Visit https://aka.ms/tsconfig to read more about this file */ /* Projects */ // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ /* Language and Environment */ "target": "ES2020", // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ // "jsx": "preserve", /* Specify what JSX code is generated. */ // "libReplacement": true, /* Enable lib replacement. */ // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. 
*/ // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ /* Modules */ "module": "NodeNext", "moduleResolution": "NodeNext", // "rootDir": "./", /* Specify the root folder within your source files. */ // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ // "types": [], /* Specify type package names to be included without being referenced in a source file. */ // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ // "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */ // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ // "noUncheckedSideEffectImports": true, /* Check side effect imports. */ "resolveJsonModule": true, // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ // "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */ /* JavaScript Support */ // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ /* Emit */ "declaration": true, // "declarationMap": true, /* Create sourcemaps for d.ts files. */ // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ "sourceMap": true, // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ // "noEmit": true, /* Disable emitting files from a compilation. */ // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ "outDir": "./dist", // "removeComments": true, /* Disable emitting comments. 
*/ // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ // "newLine": "crlf", /* Set the newline character for emitting files. */ // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ /* Interop Constraints */ // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ // "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */ // "erasableSyntaxOnly": true, /* Do not allow runtime constructs that are not part of ECMAScript. */ // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ "esModuleInterop": true, // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ "forceConsistentCasingInFileNames": true, /* Type Checking */ "strict": true, // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ // "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */ // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. 
*/ // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ /* Completeness */ // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ "skipLibCheck": true }, "include": ["src/**/*"], "exclude": ["node_modules", "dist"] } ``` -------------------------------------------------------------------------------- /src/lib/tool-implementations.ts: -------------------------------------------------------------------------------- ```typescript import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { SQLiteDataStore } from './sqlite-store.js'; import { createNamespace, deleteNamespace, listNamespaces } from './namespace-manager.js'; import { storeContextItem, retrieveContextItemByKey, deleteContextItem, listContextItemKeys, } from './context-manager.js'; import { toolSchemas } from './tool-definitions.js'; import { logger } from './logger.js'; import { normalizeError } from './errors.js'; import { retrieveBySemantic } from './search-module.js'; /** * Registers all tool implementations with the MCP server * @param server MCP server instance * @param db Database connection */ export function registerTools(server: McpServer, db: SQLiteDataStore): void { server.tool( toolSchemas.retrieve_context_item_by_key.name, toolSchemas.retrieve_context_item_by_key.description, toolSchemas.retrieve_context_item_by_key.schema.shape, async ({ namespace, key }) => { try { logger.debug('Executing retrieve_context_item_by_key tool', { namespace, key }); const result = await retrieveContextItemByKey(db, namespace, key); return { content: [{ type: 'text', text: JSON.stringify({ result }) }], }; } catch (error: any) { const normalizedError = normalizeError(error); logger.error('Error retrieving context item', normalizedError, { namespace, key }); return { isError: true, content: [ { type: 'text', text: `Error retrieving context item: ${normalizedError.message}` }, ], }; } } ); server.tool( toolSchemas.store_context_item.name, toolSchemas.store_context_item.description, toolSchemas.store_context_item.schema.shape, async ({ namespace, key, value }) => { try { logger.debug('Executing store_context_item tool', { namespace, key }); // Try to parse the value as JSON or number before storing let parsedValue: any = value; // First try to parse as JSON try { // Only attempt to parse if the value looks like JSON (starts with { or [) if (value.trim().startsWith('{') || value.trim().startsWith('[')) { parsedValue = JSON.parse(value); logger.debug('Parsed value as JSON object', { namespace, key }); } } catch (parseError: any) { // If JSON parsing fails, keep the original string value logger.debug('Failed to parse value as JSON, keeping as string', { namespace, key, error: parseError.message, }); } // If it's still a string, try to parse as other types if (typeof parsedValue === 'string') { const 
trimmedValue = parsedValue.trim(); // Check for number if (/^-?\d+(\.\d+)?$/.test(trimmedValue)) { const numValue = Number(trimmedValue); if (!isNaN(numValue)) { parsedValue = numValue; logger.debug('Parsed value as number', { namespace, key }); } } // Check for boolean else if (trimmedValue === 'true' || trimmedValue === 'false') { parsedValue = trimmedValue === 'true'; logger.debug('Parsed value as boolean', { namespace, key }); } // Check for null else if (trimmedValue === 'null') { parsedValue = null; logger.debug('Parsed value as null', { namespace, key }); } } // Check if the item already exists const existingItem = await retrieveContextItemByKey(db, namespace, key); if (existingItem) { // Item already exists, return as an error return { isError: true, content: [ { type: 'text', text: `Error storing context item: An item with the key "${key}" already exists in namespace "${namespace}". Use update_context_item to modify existing items.`, }, ], }; } // Store the parsed value await storeContextItem(db, namespace, key, parsedValue); return { content: [{ type: 'text', text: JSON.stringify({ result: true }) }], }; } catch (error: any) { const normalizedError = normalizeError(error); logger.error('Error storing context item', normalizedError, { namespace, key }); return { isError: true, content: [ { type: 'text', text: `Error storing context item: ${normalizedError.message}` }, ], }; } } ); server.tool( toolSchemas.update_context_item.name, toolSchemas.update_context_item.description, toolSchemas.update_context_item.schema.shape, async ({ namespace, key, value }) => { try { logger.debug('Executing update_context_item tool', { namespace, key }); // Check if the item exists const existingItem = await retrieveContextItemByKey(db, namespace, key); if (!existingItem) { // Item doesn't exist, return as an error return { isError: true, content: [ { type: 'text', text: `Error updating context item: No item with the key "${key}" exists in namespace "${namespace}". 
Use store_context_item to create new items.`, }, ], }; } // Try to parse the value as JSON or number before storing let parsedValue: any = value; // First try to parse as JSON try { // Only attempt to parse if the value looks like JSON (starts with { or [) if (value.trim().startsWith('{') || value.trim().startsWith('[')) { parsedValue = JSON.parse(value); logger.debug('Parsed value as JSON object', { namespace, key }); } } catch (parseError: any) { // If JSON parsing fails, keep the original string value logger.debug('Failed to parse value as JSON, keeping as string', { namespace, key, error: parseError.message, }); } // If it's still a string, try to parse as other types if (typeof parsedValue === 'string') { const trimmedValue = parsedValue.trim(); // Check for number if (/^-?\d+(\.\d+)?$/.test(trimmedValue)) { const numValue = Number(trimmedValue); if (!isNaN(numValue)) { parsedValue = numValue; logger.debug('Parsed value as number', { namespace, key }); } } // Check for boolean else if (trimmedValue === 'true' || trimmedValue === 'false') { parsedValue = trimmedValue === 'true'; logger.debug('Parsed value as boolean', { namespace, key }); } // Check for null else if (trimmedValue === 'null') { parsedValue = null; logger.debug('Parsed value as null', { namespace, key }); } } // Update the item await storeContextItem(db, namespace, key, parsedValue); return { content: [{ type: 'text', text: JSON.stringify({ result: true }) }], }; } catch (error: any) { const normalizedError = normalizeError(error); logger.error('Error updating context item', normalizedError, { namespace, key }); return { isError: true, content: [ { type: 'text', text: `Error updating context item: ${normalizedError.message}` }, ], }; } } ); server.tool( toolSchemas.delete_context_item.name, toolSchemas.delete_context_item.description, toolSchemas.delete_context_item.schema.shape, async ({ namespace, key }) => { try { logger.debug('Executing delete_context_item tool', { namespace, key }); const deleted = await deleteContextItem(db, namespace, key); return { content: [ { type: 'text', text: JSON.stringify({ result: deleted, // Will be true if item was deleted, false if it didn't exist }), }, ], }; } catch (error: any) { const normalizedError = normalizeError(error); logger.error('Error deleting context item', normalizedError, { namespace, key }); return { isError: true, content: [ { type: 'text', text: `Error deleting context item: ${normalizedError.message}` }, ], }; } } ); // Namespace Management server.tool( toolSchemas.create_namespace.name, toolSchemas.create_namespace.description, toolSchemas.create_namespace.schema.shape, async ({ namespace }) => { try { logger.debug('Executing create_namespace tool', { namespace }); const created = await createNamespace(db, namespace); if (!created) { // Namespace already exists, return as an error return { isError: true, content: [ { type: 'text', text: `Error creating namespace: A namespace with the name "${namespace}" already exists.`, }, ], }; } return { content: [ { type: 'text', text: JSON.stringify({ result: true, }), }, ], }; } catch (error: any) { const normalizedError = normalizeError(error); logger.error('Error creating namespace', normalizedError, { namespace }); return { isError: true, content: [{ type: 'text', text: `Error creating namespace: ${normalizedError.message}` }], }; } } ); server.tool( toolSchemas.delete_namespace.name, toolSchemas.delete_namespace.description, toolSchemas.delete_namespace.schema.shape, async ({ namespace }) => { try { 
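        // Deleting a namespace removes every context item stored under it;
        // the associated embedding chunks go with them via ON DELETE CASCADE.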
logger.debug('Executing delete_namespace tool', { namespace });

        const deleted = await deleteNamespace(db, namespace);

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                result: deleted, // Will be true if namespace was deleted, false if it didn't exist
              }),
            },
          ],
        };
      } catch (error: any) {
        const normalizedError = normalizeError(error);
        logger.error('Error deleting namespace', normalizedError, { namespace });
        return {
          isError: true,
          content: [{ type: 'text', text: `Error deleting namespace: ${normalizedError.message}` }],
        };
      }
    }
  );

  server.tool(
    toolSchemas.list_namespaces.name,
    toolSchemas.list_namespaces.description,
    toolSchemas.list_namespaces.schema.shape,
    async () => {
      try {
        logger.debug('Executing list_namespaces tool');

        const namespaces = await listNamespaces(db);

        return {
          content: [{ type: 'text', text: JSON.stringify({ result: namespaces }) }],
        };
      } catch (error: any) {
        const normalizedError = normalizeError(error);
        logger.error('Error listing namespaces', normalizedError);
        return {
          isError: true,
          content: [{ type: 'text', text: `Error listing namespaces: ${normalizedError.message}` }],
        };
      }
    }
  );

  // Keys Listing
  server.tool(
    toolSchemas.list_context_item_keys.name,
    toolSchemas.list_context_item_keys.description,
    toolSchemas.list_context_item_keys.schema.shape,
    async ({ namespace }) => {
      try {
        logger.debug('Executing list_context_item_keys tool', { namespace });

        const keys = await listContextItemKeys(db, namespace);

        return {
          content: [{ type: 'text', text: JSON.stringify({ result: keys }) }],
        };
      } catch (error: any) {
        const normalizedError = normalizeError(error);
        logger.error('Error listing context item keys', normalizedError, { namespace });
        return {
          isError: true,
          content: [
            {
              type: 'text',
              text: `Error listing context item keys: ${normalizedError.message}`,
            },
          ],
        };
      }
    }
  );

  // Semantic search (registered with name, description, and schema,
  // consistent with the other tools above)
  server.tool(
    toolSchemas.retrieve_context_items_by_semantic_search.name,
    toolSchemas.retrieve_context_items_by_semantic_search.description,
    toolSchemas.retrieve_context_items_by_semantic_search.schema.shape,
    async ({ namespace, query, limit, similarity_threshold }) => {
      try {
        logger.debug('Executing retrieve_context_items_by_semantic_search tool', {
          namespace,
          query,
          limit,
          similarity_threshold,
        });

        const items = await retrieveBySemantic(db, namespace, query, {
          limit: limit,
          threshold: similarity_threshold,
        });

        logger.debug('Retrieved items by semantic search', {
          namespace,
          query,
          count: items.length,
        });

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                result: items.map((item) => ({
                  value: item.value,
                  similarity: item.similarity,
                  created_at: item.created_at,
                  updated_at: item.updated_at,
                })),
              }),
            },
          ],
        };
      } catch (error: any) {
        const normalizedError = normalizeError(error);
        logger.error('Error retrieving items by semantic search', normalizedError, {
          namespace,
          query,
        });
        return {
          isError: true,
          content: [
            {
              type: 'text',
              text: `Error retrieving items by semantic search: ${normalizedError.message}`,
            },
          ],
        };
      }
    }
  );
}
```

--------------------------------------------------------------------------------
/src/lib/sqlite-store.ts:
--------------------------------------------------------------------------------

```typescript
import sqlite3 from 'sqlite3';
import { promisify } from 'util';
import fs from 'fs';
import path from 'path';
import { logger } from './logger.js';
import { DatabaseError, normalizeError } from './errors.js';
import { chunkTextBySemanticBoundaries } from './chunking-util.js';

// Define types for database operations
export interface ContextItem {
  namespace: string;
  key: string;
  value: any;
  created_at: string;
  updated_at: string;
}

export
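// Minimal promisified view over sqlite3's callback API; the wrappers
// returned by initializeDatabase() below implement it.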
interface Database { run: (sql: string, ...params: any[]) => Promise<void>; get: (sql: string, ...params: any[]) => Promise<any>; all: (sql: string, ...params: any[]) => Promise<any[]>; close: () => Promise<void>; } export interface SQLiteDataStoreOptions { dbPath: string; // Add optional chunking flag enableChunking?: boolean; } // Define the DataStore interface interface DataStore { storeDataItem(namespace: string, key: string, value: any): Promise<void>; retrieveDataItem(namespace: string, key: string): Promise<any | null>; deleteDataItem(namespace: string, key: string): Promise<boolean>; dataItemExists(namespace: string, key: string): Promise<boolean>; listAllNamespaces(): Promise<string[]>; listContextItemKeys(namespace: string): Promise<any[]>; createNamespace(namespace: string): Promise<boolean>; deleteNamespace(namespace: string): Promise<boolean>; // Optional method for non-chunked embeddings retrieveDataItemsByEmbeddingSimilarity?( namespace: string, queryText: string, embeddingService: any, options: any ): Promise<any[]>; retrieveAllItemsInNamespace(namespace: string): Promise<any[]>; } export class SQLiteDataStore implements DataStore { private dbPath: string; private db: sqlite3.Database | null = null; // Add chunking option private enableChunking: boolean; constructor(options: SQLiteDataStoreOptions) { this.dbPath = options.dbPath; // Default to false for backward compatibility this.enableChunking = options.enableChunking ?? false; logger.debug( `SQLite data store initialized, chunking ${this.enableChunking ? 'enabled' : 'disabled'}` ); } /** * Get or create a database connection */ async getDb(): Promise<sqlite3.Database> { if (!this.db) { logger.debug('Creating new database connection'); this.db = new sqlite3.Database(this.dbPath); // Enable foreign key constraints const run = promisify(this.db.run.bind(this.db)) as ( sql: string, ...params: any[] ) => Promise<void>; await run('PRAGMA foreign_keys = ON'); logger.debug('Foreign key constraints enabled'); await this.initializeDatabase(); } return this.db; } /** * Check if a data item exists */ async dataItemExists(namespace: string, key: string): Promise<boolean> { const db = await this.getDb(); const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>; try { const result = await get( 'SELECT 1 FROM context_items WHERE namespace = ? 
AND key = ?',
        namespace,
        key
      );
      return !!result;
    } catch (error) {
      logger.error(
        `Error checking if data item exists | Context: ${JSON.stringify({ namespace, key, error })}`
      );
      throw error;
    }
  }

  /**
   * Initializes the SQLite database and creates necessary tables if they don't exist
   * @returns A database connection object
   */
  async initializeDatabase(): Promise<Database> {
    try {
      logger.info('Initializing database', { dbPath: this.dbPath });

      // Ensure the directory exists
      const dbDir = path.dirname(this.dbPath);
      if (!fs.existsSync(dbDir)) {
        logger.debug('Creating database directory', { dbDir });
        fs.mkdirSync(dbDir, { recursive: true });
      }

      // Reuse the connection opened by getDb() rather than opening (and leaking)
      // a second one; fall back to a fresh connection if called directly
      const db = this.db ?? new sqlite3.Database(this.dbPath);
      this.db = db;
      logger.debug('Database connection established');

      // Promisify database methods with proper type assertions
      const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>;
      const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>;
      const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>;
      const close = promisify(db.close.bind(db)) as () => Promise<void>;

      // Create tables if they don't exist
      logger.debug("Creating tables if they don't exist");

      await run(`
        CREATE TABLE IF NOT EXISTS namespaces (
          namespace TEXT PRIMARY KEY,
          created_at TEXT NOT NULL
        )
      `);

      await run(`
        CREATE TABLE IF NOT EXISTS context_items (
          namespace TEXT NOT NULL,
          key TEXT NOT NULL,
          value TEXT NOT NULL,
          created_at TEXT NOT NULL,
          updated_at TEXT NOT NULL,
          PRIMARY KEY (namespace, key),
          FOREIGN KEY (namespace) REFERENCES namespaces(namespace) ON DELETE CASCADE
        )
      `);

      // Create embeddings table for chunked items
      await run(`
        CREATE TABLE IF NOT EXISTS embeddings (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          namespace TEXT NOT NULL,
          item_key TEXT NOT NULL,
          chunk_index INTEGER NOT NULL,
          chunk_text TEXT NOT NULL,
          embedding TEXT,
          created_at TEXT NOT NULL,
          FOREIGN KEY (namespace, item_key) REFERENCES context_items(namespace, key) ON DELETE CASCADE
        )
      `);

      // Create indexes for embeddings table
      await run(
        'CREATE INDEX IF NOT EXISTS idx_embeddings_namespace_item_key ON embeddings(namespace, item_key)'
      );
      await run(
        'CREATE INDEX IF NOT EXISTS idx_embeddings_has_embedding ON embeddings(namespace, embedding IS NOT NULL)'
      );

      // Create indexes for performance
      logger.debug("Creating indexes if they don't exist");
      await run(
        'CREATE INDEX IF NOT EXISTS idx_context_items_namespace ON context_items(namespace)'
      );

      logger.info('Database initialization completed successfully');

      return {
        run,
        get,
        all,
        close,
      };
    } catch (error) {
      const normalizedError = normalizeError(error);
      logger.error('Failed to initialize database', normalizedError, { dbPath: this.dbPath });
      throw new DatabaseError(`Failed to initialize database: ${normalizedError.message}`);
    }
  }

  /**
   * Store a data item with optional chunking support
   */
  async storeDataItem(namespace: string, key: string, value: any): Promise<void> {
    const db = await this.getDb();
    const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>;
    const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>;

    logger.debug(`Storing data item | Context: ${JSON.stringify({ namespace, key })}`);

    // Check if the item already exists
    const exists = await this.dataItemExists(namespace, key);
    const timestamp = new Date().toISOString();

    try {
      logger.debug(
        `Beginning transaction for storing data item | Context:
${JSON.stringify({ namespace, key })}` ); await run('BEGIN TRANSACTION'); if (exists) { logger.debug(`Updating existing item | Context: ${JSON.stringify({ namespace, key })}`); await run( `UPDATE context_items SET value = ?, updated_at = ? WHERE namespace = ? AND key = ?`, JSON.stringify(value), timestamp, namespace, key ); // If chunking is enabled, delete existing chunks before adding new ones if (this.enableChunking) { logger.debug( `Deleting existing chunks for updated item | Context: ${JSON.stringify({ namespace, key })}` ); await run('DELETE FROM embeddings WHERE namespace = ? AND item_key = ?', namespace, key); } } else { logger.debug(`Inserting new item | Context: ${JSON.stringify({ namespace, key })}`); await run( `INSERT INTO context_items (namespace, key, value, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`, namespace, key, JSON.stringify(value), timestamp, timestamp ); } // Process chunking if enabled if (this.enableChunking) { // Extract the text value for chunking - stringify objects const textValue = typeof value === 'string' ? value : JSON.stringify(value); // Create chunks based on semantic boundaries const chunks = chunkTextBySemanticBoundaries(textValue); logger.debug( `Storing ${chunks.length} chunks for item | Context: ${JSON.stringify({ namespace, key })}` ); // Store each chunk with its index for (let i = 0; i < chunks.length; i++) { await run( `INSERT INTO embeddings (namespace, item_key, chunk_index, chunk_text, created_at) VALUES (?, ?, ?, ?, ?)`, namespace, key, i, chunks[i], timestamp ); } // Immediately generate embeddings for the newly created chunks try { // Import the embedding service dynamically to avoid circular dependencies const { EmbeddingService } = await import('./embedding-service.js'); const embeddingService = new EmbeddingService(); await embeddingService.initialize(); logger.debug( `Generating embeddings for chunks | Context: ${JSON.stringify({ namespace, key, count: chunks.length })}` ); // Get the chunk IDs for the newly inserted chunks const chunkIds = await all( `SELECT id, chunk_text FROM embeddings WHERE namespace = ? AND item_key = ? AND embedding IS NULL ORDER BY chunk_index`, namespace, key ); if (chunkIds.length > 0) { // Extract just the text for embedding generation const chunkTexts = chunkIds.map((chunk) => chunk.chunk_text); // Generate embeddings const embeddings = await embeddingService.generateEmbeddings(chunkTexts); // Update each chunk with its embedding for (let i = 0; i < chunkIds.length; i++) { const embedding = JSON.stringify(embeddings[i]); await run( `UPDATE embeddings SET embedding = ? 
WHERE id = ?`, embedding, chunkIds[i].id ); } logger.debug( `Embeddings generated and stored for ${chunkIds.length} chunks | Context: ${JSON.stringify({ namespace, key })}` ); } } catch (error) { // Log the error but don't fail the store operation logger.error( `Error generating embeddings for chunks | Context: ${JSON.stringify({ namespace, key, error })}` ); } } await run('COMMIT'); logger.debug( `Transaction committed successfully | Context: ${JSON.stringify({ namespace, key })}` ); } catch (error) { logger.error( `Error storing data item | Context: ${JSON.stringify({ namespace, key, error })}` ); await run('ROLLBACK'); throw error; } } /** * Generate embeddings for all chunks that don't have them yet */ async generateEmbeddingsForChunks(namespace: string, embeddingService: any): Promise<number> { if (!this.enableChunking) { logger.debug(`Chunking is disabled, skipping embedding generation`); return 0; } const db = await this.getDb(); const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>; const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>; // Find chunks without embeddings const chunksWithoutEmbeddings = await all( `SELECT id, namespace, item_key, chunk_index, chunk_text FROM embeddings WHERE namespace = ? AND embedding IS NULL ORDER BY item_key, chunk_index`, namespace ); if (chunksWithoutEmbeddings.length === 0) { logger.debug(`No chunks found without embeddings for namespace: ${namespace}`); return 0; } logger.debug( `Generating embeddings for ${chunksWithoutEmbeddings.length} chunks | Context: { namespace: ${namespace} }` ); let processedCount = 0; // Process chunks in batches to avoid memory issues const BATCH_SIZE = 10; for (let i = 0; i < chunksWithoutEmbeddings.length; i += BATCH_SIZE) { const batch = chunksWithoutEmbeddings.slice(i, i + BATCH_SIZE); const chunkTexts = batch.map((chunk) => chunk.chunk_text); try { // Generate embeddings for the batch const embeddings = await embeddingService.generateEmbeddings(chunkTexts); // Update each chunk with its embedding await run('BEGIN TRANSACTION'); for (let j = 0; j < batch.length; j++) { const chunk = batch[j]; const embedding = JSON.stringify(embeddings[j]); await run( `UPDATE embeddings SET embedding = ? 
WHERE id = ?`,
            embedding,
            chunk.id
          );
          processedCount++;
        }
        await run('COMMIT');

        logger.debug(
          `Generated embeddings for batch ${i / BATCH_SIZE + 1} | Context: { count: ${batch.length} }`
        );
      } catch (error) {
        logger.error(
          `Error generating embeddings for batch ${i / BATCH_SIZE + 1} | Context: ${JSON.stringify(error)}`
        );
        await run('ROLLBACK');
        throw error;
      }
    }

    logger.debug(`Completed embedding generation | Context: { processed: ${processedCount} }`);
    return processedCount;
  }

  /**
   * Retrieve items by semantic search, using the chunked embeddings if enabled
   */
  async retrieveDataItemsBySemanticSearch(
    namespace: string,
    queryText: string,
    embeddingService: any,
    options: {
      limit?: number;
      similarityThreshold?: number;
    } = {}
  ): Promise<any[]> {
    logger.debug(
      `Performing semantic search | Context: ${JSON.stringify({ namespace, queryText, options })}`
    );

    const { limit = 10, similarityThreshold = 0.5 } = options;

    // If chunking is not enabled, fall back to the legacy method if it exists
    if (!this.enableChunking) {
      logger.debug(`Chunking disabled, using legacy semantic search method`);
      return this.retrieveDataItemsByEmbeddingSimilarity(
        namespace,
        queryText,
        embeddingService,
        options
      );
    }

    const db = await this.getDb();
    const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>;

    try {
      // Generate embedding for the query
      logger.debug(
        `Generating embedding for text | Context: ${JSON.stringify({ textLength: queryText.length })}`
      );
      const queryEmbedding = await embeddingService.generateEmbeddings([queryText], {
        input_type: 'query',
      });

      if (!queryEmbedding || !queryEmbedding[0] || queryEmbedding[0].length === 0) {
        logger.error(`Failed to generate query embedding`);
        return [];
      }

      // Get all chunks with embeddings. Note: context_items only has namespace,
      // key, value, and timestamp columns, so only those are selected here.
      const chunksWithEmbeddings = await all(
        `SELECT c.namespace, c.item_key, c.chunk_index, c.chunk_text, c.embedding,
                i.value, i.created_at, i.updated_at
         FROM embeddings c
         JOIN context_items i ON c.namespace = i.namespace AND c.item_key = i.key
         WHERE c.namespace = ?
         AND c.embedding IS NOT NULL`,
        namespace
      );

      if (chunksWithEmbeddings.length === 0) {
        logger.debug(`No chunks with embeddings found for namespace: ${namespace}`);
        return [];
      }

      logger.debug(
        `Calculating similarities | Context: ${JSON.stringify({ itemCount: chunksWithEmbeddings.length })}`
      );

      // Calculate similarity scores
      const similarities = chunksWithEmbeddings.map((chunk) => {
        const chunkEmbedding = JSON.parse(chunk.embedding);
        const similarity = this.calculateCosineSimilarity(queryEmbedding[0], chunkEmbedding);
        return {
          ...chunk,
          similarity,
        };
      });

      // Filter by similarity threshold and sort by similarity
      const filteredResults = similarities
        .filter((item) => item.similarity >= similarityThreshold)
        .sort((a, b) => b.similarity - a.similarity);

      // Group by item to avoid duplicates, taking the highest similarity score
      const itemMap = new Map();
      for (const result of filteredResults) {
        const itemKey = `${result.namespace}:${result.item_key}`;
        if (!itemMap.has(itemKey) || itemMap.get(itemKey).similarity < result.similarity) {
          // Parse the value field
          try {
            result.value = JSON.parse(result.value);
          } catch (e) {
            // If parsing fails, keep the original value
          }
          itemMap.set(itemKey, result);
        }
      }

      // Convert back to array and take top results
      const results = Array.from(itemMap.values())
        .slice(0, limit)
        .map((item) => ({
          namespace: item.namespace,
          key: item.item_key,
          value: item.value,
          matchedChunk: item.chunk_text,
          similarity: item.similarity,
          created_at: item.created_at,
          updated_at: item.updated_at,
        }));

      logger.debug(
        `Retrieved items by semantic search | Context: ${JSON.stringify({ namespace, queryText, count: results.length })}`
      );

      return results;
    } catch (error) {
      logger.error(
        `Error retrieving items by semantic search | Context: ${JSON.stringify({ namespace, error })}`
      );
      throw error;
    }
  }

  /**
   * Legacy method for retrieving items by embedding similarity (without chunking)
   */
  async retrieveDataItemsByEmbeddingSimilarity(
    namespace: string,
    queryText: string,
    embeddingService: any,
    options: {
      limit?: number;
      similarityThreshold?: number;
    } = {}
  ): Promise<any[]> {
    logger.warn(`Legacy embedding similarity search called, but not implemented`);
    return []; // Empty implementation - this would be implemented in your system if needed
  }

  /**
   * Retrieve a data item by namespace and key
   */
  async retrieveDataItem(namespace: string, key: string): Promise<any | null> {
    const db = await this.getDb();
    const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>;

    try {
      logger.debug(`Retrieving data item | Context: ${JSON.stringify({ namespace, key })}`);

      const result = await get(
        'SELECT * FROM context_items WHERE namespace = ?
AND key = ?', namespace, key ); if (!result) { logger.debug(`Data item not found | Context: ${JSON.stringify({ namespace, key })}`); return null; } try { // Parse JSON value result.value = JSON.parse(result.value); } catch (e) { // If parsing fails, keep the original value logger.warn( `Failed to parse value as JSON | Context: ${JSON.stringify({ namespace, key })}` ); } logger.debug( `Data item retrieved successfully | Context: ${JSON.stringify({ namespace, key })}` ); return result; } catch (error) { logger.error( `Error retrieving data item | Context: ${JSON.stringify({ namespace, key, error })}` ); throw error; } } /** * Delete a data item */ async deleteDataItem(namespace: string, key: string): Promise<boolean> { const db = await this.getDb(); const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>; try { logger.debug(`Deleting data item | Context: ${JSON.stringify({ namespace, key })}`); await run('BEGIN TRANSACTION'); // First delete any associated chunks if chunking is enabled if (this.enableChunking) { await run('DELETE FROM embeddings WHERE namespace = ? AND item_key = ?', namespace, key); } // Then delete the main item const result = await run( 'DELETE FROM context_items WHERE namespace = ? AND key = ?', namespace, key ); await run('COMMIT'); logger.debug(`Data item deleted | Context: ${JSON.stringify({ namespace, key, result })}`); return true; // SQLite doesn't return affected rows in the same way as other DBs } catch (error) { await run('ROLLBACK'); logger.error( `Error deleting data item | Context: ${JSON.stringify({ namespace, key, error })}` ); throw error; } } /** * List all namespaces */ async listAllNamespaces(): Promise<string[]> { const db = await this.getDb(); const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>; try { logger.debug('Listing all namespaces'); const results = await all('SELECT namespace FROM namespaces ORDER BY namespace'); const namespaces = results.map((row) => row.namespace); logger.debug(`Retrieved ${namespaces.length} namespaces`); return namespaces; } catch (error) { logger.error(`Error listing namespaces | Context: ${JSON.stringify(error)}`); throw error; } } /** * List context item keys */ async listContextItemKeys(namespace: string): Promise<any[]> { const db = await this.getDb(); const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>; try { logger.debug(`Listing context item keys | Context: ${JSON.stringify({ namespace })}`); const results = await all( 'SELECT key, created_at, updated_at FROM context_items WHERE namespace = ? 
ORDER BY key', namespace ); logger.debug( `Retrieved keys for ${results.length} items | Context: ${JSON.stringify({ namespace })}` ); return results; } catch (error) { logger.error( `Error listing context item keys | Context: ${JSON.stringify({ namespace, error })}` ); throw error; } } /** * Create a namespace */ async createNamespace(namespace: string): Promise<boolean> { const db = await this.getDb(); const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>; const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>; try { logger.debug(`Creating namespace | Context: ${JSON.stringify({ namespace })}`); // Check if namespace already exists const existingNamespace = await get( 'SELECT 1 FROM namespaces WHERE namespace = ?', namespace ); if (existingNamespace) { logger.debug(`Namespace already exists | Context: ${JSON.stringify({ namespace })}`); return false; } // Create the namespace await run( 'INSERT INTO namespaces (namespace, created_at) VALUES (?, ?)', namespace, new Date().toISOString() ); logger.debug(`Namespace created successfully | Context: ${JSON.stringify({ namespace })}`); return true; } catch (error) { logger.error(`Error creating namespace | Context: ${JSON.stringify({ namespace, error })}`); throw error; } } /** * Delete a namespace */ async deleteNamespace(namespace: string): Promise<boolean> { const db = await this.getDb(); const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>; const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>; try { logger.debug(`Attempting to delete namespace | Context: ${JSON.stringify({ namespace })}`); // Check if namespace exists const existingNamespace = await get( 'SELECT 1 FROM namespaces WHERE namespace = ?', namespace ); if (!existingNamespace) { logger.debug( `Namespace does not exist, nothing to delete | Context: ${JSON.stringify({ namespace })}` ); return false; } // Delete using a transaction to ensure atomicity logger.debug( `Beginning transaction for namespace deletion | Context: ${JSON.stringify({ namespace })}` ); await run('BEGIN TRANSACTION'); // Delete all context items in namespace (cascades to embeddings due to foreign key) logger.debug( `Deleting all context items in namespace | Context: ${JSON.stringify({ namespace })}` ); await run('DELETE FROM context_items WHERE namespace = ?', namespace); // Delete the namespace logger.debug(`Deleting namespace | Context: ${JSON.stringify({ namespace })}`); await run('DELETE FROM namespaces WHERE namespace = ?', namespace); await run('COMMIT'); logger.debug(`Namespace deleted successfully | Context: ${JSON.stringify({ namespace })}`); return true; } catch (error) { await run('ROLLBACK'); logger.error(`Error deleting namespace | Context: ${JSON.stringify({ namespace, error })}`); throw error; } } /** * Calculate cosine similarity between two embedding vectors */ private calculateCosineSimilarity(vec1: number[], vec2: number[]): number { if (vec1.length !== vec2.length) { throw new Error(`Vector dimensions don't match: ${vec1.length} vs ${vec2.length}`); } let dotProduct = 0; let mag1 = 0; let mag2 = 0; for (let i = 0; i < vec1.length; i++) { dotProduct += vec1[i] * vec2[i]; mag1 += vec1[i] * vec1[i]; mag2 += vec2[i] * vec2[i]; } mag1 = Math.sqrt(mag1); mag2 = Math.sqrt(mag2); if (mag1 === 0 || mag2 === 0) { return 0; } return dotProduct / (mag1 * mag2); } /** * Execute a SQL query with parameters and return a single row * (For compatibility with old 
Database interface) */ async get(sql: string, ...params: any[]): Promise<any> { const db = await this.getDb(); const get = promisify(db.get.bind(db)) as (sql: string, ...params: any[]) => Promise<any>; try { logger.debug(`Executing SQL get query: ${sql}`); const result = await get(sql, ...params); return result; } catch (error) { logger.error(`Error executing SQL get query: ${error}`); throw error; } } /** * Execute a SQL query with parameters and return all rows * (For compatibility with old Database interface) */ async all(sql: string, ...params: any[]): Promise<any[]> { const db = await this.getDb(); const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>; try { logger.debug(`Executing SQL all query: ${sql}`); const results = await all(sql, ...params); return results; } catch (error) { logger.error(`Error executing SQL all query: ${error}`); throw error; } } /** * Execute a SQL query with parameters (no return value) * (For compatibility with old Database interface) */ async run(sql: string, ...params: any[]): Promise<void> { const db = await this.getDb(); const run = promisify(db.run.bind(db)) as (sql: string, ...params: any[]) => Promise<void>; try { logger.debug(`Executing SQL run query: ${sql}`); await run(sql, ...params); } catch (error) { logger.error(`Error executing SQL run query: ${error}`); throw error; } } /** * Close the database connection * (For compatibility with old Database interface) */ async close(): Promise<void> { if (this.db) { const db = this.db; await new Promise<void>((resolve, reject) => { db.close((err) => { if (err) { reject(err); } else { this.db = null; resolve(); } }); }); } } /** * Retrieve all items in a namespace */ async retrieveAllItemsInNamespace(namespace: string): Promise<any[]> { const db = await this.getDb(); const all = promisify(db.all.bind(db)) as (sql: string, ...params: any[]) => Promise<any[]>; const results = await all( 'SELECT * FROM context_items WHERE namespace = ? ORDER BY key', namespace ); return results.map((result) => { try { result.value = JSON.parse(result.value); } catch (e) { // If parsing fails, keep the original value } return result; }); } } ```
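Taken together, `SQLiteDataStore` and `EmbeddingService` are all that is needed to exercise the
semantic-search path end to end. The sketch below is illustrative only and is not a file in this
repo; the import paths, the `notes` namespace, and the sample strings are assumptions, and it uses
only the methods defined in the sources above.

```typescript
// demo.ts — a minimal usage sketch, assuming the repo layout shown above.
import { EmbeddingService } from './src/lib/embedding-service.js';
import { SQLiteDataStore } from './src/lib/sqlite-store.js';

async function main(): Promise<void> {
  const store = new SQLiteDataStore({ dbPath: './data/context.db', enableChunking: true });
  const embeddings = new EmbeddingService(); // spawns src/python/embedding_service.py via the venv

  await store.createNamespace('notes');
  await store.storeDataItem(
    'notes',
    'auth-refactor',
    'Switched session handling to JWT; tokens rotate every 15 minutes.'
  );

  // storeDataItem already embeds new chunks inline when chunking is enabled;
  // this call only backfills chunks whose embedding generation failed earlier.
  await store.generateEmbeddingsForChunks('notes', embeddings);

  const hits = await store.retrieveDataItemsBySemanticSearch(
    'notes',
    'how do we rotate tokens?',
    embeddings,
    { limit: 5, similarityThreshold: 0.5 }
  );
  console.log(hits.map((h) => ({ key: h.key, similarity: h.similarity })));

  await embeddings.close();
  await store.close();
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
```

Note that each result carries the single best-matching chunk (`matchedChunk`) for its item, because
the search deduplicates chunks per item and keeps the highest cosine similarity.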