This is page 35 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/config.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | MCP Memory Service Configuration
17 |
18 | Environment Variables:
19 | - MCP_MEMORY_STORAGE_BACKEND: Storage backend ('sqlite_vec', 'cloudflare', or 'hybrid')
20 | - MCP_MEMORY_SQLITE_PATH: SQLite-vec database file path
21 | - MCP_MEMORY_USE_ONNX: Use ONNX embeddings ('true'/'false')
22 |
23 | Copyright (c) 2024 Heinrich Krupp
24 | Licensed under the Apache License, Version 2.0
25 | """
26 | import os
27 | import sys
28 | import secrets
29 | from pathlib import Path
30 | from typing import Optional
31 | import time
32 | import logging
33 |
34 | # Load environment variables from .env file if it exists
35 | try:
36 | from dotenv import load_dotenv
37 | env_file = Path(__file__).parent.parent.parent / ".env"
38 | if env_file.exists():
39 | load_dotenv(env_file)
40 | logging.getLogger(__name__).info(f"Loaded environment from {env_file}")
41 | except ImportError:
42 | # dotenv not available, skip loading
43 | pass
44 |
45 | logger = logging.getLogger(__name__)
46 |
47 | def safe_get_int_env(env_var: str, default: int, min_value: int = None, max_value: int = None) -> int:
48 | """
49 | Safely parse an integer environment variable with validation and error handling.
50 |
51 | Args:
52 | env_var: Environment variable name
53 | default: Default value if not set or invalid
54 | min_value: Minimum allowed value (optional)
55 | max_value: Maximum allowed value (optional)
56 |
57 | Returns:
58 | Parsed and validated integer value
59 |
 60 |     Note:
 61 |         Never raises on bad input: out-of-range or unparseable values are logged and the default is returned.
62 | """
63 | env_value = os.getenv(env_var)
64 | if not env_value:
65 | return default
66 |
67 | try:
68 | value = int(env_value)
69 |
70 | # Validate range if specified
71 | if min_value is not None and value < min_value:
72 | logger.error(f"Environment variable {env_var}={value} is below minimum {min_value}, using default {default}")
73 | return default
74 |
75 | if max_value is not None and value > max_value:
76 | logger.error(f"Environment variable {env_var}={value} is above maximum {max_value}, using default {default}")
77 | return default
78 |
79 | logger.debug(f"Environment variable {env_var}={value} parsed successfully")
80 | return value
81 |
82 | except ValueError as e:
83 | logger.error(f"Invalid integer value for {env_var}='{env_value}': {e}. Using default {default}")
84 | return default
85 |
86 | def safe_get_optional_int_env(env_var: str, default: Optional[int] = None, min_value: int = None, max_value: int = None, none_values: tuple = ('none', 'null', 'unlimited', '')) -> Optional[int]:
87 | """
88 | Safely parse an optional integer environment variable with validation and error handling.
89 |
90 | Args:
91 | env_var: Environment variable name
92 | default: Default value if not set or invalid (None for unlimited)
93 | min_value: Minimum allowed value (optional)
94 | max_value: Maximum allowed value (optional)
95 | none_values: Tuple of string values that should be interpreted as None
96 |
97 | Returns:
98 | Parsed and validated integer value, or None if explicitly set to a none_value
99 | """
100 | env_value = os.getenv(env_var)
101 | if not env_value:
102 | return default
103 |
104 | # Check if value should be interpreted as None/unlimited
105 | if env_value.lower().strip() in none_values:
106 | return None
107 |
108 | try:
109 | value = int(env_value.strip())
110 |
111 | # Validate range if specified
112 | if min_value is not None and value < min_value:
113 | logger.warning(f"Environment variable {env_var}={value} is below minimum {min_value}. Using default {default}")
114 | return default
115 |
116 | if max_value is not None and value > max_value:
117 | logger.warning(f"Environment variable {env_var}={value} is above maximum {max_value}. Using default {default}")
118 | return default
119 |
120 | return value
121 |
122 | except ValueError:
123 | logger.warning(f"Invalid value for {env_var}='{env_value}'. Expected integer or {'/'.join(none_values)}. Using default {default}")
124 | return default
125 |
126 | def safe_get_bool_env(env_var: str, default: bool) -> bool:
127 | """
128 | Safely parse a boolean environment variable with validation and error handling.
129 |
130 | Args:
131 | env_var: Environment variable name
132 | default: Default value if not set or invalid
133 |
134 | Returns:
135 | Parsed boolean value
136 | """
137 | env_value = os.getenv(env_var)
138 | if not env_value:
139 | return default
140 |
141 | env_value_lower = env_value.lower().strip()
142 |
143 | if env_value_lower in ('true', '1', 'yes', 'on', 'enabled'):
144 | return True
145 | elif env_value_lower in ('false', '0', 'no', 'off', 'disabled'):
146 | return False
147 | else:
148 | logger.error(f"Invalid boolean value for {env_var}='{env_value}'. Expected true/false, 1/0, yes/no, on/off, enabled/disabled. Using default {default}")
149 | return default
150 |
151 | def validate_and_create_path(path: str) -> str:
152 | """Validate and create a directory path, ensuring it's writable.
153 |
154 | This function ensures that the specified directory path exists and is writable.
155 | It performs several checks and has a retry mechanism to handle potential race
156 | conditions, especially when running in environments like Claude Desktop where
157 | file system operations might be more restricted.
158 | """
159 | try:
160 | # Convert to absolute path and expand user directory if present (e.g. ~)
161 | abs_path = os.path.abspath(os.path.expanduser(path))
162 | logger.debug(f"Validating path: {abs_path}")
163 |
164 | # Create directory and all parents if they don't exist
165 | try:
166 | os.makedirs(abs_path, exist_ok=True)
167 | logger.debug(f"Created directory (or already exists): {abs_path}")
168 | except Exception as e:
169 | logger.error(f"Error creating directory {abs_path}: {str(e)}")
170 | raise PermissionError(f"Cannot create directory {abs_path}: {str(e)}")
171 |
172 | # Add small delay to prevent potential race conditions on macOS during initial write test
173 | time.sleep(0.1)
174 |
175 | # Verify that the path exists and is a directory
176 | if not os.path.exists(abs_path):
177 | logger.error(f"Path does not exist after creation attempt: {abs_path}")
178 | raise PermissionError(f"Path does not exist: {abs_path}")
179 |
180 | if not os.path.isdir(abs_path):
181 | logger.error(f"Path is not a directory: {abs_path}")
182 | raise PermissionError(f"Path is not a directory: {abs_path}")
183 |
184 | # Write test with retry mechanism
185 | max_retries = 3
186 | retry_delay = 0.5
187 | test_file = os.path.join(abs_path, '.write_test')
188 |
189 | for attempt in range(max_retries):
190 | try:
191 | logger.debug(f"Testing write permissions (attempt {attempt+1}/{max_retries}): {test_file}")
192 | with open(test_file, 'w') as f:
193 | f.write('test')
194 |
195 | if os.path.exists(test_file):
196 | logger.debug(f"Successfully wrote test file: {test_file}")
197 | os.remove(test_file)
198 | logger.debug(f"Successfully removed test file: {test_file}")
199 | logger.info(f"Directory {abs_path} is writable.")
200 | return abs_path
201 | else:
202 | logger.warning(f"Test file was not created: {test_file}")
203 | except Exception as e:
204 | logger.warning(f"Error during write test (attempt {attempt+1}/{max_retries}): {str(e)}")
205 | if attempt < max_retries - 1:
206 | logger.debug(f"Retrying after {retry_delay}s...")
207 | time.sleep(retry_delay)
208 | else:
209 | logger.error(f"All write test attempts failed for {abs_path}")
210 | raise PermissionError(f"Directory {abs_path} is not writable: {str(e)}")
211 |
212 | return abs_path
213 | except Exception as e:
214 | logger.error(f"Error validating path {path}: {str(e)}")
215 | raise
216 |
217 | # Determine base directory - prefer local over Cloud
218 | def get_base_directory() -> str:
219 | """Get base directory for storage, with fallback options."""
220 | # First choice: Environment variable
221 | if base_dir := os.getenv('MCP_MEMORY_BASE_DIR'):
222 | return validate_and_create_path(base_dir)
223 |
224 | # Second choice: Local app data directory
225 | home = str(Path.home())
226 | if sys.platform == 'darwin': # macOS
227 | base = os.path.join(home, 'Library', 'Application Support', 'mcp-memory')
228 | elif sys.platform == 'win32': # Windows
229 | base = os.path.join(os.getenv('LOCALAPPDATA', ''), 'mcp-memory')
230 | else: # Linux and others
231 | base = os.path.join(home, '.local', 'share', 'mcp-memory')
232 |
233 | return validate_and_create_path(base)
234 |
235 | # Initialize paths
236 | try:
237 | BASE_DIR = get_base_directory()
238 |
239 | # Try multiple environment variable names for backups path
240 | backups_path = None
241 | for env_var in ['MCP_MEMORY_BACKUPS_PATH', 'mcpMemoryBackupsPath']:
242 | if path := os.getenv(env_var):
243 | backups_path = path
244 | logger.info(f"Using {env_var}={path} for backups path")
245 | break
246 |
247 | # If no environment variable is set, use the default path
248 | if not backups_path:
249 | backups_path = os.path.join(BASE_DIR, 'backups')
250 | logger.info(f"No backups path environment variable found, using default: {backups_path}")
251 |
252 | BACKUPS_PATH = validate_and_create_path(backups_path)
253 |
254 | # Print the final paths used
255 | logger.info(f"Using backups path: {BACKUPS_PATH}")
256 |
257 | except Exception as e:
258 | logger.error(f"Fatal error initializing paths: {str(e)}")
259 | sys.exit(1)
260 |
261 | # Server settings
262 | SERVER_NAME = "memory"
263 | # Import version from main package for consistency
264 | from . import __version__ as SERVER_VERSION
265 |
266 | # Storage backend configuration
267 | SUPPORTED_BACKENDS = ['sqlite_vec', 'sqlite-vec', 'cloudflare', 'hybrid']
268 | STORAGE_BACKEND = os.getenv('MCP_MEMORY_STORAGE_BACKEND', 'sqlite_vec').lower()
269 |
270 | # Normalize backend names (sqlite-vec -> sqlite_vec)
271 | if STORAGE_BACKEND == 'sqlite-vec':
272 | STORAGE_BACKEND = 'sqlite_vec'
273 |
274 | # Validate backend selection
275 | if STORAGE_BACKEND not in SUPPORTED_BACKENDS:
276 | logger.warning(f"Unknown storage backend: {STORAGE_BACKEND}, falling back to sqlite_vec")
277 | STORAGE_BACKEND = 'sqlite_vec'
278 |
279 | logger.info(f"Using storage backend: {STORAGE_BACKEND}")
280 |
281 | # =============================================================================
282 | # Content Length Limits Configuration (v7.5.0+)
283 | # =============================================================================
284 |
285 | # Backend-specific content length limits based on embedding model constraints
286 | # These limits prevent embedding failures and enable automatic content splitting
287 |
288 | # Cloudflare: BGE-base-en-v1.5 model has 512 token limit
289 | # Using 800 characters as safe limit (~400 tokens with overhead)
290 | CLOUDFLARE_MAX_CONTENT_LENGTH = safe_get_int_env(
291 | 'MCP_CLOUDFLARE_MAX_CONTENT_LENGTH',
292 | default=800,
293 | min_value=100,
294 | max_value=10000
295 | )
296 |
297 | # SQLite-vec: No inherent limit (local storage)
298 | # Set to None for unlimited, or configure via environment variable
299 | SQLITEVEC_MAX_CONTENT_LENGTH = safe_get_optional_int_env(
300 | 'MCP_SQLITEVEC_MAX_CONTENT_LENGTH',
301 | default=None,
302 | min_value=100,
303 | max_value=10000
304 | )
305 |
306 | # Hybrid: Constrained by Cloudflare secondary storage (configurable)
307 | HYBRID_MAX_CONTENT_LENGTH = safe_get_int_env(
308 | 'MCP_HYBRID_MAX_CONTENT_LENGTH',
309 | default=CLOUDFLARE_MAX_CONTENT_LENGTH,
310 | min_value=100,
311 | max_value=10000
312 | )
313 |
314 | # Enable automatic content splitting when limits are exceeded
315 | ENABLE_AUTO_SPLIT = safe_get_bool_env('MCP_ENABLE_AUTO_SPLIT', default=True)
316 |
317 | # Content splitting configuration
318 | CONTENT_SPLIT_OVERLAP = safe_get_int_env(
319 | 'MCP_CONTENT_SPLIT_OVERLAP',
320 | default=50,
321 | min_value=0,
322 | max_value=500
323 | )
324 | CONTENT_PRESERVE_BOUNDARIES = safe_get_bool_env('MCP_CONTENT_PRESERVE_BOUNDARIES', default=True)
325 |
326 | logger.info(f"Content length limits - Cloudflare: {CLOUDFLARE_MAX_CONTENT_LENGTH}, "
327 | f"SQLite-vec: {'unlimited' if SQLITEVEC_MAX_CONTENT_LENGTH is None else SQLITEVEC_MAX_CONTENT_LENGTH}, "
328 | f"Auto-split: {ENABLE_AUTO_SPLIT}")
329 |
330 | # =============================================================================
331 | # End Content Length Limits Configuration
332 | # =============================================================================
333 |
334 | # SQLite-vec specific configuration (also needed for hybrid backend)
335 | if STORAGE_BACKEND == 'sqlite_vec' or STORAGE_BACKEND == 'hybrid':
336 | # Try multiple environment variable names for SQLite-vec path
337 | sqlite_vec_path = None
338 | for env_var in ['MCP_MEMORY_SQLITE_PATH', 'MCP_MEMORY_SQLITEVEC_PATH']:
339 | if path := os.getenv(env_var):
340 | sqlite_vec_path = path
341 | logger.info(f"Using {env_var}={path} for SQLite-vec database path")
342 | break
343 |
344 | # If no environment variable is set, use the default path
345 | if not sqlite_vec_path:
346 | sqlite_vec_path = os.path.join(BASE_DIR, 'sqlite_vec.db')
347 | logger.info(f"No SQLite-vec path environment variable found, using default: {sqlite_vec_path}")
348 |
349 | # Ensure directory exists for SQLite database
350 | sqlite_dir = os.path.dirname(sqlite_vec_path)
351 | if sqlite_dir:
352 | os.makedirs(sqlite_dir, exist_ok=True)
353 |
354 | SQLITE_VEC_PATH = sqlite_vec_path
355 | logger.info(f"Using SQLite-vec database path: {SQLITE_VEC_PATH}")
356 | else:
357 | SQLITE_VEC_PATH = None
358 |
359 | # ONNX Configuration
360 | USE_ONNX = os.getenv('MCP_MEMORY_USE_ONNX', '').lower() in ('1', 'true', 'yes')
361 | if USE_ONNX:
362 | logger.info("ONNX embeddings enabled - using PyTorch-free embedding generation")
363 | # ONNX model cache directory
364 | ONNX_MODEL_CACHE = os.path.join(BASE_DIR, 'onnx_models')
365 | os.makedirs(ONNX_MODEL_CACHE, exist_ok=True)
366 |
367 | # Cloudflare specific configuration (also needed for hybrid backend)
368 | if STORAGE_BACKEND == 'cloudflare' or STORAGE_BACKEND == 'hybrid':
369 | # Required Cloudflare settings
370 | CLOUDFLARE_API_TOKEN = os.getenv('CLOUDFLARE_API_TOKEN')
371 | CLOUDFLARE_ACCOUNT_ID = os.getenv('CLOUDFLARE_ACCOUNT_ID')
372 | CLOUDFLARE_VECTORIZE_INDEX = os.getenv('CLOUDFLARE_VECTORIZE_INDEX')
373 | CLOUDFLARE_D1_DATABASE_ID = os.getenv('CLOUDFLARE_D1_DATABASE_ID')
374 |
375 | # Optional Cloudflare settings
376 | CLOUDFLARE_R2_BUCKET = os.getenv('CLOUDFLARE_R2_BUCKET') # For large content storage
377 | CLOUDFLARE_EMBEDDING_MODEL = os.getenv('CLOUDFLARE_EMBEDDING_MODEL', '@cf/baai/bge-base-en-v1.5')
378 | CLOUDFLARE_LARGE_CONTENT_THRESHOLD = int(os.getenv('CLOUDFLARE_LARGE_CONTENT_THRESHOLD', '1048576')) # 1MB
379 | CLOUDFLARE_MAX_RETRIES = int(os.getenv('CLOUDFLARE_MAX_RETRIES', '3'))
380 | CLOUDFLARE_BASE_DELAY = float(os.getenv('CLOUDFLARE_BASE_DELAY', '1.0'))
381 |
382 | # Validate required settings
383 | missing_vars = []
384 | if not CLOUDFLARE_API_TOKEN:
385 | missing_vars.append('CLOUDFLARE_API_TOKEN')
386 | if not CLOUDFLARE_ACCOUNT_ID:
387 | missing_vars.append('CLOUDFLARE_ACCOUNT_ID')
388 | if not CLOUDFLARE_VECTORIZE_INDEX:
389 | missing_vars.append('CLOUDFLARE_VECTORIZE_INDEX')
390 | if not CLOUDFLARE_D1_DATABASE_ID:
391 | missing_vars.append('CLOUDFLARE_D1_DATABASE_ID')
392 |
393 | if missing_vars:
394 | logger.error(f"Missing required environment variables for Cloudflare backend: {', '.join(missing_vars)}")
395 | logger.error("Please set the required variables or switch to a different backend")
396 | sys.exit(1)
397 |
398 | logger.info(f"Using Cloudflare backend with:")
399 | logger.info(f" Vectorize Index: {CLOUDFLARE_VECTORIZE_INDEX}")
400 | logger.info(f" D1 Database: {CLOUDFLARE_D1_DATABASE_ID}")
401 | logger.info(f" R2 Bucket: {CLOUDFLARE_R2_BUCKET or 'Not configured'}")
402 | logger.info(f" Embedding Model: {CLOUDFLARE_EMBEDDING_MODEL}")
403 | logger.info(f" Large Content Threshold: {CLOUDFLARE_LARGE_CONTENT_THRESHOLD} bytes")
404 | else:
405 | # Set Cloudflare variables to None when not using Cloudflare backend
406 | CLOUDFLARE_API_TOKEN = None
407 | CLOUDFLARE_ACCOUNT_ID = None
408 | CLOUDFLARE_VECTORIZE_INDEX = None
409 | CLOUDFLARE_D1_DATABASE_ID = None
410 | CLOUDFLARE_R2_BUCKET = None
411 | CLOUDFLARE_EMBEDDING_MODEL = None
412 | CLOUDFLARE_LARGE_CONTENT_THRESHOLD = None
413 | CLOUDFLARE_MAX_RETRIES = None
414 | CLOUDFLARE_BASE_DELAY = None
415 |
416 | # Hybrid backend specific configuration
417 | if STORAGE_BACKEND == 'hybrid':
418 | # Sync service configuration
419 | HYBRID_SYNC_INTERVAL = int(os.getenv('MCP_HYBRID_SYNC_INTERVAL', '300')) # 5 minutes default
420 | HYBRID_BATCH_SIZE = int(os.getenv('MCP_HYBRID_BATCH_SIZE', '50'))
421 | HYBRID_MAX_QUEUE_SIZE = int(os.getenv('MCP_HYBRID_MAX_QUEUE_SIZE', '1000'))
422 | HYBRID_MAX_RETRIES = int(os.getenv('MCP_HYBRID_MAX_RETRIES', '3'))
423 |
424 | # Sync ownership control (v8.27.0+) - Prevents duplicate sync queues
425 | # Values: "http" (HTTP server only), "mcp" (MCP server only), "both" (both servers sync)
426 | # Recommended: "http" to avoid duplicate sync work
427 | HYBRID_SYNC_OWNER = os.getenv('MCP_HYBRID_SYNC_OWNER', 'both').lower()
428 |
429 | # Performance tuning
430 | HYBRID_ENABLE_HEALTH_CHECKS = os.getenv('MCP_HYBRID_ENABLE_HEALTH_CHECKS', 'true').lower() == 'true'
431 | HYBRID_HEALTH_CHECK_INTERVAL = int(os.getenv('MCP_HYBRID_HEALTH_CHECK_INTERVAL', '60')) # 1 minute
432 | HYBRID_SYNC_ON_STARTUP = os.getenv('MCP_HYBRID_SYNC_ON_STARTUP', 'true').lower() == 'true'
433 |
434 | # Drift detection and metadata sync (v8.25.0+)
435 | HYBRID_SYNC_UPDATES = os.getenv('MCP_HYBRID_SYNC_UPDATES', 'true').lower() == 'true'
436 | HYBRID_DRIFT_CHECK_INTERVAL = int(os.getenv('MCP_HYBRID_DRIFT_CHECK_INTERVAL', '3600')) # 1 hour default
437 | HYBRID_DRIFT_BATCH_SIZE = int(os.getenv('MCP_HYBRID_DRIFT_BATCH_SIZE', '100'))
438 |
439 | # Initial sync behavior tuning (v7.5.4+)
440 | HYBRID_MAX_EMPTY_BATCHES = safe_get_int_env('MCP_HYBRID_MAX_EMPTY_BATCHES', 20, min_value=1) # Stop after N batches without new syncs
441 | HYBRID_MIN_CHECK_COUNT = safe_get_int_env('MCP_HYBRID_MIN_CHECK_COUNT', 1000, min_value=1) # Minimum memories to check before early stop
442 |
443 | # Fallback behavior
444 | HYBRID_FALLBACK_TO_PRIMARY = os.getenv('MCP_HYBRID_FALLBACK_TO_PRIMARY', 'true').lower() == 'true'
445 | HYBRID_WARN_ON_SECONDARY_FAILURE = os.getenv('MCP_HYBRID_WARN_ON_SECONDARY_FAILURE', 'true').lower() == 'true'
446 |
447 | logger.info(f"Hybrid storage configuration: sync_interval={HYBRID_SYNC_INTERVAL}s, batch_size={HYBRID_BATCH_SIZE}")
448 |
449 | # Cloudflare Service Limits (for validation and monitoring)
450 | CLOUDFLARE_D1_MAX_SIZE_GB = 10 # D1 database hard limit
451 | CLOUDFLARE_VECTORIZE_MAX_VECTORS = 5_000_000 # Maximum vectors per index
452 | CLOUDFLARE_MAX_METADATA_SIZE_KB = 10 # Maximum metadata size per vector
453 | CLOUDFLARE_MAX_FILTER_SIZE_BYTES = 2048 # Maximum filter query size
454 | CLOUDFLARE_MAX_STRING_INDEX_SIZE_BYTES = 64 # Maximum indexed string size
455 | CLOUDFLARE_BATCH_INSERT_LIMIT = 200_000 # Maximum batch insert size
456 |
457 | # Limit warning thresholds (percentage)
458 | CLOUDFLARE_WARNING_THRESHOLD_PERCENT = 80 # Warn at 80% capacity
459 | CLOUDFLARE_CRITICAL_THRESHOLD_PERCENT = 95 # Critical at 95% capacity
460 |
461 | # Validate Cloudflare configuration for hybrid mode
462 | if not (CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_VECTORIZE_INDEX and CLOUDFLARE_D1_DATABASE_ID):
463 | logger.warning("Hybrid mode requires Cloudflare configuration. Missing required variables:")
464 | if not CLOUDFLARE_API_TOKEN:
465 | logger.warning(" - CLOUDFLARE_API_TOKEN")
466 | if not CLOUDFLARE_ACCOUNT_ID:
467 | logger.warning(" - CLOUDFLARE_ACCOUNT_ID")
468 | if not CLOUDFLARE_VECTORIZE_INDEX:
469 | logger.warning(" - CLOUDFLARE_VECTORIZE_INDEX")
470 | if not CLOUDFLARE_D1_DATABASE_ID:
471 | logger.warning(" - CLOUDFLARE_D1_DATABASE_ID")
472 | logger.warning("Hybrid mode will operate in SQLite-only mode until Cloudflare is configured")
473 | else:
474 | # Set hybrid-specific variables to None when not using hybrid backend
475 | HYBRID_SYNC_INTERVAL = None
476 | HYBRID_BATCH_SIZE = None
477 | HYBRID_MAX_QUEUE_SIZE = None
478 | HYBRID_MAX_RETRIES = None
479 | HYBRID_SYNC_OWNER = None
480 | HYBRID_ENABLE_HEALTH_CHECKS = None
481 | HYBRID_HEALTH_CHECK_INTERVAL = None
482 | HYBRID_SYNC_ON_STARTUP = None
483 | HYBRID_SYNC_UPDATES = None
484 | HYBRID_DRIFT_CHECK_INTERVAL = None
485 | HYBRID_DRIFT_BATCH_SIZE = None
486 | HYBRID_MAX_EMPTY_BATCHES = None
487 | HYBRID_MIN_CHECK_COUNT = None
488 | HYBRID_FALLBACK_TO_PRIMARY = None
489 | HYBRID_WARN_ON_SECONDARY_FAILURE = None
490 |
491 | # Also set limit constants to None
492 | CLOUDFLARE_D1_MAX_SIZE_GB = None
493 | CLOUDFLARE_VECTORIZE_MAX_VECTORS = None
494 | CLOUDFLARE_MAX_METADATA_SIZE_KB = None
495 | CLOUDFLARE_MAX_FILTER_SIZE_BYTES = None
496 | CLOUDFLARE_MAX_STRING_INDEX_SIZE_BYTES = None
497 | CLOUDFLARE_BATCH_INSERT_LIMIT = None
498 | CLOUDFLARE_WARNING_THRESHOLD_PERCENT = None
499 | CLOUDFLARE_CRITICAL_THRESHOLD_PERCENT = None
500 |
501 | # HTTP Server Configuration
502 | HTTP_ENABLED = os.getenv('MCP_HTTP_ENABLED', 'false').lower() == 'true'
503 | HTTP_PORT = safe_get_int_env('MCP_HTTP_PORT', 8000, min_value=1024, max_value=65535) # Non-privileged ports only
504 | HTTP_HOST = os.getenv('MCP_HTTP_HOST', '0.0.0.0')
505 | CORS_ORIGINS = os.getenv('MCP_CORS_ORIGINS', '*').split(',')
506 | SSE_HEARTBEAT_INTERVAL = safe_get_int_env('MCP_SSE_HEARTBEAT', 30, min_value=5, max_value=300) # 5 seconds to 5 minutes
507 | API_KEY = os.getenv('MCP_API_KEY', None) # Optional authentication
508 |
509 | # HTTPS Configuration
510 | HTTPS_ENABLED = os.getenv('MCP_HTTPS_ENABLED', 'false').lower() == 'true'
511 | SSL_CERT_FILE = os.getenv('MCP_SSL_CERT_FILE', None)
512 | SSL_KEY_FILE = os.getenv('MCP_SSL_KEY_FILE', None)
513 |
514 | # mDNS Service Discovery Configuration
515 | MDNS_ENABLED = os.getenv('MCP_MDNS_ENABLED', 'true').lower() == 'true'
516 | MDNS_SERVICE_NAME = os.getenv('MCP_MDNS_SERVICE_NAME', 'MCP Memory Service')
517 | MDNS_SERVICE_TYPE = os.getenv('MCP_MDNS_SERVICE_TYPE', '_mcp-memory._tcp.local.')
518 | MDNS_DISCOVERY_TIMEOUT = int(os.getenv('MCP_MDNS_DISCOVERY_TIMEOUT', '5'))
519 |
520 | # Database path for HTTP interface (use SQLite-vec by default)
521 | if (STORAGE_BACKEND in ['sqlite_vec', 'hybrid']) and SQLITE_VEC_PATH:
522 | DATABASE_PATH = SQLITE_VEC_PATH
523 | else:
524 | # Fallback to a default SQLite-vec path for HTTP interface
525 | DATABASE_PATH = os.path.join(BASE_DIR, 'memory_http.db')
526 |
527 | # Embedding model configuration
528 | EMBEDDING_MODEL_NAME = os.getenv('MCP_EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
529 |
530 | # =============================================================================
531 | # Document Processing Configuration (Semtools Integration)
532 | # =============================================================================
533 |
534 | # Semtools configuration for enhanced document parsing
535 | # LlamaParse API key for advanced OCR and table extraction
536 | LLAMAPARSE_API_KEY = os.getenv('LLAMAPARSE_API_KEY', None)
537 |
538 | # Document chunking configuration
539 | DOCUMENT_CHUNK_SIZE = safe_get_int_env('MCP_DOCUMENT_CHUNK_SIZE', 1000, min_value=100, max_value=10000)
540 | DOCUMENT_CHUNK_OVERLAP = safe_get_int_env('MCP_DOCUMENT_CHUNK_OVERLAP', 200, min_value=0, max_value=1000)
541 |
542 | # Log semtools configuration
543 | if LLAMAPARSE_API_KEY:
544 | logger.info("LlamaParse API key configured - enhanced document parsing available")
545 | else:
546 | logger.debug("LlamaParse API key not set - semtools will use basic parsing mode")
547 |
548 | logger.info(f"Document chunking: size={DOCUMENT_CHUNK_SIZE}, overlap={DOCUMENT_CHUNK_OVERLAP}")
549 |
550 | # =============================================================================
551 | # End Document Processing Configuration
552 | # =============================================================================
553 |
554 | # =============================================================================
555 | # Automatic Backup Configuration
556 | # =============================================================================
557 |
558 | BACKUP_ENABLED = safe_get_bool_env('MCP_BACKUP_ENABLED', True)
559 | BACKUP_INTERVAL = os.getenv('MCP_BACKUP_INTERVAL', 'daily').lower() # 'hourly', 'daily', 'weekly'
560 | BACKUP_RETENTION = safe_get_int_env('MCP_BACKUP_RETENTION', 7, min_value=1, max_value=365) # days
561 | BACKUP_MAX_COUNT = safe_get_int_env('MCP_BACKUP_MAX_COUNT', 10, min_value=1, max_value=100) # max backups to keep
562 |
563 | # Validate backup interval
564 | if BACKUP_INTERVAL not in ['hourly', 'daily', 'weekly']:
565 | logger.warning(f"Invalid backup interval: {BACKUP_INTERVAL}, falling back to 'daily'")
566 | BACKUP_INTERVAL = 'daily'
567 |
568 | logger.info(f"Backup configuration: enabled={BACKUP_ENABLED}, interval={BACKUP_INTERVAL}, retention={BACKUP_RETENTION} days")
569 |
570 | # =============================================================================
571 | # End Automatic Backup Configuration
572 | # =============================================================================
573 |
574 | # Dream-inspired consolidation configuration
575 | CONSOLIDATION_ENABLED = os.getenv('MCP_CONSOLIDATION_ENABLED', 'false').lower() == 'true'
576 |
577 | # Machine identification configuration
578 | INCLUDE_HOSTNAME = os.getenv('MCP_MEMORY_INCLUDE_HOSTNAME', 'false').lower() == 'true'
579 |
580 | # Consolidation archive location
581 | consolidation_archive_path = None
582 | for env_var in ['MCP_CONSOLIDATION_ARCHIVE_PATH', 'MCP_MEMORY_ARCHIVE_PATH']:
583 | if path := os.getenv(env_var):
584 | consolidation_archive_path = path
585 | logger.info(f"Using {env_var}={path} for consolidation archive path")
586 | break
587 |
588 | if not consolidation_archive_path:
589 | consolidation_archive_path = os.path.join(BASE_DIR, 'consolidation_archive')
590 | logger.info(f"No consolidation archive path environment variable found, using default: {consolidation_archive_path}")
591 |
592 | try:
593 | CONSOLIDATION_ARCHIVE_PATH = validate_and_create_path(consolidation_archive_path)
594 | logger.info(f"Using consolidation archive path: {CONSOLIDATION_ARCHIVE_PATH}")
595 | except Exception as e:
596 | logger.error(f"Error creating consolidation archive path: {e}")
597 | CONSOLIDATION_ARCHIVE_PATH = None
598 |
599 | # Consolidation settings with environment variable overrides
600 | CONSOLIDATION_CONFIG = {
601 | # Decay settings
602 | 'decay_enabled': os.getenv('MCP_DECAY_ENABLED', 'true').lower() == 'true',
603 | 'retention_periods': {
604 | 'critical': int(os.getenv('MCP_RETENTION_CRITICAL', '365')),
605 | 'reference': int(os.getenv('MCP_RETENTION_REFERENCE', '180')),
606 | 'standard': int(os.getenv('MCP_RETENTION_STANDARD', '30')),
607 | 'temporary': int(os.getenv('MCP_RETENTION_TEMPORARY', '7'))
608 | },
609 |
610 | # Association settings
611 | 'associations_enabled': os.getenv('MCP_ASSOCIATIONS_ENABLED', 'true').lower() == 'true',
612 | 'min_similarity': float(os.getenv('MCP_ASSOCIATION_MIN_SIMILARITY', '0.3')),
613 | 'max_similarity': float(os.getenv('MCP_ASSOCIATION_MAX_SIMILARITY', '0.7')),
614 | 'max_pairs_per_run': int(os.getenv('MCP_ASSOCIATION_MAX_PAIRS', '100')),
615 |
616 | # Clustering settings
617 | 'clustering_enabled': os.getenv('MCP_CLUSTERING_ENABLED', 'true').lower() == 'true',
618 | 'min_cluster_size': int(os.getenv('MCP_CLUSTERING_MIN_SIZE', '5')),
619 | 'clustering_algorithm': os.getenv('MCP_CLUSTERING_ALGORITHM', 'dbscan'), # 'dbscan', 'hierarchical', 'simple'
620 |
621 | # Compression settings
622 | 'compression_enabled': os.getenv('MCP_COMPRESSION_ENABLED', 'true').lower() == 'true',
623 | 'max_summary_length': int(os.getenv('MCP_COMPRESSION_MAX_LENGTH', '500')),
624 | 'preserve_originals': os.getenv('MCP_COMPRESSION_PRESERVE_ORIGINALS', 'true').lower() == 'true',
625 |
626 | # Forgetting settings
627 | 'forgetting_enabled': os.getenv('MCP_FORGETTING_ENABLED', 'true').lower() == 'true',
628 | 'relevance_threshold': float(os.getenv('MCP_FORGETTING_RELEVANCE_THRESHOLD', '0.1')),
629 | 'access_threshold_days': int(os.getenv('MCP_FORGETTING_ACCESS_THRESHOLD', '90')),
630 | 'archive_location': CONSOLIDATION_ARCHIVE_PATH,
631 |
632 | # Incremental consolidation settings
633 | 'batch_size': int(os.getenv('MCP_CONSOLIDATION_BATCH_SIZE', '500')),
634 | 'incremental_mode': os.getenv('MCP_CONSOLIDATION_INCREMENTAL', 'true').lower() == 'true'
635 | }
636 |
637 | # Consolidation scheduling settings (for APScheduler integration)
638 | CONSOLIDATION_SCHEDULE = {
639 | 'daily': os.getenv('MCP_SCHEDULE_DAILY', '02:00'), # 2 AM daily
640 | 'weekly': os.getenv('MCP_SCHEDULE_WEEKLY', 'SUN 03:00'), # 3 AM on Sundays
641 | 'monthly': os.getenv('MCP_SCHEDULE_MONTHLY', '01 04:00'), # 4 AM on 1st of month
642 | 'quarterly': os.getenv('MCP_SCHEDULE_QUARTERLY', 'disabled'), # Disabled by default
643 | 'yearly': os.getenv('MCP_SCHEDULE_YEARLY', 'disabled') # Disabled by default
644 | }
645 |
646 | logger.info(f"Consolidation enabled: {CONSOLIDATION_ENABLED}")
647 | if CONSOLIDATION_ENABLED:
648 | logger.info(f"Consolidation configuration: {CONSOLIDATION_CONFIG}")
649 | logger.info(f"Consolidation schedule: {CONSOLIDATION_SCHEDULE}")
650 |
651 | # OAuth 2.1 Configuration
652 | OAUTH_ENABLED = safe_get_bool_env('MCP_OAUTH_ENABLED', True)
653 |
654 | # RSA key pair configuration for JWT signing (RS256)
655 | # Private key for signing tokens
656 | OAUTH_PRIVATE_KEY = os.getenv('MCP_OAUTH_PRIVATE_KEY')
657 | # Public key for verifying tokens
658 | OAUTH_PUBLIC_KEY = os.getenv('MCP_OAUTH_PUBLIC_KEY')
659 |
660 | # Generate RSA key pair if not provided
661 | if not OAUTH_PRIVATE_KEY or not OAUTH_PUBLIC_KEY:
662 | try:
663 | from cryptography.hazmat.primitives import serialization
664 | from cryptography.hazmat.primitives.asymmetric import rsa
665 | from cryptography.hazmat.backends import default_backend
666 |
667 | # Generate 2048-bit RSA key pair
668 | private_key = rsa.generate_private_key(
669 | public_exponent=65537,
670 | key_size=2048,
671 | backend=default_backend()
672 | )
673 |
674 | # Serialize private key to PEM format
675 | OAUTH_PRIVATE_KEY = private_key.private_bytes(
676 | encoding=serialization.Encoding.PEM,
677 | format=serialization.PrivateFormat.PKCS8,
678 | encryption_algorithm=serialization.NoEncryption()
679 | ).decode('utf-8')
680 |
681 | # Serialize public key to PEM format
682 | public_key = private_key.public_key()
683 | OAUTH_PUBLIC_KEY = public_key.public_bytes(
684 | encoding=serialization.Encoding.PEM,
685 | format=serialization.PublicFormat.SubjectPublicKeyInfo
686 | ).decode('utf-8')
687 |
688 | logger.info("Generated RSA key pair for OAuth JWT signing (set MCP_OAUTH_PRIVATE_KEY and MCP_OAUTH_PUBLIC_KEY for persistence)")
689 |
690 | except ImportError:
691 | logger.warning("cryptography package not available, falling back to HS256 symmetric key")
692 | # Fallback to symmetric key for HS256
693 | OAUTH_SECRET_KEY = os.getenv('MCP_OAUTH_SECRET_KEY')
694 | if not OAUTH_SECRET_KEY:
695 | OAUTH_SECRET_KEY = secrets.token_urlsafe(32)
696 | logger.info("Generated random OAuth secret key (set MCP_OAUTH_SECRET_KEY for persistence)")
697 | OAUTH_PRIVATE_KEY = None
698 | OAUTH_PUBLIC_KEY = None
699 |
700 | # JWT algorithm and key helper functions
701 | def get_jwt_algorithm() -> str:
702 |     """Return "RS256" when both RSA keys are configured, otherwise "HS256" (symmetric fallback)."""
703 |     return "RS256" if OAUTH_PRIVATE_KEY and OAUTH_PUBLIC_KEY else "HS256"
704 |
705 | def get_jwt_signing_key() -> str:
706 | """Get the appropriate key for JWT signing."""
707 | if OAUTH_PRIVATE_KEY and OAUTH_PUBLIC_KEY:
708 | return OAUTH_PRIVATE_KEY
709 | elif hasattr(globals(), 'OAUTH_SECRET_KEY'):
710 | return OAUTH_SECRET_KEY
711 | else:
712 | raise ValueError("No JWT signing key available")
713 |
714 | def get_jwt_verification_key() -> str:
715 | """Get the appropriate key for JWT verification."""
716 | if OAUTH_PRIVATE_KEY and OAUTH_PUBLIC_KEY:
717 | return OAUTH_PUBLIC_KEY
718 | elif hasattr(globals(), 'OAUTH_SECRET_KEY'):
719 | return OAUTH_SECRET_KEY
720 | else:
721 | raise ValueError("No JWT verification key available")
722 |
723 | def validate_oauth_configuration() -> None:
724 | """
725 | Validate OAuth configuration at startup.
726 |
727 | Raises:
728 | ValueError: If OAuth configuration is invalid
729 | """
730 | if not OAUTH_ENABLED:
731 | logger.info("OAuth validation skipped: OAuth disabled")
732 | return
733 |
734 | errors = []
735 | warnings = []
736 |
737 | # Validate issuer URL
738 | if not OAUTH_ISSUER:
739 | errors.append("OAuth issuer URL is not configured")
740 | elif not OAUTH_ISSUER.startswith(('http://', 'https://')):
741 | errors.append(f"OAuth issuer URL must start with http:// or https://: {OAUTH_ISSUER}")
742 |
743 | # Validate JWT configuration
744 | try:
745 | algorithm = get_jwt_algorithm()
746 | logger.debug(f"OAuth JWT algorithm validation: {algorithm}")
747 |
748 | # Test key access
749 | signing_key = get_jwt_signing_key()
750 | verification_key = get_jwt_verification_key()
751 |
752 | if algorithm == "RS256":
753 | if not OAUTH_PRIVATE_KEY or not OAUTH_PUBLIC_KEY:
754 | errors.append("RS256 algorithm selected but RSA keys are missing")
755 | elif len(signing_key) < 100: # Basic length check for PEM format
756 | warnings.append("RSA private key appears to be too short")
757 | elif algorithm == "HS256":
758 | if not hasattr(globals(), 'OAUTH_SECRET_KEY') or not OAUTH_SECRET_KEY:
759 | errors.append("HS256 algorithm selected but secret key is missing")
760 | elif len(signing_key) < 32: # Basic length check for symmetric key
761 | warnings.append("OAuth secret key is shorter than recommended (32+ characters)")
762 |
763 | except Exception as e:
764 | errors.append(f"JWT configuration error: {e}")
765 |
766 | # Validate token expiry settings
767 | if OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES <= 0:
768 | errors.append(f"OAuth access token expiry must be positive: {OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES}")
769 | elif OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES > 1440: # 24 hours
770 | warnings.append(f"OAuth access token expiry is very long: {OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
771 |
772 | if OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES <= 0:
773 | errors.append(f"OAuth authorization code expiry must be positive: {OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES}")
774 | elif OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES > 60: # 1 hour
775 | warnings.append(f"OAuth authorization code expiry is longer than recommended: {OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES} minutes")
776 |
777 | # Validate security settings
778 | if "localhost" in OAUTH_ISSUER or "127.0.0.1" in OAUTH_ISSUER:
779 | if not os.getenv('MCP_OAUTH_ISSUER'):
780 | warnings.append("OAuth issuer contains localhost/127.0.0.1. For production, set MCP_OAUTH_ISSUER to external URL")
781 |
782 | # Check for production readiness
783 | if ALLOW_ANONYMOUS_ACCESS:
784 | warnings.append("Anonymous access is enabled - consider disabling for production")
785 |
786 | # Check for insecure transport in production
787 | if OAUTH_ISSUER.startswith('http://') and not ("localhost" in OAUTH_ISSUER or "127.0.0.1" in OAUTH_ISSUER):
788 | warnings.append("OAuth issuer uses HTTP (non-encrypted) transport - use HTTPS for production")
789 |
790 | # Check for weak algorithm in production environments
791 | if get_jwt_algorithm() == "HS256" and not os.getenv('MCP_OAUTH_SECRET_KEY'):
792 | warnings.append("Using auto-generated HS256 secret key - set MCP_OAUTH_SECRET_KEY for production")
793 |
794 | # Log validation results
795 | if errors:
796 | error_msg = "OAuth configuration validation failed:\n" + "\n".join(f" - {err}" for err in errors)
797 | logger.error(error_msg)
798 | raise ValueError(f"Invalid OAuth configuration: {'; '.join(errors)}")
799 |
800 | if warnings:
801 | warning_msg = "OAuth configuration warnings:\n" + "\n".join(f" - {warn}" for warn in warnings)
802 | logger.warning(warning_msg)
803 |
804 | logger.info("OAuth configuration validation successful")
805 |
806 | # OAuth server configuration
807 | def get_oauth_issuer() -> str:
808 |     """
809 |     Get the OAuth issuer URL based on server configuration.
810 | 
811 |     For reverse proxy deployments, set MCP_OAUTH_ISSUER environment variable
812 |     to override auto-detection (e.g., "https://api.example.com").
813 | 
814 |     This ensures OAuth discovery endpoints return the correct external URLs
815 |     that clients can actually reach, rather than internal server addresses.
816 |     """
817 |     scheme = "https" if HTTPS_ENABLED else "http"
818 |     host = "localhost" if HTTP_HOST == "0.0.0.0" else HTTP_HOST  # 0.0.0.0 binds all interfaces but is not a reachable URL host
819 | 
820 |     # Only include port if it's not the standard port for the scheme
821 |     if (scheme == "https" and HTTP_PORT != 443) or (scheme == "http" and HTTP_PORT != 80):
822 |         return f"{scheme}://{host}:{HTTP_PORT}"
823 |     else:
824 |         return f"{scheme}://{host}"
825 |
826 | # OAuth issuer URL - CRITICAL for reverse proxy deployments
827 | # Production: Set MCP_OAUTH_ISSUER to external URL (e.g., "https://api.example.com")
828 | # Development: Auto-detects from server configuration
829 | OAUTH_ISSUER = os.getenv('MCP_OAUTH_ISSUER') or get_oauth_issuer()
830 |
831 | # OAuth token configuration
832 | OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES = safe_get_int_env('MCP_OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES', 60, min_value=1, max_value=1440) # 1 minute to 24 hours
833 | OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES = safe_get_int_env('MCP_OAUTH_AUTHORIZATION_CODE_EXPIRE_MINUTES', 10, min_value=1, max_value=60) # 1 minute to 1 hour
834 |
835 | # OAuth security configuration
836 | ALLOW_ANONYMOUS_ACCESS = safe_get_bool_env('MCP_ALLOW_ANONYMOUS_ACCESS', False)
837 |
838 | logger.info(f"OAuth enabled: {OAUTH_ENABLED}")
839 | if OAUTH_ENABLED:
840 | logger.info(f"OAuth issuer: {OAUTH_ISSUER}")
841 | logger.info(f"OAuth JWT algorithm: {get_jwt_algorithm()}")
842 | logger.info(f"OAuth access token expiry: {OAUTH_ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
843 | logger.info(f"Anonymous access allowed: {ALLOW_ANONYMOUS_ACCESS}")
844 |
845 | # Warn about potential reverse proxy configuration issues
846 | if not os.getenv('MCP_OAUTH_ISSUER') and ("localhost" in OAUTH_ISSUER or "127.0.0.1" in OAUTH_ISSUER):
847 | logger.warning(
848 | "OAuth issuer contains localhost/127.0.0.1. For reverse proxy deployments, "
849 | "set MCP_OAUTH_ISSUER to the external URL (e.g., 'https://api.example.com')"
850 | )
851 |
852 | # Validate OAuth configuration at startup
853 | try:
854 | validate_oauth_configuration()
855 | except ValueError as e:
856 | logger.error(f"OAuth configuration validation failed: {e}")
857 | raise
858 |
```
--------------------------------------------------------------------------------
/docs/research/code-execution-interface-implementation.md:
--------------------------------------------------------------------------------
```markdown
1 | # Code Execution Interface Implementation Research
2 | ## Issue #206: 90-95% Token Reduction Strategy
3 |
4 | **Research Date:** November 6, 2025
5 | **Target:** Implement Python code API for mcp-memory-service to reduce token consumption by 90-95%
6 | **Current Status:** Research & Architecture Phase
7 |
8 | ---
9 |
10 | ## Executive Summary
11 |
12 | This document provides comprehensive research findings and implementation recommendations for transitioning mcp-memory-service from tool-based MCP interactions to a direct code execution interface. Based on industry best practices, real-world examples, and analysis of current codebase architecture, this research identifies concrete strategies to achieve the target 90-95% token reduction.
13 |
14 | ### Key Findings
15 |
16 | 1. **Token Reduction Potential Validated**: Research confirms 75-90% reductions are achievable through code execution interfaces
17 | 2. **Industry Momentum**: Anthropic's November 2025 announcement of MCP code execution aligns with our proposal
18 | 3. **Proven Patterns**: Multiple successful implementations exist (python-interpreter MCP, CodeAgents framework)
19 | 4. **Architecture Ready**: Current codebase structure well-positioned for gradual migration
20 |
21 | ---
22 |
23 | ## 1. Current State Analysis
24 |
25 | ### Token Consumption Breakdown
26 |
27 | **Current Architecture:**
28 | - **33 MCP tools** generating ~4,125 tokens per interaction
29 | - **Document ingestion:** 57,400 tokens for 50 PDFs
30 | - **Session hooks:** 3,600-9,600 tokens per session start
31 | - **Tool definitions:** Loaded upfront into context window
32 |
33 | **Example: Session-Start Hook**
34 | ```javascript
35 | // Current approach: MCP tool invocation
36 | // Each tool call includes full schema (~125 tokens/tool)
37 | await memoryClient.callTool('retrieve_memory', {
38 | query: gitContext.query,
39 | limit: 8,
40 | similarity_threshold: 0.6
41 | });
42 |
43 | // Result includes full Memory objects with all fields:
44 | // - content, content_hash, tags, memory_type, metadata
46 | // - embedding (384 floats for all-MiniLM-L6-v2)
46 | // - created_at, created_at_iso, updated_at, updated_at_iso
47 | // - Total: ~500-800 tokens per memory
48 | ```
49 |
50 | ### Codebase Architecture Analysis
51 |
52 | **Strengths:**
53 | - ✅ Clean separation of concerns (storage/models/web layers)
54 | - ✅ Abstract base class (`MemoryStorage`) for consistent interface
55 | - ✅ Async/await throughout for performance
56 | - ✅ Strong type hints (Python 3.10+)
57 | - ✅ Multiple storage backends (SQLite-Vec, Cloudflare, Hybrid)
58 | - ✅ Existing HTTP client (`HTTPClientStorage`) demonstrates remote access pattern
59 |
60 | **Current Entry Points:**
61 | ```python
62 | # Public API (src/mcp_memory_service/__init__.py)
63 | __all__ = [
64 | 'Memory',
65 | 'MemoryQueryResult',
66 | 'MemoryStorage',
67 | 'generate_content_hash'
68 | ]
69 | ```
70 |
71 | **Infrastructure Files:**
72 | - `src/mcp_memory_service/server.py` - 3,721 lines (MCP server implementation)
73 | - `src/mcp_memory_service/storage/base.py` - Abstract interface with 20+ methods
74 | - `src/mcp_memory_service/models/memory.py` - Memory data model with timestamp handling
75 | - `src/mcp_memory_service/web/api/mcp.py` - MCP protocol endpoints
76 |
77 | ---
78 |
79 | ## 2. Best Practices from Research
80 |
81 | ### 2.1 Token-Efficient API Design
82 |
83 | **Key Principles from CodeAgents Framework:**
84 |
85 | 1. **Codified Structures Over Natural Language**
86 | - Use pseudocode/typed structures instead of verbose descriptions
87 | - Control structures (loops, conditionals) reduce repeated instructions
88 | - Typed variables eliminate ambiguity and error-prone parsing
89 |
90 | 2. **Modular Subroutines**
91 | - Encapsulate common patterns in reusable functions
92 | - Single import replaces repeated tool definitions
93 | - Function signatures convey requirements compactly
94 |
95 | 3. **Compact Result Types**
96 | - Return only essential data fields
97 | - Use structured types (namedtuple, TypedDict) for clarity
98 | - Avoid redundant metadata in response payloads
99 |
100 | **Anthropic's MCP Code Execution Approach (Nov 2025):**
101 |
102 | ```python
103 | # Before: Tool invocation (125 tokens for schema + 500 tokens for result)
104 | result = await call_tool("retrieve_memory", {
105 | "query": "recent architecture decisions",
106 | "limit": 5,
107 | "similarity_threshold": 0.7
108 | })
109 |
110 | # After: Code execution (5 tokens import + 20 tokens call)
111 | from mcp_memory_service.api import search
112 | results = search("recent architecture decisions", limit=5)
113 | ```
114 |
115 | ### 2.2 Python Data Structure Performance
116 |
117 | **Benchmark Results (from research):**
118 |
119 | | Structure | Creation Speed | Access Speed | Memory | Immutability | Type Safety |
120 | |-----------|---------------|--------------|---------|--------------|-------------|
121 | | `dict` | Fastest | Fast | High | No | Runtime only |
122 | | `dataclass` | 8% faster than NamedTuple | Fast | Medium (with `__slots__`) | Optional | Static + Runtime |
123 | | `NamedTuple` | Fast | Fastest (C-based) | Low | Yes | Static + Runtime |
124 | | `TypedDict` | Same as dict | Same as dict | High | No | Static only |
125 |
126 | **Recommendation for Token Efficiency:**
127 |
128 | ```python
129 | from typing import NamedTuple
130 |
131 | class CompactMemory(NamedTuple):
132 | """Minimal memory representation for hooks (50-80 tokens vs 500-800)."""
133 | hash: str # 8 chars
134 | content: str # First 200 chars
135 |     tags: tuple[str, ...]  # Tag list
136 | created: float # Unix timestamp
137 | score: float # Relevance score
138 | ```
139 |
140 | **Benefits:**
141 | - ✅ **60-90% size reduction**: Essential fields only
142 | - ✅ **Immutable**: Safer for concurrent access in hooks
143 | - ✅ **Type-safe**: Static checking with mypy/pyright
144 | - ✅ **Fast**: C-based tuple operations
145 | - ✅ **Readable**: Named field access (`memory.hash` not `memory[0]`)
146 |
147 | ### 2.3 Migration Strategy Best Practices
148 |
149 | **Lessons from Python 2→3 Migrations:**
150 |
151 | 1. **Compatibility Layers Work**
152 | - `python-future` provided seamless 2.6/2.7/3.3+ compatibility
153 | - Gradual migration reduced risk and allowed testing
154 | - Tool-based automation (futurize) caught 65-80% of changes
155 |
156 | 2. **Feature Flags Enable Rollback**
157 | - Dual implementations run side-by-side during transition
158 | - Environment variable switches between old/new paths
159 | - Observability metrics validate equivalence
160 |
161 | 3. **Incremental Adoption**
162 | - Start with low-risk, high-value targets (session hooks)
163 | - Gather metrics before expanding scope
164 | - Maintain backward compatibility throughout
165 |
166 | ---
167 |
168 | ## 3. Architecture Recommendations
169 |
170 | ### 3.1 Filesystem Structure
171 |
172 | ```
173 | src/mcp_memory_service/
174 | ├── api/ # NEW: Code execution interface
175 | │ ├── __init__.py # Public API exports
176 | │ ├── compact.py # Compact result types
177 | │ ├── search.py # Search operations
178 | │ ├── storage.py # Storage operations
179 | │ └── utils.py # Helper functions
180 | ├── models/
181 | │ ├── memory.py # EXISTING: Full Memory model
182 | │ └── compact.py # NEW: CompactMemory types
183 | ├── storage/
184 | │ ├── base.py # EXISTING: Abstract interface
185 | │ ├── sqlite_vec.py # EXISTING: SQLite backend
186 | │ └── ...
187 | └── server.py # EXISTING: MCP server (keep for compatibility)
188 | ```
189 |
190 | ### 3.2 Compact Result Types
191 |
192 | **Design Principles:**
193 | 1. Return minimal data for common use cases
194 | 2. Provide "expand" functions for full details when needed
195 | 3. Use immutable types (NamedTuple) for safety
196 |
197 | **Implementation:**
198 |
199 | ```python
200 | # src/mcp_memory_service/models/compact.py
201 | from typing import NamedTuple, Optional
202 |
203 | class CompactMemory(NamedTuple):
204 | """Minimal memory for efficient token usage (~80 tokens vs ~600)."""
205 | hash: str # Content hash (8 chars)
206 | preview: str # First 200 chars of content
207 | tags: tuple[str, ...] # Immutable tag tuple
208 | created: float # Unix timestamp
209 | score: float = 0.0 # Relevance score
210 |
211 | class CompactSearchResult(NamedTuple):
212 | """Search result with minimal overhead."""
213 | memories: tuple[CompactMemory, ...]
214 | total: int
215 | query: str
216 |
217 | def __repr__(self) -> str:
218 | """Compact string representation."""
219 | return f"SearchResult(found={self.total}, shown={len(self.memories)})"
220 |
221 | class CompactStorageInfo(NamedTuple):
222 | """Health check result (~20 tokens vs ~100)."""
223 | backend: str # 'sqlite_vec' | 'cloudflare' | 'hybrid'
224 | count: int # Total memories
225 | ready: bool # Service operational
226 | ```
227 |
228 | **Token Comparison:**
229 |
230 | ```python
231 | # Full Memory object (current):
232 | {
233 | "content": "Long text...", # ~400 tokens
234 | "content_hash": "abc123...", # ~10 tokens
235 | "tags": ["tag1", "tag2"], # ~15 tokens
236 | "memory_type": "note", # ~5 tokens
237 | "metadata": {...}, # ~50 tokens
238 | "embedding": [0.1, 0.2, ...], # ~300 tokens (768 dims)
239 | "created_at": 1730928000.0, # ~8 tokens
240 | "created_at_iso": "2025-11-06...", # ~12 tokens
241 | "updated_at": 1730928000.0, # ~8 tokens
242 | "updated_at_iso": "2025-11-06..." # ~12 tokens
243 | }
244 | # Total: ~820 tokens per memory
245 |
246 | # CompactMemory (proposed):
247 | CompactMemory(
248 | hash='abc123', # ~5 tokens
249 | preview='Long text...'[:200], # ~50 tokens
250 | tags=('tag1', 'tag2'), # ~10 tokens
251 | created=1730928000.0, # ~5 tokens
252 | score=0.85 # ~3 tokens
253 | )
254 | # Total: ~73 tokens per memory (91% reduction!)
255 | ```
256 |
257 | ### 3.3 Core API Functions
258 |
259 | **Design Goals:**
260 | - Single import statement replaces tool definitions
261 | - Type hints provide inline documentation
262 | - Sync wrappers for non-async contexts (hooks)
263 | - Automatic connection management
264 |
265 | **Implementation:**
266 |
267 | ```python
268 | # src/mcp_memory_service/api/__init__.py
269 | """
270 | Code execution API for mcp-memory-service.
271 |
272 | This module provides a lightweight, token-efficient interface
273 | for direct Python code execution, replacing MCP tool calls.
274 |
275 | Token Efficiency:
276 | - Import: ~10 tokens (once per session)
277 | - Function call: ~5-20 tokens (vs 125+ for MCP tools)
278 | - Results: 73-200 tokens (vs 500-800 for full Memory objects)
279 |
280 | Example:
281 | from mcp_memory_service.api import search, store, health
282 |
283 | # Search (20 tokens vs 625 tokens for MCP)
284 | results = search("architecture decisions", limit=5)
285 | for m in results.memories:
286 | print(f"{m.hash}: {m.preview}")
287 |
288 | # Store (15 tokens vs 150 tokens for MCP)
289 | store("New memory", tags=['note', 'important'])
290 |
291 | # Health (5 tokens vs 125 tokens for MCP)
292 | info = health()
293 | print(f"Backend: {info.backend}, Count: {info.count}")
294 | """
295 |
296 | from typing import Optional, Union
297 | from .compact import CompactMemory, CompactSearchResult, CompactStorageInfo
298 | from .search import search, search_by_tag, recall
299 | from .storage import store, delete, update
300 | from .utils import health, expand_memory
301 |
302 | __all__ = [
303 | # Search operations
304 | 'search', # Semantic search with compact results
305 | 'search_by_tag', # Tag-based search
306 | 'recall', # Time-based natural language search
307 |
308 | # Storage operations
309 | 'store', # Store new memory
310 | 'delete', # Delete by hash
311 | 'update', # Update metadata
312 |
313 | # Utilities
314 | 'health', # Service health check
315 | 'expand_memory', # Get full Memory from hash
316 |
317 | # Types
318 | 'CompactMemory',
319 | 'CompactSearchResult',
320 | 'CompactStorageInfo',
321 | ]
322 |
323 | # Version for API compatibility tracking
324 | __api_version__ = "1.0.0"
325 | ```
326 |
327 | ```python
328 | # src/mcp_memory_service/api/search.py
329 | """Search operations with compact results."""
330 |
331 | import asyncio
332 | from typing import Optional, Union
333 | from ..storage.factory import create_storage_backend
334 | from ..models.compact import CompactMemory, CompactSearchResult
335 |
336 | # Thread-local storage for connection reuse
337 | _storage_instance = None
338 |
339 | def _get_storage():
340 | """Get or create storage backend instance."""
341 | global _storage_instance
342 | if _storage_instance is None:
343 | _storage_instance = create_storage_backend()
344 | # Initialize in sync context (run once)
345 | asyncio.run(_storage_instance.initialize())
346 | return _storage_instance
347 |
348 | def search(
349 | query: str,
350 | limit: int = 5,
351 | threshold: float = 0.0
352 | ) -> CompactSearchResult:
353 | """
354 | Search memories using semantic similarity.
355 |
356 | Token efficiency: ~25 tokens (query + params + results)
357 | vs ~625 tokens for MCP tool call with full Memory objects.
358 |
359 | Args:
360 | query: Search query text
361 | limit: Maximum results to return (default: 5)
362 | threshold: Minimum similarity score 0.0-1.0 (default: 0.0)
363 |
364 | Returns:
365 | CompactSearchResult with minimal memory representations
366 |
367 | Example:
368 | >>> results = search("recent architecture changes", limit=3)
369 | >>> print(results)
370 | SearchResult(found=3, shown=3)
371 | >>> for m in results.memories:
372 | ... print(f"{m.hash}: {m.preview[:50]}...")
373 | """
374 | storage = _get_storage()
375 |
376 | # Run async operation in sync context
377 | async def _search():
378 | query_results = await storage.retrieve(query, n_results=limit)
379 |
380 | # Convert to compact format
381 | compact = [
382 | CompactMemory(
383 | hash=r.memory.content_hash[:8], # 8 char hash
384 | preview=r.memory.content[:200], # First 200 chars
385 | tags=tuple(r.memory.tags), # Immutable tuple
386 | created=r.memory.created_at,
387 | score=r.relevance_score
388 | )
389 | for r in query_results
390 | if r.relevance_score >= threshold
391 | ]
392 |
393 | return CompactSearchResult(
394 | memories=tuple(compact),
395 | total=len(compact),
396 | query=query
397 | )
398 |
399 | return asyncio.run(_search())
400 |
401 | def search_by_tag(
402 | tags: Union[str, list[str]],
403 | limit: Optional[int] = None
404 | ) -> CompactSearchResult:
405 | """
406 | Search memories by tags.
407 |
408 | Args:
409 | tags: Single tag or list of tags
410 | limit: Maximum results (None for all)
411 |
412 | Returns:
413 | CompactSearchResult with matching memories
414 | """
415 | storage = _get_storage()
416 | tag_list = [tags] if isinstance(tags, str) else tags
417 |
418 | async def _search():
419 | memories = await storage.search_by_tag(tag_list)
420 | if limit:
421 | memories = memories[:limit]
422 |
423 | compact = [
424 | CompactMemory(
425 | hash=m.content_hash[:8],
426 | preview=m.content[:200],
427 | tags=tuple(m.tags),
428 | created=m.created_at,
429 | score=1.0 # Tag match = perfect relevance
430 | )
431 | for m in memories
432 | ]
433 |
434 | return CompactSearchResult(
435 | memories=tuple(compact),
436 | total=len(compact),
437 | query=f"tags:{','.join(tag_list)}"
438 | )
439 |
440 | return asyncio.run(_search())
441 |
442 | def recall(query: str, n_results: int = 5) -> CompactSearchResult:
443 | """
444 | Retrieve memories using natural language time expressions.
445 |
446 | Examples:
447 | - "last week"
448 | - "yesterday afternoon"
449 | - "this month"
450 | - "2 days ago"
451 |
452 | Args:
453 | query: Natural language time query
454 | n_results: Maximum results to return
455 |
456 | Returns:
457 | CompactSearchResult with time-filtered memories
458 | """
459 | storage = _get_storage()
460 |
461 | async def _recall():
462 | memories = await storage.recall_memory(query, n_results)
463 |
464 | compact = [
465 | CompactMemory(
466 | hash=m.content_hash[:8],
467 | preview=m.content[:200],
468 | tags=tuple(m.tags),
469 | created=m.created_at,
470 | score=1.0
471 | )
472 | for m in memories
473 | ]
474 |
475 | return CompactSearchResult(
476 | memories=tuple(compact),
477 | total=len(compact),
478 | query=query
479 | )
480 |
481 | return asyncio.run(_recall())
482 | ```
483 |
484 | ```python
485 | # src/mcp_memory_service/api/storage.py
486 | """Storage operations (store, delete, update)."""
487 |
488 | import asyncio
489 | from typing import Optional, Union
490 | from ..models.memory import Memory
491 | from ..utils.hashing import generate_content_hash
492 | from .search import _get_storage
493 |
494 | def store(
495 | content: str,
496 | tags: Optional[Union[str, list[str]]] = None,
497 | memory_type: Optional[str] = None,
498 | metadata: Optional[dict] = None
499 | ) -> str:
500 | """
501 | Store a new memory.
502 |
503 | Token efficiency: ~15 tokens (params only)
504 | vs ~150 tokens for MCP tool call with schema.
505 |
506 | Args:
507 | content: Memory content text
508 | tags: Single tag or list of tags
509 | memory_type: Memory type classification
510 | metadata: Additional metadata dictionary
511 |
512 | Returns:
513 | Content hash of stored memory (8 chars)
514 |
515 | Example:
516 | >>> hash = store("Important decision", tags=['architecture', 'decision'])
517 | >>> print(f"Stored: {hash}")
518 | Stored: abc12345
519 | """
520 | storage = _get_storage()
521 |
522 | # Normalize tags
523 | if isinstance(tags, str):
524 | tag_list = [tags]
525 | elif tags is None:
526 | tag_list = []
527 | else:
528 | tag_list = list(tags)
529 |
530 | # Create memory object
531 | content_hash = generate_content_hash(content)
532 | memory = Memory(
533 | content=content,
534 | content_hash=content_hash,
535 | tags=tag_list,
536 | memory_type=memory_type,
537 | metadata=metadata or {}
538 | )
539 |
540 | # Store
541 | async def _store():
542 | success, message = await storage.store(memory)
543 | if not success:
544 | raise RuntimeError(f"Failed to store memory: {message}")
545 | return content_hash[:8]
546 |
547 | return asyncio.run(_store())
548 |
549 | def delete(hash: str) -> bool:
550 | """
551 | Delete a memory by its content hash.
552 |
553 | Args:
554 | hash: Content hash (8+ characters)
555 |
556 | Returns:
557 | True if deleted, False if not found
558 | """
559 | storage = _get_storage()
560 |
561 | async def _delete():
562 | # If short hash provided, expand to full hash
563 | if len(hash) == 8:
564 | # Get full hash from short form (requires index lookup)
565 | memories = await storage.get_recent_memories(n=10000)
566 | full_hash = next(
567 | (m.content_hash for m in memories if m.content_hash.startswith(hash)),
568 | hash
569 | )
570 | else:
571 | full_hash = hash
572 |
573 | success, _ = await storage.delete(full_hash)
574 | return success
575 |
576 | return asyncio.run(_delete())
577 |
578 | def update(
579 | hash: str,
580 | tags: Optional[list[str]] = None,
581 | memory_type: Optional[str] = None,
582 | metadata: Optional[dict] = None
583 | ) -> bool:
584 | """
585 | Update memory metadata.
586 |
587 | Args:
588 | hash: Content hash (8+ characters)
589 | tags: New tags (replaces existing)
590 | memory_type: New memory type
591 | metadata: New metadata (merges with existing)
592 |
593 | Returns:
594 | True if updated successfully
595 | """
596 | storage = _get_storage()
597 |
598 | updates = {}
599 | if tags is not None:
600 | updates['tags'] = tags
601 | if memory_type is not None:
602 | updates['memory_type'] = memory_type
603 | if metadata is not None:
604 | updates['metadata'] = metadata
605 |
606 | async def _update():
607 | success, _ = await storage.update_memory_metadata(hash, updates)
608 | return success
609 |
610 | return asyncio.run(_update())
611 | ```
612 |
613 | ```python
614 | # src/mcp_memory_service/api/utils.py
615 | """Utility functions for API."""
616 |
617 | import asyncio
618 | from typing import Optional
619 | from ..models.memory import Memory
620 | from ..models.compact import CompactStorageInfo
621 | from .search import _get_storage
622 |
623 | def health() -> CompactStorageInfo:
624 | """
625 | Get service health and status.
626 |
627 | Token efficiency: ~20 tokens
628 | vs ~125 tokens for MCP health check tool.
629 |
630 | Returns:
631 | CompactStorageInfo with backend, count, and ready status
632 |
633 | Example:
634 | >>> info = health()
635 | >>> print(f"Using {info.backend}, {info.count} memories")
636 | Using sqlite_vec, 1247 memories
637 | """
638 | storage = _get_storage()
639 |
640 | async def _health():
641 | stats = await storage.get_stats()
642 | return CompactStorageInfo(
643 | backend=stats.get('storage_backend', 'unknown'),
644 | count=stats.get('total_memories', 0),
645 | ready=stats.get('status', 'unknown') == 'operational'
646 | )
647 |
648 | return asyncio.run(_health())
649 |
650 | def expand_memory(hash: str) -> Optional[Memory]:
651 | """
652 | Get full Memory object from compact hash.
653 |
654 | Use when you need complete memory details (content, embedding, etc.)
655 | after working with compact results.
656 |
657 | Args:
658 | hash: Content hash (8+ characters)
659 |
660 | Returns:
661 | Full Memory object or None if not found
662 |
663 | Example:
664 | >>> results = search("architecture", limit=5)
665 | >>> full = expand_memory(results.memories[0].hash)
666 | >>> print(full.content) # Complete content, not preview
667 | """
668 | storage = _get_storage()
669 |
670 | async def _expand():
671 | # Handle short hash
672 | if len(hash) == 8:
673 | memories = await storage.get_recent_memories(n=10000)
674 | full_hash = next(
675 | (m.content_hash for m in memories if m.content_hash.startswith(hash)),
676 | None
677 | )
678 | if full_hash is None:
679 | return None
680 | else:
681 | full_hash = hash
682 |
683 | return await storage.get_by_hash(full_hash)
684 |
685 | return asyncio.run(_expand())
686 | ```
687 |
688 | ### 3.4 Hook Integration Pattern
689 |
690 | **Before (MCP Tool Invocation):**
691 |
692 | ```javascript
693 | // ~/.claude/hooks/core/session-start.js
694 | const { MemoryClient } = require('../utilities/memory-client');
695 |
696 | async function retrieveMemories(gitContext) {
697 | const memoryClient = new MemoryClient(config);
698 |
699 | // MCP tool call: ~625 tokens (tool def + result)
700 | const result = await memoryClient.callTool('retrieve_memory', {
701 | query: gitContext.query,
702 | limit: 8,
703 | similarity_threshold: 0.6
704 | });
705 |
706 | // Result parsing adds more tokens
707 | const memories = parseToolResult(result);
708 | return memories; // 8 full Memory objects = ~6,400 tokens
709 | }
710 | ```
711 |
712 | **After (Code Execution):**
713 |
714 | ```javascript
715 | // ~/.claude/hooks/core/session-start.js
716 | const { execSync } = require('child_process');
717 |
718 | async function retrieveMemories(gitContext) {
719 | // Execute Python code directly: ~25 tokens total
720 | const pythonCode = `
721 | from mcp_memory_service.api import search
722 | results = search("${gitContext.query}", limit=8)
723 | for m in results.memories:
724 | print(f"{m.hash}|{m.preview}|{','.join(m.tags)}|{m.created}")
725 | `;
726 |
727 |   // Pass code via stdin to avoid shell-quoting issues (pythonCode contains quotes)
728 |   const output = execSync('python -', {
729 |     input: pythonCode, encoding: 'utf8', timeout: 5000
730 |   });
731 |
732 | // Parse compact results (8 memories = ~600 tokens total)
733 | const memories = output.trim().split('\n').map(line => {
734 | const [hash, preview, tags, created] = line.split('|');
735 | return { hash, preview, tags: tags.split(','), created: parseFloat(created) };
736 | });
737 |
738 | return memories; // 90% token reduction: 6,400 → 600 tokens
739 | }
740 | ```
741 |
742 | ---
743 |
744 | ## 4. Implementation Examples from Similar Projects
745 |
746 | ### 4.1 MCP Python Interpreter (Nov 2024)
747 |
748 | **Key Features:**
749 | - Sandboxed code execution in isolated directories
750 | - File read/write capabilities through code
751 | - Iterative error correction (write → run → fix → repeat)
752 |
753 | **Relevant Patterns:**
754 | ```python
755 | # Tool exposure as filesystem
756 | # Instead of: 33 tool definitions in context
757 | # Use: import from known locations
758 |
759 | from mcp_memory_service.api import search, store, health
760 |
761 | # LLM can discover functions via IDE-like introspection
762 | help(search) # Returns compact docstring
763 | ```
764 |
765 | ### 4.2 CodeAgents Framework
766 |
767 | **Token Efficiency Techniques:**
768 | 1. **Typed Variables**: `memories: list[CompactMemory]` (10 tokens) vs "a list of memory objects with content and metadata" (15+ tokens)
769 | 2. **Control Structures**: `for m in memories if m.score > 0.7` (12 tokens) vs calling filter tool (125+ tokens)
770 | 3. **Reusable Subroutines**: Single function encapsulates common pattern
771 |
772 | **Application to Memory Service:**
773 | ```python
774 | # Compact search and filter in code (30 tokens total)
775 | from mcp_memory_service.api import search
776 |
777 | results = search("architecture", limit=20)
778 | relevant = [m for m in results.memories if 'decision' in m.tags and m.score > 0.7]
779 | print(f"Found {len(relevant)} relevant memories")
780 |
781 | # vs MCP tools (625+ tokens)
782 | # 1. retrieve_memory tool call (125 tokens)
783 | # 2. Full results parsing (400 tokens)
784 | # 3. search_by_tag tool call (125 tokens)
785 | # 4. Manual filtering logic in prompt (100+ tokens)
786 | ```
787 |
788 | ---
789 |
790 | ## 5. Potential Challenges and Mitigation Strategies
791 |
792 | ### Challenge 1: Async/Sync Context Mismatch
793 |
794 | **Problem:** Hooks run in Node.js (sync), storage backends use asyncio (async)
795 |
796 | **Mitigation:**
797 | ```python
798 | # Provide sync wrappers that handle async internally
799 | import asyncio
800 |
801 | def search(query: str, limit: int = 5):
802 | """Sync wrapper for async storage operations."""
803 | async def _search():
804 | storage = _get_storage()
805 | results = await storage.retrieve(query, limit)
806 | return _convert_to_compact(results)
807 |
808 | # Run in event loop
809 | return asyncio.run(_search())
810 | ```
811 |
812 | **Trade-offs:**
813 | - ✅ Simple API for hook developers
814 | - ✅ No async/await in JavaScript
815 | - ⚠️ Small overhead (~1-2ms) for event loop creation
816 | - ✅ Acceptable for hooks (not high-frequency calls)
817 |
818 | ### Challenge 2: Connection Management
819 |
820 | **Problem:** Multiple calls from hooks shouldn't create new connections each time
821 |
822 | **Mitigation:**
823 | ```python
824 | # Thread-local storage instance (reused across calls)
825 | _storage_instance = None
826 |
827 | def _get_storage():
828 | global _storage_instance
829 | if _storage_instance is None:
830 | _storage_instance = create_storage_backend()
831 | asyncio.run(_storage_instance.initialize())
832 | return _storage_instance
833 | ```
834 |
835 | **Benefits:**
836 | - ✅ Single connection per process
837 | - ✅ Automatic initialization on first use
838 | - ✅ No manual connection cleanup needed
839 |
840 | ### Challenge 3: Backward Compatibility
841 |
842 | **Problem:** Existing users rely on MCP tools, can't break them
843 |
844 | **Mitigation Strategy:**
845 | ```python
846 | # Phase 1: Add code execution API alongside MCP tools
847 | # Both interfaces work simultaneously
848 | # - MCP server (server.py) continues operating
849 | # - New api/ module available for direct import
850 | # - Users opt-in to new approach
851 |
852 | # Phase 2: Encourage migration with documentation
853 | # - Performance comparison benchmarks
854 | # - Token usage metrics
855 | # - Migration guide with examples
856 |
857 | # Phase 3 (Optional): Deprecation path
858 | # - Log warnings when MCP tools used
859 | # - Offer automatic migration scripts
860 | # - Eventually remove or maintain minimal MCP support
861 | ```
862 |
863 | **Migration Timeline:**
864 | ```
865 | Week 1-2: Core API implementation + tests
866 | Week 3: Session hook migration + validation
867 | Week 4-5: Search operation migration
868 | Week 6+: Optional optimizations + additional operations
869 | ```
870 |
871 | ### Challenge 4: Error Handling in Compact Mode
872 |
873 | **Problem:** Less context in compact results makes debugging harder
874 |
875 | **Mitigation:**
876 | ```python
877 | # Compact results for normal operation
878 | results = search("query", limit=5)
879 |
880 | # Expand individual memory for debugging
881 | if results.memories:
882 | full_memory = expand_memory(results.memories[0].hash)
883 | print(full_memory.content) # Complete content
884 | print(full_memory.metadata) # All metadata
885 |
886 | # Health check provides diagnostics
887 | info = health()
888 | if not info.ready:
889 | raise RuntimeError(f"Storage backend {info.backend} not ready")
890 | ```
891 |
892 | ### Challenge 5: Performance with Large Result Sets
893 |
894 | **Problem:** Converting 1000s of memories to compact format
895 |
896 | **Mitigation:**
897 | ```python
898 | # Lazy evaluation for large queries
899 | import asyncio
900 | from typing import Iterator
901 |
902 | def search_iter(query: str, batch_size: int = 50) -> Iterator[CompactMemory]:
903 |     """Streaming search results for large queries."""
904 |     storage = _get_storage()
905 |     offset = 0
906 |     while True:
907 |         batch = asyncio.run(storage.retrieve(query, n_results=batch_size, offset=offset))
908 | if not batch:
909 | break
910 |
911 | for result in batch:
912 | yield CompactMemory(...)
913 |
914 | offset += batch_size
915 |
916 | # Use in hooks
917 | for memory in search_iter("query", batch_size=10):
918 | if some_condition(memory):
919 | break # Early termination saves processing
920 | ```
921 |
922 | ---
923 |
924 | ## 6. Recommended Tools and Libraries
925 |
926 | ### 6.1 Type Safety and Validation
927 |
928 | **Pydantic v2** (optional, for advanced use cases)
929 | ```python
930 | from pydantic import BaseModel, Field, field_validator
931 |
932 | class SearchParams(BaseModel):
933 | query: str = Field(min_length=1, max_length=1000)
934 | limit: int = Field(default=5, ge=1, le=100)
935 | threshold: float = Field(default=0.0, ge=0.0, le=1.0)
936 |
937 | @field_validator('query')
938 | def query_not_empty(cls, v):
939 | if not v.strip():
940 | raise ValueError('Query cannot be empty')
941 | return v
942 | ```
943 |
944 | **Benefits:**
945 | - ✅ Runtime validation with clear error messages
946 | - ✅ JSON schema generation for documentation
947 | - ✅ 25-50% overhead acceptable for API boundaries
948 | - ⚠️ Use NamedTuple for internal compact types (lighter weight)
949 |
950 | ### 6.2 Testing and Validation
951 |
952 | **pytest-asyncio** (already in use)
953 | ```python
954 | # tests/api/test_search.py
955 | import pytest
956 | from mcp_memory_service.api import search, store, health
957 |
958 | def test_search_returns_compact_results():
959 | """Verify search returns CompactSearchResult."""
960 | results = search("test query", limit=3)
961 |
962 | assert results.total >= 0
963 | assert len(results.memories) <= 3
964 | assert all(isinstance(m.hash, str) for m in results.memories)
965 | assert all(len(m.hash) == 8 for m in results.memories)
966 |
967 | def test_token_efficiency():
968 | """Benchmark token usage vs MCP tools."""
969 | import tiktoken
970 | enc = tiktoken.encoding_for_model("gpt-4")
971 |
972 | # Compact API
973 | results = search("architecture", limit=5)
974 | compact_repr = str(results.memories)
975 | compact_tokens = len(enc.encode(compact_repr))
976 |
977 | # Compare with full Memory objects
978 | from mcp_memory_service.storage import get_storage
979 | full_results = get_storage().retrieve("architecture", n_results=5)
980 | full_repr = str([r.memory.to_dict() for r in full_results])
981 | full_tokens = len(enc.encode(full_repr))
982 |
983 | reduction = (1 - compact_tokens / full_tokens) * 100
984 | assert reduction >= 85, f"Expected 85%+ reduction, got {reduction:.1f}%"
985 | ```
986 |
987 | ### 6.3 Documentation Generation
988 |
989 | **Sphinx with autodoc** (existing infrastructure)
990 | ```python
991 | # Docstrings optimized for both humans and LLMs
992 | def search(query: str, limit: int = 5) -> CompactSearchResult:
993 | """
994 | Search memories using semantic similarity.
995 |
996 | This function provides a token-efficient alternative to the
997 | retrieve_memory MCP tool, reducing token usage by ~90%.
998 |
999 | Token Cost Analysis:
1000 | - Function call: ~20 tokens (import + call)
1001 | - Results: ~73 tokens per memory
1002 | - Total for 5 results: ~385 tokens
1003 |
1004 | vs MCP Tool:
1005 | - Tool definition: ~125 tokens
1006 | - Full Memory results: ~500 tokens per memory
1007 | - Total for 5 results: ~2,625 tokens
1008 |
1009 | Reduction: 85% (2,625 → 385 tokens)
1010 |
1011 | Performance:
1012 | - Cold call: ~50ms (storage initialization)
1013 | - Warm call: ~5ms (connection reused)
1014 |
1015 | Args:
1016 | query: Search query text. Supports natural language.
1017 | Examples: "recent architecture decisions",
1018 | "authentication implementation notes"
1019 | limit: Maximum number of results to return.
1020 | Higher values increase token cost proportionally.
1021 | Recommended: 3-8 for hooks, 10-20 for interactive use.
1022 |
1023 | Returns:
1024 | CompactSearchResult containing:
1025 | - memories: Tuple of CompactMemory objects
1026 | - total: Number of results found
1027 | - query: Original query string
1028 |
1029 | Raises:
1030 | RuntimeError: If storage backend not initialized
1031 | ValueError: If query empty or limit invalid
1032 |
1033 | Example:
1034 | >>> from mcp_memory_service.api import search
1035 | >>> results = search("authentication setup", limit=3)
1036 | >>> print(results)
1037 | SearchResult(found=3, shown=3)
1038 | >>> for m in results.memories:
1039 | ... print(f"{m.hash}: {m.preview[:50]}...")
1040 | abc12345: Implemented OAuth 2.1 authentication with...
1041 | def67890: Added JWT token validation middleware for...
1042 | ghi11121: Fixed authentication race condition in...
1043 |
1044 | See Also:
1045 | - search_by_tag: Filter by specific tags
1046 | - recall: Time-based natural language queries
1047 | - expand_memory: Get full Memory object from hash
1048 | """
1049 | ...
1050 | ```
1051 |
1052 | ### 6.4 Performance Monitoring
1053 |
1054 | **structlog** (lightweight, JSON-compatible)
1055 | ```python
1056 | import time
1057 | import structlog
1058 |
1059 | logger = structlog.get_logger(__name__)
1060 |
1061 | def search(query: str, limit: int = 5):
1062 |     with structlog.contextvars.bound_contextvars(operation="search", query=query, limit=limit):
1062 | start = time.perf_counter()
1063 |
1064 | try:
1065 | results = _do_search(query, limit)
1066 | duration_ms = (time.perf_counter() - start) * 1000
1067 |
1068 | logger.info(
1069 | "search_completed",
1070 | duration_ms=duration_ms,
1071 | results_count=len(results.memories),
1072 | token_estimate=len(results.memories) * 73 # Compact token estimate
1073 | )
1074 |
1075 | return results
1076 | except Exception as e:
1077 | logger.error("search_failed", error=str(e), exc_info=True)
1078 | raise
1079 | ```
1080 |
1081 | ---
1082 |
1083 | ## 7. Migration Approach: Gradual Transition
1084 |
1085 | ### Phase 1: Core Infrastructure (Week 1-2)
1086 |
1087 | **Deliverables:**
1088 | - ✅ `src/mcp_memory_service/api/` module structure
1089 | - ✅ `CompactMemory`, `CompactSearchResult`, `CompactStorageInfo` types
1090 | - ✅ `search()`, `store()`, `health()` functions
1091 | - ✅ Unit tests with 90%+ coverage
1092 | - ✅ Documentation with token usage benchmarks
1093 |
1094 | **Success Criteria:**
1095 | - All functions work in sync context (no async/await in API)
1096 | - Connection reuse validated (single storage instance)
1097 | - Token reduction measured: 85%+ for search operations
1098 | - Performance overhead <5ms per call (warm)
1099 |
1100 | **Risk: Low** - New code, no existing dependencies
1101 |
1102 | ### Phase 2: Session Hook Optimization (Week 3)
1103 |
1104 | **Target:** Session-start hook (highest impact: 3,600-9,600 tokens → 900-2,400 tokens)
1105 |
1106 | **Changes:**
1107 | ```javascript
1108 | // Before: MCP tool invocation
1109 | const { MemoryClient } = require('../utilities/memory-client');
1110 | const memoryClient = new MemoryClient(config);
1111 | const result = await memoryClient.callTool('retrieve_memory', {...});
1112 |
1113 | // After: Code execution with fallback
1114 | const { execSync } = require('child_process');
1115 |
1116 | let memories; try {
1117 |   // Try code execution first (fast, efficient)
1118 |   const output = execSync('python -c "from mcp_memory_service.api import search; ..."');
1119 |   memories = parseCompactResults(output);
1120 | } catch (error) {
1121 |   // Fallback to MCP if code execution fails
1122 |   console.warn('Code execution failed, falling back to MCP:', error);
1123 |   const result = await memoryClient.callTool('retrieve_memory', {...});
1124 |   memories = parseMCPResult(result);
1125 | }
1126 | ```
1127 |
1128 | **Success Criteria:**
1129 | - 75%+ token reduction measured in real sessions
1130 | - Fallback mechanism validates graceful degradation
1131 | - Hook execution time <500ms (no user-facing latency increase)
1132 | - Zero breaking changes for users
1133 |
1134 | **Risk: Medium** - Touches production hook code, but has fallback
1135 |
1136 | ### Phase 3: Search Operation Optimization (Week 4-5)
1137 |
1138 | **Target:** Mid-conversation and topic-change hooks
1139 |
1140 | **Deliverables:**
1141 | - ✅ `search_by_tag()` implementation
1142 | - ✅ `recall()` natural language time queries
1143 | - ✅ Streaming search (`search_iter()`) for large results
1144 | - ✅ Migration guide with side-by-side examples
1145 |
1146 | **Success Criteria:**
1147 | - 90%+ token reduction for search-heavy workflows
1148 | - Documentation shows before/after comparison
1149 | - Community feedback collected and addressed
1150 |
1151 | **Risk: Low** - Builds on Phase 1 foundation
1152 |
1153 | ### Phase 4: Extended Operations (Week 6+)
1154 |
1155 | **Optional Enhancements:**
1156 | - Document ingestion API
1157 | - Batch operations (store/delete multiple)
1158 | - Memory consolidation triggers
1159 | - Advanced filtering (memory_type, time ranges)
1160 |
1161 | ---
1162 |
1163 | ## 8. Success Metrics and Validation
1164 |
1165 | ### 8.1 Token Reduction Targets
1166 |
1167 | | Operation | Current (MCP) | Target (Code Exec) | Reduction |
1168 | |-----------|---------------|-------------------|-----------|
1169 | | Session start hook | 3,600-9,600 | 900-2,400 | 75% |
1170 | | Search (5 results) | 2,625 | 385 | 85% |
1171 | | Store memory | 150 | 15 | 90% |
1172 | | Health check | 125 | 20 | 84% |
1173 | | Document ingestion (50 PDFs) | 57,400 | 8,610 | 85% |
1174 |
1175 | **Annual Savings (Conservative):**
1176 | - 10 users x 5 sessions/day x 365 days x 6,000 tokens saved = **109.5M tokens/year**
1177 | - At $0.15/1M tokens (Claude Opus input): **$16.43/year saved** per 10-user deployment
1178 |
1179 | **Annual Savings (Aggressive - 100 users):**
1180 | - 100 users x 10 sessions/day x 365 days x 6,000 tokens = **2.19B tokens/year**
1181 | - At $0.15/1M tokens: **$328.50/year saved**
1182 |
1183 | ### 8.2 Performance Metrics
1184 |
1185 | **Latency Targets:**
1186 | - Cold start (first call): <100ms
1187 | - Warm calls: <10ms
1188 | - Hook total execution: <500ms (no degradation from current)
1189 |
1190 | **Memory Usage:**
1191 | - Compact result set (5 memories): <5KB
1192 | - Full result set (5 memories): ~50KB
1193 | - 90% memory reduction for hook injection
1194 |
1195 | ### 8.3 Compatibility Validation
1196 |
1197 | **Testing Matrix:**
1198 | - ✅ Existing MCP tools continue working (100% backward compat)
1199 | - ✅ New code execution API available alongside MCP
1200 | - ✅ Fallback mechanism activates on code execution failure
1201 | - ✅ All storage backends compatible (SQLite-Vec, Cloudflare, Hybrid)
1202 | - ✅ No breaking changes to server.py or existing APIs
1203 |
1204 | ---
1205 |
1206 | ## 9. Implementation Timeline
1207 |
1208 | ```
1209 | Week 1: Core Infrastructure
1210 | ├── Design compact types (CompactMemory, CompactSearchResult)
1211 | ├── Implement api/__init__.py with public exports
1212 | ├── Create search.py with search(), search_by_tag(), recall()
1213 | ├── Add storage.py with store(), delete(), update()
1214 | └── Write utils.py with health(), expand_memory()
1215 |
1216 | Week 2: Testing & Documentation
1217 | ├── Unit tests for all API functions
1218 | ├── Integration tests with storage backends
1219 | ├── Token usage benchmarking
1220 | ├── API documentation with examples
1221 | └── Migration guide draft
1222 |
1223 | Week 3: Session Hook Migration
1224 | ├── Update session-start.js to use code execution
1225 | ├── Add fallback to MCP tools
1226 | ├── Test with SQLite-Vec and Cloudflare backends
1227 | ├── Validate token reduction (target: 75%+)
1228 | └── Deploy to beta testers
1229 |
1230 | Week 4-5: Search Operations
1231 | ├── Update mid-conversation.js
1232 | ├── Update topic-change.js
1233 | ├── Implement streaming search for large queries
1234 | ├── Document best practices
1235 | └── Gather community feedback
1236 |
1237 | Week 6+: Polish & Extensions
1238 | ├── Additional API functions (batch ops, etc.)
1239 | ├── Performance optimizations
1240 | ├── Developer tools (token calculators, debuggers)
1241 | └── Comprehensive documentation
1242 | ```
1243 |
1244 | ---
1245 |
1246 | ## 10. Recommendations Summary
1247 |
1248 | ### Immediate Next Steps (Week 1)
1249 |
1250 | 1. **Create API Module Structure**
1251 | ```bash
1252 | mkdir -p src/mcp_memory_service/api
1253 | touch src/mcp_memory_service/api/{__init__,compact,search,storage,utils}.py
1254 | ```
1255 |
1256 | 2. **Implement Compact Types**
1257 | - `CompactMemory` with NamedTuple
1258 | - `CompactSearchResult` with tuple of memories
1259 | - `CompactStorageInfo` for health checks
1260 |
1261 | 3. **Core Functions**
1262 | - `search()` - Semantic search with compact results
1263 | - `store()` - Store with minimal params
1264 | - `health()` - Quick status check
1265 |
1266 | 4. **Testing Infrastructure**
1267 | - Unit tests for each function
1268 | - Token usage benchmarks
1269 | - Performance profiling
1270 |
1271 | ### Key Design Decisions
1272 |
1273 | 1. **Use NamedTuple for Compact Types**
1274 | - Fast (C-based), immutable, type-safe
1275 | - 60-90% size reduction vs dataclass
1276 | - Clear field names (`.hash` not `[0]`)
1277 |
1278 | 2. **Sync Wrappers for Async Operations**
1279 | - Hide asyncio complexity from hooks
1280 | - Use `asyncio.run()` internally
1281 | - Connection reuse via global instance
1282 |
1283 | 3. **Graceful Degradation**
1284 | - Code execution primary
1285 | - MCP tools fallback
1286 | - Zero breaking changes
1287 |
1288 | 4. **Incremental Migration**
1289 | - Start with session hooks (high impact)
1290 | - Gather metrics and feedback
1291 | - Expand to other operations
1292 |
1293 | ### Expected Outcomes
1294 |
1295 | **Token Efficiency:**
1296 | - ✅ 75% reduction in session hooks
1297 | - ✅ 85-90% reduction in search operations
1298 | - ✅ 109.5M–2.19B tokens saved annually (conservative vs. aggressive scenarios, §8.1)
1299 |
1300 | **Performance:**
1301 | - ✅ <10ms per API call (warm)
1302 | - ✅ <500ms hook execution (no degradation)
1303 | - ✅ 90% memory footprint reduction
1304 |
1305 | **Compatibility:**
1306 | - ✅ 100% backward compatible
1307 | - ✅ Opt-in adoption model
1308 | - ✅ MCP tools continue working
1309 |
1310 | ---
1311 |
1312 | ## 11. References and Further Reading
1313 |
1314 | ### Research Sources
1315 |
1316 | 1. **Anthropic Resources:**
1317 | - "Code execution with MCP: Building more efficient agents" (Nov 2025)
1318 | - "Claude Code Best Practices" - Token efficiency guidelines
1319 | - MCP Protocol Documentation - Tool use patterns
1320 |
1321 | 2. **Academic Research:**
1322 | - "CodeAgents: A Token-Efficient Framework for Codified Multi-Agent Reasoning" (arxiv.org/abs/2507.03254)
1323 | - Token consumption analysis in LLM agent systems
1324 |
1325 | 3. **Python Best Practices:**
1326 | - "Dataclasses vs NamedTuple vs TypedDict" performance comparisons
1327 | - Python API design patterns (hakibenita.com)
1328 | - Async/sync bridging patterns
1329 |
1330 | 4. **Real-World Implementations:**
1331 | - mcp-python-interpreter server
1332 | - Anthropic's MCP server examples
1333 | - LangChain compact result types
1334 |
1335 | ### Internal Documentation
1336 |
1337 | - `src/mcp_memory_service/storage/base.py` - Storage interface
1338 | - `src/mcp_memory_service/models/memory.py` - Memory model
1339 | - `src/mcp_memory_service/server.py` - MCP server (3,721 lines)
1340 | - `~/.claude/hooks/core/session-start.js` - Current hook implementation
1341 |
1342 | ---
1343 |
1344 | ## Conclusion
1345 |
1346 | The research validates the feasibility and high value of implementing a code execution interface for mcp-memory-service. Industry trends (Anthropic's MCP code execution announcement, CodeAgents framework) align with the proposal, and the current codebase architecture provides a solid foundation for gradual migration.
1347 |
1348 | **Key Takeaways:**
1349 |
1350 | 1. **85-90% token reduction is achievable** through compact types and direct function calls
1351 | 2. **Backward compatibility is maintained** via fallback mechanisms and parallel operation
1352 | 3. **Proven patterns exist** in mcp-python-interpreter and similar projects
1353 | 4. **Incremental approach reduces risk** while delivering immediate value
1354 | 5. **Annual savings of 109.5M–2.19B tokens** (per §8.1 scenarios) justify development investment
1355 |
1356 | **Recommended Action:** Proceed with Phase 1 implementation (Core Infrastructure) targeting Week 1-2 completion, with session hook migration as first production use case.
1357 |
1358 | ---
1359 |
1360 | **Document Version:** 1.0
1361 | **Last Updated:** November 6, 2025
1362 | **Author:** Research conducted for Issue #206
1363 | **Status:** Ready for Review and Implementation
1364 |
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/app.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | FastAPI application for MCP Memory Service HTTP/SSE interface.
17 |
18 | Provides REST API and Server-Sent Events using SQLite-vec backend.
19 | """
20 |
21 | import asyncio
22 | import logging
23 | import os
24 | from contextlib import asynccontextmanager
25 | from typing import Optional, Any
26 |
27 | from fastapi import FastAPI, HTTPException
28 | from fastapi.middleware.cors import CORSMiddleware
29 | from fastapi.staticfiles import StaticFiles
30 | from fastapi.responses import HTMLResponse
31 |
32 | from .. import __version__
33 | from ..config import (
34 | HTTP_PORT,
35 | HTTP_HOST,
36 | CORS_ORIGINS,
37 | DATABASE_PATH,
38 | EMBEDDING_MODEL_NAME,
39 | MDNS_ENABLED,
40 | HTTPS_ENABLED,
41 | OAUTH_ENABLED,
42 | CONSOLIDATION_ENABLED,
43 | CONSOLIDATION_CONFIG,
44 | CONSOLIDATION_SCHEDULE
45 | )
46 | from .dependencies import set_storage, get_storage, create_storage_backend
47 | from .api.health import router as health_router
48 | from .api.memories import router as memories_router
49 | from .api.search import router as search_router
50 | from .api.events import router as events_router
51 | from .api.sync import router as sync_router
52 | from .api.manage import router as manage_router
53 | from .api.analytics import router as analytics_router
54 | from .api.documents import router as documents_router
55 | from .api.mcp import router as mcp_router
56 | from .api.consolidation import router as consolidation_router
57 | from .api.backup import router as backup_router
58 | from .sse import sse_manager
59 |
60 | logger = logging.getLogger(__name__)
61 |
62 | # Global storage instance
63 | storage: Optional["MemoryStorage"] = None
64 |
65 | # Global mDNS advertiser instance
66 | mdns_advertiser: Optional[Any] = None
67 |
68 | # Global OAuth cleanup task
69 | oauth_cleanup_task: Optional[asyncio.Task] = None
70 |
71 | # Global consolidation instances
72 | consolidator: Optional["DreamInspiredConsolidator"] = None
73 | consolidation_scheduler: Optional["ConsolidationScheduler"] = None
74 |
75 |
async def oauth_cleanup_background_task():
    """Periodically purge expired OAuth authorization codes and access tokens.

    Sleeps five minutes between passes and runs until the task is
    cancelled. A failure in one pass is logged and does not stop the loop
    (best-effort cleanup).
    """
    from .oauth.storage import oauth_storage

    cleanup_interval_seconds = 300  # 5 minutes between cleanup passes

    while True:
        try:
            await asyncio.sleep(cleanup_interval_seconds)

            stats = await oauth_storage.cleanup_expired()
            codes_removed = stats["expired_codes_cleaned"]
            tokens_removed = stats["expired_tokens_cleaned"]
            # Only log when something was actually purged.
            if codes_removed > 0 or tokens_removed > 0:
                logger.info(
                    f"OAuth cleanup: removed {codes_removed} codes, "
                    f"{tokens_removed} tokens"
                )
        except asyncio.CancelledError:
            logger.info("OAuth cleanup task cancelled")
            break
        except Exception as exc:
            # Keep the background loop alive on unexpected errors.
            logger.error(f"Error in OAuth cleanup task: {exc}")
96 |
97 |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan management.

    Startup (before ``yield``): creates the storage backend and registers it
    globally, optionally initializes the dream-inspired consolidation system
    and its scheduler, starts the SSE manager, the OAuth cleanup background
    task, and mDNS service advertisement.

    Shutdown (after ``yield``): stops the consolidation scheduler, mDNS
    advertisement, OAuth cleanup task, and SSE manager, then closes the
    storage backend. Each teardown step is wrapped so one failure does not
    block the others.

    NOTE(review): if startup raises after the storage backend was created,
    the exception is re-raised below before ``yield``, so the shutdown
    section never runs and ``storage.close()`` is not called on that path —
    confirm whether create_storage_backend() needs cleanup on partial startup.
    """
    global storage, mdns_advertiser, oauth_cleanup_task, consolidator, consolidation_scheduler

    # Startup
    logger.info("Starting MCP Memory Service HTTP interface...")
    try:
        storage = await create_storage_backend()
        set_storage(storage)  # Set the global storage instance

        # Initialize consolidation system if enabled
        if CONSOLIDATION_ENABLED:
            try:
                # Imported lazily so the consolidation package is only a
                # dependency when the feature is enabled.
                from ..consolidation.base import ConsolidationConfig
                from ..consolidation.consolidator import DreamInspiredConsolidator
                from ..consolidation.scheduler import ConsolidationScheduler
                from ..api import set_consolidator, set_scheduler

                # Create consolidation config
                config = ConsolidationConfig(**CONSOLIDATION_CONFIG)

                # Initialize consolidator with storage
                consolidator = DreamInspiredConsolidator(storage, config)
                logger.info("Dream-inspired consolidator initialized")

                # Set global consolidator for API access
                set_consolidator(consolidator)

                # Initialize scheduler only if at least one schedule is active
                if any(schedule != 'disabled' for schedule in CONSOLIDATION_SCHEDULE.values()):
                    consolidation_scheduler = ConsolidationScheduler(
                        consolidator,
                        CONSOLIDATION_SCHEDULE,
                        enabled=True
                    )

                    # Start the scheduler; on failure the service still runs,
                    # just without scheduled consolidation.
                    if await consolidation_scheduler.start():
                        logger.info("Consolidation scheduler started successfully")
                        # Set global scheduler for API access
                        set_scheduler(consolidation_scheduler)
                    else:
                        logger.warning("Failed to start consolidation scheduler")
                        consolidation_scheduler = None
                else:
                    logger.info("Consolidation scheduler disabled (all schedules set to 'disabled')")

            except Exception as e:
                # Consolidation is optional: log and continue without it.
                logger.error(f"Failed to initialize consolidation system: {e}")
                consolidator = None
                consolidation_scheduler = None
        else:
            logger.info("Consolidation system disabled")

        # Start SSE manager
        await sse_manager.start()
        logger.info("SSE Manager started")

        # Start OAuth cleanup task if enabled
        if OAUTH_ENABLED:
            oauth_cleanup_task = asyncio.create_task(oauth_cleanup_background_task())
            logger.info("OAuth cleanup background task started")

        # Start mDNS service advertisement if enabled
        if MDNS_ENABLED:
            try:
                # Lazy import: zeroconf is an optional dependency.
                from ..discovery.mdns_service import ServiceAdvertiser
                mdns_advertiser = ServiceAdvertiser(
                    host=HTTP_HOST,
                    port=HTTP_PORT,
                    https_enabled=HTTPS_ENABLED
                )
                success = await mdns_advertiser.start()
                if success:
                    logger.info("mDNS service advertisement started")
                else:
                    logger.warning("Failed to start mDNS service advertisement")
                    mdns_advertiser = None
            except ImportError:
                logger.warning("mDNS support not available (zeroconf not installed)")
                mdns_advertiser = None
            except Exception as e:
                logger.error(f"Error starting mDNS advertisement: {e}")
                mdns_advertiser = None
        else:
            logger.info("mDNS service advertisement disabled")

    except Exception as e:
        # Startup failure is fatal: re-raise so FastAPI aborts startup.
        logger.error(f"Failed to initialize storage: {e}")
        raise

    # Application serves requests while suspended here.
    yield

    # Shutdown
    logger.info("Shutting down MCP Memory Service HTTP interface...")

    # Stop consolidation scheduler
    if consolidation_scheduler:
        try:
            await consolidation_scheduler.stop()
            logger.info("Consolidation scheduler stopped")
        except Exception as e:
            logger.error(f"Error stopping consolidation scheduler: {e}")

    # Stop mDNS advertisement
    if mdns_advertiser:
        try:
            await mdns_advertiser.stop()
            logger.info("mDNS service advertisement stopped")
        except Exception as e:
            logger.error(f"Error stopping mDNS advertisement: {e}")

    # Stop OAuth cleanup task
    if oauth_cleanup_task:
        try:
            oauth_cleanup_task.cancel()
            # Awaiting a cancelled task raises CancelledError, handled below.
            await oauth_cleanup_task
            logger.info("OAuth cleanup task stopped")
        except asyncio.CancelledError:
            logger.info("OAuth cleanup task cancelled successfully")
        except Exception as e:
            logger.error(f"Error stopping OAuth cleanup task: {e}")

    # Stop SSE manager
    await sse_manager.stop()
    logger.info("SSE Manager stopped")

    # Close the storage backend last, after all consumers have stopped.
    if storage:
        await storage.close()
228 |
229 |
230 | def create_app() -> FastAPI:
231 | """Create and configure the FastAPI application."""
232 |
233 | app = FastAPI(
234 | title="MCP Memory Service",
235 | description="HTTP REST API and SSE interface for semantic memory storage",
236 | version=__version__,
237 | lifespan=lifespan,
238 | docs_url="/api/docs",
239 | redoc_url="/api/redoc"
240 | )
241 |
242 | # CORS middleware
243 | app.add_middleware(
244 | CORSMiddleware,
245 | allow_origins=CORS_ORIGINS,
246 | allow_credentials=True,
247 | allow_methods=["*"],
248 | allow_headers=["*"],
249 | )
250 |
251 | # Include API routers
252 | logger.info("Including API routers...")
253 | app.include_router(health_router, prefix="/api", tags=["health"])
254 | logger.info(f"✓ Included health router with {len(health_router.routes)} routes")
255 | app.include_router(memories_router, prefix="/api", tags=["memories"])
256 | logger.info(f"✓ Included memories router with {len(memories_router.routes)} routes")
257 | app.include_router(search_router, prefix="/api", tags=["search"])
258 | logger.info(f"✓ Included search router with {len(search_router.routes)} routes")
259 | app.include_router(manage_router, prefix="/api/manage", tags=["management"])
260 | logger.info(f"✓ Included manage router with {len(manage_router.routes)} routes")
261 | app.include_router(analytics_router, prefix="/api/analytics", tags=["analytics"])
262 | logger.info(f"✓ Included analytics router with {len(analytics_router.routes)} routes")
263 | app.include_router(events_router, prefix="/api", tags=["events"])
264 | logger.info(f"✓ Included events router with {len(events_router.routes)} routes")
265 | app.include_router(sync_router, prefix="/api", tags=["sync"])
266 | logger.info(f"✓ Included sync router with {len(sync_router.routes)} routes")
267 | app.include_router(backup_router, prefix="/api", tags=["backup"])
268 | logger.info(f"✓ Included backup router with {len(backup_router.routes)} routes")
269 | try:
270 | app.include_router(documents_router, prefix="/api/documents", tags=["documents"])
271 | logger.info(f"✓ Included documents router with {len(documents_router.routes)} routes")
272 | except Exception as e:
273 | logger.error(f"✗ Failed to include documents router: {e}")
274 | import traceback
275 | logger.error(traceback.format_exc())
276 |
277 | # Include consolidation router
278 | app.include_router(consolidation_router, tags=["consolidation"])
279 | logger.info(f"✓ Included consolidation router with {len(consolidation_router.routes)} routes")
280 |
281 | # Include MCP protocol router
282 | app.include_router(mcp_router, tags=["mcp-protocol"])
283 |
284 | # Include OAuth routers if enabled
285 | if OAUTH_ENABLED:
286 | from .oauth.discovery import router as oauth_discovery_router
287 | from .oauth.registration import router as oauth_registration_router
288 | from .oauth.authorization import router as oauth_authorization_router
289 |
290 | app.include_router(oauth_discovery_router, tags=["oauth-discovery"])
291 | app.include_router(oauth_registration_router, prefix="/oauth", tags=["oauth"])
292 | app.include_router(oauth_authorization_router, prefix="/oauth", tags=["oauth"])
293 |
294 | logger.info("OAuth 2.1 endpoints enabled")
295 | else:
296 | logger.info("OAuth 2.1 endpoints disabled")
297 |
298 | # Serve static files (dashboard)
299 | static_path = os.path.join(os.path.dirname(__file__), "static")
300 | if os.path.exists(static_path):
301 | app.mount("/static", StaticFiles(directory=static_path), name="static")
302 |
303 | def get_api_overview_html():
304 | """Generate the API overview HTML template."""
305 | return """
306 | <!DOCTYPE html>
307 | <html lang="en">
308 | <head>
309 | <title>MCP Memory Service v""" + __version__ + """</title>
310 | <meta charset="utf-8">
311 | <meta name="viewport" content="width=device-width, initial-scale=1">
312 | <style>
313 | * {
314 | margin: 0;
315 | padding: 0;
316 | box-sizing: border-box;
317 | }
318 |
319 | :root {
320 | --primary: #3b82f6;
321 | --primary-dark: #2563eb;
322 | --secondary: #8b5cf6;
323 | --success: #10b981;
324 | --warning: #f59e0b;
325 | --danger: #ef4444;
326 | --dark: #1e293b;
327 | --gray: #64748b;
328 | --light: #f8fafc;
329 | --white: #ffffff;
330 | --shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
331 | --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
332 | }
333 |
334 | body {
335 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
336 | background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
337 | min-height: 100vh;
338 | color: var(--dark);
339 | line-height: 1.6;
340 | }
341 |
342 | .container {
343 | max-width: 1200px;
344 | margin: 0 auto;
345 | padding: 2rem;
346 | }
347 |
348 | header {
349 | text-align: center;
350 | margin-bottom: 3rem;
351 | padding: 2rem;
352 | background: var(--white);
353 | border-radius: 1rem;
354 | box-shadow: var(--shadow-lg);
355 | }
356 |
357 | .logo {
358 | display: inline-flex;
359 | align-items: center;
360 | gap: 1rem;
361 | margin-bottom: 1rem;
362 | }
363 |
364 | .logo-icon {
365 | width: 60px;
366 | height: 60px;
367 | background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
368 | border-radius: 1rem;
369 | display: flex;
370 | align-items: center;
371 | justify-content: center;
372 | color: var(--white);
373 | font-size: 2rem;
374 | font-weight: bold;
375 | }
376 |
377 | h1 {
378 | font-size: 2.5rem;
379 | font-weight: 800;
380 | background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
381 | -webkit-background-clip: text;
382 | -webkit-text-fill-color: transparent;
383 | background-clip: text;
384 | margin-bottom: 0.5rem;
385 | }
386 |
387 | .subtitle {
388 | color: var(--gray);
389 | font-size: 1.25rem;
390 | margin-bottom: 1rem;
391 | }
392 |
393 | .version-badge {
394 | display: inline-flex;
395 | align-items: center;
396 | gap: 0.5rem;
397 | background: var(--success);
398 | color: var(--white);
399 | padding: 0.25rem 1rem;
400 | border-radius: 2rem;
401 | font-size: 0.875rem;
402 | font-weight: 600;
403 | }
404 |
405 | .stats {
406 | display: grid;
407 | grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
408 | gap: 1rem;
409 | margin-bottom: 3rem;
410 | }
411 |
412 | .stat-card {
413 | background: var(--white);
414 | padding: 1.5rem;
415 | border-radius: 0.75rem;
416 | box-shadow: var(--shadow);
417 | text-align: center;
418 | transition: transform 0.2s ease, box-shadow 0.2s ease;
419 | }
420 |
421 | .stat-card:hover {
422 | transform: translateY(-2px);
423 | box-shadow: var(--shadow-lg);
424 | }
425 |
426 | .stat-value {
427 | font-size: 2rem;
428 | font-weight: 700;
429 | color: var(--primary);
430 | margin-bottom: 0.25rem;
431 | }
432 |
433 | .stat-label {
434 | color: var(--gray);
435 | font-size: 0.875rem;
436 | text-transform: uppercase;
437 | letter-spacing: 0.05em;
438 | }
439 |
440 | .endpoint-grid {
441 | display: grid;
442 | grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
443 | gap: 1.5rem;
444 | margin-bottom: 3rem;
445 | }
446 |
447 | .endpoint-card {
448 | background: var(--white);
449 | border-radius: 0.75rem;
450 | box-shadow: var(--shadow);
451 | overflow: hidden;
452 | transition: transform 0.2s ease, box-shadow 0.2s ease;
453 | }
454 |
455 | .endpoint-card:hover {
456 | transform: translateY(-4px);
457 | box-shadow: var(--shadow-lg);
458 | }
459 |
460 | .endpoint-header {
461 | padding: 1.5rem;
462 | background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
463 | color: var(--white);
464 | }
465 |
466 | .endpoint-header h3 {
467 | font-size: 1.25rem;
468 | margin-bottom: 0.5rem;
469 | display: flex;
470 | align-items: center;
471 | gap: 0.5rem;
472 | }
473 |
474 | .endpoint-icon {
475 | font-size: 1.5rem;
476 | }
477 |
478 | .endpoint-description {
479 | opacity: 0.9;
480 | font-size: 0.875rem;
481 | }
482 |
483 | .endpoint-list {
484 | padding: 1.5rem;
485 | }
486 |
487 | .endpoint-item {
488 | padding: 0.75rem;
489 | border-radius: 0.5rem;
490 | margin-bottom: 0.5rem;
491 | background: var(--light);
492 | transition: background-color 0.2s ease;
493 | cursor: pointer;
494 | }
495 |
496 | .endpoint-item:hover {
497 | background: #e2e8f0;
498 | }
499 |
500 | .method {
501 | display: inline-block;
502 | padding: 0.125rem 0.5rem;
503 | border-radius: 0.25rem;
504 | font-size: 0.75rem;
505 | font-weight: 700;
506 | margin-right: 0.5rem;
507 | text-transform: uppercase;
508 | }
509 |
510 | .method-get { background: var(--success); color: var(--white); }
511 | .method-post { background: var(--primary); color: var(--white); }
512 | .method-delete { background: var(--danger); color: var(--white); }
513 |
514 | .endpoint-path {
515 | font-family: 'Courier New', monospace;
516 | font-size: 0.875rem;
517 | color: var(--dark);
518 | }
519 |
520 | .endpoint-desc {
521 | font-size: 0.75rem;
522 | color: var(--gray);
523 | margin-top: 0.25rem;
524 | }
525 |
526 | .action-buttons {
527 | display: flex;
528 | gap: 1rem;
529 | justify-content: center;
530 | margin-bottom: 3rem;
531 | }
532 |
533 | .btn {
534 | display: inline-flex;
535 | align-items: center;
536 | gap: 0.5rem;
537 | padding: 0.75rem 1.5rem;
538 | border-radius: 0.5rem;
539 | font-weight: 600;
540 | text-decoration: none;
541 | transition: all 0.2s ease;
542 | border: none;
543 | cursor: pointer;
544 | }
545 |
546 | .btn-primary {
547 | background: var(--primary);
548 | color: var(--white);
549 | }
550 |
551 | .btn-primary:hover {
552 | background: var(--primary-dark);
553 | transform: translateY(-2px);
554 | box-shadow: var(--shadow-lg);
555 | }
556 |
557 | .btn-secondary {
558 | background: var(--white);
559 | color: var(--primary);
560 | border: 2px solid var(--primary);
561 | }
562 |
563 | .btn-secondary:hover {
564 | background: var(--primary);
565 | color: var(--white);
566 | transform: translateY(-2px);
567 | box-shadow: var(--shadow-lg);
568 | }
569 |
570 | footer {
571 | text-align: center;
572 | padding: 2rem;
573 | color: var(--gray);
574 | }
575 |
576 | .tech-stack {
577 | display: flex;
578 | justify-content: center;
579 | gap: 2rem;
580 | margin-top: 1rem;
581 | flex-wrap: wrap;
582 | }
583 |
584 | .tech-badge {
585 | display: flex;
586 | align-items: center;
587 | gap: 0.5rem;
588 | padding: 0.5rem 1rem;
589 | background: var(--white);
590 | border-radius: 0.5rem;
591 | box-shadow: var(--shadow);
592 | font-size: 0.875rem;
593 | font-weight: 600;
594 | }
595 |
596 | .nav-buttons {
597 | display: flex;
598 | gap: 1rem;
599 | margin-top: 1rem;
600 | justify-content: center;
601 | }
602 |
603 | .nav-btn {
604 | display: inline-flex;
605 | align-items: center;
606 | gap: 0.5rem;
607 | padding: 0.75rem 1.5rem;
608 | background: var(--primary);
609 | color: var(--white);
610 | text-decoration: none;
611 | border-radius: 0.5rem;
612 | font-weight: 600;
613 | transition: background-color 0.2s;
614 | box-shadow: var(--shadow);
615 | }
616 |
617 | .nav-btn:hover {
618 | background: var(--primary-dark);
619 | text-decoration: none;
620 | color: var(--white);
621 | }
622 |
623 | .nav-btn.secondary {
624 | background: var(--gray);
625 | }
626 |
627 | .nav-btn.secondary:hover {
628 | background: #475569;
629 | }
630 |
631 | .loading {
632 | display: inline-block;
633 | width: 1rem;
634 | height: 1rem;
635 | border: 2px solid var(--light);
636 | border-top-color: var(--primary);
637 | border-radius: 50%;
638 | animation: spin 0.6s linear infinite;
639 | }
640 |
641 | @keyframes spin {
642 | to { transform: rotate(360deg); }
643 | }
644 |
645 | @media (max-width: 768px) {
646 | .container { padding: 1rem; }
647 | h1 { font-size: 2rem; }
648 | .endpoint-grid { grid-template-columns: 1fr; }
649 | .stats { grid-template-columns: 1fr; }
650 | .action-buttons {
651 | flex-direction: column;
652 | align-items: center;
653 | gap: 0.75rem;
654 | }
655 | .btn {
656 | width: 100%;
657 | max-width: 300px;
658 | justify-content: center;
659 | }
660 | }
661 | </style>
662 | </head>
663 | <body>
664 | <div class="container">
665 | <header>
666 | <div class="logo">
667 | <div class="logo-icon">🧠</div>
668 | <div>
669 | <h1>MCP Memory Service</h1>
670 | <p class="subtitle" id="subtitle">Intelligent Semantic Memory with <span id="backend-name">Loading...</span></p>
671 | </div>
672 | </div>
673 | <div class="version-badge">
674 | <span>✅</span> <span id="version-display">Loading...</span> - Latest Release
675 | </div>
676 | <div class="nav-buttons">
677 | <a href="/" class="nav-btn">
678 | <svg width="20" height="20" fill="currentColor" viewBox="0 0 24 24">
679 | <path d="M10,20V14H14V20H19V12H22L12,3L2,12H5V20H10Z"/>
680 | </svg>
681 | Interactive Dashboard
682 | </a>
683 | <a href="/api/docs" class="nav-btn secondary" target="_blank">
684 | <svg width="20" height="20" fill="currentColor" viewBox="0 0 24 24">
685 | <path d="M14,17H7V15H14M17,13H7V11H17M17,9H7V7H17M19,3H5C3.89,3 3,3.89 3,5V19A2,2 0 0,0 5,21H19A2,2 0 0,0 21,19V5C21,3.89 20.1,3 19,3Z"/>
686 | </svg>
687 | Swagger UI
688 | </a>
689 | </div>
690 | </header>
691 |
692 | <div class="stats" id="stats">
693 | <div class="stat-card">
694 | <div class="stat-value"><span class="loading"></span></div>
695 | <div class="stat-label">Total Memories</div>
696 | </div>
697 | <div class="stat-card">
698 | <div class="stat-value"><span class="loading"></span></div>
699 | <div class="stat-label">Embedding Model</div>
700 | </div>
701 | <div class="stat-card">
702 | <div class="stat-value"><span class="loading"></span></div>
703 | <div class="stat-label">Server Status</div>
704 | </div>
705 | <div class="stat-card">
706 | <div class="stat-value"><span class="loading"></span></div>
707 | <div class="stat-label">Response Time</div>
708 | </div>
709 | </div>
710 |
711 | <div class="action-buttons">
712 | <a href="/api/docs" class="btn btn-primary">
713 | <span>📚</span> Interactive API Docs
714 | </a>
715 | <a href="/api/redoc" class="btn btn-secondary">
716 | <span>📖</span> ReDoc Documentation
717 | </a>
718 | <a href="https://github.com/doobidoo/mcp-memory-service" class="btn btn-secondary" target="_blank">
719 | <span>🚀</span> GitHub Repository
720 | </a>
721 | </div>
722 |
723 | <div class="endpoint-grid">
724 | <div class="endpoint-card">
725 | <div class="endpoint-header">
726 | <h3><span class="endpoint-icon">💾</span> Memory Management</h3>
727 | <p class="endpoint-description">Store, retrieve, and manage semantic memories</p>
728 | </div>
729 | <div class="endpoint-list">
730 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/memories/store_memory_api_memories_post'">
731 | <span class="method method-post">POST</span>
732 | <span class="endpoint-path">/api/memories</span>
733 | <div class="endpoint-desc">Store a new memory with automatic embedding generation</div>
734 | </div>
735 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/memories/list_memories_api_memories_get'">
736 | <span class="method method-get">GET</span>
737 | <span class="endpoint-path">/api/memories</span>
738 | <div class="endpoint-desc">List all memories with pagination support</div>
739 | </div>
740 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/memories/get_memory_api_memories__content_hash__get'">
741 | <span class="method method-get">GET</span>
742 | <span class="endpoint-path">/api/memories/{hash}</span>
743 | <div class="endpoint-desc">Retrieve a specific memory by content hash</div>
744 | </div>
745 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/memories/delete_memory_api_memories__content_hash__delete'">
746 | <span class="method method-delete">DELETE</span>
747 | <span class="endpoint-path">/api/memories/{hash}</span>
748 | <div class="endpoint-desc">Delete a memory and its embeddings</div>
749 | </div>
750 | </div>
751 | </div>
752 |
753 | <div class="endpoint-card">
754 | <div class="endpoint-header">
755 | <h3><span class="endpoint-icon">🔍</span> Search Operations</h3>
756 | <p class="endpoint-description">Powerful semantic and tag-based search</p>
757 | </div>
758 | <div class="endpoint-list">
759 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/search/semantic_search_api_search_post'">
760 | <span class="method method-post">POST</span>
761 | <span class="endpoint-path">/api/search</span>
762 | <div class="endpoint-desc">Semantic similarity search using embeddings</div>
763 | </div>
764 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/search/tag_search_api_search_by_tag_post'">
765 | <span class="method method-post">POST</span>
766 | <span class="endpoint-path">/api/search/by-tag</span>
767 | <div class="endpoint-desc">Search memories by tags (AND/OR logic)</div>
768 | </div>
769 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/search/time_search_api_search_by_time_post'">
770 | <span class="method method-post">POST</span>
771 | <span class="endpoint-path">/api/search/by-time</span>
772 | <div class="endpoint-desc">Natural language time-based queries</div>
773 | </div>
774 | <div class="endpoint-item" onclick="window.location.href='/api/docs#/search/find_similar_api_search_similar__content_hash__get'">
775 | <span class="method method-get">GET</span>
776 | <span class="endpoint-path">/api/search/similar/{hash}</span>
777 | <div class="endpoint-desc">Find memories similar to a specific one</div>
778 | </div>
779 | </div>
780 | </div>
781 |
782 | <div class="endpoint-card">
783 | <div class="endpoint-header">
784 | <h3><span class="endpoint-icon">📡</span> Real-time Events</h3>
785 | <p class="endpoint-description">Server-Sent Events for live updates</p>
786 | </div>
787 | <div class="endpoint-list">
788 | <div class="endpoint-item" onclick="window.location.href='/api/events'">
789 | <span class="method method-get">GET</span>
790 | <span class="endpoint-path">/api/events</span>
791 | <div class="endpoint-desc">Subscribe to real-time memory events stream</div>
792 | </div>
793 | <div class="endpoint-item" onclick="window.location.href='/api/events/stats'">
794 | <span class="method method-get">GET</span>
795 | <span class="endpoint-path">/api/events/stats</span>
796 | <div class="endpoint-desc">View SSE connection statistics</div>
797 | </div>
798 | <div class="endpoint-item" onclick="window.location.href='/static/sse_test.html'">
799 | <span class="method method-get">GET</span>
800 | <span class="endpoint-path">/static/sse_test.html</span>
801 | <div class="endpoint-desc">Interactive SSE testing interface</div>
802 | </div>
803 | </div>
804 | </div>
805 |
806 | <div class="endpoint-card">
807 | <div class="endpoint-header">
808 | <h3><span class="endpoint-icon">🏥</span> Health & Status</h3>
809 | <p class="endpoint-description">Monitor service health and performance</p>
810 | </div>
811 | <div class="endpoint-list">
812 | <div class="endpoint-item" onclick="window.location.href='/api/health'">
813 | <span class="method method-get">GET</span>
814 | <span class="endpoint-path">/api/health</span>
815 | <div class="endpoint-desc">Quick health check endpoint</div>
816 | </div>
817 | <div class="endpoint-item" onclick="window.location.href='/api/health/detailed'">
818 | <span class="method method-get">GET</span>
819 | <span class="endpoint-path">/api/health/detailed</span>
820 | <div class="endpoint-desc">Detailed health with database statistics</div>
821 | </div>
822 | <div class="endpoint-item" onclick="window.location.href='/api/docs'">
823 | <span class="method method-get">GET</span>
824 | <span class="endpoint-path">/api/docs</span>
825 | <div class="endpoint-desc">Interactive Swagger UI documentation</div>
826 | </div>
827 | <div class="endpoint-item" onclick="window.location.href='/api/redoc'">
828 | <span class="method method-get">GET</span>
829 | <span class="endpoint-path">/api/redoc</span>
830 | <div class="endpoint-desc">Alternative ReDoc documentation</div>
831 | </div>
832 | </div>
833 | </div>
834 | </div>
835 |
836 | <footer>
837 | <p>Powered by cutting-edge technology</p>
838 | <div class="tech-stack">
839 | <div class="tech-badge">
840 | <span>🐍</span> FastAPI
841 | </div>
842 | <div class="tech-badge">
843 | <span>🗄️</span> SQLite-vec
844 | </div>
845 | <div class="tech-badge">
846 | <span>🧠</span> Sentence Transformers
847 | </div>
848 | <div class="tech-badge">
849 | <span>🔥</span> PyTorch
850 | </div>
851 | <div class="tech-badge">
852 | <span>🌐</span> mDNS Discovery
853 | </div>
854 | </div>
855 | <p style="margin-top: 2rem; opacity: 0.8;">
856 | © 2025 MCP Memory Service | Apache 2.0 License
857 | </p>
858 | </footer>
859 | </div>
860 |
861 | <script>
862 | // Fetch and display live stats
863 | async function updateStats() {
864 | try {
865 | const healthResponse = await fetch('/api/health');
866 | const health = await healthResponse.json();
867 |
868 | const detailedResponse = await fetch('/api/health/detailed');
869 | const detailed = await detailedResponse.json();
870 |
871 | const stats = document.getElementById('stats');
872 | stats.innerHTML = `
873 | <div class="stat-card">
874 | <div class="stat-value">${detailed.statistics?.total_memories || 0}</div>
875 | <div class="stat-label">Total Memories</div>
876 | </div>
877 | <div class="stat-card">
878 | <div class="stat-value">all-MiniLM-L6-v2</div>
879 | <div class="stat-label">Embedding Model</div>
880 | </div>
881 | <div class="stat-card">
882 | <div class="stat-value" style="color: var(--success);">● Healthy</div>
883 | <div class="stat-label">Server Status</div>
884 | </div>
885 | <div class="stat-card">
886 |                         <div class="stat-value">&lt;1ms</div>
887 | <div class="stat-label">Response Time</div>
888 | </div>
889 | `;
890 | } catch (error) {
891 | console.error('Failed to fetch stats:', error);
892 | }
893 | }
894 |
895 | // Update stats on page load
896 | updateStats();
897 |
898 | // Update stats every 30 seconds
899 | setInterval(updateStats, 30000);
900 | </script>
901 |
902 | <script>
903 | // Dynamic content loading for API overview
904 | function getBackendDisplayName(backend) {
905 | const backendMap = {
906 | 'sqlite-vec': 'SQLite-vec',
907 | 'sqlite_vec': 'SQLite-vec',
908 | 'cloudflare': 'Cloudflare D1 + Vectorize',
909 | 'hybrid': 'Hybrid (SQLite-vec + Cloudflare)'
910 | };
911 | return backendMap[backend] || backend || 'Unknown Backend';
912 | }
913 |
914 | async function loadDynamicInfo() {
915 | try {
916 | // Load detailed health information
917 | const response = await fetch('/api/health/detailed');
918 | if (!response.ok) {
919 | throw new Error(`HTTP ${response.status}`);
920 | }
921 | const healthData = await response.json();
922 |
923 | // Update version display
924 | const versionEl = document.getElementById('version-display');
925 | if (versionEl && healthData.version) {
926 | versionEl.textContent = `v${healthData.version}`;
927 | }
928 |
929 | // Update backend name and subtitle
930 | const backendNameEl = document.getElementById('backend-name');
931 | const subtitleEl = document.getElementById('subtitle');
932 |
933 | if (healthData.storage && healthData.storage.backend) {
934 | const backendDisplay = getBackendDisplayName(healthData.storage.backend);
935 |
936 | if (backendNameEl) {
937 | backendNameEl.textContent = backendDisplay;
938 | }
939 |
940 | if (subtitleEl) {
941 | subtitleEl.innerHTML = `Intelligent Semantic Memory with <span id="backend-name">${backendDisplay}</span>`;
942 | }
943 | }
944 |
945 | } catch (error) {
946 | console.error('Error loading dynamic info:', error);
947 |
948 | // Fallback values on error
949 | const versionEl = document.getElementById('version-display');
950 | const backendNameEl = document.getElementById('backend-name');
951 | const subtitleEl = document.getElementById('subtitle');
952 |
953 | if (versionEl) {
954 | versionEl.textContent = 'v?.?.?';
955 | }
956 |
957 | if (backendNameEl) {
958 | backendNameEl.textContent = 'Unknown Backend';
959 | }
960 |
961 | if (subtitleEl) {
962 | subtitleEl.innerHTML = 'Intelligent Semantic Memory with <span id="backend-name">Unknown Backend</span>';
963 | }
964 | }
965 | }
966 |
967 | // Load dynamic content when page loads
968 | document.addEventListener('DOMContentLoaded', loadDynamicInfo);
969 | </script>
970 | </body>
971 | </html>
972 | """
973 |
974 | @app.get("/api-overview", response_class=HTMLResponse)
975 | async def api_overview():
976 |     """Serve the API overview page (HTML produced by get_api_overview_html())."""
977 |     return get_api_overview_html()
978 |
979 | @app.get("/", response_class=HTMLResponse)
980 | async def dashboard():
981 |     """Serve the dashboard homepage.
982 |
983 |     Prefers the migrated interactive dashboard at static/index.html next to this
984 |     module; falls back to the embedded html_template if the file is missing or
985 |     unreadable (read errors are logged, never surfaced to the client).
986 |     """
987 |     # Serve the migrated interactive dashboard instead of hardcoded template
988 |     try:
989 |         # Path to the migrated dashboard HTML file (relative to this module)
990 |         dashboard_path = os.path.join(os.path.dirname(__file__), "static", "index.html")
991 |
992 |         if os.path.exists(dashboard_path):
993 |             # Read and serve the migrated dashboard
994 |             with open(dashboard_path, 'r', encoding='utf-8') as f:
995 |                 return f.read()
996 |         else:
997 |             # Fallback to the embedded html_template when static/index.html is absent
998 |             return html_template
999 |         # NOTE(review): file is re-read on every request — presumably intentional for
1000 |         #     live editing during development; confirm before caching.
1001 |     except Exception as e:
1002 |         # Broad catch is deliberate best-effort: log and fall back to the embedded template
1003 |         logger.warning(f"Error loading migrated dashboard: {e}")
1004 |         return html_template
998 |
999 | return app
1000 |
1001 |
1002 | # Module-level app instance built via the create_app() factory above
1003 | app = create_app()
1004 |
1005 |
1006 | # Storage getter is now in dependencies.py
```