This is page 24 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/oauth/middleware.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | OAuth 2.1 authentication middleware for MCP Memory Service.
17 |
18 | Provides Bearer token validation with fallback to API key authentication.
19 | """
20 |
21 | import logging
22 | from typing import Optional, Dict, Any
23 | from fastapi import HTTPException, status, Depends
24 | from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
25 | from jose import JWTError, jwt, ExpiredSignatureError
26 | from jose.jwt import JWTClaimsError
27 |
28 | from ...config import (
29 | OAUTH_ISSUER,
30 | API_KEY,
31 | ALLOW_ANONYMOUS_ACCESS,
32 | OAUTH_ENABLED,
33 | get_jwt_algorithm,
34 | get_jwt_verification_key
35 | )
36 | from .storage import oauth_storage
37 |
38 | logger = logging.getLogger(__name__)
39 |
40 | # Optional Bearer token security scheme
41 | bearer_scheme = HTTPBearer(auto_error=False)
42 |
43 |
44 | class AuthenticationResult:
45 | """Result of authentication attempt."""
46 |
47 | def __init__(
48 | self,
49 | authenticated: bool,
50 | client_id: Optional[str] = None,
51 | scope: Optional[str] = None,
52 | auth_method: Optional[str] = None,
53 | error: Optional[str] = None
54 | ):
55 | self.authenticated = authenticated
56 | self.client_id = client_id
57 | self.scope = scope
58 | self.auth_method = auth_method # "oauth", "api_key", or "none"
59 | self.error = error
60 |
61 | def has_scope(self, required_scope: str) -> bool:
62 | """Check if the authenticated user has the required scope."""
63 | if not self.authenticated or not self.scope:
64 | return False
65 |
66 | # Split scopes and check if required scope is present
67 | scopes = self.scope.split()
68 | return required_scope in scopes
69 |
70 | def require_scope(self, required_scope: str) -> None:
71 | """Raise an exception if the required scope is not present."""
72 | if not self.has_scope(required_scope):
73 | raise HTTPException(
74 | status_code=status.HTTP_403_FORBIDDEN,
75 | detail={
76 | "error": "insufficient_scope",
77 | "error_description": f"Required scope '{required_scope}' not granted"
78 | }
79 | )
80 |
81 |
82 | def validate_jwt_token(token: str) -> Optional[Dict[str, Any]]:
83 | """
84 | Validate a JWT access token with comprehensive error handling.
85 |
86 | Supports both RS256 and HS256 algorithms based on available keys.
87 | Provides detailed error logging for debugging purposes.
88 |
89 | Returns:
90 | JWT payload if valid, None if invalid
91 | """
92 | # Input validation
93 | if not token or not isinstance(token, str):
94 | logger.debug("Invalid token: empty or non-string token provided")
95 | return None
96 |
97 | # Basic token format validation
98 | token = token.strip()
99 | if not token:
100 | logger.debug("Invalid token: empty token after stripping")
101 | return None
102 |
103 | # JWT tokens should have 3 parts separated by dots
104 | parts = token.split('.')
105 | if len(parts) != 3:
106 | logger.debug(f"Invalid token format: expected 3 parts, got {len(parts)}")
107 | return None
108 |
109 | try:
110 | algorithm = get_jwt_algorithm()
111 | verification_key = get_jwt_verification_key()
112 |
113 | logger.debug(f"Validating JWT token with algorithm: {algorithm}")
114 | payload = jwt.decode(
115 | token,
116 | verification_key,
117 | algorithms=[algorithm],
118 | issuer=OAUTH_ISSUER,
119 | audience="mcp-memory-service"
120 | )
121 |
122 | # Additional payload validation
123 | required_claims = ['sub', 'iss', 'aud', 'exp', 'iat']
124 | missing_claims = [claim for claim in required_claims if claim not in payload]
125 | if missing_claims:
126 | logger.warning(f"JWT token missing required claims: {missing_claims}")
127 | return None
128 |
129 | logger.debug(f"JWT validation successful for subject: {payload.get('sub')}")
130 | return payload
131 |
132 | except ExpiredSignatureError:
133 | logger.debug("JWT validation failed: token has expired")
134 | return None
135 | except JWTClaimsError as e:
136 | logger.debug(f"JWT validation failed: invalid claims - {e}")
137 | return None
138 | except ValueError as e:
139 | logger.debug(f"JWT validation failed: configuration error - {e}")
140 | return None
141 | except JWTError as e:
142 | # Catch-all for other JWT-related errors
143 | error_type = type(e).__name__
144 | logger.debug(f"JWT validation failed: {error_type} - {e}")
145 | return None
146 | except Exception as e:
147 | # Unexpected errors should be logged but not crash the system
148 | error_type = type(e).__name__
149 | logger.error(f"Unexpected error during JWT validation: {error_type} - {e}")
150 | return None
151 |
152 |
153 | async def authenticate_bearer_token(token: str) -> AuthenticationResult:
154 | """
155 | Authenticate using OAuth Bearer token with comprehensive error handling.
156 |
157 | Returns:
158 | AuthenticationResult with authentication status and details
159 | """
160 | # Input validation
161 | if not token or not isinstance(token, str):
162 | logger.debug("Bearer token authentication failed: invalid token input")
163 | return AuthenticationResult(
164 | authenticated=False,
165 | auth_method="oauth",
166 | error="invalid_token"
167 | )
168 |
169 | token = token.strip()
170 | if not token:
171 | logger.debug("Bearer token authentication failed: empty token")
172 | return AuthenticationResult(
173 | authenticated=False,
174 | auth_method="oauth",
175 | error="invalid_token"
176 | )
177 |
178 | try:
179 | # First, try JWT validation
180 | jwt_payload = validate_jwt_token(token)
181 | if jwt_payload:
182 | client_id = jwt_payload.get("sub")
183 | scope = jwt_payload.get("scope", "")
184 |
185 | # Validate client_id is present
186 | if not client_id:
187 | logger.warning("JWT authentication failed: missing client_id in token payload")
188 | return AuthenticationResult(
189 | authenticated=False,
190 | auth_method="oauth",
191 | error="invalid_token"
192 | )
193 |
194 | logger.debug(f"JWT authentication successful: client_id={client_id}, scope={scope}")
195 | return AuthenticationResult(
196 | authenticated=True,
197 | client_id=client_id,
198 | scope=scope,
199 | auth_method="oauth"
200 | )
201 |
202 | # Fallback: check if token is stored in OAuth storage
203 | token_data = await oauth_storage.get_access_token(token)
204 | if token_data:
205 | client_id = token_data.get("client_id")
206 | if not client_id:
207 | logger.warning("OAuth storage authentication failed: missing client_id in stored token")
208 | return AuthenticationResult(
209 | authenticated=False,
210 | auth_method="oauth",
211 | error="invalid_token"
212 | )
213 |
214 | logger.debug(f"OAuth storage authentication successful: client_id={client_id}")
215 | return AuthenticationResult(
216 | authenticated=True,
217 | client_id=client_id,
218 | scope=token_data.get("scope", ""),
219 | auth_method="oauth"
220 | )
221 |
222 | except Exception as e:
223 | # Catch any unexpected errors during authentication
224 | error_type = type(e).__name__
225 | logger.error(f"Unexpected error during bearer token authentication: {error_type} - {e}")
226 | return AuthenticationResult(
227 | authenticated=False,
228 | auth_method="oauth",
229 | error="server_error"
230 | )
231 |
232 | logger.debug("Bearer token authentication failed: token not found or invalid")
233 | return AuthenticationResult(
234 | authenticated=False,
235 | auth_method="oauth",
236 | error="invalid_token"
237 | )
238 |
239 |
240 | def authenticate_api_key(api_key: str) -> AuthenticationResult:
241 | """
242 | Authenticate using legacy API key with enhanced validation.
243 |
244 | Returns:
245 | AuthenticationResult with authentication status
246 | """
247 | # Input validation
248 | if not api_key or not isinstance(api_key, str):
249 | logger.debug("API key authentication failed: invalid input")
250 | return AuthenticationResult(
251 | authenticated=False,
252 | auth_method="api_key",
253 | error="invalid_api_key"
254 | )
255 |
256 | api_key = api_key.strip()
257 | if not api_key:
258 | logger.debug("API key authentication failed: empty key")
259 | return AuthenticationResult(
260 | authenticated=False,
261 | auth_method="api_key",
262 | error="invalid_api_key"
263 | )
264 |
265 | # Check if API key is configured
266 | if not API_KEY:
267 | logger.debug("API key authentication failed: no API key configured")
268 | return AuthenticationResult(
269 | authenticated=False,
270 | auth_method="api_key",
271 | error="api_key_not_configured"
272 | )
273 |
274 | # Validate API key
275 | if api_key == API_KEY:
276 | logger.debug("API key authentication successful")
277 | return AuthenticationResult(
278 | authenticated=True,
279 | client_id="api_key_client",
280 | scope="read write admin", # API key gets full access
281 | auth_method="api_key"
282 | )
283 |
284 | logger.debug("API key authentication failed: key mismatch")
285 | return AuthenticationResult(
286 | authenticated=False,
287 | auth_method="api_key",
288 | error="invalid_api_key"
289 | )
290 |
291 |
292 | async def get_current_user(
293 | credentials: Optional[HTTPAuthorizationCredentials] = Depends(bearer_scheme)
294 | ) -> AuthenticationResult:
295 | """
296 | Get current authenticated user with fallback authentication methods.
297 |
298 | Tries in order:
299 | 1. OAuth Bearer token (JWT or stored token) - only if OAuth is enabled
300 | 2. Legacy API key authentication
301 | 3. Anonymous access (if explicitly enabled)
302 |
303 | Returns:
304 | AuthenticationResult with authentication details
305 | """
306 | # Try OAuth Bearer token authentication first (only if OAuth is enabled)
307 | if credentials and credentials.scheme.lower() == "bearer":
308 | # OAuth Bearer token validation only if OAuth is enabled
309 | if OAUTH_ENABLED:
310 | auth_result = await authenticate_bearer_token(credentials.credentials)
311 | if auth_result.authenticated:
312 | return auth_result
313 |
314 | # OAuth token provided but invalid - log the attempt
315 | logger.debug(f"OAuth Bearer token validation failed for enabled OAuth system")
316 |
317 | # Try API key authentication as fallback (works regardless of OAuth state)
318 | if API_KEY:
319 | # Some clients might send API key as Bearer token
320 | api_key_result = authenticate_api_key(credentials.credentials)
321 | if api_key_result.authenticated:
322 | return api_key_result
323 |
324 | # Determine appropriate error message based on OAuth state
325 | if OAUTH_ENABLED:
326 | error_msg = "The access token provided is expired, revoked, malformed, or invalid"
327 | logger.warning("Invalid Bearer token provided and API key fallback failed")
328 | else:
329 | error_msg = "OAuth is disabled. Use API key authentication or enable anonymous access."
330 | logger.debug("Bearer token provided but OAuth is disabled, API key fallback failed")
331 |
332 | # All Bearer token authentication methods failed
333 | raise HTTPException(
334 | status_code=status.HTTP_401_UNAUTHORIZED,
335 | detail={
336 | "error": "invalid_token",
337 | "error_description": error_msg
338 | },
339 | headers={"WWW-Authenticate": "Bearer"}
340 | )
341 |
342 | # Allow anonymous access only if explicitly enabled
343 | if ALLOW_ANONYMOUS_ACCESS:
344 | logger.debug("Anonymous access explicitly enabled, granting read-only access")
345 | return AuthenticationResult(
346 | authenticated=True,
347 | client_id="anonymous",
348 | scope="read", # Anonymous users get read-only access for security
349 | auth_method="none"
350 | )
351 |
352 | # No credentials provided and anonymous access not allowed
353 | if API_KEY or OAUTH_ENABLED:
354 | logger.debug("No valid authentication provided")
355 | if OAUTH_ENABLED and API_KEY:
356 | error_msg = "Authorization required. Provide valid OAuth Bearer token or API key."
357 | elif OAUTH_ENABLED:
358 | error_msg = "Authorization required. Provide valid OAuth Bearer token."
359 | else:
360 | error_msg = "Authorization required. Provide valid API key."
361 | else:
362 | logger.debug("No authentication configured and anonymous access disabled")
363 | error_msg = "Authentication is required. Set MCP_ALLOW_ANONYMOUS_ACCESS=true to enable anonymous access."
364 |
365 | raise HTTPException(
366 | status_code=status.HTTP_401_UNAUTHORIZED,
367 | detail={
368 | "error": "authorization_required",
369 | "error_description": error_msg
370 | },
371 | headers={"WWW-Authenticate": "Bearer"}
372 | )
373 |
374 |
375 | # Convenience dependency for requiring specific scopes
376 | def require_scope(scope: str):
377 | """
378 | Create a dependency that requires a specific OAuth scope.
379 |
380 | Usage:
381 | @app.get("/admin", dependencies=[Depends(require_scope("admin"))])
382 | """
383 | async def scope_dependency(user: AuthenticationResult = Depends(get_current_user)):
384 | user.require_scope(scope)
385 | return user
386 |
387 | return scope_dependency
388 |
389 |
390 | # Convenience dependencies for common access patterns
391 | async def require_read_access(user: AuthenticationResult = Depends(get_current_user)) -> AuthenticationResult:
392 | """Require read access to the resource."""
393 | user.require_scope("read")
394 | return user
395 |
396 |
397 | async def require_write_access(user: AuthenticationResult = Depends(get_current_user)) -> AuthenticationResult:
398 | """Require write access to the resource."""
399 | user.require_scope("write")
400 | return user
401 |
402 |
403 | async def require_admin_access(user: AuthenticationResult = Depends(get_current_user)) -> AuthenticationResult:
404 | """Require admin access to the resource."""
405 | user.require_scope("admin")
406 | return user
407 |
408 |
409 | # Optional authentication (for endpoints that work with or without auth)
410 | async def get_optional_user(
411 | credentials: Optional[HTTPAuthorizationCredentials] = Depends(bearer_scheme)
412 | ) -> Optional[AuthenticationResult]:
413 | """
414 | Get current user but don't require authentication.
415 |
416 | Returns:
417 | AuthenticationResult if authenticated, None if not
418 | """
419 | try:
420 | return await get_current_user(credentials)
421 | except HTTPException:
422 | return None
```
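Usage note: the `require_scope()` docstring above already hints at the intended FastAPI wiring. Below is a minimal sketch of how these dependencies could be attached to endpoints; the app setup, routes, and handler names are illustrative assumptions, only `AuthenticationResult`, `require_read_access`, `require_write_access`, and `require_scope` come from the module shown above.
```python
# Hypothetical wiring of the OAuth middleware dependencies into a FastAPI app.
# Routes and handler names are illustrative; the imported names come from the module above.
from fastapi import Depends, FastAPI

from mcp_memory_service.web.oauth.middleware import (
    AuthenticationResult,
    require_read_access,
    require_scope,
    require_write_access,
)

app = FastAPI()


@app.get("/memories")
async def list_memories(user: AuthenticationResult = Depends(require_read_access)):
    # user.client_id is "anonymous", "api_key_client", or the OAuth client id
    return {"client": user.client_id, "scope": user.scope}


@app.post("/memories")
async def store_memory(user: AuthenticationResult = Depends(require_write_access)):
    return {"stored_by": user.client_id}


# Scope-specific dependency, as shown in the require_scope() docstring
@app.get("/admin", dependencies=[Depends(require_scope("admin"))])
async def admin_dashboard():
    return {"ok": True}
```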
--------------------------------------------------------------------------------
/scripts/testing/test_installation.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Heinrich Krupp
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """
17 | Installation Verification Script for MCP Memory Service.
18 |
19 | This script tests all critical components of MCP Memory Service to verify
20 | that the installation is working correctly on the current platform.
21 | """
22 | import os
23 | import sys
24 | import platform
25 | import subprocess
26 | import traceback
27 | import importlib
28 | from pathlib import Path
29 |
30 | # ANSI color codes for terminal output
31 | GREEN = "\033[92m"
32 | YELLOW = "\033[93m"
33 | RED = "\033[91m"
34 | RESET = "\033[0m"
35 | BOLD = "\033[1m"
36 |
37 | def print_header(text):
38 | """Print a formatted header."""
39 | print(f"\n{BOLD}{'=' * 80}{RESET}")
40 | print(f"{BOLD} {text}{RESET}")
41 | print(f"{BOLD}{'=' * 80}{RESET}")
42 |
43 | def print_success(text):
44 | """Print a success message."""
45 | print(f"{GREEN}✅ {text}{RESET}")
46 |
47 | def print_warning(text):
48 | """Print a warning message."""
49 | print(f"{YELLOW}⚠️ {text}{RESET}")
50 |
51 | def print_error(text):
52 | """Print an error message."""
53 | print(f"{RED}❌ {text}{RESET}")
54 |
55 | def print_info(text):
56 | """Print an info message."""
57 | print(f"➔ {text}")
58 |
59 | def check_python_version():
60 | """Check if Python version is compatible."""
61 | print_info(f"Python version: {sys.version}")
62 | major, minor, _ = platform.python_version_tuple()
63 | major, minor = int(major), int(minor)
64 |
65 | if major < 3 or (major == 3 and minor < 10):
66 | print_error(f"Python version {major}.{minor} is too old. MCP Memory Service requires Python 3.10+")
67 | return False
68 | else:
69 | print_success(f"Python version {major}.{minor} is compatible")
70 | return True
71 |
72 | def check_dependencies():
73 | """Check if all required dependencies are installed and compatible."""
74 | required_packages = [
75 | "torch",
76 | "sentence_transformers",
77 | "chromadb",
78 | "mcp",
79 | "websockets",
80 | "numpy"
81 | ]
82 |
83 | success = True
84 |
85 | for package in required_packages:
86 | try:
87 | module = importlib.import_module(package)
88 | if hasattr(module, "__version__"):
89 | print_success(f"{package} is installed (version: {module.__version__})")
90 | else:
91 | print_success(f"{package} is installed")
92 |
93 | # Specific checks for critical packages
94 | if package == "torch":
95 | # Check PyTorch on different platforms
96 | check_torch_compatibility()
97 | elif package == "sentence_transformers":
98 | # Check sentence-transformers compatibility
99 | check_sentence_transformers_compatibility()
100 | elif package == "chromadb":
101 | # Check ChromaDB
102 | check_chromadb()
103 |
104 | except ImportError:
105 | print_error(f"{package} is not installed")
106 | success = False
107 | except Exception as e:
108 | print_error(f"Error checking {package}: {str(e)}")
109 | success = False
110 |
111 | return success
112 |
113 | def check_torch_compatibility():
114 | """Check if PyTorch is compatible with the system."""
115 | import torch
116 |
117 | # Get system info
118 | system = platform.system().lower()
119 | machine = platform.machine().lower()
120 | is_windows = system == "windows"
121 | is_macos = system == "darwin"
122 | is_linux = system == "linux"
123 | is_arm = machine in ("arm64", "aarch64")
124 | is_x86 = machine in ("x86_64", "amd64", "x64")
125 |
126 | # Display torch info
127 | print_info(f"PyTorch version: {torch.__version__}")
128 |
129 | # Check CUDA availability
130 | if torch.cuda.is_available():
131 | device_count = torch.cuda.device_count()
132 | device_name = torch.cuda.get_device_name(0) if device_count > 0 else "Unknown"
133 | print_success(f"CUDA is available (version: {torch.version.cuda})")
134 | print_info(f"GPU Device: {device_name}")
135 | print_info(f"Device Count: {device_count}")
136 | # Check MPS availability (Apple Silicon)
137 | elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
138 | print_success("MPS (Metal Performance Shaders) is available")
139 | if not torch.backends.mps.is_built():
140 | print_warning("PyTorch is not built with MPS support")
141 | # Check DirectML (Windows)
142 | elif is_windows:
143 | try:
144 | import torch_directml
145 | print_success(f"DirectML is available (version: {torch_directml.__version__})")
146 | except ImportError:
147 | print_info("DirectML is not available, using CPU only")
148 | else:
149 | print_info("Using CPU only")
150 |
151 | # Special check for macOS Intel
152 | if is_macos and is_x86:
153 | torch_version = [int(x) for x in torch.__version__.split('.')[:2]]
154 |
155 | if torch_version[0] == 2 and torch_version[1] == 0:
156 | print_success("PyTorch 2.0.x detected, which is optimal for macOS Intel")
157 | elif torch_version[0] == 1 and torch_version[1] >= 13:
158 | print_success("PyTorch 1.13.x detected, which is compatible for macOS Intel")
159 | elif torch_version[0] == 1 and torch_version[1] < 11:
160 | print_warning("PyTorch version is below 1.11.0, which may be too old for sentence-transformers")
161 | elif torch_version[0] > 2 or (torch_version[0] == 2 and torch_version[1] > 0):
162 | print_warning("PyTorch version is newer than 2.0.x, which may have compatibility issues on macOS Intel")
163 |
164 | def check_sentence_transformers_compatibility():
165 | """Check if sentence-transformers is compatible with the system and PyTorch."""
166 | import torch
167 | import sentence_transformers
168 |
169 | # Check compatibility
170 | torch_version = [int(x) for x in torch.__version__.split('.')[:2]]
171 | st_version = [int(x) for x in sentence_transformers.__version__.split('.')[:2]]
172 |
173 | print_info(f"sentence-transformers version: {sentence_transformers.__version__}")
174 |
175 | # Critical compatibility check
176 | system = platform.system().lower()
177 | machine = platform.machine().lower()
178 | is_macos = system == "darwin"
179 | is_x86 = machine in ("x86_64", "amd64", "x64")
180 |
181 | if is_macos and is_x86:
182 | if st_version[0] >= 3 and (torch_version[0] < 1 or (torch_version[0] == 1 and torch_version[1] < 11)):
183 | print_error("Incompatible versions: sentence-transformers 3.x+ requires torch>=1.11.0")
184 | return False
185 | elif st_version[0] == 2 and st_version[1] == 2 and (torch_version[0] == 2 and torch_version[1] == 0):
186 | print_success("Optimal combination: sentence-transformers 2.2.x with torch 2.0.x")
187 | elif st_version[0] == 2 and st_version[1] == 2 and (torch_version[0] == 1 and torch_version[1] >= 13):
188 | print_success("Compatible combination: sentence-transformers 2.2.x with torch 1.13.x")
189 | else:
190 | print_warning("Untested version combination. May work but not officially supported.")
191 |
192 | # Test sentence-transformers with a small model
193 | try:
194 | print_info("Testing model loading (paraphrase-MiniLM-L3-v2)...")
195 | start_time = __import__('time').time()
196 | model = sentence_transformers.SentenceTransformer('paraphrase-MiniLM-L3-v2')
197 | load_time = __import__('time').time() - start_time
198 | print_success(f"Model loaded successfully in {load_time:.2f}s")
199 |
200 | # Test encoding
201 | print_info("Testing encoding...")
202 | start_time = __import__('time').time()
203 | _ = model.encode("This is a test sentence")
204 | encode_time = __import__('time').time() - start_time
205 | print_success(f"Encoding successful in {encode_time:.2f}s")
206 |
207 | return True
208 | except Exception as e:
209 | print_error(f"Error testing sentence-transformers: {str(e)}")
210 | print(traceback.format_exc())
211 | return False
212 |
213 | def check_chromadb():
214 | """Check if ChromaDB works correctly."""
215 | import chromadb
216 |
217 | print_info(f"ChromaDB version: {chromadb.__version__}")
218 |
219 | # Test in-memory client
220 | try:
221 | print_info("Testing in-memory ChromaDB client...")
222 | client = chromadb.Client()
223 | collection = client.create_collection("test_collection")
224 | collection.add(
225 | documents=["This is a test document"],
226 | metadatas=[{"source": "test"}],
227 | ids=["id1"]
228 | )
229 | results = collection.query(
230 | query_texts=["test document"],
231 | n_results=1
232 | )
233 |
234 | if results and len(results["ids"]) > 0:
235 | print_success("ChromaDB in-memory test successful")
236 | else:
237 | print_warning("ChromaDB query returned empty results")
238 |
239 | return True
240 | except Exception as e:
241 | print_error(f"Error testing ChromaDB: {str(e)}")
242 | print(traceback.format_exc())
243 | return False
244 |
245 | def check_mcp_protocol():
246 | """Check if MCP protocol handler is working correctly."""
247 | try:
248 | import mcp
249 | from mcp.types import TextContent
250 | from mcp.server import Server
251 |
252 | print_info(f"MCP version: {mcp.__version__}")
253 |
254 | # Basic protocol functionality check
255 | server = Server("test_server")
256 |
257 | # Check if we can register handlers
258 | @server.list_tools()
259 | async def handle_list_tools():
260 | return []
261 |
262 | print_success("MCP protocol handler initialized successfully")
263 | return True
264 | except Exception as e:
265 | print_error(f"Error testing MCP protocol: {str(e)}")
266 | return False
267 |
268 | def check_memory_service_installation():
269 | """Check if the MCP Memory Service package is installed correctly."""
270 | try:
271 | from mcp_memory_service import __file__ as package_path
272 | print_success(f"MCP Memory Service installed at: {package_path}")
273 |
274 | # Check if important modules are importable
275 | from mcp_memory_service.storage.chroma import ChromaMemoryStorage
276 | from mcp_memory_service.models.memory import Memory
277 | from mcp_memory_service.utils.time_parser import parse_time_expression
278 |
279 | print_success("All required MCP Memory Service modules imported successfully")
280 | return True
281 | except ImportError:
282 | print_error("MCP Memory Service package is not installed or importable")
283 | return False
284 | except Exception as e:
285 | print_error(f"Error importing MCP Memory Service modules: {str(e)}")
286 | return False
287 |
288 | def check_system_paths():
289 | """Check if system paths are set up correctly."""
290 | print_info(f"System: {platform.system()} {platform.release()}")
291 | print_info(f"Architecture: {platform.machine()}")
292 | print_info(f"Python executable: {sys.executable}")
293 |
294 | # Check virtual environment
295 | in_venv = sys.prefix != sys.base_prefix
296 | if in_venv:
297 | print_success(f"Running in virtual environment: {sys.prefix}")
298 | else:
299 | print_warning("Not running in a virtual environment")
300 |
301 | # Check if 'memory' command is in PATH
302 | try:
303 | memory_cmd = subprocess.check_output(
304 | ["which", "memory"] if platform.system() != "Windows" else ["where", "memory"],
305 | stderr=subprocess.PIPE,
306 | text=True
307 | ).strip()
308 | print_success(f"'memory' command found at: {memory_cmd}")
309 | except subprocess.SubprocessError:
310 | print_warning("'memory' command not found in PATH")
311 |
312 | # Check for ChromaDB and backup paths
313 | chroma_path = os.environ.get("MCP_MEMORY_CHROMA_PATH")
314 | backups_path = os.environ.get("MCP_MEMORY_BACKUPS_PATH")
315 |
316 | if chroma_path:
317 | print_info(f"ChromaDB path: {chroma_path}")
318 | path = Path(chroma_path)
319 | if path.exists():
320 | print_success("ChromaDB path exists")
321 | else:
322 | print_warning("ChromaDB path does not exist yet")
323 | else:
324 | print_info("ChromaDB path not set in environment")
325 |
326 | if backups_path:
327 | print_info(f"Backups path: {backups_path}")
328 | path = Path(backups_path)
329 | if path.exists():
330 | print_success("Backups path exists")
331 | else:
332 | print_warning("Backups path does not exist yet")
333 | else:
334 | print_info("Backups path not set in environment")
335 |
336 | return True
337 |
338 | def check_torch_operations():
339 | """Perform basic PyTorch operations to verify functionality."""
340 | try:
341 | import torch
342 |
343 | # Create a simple tensor
344 | print_info("Creating and manipulating tensors...")
345 | x = torch.rand(5, 3)
346 | y = torch.rand(5, 3)
347 | z = x + y
348 |
349 | # Try a basic neural network
350 | from torch import nn
351 |
352 | class SimpleNet(nn.Module):
353 | def __init__(self):
354 | super().__init__()
355 | self.fc1 = nn.Linear(10, 5)
356 | self.fc2 = nn.Linear(5, 2)
357 |
358 | def forward(self, x):
359 | x = torch.relu(self.fc1(x))
360 | x = self.fc2(x)
361 | return x
362 |
363 | model = SimpleNet()
364 | input_tensor = torch.rand(1, 10)
365 | output = model(input_tensor)
366 |
367 | print_success("PyTorch operations completed successfully")
368 | return True
369 | except Exception as e:
370 | print_error(f"Error in PyTorch operations: {str(e)}")
371 | return False
372 |
373 | def run_verification():
374 | """Run all verification tests."""
375 | print_header("MCP Memory Service Installation Verification")
376 |
377 | # Track overall success
378 | success = True
379 |
380 | # Check Python version
381 | print_header("1. Python Environment")
382 | if not check_python_version():
383 | success = False
384 |
385 | check_system_paths()
386 |
387 | # Check dependencies
388 | print_header("2. Dependency Verification")
389 | if not check_dependencies():
390 | success = False
391 |
392 | # Check MCP Memory Service
393 | print_header("3. MCP Memory Service Installation")
394 | if not check_memory_service_installation():
395 | success = False
396 |
397 | if not check_mcp_protocol():
398 | success = False
399 |
400 | # Check PyTorch operations
401 | print_header("4. PyTorch Operations")
402 | if not check_torch_operations():
403 | success = False
404 |
405 | # Overall result
406 | print_header("Verification Results")
407 | if success:
408 | print_success("All verification tests passed! The installation appears to be working correctly.")
409 | else:
410 | print_warning("Some verification tests failed. Check the errors above for details.")
411 |
412 | return success
413 |
414 | if __name__ == "__main__":
415 | success = run_verification()
416 | sys.exit(0 if success else 1)
```
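Usage note: the script is designed to be run directly and reports overall status through its exit code (0 when all checks pass, 1 otherwise, per `run_verification()` and the `__main__` block). A minimal sketch of invoking it programmatically, assuming the path from the directory tree above:
```python
# Minimal sketch: run the installation verification script and act on its exit code.
# The script path is taken from the directory tree above; the surrounding logic is illustrative.
import subprocess
import sys

result = subprocess.run([sys.executable, "scripts/testing/test_installation.py"])
if result.returncode == 0:
    print("All verification checks passed.")
else:
    print("Some verification checks failed; review the output above.")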
--------------------------------------------------------------------------------
/Development-Sprint-November-2025.md:
--------------------------------------------------------------------------------
```markdown
1 | # Development Sprint - November 2025
2 |
3 | **Two Weeks. Seven Releases. Extraordinary Results.**
4 |
5 | Between November 12 and 26, 2025, the MCP Memory Service project completed a remarkable development sprint, combining performance breakthroughs, code quality milestones, and workflow automation at unprecedented speed.
6 |
7 | ---
8 |
9 | ## 📊 Sprint Overview
10 |
11 | | Metric | Achievement |
12 | |--------|-------------|
13 | | **Releases Shipped** | 7 major/minor versions |
14 | | **Performance Gains** | 10x to 534,628x improvements |
15 | | **Code Quality** | Grade D → Grade B (68-72/100) |
16 | | **Fastest Release Cycle** | 35 minutes (issue → production) |
17 | | **Lines of Duplicate Code Eliminated** | 176-186 lines |
18 | | **Critical Bugs Prevented** | 2 (caught by AI review) |
19 |
20 | ---
21 |
22 | ## 🚀 Performance Breakthroughs
23 |
24 | ### v8.39.0 - Storage-Layer Date-Range Filtering (Nov 26)
25 | **10x performance improvement** by moving analytics queries from application layer to database layer.
26 |
27 | #### The Problem
28 | Analytics endpoints were fetching ALL memories (10,000+) into Python, then filtering by date range in application code:
29 | ```python
30 | # Old approach - inefficient
31 | memories = await storage.get_all_memories(limit=10000)
32 | for memory in memories:
33 | if start_time <= memory.created_at <= end_time:
34 | # Process memory
35 | ```
36 |
37 | #### The Solution
38 | Push filtering to SQL database layer with indexed WHERE clauses:
39 | ```python
40 | # New approach - 10x faster
41 | async def get_memories_by_time_range(self, start_time: float, end_time: float):
42 | sql = """
43 | SELECT m.*
44 | FROM memories m
45 | WHERE m.created_at BETWEEN ? AND ?
46 | ORDER BY m.created_at DESC
47 | """
48 | # Database handles filtering with indexes
49 | ```
50 |
51 | #### Performance Impact
52 | | Backend | Before | After | Improvement |
53 | |---------|--------|-------|-------------|
54 | | **SQLite-vec** | ~500ms | ~50ms | **10x faster** |
55 | | **Cloudflare D1** | ~2-3s | ~200ms | **10-15x faster** |
56 | | **Data Transfer** | 50MB | 1.5MB | **97% reduction** |
57 |
58 | **Scalability**: Analytics now handle databases of any size efficiently (previously hard-limited to 10,000 memories).
59 |
60 | **Development Speed**: Issue #238 → Production release in **35 minutes** using automated workflows.
61 |
62 | ---
63 |
64 | ### v8.26.0 - MCP Global Caching Breakthrough (Nov 16)
65 | **MCP tools transformed from slowest to FASTEST** method for memory operations.
66 |
67 | #### Revolutionary Achievement
68 | **534,628x speedup** on cache hits - the most dramatic performance improvement in project history.
69 |
70 | #### Before v8.26.0
71 | - MCP Tools: ~1,810ms (slowest method)
72 | - HTTP API: ~479ms (fastest method)
73 |
74 | #### After v8.26.0
75 | - **MCP Tools (cached)**: ~0.01ms ← **NEW FASTEST**
76 | - MCP Tools (first call): ~2,485ms (one-time cost)
77 | - HTTP API: ~479ms
78 |
79 | #### Technical Implementation
80 | Created `CacheManager` class with global storage/service caching:
81 |
82 | ```python
83 | # Module-level cache persists across HTTP calls
84 | _storage_cache: Dict[str, Any] = {}
85 | _memory_service_cache: Dict[str, MemoryService] = {}
86 |
87 | async def get_or_create_storage(backend: str, path: str):
88 | cache_key = f"{backend}:{path}"
89 | if cache_key not in _storage_cache:
90 | _storage_cache[cache_key] = await create_storage(backend, path)
91 | return _storage_cache[cache_key]
92 | ```
93 |
94 | #### Real-World Results
95 | - **90%+ cache hit rate** in production
96 | - **41x faster than HTTP API** after warm-up
97 | - **99.9996% latency reduction** on cached operations
98 |
99 | **Impact**: Sub-millisecond response times transform the user experience for Claude Desktop and Claude Code users.
100 |
101 | ---
102 |
103 | ## 🎯 Code Quality Journey: Grade D → Grade B
104 |
105 | ### Three-Release Sprint (Nov 22-24)
106 | Achieved **100% of Phase 2 complexity reduction targets** across three coordinated releases.
107 |
108 | #### v8.34.0 - First Function (Nov 22)
109 | **40 minutes**: Analysis → PR → Review → Merge → Release
110 |
111 | - `analytics.py::get_memory_growth()` complexity: 11 → 6-7 (-4 to -5 points)
112 | - Pattern: PeriodType Enum + data-driven approach
113 | - gemini-pr-automator: 3 review iterations, exceeded target
114 |
115 | #### v8.35.0 - Batch 1 High Priority (Nov 24)
116 | **45 minutes**: 2 high-priority functions
117 |
118 | - `install.py::configure_paths()` 15 → 5 (**-10 points**)
119 | - Extracted 4 helpers: `get_platform_base_dir()`, `setup_storage_directories()`, `build_mcp_env_config()`, `update_claude_config_file()`
120 | - `cloudflare.py::_search_by_tags_internal()` 13 → 8 (-5 points)
121 | - Extracted 3 helpers for tag normalization and query building
122 |
123 | #### v8.36.0 - Completion (Nov 24)
124 | **60 minutes**: Remaining 7 functions (100% complete!)
125 |
126 | - **2 consolidation functions** (-8 points): Context managers + config-driven patterns
127 | - **3 analytics functions** (-8 points): 70+ lines extracted
128 | - **1 GPU detection** (-2 points): Platform-specific checks unified
129 | - **1 Cloudflare helper** (-1 point): Timestamp fetching
130 |
131 | **CRITICAL**: Gemini Code Assist caught 2 bugs before release:
132 | 1. ❌→✅ Timezone bug: `datetime.now()` → `datetime.now(timezone.utc)` (would have caused incorrect consolidation timestamps)
133 | 2. ❌→✅ Analytics double-counting: Fixed total_memories calculation (would have shown incorrect percentages)
134 |
135 | #### Final Metrics - 100% Achievement
136 |
137 | | Metric | Target | Achieved | Result |
138 | |--------|--------|----------|--------|
139 | | Functions Refactored | 10 | 10 | ✅ 100% |
140 | | Complexity Points Reduced | -39 | -39 | ✅ 100% |
141 | | Complexity Score Gain | +10 | +11 | ✅ 110% |
142 | | Health Score | 66-70 | 68-72 | ✅ **Grade B** |
143 |
144 | **Before Phase 2**: Health 63/100 (Grade D)
145 | **After Phase 2**: Health 68-72/100 (Grade B) ← **Full grade improvement**
146 |
147 | ---
148 |
149 | ### v8.38.0 - Phase 2b Duplication Reduction (Nov 25)
150 | **176-186 lines of duplicate code eliminated** across 10 consolidation commits.
151 |
152 | #### Helper Extraction Pattern
153 | Consistently applied methodology across all consolidations:
154 |
155 | ```python
156 | def _helper_function_name(param1, param2, optional=None):
157 | """
158 | Brief description of consolidation purpose.
159 |
160 | Args:
161 | param1: Varying parameter between original blocks
162 | param2: Another variation point
163 | optional: Optional parameter with sensible default
164 |
165 | Returns:
166 | Result type
167 | """
168 | # Consolidated logic with parameterized differences
169 | pass
170 | ```
171 |
172 | #### Key Consolidations
173 | 1. **`parse_mcp_response()`** - MCP protocol error handling (3 blocks, 47 lines)
174 | 2. **`_get_or_create_memory_service()`** - Two-tier cache management (3 blocks, 65 lines)
175 | 3. **`_calculate_season_date_range()`** - Winter boundary logic (2 blocks, 24 lines; sketched after this list)
176 | 4. **`_process_and_store_chunk()`** - Document processing (3 blocks, ~40-50 lines)
177 |
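178 | For illustration, a minimal sketch of the winter-boundary handling that such a helper consolidates (the function name matches the helper above, but the signature and constants are assumptions, not the actual implementation):
179 | 
180 | ```python
181 | from datetime import datetime, timezone
182 | 
183 | SEASON_MONTHS = {"spring": (3, 5), "summer": (6, 8), "autumn": (9, 11), "winter": (12, 2)}
184 | 
185 | def _calculate_season_date_range(season: str, year: int) -> tuple[datetime, datetime]:
186 |     start_month, end_month = SEASON_MONTHS[season]
187 |     start = datetime(year, start_month, 1, tzinfo=timezone.utc)
188 |     # Winter runs December -> February, so its end month falls in the *next* year
189 |     end_year = year + 1 if end_month < start_month else year
190 |     # Exclusive end: first day of the month after the season's last month
191 |     end = datetime(end_year, end_month + 1, 1, tzinfo=timezone.utc)
192 |     return start, end
193 | ```
194 | 
195 | Both call sites that previously duplicated this logic can then pass in their own season and year.
196 | 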
178 | #### Strategic Decisions
179 | **4 groups intentionally deferred**, with documented rationale including:
180 | - High-risk backend logic (60 lines, critical startup code)
181 | - Different semantic contexts (error handling patterns)
182 | - Low-priority test/script duplication
183 |
184 | **Key Insight**: Quality over arbitrary metrics - pursuing the <3% duplication target would have required high-risk, low-benefit consolidations.
185 |
186 | #### Results
187 | - **Duplication**: 5.5% → 4.5-4.7% (approaching <3% target)
188 | - **Test Coverage**: 100% maintained throughout
189 | - **Breaking Changes**: Zero - complete backward compatibility
190 |
191 | ---
192 |
193 | ## 🤖 AI-Assisted Development Workflow
194 |
195 | ### Agent Ecosystem
196 | Three specialized agents orchestrated the development workflow:
197 |
198 | #### 1. github-release-manager
199 | **Complete release automation** - Zero manual steps
200 |
201 | **Workflow**:
202 | 1. Four-file version bump (\_\_init\_\_.py, pyproject.toml, README.md, uv.lock)
203 | 2. CHANGELOG.md updates with detailed metrics
204 | 3. Git operations (commit, tag, push)
205 | 4. GitHub Release creation with release notes
206 | 5. CI/CD verification (Docker Publish, PyPI Publish, HTTP-MCP Bridge)
207 |
208 | **Impact**: 3 complete releases in Phase 2 sprint with consistent documentation quality.
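209 | 
210 | As an illustration of step 1, a version-consistency check over the bumped files might look like the sketch below (the paths and regexes are assumptions, not the agent's actual logic):
211 | 
212 | ```python
213 | import re
214 | import sys
215 | from pathlib import Path
216 | 
217 | def read_version(path: str, pattern: str) -> str:
218 |     text = Path(path).read_text(encoding="utf-8")
219 |     match = re.search(pattern, text, re.MULTILINE)
220 |     return match.group(1) if match else "<missing>"
221 | 
222 | versions = {
223 |     "src/mcp_memory_service/__init__.py": read_version(
224 |         "src/mcp_memory_service/__init__.py", r'^__version__\s*=\s*"([^"]+)"'),
225 |     "pyproject.toml": read_version("pyproject.toml", r'^version\s*=\s*"([^"]+)"'),
226 | }
227 | if len(set(versions.values())) != 1:
228 |     sys.exit(f"Version mismatch: {versions}")
229 | print(f"Version consistent: {next(iter(versions.values()))}")
230 | ```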
209 |
210 | #### 2. gemini-pr-automator
211 | **Automated PR review cycles** - Eliminates "Wait 1min → /gemini review" loops
212 |
213 | **Features**:
214 | - Automated Gemini Code Assist review iteration
215 | - Breaking change detection
216 | - Test generation for new code
217 | - Quality gate checks
218 |
219 | **v8.36.0 Example**:
220 | - 5 review iterations
221 | - Caught 2 CRITICAL bugs before release
222 | - Saved 2-3 hours of manual review
223 |
224 | **Time Savings**: 10-30 minutes per PR across 9 total review iterations in Phase 2.
225 |
226 | #### 3. amp-bridge
227 | **Complete code generation** - Not just analysis
228 |
229 | **Usage**:
230 | - Provided full implementations (not just suggestions)
231 | - Zero syntax errors in generated code
232 | - Strategic token conservation (~50-60K tokens saved)
233 |
234 | **User Feedback**: "way faster than claude code"
235 |
236 | ---
237 |
238 | ## 📈 Development Velocity Metrics
239 |
240 | ### Release Cycle Times
241 |
242 | | Release | Date | Development Time | Notable |
243 | |---------|------|------------------|---------|
244 | | **v8.39.0** | Nov 26 | **35 minutes** | Issue → Production (fastest ever) |
245 | | v8.38.0 | Nov 25 | ~90 minutes | 10 consolidation commits |
246 | | v8.36.0 | Nov 24 | 60 minutes | 7 functions, 2 critical bugs caught |
247 | | v8.35.0 | Nov 24 | 45 minutes | 2 high-priority functions |
248 | | v8.34.0 | Nov 22 | 40 minutes | First Phase 2 function |
249 |
250 | ### Phase 2 Complete Sprint
251 | **Total Time**: ~4 hours across 3 days for 10-function refactoring
252 | **vs Manual Estimate**: 8-12 hours
253 | **Time Savings**: 50-67% with AI agents
254 |
255 | ### Critical Bug Prevention
256 | **2 bugs caught by Gemini Code Assist before release**:
257 | - Timezone handling in consolidation scheduler
258 | - Analytics calculation errors
259 |
260 | **Impact**: Would have required emergency hotfixes if shipped to production.
261 |
262 | ---
263 |
264 | ## 🔧 Technical Patterns Established
265 |
266 | ### 1. Database-Layer Filtering
267 | **Pattern**: Push filtering to SQL WHERE clauses instead of application code
268 | ```python
269 | # Bad: Application-layer filtering
270 | memories = await get_all_memories(limit=10000)
271 | filtered = [m for m in memories if start <= m.created_at <= end]
272 |
273 | # Good: Database-layer filtering
274 | memories = await get_memories_by_time_range(start, end)
275 | ```
276 | **Benefit**: 10x performance, leverages indexes, scales to unlimited data
277 |
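278 | A minimal sketch of the database-layer form, using plain sqlite3 for illustration (the table and column names are assumptions about the actual schema):
279 | 
280 | ```python
281 | import sqlite3
282 | 
283 | def get_memories_by_time_range(conn: sqlite3.Connection, start_ts: float, end_ts: float):
284 |     """Illustrative only: let the database index do the time-range filtering."""
285 |     cursor = conn.execute(
286 |         "SELECT content_hash, content, created_at FROM memories "
287 |         "WHERE created_at BETWEEN ? AND ? "
288 |         "ORDER BY created_at DESC",
289 |         (start_ts, end_ts),
290 |     )
291 |     return cursor.fetchall()
292 | ```
293 | 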
278 | ### 2. Global Caching Strategy
279 | **Pattern**: Module-level cache dictionaries for stateless HTTP environments
280 | ```python
281 | _cache: Dict[str, Any] = {}
282 |
283 | def get_or_create(key: str):
284 | if key not in _cache:
285 | _cache[key] = create_expensive_resource()
286 | return _cache[key]
287 | ```
288 | **Benefit**: 534,628x speedup, 90%+ hit rate, sub-millisecond response
289 |
290 | ### 3. Helper Extraction for Duplication
291 | **Pattern**: Parameterize differences, extract to helper function
292 | ```python
293 | # Before: 3 duplicate blocks
294 | # After: 1 helper function with 3 callers
295 | def _helper(varying_param, optional=None):
296 | # Consolidated logic
297 | pass
298 | ```
299 | **Benefit**: 176-186 lines eliminated, improved maintainability
300 |
301 | ### 4. Configuration-Driven Logic
302 | **Pattern**: Replace if/elif chains with dictionary lookups
303 | ```python
304 | # Before
305 | if horizon == 'daily':
306 | days = 1
307 | elif horizon == 'weekly':
308 | days = 7
309 | # ... more elif
310 |
311 | # After
312 | HORIZON_CONFIGS = {
313 | 'daily': {'days': 1, ...},
314 | 'weekly': {'days': 7, ...},
315 | }
316 | config = HORIZON_CONFIGS[horizon]
317 | ```
318 | **Benefit**: Reduced complexity, easier to extend, config-as-data
319 |
320 | ---
321 |
322 | ## 📚 Key Lessons Learned
323 |
324 | ### What Worked Excellently
325 |
326 | 1. **Agent-First Approach**
327 | - Using specialized agents (amp-bridge, github-release-manager, gemini-pr-automator) dramatically improved efficiency
328 | - 50-67% time savings vs manual workflows
329 |
330 | 2. **Small Batch Releases**
331 | - v8.34.0 (1 function) had deepest review quality
332 | - Easier to reason about changes, faster iteration
333 |
334 | 3. **Gemini Code Assist Integration**
335 | - Caught 2 critical bugs before release
336 | - Provided portability fixes and API modernization suggestions
337 | - Iterative review cycles improved code quality
338 |
339 | 4. **Pattern Consistency**
340 | - Establishing helper extraction pattern early made subsequent work systematic
341 | - 10 consolidation commits followed the same methodology
342 |
343 | ### Process Improvements Demonstrated
344 |
345 | 1. **Token Conservation**
346 | - Strategic use of amp-bridge for heavy work saved ~50-60K tokens
347 | - Allowed more complex work within context limits
348 |
349 | 2. **Quality Over Metrics**
350 | - Deferring high-risk groups showed mature engineering judgment
351 | - Grade B achieved without compromising stability
352 |
353 | 3. **Release Automation**
354 | - github-release-manager ensured no documentation steps missed
355 | - Consistent release quality across 7 versions
356 |
357 | 4. **Test Coverage**
358 | - 100% coverage throughout maintained confidence in changes
359 | - All changes backward compatible (zero breaking changes)
360 |
361 | ---
362 |
363 | ## 🎉 Sprint Highlights
364 |
365 | ### By The Numbers
366 | - **7 releases** in 14 days
367 | - **10x to 534,628x** performance improvements
368 | - **35-minute** fastest release cycle
369 | - **176-186 lines** of duplicate code eliminated
370 | - **Grade D → Grade B** health score improvement
371 | - **2 critical bugs** prevented before release
372 | - **50-67% time savings** with AI agents
373 | - **100% test coverage** maintained
374 | - **0 breaking changes** across all releases
375 |
376 | ### Most Impressive Achievement
377 | **v8.39.0 in 35 minutes**: From issue analysis (#238) to production release with 10x performance improvement, comprehensive tests, and full documentation - all in just over half an hour.
378 |
379 | ### Innovation Breakthrough
380 | **MCP Global Caching (v8.26.0)**: Transformed MCP tools from slowest (1,810ms) to fastest (0.01ms) method - a 534,628x improvement that sets new standards for MCP server performance.
381 |
382 | ### Quality Milestone
383 | **Phase 2 Complete (v8.34-36)**: Achieved 100% of complexity reduction targets across three coordinated releases in 4 hours, with AI code review catching critical bugs before production.
384 |
385 | ---
386 |
387 | ## 🔮 Future Implications
388 |
389 | ### Performance Standards
390 | - Database-layer filtering now standard for all analytics endpoints
391 | - Global caching pattern applicable to all stateless HTTP environments
392 | - Sub-millisecond response times set user experience baseline
393 |
394 | ### Code Quality Foundation
395 | - Helper extraction pattern established for future consolidations
396 | - Configuration-driven logic reduces complexity systematically
397 | - 100% test coverage requirement proven sustainable
398 |
399 | ### Development Velocity
400 | - 35-minute release cycles achievable with agent automation
401 | - AI code review preventing bugs before production
402 | - Agent-first workflows becoming default approach
403 |
404 | ---
405 |
406 | ## 📖 Related Resources
407 |
408 | **GitHub Releases**:
409 | - [v8.39.0 - Storage-Layer Date-Range Filtering](https://github.com/doobidoo/mcp-memory-service/releases/tag/v8.39.0)
410 | - [v8.38.0 - Phase 2b Duplication Reduction](https://github.com/doobidoo/mcp-memory-service/releases/tag/v8.38.0)
411 | - [v8.36.0 - Phase 2 Complete](https://github.com/doobidoo/mcp-memory-service/releases/tag/v8.36.0)
412 | - [v8.26.0 - MCP Global Caching](https://github.com/doobidoo/mcp-memory-service/releases/tag/v8.26.0)
413 |
414 | **Project Repository**: https://github.com/doobidoo/mcp-memory-service
415 |
416 | **Issues**:
417 | - [#238 - Analytics Performance Optimization](https://github.com/doobidoo/mcp-memory-service/issues/238)
418 | - [#240 - Phase 2 Code Quality](https://github.com/doobidoo/mcp-memory-service/issues/240)
419 | - [#246 - Phase 2b Duplication Reduction](https://github.com/doobidoo/mcp-memory-service/issues/246)
420 |
421 | ---
422 |
423 | **Last Updated**: November 26, 2025
424 | **Sprint Duration**: November 12-26, 2025 (14 days)
425 | **Total Releases**: 7 major/minor versions
426 |
```
--------------------------------------------------------------------------------
/docs/api/tag-standardization.md:
--------------------------------------------------------------------------------
```markdown
1 | # Tag Standardization Guide
2 |
3 | A comprehensive guide to creating and maintaining a consistent, professional tag system for optimal knowledge organization in the MCP Memory Service.
4 |
5 | ## 🎯 Overview
6 |
7 | Effective tag standardization is the foundation of a powerful knowledge management system. This guide establishes proven tag schemas, naming conventions, and organizational patterns that transform chaotic information into searchable, structured knowledge.
8 |
9 | ## 📋 Core Principles
10 |
11 | ### 1. Consistency
12 | - Use standardized naming conventions
13 | - Apply tags systematically across similar content
14 | - Maintain format consistency (lowercase, hyphens, etc.)
15 |
16 | ### 2. Hierarchy
17 | - Organize tags from general to specific
18 | - Use multiple category levels for comprehensive organization
19 | - Create logical groupings that reflect actual usage patterns
20 |
21 | ### 3. Utility
22 | - Tags should enhance discoverability
23 | - Focus on how information will be retrieved
24 | - Balance detail with practical searchability
25 |
26 | ### 4. Evolution
27 | - Tag schemas should adapt to changing needs
28 | - Regular review and refinement process
29 | - Documentation of changes and rationale
30 |
31 | ## 🏷️ Standardized Tag Schema
32 |
33 | ### Category 1: Projects & Repositories
34 |
35 | **Primary Projects:**
36 | ```
37 | mcp-memory-service # Core memory service development
38 | memory-dashboard # Dashboard application
39 | github-integration # GitHub connectivity and automation
40 | mcp-protocol # Protocol-level development
41 | cloudflare-workers # Edge computing integration
42 | ```
43 |
44 | **Project Components:**
45 | ```
46 | frontend # User interface components
47 | backend # Server-side development
48 | api # API design and implementation
49 | database # Data storage and management
50 | infrastructure # Deployment and DevOps
51 | ```
52 |
53 | **Usage Example:**
54 | ```javascript
55 | {
56 | "tags": ["mcp-memory-service", "backend", "database", "chromadb"]
57 | }
58 | ```
59 |
60 | ### Category 2: Technologies & Tools
61 |
62 | **Programming Languages:**
63 | ```
64 | python # Python development
65 | typescript # TypeScript development
66 | javascript # JavaScript development
67 | bash # Shell scripting
68 | sql # Database queries
69 | ```
70 |
71 | **Frameworks & Libraries:**
72 | ```
73 | react # React development
74 | fastapi # FastAPI framework
75 | chromadb # ChromaDB vector database
76 | sentence-transformers # Embedding models
77 | pytest # Testing framework
78 | ```
79 |
80 | **Tools & Platforms:**
81 | ```
82 | git # Version control
83 | docker # Containerization
84 | github # Repository management
85 | aws # Amazon Web Services
86 | npm # Node package management
87 | ```
88 |
89 | **Usage Example:**
90 | ```javascript
91 | {
92 | "tags": ["python", "chromadb", "sentence-transformers", "pytest"]
93 | }
94 | ```
95 |
96 | ### Category 3: Activities & Processes
97 |
98 | **Development Activities:**
99 | ```
100 | development # General development work
101 | implementation # Feature implementation
102 | debugging # Bug investigation and fixing
103 | testing # Quality assurance activities
104 | refactoring # Code improvement
105 | optimization # Performance enhancement
106 | ```
107 |
108 | **Documentation Activities:**
109 | ```
110 | documentation # Writing documentation
111 | tutorial # Creating tutorials
112 | guide # Step-by-step guides
113 | reference # Reference materials
114 | examples # Code examples
115 | ```
116 |
117 | **Operational Activities:**
118 | ```
119 | deployment # Application deployment
120 | monitoring # System monitoring
121 | backup # Data backup processes
122 | migration # Data or system migration
123 | maintenance # System maintenance
124 | troubleshooting # Problem resolution
125 | ```
126 |
127 | **Usage Example:**
128 | ```javascript
129 | {
130 | "tags": ["debugging", "troubleshooting", "testing", "verification"]
131 | }
132 | ```
133 |
134 | ### Category 4: Content Types & Formats
135 |
136 | **Knowledge Types:**
137 | ```
138 | concept # Conceptual information
139 | architecture # System architecture
140 | design # Design decisions and patterns
141 | best-practices # Proven methodologies
142 | methodology # Systematic approaches
143 | workflow # Process workflows
144 | ```
145 |
146 | **Documentation Formats:**
147 | ```
148 | tutorial # Step-by-step instructions
149 | reference # Quick reference materials
150 | example # Code or process examples
151 | template # Reusable templates
152 | checklist # Verification checklists
153 | summary # Condensed information
154 | ```
155 |
156 | **Technical Content:**
157 | ```
158 | configuration # System configuration
159 | specification # Technical specifications
160 | analysis # Technical analysis
161 | research # Research findings
162 | review # Code or process reviews
163 | ```
164 |
165 | **Usage Example:**
166 | ```javascript
167 | {
168 | "tags": ["architecture", "design", "best-practices", "reference"]
169 | }
170 | ```
171 |
172 | ### Category 5: Status & Progress
173 |
174 | **Development Status:**
175 | ```
176 | resolved # Completed and verified
177 | in-progress # Currently being worked on
178 | blocked # Waiting for external dependencies
179 | needs-investigation # Requires further analysis
180 | planned # Scheduled for future work
181 | cancelled # No longer being pursued
182 | ```
183 |
184 | **Quality Status:**
185 | ```
186 | verified # Tested and confirmed working
187 | tested # Has undergone testing
188 | reviewed # Has been peer reviewed
189 | approved # Officially approved
190 | experimental # Proof of concept stage
191 | deprecated # No longer recommended
192 | ```
193 |
194 | **Priority Levels:**
195 | ```
196 | urgent # Immediate attention required
197 | high-priority # Important, should be addressed soon
198 | normal-priority # Standard priority
199 | low-priority # Can be addressed when time allows
200 | nice-to-have # Enhancement, not critical
201 | ```
202 |
203 | **Usage Example:**
204 | ```javascript
205 | {
206 | "tags": ["resolved", "verified", "high-priority", "production-ready"]
207 | }
208 | ```
209 |
210 | ### Category 6: Context & Temporal
211 |
212 | **Temporal Markers:**
213 | ```
214 | january-2025 # Specific month context
215 | q1-2025 # Quarterly context
216 | milestone-v1 # Version milestones
217 | release-candidate # Release stages
218 | sprint-3 # Development sprints
219 | ```
220 |
221 | **Environmental Context:**
222 | ```
223 | development # Development environment
224 | staging # Staging environment
225 | production # Production environment
226 | testing # Testing environment
227 | local # Local development
228 | ```
229 |
230 | **Scope & Impact:**
231 | ```
232 | breaking-change # Introduces breaking changes
233 | feature # New feature development
234 | enhancement # Improvement to existing feature
235 | hotfix # Critical fix
236 | security # Security-related
237 | performance # Performance-related
238 | ```
239 |
240 | **Usage Example:**
241 | ```javascript
242 | {
243 | "tags": ["june-2025", "production", "security", "hotfix", "critical"]
244 | }
245 | ```
246 |
247 | ## 🎨 Tag Naming Conventions
248 |
249 | ### Format Standards
250 |
251 | **Basic Rules:**
252 | - Use lowercase letters
253 | - Replace spaces with hyphens: `memory-service` not `memory service`
254 | - Use descriptive but concise terms
255 | - Avoid abbreviations unless widely understood
256 | - Use singular form when possible: `bug` not `bugs`
257 |
258 | **Multi-word Tags:**
259 | ```
260 | ✅ Good: memory-service, github-integration, best-practices
261 | ❌ Bad: memoryservice, GitHub_Integration, bestPractices
262 | ```
263 |
264 | **Version and Date Tags:**
265 | ```
266 | ✅ Good: v1-2-0, january-2025, q1-2025
267 | ❌ Bad: v1.2.0, Jan2025, Q1/2025
268 | ```
269 |
270 | **Status and State Tags:**
271 | ```
272 | ✅ Good: in-progress, needs-investigation, high-priority
273 | ❌ Bad: inProgress, needsInvestigation, highPriority
274 | ```
275 |
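276 | A small helper can enforce these format standards programmatically; the sketch below is illustrative and not part of the service:
277 | 
278 | ```python
279 | import re
280 | 
281 | def normalize_tag(raw: str) -> str:
282 |     """Apply the rules above: lowercase, hyphen-separated, no stray punctuation."""
283 |     tag = raw.strip().lower()
284 |     tag = re.sub(r"[\s_./]+", "-", tag)   # spaces, underscores, dots, slashes -> hyphens
285 |     tag = re.sub(r"[^a-z0-9-]", "", tag)  # drop any remaining punctuation
286 |     return re.sub(r"-{2,}", "-", tag).strip("-")
287 | 
288 | assert normalize_tag("GitHub_Integration") == "github-integration"
289 | assert normalize_tag("v1.2.0") == "v1-2-0"
290 | ```
291 | 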
276 | ### Hierarchical Naming
277 |
278 | **Use progressive specificity:**
279 | ```
280 | General → Specific
281 | project → mcp-memory-service → backend → database
282 | testing → integration-testing → api-testing
283 | issue → bug → critical-bug → data-corruption
284 | ```
285 |
286 | **Example Progression:**
287 | ```javascript
288 | // General testing memory
289 | {"tags": ["testing", "verification"]}
290 |
291 | // Specific test type
292 | {"tags": ["testing", "unit-testing", "python", "pytest"]}
293 |
294 | // Very specific test
295 | {"tags": ["testing", "unit-testing", "memory-storage", "chromadb", "pytest"]}
296 | ```
297 |
298 | ## 📊 Tag Application Patterns
299 |
300 | ### Multi-Category Tagging
301 |
302 | **Recommended Pattern:**
303 | Apply tags from 3-6 categories for comprehensive organization:
304 |
305 | ```javascript
306 | {
307 | "tags": [
308 | // Project/Repository (1-2 tags)
309 | "mcp-memory-service", "backend",
310 |
311 | // Technology (1-3 tags)
312 | "python", "chromadb",
313 |
314 | // Activity (1-2 tags)
315 | "debugging", "troubleshooting",
316 |
317 | // Content Type (1 tag)
318 | "troubleshooting-guide",
319 |
320 | // Status (1 tag)
321 | "resolved",
322 |
323 | // Context (0-2 tags)
324 | "june-2025", "production"
325 | ]
326 | }
327 | ```
328 |
329 | ### Content-Specific Patterns
330 |
331 | **Bug Reports and Issues:**
332 | ```javascript
333 | {
334 | "tags": [
335 | "issue-7", // Specific issue reference
336 | "timestamp-corruption", // Problem description
337 | "critical-bug", // Severity
338 | "mcp-memory-service", // Project
339 | "chromadb", // Technology
340 | "resolved" // Status
341 | ]
342 | }
343 | ```
344 |
345 | **Documentation:**
346 | ```javascript
347 | {
348 | "tags": [
349 | "documentation", // Content type
350 | "memory-maintenance", // Topic
351 | "best-practices", // Knowledge type
352 | "tutorial", // Format
353 | "mcp-memory-service", // Project
354 | "reference" // Usage type
355 | ]
356 | }
357 | ```
358 |
359 | **Development Milestones:**
360 | ```javascript
361 | {
362 | "tags": [
363 | "milestone", // Event type
364 | "v1-2-0", // Version
365 | "production-ready", // Status
366 | "mcp-memory-service", // Project
367 | "feature-complete", // Achievement
368 | "june-2025" // Timeline
369 | ]
370 | }
371 | ```
372 |
373 | **Research and Concepts:**
374 | ```javascript
375 | {
376 | "tags": [
377 | "concept", // Content type
378 | "memory-consolidation", // Topic
379 | "architecture", // Category
380 | "research", // Activity
381 | "cognitive-processing", // Domain
382 | "system-design" // Application
383 | ]
384 | }
385 | ```
386 |
387 | ## 🔍 Tag Selection Guidelines
388 |
389 | ### Step-by-Step Tag Selection
390 |
391 | **1. Start with Primary Context**
392 | - What project or domain does this relate to?
393 | - What's the main subject matter?
394 |
395 | **2. Add Technical Details**
396 | - What technologies are involved?
397 | - What tools or platforms?
398 |
399 | **3. Describe the Activity**
400 | - What was being done?
401 | - What type of work or process?
402 |
403 | **4. Classify the Content**
404 | - What kind of information is this?
405 | - How will it be used in the future?
406 |
407 | **5. Add Status Information**
408 | - What's the current state?
409 | - What's the priority or urgency?
410 |
411 | **6. Include Temporal Context**
412 | - When is this relevant?
413 | - What timeline or milestone?
414 |
415 | ### Tag Selection Examples
416 |
417 | **Example 1: Debug Session Memory**
418 |
419 | Content: "Fixed issue with ChromaDB connection timeout in production"
420 |
421 | **Analysis:**
422 | - Primary Context: MCP Memory Service, backend
423 | - Technical: ChromaDB, connection issues, production
424 | - Activity: Debugging, troubleshooting, problem resolution
425 | - Content: Troubleshooting solution, fix documentation
426 | - Status: Resolved, production issue
427 | - Temporal: Current work, immediate fix
428 |
429 | **Selected Tags:**
430 | ```javascript
431 | {
432 | "tags": [
433 | "mcp-memory-service", "backend",
434 | "chromadb", "connection-timeout", "production",
435 | "debugging", "troubleshooting",
436 | "solution", "hotfix",
437 | "resolved", "critical"
438 | ]
439 | }
440 | ```
441 |
442 | **Example 2: Planning Document**
443 |
444 | Content: "Q2 2025 roadmap for memory service improvements"
445 |
446 | **Analysis:**
447 | - Primary Context: MCP Memory Service, planning
448 | - Technical: General service improvements
449 | - Activity: Planning, roadmap development
450 | - Content: Strategic document, planning guide
451 | - Status: Planning phase, future work
452 | - Temporal: Q2 2025, quarterly planning
453 |
454 | **Selected Tags:**
455 | ```javascript
456 | {
457 | "tags": [
458 | "mcp-memory-service", "planning",
459 | "roadmap", "improvements",
460 | "strategy", "planning-document",
461 | "q2-2025", "quarterly",
462 | "future-work", "enhancement"
463 | ]
464 | }
465 | ```
466 |
467 | ## 🛠️ Tag Management Tools
468 |
469 | ### Quality Control Queries
470 |
471 | **Find inconsistent tagging:**
472 | ```javascript
473 | // Look for similar content with different tag patterns
474 | retrieve_memory({"query": "debugging troubleshooting", "n_results": 10})
475 | search_by_tag({"tags": ["debug"]}) // vs search_by_tag({"tags": ["debugging"]})
476 | ```
477 |
478 | **Identify tag standardization opportunities:**
479 | ```javascript
480 | // Find memories that might need additional tags
481 | retrieve_memory({"query": "issue bug problem", "n_results": 15})
482 | search_by_tag({"tags": ["test"]}) // Check if generic tags need specificity
483 | ```
484 |
485 | ### Tag Analysis Scripts
486 |
487 | **Tag frequency analysis:**
488 | ```javascript
489 | // Analyze which tags are most/least used
490 | check_database_health() // Get overall statistics
491 | search_by_tag({"tags": ["frequent-tag"]}) // Count instances
492 | ```
493 |
494 | **Pattern consistency check:**
495 | ```javascript
496 | // Verify similar content has similar tagging
497 | const patterns = [
498 | "mcp-memory-service",
499 | "debugging",
500 | "issue-",
501 | "resolved"
502 | ];
503 | // Check each pattern for consistency
504 | ```
505 |
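506 | Where the code-execution API is available, the same frequency analysis can be scripted directly. A rough sketch (the query and limit are placeholders to adjust for your corpus):
507 | 
508 | ```python
509 | from collections import Counter
510 | 
511 | from mcp_memory_service.api import search
512 | 
513 | # Sample a slice of memories and tally tag usage to spot inconsistent
514 | # variants such as "debug" vs "debugging".
515 | results = search("mcp-memory-service", limit=100)
516 | tag_counts = Counter(tag for m in results.memories for tag in m.tags)
517 | for tag, count in tag_counts.most_common(20):
518 |     print(f"{tag}: {count}")
519 | ```
520 | 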
506 | ## 📈 Tag Schema Evolution
507 |
508 | ### Regular Review Process
509 |
510 | **Monthly Review Questions:**
511 | 1. Are there new tag categories needed?
512 | 2. Are existing tags being used consistently?
513 | 3. Should any tags be merged or split?
514 | 4. Are there emerging patterns that need standardization?
515 |
516 | **Quarterly Schema Updates:**
517 | 1. Analyze tag usage statistics
518 | 2. Identify inconsistencies or gaps
519 | 3. Propose schema improvements
520 | 4. Document rationale for changes
521 | 5. Implement updates systematically
522 |
523 | ### Schema Version Control
524 |
525 | **Track changes with metadata:**
526 | ```javascript
527 | store_memory({
528 | "content": "Tag Schema Update v2.1: Added security-related tags, consolidated testing categories...",
529 | "metadata": {
530 | "tags": ["tag-schema", "version-2-1", "schema-update", "documentation"],
531 | "type": "schema-documentation"
532 | }
533 | })
534 | ```
535 |
536 | ## 🎯 Best Practices Summary
537 |
538 | ### Do's
539 |
540 | ✅ **Be Consistent**: Use the same tag patterns for similar content
541 | ✅ **Use Multiple Categories**: Apply tags from different categories for comprehensive organization
542 | ✅ **Follow Naming Conventions**: Stick to lowercase, hyphenated format
543 | ✅ **Think About Retrieval**: Tag based on how you'll search for information
544 | ✅ **Document Decisions**: Record rationale for tag choices
545 | ✅ **Review Regularly**: Update and improve tag schemas over time
546 |
547 | ### Don'ts
548 |
549 | ❌ **Over-tag**: Don't add too many tags; focus on the most relevant
550 | ❌ **Under-tag**: Don't use too few tags; aim for 4-8 well-chosen tags
551 | ❌ **Use Inconsistent Formats**: Avoid mixing naming conventions
552 | ❌ **Create Redundant Tags**: Don't duplicate information already in content
553 | ❌ **Ignore Context**: Don't forget temporal or project context
554 | ❌ **Set and Forget**: Don't create tags without ongoing maintenance
555 |
556 | ---
557 |
558 | *This standardization guide provides the foundation for creating a professional, searchable, and maintainable knowledge management system. Consistent application of these standards will dramatically improve the value and usability of your MCP Memory Service.*
```
--------------------------------------------------------------------------------
/scripts/validation/verify_environment.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Heinrich Krupp
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """
17 | Enhanced environment verification script for MCP Memory Service.
18 | This script checks the system environment, hardware capabilities,
19 | and installed dependencies to ensure compatibility.
20 | """
21 | import os
22 | import sys
23 | import platform
24 | import subprocess
25 | import json
26 | import importlib
27 | import pkg_resources
28 | from pathlib import Path
29 | import traceback
30 | import ctypes
31 |
32 | # Import shared GPU detection utilities
33 | try:
34 | from mcp_memory_service.utils.gpu_detection import detect_gpu as shared_detect_gpu
35 | except ImportError:
36 | # Fallback for scripts directory context
37 | sys.path.insert(0, str(Path(__file__).parent.parent.parent))
38 | from src.mcp_memory_service.utils.gpu_detection import detect_gpu as shared_detect_gpu
39 |
40 | class EnvironmentVerifier:
41 | def __init__(self):
42 | self.verification_results = []
43 | self.critical_failures = []
44 | self.warnings = []
45 | self.system_info = self.detect_system()
46 | self.gpu_info = self.detect_gpu()
47 | self.claude_config = self.load_claude_config()
48 |
49 | def detect_system(self):
50 | """Detect system architecture and platform."""
51 | system_info = {
52 | "os_name": platform.system().lower(),
53 | "os_version": platform.version(),
54 | "architecture": platform.machine().lower(),
55 | "python_version": platform.python_version(),
56 | "cpu_count": os.cpu_count() or 1,
57 | "memory_gb": self.get_system_memory(),
58 | "in_virtual_env": sys.prefix != sys.base_prefix
59 | }
60 |
61 | self.verification_results.append(
62 | f"[OK] System: {platform.system()} {platform.version()}"
63 | )
64 | self.verification_results.append(
65 | f"[OK] Architecture: {system_info['architecture']}"
66 | )
67 | self.verification_results.append(
68 | f"[OK] Python: {system_info['python_version']}"
69 | )
70 |
71 | if system_info["in_virtual_env"]:
72 | self.verification_results.append(
73 | f"[OK] Virtual environment: {sys.prefix}"
74 | )
75 | else:
76 | self.warnings.append(
77 | "Not running in a virtual environment"
78 | )
79 |
80 | return system_info
81 |
82 | def get_system_memory(self):
83 | """Get the total system memory in GB."""
84 | try:
85 |             if platform.system().lower() == "linux":  # use platform directly; self.system_info is not yet assigned when called from detect_system()
86 | with open('/proc/meminfo', 'r') as f:
87 | for line in f:
88 | if line.startswith('MemTotal:'):
89 | memory_kb = int(line.split()[1])
90 | return round(memory_kb / (1024 * 1024), 2)
91 |
92 |             elif platform.system().lower() == "darwin":
93 | output = subprocess.check_output(['sysctl', '-n', 'hw.memsize']).decode('utf-8').strip()
94 | memory_bytes = int(output)
95 | return round(memory_bytes / (1024**3), 2)
96 |
97 |             elif platform.system().lower() == "windows":
98 | class MEMORYSTATUSEX(ctypes.Structure):
99 | _fields_ = [
100 | ('dwLength', ctypes.c_ulong),
101 | ('dwMemoryLoad', ctypes.c_ulong),
102 | ('ullTotalPhys', ctypes.c_ulonglong),
103 | ('ullAvailPhys', ctypes.c_ulonglong),
104 | ('ullTotalPageFile', ctypes.c_ulonglong),
105 | ('ullAvailPageFile', ctypes.c_ulonglong),
106 | ('ullTotalVirtual', ctypes.c_ulonglong),
107 | ('ullAvailVirtual', ctypes.c_ulonglong),
108 | ('ullAvailExtendedVirtual', ctypes.c_ulonglong),
109 | ]
110 |
111 | memoryStatus = MEMORYSTATUSEX()
112 | memoryStatus.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
113 | ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(memoryStatus))
114 | return round(memoryStatus.ullTotalPhys / (1024**3), 2)
115 |
116 | except Exception as e:
117 | self.warnings.append(f"Failed to get system memory: {e}")
118 |
119 | return 4.0 # Conservative default
120 |
121 | def detect_gpu(self):
122 | """Detect GPU and acceleration capabilities.
123 |
124 | Uses shared GPU detection module for platform detection.
125 | """
126 | # Adapt system info format for shared module
127 | adapted_system_info = {
128 | "is_windows": self.system_info["os_name"] == "windows",
129 | "is_linux": self.system_info["os_name"] == "linux",
130 | "is_macos": self.system_info["os_name"] == "darwin",
131 | "is_arm": self.system_info["architecture"] in ("arm64", "aarch64")
132 | }
133 |
134 | # Use shared GPU detection module
135 | gpu_info = shared_detect_gpu(adapted_system_info)
136 |
137 | # Append verification results (maintain verifier output format)
138 | if gpu_info.get("has_cuda"):
139 | cuda_version = gpu_info.get("cuda_version")
140 | self.verification_results.append(
141 | f"[OK] CUDA detected: {cuda_version or 'Unknown version'}"
142 | )
143 | elif gpu_info.get("has_rocm"):
144 | rocm_version = gpu_info.get("rocm_version")
145 | self.verification_results.append(
146 | f"[OK] ROCm detected: {rocm_version or 'Unknown version'}"
147 | )
148 | elif gpu_info.get("has_mps"):
149 | self.verification_results.append(
150 | "[OK] Apple Metal Performance Shaders (MPS) detected"
151 | )
152 | elif gpu_info.get("has_directml"):
153 | directml_version = gpu_info.get("directml_version")
154 | if directml_version:
155 | self.verification_results.append(f"[OK] DirectML detected: {directml_version}")
156 | else:
157 | self.verification_results.append("[OK] DirectML detected")
158 | else:
159 | self.verification_results.append(
160 | "[OK] Using CPU-only mode (no GPU acceleration detected)"
161 | )
162 |
163 | return gpu_info
164 |
165 | def load_claude_config(self):
166 | """Load configuration from Claude Desktop config."""
167 | try:
168 | home_dir = Path.home()
169 | possible_paths = [
170 | home_dir / "Library/Application Support/Claude/claude_desktop_config.json",
171 | home_dir / ".config/Claude/claude_desktop_config.json",
172 | Path(__file__).parent.parent / "claude_config/claude_desktop_config.json"
173 | ]
174 |
175 | for config_path in possible_paths:
176 | if config_path.exists():
177 | with open(config_path) as f:
178 | config = json.load(f)
179 | self.verification_results.append(
180 | f"[OK] Found Claude Desktop config at {config_path}"
181 | )
182 | return config
183 |
184 | self.warnings.append(
185 | "Could not find Claude Desktop config file in any standard location"
186 | )
187 | return None
188 |
189 | except Exception as e:
190 | self.critical_failures.append(
191 | f"Error loading Claude Desktop config: {str(e)}"
192 | )
193 | return None
194 |
195 | def verify_python_version(self):
196 | """Verify Python interpreter version matches production requirements."""
197 | try:
198 | python_version = sys.version.split()[0]
199 | required_version = "3.10" # Updated to match current requirements
200 |
201 | if not python_version.startswith(required_version):
202 | self.critical_failures.append(
203 | f"Python version mismatch: Found {python_version}, required {required_version}"
204 | )
205 | else:
206 | self.verification_results.append(
207 | f"[OK] Python version verified: {python_version}"
208 | )
209 | except Exception as e:
210 | self.critical_failures.append(f"Failed to verify Python version: {str(e)}")
211 |
212 | def verify_virtual_environment(self):
213 | """Verify we're running in a virtual environment."""
214 | try:
215 | if sys.prefix == sys.base_prefix:
216 | self.critical_failures.append(
217 | "Not running in a virtual environment!"
218 | )
219 | else:
220 | self.verification_results.append(
221 | f"[OK] Virtual environment verified: {sys.prefix}"
222 | )
223 | except Exception as e:
224 | self.critical_failures.append(
225 | f"Failed to verify virtual environment: {str(e)}"
226 | )
227 |
228 | def verify_critical_packages(self):
229 | """Verify critical packages are installed with correct versions."""
230 | required_packages = {
231 | 'chromadb': '0.5.23',
232 | 'sentence-transformers': '2.2.2',
233 | 'urllib3': '1.26.6',
234 | 'python-dotenv': '1.0.0'
235 | }
236 |
237 | for package, required_version in required_packages.items():
238 | try:
239 | installed_version = pkg_resources.get_distribution(package).version
240 | if required_version and installed_version != required_version:
241 | self.critical_failures.append(
242 | f"Package version mismatch: {package} "
243 | f"(found {installed_version}, required {required_version})"
244 | )
245 | else:
246 | self.verification_results.append(
247 | f"[OK] Package verified: {package} {installed_version}"
248 | )
249 | except pkg_resources.DistributionNotFound:
250 | self.critical_failures.append(f"Required package not found: {package}")
251 | except Exception as e:
252 | self.critical_failures.append(
253 | f"Failed to verify package {package}: {str(e)}"
254 | )
255 |
256 | def verify_claude_paths(self):
257 | """Verify paths from Claude Desktop config."""
258 | if not self.claude_config:
259 | return
260 |
261 | try:
262 | chroma_path = self.claude_config.get('mcp-memory', {}).get('chroma_db')
263 | backup_path = self.claude_config.get('mcp-memory', {}).get('backup_path')
264 |
265 | if chroma_path:
266 | os.environ['CHROMA_DB_PATH'] = str(chroma_path)
267 | self.verification_results.append(
268 | f"[OK] Set CHROMA_DB_PATH from config: {chroma_path}"
269 | )
270 | else:
271 | self.critical_failures.append("CHROMA_DB_PATH not found in Claude config")
272 |
273 | if backup_path:
274 | os.environ['MCP_MEMORY_BACKUP_PATH'] = str(backup_path)
275 | self.verification_results.append(
276 | f"[OK] Set MCP_MEMORY_BACKUP_PATH from config: {backup_path}"
277 | )
278 | else:
279 | self.critical_failures.append("MCP_MEMORY_BACKUP_PATH not found in Claude config")
280 |
281 | except Exception as e:
282 | self.critical_failures.append(f"Failed to verify Claude paths: {str(e)}")
283 |
284 | def verify_import_functionality(self):
285 | """Verify critical imports work correctly."""
286 | critical_imports = [
287 | 'chromadb',
288 | 'sentence_transformers',
289 | ]
290 |
291 | for module_name in critical_imports:
292 | try:
293 | module = importlib.import_module(module_name)
294 | self.verification_results.append(f"[OK] Successfully imported {module_name}")
295 | except ImportError as e:
296 | self.critical_failures.append(f"Failed to import {module_name}: {str(e)}")
297 |
298 | def verify_paths(self):
299 | """Verify critical paths exist and are accessible."""
300 | critical_paths = [
301 | os.environ.get('CHROMA_DB_PATH', ''),
302 | os.environ.get('MCP_MEMORY_BACKUP_PATH', '')
303 | ]
304 |
305 | for path in critical_paths:
306 | if not path:
307 | continue
308 | try:
309 | path_obj = Path(path)
310 | if not path_obj.exists():
311 | self.critical_failures.append(f"Critical path does not exist: {path}")
312 | elif not os.access(path, os.R_OK | os.W_OK):
313 | self.critical_failures.append(f"Insufficient permissions for path: {path}")
314 | else:
315 | self.verification_results.append(f"[OK] Path verified: {path}")
316 | except Exception as e:
317 | self.critical_failures.append(f"Failed to verify path {path}: {str(e)}")
318 |
319 | def run_verifications(self):
320 | """Run all verifications."""
321 | self.verify_python_version()
322 | self.verify_virtual_environment()
323 | self.verify_critical_packages()
324 | self.verify_claude_paths()
325 | self.verify_import_functionality()
326 | self.verify_paths()
327 |
328 | def print_results(self):
329 | """Print verification results."""
330 | print("\n=== Environment Verification Results ===\n")
331 |
332 | if self.verification_results:
333 | print("Successful Verifications:")
334 | for result in self.verification_results:
335 | print(f" {result}")
336 |
337 | if self.warnings:
338 | print("\nWarnings:")
339 | for warning in self.warnings:
340 | print(f" [!] {warning}")
341 |
342 | if self.critical_failures:
343 | print("\nCritical Failures:")
344 | for failure in self.critical_failures:
345 | print(f" [X] {failure}")
346 |
347 | print("\nSummary:")
348 | print(f" Passed: {len(self.verification_results)}")
349 | print(f" Warnings: {len(self.warnings)}")
350 | print(f" Failed: {len(self.critical_failures)}")
351 |
352 | if self.critical_failures:
353 | print("\nTo fix these issues:")
354 | print("1. Create a new virtual environment:")
355 | print(" conda create -n mcp-env python=3.10")
356 | print(" conda activate mcp-env")
357 | print("\n2. Install requirements:")
358 | print(" pip install -r requirements.txt")
359 | print("\n3. Ensure Claude Desktop config is properly set up with required paths")
360 |
361 | return len(self.critical_failures) == 0
362 |
363 | def main():
364 | verifier = EnvironmentVerifier()
365 | verifier.run_verifications()
366 | environment_ok = verifier.print_results()
367 |
368 | if not environment_ok:
369 | print("\n[WARNING] Environment verification failed! Please fix the issues above.")
370 | sys.exit(1)
371 | else:
372 | print("\n[OK] Environment verification passed! Safe to proceed.")
373 | sys.exit(0)
374 |
375 | if __name__ == "__main__":
376 | main()
377 |
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/api/operations.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Core operations for code execution interface.
17 |
18 | Provides token-efficient functions for memory operations:
19 | - search: Semantic search with compact results
20 | - store: Store new memories with minimal parameters
21 | - health: Service health and status check
22 |
23 | Token Efficiency:
24 | - search(5 results): ~385 tokens (vs ~2,625, 85% reduction)
25 | - store(): ~15 tokens (vs ~150, 90% reduction)
26 | - health(): ~20 tokens (vs ~125, 84% reduction)
27 |
28 | Performance:
29 | - Cold call: ~50ms (storage initialization)
30 | - Warm call: ~5-10ms (connection reused)
31 | - Memory overhead: <10MB
32 | """
33 |
34 | import logging
35 | import time
36 | from typing import Optional, Union, List
37 | from .types import (
38 | CompactMemory, CompactSearchResult, CompactHealthInfo,
39 | CompactConsolidationResult, CompactSchedulerStatus
40 | )
41 | from .client import get_storage_async, get_consolidator, get_scheduler
42 | from .sync_wrapper import sync_wrapper
43 | from ..models.memory import Memory
44 | from ..utils.hashing import generate_content_hash
45 |
46 | logger = logging.getLogger(__name__)
47 |
48 |
49 | @sync_wrapper
50 | async def search(
51 | query: str,
52 | limit: int = 5,
53 | tags: Optional[List[str]] = None
54 | ) -> CompactSearchResult:
55 | """
56 | Search memories using semantic similarity.
57 |
58 | Token efficiency: ~25 tokens (query + params) + ~73 tokens per result
59 | Example (5 results): ~385 tokens vs ~2,625 tokens (85% reduction)
60 |
61 | Args:
62 | query: Search query text (natural language)
63 | limit: Maximum number of results to return (default: 5)
64 | tags: Optional list of tags to filter results
65 |
66 | Returns:
67 | CompactSearchResult with minimal memory representations
68 |
69 | Raises:
70 | RuntimeError: If storage backend is not available
71 | ValueError: If query is empty or limit is invalid
72 |
73 | Example:
74 | >>> from mcp_memory_service.api import search
75 | >>> results = search("recent architecture changes", limit=3)
76 | >>> print(results)
77 | SearchResult(found=3, shown=3)
78 | >>> for m in results.memories:
79 | ... print(f"{m.hash}: {m.preview[:50]}...")
80 | abc12345: Implemented OAuth 2.1 authentication for...
81 | def67890: Refactored storage backend to support...
82 | ghi11121: Added hybrid mode for Cloudflare sync...
83 |
84 | Performance:
85 | - First call: ~50ms (includes storage initialization)
86 | - Subsequent calls: ~5-10ms (connection reused)
87 | - Scales linearly with limit (5ms + 1ms per result)
88 | """
89 | # Validate input
90 | if not query or not query.strip():
91 | raise ValueError("Query cannot be empty")
92 | if limit < 1:
93 | raise ValueError("Limit must be at least 1")
94 | if limit > 100:
95 | logger.warning(f"Large limit ({limit}) may impact performance")
96 |
97 | # Get storage instance
98 | storage = await get_storage_async()
99 |
100 | # Perform semantic search
101 | query_results = await storage.retrieve(query, n_results=limit)
102 |
103 | # Filter by tags if specified
104 | if tags:
105 | tag_set = set(tags)
106 | query_results = [
107 | r for r in query_results
108 | if any(tag in tag_set for tag in r.memory.tags)
109 | ]
110 |
111 | # Convert to compact format
112 | compact_memories = tuple(
113 | CompactMemory(
114 | hash=r.memory.content_hash[:8], # 8-char hash
115 | preview=r.memory.content[:200], # First 200 chars
116 | tags=tuple(r.memory.tags), # Immutable tuple
117 | created=r.memory.created_at, # Unix timestamp
118 | score=r.relevance_score # Relevance score
119 | )
120 | for r in query_results
121 | )
122 |
123 | return CompactSearchResult(
124 | memories=compact_memories,
125 | total=len(compact_memories),
126 | query=query
127 | )
128 |
129 |
130 | @sync_wrapper
131 | async def store(
132 | content: str,
133 | tags: Optional[Union[str, List[str]]] = None,
134 | memory_type: str = "note"
135 | ) -> str:
136 | """
137 | Store a new memory.
138 |
139 | Token efficiency: ~15 tokens (params only)
140 | vs ~150 tokens for MCP tool call with schema (90% reduction)
141 |
142 | Args:
143 | content: Memory content text
144 | tags: Single tag or list of tags (optional)
145 | memory_type: Memory type classification (default: "note")
146 |
147 | Returns:
148 | 8-character content hash of stored memory
149 |
150 | Raises:
151 | RuntimeError: If storage operation fails
152 | ValueError: If content is empty
153 |
154 | Example:
155 | >>> from mcp_memory_service.api import store
156 | >>> hash = store(
157 | ... "Implemented OAuth 2.1 authentication",
158 | ... tags=["authentication", "security", "feature"]
159 | ... )
160 | >>> print(f"Stored: {hash}")
161 | Stored: abc12345
162 |
163 | Performance:
164 | - First call: ~50ms (includes storage initialization)
165 | - Subsequent calls: ~10-20ms (includes embedding generation)
166 | - Scales with content length (20ms + 0.5ms per 100 chars)
167 | """
168 | # Validate input
169 | if not content or not content.strip():
170 | raise ValueError("Content cannot be empty")
171 |
172 | # Normalize tags to list
173 | if tags is None:
174 | tag_list = []
175 | elif isinstance(tags, str):
176 | tag_list = [tags]
177 | else:
178 | tag_list = list(tags)
179 |
180 | # Generate content hash
181 | content_hash = generate_content_hash(content)
182 |
183 | # Create memory object
184 | memory = Memory(
185 | content=content,
186 | content_hash=content_hash,
187 | tags=tag_list,
188 | memory_type=memory_type,
189 | metadata={}
190 | )
191 |
192 | # Get storage instance
193 | storage = await get_storage_async()
194 |
195 | # Store memory
196 | success, message = await storage.store(memory)
197 |
198 | if not success:
199 | raise RuntimeError(f"Failed to store memory: {message}")
200 |
201 | # Return short hash (8 chars)
202 | return content_hash[:8]
203 |
204 |
205 | @sync_wrapper
206 | async def health() -> CompactHealthInfo:
207 | """
208 | Get service health and status.
209 |
210 | Token efficiency: ~20 tokens
211 | vs ~125 tokens for MCP health check tool (84% reduction)
212 |
213 | Returns:
214 | CompactHealthInfo with backend, count, and ready status
215 |
216 | Raises:
217 | RuntimeError: If unable to retrieve health information
218 |
219 | Example:
220 | >>> from mcp_memory_service.api import health
221 | >>> info = health()
222 | >>> print(f"Status: {info.status}")
223 | Status: healthy
224 | >>> print(f"Backend: {info.backend}, Count: {info.count}")
225 | Backend: sqlite_vec, Count: 1247
226 |
227 | Performance:
228 | - First call: ~50ms (includes storage initialization)
229 | - Subsequent calls: ~5ms (cached stats)
230 | """
231 | try:
232 | # Get storage instance
233 | storage = await get_storage_async()
234 |
235 | # Get storage statistics
236 | stats = await storage.get_stats()
237 |
238 | # Determine status
239 | status = "healthy"
240 | if stats.get("status") == "degraded":
241 | status = "degraded"
242 | elif stats.get("status") == "error":
243 | status = "error"
244 | elif not stats.get("initialized", True):
245 | status = "error"
246 |
247 | # Extract backend type
248 | backend = stats.get("storage_backend", "unknown")
249 |
250 | # Extract memory count
251 | count = stats.get("total_memories", 0)
252 |
253 | return CompactHealthInfo(
254 | status=status,
255 | count=count,
256 | backend=backend
257 | )
258 |
259 | except Exception as e:
260 | logger.error(f"Health check failed: {e}")
261 | return CompactHealthInfo(
262 | status="error",
263 | count=0,
264 | backend="unknown"
265 | )
266 |
267 |
268 | async def _consolidate_async(time_horizon: str) -> CompactConsolidationResult:
269 | """
270 | Internal async implementation of consolidation.
271 |
272 | This function contains the core consolidation logic and is used by both
273 | the sync-wrapped API function and the FastAPI endpoint to avoid duplication.
274 | """
275 | # Validate time horizon
276 | valid_horizons = ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']
277 | if time_horizon not in valid_horizons:
278 | raise ValueError(
279 | f"Invalid time_horizon: {time_horizon}. "
280 | f"Must be one of: {', '.join(valid_horizons)}"
281 | )
282 |
283 | # Get consolidator instance
284 | consolidator = get_consolidator()
285 | if consolidator is None:
286 | raise RuntimeError(
287 | "Consolidator not available. "
288 | "Consolidation requires HTTP server with MCP_CONSOLIDATION_ENABLED=true. "
289 | "Start the HTTP server first."
290 | )
291 |
292 | try:
293 | # Record start time
294 | start_time = time.time()
295 |
296 | # Run consolidation
297 | logger.info(f"Running {time_horizon} consolidation...")
298 | result = await consolidator.consolidate(time_horizon)
299 |
300 | # Calculate duration
301 | duration = time.time() - start_time
302 |
303 | # Extract metrics from result (ConsolidationReport object)
304 | processed = result.memories_processed
305 | compressed = result.memories_compressed
306 | forgotten = result.memories_archived
307 | status = 'completed' if not result.errors else 'completed_with_errors'
308 |
309 | logger.info(
310 | f"🎉 Consolidation completed successfully! Processed: {processed}, Compressed: {compressed}, Forgotten: {forgotten} (Total time: {duration:.1f}s)"
311 | )
312 |
313 | return CompactConsolidationResult(
314 | status=status,
315 | horizon=time_horizon,
316 | processed=processed,
317 | compressed=compressed,
318 | forgotten=forgotten,
319 | duration=duration
320 | )
321 |
322 | except Exception as e:
323 | logger.error(f"Consolidation failed: {e}")
324 | return CompactConsolidationResult(
325 | status="failed",
326 | horizon=time_horizon,
327 | processed=0,
328 | compressed=0,
329 | forgotten=0,
330 | duration=0.0
331 | )
332 |
333 |
334 | @sync_wrapper
335 | async def consolidate(time_horizon: str = "weekly") -> CompactConsolidationResult:
336 | """
337 | Trigger memory consolidation for a specific time horizon.
338 |
339 | Token efficiency: ~40 tokens (result only)
340 | vs ~250 tokens for MCP consolidation result (84% reduction)
341 |
342 | Args:
343 | time_horizon: Time horizon for consolidation
344 | ('daily' | 'weekly' | 'monthly' | 'quarterly' | 'yearly')
345 |
346 | Returns:
347 | CompactConsolidationResult with operation metrics
348 |
349 | Raises:
350 | RuntimeError: If consolidation fails or consolidator not available
351 | ValueError: If time_horizon is invalid
352 |
353 | Example:
354 | >>> from mcp_memory_service.api import consolidate
355 | >>> result = consolidate('weekly')
356 | >>> print(result)
357 | Consolidation(completed, weekly, 2418 processed)
358 | >>> print(f"Compressed: {result.compressed}, Forgotten: {result.forgotten}")
359 | Compressed: 156, Forgotten: 43
360 |
361 | Performance:
362 | - Typical duration: 10-30 seconds (depends on memory count)
363 | - Scales linearly with total memories (~10ms per memory)
364 | - Background operation (non-blocking in HTTP server context)
365 |
366 | Note:
367 | Requires HTTP server with consolidation enabled. If called when
368 | HTTP server is not running, will raise RuntimeError.
369 | """
370 | return await _consolidate_async(time_horizon)
371 |
372 |
373 | async def _scheduler_status_async() -> CompactSchedulerStatus:
374 | """
375 | Internal async implementation of scheduler status.
376 |
377 | This function contains the core status logic and is used by both
378 | the sync-wrapped API function and the FastAPI endpoint to avoid duplication.
379 | """
380 | # Get scheduler instance
381 | scheduler = get_scheduler()
382 | if scheduler is None:
383 | logger.warning("Scheduler not available")
384 | return CompactSchedulerStatus(
385 | running=False,
386 | next_daily=None,
387 | next_weekly=None,
388 | next_monthly=None,
389 | jobs_executed=0,
390 | jobs_failed=0
391 | )
392 |
393 | try:
394 | # Get scheduler status
395 | if hasattr(scheduler, 'scheduler') and scheduler.scheduler is not None:
396 | # Scheduler is running
397 | jobs = scheduler.scheduler.get_jobs()
398 |
399 | # Extract next run times for each horizon
400 | next_daily = None
401 | next_weekly = None
402 | next_monthly = None
403 |
404 | for job in jobs:
405 | if job.next_run_time:
406 | timestamp = job.next_run_time.timestamp()
407 | if 'daily' in job.id.lower():
408 | next_daily = timestamp
409 | elif 'weekly' in job.id.lower():
410 | next_weekly = timestamp
411 | elif 'monthly' in job.id.lower():
412 | next_monthly = timestamp
413 |
414 | # Get execution statistics
415 | jobs_executed = scheduler.execution_stats.get('successful_jobs', 0)
416 | jobs_failed = scheduler.execution_stats.get('failed_jobs', 0)
417 |
418 | return CompactSchedulerStatus(
419 | running=True,
420 | next_daily=next_daily,
421 | next_weekly=next_weekly,
422 | next_monthly=next_monthly,
423 | jobs_executed=jobs_executed,
424 | jobs_failed=jobs_failed
425 | )
426 | else:
427 | # Scheduler exists but not running
428 | return CompactSchedulerStatus(
429 | running=False,
430 | next_daily=None,
431 | next_weekly=None,
432 | next_monthly=None,
433 | jobs_executed=0,
434 | jobs_failed=0
435 | )
436 |
437 | except Exception as e:
438 | logger.error(f"Failed to get scheduler status: {e}")
439 | return CompactSchedulerStatus(
440 | running=False,
441 | next_daily=None,
442 | next_weekly=None,
443 | next_monthly=None,
444 | jobs_executed=0,
445 | jobs_failed=0
446 | )
447 |
448 |
449 | @sync_wrapper
450 | async def scheduler_status() -> CompactSchedulerStatus:
451 | """
452 | Get consolidation scheduler status and next run times.
453 |
454 | Token efficiency: ~25 tokens
455 | vs ~150 tokens for MCP scheduler_status tool (83% reduction)
456 |
457 | Returns:
458 | CompactSchedulerStatus with scheduler state and job statistics
459 |
460 | Raises:
461 | RuntimeError: If scheduler not available
462 |
463 | Example:
464 | >>> from mcp_memory_service.api import scheduler_status
465 | >>> status = scheduler_status()
466 | >>> print(status)
467 | Scheduler(running, executed=42, failed=0)
468 | >>> if status.next_daily:
469 | ... from datetime import datetime
470 | ... next_run = datetime.fromtimestamp(status.next_daily)
471 | ... print(f"Next daily: {next_run}")
472 |
473 | Performance:
474 | - Execution time: <5ms (reads cached state)
475 | - No storage access required
476 | - Lightweight status query
477 |
478 | Note:
479 | Requires HTTP server with consolidation scheduler enabled.
480 | Returns STOPPED status if scheduler not running.
481 | """
482 | return await _scheduler_status_async()
483 |
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/utils/db_utils.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Utilities for database validation and health checks."""
16 | from typing import Dict, Any, Tuple
17 | import logging
18 | import os
19 | import json
20 | from datetime import datetime
21 | import importlib
22 |
23 | logger = logging.getLogger(__name__)
24 |
25 | async def validate_database(storage) -> Tuple[bool, str]:
26 | """Validate database health and configuration."""
27 | try:
28 | # Check if storage is properly initialized
29 | if storage is None:
30 | return False, "Storage is not initialized"
31 |
32 | # Determine the storage type directly instead of requiring a 'collection' attribute;
33 | # this keeps validation compatible with SQLite-vec and other storage backends
34 | storage_type = storage.__class__.__name__
35 |
36 | # First, use the 'is_initialized' method if available (preferred)
37 | if hasattr(storage, 'is_initialized') and callable(storage.is_initialized):
38 | try:
39 | init_status = storage.is_initialized()
40 | if not init_status:
41 | # Get detailed status for debugging
42 | if hasattr(storage, 'get_initialization_status') and callable(storage.get_initialization_status):
43 | status = storage.get_initialization_status()
44 | return False, f"Storage not fully initialized: {status}"
45 | else:
46 | return False, "Storage initialization incomplete"
47 | except Exception as init_error:
48 | logger.warning(f"Error checking initialization status: {init_error}")
49 | # Continue with alternative checks
50 |
51 | # SQLite-vec backend validation
52 | if storage_type == "SqliteVecMemoryStorage":
53 | if not hasattr(storage, 'conn') or storage.conn is None:
54 | return False, "SQLite database connection is not initialized"
55 |
56 | # Check for database health
57 | try:
58 | # Make sure the tables exist
59 | try:
60 | cursor = storage.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'")
61 | if not cursor.fetchone():
62 | return False, "SQLite database is missing required tables"
63 | except Exception as table_error:
64 | return False, f"Failed to check for tables: {str(table_error)}"
65 |
66 | # Try a simple query to verify database connection
67 | cursor = storage.conn.execute('SELECT COUNT(*) FROM memories')
68 | memory_count = cursor.fetchone()[0]
69 | logger.info(f"SQLite-vec database contains {memory_count} memories")
70 |
71 | # Test if embedding generation works (if model is available)
72 | if hasattr(storage, 'embedding_model') and storage.embedding_model:
73 | test_text = "Database validation test"
74 | embedding = storage._generate_embedding(test_text)
75 | if not embedding or len(embedding) != storage.embedding_dimension:
76 | logger.warning("Embedding generation may not be working properly")
77 | else:
78 | logger.warning("No embedding model available, some functionality may be limited")
79 |
80 | return True, "SQLite-vec database validation successful"
81 |
82 | except Exception as e:
83 | return False, f"SQLite database access error: {str(e)}"
84 |
85 | # Cloudflare storage validation
86 | elif storage_type == "CloudflareStorage":
87 | try:
88 | # Check if storage is properly initialized
89 | if not hasattr(storage, 'client') or storage.client is None:
90 | return False, "Cloudflare storage client is not initialized"
91 |
92 | # Check basic connectivity by getting stats
93 | stats = await storage.get_stats()
94 | memory_count = stats.get("total_memories", 0)
95 | logger.info(f"Cloudflare storage contains {memory_count} memories")
96 |
97 | # Test embedding generation if available
98 | test_text = "Database validation test"
99 | try:
100 | embedding = await storage._generate_embedding(test_text)
101 | if not embedding or not isinstance(embedding, list):
102 | logger.warning("Embedding generation may not be working properly")
103 | except Exception as embed_error:
104 | logger.warning(f"Embedding test failed: {str(embed_error)}")
105 |
106 | return True, "Cloudflare storage validation successful"
107 |
108 | except Exception as e:
109 | return False, f"Cloudflare storage access error: {str(e)}"
110 |
111 | else:
112 | return False, f"Unknown storage type: {storage_type}"
113 |
114 | except Exception as e:
115 | logger.error(f"Database validation failed: {str(e)}")
116 | return False, f"Database validation failed: {str(e)}"
117 |
118 | async def get_database_stats(storage) -> Dict[str, Any]:
119 | """Get detailed database statistics with proper error handling."""
120 | try:
121 | # Check if storage is properly initialized
122 | if storage is None:
123 | return {
124 | "status": "error",
125 | "error": "Storage is not initialized"
126 | }
127 |
128 | # Determine storage type
129 | storage_type = storage.__class__.__name__
130 |
131 | # SQLite-vec backend stats
132 | if storage_type == "SqliteVecMemoryStorage":
133 | # Use the storage's own stats method if available
134 | if hasattr(storage, 'get_stats') and callable(storage.get_stats):
135 | try:
136 | stats = storage.get_stats()
137 | stats["status"] = "healthy"
138 | return stats
139 | except Exception as stats_error:
140 | logger.warning(f"Error calling get_stats method: {stats_error}")
141 | # Fall back to our implementation
142 |
143 | # Otherwise, gather basic stats
144 | if not hasattr(storage, 'conn') or storage.conn is None:
145 | return {
146 | "status": "error",
147 | "error": "SQLite database connection is not initialized"
148 | }
149 |
150 | try:
151 | # Check if tables exist
152 | cursor = storage.conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
153 | tables = [row[0] for row in cursor.fetchall()]
154 |
155 | # Count memories if the table exists
156 | memory_count = 0
157 | if 'memories' in tables:
158 | cursor = storage.conn.execute('SELECT COUNT(*) FROM memories')
159 | memory_count = cursor.fetchone()[0]
160 |
161 | # Get unique tags if the table exists
162 | unique_tags = 0
163 | if 'memories' in tables:
164 | cursor = storage.conn.execute('SELECT COUNT(DISTINCT tags) FROM memories WHERE tags != ""')
165 | unique_tags = cursor.fetchone()[0]
166 |
167 | # Get database file size
168 | db_path = storage.db_path if hasattr(storage, 'db_path') else "unknown"
169 | file_size = os.path.getsize(db_path) if isinstance(db_path, str) and os.path.exists(db_path) else 0
170 |
171 | # Get embedding model info
172 | embedding_model = "unknown"
173 | embedding_dimension = 0
174 |
175 | if hasattr(storage, 'embedding_model_name'):
176 | embedding_model = storage.embedding_model_name
177 |
178 | if hasattr(storage, 'embedding_dimension'):
179 | embedding_dimension = storage.embedding_dimension
180 |
181 | # Gather tables information
182 | tables_info = {}
183 | for table in tables:
184 | try:
185 | cursor = storage.conn.execute(f"SELECT COUNT(*) FROM {table}")
186 | count = cursor.fetchone()[0]
187 | tables_info[table] = {"count": count}
188 | except Exception:
189 | tables_info[table] = {"count": "unknown"}
190 |
191 | return {
192 | "backend": "sqlite-vec",
193 | "status": "healthy",
194 | "total_memories": memory_count,
195 | "unique_tags": unique_tags,
196 | "database_size_bytes": file_size,
197 | "database_size_mb": round(file_size / (1024 * 1024), 2) if file_size > 0 else 0,
198 | "embedding_model": embedding_model,
199 | "embedding_dimension": embedding_dimension,
200 | "tables": tables,
201 | "tables_info": tables_info
202 | }
203 | except Exception as e:
204 | return {
205 | "status": "error",
206 | "error": f"Error getting SQLite-vec stats: {str(e)}"
207 | }
208 |
209 | # Cloudflare storage stats
210 | elif storage_type == "CloudflareStorage":
211 | try:
212 | # Get storage stats from the Cloudflare storage implementation
213 | storage_stats = await storage.get_stats()
214 |
215 | # Add cloudflare-specific info
216 | cloudflare_info = {
217 | "vectorize_index": storage.vectorize_index,
218 | "d1_database_id": storage.d1_database_id,
219 | "r2_bucket": storage.r2_bucket,
220 | "embedding_model": storage.embedding_model,
221 | "large_content_threshold": storage.large_content_threshold
222 | }
223 |
224 | return {
225 | **storage_stats,
226 | "cloudflare": cloudflare_info,
227 | "backend": "cloudflare",
228 | "status": "healthy"
229 | }
230 |
231 | except Exception as stats_error:
232 | return {
233 | "status": "error",
234 | "error": f"Error getting Cloudflare stats: {str(stats_error)}",
235 | "backend": "cloudflare"
236 | }
237 |
238 | else:
239 | return {
240 | "status": "error",
241 | "error": f"Unknown storage type: {storage_type}"
242 | }
243 |
244 | except Exception as e:
245 | logger.error(f"Error getting database stats: {str(e)}")
246 | return {
247 | "status": "error",
248 | "error": str(e)
249 | }
250 |
251 | async def repair_database(storage) -> Tuple[bool, str]:
252 | """Attempt to repair database issues."""
253 | try:
254 | # Determine storage type
255 | storage_type = storage.__class__.__name__
256 |
257 | # SQLite-vec backend repair
258 | if storage_type == "SqliteVecMemoryStorage":
259 | # For SQLite, we'll try to check and recreate the tables if needed
260 | if not hasattr(storage, 'conn') or storage.conn is None:
261 | # Try to reconnect
262 | try:
263 | storage.conn = sqlite3.connect(storage.db_path)
264 |
265 | # Try to reload the extension
266 | if importlib.util.find_spec('sqlite_vec'):
267 | import sqlite_vec
268 | storage.conn.enable_load_extension(True)
269 | sqlite_vec.load(storage.conn)
270 | storage.conn.enable_load_extension(False)
271 |
272 | # Recreate tables if needed
273 | storage.conn.execute('''
274 | CREATE TABLE IF NOT EXISTS memories (
275 | id INTEGER PRIMARY KEY AUTOINCREMENT,
276 | content_hash TEXT UNIQUE NOT NULL,
277 | content TEXT NOT NULL,
278 | tags TEXT,
279 | memory_type TEXT,
280 | metadata TEXT,
281 | created_at REAL,
282 | updated_at REAL,
283 | created_at_iso TEXT,
284 | updated_at_iso TEXT
285 | )
286 | ''')
287 |
288 | # Create virtual table for vector embeddings
289 | embedding_dimension = getattr(storage, 'embedding_dimension', 384)
290 | storage.conn.execute(f'''
291 | CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings USING vec0(
292 | content_embedding FLOAT[{embedding_dimension}]
293 | )
294 | ''')
295 |
296 | # Create indexes for better performance
297 | storage.conn.execute('CREATE INDEX IF NOT EXISTS idx_content_hash ON memories(content_hash)')
298 | storage.conn.execute('CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)')
299 | storage.conn.execute('CREATE INDEX IF NOT EXISTS idx_memory_type ON memories(memory_type)')
300 |
301 | storage.conn.commit()
302 | return True, "SQLite-vec database repaired"
303 |
304 | except Exception as e:
305 | return False, f"SQLite-vec repair failed: {str(e)}"
306 |
307 | # Cloudflare storage repair
308 | elif storage_type == "CloudflareStorage":
309 | # For Cloudflare storage, we can't repair infrastructure (Vectorize, D1, R2)
310 | # but we can validate the connection and re-initialize if needed
311 | try:
312 | # Validate current state
313 | is_valid, message = await validate_database(storage)
314 | if is_valid:
315 | return True, "Cloudflare storage is already healthy"
316 |
317 | # Try to re-initialize the storage connection
318 | await storage.initialize()
319 |
320 | # Validate repair
321 | is_valid, message = await validate_database(storage)
322 | if is_valid:
323 | return True, "Cloudflare storage connection successfully repaired"
324 | else:
325 | return False, f"Cloudflare storage repair failed: {message}"
326 |
327 | except Exception as repair_error:
328 | return False, f"Cloudflare storage repair failed: {str(repair_error)}"
329 |
330 | else:
331 | return False, f"Unknown storage type: {storage_type}, cannot repair"
332 |
333 | except Exception as e:
334 | logger.error(f"Error repairing database: {str(e)}")
335 | return False, f"Error repairing database: {str(e)}"
```
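A minimal usage sketch for the helpers above, assuming a local SQLite-vec database. The `memories.db` path and the `check_db` wrapper are placeholders; the `SqliteVecMemoryStorage` constructor arguments mirror the integration test later on this page.

```python
# Usage sketch (not part of the module): run the health-check helpers
# against a SQLite-vec backend.
import asyncio

from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
from mcp_memory_service.utils.db_utils import (
    validate_database,
    get_database_stats,
    repair_database,
)


async def check_db(db_path: str) -> None:
    storage = SqliteVecMemoryStorage(db_path=db_path, embedding_model="all-MiniLM-L6-v2")
    await storage.initialize()

    ok, message = await validate_database(storage)
    print(f"valid={ok}: {message}")

    if not ok:
        # Attempt a repair only when validation fails
        repaired, detail = await repair_database(storage)
        print(f"repaired={repaired}: {detail}")

    stats = await get_database_stats(storage)
    print(f"{stats.get('total_memories', 0)} memories, status={stats.get('status')}")


if __name__ == "__main__":
    asyncio.run(check_db("memories.db"))  # placeholder path
```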
--------------------------------------------------------------------------------
/tests/integration/test_data_serialization_consistency.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Test script to examine data serialization differences that could cause Issue #99.
4 | This focuses on how Memory objects are serialized/deserialized in different contexts
5 | without requiring the full MCP server stack.
6 | """
7 |
8 | import sys
9 | import os
10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
11 |
12 | import asyncio
13 | import json
14 | import tempfile
15 | from typing import Dict, List, Any
16 | from datetime import datetime
17 | import time
18 |
19 | from mcp_memory_service.models.memory import Memory
20 | from mcp_memory_service.utils.hashing import generate_content_hash
21 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
22 |
23 | class DataSerializationTest:
24 | """Test suite examining memory data serialization consistency."""
25 |
26 | def __init__(self):
27 | self.storage = None
28 | self.test_memories = []
29 |
30 | async def setup(self):
31 | """Set up test environment."""
32 | print("=== Setting up data serialization test environment ===")
33 |
34 | self.temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
35 | self.temp_db.close()
36 |
37 | self.storage = SqliteVecMemoryStorage(
38 | db_path=self.temp_db.name,
39 | embedding_model="all-MiniLM-L6-v2"
40 | )
41 | await self.storage.initialize()
42 | print(f"✅ Storage initialized: {self.temp_db.name}")
43 |
44 | async def cleanup(self):
45 | """Clean up test environment."""
46 | self.storage = None
47 | if hasattr(self, 'temp_db') and os.path.exists(self.temp_db.name):
48 | os.unlink(self.temp_db.name)
49 | print("✅ Test database cleaned up")
50 |
51 | def create_hook_style_memory_with_metadata(self) -> Memory:
52 | """Create a memory simulating hook-generated content with rich metadata."""
53 | content = "Implemented comprehensive testing for Issue #99 memory storage inconsistency"
54 |
55 | memory = Memory(
56 | content=content,
57 | content_hash=generate_content_hash(content),
58 | tags=["claude-code-session", "session-consolidation", "issue-99", "mcp-memory-service", "language:python"],
59 | memory_type="session-summary",
60 | metadata={
61 | "session_analysis": {
62 | "topics": ["testing", "storage", "consistency"],
63 | "decisions_count": 3,
64 | "insights_count": 5,
65 | "confidence": 0.92
66 | },
67 | "project_context": {
68 | "name": "mcp-memory-service",
69 | "language": "python",
70 | "frameworks": ["fastapi", "chromadb"]
71 | },
72 | "generated_by": "claude-code-session-end-hook",
73 | "generated_at": "2025-09-14T04:42:15.123Z"
74 | }
75 | )
76 |
77 | print(f"🔧 Created hook-style memory with {len(memory.tags)} tags and rich metadata")
78 | return memory
79 |
80 | def create_manual_memory_with_minimal_metadata(self) -> Memory:
81 | """Create a memory simulating manual /memory-store with minimal metadata."""
82 | content = "Manual note about Issue #99 analysis findings"
83 |
84 | memory = Memory(
85 | content=content,
86 | content_hash=generate_content_hash(content),
87 | tags=["issue-99", "analysis", "findings", "manual-note"],
88 | memory_type="note",
89 | metadata={
90 | "source": "user-input",
91 | "created_by": "manual-storage"
92 | }
93 | )
94 |
95 | print(f"📝 Created manual-style memory with {len(memory.tags)} tags and minimal metadata")
96 | return memory
97 |
98 | async def test_memory_serialization_roundtrip(self):
99 | """Test 1: Examine serialization/deserialization consistency."""
100 | print("\n🧪 Test 1: Memory Serialization Roundtrip Analysis")
101 | print("-" * 60)
102 |
103 | # Create test memories
104 | hook_memory = self.create_hook_style_memory_with_metadata()
105 | manual_memory = self.create_manual_memory_with_minimal_metadata()
106 |
107 | # Test serialization to dict and back
108 | hook_dict = hook_memory.to_dict()
109 | manual_dict = manual_memory.to_dict()
110 |
111 | print(f"📊 Hook memory dict keys: {sorted(hook_dict.keys())}")
112 | print(f"📊 Manual memory dict keys: {sorted(manual_dict.keys())}")
113 |
114 | # Test deserialization from dict
115 | hook_restored = Memory.from_dict(hook_dict)
116 | manual_restored = Memory.from_dict(manual_dict)
117 |
118 | # Compare original vs restored
119 | hook_consistency = {
120 | "content_match": hook_memory.content == hook_restored.content,
121 | "tags_match": hook_memory.tags == hook_restored.tags,
122 | "metadata_match": hook_memory.metadata == hook_restored.metadata,
123 | "created_at_preserved": abs((hook_memory.created_at or 0) - (hook_restored.created_at or 0)) < 0.001,
124 | "created_at_iso_preserved": hook_memory.created_at_iso == hook_restored.created_at_iso
125 | }
126 |
127 | manual_consistency = {
128 | "content_match": manual_memory.content == manual_restored.content,
129 | "tags_match": manual_memory.tags == manual_restored.tags,
130 | "metadata_match": manual_memory.metadata == manual_restored.metadata,
131 | "created_at_preserved": abs((manual_memory.created_at or 0) - (manual_restored.created_at or 0)) < 0.001,
132 | "created_at_iso_preserved": manual_memory.created_at_iso == manual_restored.created_at_iso
133 | }
134 |
135 | print(f"\n📋 Hook memory serialization consistency:")
136 | for key, value in hook_consistency.items():
137 | status = "✅" if value else "❌"
138 | print(f" {status} {key}: {value}")
139 |
140 | print(f"\n📋 Manual memory serialization consistency:")
141 | for key, value in manual_consistency.items():
142 | status = "✅" if value else "❌"
143 | print(f" {status} {key}: {value}")
144 |
145 | return {
146 | "hook_consistency": hook_consistency,
147 | "manual_consistency": manual_consistency,
148 | "hook_dict": hook_dict,
149 | "manual_dict": manual_dict
150 | }
151 |
152 | async def test_storage_backend_handling(self):
153 | """Test 2: Examine how storage backend handles different memory types."""
154 | print("\n🧪 Test 2: Storage Backend Handling Analysis")
155 | print("-" * 60)
156 |
157 | # Create and store different memory types
158 | hook_memory = self.create_hook_style_memory_with_metadata()
159 | manual_memory = self.create_manual_memory_with_minimal_metadata()
160 |
161 | # Store both memories
162 | hook_store_result = await self.storage.store(hook_memory)
163 | manual_store_result = await self.storage.store(manual_memory)
164 |
165 | print(f"📤 Hook memory storage result: {hook_store_result}")
166 | print(f"📤 Manual memory storage result: {manual_store_result}")
167 |
168 | # Retrieve memories back
169 | hook_retrieved = await self.storage.retrieve(hook_memory.content, n_results=1)
170 | manual_retrieved = await self.storage.retrieve(manual_memory.content, n_results=1)
171 |
172 | storage_analysis = {
173 | "hook_stored_successfully": hook_store_result[0],
174 | "manual_stored_successfully": manual_store_result[0],
175 | "hook_retrieved_count": len(hook_retrieved),
176 | "manual_retrieved_count": len(manual_retrieved)
177 | }
178 |
179 | if hook_retrieved:
180 | retrieved_hook = hook_retrieved[0].memory
181 | storage_analysis["hook_retrieval_analysis"] = {
182 | "content_preserved": retrieved_hook.content == hook_memory.content,
183 | "tags_preserved": retrieved_hook.tags == hook_memory.tags,
184 | "timestamp_preserved": (
185 | retrieved_hook.created_at is not None and
186 | retrieved_hook.created_at_iso is not None
187 | ),
188 | "metadata_preserved": bool(retrieved_hook.metadata)
189 | }
190 |
191 | print(f"\n📥 Retrieved hook memory analysis:")
192 | for key, value in storage_analysis["hook_retrieval_analysis"].items():
193 | status = "✅" if value else "❌"
194 | print(f" {status} {key}: {value}")
195 |
196 | if manual_retrieved:
197 | retrieved_manual = manual_retrieved[0].memory
198 | storage_analysis["manual_retrieval_analysis"] = {
199 | "content_preserved": retrieved_manual.content == manual_memory.content,
200 | "tags_preserved": retrieved_manual.tags == manual_memory.tags,
201 | "timestamp_preserved": (
202 | retrieved_manual.created_at is not None and
203 | retrieved_manual.created_at_iso is not None
204 | ),
205 | "metadata_preserved": bool(retrieved_manual.metadata)
206 | }
207 |
208 | print(f"\n📥 Retrieved manual memory analysis:")
209 | for key, value in storage_analysis["manual_retrieval_analysis"].items():
210 | status = "✅" if value else "❌"
211 | print(f" {status} {key}: {value}")
212 |
213 | return storage_analysis
214 |
215 | async def test_timestamp_precision_handling(self):
216 | """Test 3: Examine timestamp precision across serialization boundaries."""
217 | print("\n🧪 Test 3: Timestamp Precision Analysis")
218 | print("-" * 60)
219 |
220 | # Create memory with very specific timestamp
221 | precise_timestamp = time.time()
222 | precise_iso = datetime.fromtimestamp(precise_timestamp).isoformat() + "Z"
223 |
224 | memory = Memory(
225 | content="Testing timestamp precision across storage boundaries",
226 | content_hash=generate_content_hash("Testing timestamp precision across storage boundaries"),
227 | tags=["timestamp-test", "precision"],
228 | memory_type="test",
229 | created_at=precise_timestamp,
230 | created_at_iso=precise_iso
231 | )
232 |
233 | print(f"🕐 Original timestamp (float): {precise_timestamp}")
234 | print(f"🕐 Original timestamp (ISO): {precise_iso}")
235 |
236 | # Test serialization
237 | memory_dict = memory.to_dict()
238 | print(f"🔄 Serialized timestamp fields: {json.dumps({k:v for k,v in memory_dict.items() if 'timestamp' in k or 'created_at' in k}, indent=2)}")
239 |
240 | # Test deserialization
241 | restored_memory = Memory.from_dict(memory_dict)
242 | print(f"🔄 Restored timestamp (float): {restored_memory.created_at}")
243 | print(f"🔄 Restored timestamp (ISO): {restored_memory.created_at_iso}")
244 |
245 | # Test storage roundtrip
246 | store_result = await self.storage.store(memory)
247 | retrieved_results = await self.storage.retrieve(memory.content, n_results=1)
248 |
249 | precision_analysis = {
250 | "serialization_preserves_precision": abs(precise_timestamp - (restored_memory.created_at or 0)) < 0.001,
251 | "iso_format_preserved": precise_iso == restored_memory.created_at_iso,
252 | "storage_successful": store_result[0],
253 | "retrieval_successful": len(retrieved_results) > 0
254 | }
255 |
256 | if retrieved_results:
257 | stored_memory = retrieved_results[0].memory
258 | precision_analysis.update({
259 | "storage_preserves_float_precision": abs(precise_timestamp - (stored_memory.created_at or 0)) < 0.001,
260 | "storage_preserves_iso_format": precise_iso == stored_memory.created_at_iso
261 | })
262 |
263 | print(f"💾 Storage preserved timestamp (float): {stored_memory.created_at}")
264 | print(f"💾 Storage preserved timestamp (ISO): {stored_memory.created_at_iso}")
265 |
266 | print(f"\n📊 Precision analysis:")
267 | for key, value in precision_analysis.items():
268 | status = "✅" if value else "❌"
269 | print(f" {status} {key}: {value}")
270 |
271 | return precision_analysis
272 |
273 | async def run_all_tests(self):
274 | """Run all data serialization tests."""
275 | print("=" * 70)
276 | print("MCP Memory Service: Data Serialization Consistency Analysis")
277 | print("Investigating Issue #99 - Root Cause Analysis")
278 | print("=" * 70)
279 |
280 | try:
281 | await self.setup()
282 |
283 | # Run individual tests
284 | serialization_test = await self.test_memory_serialization_roundtrip()
285 | storage_test = await self.test_storage_backend_handling()
286 | precision_test = await self.test_timestamp_precision_handling()
287 |
288 | # Analyze overall results
289 | print("\n" + "=" * 70)
290 | print("COMPREHENSIVE ANALYSIS SUMMARY")
291 | print("=" * 70)
292 |
293 | tests_passed = 0
294 | total_tests = 3
295 |
296 | # Serialization consistency
297 | hook_serialization_ok = all(serialization_test["hook_consistency"].values())
298 | manual_serialization_ok = all(serialization_test["manual_consistency"].values())
299 |
300 | if hook_serialization_ok and manual_serialization_ok:
301 | print("✅ PASS: Memory serialization/deserialization consistent")
302 | tests_passed += 1
303 | else:
304 | print("❌ FAIL: Serialization inconsistencies detected")
305 |
306 | # Storage backend handling
307 | storage_ok = (
308 | storage_test.get("hook_stored_successfully", False) and
309 | storage_test.get("manual_stored_successfully", False) and
310 | storage_test.get("hook_retrieved_count", 0) > 0 and
311 | storage_test.get("manual_retrieved_count", 0) > 0
312 | )
313 |
314 | if storage_ok:
315 | print("✅ PASS: Storage backend handles both memory types consistently")
316 | tests_passed += 1
317 | else:
318 | print("❌ FAIL: Storage backend handling inconsistencies")
319 |
320 | # Timestamp precision
321 | precision_ok = (
322 | precision_test.get("serialization_preserves_precision", False) and
323 | precision_test.get("storage_preserves_float_precision", False)
324 | )
325 |
326 | if precision_ok:
327 | print("✅ PASS: Timestamp precision maintained across boundaries")
328 | tests_passed += 1
329 | else:
330 | print("❌ FAIL: Timestamp precision issues detected")
331 |
332 | print(f"\nOverall Result: {tests_passed}/{total_tests} tests passed")
333 |
334 | # Root cause analysis
335 | print("\n🔍 ROOT CAUSE ANALYSIS:")
336 |
337 | if tests_passed == total_tests:
338 | print("• ✅ Memory objects themselves are consistent")
339 | print("• ✅ Storage backend handles both types properly")
340 | print("• ✅ Timestamp precision is maintained")
341 | print("\n🎯 CONCLUSION: Issue #99 is likely in:")
342 | print(" - Search/retrieval query logic")
343 | print(" - Time-based filtering implementation")
344 | print(" - Tag-based search differences")
345 | print(" - Client-side display logic")
346 | print("\n💡 RECOMMENDATION: Focus investigation on search and retrieval functions")
347 | else:
348 | print("• ❌ Detected inconsistencies in core data handling")
349 | print("• This confirms the storage-level issues described in Discussion #98")
350 |
351 | return tests_passed == total_tests
352 |
353 | finally:
354 | await self.cleanup()
355 |
356 | async def main():
357 | """Main test execution."""
358 | test_suite = DataSerializationTest()
359 | success = await test_suite.run_all_tests()
360 | return 0 if success else 1
361 |
362 | if __name__ == "__main__":
363 | exit_code = asyncio.run(main())
364 | sys.exit(exit_code)
```
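The serialization roundtrip that this test automates can also be reproduced in isolation. A small sketch of the `to_dict()`/`from_dict()` consistency check follows; the tags, metadata, and printed message are illustrative only.

```python
# Standalone sketch of the roundtrip check exercised by the test above.
from mcp_memory_service.models.memory import Memory
from mcp_memory_service.utils.hashing import generate_content_hash

content = "Roundtrip check for memory serialization"
original = Memory(
    content=content,
    content_hash=generate_content_hash(content),
    tags=["roundtrip", "example"],
    memory_type="note",
    metadata={"source": "manual-check"},
)

# Serialize to a plain dict and rebuild the Memory from it
restored = Memory.from_dict(original.to_dict())

assert restored.content == original.content
assert restored.tags == original.tags
assert restored.metadata == original.metadata
print("to_dict()/from_dict() roundtrip preserved content, tags, and metadata")
```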
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/static/sse_test.html:
--------------------------------------------------------------------------------
```html
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | <head>
4 | <meta charset="UTF-8">
5 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
6 | <title>MCP Memory Service - SSE Test</title>
7 | <style>
8 | body {
9 | font-family: 'Courier New', monospace;
10 | margin: 20px;
11 | background-color: #f5f5f5;
12 | }
13 | .container {
14 | max-width: 1200px;
15 | margin: 0 auto;
16 | }
17 | h1 {
18 | color: #333;
19 | text-align: center;
20 | }
21 | .status {
22 | padding: 10px;
23 | margin: 10px 0;
24 | border-radius: 5px;
25 | font-weight: bold;
26 | }
27 | .status.connected {
28 | background-color: #d4edda;
29 | color: #155724;
30 | border: 1px solid #c3e6cb;
31 | }
32 | .status.disconnected {
33 | background-color: #f8d7da;
34 | color: #721c24;
35 | border: 1px solid #f5c6cb;
36 | }
37 | .controls {
38 | margin: 20px 0;
39 | text-align: center;
40 | }
41 | button {
42 | padding: 10px 20px;
43 | margin: 5px;
44 | border: none;
45 | border-radius: 5px;
46 | cursor: pointer;
47 | font-size: 14px;
48 | }
49 | .btn-primary {
50 | background-color: #007bff;
51 | color: white;
52 | }
53 | .btn-secondary {
54 | background-color: #6c757d;
55 | color: white;
56 | }
57 | .btn-danger {
58 | background-color: #dc3545;
59 | color: white;
60 | }
61 | .events-container {
62 | display: flex;
63 | gap: 20px;
64 | }
65 | .events-log {
66 | flex: 1;
67 | background-color: #000;
68 | color: #00ff00;
69 | padding: 15px;
70 | border-radius: 5px;
71 | height: 500px;
72 | overflow-y: auto;
73 | font-size: 12px;
74 | line-height: 1.4;
75 | }
76 | .stats-panel {
77 | width: 300px;
78 | background-color: white;
79 | padding: 15px;
80 | border-radius: 5px;
81 | border: 1px solid #ddd;
82 | }
83 | .event-entry {
84 | margin: 5px 0;
85 | padding: 5px;
86 | border-left: 3px solid #00ff00;
87 | padding-left: 10px;
88 | }
89 | .event-memory { border-left-color: #00bfff; }
90 | .event-search { border-left-color: #ffa500; }
91 | .event-heartbeat { border-left-color: #ff69b4; }
92 | .event-connection { border-left-color: #32cd32; }
93 | .stats-item {
94 | margin: 10px 0;
95 | padding: 5px;
96 | background-color: #f8f9fa;
97 | border-radius: 3px;
98 | }
99 | </style>
100 | </head>
101 | <body>
102 | <div class="container">
103 | <h1>🔥 MCP Memory Service - Real-time Events</h1>
104 |
105 | <div id="status" class="status disconnected">
106 | ❌ Disconnected from SSE stream
107 | </div>
108 |
109 | <div class="controls">
110 | <button id="connectBtn" class="btn-primary" onclick="connectSSE()">Connect to Events</button>
111 | <button id="disconnectBtn" class="btn-secondary" onclick="disconnectSSE()" disabled>Disconnect</button>
112 | <button class="btn-primary" onclick="testMemoryOperations()">Test Memory Operations</button>
113 | <button class="btn-danger" onclick="clearLog()">Clear Log</button>
114 | </div>
115 |
116 | <div class="events-container">
117 | <div class="events-log" id="eventsLog">
118 | <div>🚀 SSE Event Log - Waiting for connection...</div>
119 | </div>
120 |
121 | <div class="stats-panel">
122 | <h3>📊 Connection Stats</h3>
123 | <div class="stats-item">
124 | <strong>Status:</strong> <span id="statsStatus">Disconnected</span>
125 | </div>
126 | <div class="stats-item">
127 | <strong>Events Received:</strong> <span id="statsEvents">0</span>
128 | </div>
129 | <div class="stats-item">
130 | <strong>Connection Time:</strong> <span id="statsTime">-</span>
131 | </div>
132 | <div class="stats-item">
133 | <strong>Last Event:</strong> <span id="statsLastEvent">-</span>
134 | </div>
135 |
136 | <h3>🎯 Event Types</h3>
137 | <div class="stats-item">
138 | 💾 Memory Stored: <span id="statMemoryStored">0</span>
139 | </div>
140 | <div class="stats-item">
141 | 🗑️ Memory Deleted: <span id="statMemoryDeleted">0</span>
142 | </div>
143 | <div class="stats-item">
144 | 🔍 Search Completed: <span id="statSearchCompleted">0</span>
145 | </div>
146 | <div class="stats-item">
147 | 💓 Heartbeats: <span id="statHeartbeat">0</span>
148 | </div>
149 | </div>
150 | </div>
151 | </div>
152 |
153 | <script>
154 | let eventSource = null;
155 | let eventCount = 0;
156 | let connectionStart = null;
157 | let eventStats = {
158 | memory_stored: 0,
159 | memory_deleted: 0,
160 | search_completed: 0,
161 | heartbeat: 0
162 | };
163 |
164 | function updateStatus(connected) {
165 | const statusEl = document.getElementById('status');
166 | const connectBtn = document.getElementById('connectBtn');
167 | const disconnectBtn = document.getElementById('disconnectBtn');
168 | const statsStatus = document.getElementById('statsStatus');
169 |
170 | if (connected) {
171 | statusEl.className = 'status connected';
172 | statusEl.innerHTML = '✅ Connected to SSE stream';
173 | connectBtn.disabled = true;
174 | disconnectBtn.disabled = false;
175 | statsStatus.textContent = 'Connected';
176 | connectionStart = new Date();
177 | } else {
178 | statusEl.className = 'status disconnected';
179 | statusEl.innerHTML = '❌ Disconnected from SSE stream';
180 | connectBtn.disabled = false;
181 | disconnectBtn.disabled = true;
182 | statsStatus.textContent = 'Disconnected';
183 | connectionStart = null;
184 | }
185 | }
186 |
187 | function addLogEntry(message, type = 'info') {
188 | const log = document.getElementById('eventsLog');
189 | const entry = document.createElement('div');
190 | entry.className = `event-entry event-${type}`;
191 | entry.innerHTML = `<span style="color: #666;">${new Date().toLocaleTimeString()}</span> ${message}`;
192 | log.appendChild(entry);
193 | log.scrollTop = log.scrollHeight;
194 | }
195 |
196 | function updateStats() {
197 | document.getElementById('statsEvents').textContent = eventCount;
198 | document.getElementById('statMemoryStored').textContent = eventStats.memory_stored;
199 | document.getElementById('statMemoryDeleted').textContent = eventStats.memory_deleted;
200 | document.getElementById('statSearchCompleted').textContent = eventStats.search_completed;
201 | document.getElementById('statHeartbeat').textContent = eventStats.heartbeat;
202 |
203 | if (connectionStart) {
204 | const duration = Math.floor((new Date() - connectionStart) / 1000);
205 | document.getElementById('statsTime').textContent = `${duration}s`;
206 | }
207 | }
208 |
209 | function connectSSE() {
210 | if (eventSource) {
211 | eventSource.close();
212 | }
213 |
214 | addLogEntry('🔌 Connecting to SSE stream...', 'connection');
215 |
216 | eventSource = new EventSource('/api/events');
217 |
218 | eventSource.onopen = function(event) {
219 | updateStatus(true);
220 | addLogEntry('✅ Connected to SSE stream', 'connection');
221 | };
222 |
223 | eventSource.onmessage = function(event) {
224 | try {
225 | const data = JSON.parse(event.data);
226 | handleEvent(data, event.type || 'message');
227 | } catch (e) {
228 | addLogEntry(`❌ Invalid JSON: ${event.data}`, 'error');
229 | }
230 | };
231 |
232 | eventSource.addEventListener('memory_stored', function(event) {
233 | const data = JSON.parse(event.data);
234 | handleEvent(data, 'memory_stored');
235 | });
236 |
237 | eventSource.addEventListener('memory_deleted', function(event) {
238 | const data = JSON.parse(event.data);
239 | handleEvent(data, 'memory_deleted');
240 | });
241 |
242 | eventSource.addEventListener('search_completed', function(event) {
243 | const data = JSON.parse(event.data);
244 | handleEvent(data, 'search_completed');
245 | });
246 |
247 | eventSource.addEventListener('heartbeat', function(event) {
248 | const data = JSON.parse(event.data);
249 | handleEvent(data, 'heartbeat');
250 | });
251 |
252 | eventSource.addEventListener('connection_established', function(event) {
253 | const data = JSON.parse(event.data);
254 | handleEvent(data, 'connection');
255 | });
256 |
257 | eventSource.onerror = function(event) {
258 | updateStatus(false);
259 | addLogEntry('❌ SSE connection error', 'error');
260 | };
261 | }
262 |
263 | function disconnectSSE() {
264 | if (eventSource) {
265 | eventSource.close();
266 | eventSource = null;
267 | }
268 | updateStatus(false);
269 | addLogEntry('🔌 Disconnected from SSE stream', 'connection');
270 | }
271 |
272 | function handleEvent(data, eventType) {
273 | eventCount++;
274 | document.getElementById('statsLastEvent').textContent = eventType;
275 |
276 | // Update event type stats
277 | if (eventStats.hasOwnProperty(eventType)) {
278 | eventStats[eventType]++;
279 | }
280 |
281 | // Format event message
282 | let message = '';
283 | let logType = 'info';
284 |
285 | switch (eventType) {
286 | case 'memory_stored':
287 | const hash = data.content_hash ? data.content_hash.substring(0, 12) + '...' : 'unknown';
288 | const preview = data.content_preview || 'No preview';
289 | message = `💾 <strong>Memory Stored:</strong> ${hash}<br> ${preview}`;
290 | logType = 'memory';
291 | break;
292 |
293 | case 'memory_deleted':
294 | const delHash = data.content_hash ? data.content_hash.substring(0, 12) + '...' : 'unknown';
295 | const success = data.success ? '✅' : '❌';
296 | message = `🗑️ <strong>Memory Deleted:</strong> ${success} ${delHash}`;
297 | logType = 'memory';
298 | break;
299 |
300 | case 'search_completed':
301 | const query = data.query || 'Unknown query';
302 | const count = data.results_count || 0;
303 | const time = data.processing_time_ms || 0;
304 | message = `🔍 <strong>Search:</strong> "${query}" → ${count} results (${time.toFixed(1)}ms)`;
305 | logType = 'search';
306 | break;
307 |
308 | case 'heartbeat':
309 | const connections = data.active_connections || 0;
310 | message = `💓 <strong>Heartbeat:</strong> ${connections} active connections`;
311 | logType = 'heartbeat';
312 | break;
313 |
314 | case 'connection_established':
315 | message = `🔌 <strong>Connected:</strong> ${data.message || 'Connection established'}`;
316 | logType = 'connection';
317 | break;
318 |
319 | default:
320 | message = `📨 <strong>${eventType}:</strong> ${JSON.stringify(data)}`;
321 | }
322 |
323 | addLogEntry(message, logType);
324 | updateStats();
325 | }
326 |
327 | async function testMemoryOperations() {
328 | addLogEntry('🚀 Starting test memory operations...', 'info');
329 |
330 | try {
331 | // Store a test memory
332 | const testMemory = {
333 | content: `SSE test memory created at ${new Date().toLocaleString()}`,
334 | tags: ['sse-test', 'browser-test', 'demo'],
335 | memory_type: 'test',
336 | metadata: { source: 'sse_test_page' }
337 | };
338 |
339 | const storeResponse = await fetch('/api/memories', {
340 | method: 'POST',
341 | headers: { 'Content-Type': 'application/json' },
342 | body: JSON.stringify(testMemory)
343 | });
344 |
345 | if (storeResponse.ok) {
346 | const result = await storeResponse.json();
347 | addLogEntry(`✅ Test memory stored: ${result.content_hash?.substring(0, 12)}...`, 'info');
348 |
349 | // Wait a moment, then perform a search
350 | setTimeout(async () => {
351 | const searchResponse = await fetch('/api/search', {
352 | method: 'POST',
353 | headers: { 'Content-Type': 'application/json' },
354 | body: JSON.stringify({ query: 'SSE test memory', n_results: 3 })
355 | });
356 |
357 | if (searchResponse.ok) {
358 | addLogEntry('✅ Test search completed', 'info');
359 |
360 | // Wait another moment, then delete the memory
361 | setTimeout(async () => {
362 | if (result.content_hash) {
363 | const deleteResponse = await fetch(`/api/memories/${result.content_hash}`, {
364 | method: 'DELETE'
365 | });
366 |
367 | if (deleteResponse.ok) {
368 | addLogEntry('✅ Test memory deleted', 'info');
369 | }
370 | }
371 | }, 2000);
372 | }
373 | }, 2000);
374 | }
375 | } catch (error) {
376 | addLogEntry(`❌ Test operation failed: ${error.message}`, 'error');
377 | }
378 | }
379 |
380 | function clearLog() {
381 | document.getElementById('eventsLog').innerHTML = '<div>🚀 SSE Event Log - Log cleared</div>';
382 | eventCount = 0;
383 | eventStats = { memory_stored: 0, memory_deleted: 0, search_completed: 0, heartbeat: 0 };
384 | updateStats();
385 | }
386 |
387 | // Optionally auto-connect on page load (disabled by default)
388 | window.onload = function() {
389 | // connectSSE(); // Uncomment to auto-connect
390 | };
391 |
392 | // Clean up on page unload
393 | window.onbeforeunload = function() {
394 | if (eventSource) {
395 | eventSource.close();
396 | }
397 | };
398 | </script>
399 | </body>
400 | </html>
```
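For completeness, the same `/api/events` stream that this test page consumes can be read outside the browser. Below is a rough Python sketch with hand-rolled SSE parsing; the base URL is a placeholder and assumes the HTTP server is running with the events endpoint enabled.

```python
# Rough command-line counterpart to the SSE test page: stream /api/events
# and print each event type with its JSON payload.
import json

import requests

BASE_URL = "http://127.0.0.1:8000"  # placeholder; adjust to your server

with requests.get(f"{BASE_URL}/api/events", stream=True) as response:
    response.raise_for_status()
    event_type = "message"
    for line in response.iter_lines(decode_unicode=True):
        if not line:                      # a blank line terminates an SSE event
            event_type = "message"
            continue
        if line.startswith("event:"):
            event_type = line.split(":", 1)[1].strip()
        elif line.startswith("data:"):
            payload = line.split(":", 1)[1].strip()
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                pass                      # keep the raw string if it isn't JSON
            print(f"{event_type}: {payload}")
```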
--------------------------------------------------------------------------------
/scripts/migration/cleanup_mcp_timestamps.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Migration script to clean up inconsistent timestamp fields in the MCP Memory ChromaDB database.
4 | This script will:
5 | 1. Backup the database
6 | 2. Standardize timestamps to use only the 'timestamp' field as integer
7 | 3. Remove redundant timestamp fields
8 | 4. Ensure all memories have proper timestamps
9 | """
10 |
11 | import sqlite3
12 | import shutil
13 | import os
14 | import json
15 | from datetime import datetime
16 | import sys
17 | from pathlib import Path
18 |
19 | def find_database_path():
20 | """Find the database path from the Claude Desktop config or environment variables."""
21 | # First check environment variables (highest priority)
22 | if 'MCP_MEMORY_CHROMA_PATH' in os.environ:
23 | db_dir = os.environ.get('MCP_MEMORY_CHROMA_PATH')
24 | return os.path.join(db_dir, "chroma.sqlite3")
25 |
26 | # Try to find the Claude Desktop config
27 | config_paths = [
28 | os.path.expanduser("~/AppData/Local/DesktopCommander/desktop_config.json"),
29 | os.path.expanduser("~/.config/DesktopCommander/desktop_config.json"),
30 | os.path.expanduser("~/DesktopCommander/desktop_config.json")
31 | ]
32 |
33 | for config_path in config_paths:
34 | if os.path.exists(config_path):
35 | try:
36 | with open(config_path, 'r') as f:
37 | config = json.load(f)
38 |
39 | # Look for memory config
40 | if 'services' in config and 'memory' in config['services']:
41 | memory_config = config['services']['memory']
42 | if 'env' in memory_config and 'MCP_MEMORY_CHROMA_PATH' in memory_config['env']:
43 | db_dir = memory_config['env']['MCP_MEMORY_CHROMA_PATH']
44 | return os.path.join(db_dir, "chroma.sqlite3")
45 | except Exception as e:
46 | print(f"Warning: Could not parse config at {config_path}: {e}")
47 |
48 | # Fallback paths
49 | fallback_paths = [
50 | os.path.expanduser("~/AppData/Local/mcp-memory/chroma.sqlite3"),
51 | os.path.expanduser("~/.local/share/mcp-memory/chroma.sqlite3"),
52 | ]
53 |
54 | for path in fallback_paths:
55 | if os.path.exists(path):
56 | return path
57 |
58 | # Ask user if nothing found
59 | print("Could not automatically determine database path.")
60 | user_path = input("Please enter the full path to the chroma.sqlite3 database: ")
61 | return user_path if user_path else None
62 |
63 | # Database paths
64 | DB_PATH = find_database_path()
65 | BACKUP_PATH = f"{DB_PATH}.backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}" if DB_PATH else None
66 |
67 | def backup_database():
68 | """Create a backup of the database before migration."""
69 | if not DB_PATH or not BACKUP_PATH:
70 | print("❌ Cannot create backup: Database path not determined")
71 | return False
72 |
73 | # Ensure backup directory exists
74 | backup_dir = os.path.dirname(BACKUP_PATH)
75 | if not os.path.exists(backup_dir):
76 | try:
77 | os.makedirs(backup_dir, exist_ok=True)
78 | except Exception as e:
79 | print(f"❌ Failed to create backup directory: {e}")
80 | return False
81 |
82 | print(f"Creating backup at: {BACKUP_PATH}")
83 | try:
84 | shutil.copy2(DB_PATH, BACKUP_PATH)
85 | print("✅ Backup created successfully")
86 | return True
87 | except Exception as e:
88 | print(f"❌ Failed to create backup: {e}")
89 | return False
90 |
91 | def analyze_timestamps(conn):
92 | """Analyze current timestamp situation."""
93 | print("\n📊 Analyzing current timestamp fields...")
94 |
95 | cursor = conn.cursor()
96 |
97 | # Get all unique timestamp-related keys
98 | cursor.execute("""
99 | SELECT key,
100 | COUNT(DISTINCT id) as memory_count,
101 | COUNT(CASE WHEN string_value IS NOT NULL THEN 1 END) as string_values,
102 | COUNT(CASE WHEN int_value IS NOT NULL THEN 1 END) as int_values,
103 | COUNT(CASE WHEN float_value IS NOT NULL THEN 1 END) as float_values
104 | FROM embedding_metadata
105 | WHERE key IN ('timestamp', 'created_at', 'created_at_iso', 'timestamp_float',
106 | 'timestamp_str', 'updated_at', 'updated_at_iso', 'date')
107 | GROUP BY key
108 | ORDER BY key
109 | """)
110 |
111 | results = cursor.fetchall()
112 | print("\nTimestamp field usage:")
113 | print("-" * 70)
114 | print(f"{'Field':<20} {'Memories':<12} {'String':<10} {'Int':<10} {'Float':<10}")
115 | print("-" * 70)
116 |
117 | for row in results:
118 | print(f"{row[0]:<20} {row[1]:<12} {row[2]:<10} {row[3]:<10} {row[4]:<10}")
119 |
120 | return results
121 |
122 | def migrate_timestamps(conn):
123 | """Migrate all timestamps to standardized format."""
124 | print("\n🔄 Starting timestamp migration...")
125 |
126 | cursor = conn.cursor()
127 |
128 | # First, ensure all memories have a timestamp value
129 | # Priority: timestamp (int) > created_at (float) > timestamp_float (float)
130 |
131 | print("Step 1: Ensuring all memories have timestamp values...")
132 |
133 | # Count memories without any timestamp
134 | cursor.execute("""
135 | SELECT COUNT(DISTINCT em.id)
136 | FROM embedding_metadata em
137 | WHERE em.id NOT IN (
138 | SELECT id FROM embedding_metadata
139 | WHERE key = 'timestamp' AND int_value IS NOT NULL
140 | )
141 | """)
142 |
143 | missing_count = cursor.fetchone()[0]
144 | print(f" Found {missing_count} memories without integer timestamp")
145 |
146 | if missing_count > 0:
147 | # Get memories that need timestamp migration
148 | cursor.execute("""
149 | SELECT DISTINCT
150 | em.id,
151 | MAX(CASE WHEN em2.key = 'created_at' THEN em2.float_value END) as created_at_float,
152 | MAX(CASE WHEN em2.key = 'timestamp_float' THEN em2.float_value END) as timestamp_float
153 | FROM embedding_metadata em
154 | LEFT JOIN embedding_metadata em2 ON em.id = em2.id
155 | AND em2.key IN ('created_at', 'timestamp_float')
156 | WHERE em.id NOT IN (
157 | SELECT id FROM embedding_metadata
158 | WHERE key = 'timestamp' AND int_value IS NOT NULL
159 | )
160 | GROUP BY em.id
161 | """)
162 |
163 | memories_to_fix = cursor.fetchall()
164 | fixed_count = 0
165 |
166 | for memory_id, created_at, timestamp_float in memories_to_fix:
167 | # Use the first available timestamp
168 | timestamp_value = None
169 | if created_at:
170 | timestamp_value = int(created_at)
171 | elif timestamp_float:
172 | timestamp_value = int(timestamp_float)
173 | else:
174 | # If no timestamp found, use current time (this shouldn't happen)
175 | timestamp_value = int(datetime.now().timestamp())
176 | print(f" ⚠️ Memory {memory_id} has no timestamp, using current time")
177 |
178 | # Check if a timestamp entry already exists for this memory
179 | cursor.execute("""
180 | SELECT 1 FROM embedding_metadata
181 | WHERE id = ? AND key = 'timestamp'
182 | """, (memory_id,))
183 |
184 | if cursor.fetchone():
185 | # Update existing timestamp record
186 | cursor.execute("""
187 | UPDATE embedding_metadata
188 | SET string_value = NULL,
189 | int_value = ?,
190 | float_value = NULL
191 | WHERE id = ? AND key = 'timestamp'
192 | """, (timestamp_value, memory_id))
193 | else:
194 | # Insert new timestamp record
195 | cursor.execute("""
196 | INSERT INTO embedding_metadata (id, key, string_value, int_value, float_value)
197 | VALUES (?, 'timestamp', NULL, ?, NULL)
198 | """, (memory_id, timestamp_value))
199 |
200 | fixed_count += 1
201 |
202 | conn.commit()
203 | print(f" ✅ Fixed {fixed_count} memories with missing timestamps")
204 |
205 | # Step 2: Update existing timestamp fields that have wrong data type
206 | print("\nStep 2: Standardizing timestamp data types...")
207 |
208 | # Find timestamps stored as floats that should be ints
209 | cursor.execute("""
210 | UPDATE embedding_metadata
211 | SET int_value = CAST(float_value AS INTEGER),
212 | float_value = NULL
213 | WHERE key = 'timestamp'
214 | AND float_value IS NOT NULL
215 | AND int_value IS NULL
216 | """)
217 |
218 | float_fixes = cursor.rowcount
219 | print(f" ✅ Converted {float_fixes} float timestamps to integers")
220 |
221 | # Find timestamps stored as strings that should be ints
222 | cursor.execute("""
223 | SELECT id, string_value
224 | FROM embedding_metadata
225 | WHERE key = 'timestamp'
226 | AND string_value IS NOT NULL
227 | AND int_value IS NULL
228 | """)
229 |
230 | string_timestamps = cursor.fetchall()
231 | string_fixes = 0
232 |
233 | for row_id, string_value in string_timestamps:
234 | try:
235 | # Try to convert string to float then to int
236 | int_timestamp = int(float(string_value))
237 |
238 | # Update the record
239 | cursor.execute("""
240 | UPDATE embedding_metadata
241 | SET int_value = ?,
242 | string_value = NULL
243 | WHERE id = ? AND key = 'timestamp'
244 | """, (int_timestamp, row_id))
245 |
246 | string_fixes += 1
247 | except (ValueError, TypeError):
248 | # Skip strings that can't be converted to float/int
249 | print(f" ⚠️ Could not convert string timestamp for memory {row_id}: '{string_value}'")
250 |
251 | conn.commit()
252 | print(f" ✅ Converted {string_fixes} string timestamps to integers")
253 |
254 | def cleanup_redundant_fields(conn):
255 | """Remove redundant timestamp fields."""
256 | print("\n🧹 Cleaning up redundant timestamp fields...")
257 |
258 | cursor = conn.cursor()
259 |
260 | # List of redundant fields to remove
261 | redundant_fields = [
262 | 'created_at', 'created_at_iso', 'timestamp_float',
263 | 'timestamp_str', 'updated_at', 'updated_at_iso', 'date'
264 | ]
265 |
266 | total_deleted = 0
267 |
268 | for field in redundant_fields:
269 | cursor.execute("""
270 | DELETE FROM embedding_metadata
271 | WHERE key = ?
272 | """, (field,))
273 |
274 | deleted = cursor.rowcount
275 | total_deleted += deleted
276 |
277 | if deleted > 0:
278 | print(f" ✅ Removed {deleted} '{field}' entries")
279 |
280 | conn.commit()
281 | print(f"\n Total redundant entries removed: {total_deleted}")
282 |
283 | def verify_migration(conn):
284 | """Verify the migration was successful."""
285 | print("\n✔️ Verifying migration results...")
286 |
287 | cursor = conn.cursor()
288 |
289 | # Check that all memories have timestamps
290 | cursor.execute("""
291 | SELECT COUNT(DISTINCT e.id)
292 | FROM embeddings e
293 | LEFT JOIN embedding_metadata em
294 | ON e.id = em.id AND em.key = 'timestamp'
295 | WHERE em.int_value IS NULL
296 | """)
297 |
298 | missing = cursor.fetchone()[0]
299 |
300 | if missing > 0:
301 | print(f" ⚠️ WARNING: {missing} memories still missing timestamps")
302 | else:
303 | print(" ✅ All memories have timestamps")
304 |
305 | # Check for any remaining redundant fields
306 | cursor.execute("""
307 | SELECT key, COUNT(*) as count
308 | FROM embedding_metadata
309 | WHERE key IN ('created_at', 'created_at_iso', 'timestamp_float',
310 | 'timestamp_str', 'updated_at', 'updated_at_iso', 'date')
311 | GROUP BY key
312 | """)
313 |
314 | redundant = cursor.fetchall()
315 |
316 | if redundant:
317 | print(" ⚠️ WARNING: Found remaining redundant fields:")
318 | for field, count in redundant:
319 | print(f" - {field}: {count} entries")
320 | else:
321 | print(" ✅ All redundant timestamp fields removed")
322 |
323 | # Show final timestamp field stats
324 | cursor.execute("""
325 | SELECT
326 | COUNT(DISTINCT id) as total_memories,
327 | COUNT(CASE WHEN int_value IS NOT NULL THEN 1 END) as valid_timestamps,
328 | MIN(int_value) as earliest_timestamp,
329 | MAX(int_value) as latest_timestamp
330 | FROM embedding_metadata
331 | WHERE key = 'timestamp'
332 | """)
333 |
334 | stats = cursor.fetchone()
335 |
336 | print(f"\n📊 Final Statistics:")
337 | print(f" Total memories with timestamps: {stats[0]}")
338 | print(f" Valid integer timestamps: {stats[1]}")
339 |
340 | if stats[2] and stats[3]:
341 | earliest = datetime.fromtimestamp(stats[2]).strftime('%Y-%m-%d %H:%M:%S')
342 | latest = datetime.fromtimestamp(stats[3]).strftime('%Y-%m-%d %H:%M:%S')
343 | print(f" Date range: {earliest} to {latest}")
344 |
345 | def main():
346 | """Main migration function."""
347 | print("=" * 70)
348 | print("MCP Memory Timestamp Migration Script")
349 | print("=" * 70)
350 |
351 | # Check if database path was found
352 | if not DB_PATH:
353 | print("❌ Could not determine database path")
354 | return 1
355 |
356 | print(f"📂 Using database: {DB_PATH}")
357 |
358 | # Check if database exists
359 | if not os.path.exists(DB_PATH):
360 | print(f"❌ Database not found at: {DB_PATH}")
361 | return 1
362 |
363 | # Create backup
364 | if not backup_database():
365 | print("❌ Migration aborted - could not create backup")
366 | return 1
367 |
368 | # Connect to database
369 | try:
370 | conn = sqlite3.connect(DB_PATH)
371 | conn.set_trace_callback(print) # Print all SQL statements for debugging
372 | print(f"\n✅ Connected to database: {DB_PATH}")
373 | except Exception as e:
374 | print(f"❌ Failed to connect to database: {e}")
375 | return 1
376 |
377 | try:
378 | # Analyze current state
379 | analyze_timestamps(conn)
380 |
381 | # Ask for confirmation
382 | print("\n" + "=" * 70)
383 | print("⚠️ This migration will:")
384 | print(" 1. Standardize all timestamps to integer format in 'timestamp' field")
385 | print(" 2. Remove all redundant timestamp fields")
386 | print(" 3. Ensure all memories have valid timestamps")
387 | print("\nA backup has been created at:")
388 | print(f" {BACKUP_PATH}")
389 | print("=" * 70)
390 |
391 | response = input("\nProceed with migration? (yes/no): ").strip().lower()
392 |
393 | if response != 'yes':
394 | print("Migration cancelled.")
395 | conn.close()
396 | return 0
397 |
398 | # Perform migration steps one by one with transaction control
399 | try:
400 | # Start with ensuring timestamps
401 | migrate_timestamps(conn)
402 | print(" ✅ Timestamp migration successful")
403 | except Exception as e:
404 | conn.rollback()
405 | print(f" ❌ Failed during timestamp migration: {e}")
406 | raise
407 |
408 | try:
409 | # Then cleanup redundant fields
410 | cleanup_redundant_fields(conn)
411 | print(" ✅ Cleanup successful")
412 | except Exception as e:
413 | conn.rollback()
414 | print(f" ❌ Failed during cleanup: {e}")
415 | raise
416 |
417 | # Verify results
418 | verify_migration(conn)
419 |
420 | # Vacuum database to reclaim space
421 | print("\n🔧 Optimizing database...")
422 | conn.execute("VACUUM")
423 | conn.commit()
424 | print(" ✅ Database optimized")
425 |
426 | print("\n✅ Migration completed successfully!")
427 | print(f"\nBackup saved at: {BACKUP_PATH}")
428 | print("You can restore the backup if needed by copying it back to the original location.")
429 |
430 | except Exception as e:
431 | print(f"\n❌ Migration failed: {e}")
432 | print("Rolling back changes...")
433 | conn.rollback()
434 | print(f"Please restore from backup: {BACKUP_PATH}")
435 | return 1
436 | finally:
437 | conn.close()
438 |
439 | return 0
440 |
441 | if __name__ == "__main__":
442 | sys.exit(main())
443 |
```
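After the migration has run, the check performed by `verify_migration()` can also be repeated ad hoc. A short sketch reusing the script's own verification queries (the `chroma.sqlite3` path is a placeholder):

```python
# Post-migration spot check: confirm each embedding has an integer 'timestamp'
# metadata entry, using the same queries as verify_migration() above.
import sqlite3

DB_PATH = "chroma.sqlite3"  # placeholder: point this at your ChromaDB file

conn = sqlite3.connect(DB_PATH)
try:
    # Memories (embeddings) still lacking an integer 'timestamp' entry
    missing = conn.execute("""
        SELECT COUNT(DISTINCT e.id)
        FROM embeddings e
        LEFT JOIN embedding_metadata em
               ON e.id = em.id AND em.key = 'timestamp'
        WHERE em.int_value IS NULL
    """).fetchone()[0]

    # Memories that do carry an integer timestamp
    total = conn.execute(
        "SELECT COUNT(DISTINCT id) FROM embedding_metadata "
        "WHERE key = 'timestamp' AND int_value IS NOT NULL"
    ).fetchone()[0]
finally:
    conn.close()

print(f"{total} memories have integer timestamps; {missing} are missing one")
```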