This is page 13 of 35. Use http://codebase.md/doobidoo/mcp-memory-service?page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/archive/docs-removed-2025-08-23/windows.md:
--------------------------------------------------------------------------------
```markdown
# Windows Setup Guide
This guide provides comprehensive instructions for setting up and running the MCP Memory Service on Windows systems, including handling common Windows-specific issues.
## Prerequisites
- **Python 3.10 or newer** (Python 3.11 recommended)
- **Git for Windows** ([download here](https://git-scm.com/download/win))
- **Visual Studio Build Tools** (for PyTorch compilation)
- **PowerShell 5.1+** or **Windows Terminal** (recommended)
## Quick Installation
### Automatic Installation (Recommended)
```powershell
# Clone repository
git clone https://github.com/doobidoo/mcp-memory-service.git
cd mcp-memory-service
# Run Windows-specific installer
python install.py --windows
```
The installer automatically:
- Detects CUDA availability
- Installs the correct PyTorch version
- Configures Windows-specific settings
- Sets up optimal storage backend
## Manual Installation
### 1. Environment Setup
```powershell
# Clone repository
git clone https://github.com/doobidoo/mcp-memory-service.git
cd mcp-memory-service
# Create virtual environment
python -m venv venv
# Activate virtual environment
venv\Scripts\activate
# Upgrade pip
python -m pip install --upgrade pip
```
### 2. Install Dependencies
#### For CUDA-enabled Systems
```powershell
# Install PyTorch with CUDA support
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Install other dependencies
pip install -e .
pip install chromadb sentence-transformers
```
#### For CPU-only Systems
```powershell
# Install CPU-only PyTorch
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Install with SQLite-vec backend (recommended for CPU)
pip install -e .
pip install sentence-transformers sqlite-vec
```
### 3. Windows-Specific Installation Script
If you encounter issues, use the Windows-specific installation script:
```powershell
python scripts/install_windows.py
```
This script handles:
1. CUDA detection and appropriate PyTorch installation
2. Resolving common Windows dependency conflicts
3. Setting up Windows-specific environment variables
4. Configuring optimal storage backend based on hardware
## Configuration
### Environment Variables
#### For CUDA Systems
```powershell
# Set environment variables (PowerShell)
$env:MCP_MEMORY_STORAGE_BACKEND = "chromadb"
$env:MCP_MEMORY_USE_CUDA = "true"
$env:MCP_MEMORY_CHROMA_PATH = "$env:USERPROFILE\.mcp_memory_chroma"
# Or set permanently
[Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "chromadb", "User")
[Environment]::SetEnvironmentVariable("MCP_MEMORY_USE_CUDA", "true", "User")
```
#### For CPU-only Systems
```powershell
# Set environment variables (PowerShell)
$env:MCP_MEMORY_STORAGE_BACKEND = "sqlite_vec"
$env:MCP_MEMORY_SQLITE_VEC_PATH = "$env:USERPROFILE\.mcp_memory_sqlite"
$env:MCP_MEMORY_CPU_ONLY = "true"
# Or set permanently
[Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "sqlite_vec", "User")
[Environment]::SetEnvironmentVariable("MCP_MEMORY_CPU_ONLY", "true", "User")
```
### Windows Batch Scripts
The repository includes Windows batch scripts for easy startup:
#### `scripts/run/run-with-uv.bat`
```batch
@echo off
cd /d "%~dp0..\.."
call venv\Scripts\activate.bat
python src\mcp_memory_service\server.py
```
#### Usage
```powershell
# Run the server
.\scripts\run\run-with-uv.bat
# Or run directly
python src\mcp_memory_service\server.py
```
## Claude Desktop Configuration
### Windows Configuration File Location
Claude Desktop configuration is typically located at:
```
%APPDATA%\Claude\claude_desktop_config.json
```
### Configuration Examples
#### For CUDA Systems
```json
{
"mcpServers": {
"memory": {
"command": "python",
"args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
"env": {
"MCP_MEMORY_STORAGE_BACKEND": "chromadb",
"MCP_MEMORY_USE_CUDA": "true",
"PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
}
}
}
}
```
#### For CPU-only Systems
```json
{
"mcpServers": {
"memory": {
"command": "python",
"args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
"env": {
"MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
"MCP_MEMORY_CPU_ONLY": "true",
"PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
}
}
}
}
```
#### Using Batch Script
```json
{
"mcpServers": {
"memory": {
"command": "C:\\path\\to\\mcp-memory-service\\scripts\\run\\run-with-uv.bat"
}
}
}
```
## Hardware Detection and Optimization
### CUDA Detection
The installer automatically detects CUDA availability:
```python
def detect_cuda():
    try:
        import torch
        return torch.cuda.is_available()
    except ImportError:
        return False
```
### DirectML Support
For Windows systems without CUDA but with DirectX 12 compatible GPUs:
```powershell
# Install DirectML-enabled PyTorch
pip install torch-directml
```
Configure for DirectML:
```powershell
$env:MCP_MEMORY_USE_DIRECTML = "true"
$env:MCP_MEMORY_DEVICE = "dml"
```
## Windows-Specific Features
### Windows Service Installation
To run MCP Memory Service as a Windows service:
```powershell
# Install as Windows service (requires admin privileges)
python scripts/install_windows_service.py install
# Start service
net start MCPMemoryService
# Stop service
net stop MCPMemoryService
# Remove service
python scripts/install_windows_service.py remove
```
### Task Scheduler Integration
Create a scheduled task to start MCP Memory Service on boot:
```powershell
# Create scheduled task
schtasks /create /tn "MCP Memory Service" /tr "C:\path\to\mcp-memory-service\scripts\run\run-with-uv.bat" /sc onlogon /ru "$env:USERNAME"
# Delete scheduled task
schtasks /delete /tn "MCP Memory Service" /f
```
## Troubleshooting
### Common Windows Issues
#### 1. Path Length Limitations
**Symptom**: Installation fails with "path too long" errors
**Solution**: Enable long path support:
```powershell
# Run as Administrator
New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force
```
#### 2. Visual Studio Build Tools Missing
**Symptom**:
```
Microsoft Visual C++ 14.0 is required
```
**Solution**: Install Visual Studio Build Tools:
```powershell
# Download and install from:
# https://visualstudio.microsoft.com/visual-cpp-build-tools/
# Or install via winget
winget install Microsoft.VisualStudio.2022.BuildTools
```
#### 3. CUDA Version Mismatch
**Symptom**: PyTorch CUDA installation issues
**Solution**: Match PyTorch CUDA version to your installed CUDA:
```powershell
# Check CUDA version
nvcc --version
# Install matching PyTorch version
# For CUDA 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# For CUDA 12.1
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
```
#### 4. Permission Issues
**Symptom**: Access denied errors when installing or running
**Solution**: Run PowerShell as Administrator and check folder permissions:
```powershell
# Check current user permissions
whoami /groups
# Run installation as Administrator if needed
# Or adjust folder permissions
icacls "C:\path\to\mcp-memory-service" /grant "${env:USERNAME}:(F)" /t
```
#### 5. Windows Defender Issues
**Symptom**: Installation files deleted or blocked
**Solution**: Add exclusions to Windows Defender:
```powershell
# Add folder exclusion (run as Administrator)
Add-MpPreference -ExclusionPath "C:\path\to\mcp-memory-service"
# Add process exclusion
Add-MpPreference -ExclusionProcess "python.exe"
```
### Diagnostic Commands
#### System Information
```powershell
# Check Python version and location
python --version
Get-Command python
# Check pip version
pip --version
# Check CUDA availability
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
# Check DirectML (if installed)
python -c "import torch_directml; print('DirectML available')"
# Check Windows version
Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion
```
#### Environment Verification
```powershell
# Check environment variables
Get-ChildItem Env: | Where-Object {$_.Name -like "MCP_MEMORY_*"}
# Check virtual environment
echo $env:VIRTUAL_ENV
# Verify key packages
python -c "import torch; print(f'PyTorch: {torch.__version__}')"
python -c "import sentence_transformers; print('SentenceTransformers: OK')"
python -c "import chromadb; print('ChromaDB: OK')" # or sqlite_vec
```
#### Network and Firewall
```powershell
# Check if Windows Firewall is blocking
Get-NetFirewallRule -DisplayName "*Python*" | Format-Table
# Test network connectivity (if using HTTP mode)
Test-NetConnection -ComputerName localhost -Port 8000
```
### Performance Optimization
#### Windows-Specific Settings
```powershell
# Optimize for machine learning workloads
$env:OMP_NUM_THREADS = [Environment]::ProcessorCount
$env:MKL_NUM_THREADS = [Environment]::ProcessorCount
# Set Windows-specific memory settings
$env:MCP_MEMORY_WINDOWS_OPTIMIZATION = "true"
$env:MCP_MEMORY_BATCH_SIZE = "32"
```
#### Resource Monitoring
```powershell
# Monitor memory usage
Get-Process python | Select-Object ProcessName, WorkingSet, CPU
# Monitor GPU usage (if CUDA)
nvidia-smi
# Monitor disk I/O
Get-Counter "\PhysicalDisk(_Total)\Disk Reads/sec"
```
## Development on Windows
### Setting up Development Environment
```powershell
# Clone for development
git clone https://github.com/doobidoo/mcp-memory-service.git
cd mcp-memory-service
# Create development environment
python -m venv venv-dev
venv-dev\Scripts\activate
# Install in development mode
pip install -e .
pip install pytest black isort mypy
# Run tests
pytest tests/
```
### Windows-Specific Testing
```powershell
# Run Windows-specific tests
pytest tests/platform/test_windows.py -v
# Test CUDA functionality (if available)
pytest tests/cuda/ -v
# Test DirectML functionality (if available)
pytest tests/directml/ -v
```
## Alternative Installation Methods
### Using Chocolatey
```powershell
# Install Python via Chocolatey
choco install python
# Install Git
choco install git
# Then follow standard installation
```
### Using Conda
```powershell
# Create conda environment
conda create -n mcp-memory python=3.11
conda activate mcp-memory
# Install PyTorch via conda
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
# Install other dependencies
pip install -e .
```
### Using Docker on Windows
```powershell
# Using Docker Desktop
git clone https://github.com/doobidoo/mcp-memory-service.git
cd mcp-memory-service
# Build Windows container
docker build -f Dockerfile.windows -t mcp-memory-service-windows .
# Run container
docker run -p 8000:8000 mcp-memory-service-windows
```
## Related Documentation
- [Installation Guide](../installation/master-guide.md) - General installation instructions
- [Multi-Client Setup](../integration/multi-client.md) - Multi-client configuration
- [Troubleshooting](../troubleshooting/general.md) - Windows-specific troubleshooting
- [Docker Deployment](../deployment/docker.md) - Docker setup on Windows
```
--------------------------------------------------------------------------------
/tests/integration/test_api_tag_time_search.py:
--------------------------------------------------------------------------------
```python
"""
Integration tests for POST /api/search/by-tag endpoint with time_filter parameter.
Tests the time_filter functionality added in PR #215 to fix semantic over-filtering bug (issue #214).
NOTE: These tests currently have SQLite threading issues with TestClient.
The async fixture creates storage in one thread, but TestClient creates its own threads,
causing "SQLite objects created in a thread can only be used in that same thread" errors.
TODO: Fix by using synchronous fixtures like test_http_api_search_by_tag_endpoint in
tests/integration/test_api_with_memory_service.py (line 670), which creates storage
within the test function rather than in an async fixture.
For now, comprehensive unit tests in tests/unit/test_tag_time_filtering.py provide
excellent coverage of the tag+time filtering functionality across all storage backends.
"""
import pytest
import pytest_asyncio
import tempfile
import os
import time
from fastapi.testclient import TestClient
from mcp_memory_service.web.dependencies import set_storage
from mcp_memory_service.services.memory_service import MemoryService
from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
from mcp_memory_service.models.memory import Memory
from mcp_memory_service.utils.hashing import generate_content_hash
@pytest.fixture
def temp_db():
    """Yield a path to a throwaway SQLite database file inside a temp directory."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield os.path.join(scratch_dir, "test_api_tag_time.db")
@pytest_asyncio.fixture
async def storage_with_test_data(temp_db):
    """Yield storage pre-populated with memories at staggered timestamps.

    Seeds three memories so tests can exercise tag + time filtering:
    a 2-day-old task, a current task, and a 3-day-old note.
    """
    storage = SqliteVecMemoryStorage(temp_db)
    await storage.initialize()

    now = time.time()
    day_seconds = 24 * 60 * 60
    # (content, tags, memory_type, created_at) — stored in this order.
    seed_rows = [
        ("Old task from 2 days ago", ["task", "old"], "task", now - 2 * day_seconds),
        ("Recent task from today", ["task", "recent"], "task", now),
        ("Old note from 3 days ago", ["note", "old"], "note", now - 3 * day_seconds),
    ]
    for content, tags, memory_type, created_at in seed_rows:
        await storage.store(Memory(
            content=content,
            content_hash=generate_content_hash(content),
            tags=tags,
            memory_type=memory_type,
            created_at=created_at,
        ))

    yield storage
    storage.close()
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_with_time_filter_recent(storage_with_test_data):
    """A 1-day-ago time_filter on the "task" tag returns only today's task."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    # Cutoff: yesterday's date (ISO), so the 2-day-old task falls outside it.
    cutoff = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["task"], "time_filter": cutoff, "limit": 10},
    )

    assert resp.status_code == 200
    hits = resp.json()["memories"]
    assert len(hits) == 1
    only_hit = hits[0]
    assert "recent" in only_hit["tags"]
    assert "Recent task from today" in only_hit["content"]
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_with_time_filter_excludes_old(storage_with_test_data):
    """A near-now time_filter excludes every multi-day-old "old" memory."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    # Cutoff ~10 seconds ago: all "old" memories are 2-3 days old, so none match.
    cutoff = time.strftime("%Y-%m-%d", time.gmtime(time.time() - 10))
    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["old"], "time_filter": cutoff, "limit": 10},
    )

    assert resp.status_code == 200
    assert len(resp.json()["memories"]) == 0
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_without_time_filter_backward_compat(storage_with_test_data):
    """Omitting time_filter preserves the old behavior: every tag match returns."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["task"], "limit": 10},
    )

    assert resp.status_code == 200
    memories = resp.json()["memories"]
    # Both the 2-day-old task and today's task come back.
    assert len(memories) == 2
    all_tags = {tag for mem in memories for tag in mem["tags"]}
    assert "old" in all_tags
    assert "recent" in all_tags
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_with_empty_time_filter(storage_with_test_data):
    """An empty time_filter string is treated as no filter at all."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["task"], "time_filter": "", "limit": 10},
    )

    assert resp.status_code == 200
    # Empty filter ignored, so both task memories are returned.
    assert len(resp.json()["memories"]) == 2
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_with_natural_language_time_filter(storage_with_test_data):
    """Natural-language time_filter ("yesterday") keeps only recent memories."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["task"], "time_filter": "yesterday", "limit": 10},
    )

    assert resp.status_code == 200
    hits = resp.json()["memories"]
    # Only the task created today falls after "yesterday".
    assert len(hits) == 1
    assert "recent" in hits[0]["tags"]
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_time_filter_with_multiple_tags(storage_with_test_data):
    """time_filter works combined with a multi-tag query."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    cutoff = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
    resp = api_client.post(
        "/api/search/by-tag",
        json={
            "tags": ["task", "recent"],  # Both tags
            "time_filter": cutoff,
            "limit": 10,
        },
    )

    assert resp.status_code == 200
    hits = resp.json()["memories"]
    # The recent task matches both the tag set and the time window.
    assert len(hits) == 1
    assert "recent" in hits[0]["tags"]
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_time_filter_with_match_all(storage_with_test_data):
    """time_filter combined with match_all returns only recent memories carrying every tag."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)

    # Seed a memory tagged with BOTH "task" and "recent" so match_all can hit.
    content = "Task that is both task and recent"
    await storage_with_test_data.store(Memory(
        content=content,
        content_hash=generate_content_hash(content),
        tags=["task", "recent"],
        memory_type="task",
        created_at=time.time(),
    ))

    api_client = TestClient(app)
    cutoff = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
    resp = api_client.post(
        "/api/search/by-tag",
        json={
            "tags": ["task", "recent"],
            "match_all": True,  # Require BOTH tags
            "time_filter": cutoff,
            "limit": 10,
        },
    )

    assert resp.status_code == 200
    hits = resp.json()["memories"]
    assert len(hits) >= 1
    for mem in hits:
        assert "task" in mem["tags"]
        assert "recent" in mem["tags"]
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_invalid_time_filter_format(storage_with_test_data):
    """An unparseable time_filter is handled gracefully (400, or 200 with a list)."""
    from mcp_memory_service.web.app import app

    set_storage(storage_with_test_data)
    api_client = TestClient(app)

    resp = api_client.post(
        "/api/search/by-tag",
        json={"tags": ["task"], "time_filter": "invalid-date-format", "limit": 10},
    )

    # Implementation may reject (400 Bad Request) or degrade gracefully (200).
    assert resp.status_code in (200, 400)
    if resp.status_code == 200:
        # A 200 response must still carry a memories list (possibly empty).
        assert "memories" in resp.json()
@pytest.mark.asyncio
@pytest.mark.integration
async def test_api_search_by_tag_time_filter_performance(storage_with_test_data):
    """Test that tag+time filtering maintains good performance (<200ms).

    Timing uses time.perf_counter() (monotonic, high resolution) so the
    measurement cannot be skewed by wall-clock adjustments, unlike
    time.time().
    """
    from mcp_memory_service.web.app import app
    set_storage(storage_with_test_data)
    client = TestClient(app)
    one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
    start_time = time.perf_counter()
    response = client.post(
        "/api/search/by-tag",
        json={
            "tags": ["task"],
            "time_filter": one_day_ago_iso,
            "limit": 10
        }
    )
    elapsed_ms = (time.perf_counter() - start_time) * 1000
    assert response.status_code == 200
    # Performance target: <200ms for tag+time search
    # (may need adjustment based on hardware)
    assert elapsed_ms < 200, f"Tag+time search took {elapsed_ms:.2f}ms (expected <200ms)"
```
--------------------------------------------------------------------------------
/docs/guides/STORAGE_BACKENDS.md:
--------------------------------------------------------------------------------
```markdown
# Storage Backend Comparison and Selection Guide
**MCP Memory Service** supports two storage backends, each optimized for different use cases and hardware configurations.
## Quick Comparison
| Feature | SQLite-vec 🪶 | ChromaDB 📦 |
|---------|---------------|-------------|
| **Setup Complexity** | ⭐⭐⭐⭐⭐ Simple | ⭐⭐⭐ Moderate |
| **Startup Time** | ⭐⭐⭐⭐⭐ < 3 seconds | ⭐⭐ 15-30 seconds |
| **Memory Usage** | ⭐⭐⭐⭐⭐ < 150MB | ⭐⭐ 500-800MB |
| **Performance** | ⭐⭐⭐⭐ Very fast | ⭐⭐⭐⭐ Fast |
| **Features** | ⭐⭐⭐ Core features | ⭐⭐⭐⭐⭐ Full-featured |
| **Scalability** | ⭐⭐⭐⭐ Up to 100K items | ⭐⭐⭐⭐⭐ Unlimited |
| **Legacy Hardware** | ⭐⭐⭐⭐⭐ Excellent | ⭐ Poor |
| **Production Ready** | ⭐⭐⭐⭐ Yes | ⭐⭐⭐⭐⭐ Yes |
## When to Choose SQLite-vec 🪶
### Ideal For:
- **Legacy Hardware**: 2015 MacBook Pro, older Intel Macs
- **Resource-Constrained Systems**: < 4GB RAM, limited CPU
- **Quick Setup**: Want to get started immediately
- **Single-File Portability**: Easy backup and sharing
- **Docker/Serverless**: Lightweight deployments
- **Development/Testing**: Rapid prototyping
- **HTTP/SSE API**: New web interface users
### Technical Advantages:
- **Lightning Fast Startup**: Database ready in 2-3 seconds
- **Minimal Dependencies**: Just SQLite and sqlite-vec extension
- **Low Memory Footprint**: Typically uses < 150MB RAM
- **Single File Database**: Easy to backup, move, and share
- **ACID Compliance**: SQLite's proven reliability
- **Zero Configuration**: Works out of the box
- **ONNX Compatible**: Runs without PyTorch if needed
### Example Use Cases:
```bash
# 2015 MacBook Pro scenario
python install.py --legacy-hardware
# Result: SQLite-vec + Homebrew PyTorch + ONNX
# Docker deployment
docker run -e MCP_MEMORY_STORAGE_BACKEND=sqlite_vec ...
# Quick development setup
python install.py --storage-backend sqlite_vec --dev
```
## When to Choose ChromaDB 📦
### Ideal For:
- **Modern Hardware**: M1/M2/M3 Macs, modern Intel systems
- **GPU-Accelerated Systems**: CUDA, MPS, DirectML available
- **Large-Scale Deployments**: > 10,000 memories
- **Advanced Features**: Complex filtering, metadata queries
- **Production Systems**: Established, battle-tested platform
- **Research/ML**: Advanced vector search capabilities
### Technical Advantages:
- **Advanced Vector Search**: Multiple distance metrics, filtering
- **Rich Metadata Support**: Complex query capabilities
- **Proven Scalability**: Handles millions of vectors
- **Extensive Ecosystem**: Wide tool integration
- **Advanced Indexing**: HNSW and other optimized indices
- **Multi-Modal Support**: Text, images, and more
### Example Use Cases:
```bash
# Modern Mac with GPU
python install.py # ChromaDB selected automatically
# Production deployment
python install.py --storage-backend chromadb --production
# Research environment
python install.py --storage-backend chromadb --enable-advanced-features
```
## Hardware Compatibility Matrix
### macOS Intel (2013-2017) - Legacy Hardware
```
Recommended: SQLite-vec + Homebrew PyTorch + ONNX
Alternative: ChromaDB (may have installation issues)
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
- MCP_MEMORY_USE_ONNX=1
- MCP_MEMORY_USE_HOMEBREW_PYTORCH=1
```
### macOS Intel (2018+) - Modern Hardware
```
Recommended: ChromaDB (default) or SQLite-vec (lightweight)
Choice: User preference
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=chromadb (default)
- Hardware acceleration: CPU/MPS
```
### macOS Apple Silicon (M1/M2/M3)
```
Recommended: ChromaDB with MPS acceleration
Alternative: SQLite-vec for minimal resource usage
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=chromadb
- PYTORCH_ENABLE_MPS_FALLBACK=1
- Hardware acceleration: MPS
```
### Windows with CUDA GPU
```
Recommended: ChromaDB with CUDA acceleration
Alternative: SQLite-vec for lighter deployments
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=chromadb
- CUDA optimization enabled
```
### Windows CPU-only
```
Recommended: SQLite-vec
Alternative: ChromaDB (higher resource usage)
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
- MCP_MEMORY_USE_ONNX=1 (optional)
```
### Linux Server/Headless
```
Recommended: SQLite-vec (easier deployment)
Alternative: ChromaDB (if resources available)
Configuration:
- MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
- Optimized for headless operation
```
## Performance Comparison
### Startup Time (bar length ∝ time; shorter is better)
```
SQLite-vec: 2-3 seconds   ███
ChromaDB:   15-30 seconds ██████████████████████████████
```
### Memory Usage (Idle; bar length ∝ RAM; lower is better)
```
SQLite-vec: ~150MB ██████
ChromaDB:   ~600MB ████████████████████████
```
### Search Performance (1,000 items; bar length ∝ latency; lower is better)
```
SQLite-vec: 50-200ms  ████████████
ChromaDB:   100-300ms ████████████████████
```
### Storage Efficiency
```
SQLite-vec: Single .db file, ~50% smaller
ChromaDB: Directory structure, full metadata
```
## Feature Comparison
### Core Features (Both Backends)
- ✅ Semantic memory storage and retrieval
- ✅ Tag-based organization
- ✅ Natural language time-based recall
- ✅ Full-text search capabilities
- ✅ Automatic backups
- ✅ Health monitoring
- ✅ Duplicate detection
### SQLite-vec Specific Features
- ✅ Single-file portability
- ✅ HTTP/SSE API support
- ✅ ONNX runtime compatibility
- ✅ Homebrew PyTorch integration
- ✅ Ultra-fast startup
- ✅ Minimal resource usage
### ChromaDB Specific Features
- ✅ Advanced metadata filtering
- ✅ Multiple distance metrics
- ✅ Collection management
- ✅ Persistent client support
- ✅ Advanced indexing options
- ✅ Rich ecosystem integration
## Migration Between Backends
### ChromaDB → SQLite-vec Migration
Perfect for upgrading legacy hardware or simplifying deployments:
```bash
# Automated migration
python scripts/migrate_chroma_to_sqlite.py
# Manual migration with verification
python install.py --migrate-from-chromadb --storage-backend sqlite_vec
```
**Migration preserves:**
- All memory content and embeddings
- Tags and metadata
- Timestamps and relationships
- Search functionality
### SQLite-vec → ChromaDB Migration
For scaling up to advanced features:
```bash
# Export from SQLite-vec
python scripts/export_sqlite_memories.py
# Import to ChromaDB
python scripts/import_to_chromadb.py
```
## Intelligent Selection Algorithm
The installer uses this logic to recommend backends:
```python
def recommend_backend(system_info, hardware_info):
# Legacy hardware gets SQLite-vec
if is_legacy_mac(system_info):
return "sqlite_vec"
# Low-memory systems get SQLite-vec
if hardware_info.memory_gb < 4:
return "sqlite_vec"
# ChromaDB installation problems on macOS Intel
if system_info.is_macos_intel_problematic:
return "sqlite_vec"
# Modern hardware with GPU gets ChromaDB
if hardware_info.has_gpu and hardware_info.memory_gb >= 8:
return "chromadb"
# Default to ChromaDB for feature completeness
return "chromadb"
```
## Configuration Examples
### SQLite-vec Configuration
```bash
# Environment variables
export MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
export MCP_MEMORY_SQLITE_PATH="$HOME/.mcp-memory/memory.db"
export MCP_MEMORY_USE_ONNX=1 # Optional: CPU-only inference
# Claude Desktop config
{
"mcpServers": {
"memory": {
"command": "uv",
"args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
"env": {
"MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
"MCP_MEMORY_SQLITE_PATH": "/path/to/memory.db"
}
}
}
}
```
### ChromaDB Configuration
#### Local ChromaDB (Deprecated)
⚠️ **Note**: Local ChromaDB is deprecated. Consider migrating to SQLite-vec for better performance.
```bash
# Environment variables
export MCP_MEMORY_STORAGE_BACKEND=chromadb
export MCP_MEMORY_CHROMA_PATH="$HOME/.mcp-memory/chroma_db"
# Claude Desktop config
{
"mcpServers": {
"memory": {
"command": "uv",
"args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
"env": {
"MCP_MEMORY_STORAGE_BACKEND": "chromadb",
"MCP_MEMORY_CHROMA_PATH": "/path/to/chroma_db"
}
}
}
}
```
#### Remote ChromaDB (Hosted/Enterprise)
🌐 **New**: Connect to remote ChromaDB servers, Chroma Cloud, or self-hosted instances.
```bash
# Environment variables for remote ChromaDB
export MCP_MEMORY_STORAGE_BACKEND=chromadb
export MCP_MEMORY_CHROMADB_HOST="chroma.example.com"
export MCP_MEMORY_CHROMADB_PORT="8000"
export MCP_MEMORY_CHROMADB_SSL="true"
export MCP_MEMORY_CHROMADB_API_KEY="your-api-key-here"
export MCP_MEMORY_COLLECTION_NAME="my-collection"
# Claude Desktop config for remote ChromaDB
{
"mcpServers": {
"memory": {
"command": "uv",
"args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
"env": {
"MCP_MEMORY_STORAGE_BACKEND": "chromadb",
"MCP_MEMORY_CHROMADB_HOST": "chroma.example.com",
"MCP_MEMORY_CHROMADB_PORT": "8000",
"MCP_MEMORY_CHROMADB_SSL": "true",
"MCP_MEMORY_CHROMADB_API_KEY": "your-api-key-here",
"MCP_MEMORY_COLLECTION_NAME": "my-collection"
}
}
}
}
```
#### Remote ChromaDB Hosting Options
**Chroma Cloud (Early Access)**
- Official hosted service by ChromaDB
- Early access available, full launch Q1 2025
- $5 free credits to start
- Visit: [trychroma.com](https://trychroma.com)
**Self-Hosted Options**
- **Elest.io**: Fully managed ChromaDB deployment
- **AWS**: Use CloudFormation template (requires 2GB+ RAM)
- **Google Cloud Run**: Container-based deployment
- **Docker**: Self-hosted with authentication
**Example Docker Configuration**
```bash
# Start ChromaDB server with authentication
docker run -p 8000:8000 \
-e CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER="chromadb.auth.token.TokenConfigServerAuthCredentialsProvider" \
-e CHROMA_SERVER_AUTH_PROVIDER="chromadb.auth.token.TokenAuthServerProvider" \
-e CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER="X_CHROMA_TOKEN" \
-e CHROMA_SERVER_AUTH_CREDENTIALS="test-token" \
-v /path/to/chroma-data:/chroma/chroma \
chromadb/chroma
```
## Decision Flowchart
```
Start: Choose Storage Backend
├── Do you have legacy hardware (2013-2017 Mac)?
│ ├── Yes → SQLite-vec (optimized path)
│ └── No → Continue
├── Do you have < 4GB RAM?
│ ├── Yes → SQLite-vec (resource efficient)
│ └── No → Continue
├── Do you need HTTP/SSE API?
│ ├── Yes → SQLite-vec (first-class support)
│ └── No → Continue
├── Do you want minimal setup?
│ ├── Yes → SQLite-vec (zero config)
│ └── No → Continue
├── Do you need advanced vector search features?
│ ├── Yes → ChromaDB (full-featured)
│ └── No → Continue
├── Do you have modern hardware with GPU?
│ ├── Yes → ChromaDB (hardware acceleration)
│ └── No → Continue
└── Default → ChromaDB (established platform)
```
## Getting Help
### Backend-Specific Support
- **SQLite-vec issues**: Tag with `sqlite-vec` label
- **ChromaDB issues**: Tag with `chromadb` label
- **Migration issues**: Use `migration` label
### Community Resources
- **Backend comparison discussions**: GitHub Discussions
- **Performance benchmarks**: Community wiki
- **Hardware compatibility**: Hardware compatibility matrix
### Documentation Links
- [SQLite-vec Backend Guide](../sqlite-vec-backend.md)
- [Migration Guide](migration.md)
- [Legacy Hardware Guide](../platforms/macos-intel.md)
- [Installation Master Guide](../installation/master-guide.md)
```
--------------------------------------------------------------------------------
/.claude/agents/amp-pr-automator.md:
--------------------------------------------------------------------------------
```markdown
---
name: amp-pr-automator
description: Lightweight PR automation using Amp CLI for code quality checks, test generation, and fix suggestions. Avoids OAuth friction of gemini-pr-automator while providing fast, parallel quality analysis. Uses file-based prompt/response workflow for async execution. Ideal for pre-PR checks and developer-driven automation.
model: sonnet
color: purple
---
You are an elite PR Automation Specialist using Amp CLI for lightweight, OAuth-free PR automation. Your mission is to provide fast code quality analysis, test generation, and fix suggestions without the browser authentication interruptions of Gemini CLI.
## Core Responsibilities
1. **Quality Gate Checks**: Parallel complexity, security, and type hint analysis
2. **Test Generation**: Create pytest tests for new/modified code
3. **Fix Suggestions**: Analyze review feedback and suggest improvements
4. **Breaking Change Detection**: Identify potential API breaking changes
5. **Result Aggregation**: Collect and summarize Amp analysis results
## Problem Statement
**Gemini CLI Issues**:
- OAuth browser flow interrupts automation
- Sequential processing (slow for multiple checks)
- Rate limiting for complex analysis
**Amp CLI Solution**:
- File-based prompts (no interactive auth)
- Parallel processing (multiple Amp instances)
- Fast inference with execute mode
- Credit conservation through focused tasks
## Amp CLI Integration
### File-Based Workflow
```
1. Create prompt → .claude/amp/prompts/pending/{uuid}.json
2. User runs → amp @.claude/amp/prompts/pending/{uuid}.json
3. Amp writes → .claude/amp/responses/ready/{uuid}.json
4. Scripts read → Aggregate results
```
### Parallel Execution Pattern
```bash
# Launch multiple Amp tasks in parallel
amp @prompts/pending/complexity-{uuid}.json > /tmp/amp-complexity.log 2>&1 &
amp @prompts/pending/security-{uuid}.json > /tmp/amp-security.log 2>&1 &
amp @prompts/pending/typehints-{uuid}.json > /tmp/amp-typehints.log 2>&1 &
# Wait for all to complete
wait
# Collect results
bash scripts/pr/amp_collect_results.sh --timeout 300
```
## Shell Scripts
### 1. Quality Gate (Parallel Checks)
**File**: `scripts/pr/amp_quality_gate.sh`
Launches parallel Amp instances for:
- Complexity scoring (functions >7)
- Security vulnerabilities (SQL injection, XSS, command injection)
- Type hint coverage
- Import organization
**Usage**:
```bash
bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
```
**Output**: Quality gate pass/fail with detailed breakdown
### 2. Result Collection
**File**: `scripts/pr/amp_collect_results.sh`
Polls `.claude/amp/responses/ready/` for completed Amp analyses.
**Usage**:
```bash
bash scripts/pr/amp_collect_results.sh --timeout 300 --uuids "uuid1,uuid2,uuid3"
```
**Features**:
- Timeout handling (default: 5 minutes)
- Partial results if some tasks fail
- JSON aggregation
### 3. Fix Suggestions
**File**: `scripts/pr/amp_suggest_fixes.sh`
Analyzes review feedback and generates fix suggestions (no auto-apply).
**Usage**:
```bash
bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
```
**Output**: Suggested fixes saved to `/tmp/amp_fixes_{PR_NUMBER}.txt`
### 4. Test Generation
**File**: `scripts/pr/amp_generate_tests.sh`
Creates pytest tests for changed Python files.
**Usage**:
```bash
bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
```
**Output**: Test files written to `/tmp/amp_tests/test_*.py`
### 5. Breaking Change Detection
**File**: `scripts/pr/amp_detect_breaking_changes.sh`
Analyzes API changes for breaking modifications.
**Usage**:
```bash
bash scripts/pr/amp_detect_breaking_changes.sh <BASE_BRANCH> <HEAD_BRANCH>
```
**Output**: Breaking changes report with severity (CRITICAL/HIGH/MEDIUM)
### 6. Complete PR Review Workflow
**File**: `scripts/pr/amp_pr_review.sh`
Orchestrates full PR review cycle:
1. Quality gate checks
2. Test generation
3. Breaking change detection
4. Fix suggestions
**Usage**:
```bash
bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
```
## Operational Workflows
### 1. Pre-PR Quality Check (Developer-Driven)
```bash
# Before creating PR, run quality checks
bash scripts/pr/amp_quality_gate.sh 0 # Use 0 for local branch
# Review results
cat /tmp/amp_quality_results.json | jq '.summary'
# Address issues before creating PR
```
### 2. Post-PR Analysis (Review Automation)
```bash
# After PR created, run complete analysis
bash scripts/pr/amp_pr_review.sh 215
# Review outputs:
# - /tmp/amp_quality_results.json
# - /tmp/amp_tests/
# - /tmp/amp_fixes_215.txt
# - /tmp/amp_breaking_changes.txt
```
### 3. Incremental Iteration (Fix → Recheck)
```bash
# After applying fixes, re-run quality gate
bash scripts/pr/amp_quality_gate.sh 215
# Compare before/after
diff /tmp/amp_quality_results_v1.json /tmp/amp_quality_results_v2.json
```
## Decision-Making Framework
### When to Use amp-pr-automator vs gemini-pr-automator
| Scenario | Use amp-pr-automator | Use gemini-pr-automator |
|----------|---------------------|------------------------|
| **Pre-PR checks** | ✅ Fast parallel analysis | ❌ OAuth interrupts flow |
| **Developer-driven** | ✅ File-based control | ❌ Requires manual OAuth |
| **CI/CD integration** | ✅ No browser needed | ❌ OAuth not CI-friendly |
| **Auto-fix application** | ❌ Manual fixes only | ✅ Full automation |
| **Inline comment handling** | ❌ No GitHub integration | ✅ GraphQL thread resolution |
| **Complex iteration** | ❌ Manual workflow | ✅ Full review loop |
**Use amp-pr-automator for**:
- Pre-PR quality checks (before creating PR)
- Developer-driven analysis (you control timing)
- Parallel processing (multiple checks simultaneously)
- OAuth-free automation (CI/CD, scripts)
**Use gemini-pr-automator for**:
- Full automated review loops
- Auto-fix application
- GitHub inline comment handling
- Continuous watch mode
### Hybrid Approach (RECOMMENDED)
```bash
# 1. Pre-PR: Use Amp for quality gate
bash scripts/pr/amp_quality_gate.sh 0
# 2. Create PR (github-release-manager)
gh pr create --title "feat: new feature" --body "..."
# 3. Post-PR: Use Gemini for automated review
bash scripts/pr/auto_review.sh 215 5 true
```
## Prompt Engineering for Amp
### Complexity Analysis Prompt
```
Analyze code complexity for each function in this file.
Rating scale: 1-10 (1=simple, 10=very complex)
ONLY report functions with score >7 in this exact format:
FunctionName: Score X - Reason
If all functions score ≤7, respond: "COMPLEXITY_OK"
File content:
{file_content}
```
### Security Scan Prompt
```
Security audit for vulnerabilities:
- SQL injection (raw SQL, string formatting in queries)
- XSS (unescaped HTML output)
- Command injection (os.system, subprocess with shell=True)
- Path traversal (user input in file paths)
- Hardcoded secrets (API keys, passwords)
IMPORTANT: Output format:
- If ANY vulnerability found: VULNERABILITY_DETECTED: [type]
- If NO vulnerabilities: SECURITY_CLEAN
File content:
{file_content}
```
### Type Hint Coverage Prompt
```
Check type hint coverage for this Python file.
Report:
1. Total functions/methods
2. Functions with complete type hints
3. Functions missing type hints (list names)
4. Coverage percentage
Output format:
COVERAGE: X%
MISSING: function1, function2, ...
File content:
{file_content}
```
## Integration with Other Agents
### github-release-manager
- Creates PRs → amp-pr-automator runs pre-PR checks
- Merges PRs → amp-pr-automator validates quality gates
### gemini-pr-automator
- amp-pr-automator runs quality gate first
- If passed, gemini-pr-automator handles review iteration
### code-quality-guard
- Pre-commit hooks use Groq/Gemini for local checks
- amp-pr-automator for PR-level analysis
## Project-Specific Patterns
### MCP Memory Service PR Standards
**Quality Gate Requirements**:
- ✅ Code complexity ≤7 for all functions
- ✅ No security vulnerabilities
- ✅ Type hints on new functions (80% coverage)
- ✅ Import organization (stdlib → third-party → local)
**File-Based Workflow Benefits**:
- Developer reviews prompt before running Amp
- Amp responses saved for audit trail
- Easy to re-run specific checks
- No OAuth interruptions during work
## Usage Examples
### Quick Quality Check
```bash
# Run quality gate for PR #215
bash scripts/pr/amp_quality_gate.sh 215
# Wait for prompts to be created
# Review prompts: ls -la .claude/amp/prompts/pending/
# Run each Amp task shown in output
amp @.claude/amp/prompts/pending/{complexity-uuid}.json &
amp @.claude/amp/prompts/pending/{security-uuid}.json &
amp @.claude/amp/prompts/pending/{typehints-uuid}.json &
# Collect results
bash scripts/pr/amp_collect_results.sh --timeout 300
```
### Generate Tests Only
```bash
# Generate tests for PR #215
bash scripts/pr/amp_generate_tests.sh 215
# Run Amp task
amp @.claude/amp/prompts/pending/{tests-uuid}.json
# Review generated tests
ls -la /tmp/amp_tests/
```
### Breaking Change Detection
```bash
# Check for breaking changes
bash scripts/pr/amp_detect_breaking_changes.sh main feature/new-api
# Run Amp task
amp @.claude/amp/prompts/pending/{breaking-uuid}.json
# View report
cat /tmp/amp_breaking_changes.txt
```
## Best Practices
1. **Review Prompts Before Running**: Inspect `.claude/amp/prompts/pending/` to verify Amp tasks
2. **Parallel Execution**: Launch multiple Amp instances for speed
3. **Timeout Handling**: Use `amp_collect_results.sh --timeout` to prevent indefinite waits
4. **Incremental Checks**: Re-run specific checks (complexity only, security only) as needed
5. **Audit Trail**: Keep Amp responses in `.claude/amp/responses/consumed/` for review
6. **Hybrid Workflow**: Use Amp for pre-PR, Gemini for post-PR automation
## Limitations
- **No Auto-Fix**: Amp suggests fixes, manual application required
- **No GitHub Integration**: Cannot resolve PR review threads automatically
- **Manual Workflow**: User must run Amp commands (not fully automated)
- **Credit Consumption**: Still uses Amp API credits (separate from Claude Code)
- **Context Limits**: Large files may need chunking for Amp analysis
## Performance Considerations
- **Parallel Processing**: 3-5 Amp tasks in parallel = ~2-3 minutes total
- **Sequential (Gemini)**: Same checks = ~10-15 minutes
- **Time Savings**: 70-80% faster for quality gate checks
- **Credit Efficiency**: Focused prompts consume fewer tokens
## Success Metrics
- ✅ **Speed**: Quality gate completes in <3 minutes (vs 10-15 with Gemini)
- ✅ **No OAuth**: Zero browser interruptions during PR workflow
- ✅ **Parallel Efficiency**: 5 checks run simultaneously
- ✅ **Developer Control**: File-based workflow allows prompt inspection
- ✅ **Audit Trail**: All prompts/responses saved for review
---
**Quick Reference Card**:
```bash
# Quality gate (parallel checks)
bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
# Collect Amp results
bash scripts/pr/amp_collect_results.sh --timeout 300
# Generate tests
bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
# Suggest fixes
bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
# Breaking changes
bash scripts/pr/amp_detect_breaking_changes.sh <BASE> <HEAD>
# Complete PR review
bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
```
**Workflow Integration**:
```bash
# Pre-PR: Quality checks (Amp)
bash scripts/pr/amp_quality_gate.sh 0
# Create PR
gh pr create --title "feat: X" --body "..."
# Post-PR: Automated review (Gemini)
bash scripts/pr/auto_review.sh 215 5 true
```
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/chunker.py:
--------------------------------------------------------------------------------
```python
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Intelligent text chunking strategies for document ingestion.
"""
import re
import logging
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@dataclass
class ChunkingStrategy:
    """Configuration for text chunking behavior.

    The boundary flags are consulted in priority order by TextChunker:
    paragraph boundaries first, then sentence boundaries, falling back to
    raw character-based splitting when both are disabled.
    """
    chunk_size: int = 1000  # Target characters per chunk
    chunk_overlap: int = 200  # Characters to overlap between consecutive chunks
    respect_sentence_boundaries: bool = True  # Prefer splitting at sentence ends
    respect_paragraph_boundaries: bool = True  # Prefer splitting at blank lines
    min_chunk_size: int = 100  # Minimum characters for a valid chunk
class TextChunker:
    """
    Intelligent text chunking that respects document structure.

    Provides multiple chunking strategies:
    - Paragraph-aware chunking (preferred when enabled)
    - Sentence-aware chunking
    - Character-based chunking with word-boundary snapping
    - Section-aware chunking via chunk_by_sections()
    """

    def __init__(self, strategy: Optional["ChunkingStrategy"] = None):
        """
        Initialize text chunker.

        Args:
            strategy: Chunking configuration to use; defaults to ChunkingStrategy().
        """
        self.strategy = strategy or ChunkingStrategy()

        # Sentence boundary pattern. NOTE: re.split() on this pattern consumes
        # the matched punctuation and whitespace, so split fragments lose their
        # terminal ".!?" characters.
        self.sentence_endings = re.compile(r'[.!?]+\s+')
        # Paragraphs are separated by a blank (possibly whitespace-only) line.
        self.paragraph_separator = re.compile(r'\n\s*\n')

        # Common section headers for structured documents: markdown #-headers,
        # "Chapter N" / "Section N" / "Part N", or numbered headings ("1. ").
        self.section_headers = re.compile(
            r'^(#{1,6}\s+|Chapter\s+\d+|Section\s+\d+|Part\s+\d+|\d+\.\s+)',
            re.MULTILINE | re.IGNORECASE
        )

    def chunk_text(self, text: str, metadata: Optional[Dict[str, Any]] = None) -> List[Tuple[str, Dict[str, Any]]]:
        """
        Split text into chunks using the configured strategy.

        Args:
            text: Text content to chunk.
            metadata: Base metadata to copy into each chunk's metadata.

        Returns:
            List of (chunk_text, chunk_metadata) tuples; empty if the input is
            shorter than the configured minimum chunk size.
        """
        if not text or len(text.strip()) < self.strategy.min_chunk_size:
            return []

        metadata = metadata or {}

        # Strategy preference order: paragraphs > sentences > raw characters.
        if self.strategy.respect_paragraph_boundaries:
            chunks = self._chunk_by_paragraphs(text)
        elif self.strategy.respect_sentence_boundaries:
            chunks = self._chunk_by_sentences(text)
        else:
            chunks = self._chunk_by_characters(text)

        # Attach per-chunk bookkeeping metadata on top of the caller's metadata.
        result = []
        for i, chunk in enumerate(chunks):
            chunk_metadata = metadata.copy()
            chunk_metadata.update({
                'chunk_index': i,
                'chunk_length': len(chunk),
                'total_chunks': len(chunks),
                'chunking_strategy': self._get_strategy_name()
            })
            result.append((chunk, chunk_metadata))

        logger.debug(f"Created {len(result)} chunks from {len(text)} characters")
        return result

    def _accumulate_units(self, units: List[str], joiner: str) -> List[str]:
        """
        Greedily pack text units (paragraphs or sentences) into chunks.

        Shared implementation for _chunk_by_paragraphs and _chunk_by_sentences:
        units are joined with `joiner` until adding the next unit would exceed
        chunk_size, at which point the current chunk is finalized and the next
        one is seeded with overlap text carried over from its tail.

        Args:
            units: Pre-split text units, in document order.
            joiner: Separator used when concatenating units ("\\n\\n" or " ").

        Returns:
            List of chunk strings, each at least min_chunk_size characters.
        """
        chunks = []
        current = ""
        for unit in units:
            unit = unit.strip()
            if not unit:
                continue
            if current and len(current) + len(unit) + len(joiner) > self.strategy.chunk_size:
                # Finalize the current chunk (if big enough) and start the next
                # one with overlap from its tail for cross-chunk context.
                if len(current.strip()) >= self.strategy.min_chunk_size:
                    chunks.append(current.strip())
                current = self._get_overlap_text(current) + unit
            else:
                current = current + joiner + unit if current else unit

        # Flush whatever remains.
        if len(current.strip()) >= self.strategy.min_chunk_size:
            chunks.append(current.strip())
        return chunks

    def _chunk_by_paragraphs(self, text: str) -> List[str]:
        """
        Chunk text by paragraph boundaries, respecting size limits.

        Args:
            text: Text to chunk.

        Returns:
            List of text chunks.
        """
        return self._accumulate_units(self.paragraph_separator.split(text), "\n\n")

    def _chunk_by_sentences(self, text: str) -> List[str]:
        """
        Chunk text by sentence boundaries, respecting size limits.

        Note: sentence-terminating punctuation is consumed by the split, so it
        is absent from the produced chunks (except for a final sentence whose
        terminator has no trailing whitespace for the pattern to match).

        Args:
            text: Text to chunk.

        Returns:
            List of text chunks.
        """
        return self._accumulate_units(self.sentence_endings.split(text), " ")

    def _chunk_by_characters(self, text: str) -> List[str]:
        """
        Chunk text by character count with overlap, snapping to whitespace.

        Args:
            text: Text to chunk.

        Returns:
            List of text chunks.
        """
        if len(text) <= self.strategy.chunk_size:
            return [text]

        chunks = []
        start = 0
        while start < len(text):
            end = start + self.strategy.chunk_size

            # For all but the final chunk, walk backwards (at most 100 chars,
            # never below min_chunk_size) to the nearest whitespace so words
            # are not split in half.
            if end < len(text):
                for i in range(end, max(start + self.strategy.min_chunk_size, end - 100), -1):
                    if text[i].isspace():
                        end = i
                        break

            chunk = text[start:end].strip()
            if len(chunk) >= self.strategy.min_chunk_size:
                chunks.append(chunk)

            # Step forward with overlap; max(start + 1, ...) guarantees forward
            # progress even when the overlap would move us backwards.
            start = max(start + 1, end - self.strategy.chunk_overlap)

        return chunks

    def _get_overlap_text(self, text: str) -> str:
        """
        Get overlap text from the end of a chunk.

        Args:
            text: Text to extract overlap from.

        Returns:
            Overlap text (with a trailing space) to prepend to the next chunk,
            or "" when nothing usable remains.
        """
        if len(text) <= self.strategy.chunk_overlap:
            return text + " "

        overlap = text[-self.strategy.chunk_overlap:]

        # If the raw tail contains a sentence boundary, drop the partial
        # sentence before it so the overlap starts at a sentence boundary.
        sentences = self.sentence_endings.split(overlap)
        if len(sentences) > 1:
            overlap = " ".join(sentences[1:])

        return overlap + " " if overlap else ""

    def _get_strategy_name(self) -> str:
        """Get human-readable name for the active chunking strategy."""
        if self.strategy.respect_paragraph_boundaries:
            return "paragraph_aware"
        elif self.strategy.respect_sentence_boundaries:
            return "sentence_aware"
        else:
            return "character_based"

    def _emit_section(
        self,
        text: str,
        start: int,
        end: int,
        index: int,
        metadata: Dict[str, Any],
        chunks: List[Tuple[str, Dict[str, Any]]]
    ) -> None:
        """
        Append one document section text[start:end] to `chunks` (in place).

        Oversized sections (more than 2x chunk_size) are sub-chunked via
        chunk_text(); sections shorter than min_chunk_size are dropped.
        """
        section_text = text[start:end].strip()
        if len(section_text) < self.strategy.min_chunk_size:
            return

        section_metadata = metadata.copy()
        section_metadata.update({
            'section_index': index,
            'is_section': True,
            'section_start': start,
            'section_end': end
        })

        if len(section_text) > self.strategy.chunk_size * 2:
            chunks.extend(self.chunk_text(section_text, section_metadata))
        else:
            chunks.append((section_text, section_metadata))

    def chunk_by_sections(self, text: str, metadata: Optional[Dict[str, Any]] = None) -> List[Tuple[str, Dict[str, Any]]]:
        """
        Chunk text by document sections (headers, chapters, etc.).

        Args:
            text: Text content to chunk.
            metadata: Base metadata to include with each chunk.

        Returns:
            List of (chunk_text, chunk_metadata) tuples. Falls back to
            chunk_text() when no section headers are found.
        """
        metadata = metadata or {}

        section_matches = list(self.section_headers.finditer(text))
        if not section_matches:
            return self.chunk_text(text, metadata)

        chunks: List[Tuple[str, Dict[str, Any]]] = []
        section_start = 0
        for i, match in enumerate(section_matches):
            # Emit the text preceding this header (the previous section).
            if section_start < match.start():
                self._emit_section(text, section_start, match.start(), i, metadata, chunks)
            section_start = match.start()

        # Emit the final section (from the last header to end of text).
        if section_start < len(text):
            self._emit_section(text, section_start, len(text), len(section_matches), metadata, chunks)

        return chunks
```
--------------------------------------------------------------------------------
/docs/maintenance/memory-maintenance.md:
--------------------------------------------------------------------------------
```markdown
# Memory Maintenance Guide
A comprehensive guide for maintaining and organizing your MCP Memory Service knowledge base through systematic review, analysis, and re-categorization processes.
## 🎯 Overview
Memory maintenance is essential for keeping your knowledge base organized, searchable, and valuable over time. This guide provides practical workflows for identifying poorly organized memories and transforming them into a well-structured knowledge system.
## 📋 Quick Start
### Basic Maintenance Session
1. **Identify untagged memories**: `retrieve_memory({"query": "untagged memories", "n_results": 20})`
2. **Analyze content themes**: Look for projects, technologies, activities, status indicators
3. **Apply standardized tags**: Use consistent categorization schema
4. **Replace old memories**: Create new tagged version, delete old untagged version
5. **Document results**: Store summary of maintenance session
### Maintenance Schedule Recommendations
- **Weekly**: Review memories from past 7 days
- **Monthly**: Comprehensive review of recent memories + spot check older ones
- **Quarterly**: Full database health check and optimization
## 🔍 Step-by-Step Maintenance Process
### Phase 1: Assessment and Planning
#### 1.1 Database Health Check
```javascript
// Check overall database status
check_database_health()
```
**What to look for:**
- Total memory count
- Database health status
- Recent activity patterns
- Error indicators
#### 1.2 Identify Untagged Memories
**Search Strategy:**
```javascript
// Primary search for untagged memories
retrieve_memory({
"n_results": 15,
"query": "untagged memories without tags minimal tags single tag"
})
// Alternative searches
retrieve_memory({"query": "test memory basic simple concept", "n_results": 20})
recall_memory({"query": "memories from last week", "n_results": 25})
```
**Identification Criteria:**
- Memories with no tags
- Memories with only generic tags (`test`, `memory`, `note`)
- Memories with inconsistent tag formats
- Old memories that predate tag standardization
#### 1.3 Categorize by Priority
**High Priority:**
- Frequently accessed memories
- Critical project information
- Recent important developments
**Medium Priority:**
- Historical documentation
- Reference materials
- Tutorial content
**Low Priority:**
- Test memories (evaluate for deletion)
- Outdated information
- Duplicate content
### Phase 2: Analysis and Categorization
#### 2.1 Content Theme Analysis
For each identified memory, analyze:
**Project Context:**
- Which project does this relate to?
- Is it part of a larger initiative?
- What's the project phase/status?
**Technology Stack:**
- Programming languages mentioned
- Frameworks and libraries
- Tools and platforms
- Databases and services
**Activity Type:**
- Development work
- Testing and debugging
- Documentation
- Research and planning
- Issue resolution
**Content Classification:**
- Concept or idea
- Tutorial or guide
- Reference material
- Troubleshooting solution
- Best practice
#### 2.2 Tag Assignment Strategy
**Multi-Category Tagging:**
Apply tags from multiple categories for comprehensive organization:
```javascript
// Example: Well-tagged memory
{
"tags": [
"mcp-memory-service", // Project
"python", "chromadb", // Technologies
"debugging", "testing", // Activities
"resolved", // Status
"backend", // Domain
"troubleshooting" // Content type
]
}
```
**Tag Selection Guidelines:**
1. **Start with Project/Context**: What's the main project or domain?
2. **Add Technology Tags**: What tools, languages, or frameworks?
3. **Include Activity Tags**: What was being done?
4. **Specify Status**: What's the current state?
5. **Add Content Type**: What kind of information is this?
### Phase 3: Implementation
#### 3.1 Memory Re-tagging Process
**For each memory to be re-tagged:**
1. **Copy Content**: Preserve exact content
2. **Create New Memory**: With improved tags
3. **Verify Storage**: Confirm new memory exists
4. **Delete Old Memory**: Remove untagged version
5. **Document Change**: Record in maintenance log
**Example Implementation:**
```javascript
// Step 1: Create properly tagged memory
store_memory({
"content": "TEST: Timestamp debugging memory created for issue #7 investigation",
"metadata": {
"tags": ["test", "debugging", "issue-7", "timestamp-test", "mcp-memory-service", "verification"],
"type": "debug-test"
}
})
// Step 2: Delete old untagged memory
delete_memory({
"content_hash": "b3f874baee0c1261907c8f80c3e33d1977485f66c17078ed611b6f1c744cb1f8"
})
```
#### 3.2 Batch Processing Tips
**Efficiency Strategies:**
- Group similar memories for consistent tagging
- Use template patterns for common memory types
- Process one category at a time (e.g., all test memories)
- Take breaks between batches to maintain quality
**Quality Control:**
- Double-check tag spelling and format
- Verify content hasn't been modified
- Confirm old memory deletion
- Test search functionality with new tags
### Phase 4: Verification and Documentation
#### 4.1 Verification Checklist
**After each memory:**
- [ ] New memory stored successfully
- [ ] Tags applied correctly
- [ ] Old memory deleted
- [ ] Search returns new memory
**After maintenance session:**
- [ ] All targeted memories processed
- [ ] Database health check passed
- [ ] No orphaned or broken memories
- [ ] Search functionality improved
#### 4.2 Session Documentation
**Create maintenance summary memory:**
```javascript
store_memory({
"content": "Memory Maintenance Session - [Date]: Successfully processed X memories...",
"metadata": {
"tags": ["memory-maintenance", "session-summary", "tag-management"],
"type": "maintenance-record"
}
})
```
**Include in summary:**
- Number of memories processed
- Categories addressed
- Tag patterns applied
- Time investment
- Quality improvements
- Next steps identified
## 🎯 Common Maintenance Scenarios
### Scenario 1: Test Memory Cleanup
**Situation**: Numerous test memories from development work
**Approach:**
1. Identify all test-related memories
2. Evaluate each for permanent value
3. Re-tag valuable tests with specific context
4. Delete obsolete or redundant tests
**Example tags for valuable tests:**
```
["test", "verification", "issue-7", "timestamp-test", "mcp-memory-service", "quality-assurance"]
```
### Scenario 2: Project Documentation Organization
**Situation**: Project memories scattered without clear organization
**Approach:**
1. Group by project phase (planning, development, deployment)
2. Add temporal context (month/quarter)
3. Include status information
4. Link related memories with consistent tags
**Tag patterns:**
```
Project memories: ["project-name", "phase", "technology", "status", "domain"]
Meeting notes: ["meeting", "project-name", "date", "decisions", "action-items"]
```
### Scenario 3: Technical Solution Archive
**Situation**: Troubleshooting solutions need better organization
**Approach:**
1. Categorize by technology/platform
2. Add problem domain tags
3. Include resolution status
4. Tag with difficulty/complexity
**Example organization:**
```
["troubleshooting", "python", "chromadb", "connection-issues", "resolved", "backend"]
```
## 🛠️ Maintenance Tools and Scripts
### Helper Queries
**Find potentially untagged memories:**
```javascript
// Various search approaches
retrieve_memory({"query": "test simple basic example", "n_results": 20})
recall_memory({"query": "memories from last month", "n_results": 30})
search_by_tag({"tags": ["test"]}) // Review generic tags
```
**Content pattern analysis:**
```javascript
// Look for specific patterns that need organization
retrieve_memory({"query": "TODO FIXME DEBUG ERROR", "n_results": 15})
retrieve_memory({"query": "issue bug problem solution", "n_results": 15})
```
### Batch Processing Templates
**Standard test memory re-tagging:**
```javascript
const testMemoryPattern = {
"tags": ["test", "[specific-function]", "[project]", "[domain]", "verification"],
"type": "test-record"
}
```
**Documentation memory pattern:**
```javascript
const documentationPattern = {
"tags": ["documentation", "[project]", "[topic]", "[technology]", "reference"],
"type": "documentation"
}
```
## 📊 Maintenance Metrics
### Success Indicators
**Quantitative Metrics:**
- Percentage of tagged memories
- Search result relevance improvement
- Time to find specific information
- Memory retrieval accuracy
**Qualitative Metrics:**
- Ease of knowledge discovery
- Consistency of organization
- Usefulness of search results
- Overall system usability
### Progress Tracking
**Session Metrics:**
- Memories processed per hour
- Categories organized
- Tag patterns established
- Quality improvements achieved
**Long-term Tracking:**
- Monthly maintenance time investment
- Database organization score
- Knowledge retrieval efficiency
- User satisfaction with search
## 🔄 Recurring Maintenance
### Weekly Maintenance (15-30 minutes)
```
Weekly Memory Maintenance:
1. Recall memories from 'last week'
2. Identify any untagged or poorly tagged items
3. Apply quick categorization
4. Focus on recent work and current projects
5. Update any status changes (resolved issues, completed tasks)
```
### Monthly Maintenance (1-2 hours)
```
Monthly Memory Maintenance:
1. Comprehensive review of recent memories
2. Spot check older memories for organization
3. Update project status tags
4. Consolidate related memories
5. Archive or delete obsolete information
6. Generate maintenance summary report
```
### Quarterly Maintenance (2-4 hours)
```
Quarterly Memory Maintenance:
1. Full database health assessment
2. Tag schema review and updates
3. Memory consolidation and cleanup
4. Performance optimization
5. Backup and archival processes
6. Strategic knowledge organization review
```
## 🎯 Best Practices
### Do's
✅ **Process regularly**: Small, frequent sessions beat large overhauls
✅ **Use consistent patterns**: Develop standard approaches for common scenarios
✅ **Document decisions**: Record maintenance choices for future reference
✅ **Verify thoroughly**: Always confirm changes worked as expected
✅ **Focus on value**: Prioritize high-impact memories first
### Don'ts
❌ **Rush the process**: Quality categorization takes time
❌ **Change content**: Only modify tags and metadata, preserve original content
❌ **Delete without backup**: Ensure new memory is stored before deleting old
❌ **Ignore verification**: Always test that maintenance improved functionality
❌ **Work when tired**: Categorization quality suffers with fatigue
## 🚀 Advanced Techniques
### Automated Assistance
**Use semantic search for tag suggestions:**
```javascript
// Find similar memories for tag pattern ideas
retrieve_memory({"query": "[memory content excerpt]", "n_results": 5})
```
**Pattern recognition:**
```javascript
// Identify common themes for standardization
search_by_tag({"tags": ["technology-name"]}) // See existing patterns
```
### Integration Workflows
**Connect with external tools:**
- Export tagged memories for documentation systems
- Sync with project management tools
- Generate reports for team sharing
- Create knowledge graphs from tag relationships
---
*This guide provides the foundation for maintaining a professional-grade knowledge management system. Regular maintenance ensures your MCP Memory Service continues to provide maximum value as your knowledge base grows.*
```
--------------------------------------------------------------------------------
/docs/development/release-checklist.md:
--------------------------------------------------------------------------------
```markdown
# Release Checklist
This checklist ensures that critical bugs like the HTTP-MCP bridge issues are caught before release.
## Pre-Release Testing
### ✅ Core Functionality Tests
- [ ] **Health Check Endpoints**
- [ ] `/api/health` returns 200 with healthy status
- [ ] `/health` returns 404 (wrong endpoint)
- [ ] Health check works through MCP bridge
- [ ] Health check works with Claude Desktop
- [ ] **Memory Storage Operations**
- [ ] Store memory returns HTTP 200 with `success: true`
- [ ] Duplicate detection returns HTTP 200 with `success: false`
- [ ] Invalid requests return appropriate error codes
- [ ] All operations work through MCP bridge
- [ ] **API Endpoint Consistency**
- [ ] All endpoints use `/api/` prefix correctly
- [ ] URL construction doesn't break base paths
- [ ] Bridge correctly appends paths to base URL
### ✅ HTTP-MCP Bridge Specific Tests
- [ ] **Status Code Handling**
- [ ] Bridge accepts HTTP 200 responses (not just 201)
- [ ] Bridge checks `success` field for actual result
- [ ] Bridge handles both success and failure in 200 responses
- [ ] **URL Construction**
- [ ] Bridge preserves `/api` base path in URLs
- [ ] `new URL()` calls don't replace existing paths
- [ ] All API calls reach correct endpoints
- [ ] **MCP Protocol Compliance**
- [ ] `initialize` method works
- [ ] `tools/list` returns all tools
- [ ] `tools/call` executes correctly
- [ ] Error responses are properly formatted
### ✅ End-to-End Testing
- [ ] **Claude Desktop Integration**
- [ ] Memory storage through Claude Desktop works
- [ ] Memory retrieval through Claude Desktop works
- [ ] Health checks show healthy status
- [ ] No "unhealthy" false positives
- [ ] **Remote Server Testing**
- [ ] Bridge connects to remote server correctly
- [ ] Authentication works with API keys
- [ ] All operations work across network
- [ ] SSL certificates are handled properly
### ✅ Contract Validation
- [ ] **API Response Formats**
- [ ] Memory storage responses match documented format
- [ ] Health responses match documented format
- [ ] Error responses match documented format
- [ ] Search responses match documented format
- [ ] **Backward Compatibility**
- [ ] Existing configurations continue to work
- [ ] No breaking changes to client interfaces
- [ ] Bridge supports both HTTP 200 and 201 responses
## Automated Testing Requirements
### ✅ Unit Tests
- [ ] HTTP-MCP bridge unit tests pass
- [ ] Mock server responses are realistic
- [ ] All edge cases are covered
- [ ] Error conditions are tested
### ✅ Integration Tests
- [ ] Bridge-server integration tests pass
- [ ] Contract tests validate API behavior
- [ ] End-to-end MCP protocol tests pass
- [ ] Real server connectivity tests pass
### ✅ CI/CD Pipeline
- [ ] Bridge tests run on every commit
- [ ] Tests block merges if failing
- [ ] Contract validation passes
- [ ] Multiple Node.js versions tested
## Manual Testing Checklist
### ✅ Critical User Paths
1. **Claude Desktop User**:
- [ ] Install and configure Claude Desktop with MCP Memory Service
- [ ] Store a memory using Claude Desktop
- [ ] Retrieve memories using Claude Desktop
- [ ] Verify health check shows healthy status
- [ ] Confirm no "unhealthy" warnings appear
2. **Remote Server User**:
- [ ] Configure bridge to connect to remote server
- [ ] Test memory operations work correctly
- [ ] Verify all API endpoints are reachable
- [ ] Confirm authentication works
3. **API Consumer**:
- [ ] Test direct HTTP API calls work
- [ ] Verify response formats match documentation
- [ ] Test error conditions return expected responses
### ✅ Platform Testing
- [ ] **Windows**: Bridge works with Windows Claude Desktop
- [ ] **macOS**: Bridge works with macOS Claude Desktop
- [ ] **Linux**: Bridge works with Linux installations
## Code Quality Checks
### ✅ Code Review Requirements
- [ ] All HTTP status code assumptions documented
- [ ] URL construction logic reviewed
- [ ] Error handling covers all scenarios
- [ ] No hardcoded endpoints or assumptions
### ✅ Documentation Updates
- [ ] API contract documentation updated
- [ ] Bridge usage documentation updated
- [ ] Troubleshooting guides updated
- [ ] Breaking changes documented
## Release Process
### ✅ Version Management (3-File Procedure)
- [ ] **Update `src/mcp_memory_service/__init__.py`**
- [ ] Update `__version__` string (e.g., `"8.17.0"`)
- [ ] Verify version format follows semantic versioning (MAJOR.MINOR.PATCH)
- [ ] **Update `pyproject.toml`**
- [ ] Update `version` field in `[project]` section
- [ ] Ensure version matches `__init__.py` exactly
- [ ] **Lock dependencies**
- [ ] Run `uv lock` to update `uv.lock` file
- [ ] Commit all three files together in version bump commit
- [ ] **Semantic Versioning Rules**
- [ ] MAJOR: Breaking changes (API changes, removed features)
- [ ] MINOR: New features (backward compatible)
- [ ] PATCH: Bug fixes (no API changes)
### ✅ CHANGELOG Quality Gates
- [ ] **Format Validation**
- [ ] Follows [Keep a Changelog](https://keepachangelog.com/) format
- [ ] Version header includes date: `## [8.17.0] - 2025-11-04`
- [ ] Changes categorized: Added/Changed/Fixed/Removed/Deprecated/Security
- [ ] **Content Requirements**
- [ ] All user-facing changes documented
- [ ] Breaking changes clearly marked with **BREAKING**
- [ ] Performance improvements include metrics (e.g., "50% faster")
- [ ] Bug fixes reference issue numbers (e.g., "Fixes #123")
- [ ] Technical details for maintainers in appropriate sections
- [ ] **Migration Guidance** (if breaking changes)
- [ ] Before/after code examples provided
- [ ] Environment variable changes documented
- [ ] Database migration scripts linked
- [ ] Deprecation timeline specified
### ✅ GitHub Workflow Verification
- [ ] **All Workflows Pass** (check Actions tab)
- [ ] Docker Publish workflow (builds multi-platform images)
- [ ] Publish and Test workflow (PyPI publish + installation tests)
- [ ] HTTP-MCP Bridge Tests (validates MCP protocol compliance)
- [ ] Platform Tests (macOS/Windows/Linux matrix)
- [ ] **Docker Images Built**
- [ ] `mcp-memory-service:latest` tag updated
- [ ] `mcp-memory-service:v8.x.x` version tag created
- [ ] Multi-platform images (linux/amd64, linux/arm64)
- [ ] **PyPI Package Published**
- [ ] Package available at https://pypi.org/project/mcp-memory-service/
- [ ] Installation test passes: `pip install mcp-memory-service==8.x.x`
### ✅ Git Tag and Release
- [ ] **Create annotated Git tag**
```bash
git tag -a v8.x.x -m "Release v8.x.x: Brief description"
```
- [ ] Tag follows `vMAJOR.MINOR.PATCH` format
- [ ] Tag message summarizes key changes
- [ ] **Push tag to remote**
```bash
git push origin v8.x.x
```
- [ ] Tag triggers release workflows
- [ ] **Create GitHub Release**
- [ ] Title: `v8.x.x - Short Description`
- [ ] Body: Copy relevant CHANGELOG section
- [ ] Mark as pre-release if RC version
- [ ] Attach any release artifacts (if applicable)
### ✅ Post-Release Issue Closure
- [ ] **Review Fixed Issues**
- [ ] Search for issues closed by commits in this release
- [ ] Verify each issue is actually resolved
- [ ] **Close Issues with Context**
```markdown
Resolved in v8.x.x via #PR_NUMBER
[Link to CHANGELOG entry]
[Link to relevant Wiki page if applicable]
Thank you for reporting this issue!
```
- [ ] Include PR link for traceability
- [ ] Reference CHANGELOG section
- [ ] Tag issues with `released` label
- [ ] **Update Related Documentation**
- [ ] Wiki pages updated with new features/fixes
- [ ] Troubleshooting guides reflect resolved issues
- [ ] FAQ updated if new common questions emerged
### ✅ Communication
- [ ] Release notes highlight critical fixes
- [ ] Breaking changes clearly documented
- [ ] Migration guide provided if needed
- [ ] Users notified of important changes
## Post-Release Monitoring
### ✅ Health Monitoring
- [ ] Monitor for increased error rates
- [ ] Watch for "unhealthy" status reports
- [ ] Track Claude Desktop connectivity issues
- [ ] Monitor API endpoint usage patterns
### ✅ User Feedback
- [ ] Monitor GitHub issues for reports
- [ ] Check community discussions for problems
- [ ] Respond to user reports quickly
- [ ] Document common issues and solutions
---
## Lessons from HTTP-MCP Bridge Bug
**Critical Mistakes to Avoid:**
1. **Never assume status codes** - Always test against actual server responses
2. **Test critical components** - If users depend on it, it needs comprehensive tests
3. **Validate URL construction** - `new URL()` behavior with base paths is tricky
4. **Document actual behavior** - API contracts must match reality, not hopes
5. **Test end-to-end flows** - Unit tests alone miss integration problems
**Required for Every Release:**
- [ ] HTTP-MCP bridge tested with real server
- [ ] All assumptions about server behavior validated
- [ ] Critical user paths manually tested
- [ ] API contracts verified against implementation
**Emergency Response Plan:**
- If critical bugs are found in production:
1. Create hotfix branch immediately
2. Write failing test that reproduces the bug
3. Fix bug and verify test passes
4. Release hotfix within 24 hours
5. Post-mortem to prevent similar issues
---
## Rollback Procedure
### ✅ Emergency Rollback (if release breaks production)
**When to Rollback:**
- Critical functionality broken (storage, retrieval, MCP protocol)
- Data corruption risk identified
- Security vulnerability introduced
- Widespread user-reported failures
**Rollback Steps:**
1. **Immediate Actions**
- [ ] Create GitHub issue documenting the problem
- [ ] Tag issue with `critical`, `rollback-needed`
- [ ] Notify users via GitHub Discussions/Release notes
2. **Docker Rollback**
```bash
# Tag previous version as latest
git checkout vPREVIOUS_VERSION
docker build -t mcp-memory-service:latest .
docker push mcp-memory-service:latest
```
- [ ] Verify previous Docker image works
- [ ] Update documentation to reference previous version
3. **PyPI Rollback** (yank bad version)
```bash
# Yank the broken version (keeps it available but discourages new installs).
# Note: twine has no "yank" command - yank the release via the PyPI web UI
# (project page -> Manage project -> Releases -> Options -> Yank), per PEP 592.
```
- [ ] Yank version on PyPI
- [ ] Publish notice in release notes
4. **Git Tag Management**
- [ ] Keep the bad tag for history (don't delete)
- [ ] Create new hotfix tag (e.g., `v8.x.x+1`) with fix
- [ ] Mark GitHub Release as "This release has known issues - use v8.x.x-1 instead"
5. **User Communication**
- [ ] Post issue explaining problem and rollback
- [ ] Update README with rollback instructions
- [ ] Pin issue to repository
- [ ] Post in Discussions with migration path
6. **Post-Rollback Analysis**
- [ ] Document what went wrong in post-mortem
- [ ] Add regression test to prevent recurrence
- [ ] Update this checklist with lessons learned
- [ ] Review release testing procedures
**Recovery Timeline:**
- Hour 1: Identify issue, create GitHub issue, begin rollback
- Hour 2-4: Complete rollback, verify previous version works
- Hour 4-24: Investigate root cause, prepare hotfix
- Day 2: Release hotfix with comprehensive tests
- Week 1: Post-mortem, update testing procedures
---
This checklist must be completed for every release to prevent critical bugs from reaching users.
```
--------------------------------------------------------------------------------
/scripts/sync/sync_memory_backends.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Bidirectional sync script for MCP Memory Service backends.
Syncs memories between Cloudflare (primary) and SQLite-vec (backup).
"""
import sys
import os
import asyncio
import logging
import argparse
import hashlib
from pathlib import Path
from typing import List, Dict, Any, Tuple
from datetime import datetime
# Add src directory to path so we can import from the mcp_memory_service package
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
from mcp_memory_service.config import (
CLOUDFLARE_API_TOKEN, CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_VECTORIZE_INDEX,
CLOUDFLARE_D1_DATABASE_ID, BASE_DIR
)
from mcp_memory_service.models.memory import Memory
from mcp_memory_service.storage.cloudflare import CloudflareStorage
from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("memory_sync")
class MemorySync:
    """Handles bidirectional sync between Cloudflare and SQLite-vec backends.

    Cloudflare is treated as the primary backend and SQLite-vec as the local
    backup. Memories can be pushed in either direction or both. Duplicate
    detection uses both the backend-stored content_hash and a locally
    recomputed content+metadata hash.
    """

    def __init__(self, sqlite_path: Optional[str] = None):
        """Initialize sync with both storage backends.

        Args:
            sqlite_path: Path to the SQLite-vec database file. Defaults to
                'backup_sqlite_vec.db' inside BASE_DIR.
        """
        self.sqlite_path = sqlite_path or os.path.join(BASE_DIR, 'backup_sqlite_vec.db')
        # Initialize storage backends; Cloudflare credentials come from config/env.
        self.cloudflare = CloudflareStorage(
            api_token=CLOUDFLARE_API_TOKEN,
            account_id=CLOUDFLARE_ACCOUNT_ID,
            vectorize_index=CLOUDFLARE_VECTORIZE_INDEX,
            d1_database_id=CLOUDFLARE_D1_DATABASE_ID
        )
        self.sqlite_vec = SqliteVecMemoryStorage(self.sqlite_path)

    async def get_all_memories_from_backend(self, backend_name: str) -> List[Dict[str, Any]]:
        """Fetch all memories from one backend as plain dictionaries.

        Args:
            backend_name: Either 'cloudflare' or 'sqlite_vec'.

        Returns:
            List of memory dicts. Retrieval failures are logged and reported
            as an empty list so a sync pass can continue best-effort.

        Raises:
            ValueError: If backend_name is not a recognized backend.
        """
        if backend_name == 'cloudflare':
            backend = self.cloudflare
        elif backend_name == 'sqlite_vec':
            backend = self.sqlite_vec
        else:
            raise ValueError(f"Unknown backend: {backend_name}")
        try:
            memories_list = await backend.get_all_memories()
            memories = [
                {
                    'content_hash': memory.content_hash,
                    'content': memory.content,
                    'metadata': memory.metadata,
                    'tags': memory.tags,
                    'memory_type': memory.memory_type,
                    'created_at': memory.created_at,
                    'created_at_iso': memory.created_at_iso,
                    'updated_at': memory.updated_at,
                    'updated_at_iso': memory.updated_at_iso,
                }
                for memory in memories_list
            ]
            logger.info(f"Retrieved {len(memories)} memories from {backend_name}")
            return memories
        except Exception as e:
            # NOTE: an unreachable backend is indistinguishable from an empty
            # one here; callers use this only for best-effort sync/status.
            logger.error(f"Error retrieving memories from {backend_name}: {e}")
            return []

    def calculate_content_hash(self, content: str, metadata: Dict[str, Any]) -> str:
        """Compute a short hash of content plus metadata for duplicate detection.

        Args:
            content: Raw memory content.
            metadata: Memory metadata; items are sorted so the hash is
                independent of dict insertion order.

        Returns:
            First 16 hex characters of the SHA-256 digest.
        """
        content_str = f"{content}_{sorted(metadata.items())}"
        return hashlib.sha256(content_str.encode()).hexdigest()[:16]

    async def _sync_between_backends(self, source_backend: str, target_backend: str, dry_run: bool = False) -> Tuple[int, int]:
        """
        Generic method to sync memories between any two backends.

        Args:
            source_backend: Backend to sync from ('cloudflare' or 'sqlite_vec')
            target_backend: Backend to sync to ('cloudflare' or 'sqlite_vec')
            dry_run: If True, only show what would be synced without making changes

        Returns:
            Tuple of (added_count, skipped_count)
        """
        logger.info(f"Starting sync from {source_backend} to {target_backend}...")
        source_memories = await self.get_all_memories_from_backend(source_backend)
        target_memories = await self.get_all_memories_from_backend(target_backend)

        # Two duplicate checks: the backend-stored content_hash, and a locally
        # recomputed content+metadata hash (catches entries whose stored hash
        # differs across backends).
        target_hashes = {mem['content_hash'] for mem in target_memories if mem.get('content_hash')}
        target_content_hashes = {
            self.calculate_content_hash(mem['content'], mem['metadata'])
            for mem in target_memories
        }

        added_count = 0
        skipped_count = 0
        target_storage = self.cloudflare if target_backend == 'cloudflare' else self.sqlite_vec

        for source_memory in source_memories:
            content_hash = self.calculate_content_hash(source_memory['content'], source_memory['metadata'])
            if (source_memory.get('content_hash') in target_hashes or
                    content_hash in target_content_hashes):
                skipped_count += 1
                continue
            if not dry_run:
                try:
                    memory_obj = Memory(
                        content=source_memory['content'],
                        content_hash=source_memory['content_hash'],
                        tags=source_memory.get('tags', []),
                        metadata=source_memory.get('metadata', {}),
                        memory_type=source_memory.get('memory_type'),
                        created_at=source_memory.get('created_at'),
                        updated_at=source_memory.get('updated_at'),
                    )
                    success, message = await target_storage.store(memory_obj)
                    if success:
                        added_count += 1
                        logger.debug(f"Added memory: {source_memory['content_hash'][:8]}...")
                    else:
                        logger.warning(f"Failed to store memory {source_memory['content_hash']}: {message}")
                except Exception as e:
                    # Keep syncing the remaining memories on a single failure.
                    logger.error(f"Error storing memory {source_memory['content_hash']}: {e}")
            else:
                # Dry run: count what would have been added.
                added_count += 1

        logger.info(f"{source_backend} → {target_backend}: {added_count} added, {skipped_count} skipped")
        return added_count, skipped_count

    async def sync_cloudflare_to_sqlite(self, dry_run: bool = False) -> Tuple[int, int]:
        """Sync memories from Cloudflare to SQLite-vec."""
        return await self._sync_between_backends('cloudflare', 'sqlite_vec', dry_run)

    async def sync_sqlite_to_cloudflare(self, dry_run: bool = False) -> Tuple[int, int]:
        """Sync memories from SQLite-vec to Cloudflare."""
        return await self._sync_between_backends('sqlite_vec', 'cloudflare', dry_run)

    async def bidirectional_sync(self, dry_run: bool = False) -> Dict[str, Tuple[int, int]]:
        """Perform bidirectional sync between backends.

        Returns:
            Dict with per-direction (added, skipped) tuples under the keys
            'cloudflare_to_sqlite' and 'sqlite_to_cloudflare'.
        """
        logger.info("Starting bidirectional sync...")
        results = {}
        # Sync Cloudflare → SQLite-vec first, then the reverse direction.
        cf_to_sqlite = await self.sync_cloudflare_to_sqlite(dry_run)
        results['cloudflare_to_sqlite'] = cf_to_sqlite
        sqlite_to_cf = await self.sync_sqlite_to_cloudflare(dry_run)
        results['sqlite_to_cloudflare'] = sqlite_to_cf
        logger.info("Bidirectional sync completed")
        return results

    async def get_sync_status(self) -> Dict[str, Any]:
        """Get sync status showing memory counts in both backends.

        Returns:
            Dict with memory counts, a timestamp, and per-backend
            configuration/availability flags.
        """
        cf_memories = await self.get_all_memories_from_backend('cloudflare')
        sqlite_memories = await self.get_all_memories_from_backend('sqlite_vec')
        status = {
            'cloudflare_count': len(cf_memories),
            'sqlite_vec_count': len(sqlite_memories),
            'sync_time': datetime.now().isoformat(),
            'backends_configured': {
                'cloudflare': bool(CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID),
                'sqlite_vec': os.path.exists(self.sqlite_path) if self.sqlite_path else False
            }
        }
        return status
async def main():
    """Parse CLI arguments and run the requested memory sync operation.

    Supports three directions (cf-to-sqlite, sqlite-to-cf, bidirectional),
    a --dry-run mode, and a --status mode that only reports memory counts.
    Exits the process with status 1 on any sync failure.
    """
    parser = argparse.ArgumentParser(description='Sync memories between Cloudflare and SQLite-vec backends')
    parser.add_argument('--direction', choices=['cf-to-sqlite', 'sqlite-to-cf', 'bidirectional'],
                        default='bidirectional', help='Sync direction')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be synced without actually syncing')
    parser.add_argument('--status', action='store_true', help='Show sync status only')
    parser.add_argument('--sqlite-path', help='Path to SQLite-vec database file')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')
    args = parser.parse_args()
    if args.verbose:
        # Raise verbosity on the root logger so backend modules log too.
        logging.getLogger().setLevel(logging.DEBUG)
    # Initialize sync
    sync = MemorySync(sqlite_path=args.sqlite_path)
    try:
        if args.status:
            # Status-only mode: report counts/configuration and exit early.
            status = await sync.get_sync_status()
            print(f"\n=== Memory Sync Status ===")
            print(f"Cloudflare memories: {status['cloudflare_count']}")
            print(f"SQLite-vec memories: {status['sqlite_vec_count']}")
            print(f"Cloudflare configured: {status['backends_configured']['cloudflare']}")
            print(f"SQLite-vec file exists: {status['backends_configured']['sqlite_vec']}")
            print(f"Last check: {status['sync_time']}")
            return
        logger.info(f"=== Starting memory sync ({args.direction}) ===")
        if args.dry_run:
            logger.info("DRY RUN MODE - No changes will be made")
        if args.direction == 'cf-to-sqlite':
            added, skipped = await sync.sync_cloudflare_to_sqlite(dry_run=args.dry_run)
            print(f"Cloudflare → SQLite-vec: {added} added, {skipped} skipped")
        elif args.direction == 'sqlite-to-cf':
            added, skipped = await sync.sync_sqlite_to_cloudflare(dry_run=args.dry_run)
            print(f"SQLite-vec → Cloudflare: {added} added, {skipped} skipped")
        else:  # bidirectional
            results = await sync.bidirectional_sync(dry_run=args.dry_run)
            cf_to_sqlite = results['cloudflare_to_sqlite']
            sqlite_to_cf = results['sqlite_to_cloudflare']
            print(f"Cloudflare → SQLite-vec: {cf_to_sqlite[0]} added, {cf_to_sqlite[1]} skipped")
            print(f"SQLite-vec → Cloudflare: {sqlite_to_cf[0]} added, {sqlite_to_cf[1]} skipped")
        logger.info("=== Sync completed successfully ===")
    except Exception as e:
        # Any unhandled failure aborts with a non-zero exit code for scripting.
        logger.error(f"Sync failed: {str(e)}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: run the async main under a fresh event loop.
    asyncio.run(main())
```
--------------------------------------------------------------------------------
/claude-hooks/utilities/project-detector.js:
--------------------------------------------------------------------------------
```javascript
/**
* Project Context Detection Utility
* Analyzes the current directory to determine project type, language, and context
*/
const fs = require('fs').promises;
const path = require('path');
const { execSync } = require('child_process');
/**
* Detect programming language from file extensions
*/
/**
 * Detect the primary programming language of a directory by counting
 * file extensions among its immediate children (non-recursive).
 *
 * @param {string} directory - Directory to scan
 * @returns {Promise<{primary: string, extensions: Object, confidence: number}>}
 *   Primary language name, raw extension counts, and a 0..1 confidence.
 */
async function detectLanguage(directory) {
    // Recognized extensions mapped to human-readable language names.
    const LANGUAGE_BY_EXT = {
        '.js': 'JavaScript',
        '.ts': 'TypeScript',
        '.jsx': 'React/JavaScript',
        '.tsx': 'React/TypeScript',
        '.py': 'Python',
        '.rs': 'Rust',
        '.go': 'Go',
        '.java': 'Java',
        '.cpp': 'C++',
        '.c': 'C',
        '.cs': 'C#',
        '.php': 'PHP',
        '.rb': 'Ruby',
        '.swift': 'Swift',
        '.kt': 'Kotlin',
        '.scala': 'Scala',
        '.sh': 'Shell',
        '.md': 'Documentation'
    };
    try {
        const entries = await fs.readdir(directory, { withFileTypes: true });
        // Tally extension frequencies for regular files only.
        const extCounts = new Map();
        for (const entry of entries) {
            if (!entry.isFile()) continue;
            const ext = path.extname(entry.name).toLowerCase();
            if (ext) extCounts.set(ext, (extCounts.get(ext) || 0) + 1);
        }
        // Pick the most frequent extension that maps to a known language.
        let primary = 'Unknown';
        let best = 0;
        for (const [ext, count] of extCounts) {
            if (LANGUAGE_BY_EXT[ext] && count > best) {
                best = count;
                primary = LANGUAGE_BY_EXT[ext];
            }
        }
        return {
            primary,
            extensions: Object.fromEntries(extCounts),
            // 10+ files of one language counts as full confidence.
            confidence: best > 0 ? Math.min(best / 10, 1) : 0
        };
    } catch (error) {
        // Unreadable directory: report an unknown language with no confidence.
        return { primary: 'Unknown', extensions: {}, confidence: 0 };
    }
}
/**
* Detect framework and tools from configuration files
*/
/**
 * Detect framework and tooling from well-known configuration files
 * in the directory's top level.
 *
 * Fixes over the previous version:
 * - Removed redundant duplicate dependency checks (`deps.next || deps['next']`
 *   etc. — both sides are the same property access).
 * - A single unreadable/malformed config file (e.g. invalid package.json) no
 *   longer aborts detection and discards results from other files.
 *
 * @param {string} directory - Directory to inspect
 * @returns {Promise<{frameworks: string[], tools: string[], projectName: string|null}>}
 */
async function detectFramework(directory) {
    const frameworks = [];
    const tools = [];
    try {
        const files = await fs.readdir(directory);
        // Each handler inspects one config file; a string return value is a
        // candidate project name, null means "tooling only".
        const configFiles = {
            'package.json': async () => {
                const pkg = JSON.parse(await fs.readFile(path.join(directory, 'package.json'), 'utf8'));
                // Check runtime and dev dependencies for known frameworks.
                const deps = { ...pkg.dependencies, ...pkg.devDependencies };
                if (deps.react || deps['@types/react']) frameworks.push('React');
                if (deps.vue || deps['@vue/cli']) frameworks.push('Vue.js');
                if (deps.angular || deps['@angular/core']) frameworks.push('Angular');
                if (deps.next) frameworks.push('Next.js');
                if (deps.express) frameworks.push('Express.js');
                if (deps.fastify) frameworks.push('Fastify');
                if (deps.svelte) frameworks.push('Svelte');
                tools.push('npm');
                return pkg.name || 'node-project';
            },
            'pyproject.toml': async () => {
                tools.push('Python');
                const content = await fs.readFile(path.join(directory, 'pyproject.toml'), 'utf8');
                // Extract project name from pyproject.toml (first `name = "..."` line).
                const nameMatch = content.match(/^name\s*=\s*["']([^"']+)["']/m);
                if (content.includes('django')) frameworks.push('Django');
                if (content.includes('flask')) frameworks.push('Flask');
                if (content.includes('fastapi')) frameworks.push('FastAPI');
                if (content.includes('pytest')) tools.push('pytest');
                if (content.includes('poetry')) tools.push('Poetry');
                return nameMatch ? nameMatch[1] : 'python-project';
            },
            'Cargo.toml': async () => {
                tools.push('Cargo');
                const content = await fs.readFile(path.join(directory, 'Cargo.toml'), 'utf8');
                const nameMatch = content.match(/^name\s*=\s*["']([^"']+)["']/m);
                if (content.includes('actix-web')) frameworks.push('Actix Web');
                if (content.includes('rocket')) frameworks.push('Rocket');
                if (content.includes('warp')) frameworks.push('Warp');
                if (content.includes('tokio')) frameworks.push('Tokio');
                return nameMatch ? nameMatch[1] : 'rust-project';
            },
            'go.mod': async () => {
                tools.push('Go Modules');
                const content = await fs.readFile(path.join(directory, 'go.mod'), 'utf8');
                const moduleMatch = content.match(/^module\s+(.+)$/m);
                if (content.includes('gin-gonic/gin')) frameworks.push('Gin');
                if (content.includes('gorilla/mux')) frameworks.push('Gorilla Mux');
                if (content.includes('fiber')) frameworks.push('Fiber');
                // Use the last path segment of the module path as the name.
                return moduleMatch ? path.basename(moduleMatch[1]) : 'go-project';
            },
            'pom.xml': () => {
                tools.push('Maven');
                frameworks.push('Java/Maven');
                return 'java-maven-project';
            },
            'build.gradle': () => {
                tools.push('Gradle');
                frameworks.push('Java/Gradle');
                return 'java-gradle-project';
            },
            'docker-compose.yml': () => {
                tools.push('Docker Compose');
                return null;
            },
            'Dockerfile': () => {
                tools.push('Docker');
                return null;
            },
            '.env': () => {
                tools.push('Environment Config');
                return null;
            }
        };
        let projectName = null;
        for (const file of files) {
            if (!configFiles[file]) continue;
            try {
                const result = await configFiles[file]();
                // First name found wins (directory listing order).
                if (result && !projectName) projectName = result;
            } catch (err) {
                // One bad config file must not discard the other detections.
            }
        }
        return { frameworks, tools, projectName };
    } catch (error) {
        // Directory unreadable: report nothing detected.
        return { frameworks: [], tools: [], projectName: null };
    }
}
/**
* Get Git repository information
*/
/**
 * Gather Git repository information for a directory (synchronous).
 *
 * Fixes over the previous version:
 * - A repository without a configured remote or without any commits used to
 *   make one of the `git` commands throw, causing the whole function to
 *   report `isRepo: false` for a real repository. Each command is now
 *   best-effort with an empty-string fallback.
 * - Repo-name extraction used a greedy match plus a global
 *   `.replace('.git', '')`, which corrupted names containing ".git"
 *   elsewhere (e.g. "my.github" became "myhub"). The regex now strips only
 *   a trailing ".git" suffix.
 *
 * @param {string} directory - Directory to inspect
 * @returns {{isRepo: boolean, branch?: string, remoteUrl?: string,
 *            repoName?: string, lastCommit?: string, error?: string}}
 */
function getGitInfo(directory) {
    // Run a git command in the target directory; return '' on failure.
    const run = (cmd) => {
        try {
            return execSync(cmd, { cwd: path.resolve(directory), encoding: 'utf8' }).trim();
        } catch (err) {
            return '';
        }
    };
    try {
        const gitDir = path.join(directory, '.git');
        // Check if this is a git repository.
        if (!require('fs').existsSync(gitDir)) {
            return { isRepo: false };
        }
        const branch = run('git branch --show-current');
        const remoteUrl = run('git config --get remote.origin.url');
        const lastCommit = run('git log -1 --pretty=format:"%h %s"');
        // Extract the repository name from the remote URL, dropping only a
        // trailing ".git" suffix.
        let repoName = 'unknown-repo';
        if (remoteUrl) {
            const match = remoteUrl.match(/([^\/]+?)(?:\.git)?$/);
            if (match) {
                repoName = match[1];
            }
        }
        return {
            isRepo: true,
            branch,
            remoteUrl,
            repoName,
            lastCommit
        };
    } catch (error) {
        // Unexpected failure (e.g. fs error): report not-a-repo with detail.
        return { isRepo: false, error: error.message };
    }
}
// ANSI escape sequences for colored console output.
// Append RESET after any styled segment to restore the terminal defaults.
const COLORS = {
    RESET: '\x1b[0m',
    BRIGHT: '\x1b[1m',
    DIM: '\x1b[2m',
    CYAN: '\x1b[36m',
    GREEN: '\x1b[32m',
    BLUE: '\x1b[34m',
    YELLOW: '\x1b[33m',
    GRAY: '\x1b[90m',
    RED: '\x1b[31m'
};
/**
* Main project context detection function with enhanced visual output
*/
async function detectProjectContext(directory = process.cwd()) {
try {
const directoryName = path.basename(directory);
console.log(`${COLORS.BLUE}📂 Project Detector${COLORS.RESET} ${COLORS.DIM}→${COLORS.RESET} Analyzing ${COLORS.BRIGHT}${directoryName}${COLORS.RESET}`);
// Get basic directory information
// Detect language
const language = await detectLanguage(directory);
// Detect framework and tools
const framework = await detectFramework(directory);
// Get Git information
const git = getGitInfo(directory);
// Determine project name (priority: git repo > config file > directory name)
const projectName = framework.projectName || git.repoName || directoryName;
// Calculate confidence score
let confidence = 0.5; // Base confidence
if (git.isRepo) confidence += 0.3;
if (framework.frameworks.length > 0) confidence += 0.2;
if (language.confidence > 0.5) confidence += language.confidence * 0.3;
const context = {
name: projectName,
directory,
language: language.primary,
languageDetails: language,
frameworks: framework.frameworks,
tools: framework.tools,
git: git,
confidence: Math.min(confidence, 1.0),
metadata: {
detectedAt: new Date().toISOString(),
analyzer: 'claude-hooks-project-detector',
version: '1.1.0'
}
};
// Enhanced output with confidence indication
const confidencePercent = (context.confidence * 100).toFixed(0);
const confidenceColor = context.confidence > 0.8 ? COLORS.GREEN :
context.confidence > 0.6 ? COLORS.YELLOW : COLORS.GRAY;
console.log(`${COLORS.BLUE}📊 Detection Result${COLORS.RESET} ${COLORS.DIM}→${COLORS.RESET} ${COLORS.BRIGHT}${context.name}${COLORS.RESET} ${COLORS.GRAY}(${context.language})${COLORS.RESET} ${COLORS.DIM}•${COLORS.RESET} ${confidenceColor}${confidencePercent}%${COLORS.RESET}`);
return context;
} catch (error) {
console.error(`${COLORS.RED}❌ Project Detector Error${COLORS.RESET} ${COLORS.DIM}→${COLORS.RESET} ${error.message}`);
// Return minimal context on error
return {
name: path.basename(directory),
directory,
language: 'Unknown',
frameworks: [],
tools: [],
confidence: 0.1,
error: error.message
};
}
}
// Public API: full context detection plus the individual analyzers for
// targeted use by other hooks/utilities.
module.exports = {
    detectProjectContext,
    detectLanguage,
    detectFramework,
    getGitInfo
};
// Direct execution support for testing: `node project-detector.js` prints
// the detected context for the current working directory as JSON.
if (require.main === module) {
    detectProjectContext(process.cwd())
        .then(context => {
            console.log('\n=== PROJECT CONTEXT ===');
            console.log(JSON.stringify(context, null, 2));
            console.log('=== END CONTEXT ===\n');
        })
        .catch(error => console.error('Detection failed:', error));
}
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/sync/litestream_config.py:
--------------------------------------------------------------------------------
```python
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Litestream configuration management for database synchronization.
"""
import yaml
import logging
import platform
from pathlib import Path
from typing import Dict, Any, Optional, List
logger = logging.getLogger(__name__)
class LitestreamManager:
    """
    Manages Litestream configuration for SQLite database replication.

    Provides utilities to generate configuration files for different
    deployment scenarios and machine types. All `generate_*` methods are
    pure: they return dictionaries or strings without touching the
    filesystem; only write_config performs I/O.
    """
    def __init__(self):
        """Initialize the Litestream manager."""
        # Normalized platform name: 'linux', 'darwin', or 'windows'.
        self.platform = platform.system().lower()
    def generate_master_config(
        self,
        db_path: Path,
        replica_endpoint: str,
        backup_path: Optional[Path] = None,
        checkpoint_interval: str = "30s",
        wal_retention: str = "10m"
    ) -> Dict[str, Any]:
        """
        Generate Litestream configuration for master database.

        Args:
            db_path: Path to the SQLite database
            replica_endpoint: Endpoint where replicas can access the stream
            backup_path: Optional local backup path
            checkpoint_interval: How often to checkpoint
            wal_retention: How long to retain WAL entries

        Returns:
            Litestream configuration dictionary
        """
        config = {
            "dbs": [{
                "path": str(db_path),
                "replicas": [],
                "checkpoint-interval": checkpoint_interval,
                "wal-retention": wal_retention
            }]
        }
        db_config = config["dbs"][0]
        # Primary replica target that serves downstream replicas (fast sync).
        if replica_endpoint:
            db_config["replicas"].append({
                "type": "file",
                "path": replica_endpoint,
                "sync-interval": "10s"
            })
        # Optional local backup replica with a slower sync cadence.
        if backup_path:
            db_config["replicas"].append({
                "type": "file",
                "path": str(backup_path),
                "sync-interval": "1m"
            })
        return config
    def generate_replica_config(
        self,
        db_path: Path,
        upstream_url: str,
        sync_interval: str = "10s"
    ) -> Dict[str, Any]:
        """
        Generate Litestream configuration for replica database.

        Args:
            db_path: Local path for the replicated database
            upstream_url: URL of the master database stream
            sync_interval: How often to sync from upstream

        Returns:
            Litestream configuration dictionary
        """
        config = {
            "dbs": [{
                "path": str(db_path),
                "replicas": [{
                    # NOTE(review): Litestream file replicas normally take a
                    # "path" key; confirm that "url" is intended/accepted here.
                    "type": "file",
                    "url": upstream_url,
                    "sync-interval": sync_interval
                }]
            }]
        }
        return config
    def generate_s3_config(
        self,
        db_path: Path,
        s3_endpoint: str,
        bucket: str,
        path: str,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        is_master: bool = True
    ) -> Dict[str, Any]:
        """
        Generate Litestream configuration for S3-compatible storage.

        Args:
            db_path: Path to the SQLite database
            s3_endpoint: S3-compatible endpoint URL
            bucket: S3 bucket name
            path: Path within the bucket
            access_key: S3 access key (optional, can use env vars)
            secret_key: S3 secret key (optional, can use env vars)
            is_master: Whether this is the master or replica

        Returns:
            Litestream configuration dictionary
        """
        replica_config = {
            "type": "s3",
            "endpoint": s3_endpoint,
            "bucket": bucket,
            "path": path
        }
        # Add credentials if provided; otherwise Litestream reads them from
        # its standard environment variables.
        if access_key and secret_key:
            replica_config.update({
                "access-key-id": access_key,
                "secret-access-key": secret_key
            })
        if is_master:
            # Masters also get checkpoint/WAL-retention settings.
            config = {
                "dbs": [{
                    "path": str(db_path),
                    "replicas": [replica_config],
                    "checkpoint-interval": "30s",
                    "wal-retention": "10m"
                }]
            }
        else:
            config = {
                "dbs": [{
                    "path": str(db_path),
                    "replicas": [replica_config]
                }]
            }
        return config
    def get_default_config_path(self) -> Path:
        """Get the default Litestream configuration file path for this platform."""
        if self.platform == "windows":
            return Path("C:/ProgramData/litestream/litestream.yml")
        elif self.platform == "darwin":  # macOS
            return Path("/usr/local/etc/litestream.yml")
        else:  # Linux
            return Path("/etc/litestream.yml")
    def write_config(self, config: Dict[str, Any], config_path: Optional[Path] = None) -> Path:
        """
        Write Litestream configuration to file.

        Args:
            config: Configuration dictionary
            config_path: Path to write config file (uses default if not provided)

        Returns:
            Path where configuration was written
        """
        if config_path is None:
            config_path = self.get_default_config_path()
        # Create parent directory if needed.
        config_path.parent.mkdir(parents=True, exist_ok=True)
        # Write YAML configuration, preserving key order for readability.
        with open(config_path, 'w') as f:
            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
        logger.info(f"Litestream configuration written to {config_path}")
        return config_path
    def generate_systemd_service(self, config_path: Path) -> str:
        """
        Generate systemd service file content for Litestream.

        Args:
            config_path: Path to the Litestream configuration file

        Returns:
            Systemd service file content
        """
        # Restart=always with RestartSec=1 keeps replication running after
        # crashes; StartLimitIntervalSec=0 disables restart rate limiting.
        service_content = f"""[Unit]
Description=Litestream replication service
After=network.target
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
RestartSec=1
User=root
ExecStart=/usr/local/bin/litestream replicate -config {config_path}
[Install]
WantedBy=multi-user.target
"""
        return service_content
    def generate_launchd_plist(self, config_path: Path) -> str:
        """
        Generate macOS LaunchDaemon plist for Litestream.

        Args:
            config_path: Path to the Litestream configuration file

        Returns:
            LaunchDaemon plist content
        """
        # RunAtLoad + KeepAlive make launchd start the daemon at boot and
        # restart it on exit; stdout/stderr both go to /var/log/litestream.log.
        plist_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>io.litestream.replication</string>
<key>ProgramArguments</key>
<array>
<string>/usr/local/bin/litestream</string>
<string>replicate</string>
<string>-config</string>
<string>{config_path}</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>KeepAlive</key>
<true/>
<key>StandardOutPath</key>
<string>/var/log/litestream.log</string>
<key>StandardErrorPath</key>
<string>/var/log/litestream.log</string>
</dict>
</plist>
"""
        return plist_content
    def get_installation_commands(self) -> List[str]:
        """
        Get platform-specific Litestream installation commands.

        Returns:
            List of commands to install Litestream (Windows entries are
            manual-instruction comments rather than runnable commands)
        """
        if self.platform == "windows":
            return [
                "# Download and install Litestream for Windows",
                "# Visit: https://github.com/benbjohnson/litestream/releases",
                "# Extract litestream.exe to C:\\Program Files\\Litestream\\",
                "# Add to PATH environment variable"
            ]
        elif self.platform == "darwin":  # macOS
            return [
                "brew install benbjohnson/litestream/litestream"
            ]
        else:  # Linux
            return [
                "curl -LsS https://github.com/benbjohnson/litestream/releases/latest/download/litestream-linux-amd64.tar.gz | tar -xzf -",
                "sudo mv litestream /usr/local/bin/",
                "sudo chmod +x /usr/local/bin/litestream"
            ]
    def generate_deployment_script(
        self,
        role: str,  # "master" or "replica"
        db_path: Path,
        replica_endpoint: Optional[str] = None,
        upstream_url: Optional[str] = None
    ) -> str:
        """
        Generate a deployment script for setting up Litestream.

        Args:
            role: Whether this is a "master" or "replica"
            db_path: Path to the SQLite database
            replica_endpoint: Endpoint for serving replicas (master only)
            upstream_url: URL of master stream (replica only)

        Returns:
            Shell script content for deployment
        """
        install_commands = self.get_installation_commands()
        script_lines = [
            "#!/bin/bash",
            "# Litestream deployment script",
            f"# Role: {role}",
            "",
            "set -e",
            "",
            "echo 'Installing Litestream...'",
        ]
        script_lines.extend(install_commands)
        script_lines.extend([
            "",
            "echo 'Generating configuration...'",
        ])
        # Role-specific comments document where data flows to/from.
        if role == "master":
            script_lines.extend([
                f"# Master configuration for {db_path}",
                f"# Serving replicas at: {replica_endpoint}",
            ])
        else:
            script_lines.extend([
                f"# Replica configuration for {db_path}",
                f"# Syncing from: {upstream_url}",
            ])
        script_lines.extend([
            "",
            "echo 'Starting Litestream service...'",
        ])
        # Service start commands differ per init system (systemd vs launchd);
        # no commands are emitted on Windows.
        if self.platform == "linux":
            script_lines.extend([
                "sudo systemctl enable litestream",
                "sudo systemctl start litestream",
                "sudo systemctl status litestream",
            ])
        elif self.platform == "darwin":
            script_lines.extend([
                "sudo launchctl load /Library/LaunchDaemons/io.litestream.replication.plist",
                "sudo launchctl start io.litestream.replication",
            ])
        script_lines.extend([
            "",
            "echo 'Litestream deployment completed!'",
            ""
        ])
        return "\n".join(script_lines)
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/json_loader.py:
--------------------------------------------------------------------------------
```python
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
JSON document loader for structured data files.
"""
import json
import logging
from pathlib import Path
from typing import AsyncGenerator, Dict, Any, Union, List
import asyncio
from .base import DocumentLoader, DocumentChunk
from .chunker import TextChunker, ChunkingStrategy
logger = logging.getLogger(__name__)
class JSONLoader(DocumentLoader):
"""
Document loader for JSON data files.
Features:
- Flattens nested JSON structures to searchable text
- Preserves key-value context (e.g., "config.database.host: localhost")
- Handles arrays and nested objects recursively
- Supports configurable flattening strategies
"""
def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
"""
Initialize JSON loader.
Args:
chunk_size: Target size for text chunks in characters
chunk_overlap: Number of characters to overlap between chunks
"""
super().__init__(chunk_size, chunk_overlap)
self.supported_extensions = ['json']
self.chunker = TextChunker(ChunkingStrategy(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
respect_paragraph_boundaries=False, # JSON doesn't have paragraphs
respect_sentence_boundaries=False, # JSON doesn't have sentences
min_chunk_size=10 # Allow smaller chunks for structured data
))
def can_handle(self, file_path: Path) -> bool:
"""
Check if this loader can handle the given JSON file.
Args:
file_path: Path to the file to check
Returns:
True if this loader can process the JSON file
"""
if not file_path.exists() or not file_path.is_file():
return False
extension = file_path.suffix.lower().lstrip('.')
return extension in self.supported_extensions
    async def extract_chunks(self, file_path: Path, **kwargs) -> AsyncGenerator[DocumentChunk, None]:
        """
        Extract text chunks from a JSON file.

        Args:
            file_path: Path to the JSON file
            **kwargs: Additional options:
                - flatten_strategy: How to flatten nested structures ('dot_notation', 'bracket_notation')
                - max_depth: Maximum nesting depth to flatten (default: unlimited)
                - include_types: Whether to include value types in flattened text (default: False)
                - array_handling: How to handle arrays ('expand', 'summarize', 'flatten')

        Yields:
            DocumentChunk objects containing extracted text and metadata

        Raises:
            FileNotFoundError: If the JSON file doesn't exist
            ValueError: If the JSON file can't be parsed or processed
        """
        await self.validate_file(file_path)
        # Flattening options with conservative defaults.
        flatten_strategy = kwargs.get('flatten_strategy', 'dot_notation')
        max_depth = kwargs.get('max_depth', None)
        include_types = kwargs.get('include_types', False)
        array_handling = kwargs.get('array_handling', 'expand')
        logger.info(f"Extracting chunks from JSON file: {file_path}")
        try:
            # Read and parse JSON (encoding detected with fallbacks).
            data, encoding = await self._read_json_file(file_path)
            # Flatten the JSON structure into searchable "key.path: value" text.
            flattened_text = self._flatten_json(
                data,
                flatten_strategy=flatten_strategy,
                max_depth=max_depth,
                include_types=include_types,
                array_handling=array_handling
            )
            # Base metadata records the extraction settings alongside file info.
            base_metadata = self.get_base_metadata(file_path)
            base_metadata.update({
                'encoding': encoding,
                'content_type': 'json',
                'flatten_strategy': flatten_strategy,
                'array_handling': array_handling,
                'include_types': include_types,
                'max_depth': max_depth,
                'original_keys_count': self._count_keys(data),
                'flattened_text_length': len(flattened_text)
            })
            # Chunk the flattened text and yield each piece with its metadata.
            chunks = self.chunker.chunk_text(flattened_text, base_metadata)
            for i, (chunk_text, chunk_metadata) in enumerate(chunks):
                yield DocumentChunk(
                    content=chunk_text,
                    metadata=chunk_metadata,
                    chunk_index=i,
                    source_file=file_path
                )
        except json.JSONDecodeError as e:
            # Malformed JSON is surfaced as a ValueError with the parse detail.
            logger.error(f"Invalid JSON in file {file_path}: {str(e)}")
            raise ValueError(f"Invalid JSON format: {str(e)}") from e
        except Exception as e:
            # Any other failure is wrapped so callers deal with one exception type.
            logger.error(f"Error extracting from JSON file {file_path}: {type(e).__name__} - {str(e)}")
            raise ValueError(f"Failed to extract JSON content: {str(e)}") from e
async def _read_json_file(self, file_path: Path) -> tuple:
"""
Read and parse JSON file.
Args:
file_path: Path to the JSON file
Returns:
Tuple of (parsed_data, encoding_used)
"""
def _read_sync():
# Try UTF-8 first (most common for JSON)
try:
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
data = json.loads(content)
return data, 'utf-8'
except UnicodeDecodeError:
# Fallback to other encodings
encodings_to_try = ['utf-16', 'utf-32', 'latin-1']
for encoding in encodings_to_try:
try:
with open(file_path, 'r', encoding=encoding) as file:
content = file.read()
data = json.loads(content)
return data, encoding
except UnicodeDecodeError:
continue
except json.JSONDecodeError:
continue
# Last resort with error replacement
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
content = file.read()
data = json.loads(content)
return data, 'utf-8'
# Run file reading in thread pool
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, _read_sync)
def _flatten_json(
    self,
    data: Any,
    prefix: str = "",
    flatten_strategy: str = 'dot_notation',
    max_depth: int = None,
    current_depth: int = 0,
    include_types: bool = False,
    array_handling: str = 'expand'
) -> str:
    """
    Flatten a JSON structure into searchable "key.path: value" text lines.

    Args:
        data: JSON data to flatten (dict, list, or scalar)
        prefix: Key path accumulated so far (empty at the root)
        flatten_strategy: Key-joining strategy ('dot_notation'/'bracket_notation')
        max_depth: Maximum nesting depth to descend, or None for unlimited
        current_depth: Depth of the current call (used with max_depth)
        include_types: Append the Python type name after each scalar value
        array_handling: 'expand' (default), 'flatten', or 'summarize'

    Returns:
        Flattened text, one "prefix: value" entry per line
    """
    # Depth guard: emit a truncation marker instead of descending further.
    if max_depth is not None and current_depth >= max_depth:
        return f"{prefix}: [nested structure truncated at depth {max_depth}]\n"

    def recurse(value, new_prefix):
        # All recursive calls carry the same options, one level deeper.
        return self._flatten_json(
            value, new_prefix, flatten_strategy, max_depth,
            current_depth + 1, include_types, array_handling
        )

    if isinstance(data, dict):
        return "".join(
            recurse(value, self._build_prefix(prefix, key, flatten_strategy))
            for key, value in data.items()
        )

    if isinstance(data, list):
        if array_handling == 'summarize':
            return f"{prefix}: [array with {len(data)} items]\n"
        if array_handling == 'flatten':
            # Treat indices like dict keys, joined via the active strategy.
            return "".join(
                recurse(item, self._build_prefix(prefix, str(idx), flatten_strategy))
                for idx, item in enumerate(data)
            )
        # 'expand' (default): containers recurse under a bracketed prefix,
        # scalars become indexed entries directly.
        pieces = []
        for idx, item in enumerate(data):
            if isinstance(item, (dict, list)):
                pieces.append(recurse(item, f"{prefix}[{idx}]"))
            else:
                suffix = f" ({type(item).__name__})" if include_types else ""
                pieces.append(f"{prefix}[{idx}]: {item}{suffix}\n")
        return "".join(pieces)

    # Scalar leaf value.
    suffix = f" ({type(data).__name__})" if include_types else ""
    return f"{prefix}: {data}{suffix}\n"
def _build_prefix(self, current_prefix: str, key: str, strategy: str) -> str:
"""
Build the prefix for nested keys.
Args:
current_prefix: Current prefix
key: Key to add
strategy: Flattening strategy
Returns:
New prefix string
"""
if not current_prefix:
return key
if strategy == 'bracket_notation':
return f"{current_prefix}[{key}]"
else: # 'dot_notation' - default
return f"{current_prefix}.{key}"
def _count_keys(self, data: Any) -> int:
    """
    Recursively count the total number of dict keys in a JSON structure.

    Args:
        data: Parsed JSON data (dict, list, or scalar)

    Returns:
        Number of keys across all nesting levels; scalars contribute 0.
    """
    if isinstance(data, dict):
        # Keys at this level plus keys nested inside each value.
        return len(data) + sum(self._count_keys(value) for value in data.values())
    if isinstance(data, list):
        # Lists add no keys themselves; only their elements can.
        return sum(self._count_keys(item) for item in data)
    return 0
# Register the JSON loader
def _register_json_loader():
    """Register JSON loader with the registry."""
    # The import and the registration call share one try-block on purpose:
    # during partial package initialization the registry module (or names it
    # pulls in) may not be importable yet, and in that case registration is
    # silently skipped — the module import must never fail because of it.
    try:
        from .registry import register_loader
        register_loader(JSONLoader, ['json'])
        logger.debug("JSON loader registered successfully")
    except ImportError:
        logger.debug("Registry not available during import")

# Auto-register when module is imported
_register_json_loader()
```
--------------------------------------------------------------------------------
/claude-hooks/README-NATURAL-TRIGGERS.md:
--------------------------------------------------------------------------------
```markdown
# Natural Memory Triggers for Claude Code
🧠 **Intelligent mid-conversation memory awareness with performance optimization**
## Overview
The Natural Memory Triggers system provides seamless, intelligent memory awareness during conversations that feels like Claude naturally "remembers" rather than executing explicit system hooks. It uses multi-tiered performance architecture to balance memory intelligence with responsiveness.
> **🎯 v8.5.1 NEW**: **Dynamic Memory Weight Adjustment** - Intelligent auto-calibration automatically detects when memories are stale and adjusts scoring weights to prioritize recent work. No more outdated context!
## Key Features
### 🎯 **Natural Language Pattern Detection**
- **Explicit Memory Requests**: "What did we decide about...?", "Remind me how we..."
- **Past Work References**: "Similar to what we did...", "Like we discussed before..."
- **Technical Discussions**: Architecture, security, database topics that benefit from context
- **Project Continuity**: "Continue with...", "Next step...", problem-solving patterns
### ⚡ **Performance-Aware Architecture**
- **Tiered Processing**: Instant (< 50ms), Fast (< 150ms), Intensive (< 500ms)
- **Smart Performance Profiles**: Speed-focused, Balanced, Memory-aware, Adaptive
- **Automatic Degradation**: Gracefully reduces complexity when performance budgets are exceeded
- **User-Configurable Trade-offs**: Full control over speed vs intelligence balance
### 🔄 **Adaptive Learning**
- **User Preference Learning**: Adapts to user tolerance for latency vs memory awareness
- **Pattern Confidence Adjustment**: Learns which patterns are most valuable to the user
- **Context-Aware Triggering**: Considers project context, conversation history, and topic shifts
## Quick Start
### Installation
The system is integrated into the existing Claude Code hooks. No additional installation required.
### Basic Usage
```bash
# Check current status
node claude-hooks/memory-mode-controller.js status
# Switch to balanced mode (recommended)
node claude-hooks/memory-mode-controller.js profile balanced
# Enable natural triggers
node claude-hooks/memory-mode-controller.js enable
```
### Performance Profiles
Choose the profile that best matches your preferences:
```bash
# Fastest response, minimal memory awareness (< 100ms)
node claude-hooks/memory-mode-controller.js profile speed_focused
# Moderate latency, smart triggers (< 200ms) - RECOMMENDED
node claude-hooks/memory-mode-controller.js profile balanced
# Full memory awareness, accept higher latency (< 500ms)
node claude-hooks/memory-mode-controller.js profile memory_aware
# Auto-adjust based on usage patterns
node claude-hooks/memory-mode-controller.js profile adaptive
```
## How It Works
### Trigger Detection
The system uses a three-tiered approach to detect when memory context would be helpful:
#### **Tier 1: Instant Detection (< 50ms)**
- Regex-based pattern matching for explicit memory requests
- Cache lookups for previously analyzed messages
- Simple keyword extraction for technical terms
#### **Tier 2: Fast Analysis (< 150ms)**
- Contextual analysis with project information
- Topic shift detection from conversation history
- Enhanced pattern matching with semantic context
#### **Tier 3: Intensive Analysis (< 500ms)**
- Deep semantic understanding (when available)
- Full conversation context analysis
- Complex pattern relationships
### Example Triggers
**Explicit Memory Requests** (High Confidence):
```
"What did we decide about the authentication approach?"
"Remind me how we handled user sessions"
"Remember when we discussed the database schema?"
```
**Past Work References** (Medium Confidence):
```
"Similar to what we implemented last time"
"Like we discussed in the previous meeting"
"The same approach we used for the API"
```
**Technical Discussions** (Context-Dependent):
```
"Let's design the authentication architecture"
"How should we handle database migrations?"
"What's our security strategy?"
```
## Configuration
### Basic Configuration
Edit `claude-hooks/config.json`:
```json
{
"naturalTriggers": {
"enabled": true,
"sensitivity": 0.7, // 0-1, higher = more sensitive
"triggerThreshold": 0.6, // 0-1, confidence needed to trigger
"cooldownPeriod": 30000, // Milliseconds between triggers
"maxMemoriesPerTrigger": 5, // Max memories to inject per trigger
"adaptiveLearning": true // Learn from user feedback
}
}
```
### Performance Profiles
Customize performance profiles in the configuration:
```json
{
"performance": {
"defaultProfile": "balanced",
"profiles": {
"speed_focused": {
"maxLatency": 100,
"enabledTiers": ["instant"],
"backgroundProcessing": false
},
"balanced": {
"maxLatency": 200,
"enabledTiers": ["instant", "fast"],
"backgroundProcessing": true
},
"memory_aware": {
"maxLatency": 500,
"enabledTiers": ["instant", "fast", "intensive"],
"backgroundProcessing": true
}
}
}
}
```
## Command Line Interface
### Memory Mode Controller
```bash
# Get current status and configuration
node claude-hooks/memory-mode-controller.js status
# Switch performance profiles
node claude-hooks/memory-mode-controller.js profile <speed_focused|balanced|memory_aware|adaptive>
# Adjust sensitivity (0-1, higher = more triggers)
node claude-hooks/memory-mode-controller.js sensitivity 0.8
# Adjust trigger threshold (0-1, higher = need more confidence)
node claude-hooks/memory-mode-controller.js threshold 0.7
# Enable/disable natural triggers
node claude-hooks/memory-mode-controller.js enable
node claude-hooks/memory-mode-controller.js disable
node claude-hooks/memory-mode-controller.js toggle
# List all available profiles
node claude-hooks/memory-mode-controller.js list
# Reset to defaults
node claude-hooks/memory-mode-controller.js reset
```
## Testing
Run the comprehensive test suite:
```bash
# Full test suite
node claude-hooks/test-natural-triggers.js
# Test dual protocol functionality
node claude-hooks/test-dual-protocol-hook.js
```
The test suite covers:
- Performance management and timing
- Pattern detection accuracy
- Conversation monitoring
- Integration testing
- Performance profile behavior
## Performance Optimization
### Latency Targets
| Profile | Target Latency | Use Case |
|---------|---------------|----------|
| Speed Focused | < 100ms | Priority on responsiveness |
| Balanced | < 200ms | Good balance (recommended) |
| Memory Aware | < 500ms | Maximum memory intelligence |
| Adaptive | Variable | Learns user preferences |
### Performance Monitoring
The system automatically tracks:
- Hook execution latency
- Pattern detection accuracy
- User acceptance rates
- Memory query performance
### Optimization Tips
1. **Start with Balanced Mode**: Good default for most users
2. **Monitor Performance**: Check status regularly to see average latencies
3. **Adjust Sensitivity**: Lower sensitivity = fewer false positives
4. **Use Cooldown Period**: Prevents excessive triggering
5. **Enable Learning**: Let the system adapt to your preferences
## Architecture
### Component Overview
```
┌─────────────────────────────────────────────────────────────┐
│ Mid-Conversation Hook │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────────┐ ┌──────────────────┐ ┌─────────────┐ │
│ │ Performance │ │ Conversation │ │ Pattern │ │
│ │ Manager │ │ Monitor │ │ Detector │ │
│ │ │ │ │ │ │ │
│ │ • Timing │ │ • Topic Extract │ │ • Regex │ │
│ │ • Profiles │ │ • Semantic Shift │ │ • Context │ │
│ │ • Learning │ │ • Caching │ │ • Learning │ │
│ └─────────────────┘ └──────────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ Memory Client │
│ (Dual Protocol: HTTP + MCP) │
└─────────────────────────────────────────────────────────────┘
```
### Key Classes
- **`MidConversationHook`**: Main orchestrator for trigger detection and execution
- **`TieredConversationMonitor`**: Multi-tier conversation analysis with performance awareness
- **`AdaptivePatternDetector`**: Natural language pattern detection with learning
- **`PerformanceManager`**: Performance monitoring, budgeting, and profile management
- **`MemoryClient`**: Unified interface for HTTP and MCP memory operations
## Troubleshooting
### Common Issues
**Q: Triggers aren't firing when expected**
```bash
# Check if natural triggers are enabled
node claude-hooks/memory-mode-controller.js status
# Lower the trigger threshold
node claude-hooks/memory-mode-controller.js threshold 0.5
# Increase sensitivity
node claude-hooks/memory-mode-controller.js sensitivity 0.8
```
**Q: Performance is slower than expected**
```bash
# Switch to speed-focused mode
node claude-hooks/memory-mode-controller.js profile speed_focused
# Check current latency
node claude-hooks/memory-mode-controller.js status
```
**Q: Too many false positive triggers**
```bash
# Lower sensitivity
node claude-hooks/memory-mode-controller.js sensitivity 0.5
# Increase threshold
node claude-hooks/memory-mode-controller.js threshold 0.8
# Increase cooldown period (edit config.json)
```
### Debug Mode
Enable detailed logging:
```json
{
"logging": {
"level": "debug",
"enableDebug": true,
"logToFile": true
}
}
```
### Performance Analysis
Monitor hook performance:
```bash
# Check status for performance metrics
node claude-hooks/memory-mode-controller.js status
# Run performance tests
node claude-hooks/test-natural-triggers.js
```
## Integration with Claude Code
### Session Start Integration
The natural triggers work alongside the existing session start hooks:
1. **Session Start**: Loads initial memory context (existing functionality)
2. **Mid-Conversation**: Intelligently refreshes context when patterns suggest it's needed
3. **Adaptive Learning**: Learns from user interactions to improve trigger accuracy
### Memory Storage Integration
Uses the existing dual-protocol memory service:
- **HTTP Protocol**: Web-based memory service (https://localhost:8443)
- **MCP Protocol**: Direct server process communication
- **Smart Fallback**: Automatically switches protocols if one fails
## Roadmap
### Planned Enhancements
1. **Advanced Semantic Analysis**: Integration with more sophisticated NLP models
2. **Cross-Session Learning**: Remember user preferences across Claude Code sessions
3. **Project-Specific Patterns**: Learn patterns specific to different projects
4. **Real-time Performance Tuning**: Dynamic adjustment based on system resources
5. **Visual Performance Dashboard**: Web-based interface for monitoring and configuration
### Contributing
The natural triggers system is designed to be extensible:
1. **Custom Pattern Categories**: Add new pattern types in `AdaptivePatternDetector`
2. **Performance Profiles**: Define custom profiles in the configuration
3. **Integration Points**: Hook into additional Claude Code events
4. **Learning Algorithms**: Enhance the adaptive learning mechanisms
## License
This system is part of the MCP Memory Service project and follows the same licensing terms.
---
🧠 **The goal is to make memory awareness feel natural and seamless, like Claude simply "remembers" your conversations and project context.**
```
--------------------------------------------------------------------------------
/claude-hooks/utilities/performance-manager.js:
--------------------------------------------------------------------------------
```javascript
/**
* Performance Manager for Memory Hooks
* Provides intelligent performance monitoring and adaptive hook management
*/
class PerformanceManager {
    /**
     * @param {object} config - Hook configuration; may define custom
     *   performance profiles under `config.profiles` and a
     *   `config.defaultProfile` name.
     */
    constructor(config = {}) {
        this.config = config;
        // Rolling metrics; arrays are trimmed to bounded windows as they grow.
        this.metrics = {
            totalLatency: [],
            hookLatencies: new Map(),
            userSatisfaction: [],
            degradationEvents: 0
        };
        // Performance tiers (latency budgets per analysis tier).
        this.tiers = {
            instant: { maxLatency: 50, priority: 'critical' },
            fast: { maxLatency: 150, priority: 'high' },
            intensive: { maxLatency: 500, priority: 'medium' }
        };
        // Current performance profile and its derived budget.
        this.activeProfile = config.defaultProfile || 'balanced';
        this.performanceBudget = this.getProfileBudget(this.activeProfile);
        // Adaptive learning state.
        this.userPreferences = {
            toleranceLevel: 0.5, // 0 = speed focused, 1 = memory focused
            learningEnabled: true,
            feedbackHistory: []
        };
    }

    /**
     * Get performance budget for a profile.
     * Config-defined profiles take precedence; built-in fallbacks are used
     * when the name is absent from config.
     */
    getProfileBudget(profileName) {
        // Use config profiles first, with hardcoded fallbacks
        const configProfiles = this.config.profiles || {};
        if (configProfiles[profileName]) {
            const profile = { ...configProfiles[profileName] };
            // Handle adaptive profile calculations if config omits them.
            if (profileName === 'adaptive') {
                profile.maxLatency = profile.maxLatency || this.calculateAdaptiveLatency();
                profile.enabledTiers = profile.enabledTiers || this.calculateAdaptiveTiers();
            }
            return profile;
        }
        // Fallback to hardcoded profiles if not found in config
        const fallbackProfiles = {
            speed_focused: {
                maxLatency: 100,
                enabledTiers: ['instant'],
                backgroundProcessing: false,
                degradeThreshold: 200
            },
            balanced: {
                maxLatency: 200,
                enabledTiers: ['instant', 'fast'],
                backgroundProcessing: true,
                degradeThreshold: 400
            },
            memory_aware: {
                maxLatency: 500,
                enabledTiers: ['instant', 'fast', 'intensive'],
                backgroundProcessing: true,
                degradeThreshold: 1000
            },
            adaptive: {
                maxLatency: this.calculateAdaptiveLatency(),
                enabledTiers: this.calculateAdaptiveTiers(),
                backgroundProcessing: true,
                degradeThreshold: 800,
                autoAdjust: true
            }
        };
        return fallbackProfiles[profileName] || fallbackProfiles.balanced;
    }

    /**
     * Calculate adaptive latency budget from observed latencies and the
     * learned user tolerance; clamped to [100, 500] ms.
     */
    calculateAdaptiveLatency() {
        if (this.metrics.totalLatency.length < 10) {
            return 200; // Default for new users
        }
        const avgLatency = this.metrics.totalLatency.reduce((a, b) => a + b, 0) / this.metrics.totalLatency.length;
        const userTolerance = this.userPreferences?.toleranceLevel || 0.5;
        // Adaptive calculation: balance observed latency with user preference
        return Math.min(500, Math.max(100, avgLatency * (1 + userTolerance)));
    }

    /**
     * Calculate which tiers should be enabled based on learned tolerance.
     */
    calculateAdaptiveTiers() {
        const tolerance = this.userPreferences?.toleranceLevel || 0.5;
        if (tolerance < 0.3) return ['instant'];
        if (tolerance < 0.7) return ['instant', 'fast'];
        return ['instant', 'fast', 'intensive'];
    }

    /**
     * Start timing a hook operation; returns an opaque context for endTiming.
     */
    startTiming(hookName, tier = 'fast') {
        return {
            hookName,
            tier,
            startTime: Date.now(),
            expectedLatency: this.tiers[tier]?.maxLatency || 150
        };
    }

    /**
     * End timing, record metrics, and trigger degradation handling when the
     * profile's degrade threshold is exceeded.
     * NOTE: config-defined profiles may omit `degradeThreshold`; the
     * comparison is then against undefined and evaluates false, so no
     * degradation handling fires for such profiles.
     */
    endTiming(timingContext) {
        const endTime = Date.now();
        const latency = endTime - timingContext.startTime;
        // Record metrics
        this.recordHookLatency(timingContext.hookName, latency, timingContext.tier);
        this.recordTotalLatency(latency);
        // Check if we exceeded performance budget
        const exceedsThreshold = latency > this.performanceBudget.degradeThreshold;
        if (exceedsThreshold) {
            this.handlePerformanceDegradation(timingContext.hookName, latency);
        }
        return {
            latency,
            tier: timingContext.tier,
            withinBudget: latency <= this.performanceBudget.maxLatency,
            exceedsThreshold
        };
    }

    /**
     * Record hook-specific latency (bounded to the 50 most recent samples).
     */
    recordHookLatency(hookName, latency, tier) {
        if (!this.metrics.hookLatencies.has(hookName)) {
            this.metrics.hookLatencies.set(hookName, []);
        }
        const hookMetrics = this.metrics.hookLatencies.get(hookName);
        hookMetrics.push({ latency, tier, timestamp: Date.now() });
        // Keep only recent measurements (last 50)
        if (hookMetrics.length > 50) {
            hookMetrics.splice(0, hookMetrics.length - 50);
        }
    }

    /**
     * Record total request latency (bounded to the 100 most recent samples).
     */
    recordTotalLatency(latency) {
        this.metrics.totalLatency.push(latency);
        // Keep rolling window of recent measurements
        if (this.metrics.totalLatency.length > 100) {
            this.metrics.totalLatency.splice(0, this.metrics.totalLatency.length - 100);
        }
    }

    /**
     * Handle performance degradation: count it, warn, and (for auto-adjusting
     * profiles) trigger adaptation.
     */
    handlePerformanceDegradation(hookName, latency) {
        this.metrics.degradationEvents++;
        console.warn(`[Performance] Hook "${hookName}" exceeded threshold: ${latency}ms`);
        // Adaptive response based on profile
        if (this.performanceBudget.autoAdjust) {
            this.adaptToPerformance(hookName, latency);
        }
    }

    /**
     * Adapt hooks based on recent performance history; suggests optimization
     * when a hook's recent average exceeds 1.5x the budget.
     */
    adaptToPerformance(hookName, latency) {
        const hookHistory = this.metrics.hookLatencies.get(hookName) || [];
        const recentHistory = hookHistory.slice(-10);
        if (recentHistory.length >= 5) {
            const avgLatency = recentHistory.reduce((a, b) => a + b.latency, 0) / recentHistory.length;
            if (avgLatency > this.performanceBudget.maxLatency * 1.5) {
                // Suggest moving hook to lower tier or disabling
                this.suggestHookOptimization(hookName, avgLatency);
            }
        }
    }

    /**
     * Suggest hook optimization ('disable' above 300ms average, otherwise
     * 'reduce_tier'). Returns the suggestion object for callers/tests.
     */
    suggestHookOptimization(hookName, avgLatency) {
        const suggestion = {
            hookName,
            avgLatency,
            suggestion: avgLatency > 300 ? 'disable' : 'reduce_tier',
            timestamp: Date.now()
        };
        console.log(`[Performance] Suggestion for ${hookName}: ${suggestion.suggestion} (avg: ${avgLatency}ms)`);
        return suggestion;
    }

    /**
     * Check if a hook should run under the current performance profile.
     * A hook is skipped when its tier is not enabled, or when its recent
     * average latency exceeds 1.2x the profile budget.
     */
    shouldRunHook(hookName, tier = 'fast') {
        const profile = this.performanceBudget;
        // Config-defined profiles may omit enabledTiers; treat a missing
        // list as "no tiers enabled" instead of crashing on .includes().
        if (!Array.isArray(profile.enabledTiers) || !profile.enabledTiers.includes(tier)) {
            return false;
        }
        // Check recent performance
        const hookHistory = this.metrics.hookLatencies.get(hookName);
        if (hookHistory && hookHistory.length > 5) {
            const recentLatencies = hookHistory.slice(-5);
            const avgLatency = recentLatencies.reduce((a, b) => a + b.latency, 0) / recentLatencies.length;
            // Don't run if consistently exceeds budget
            if (avgLatency > profile.maxLatency * 1.2) {
                return false;
            }
        }
        return true;
    }

    /**
     * Switch performance profile.
     * Accepts any profile defined in the configuration as well as the
     * built-in names; previously only the four hardcoded names were
     * accepted, which made custom config profiles (honored by
     * getProfileBudget) impossible to activate.
     */
    switchProfile(profileName) {
        const builtinProfiles = ['speed_focused', 'balanced', 'memory_aware', 'adaptive'];
        const configProfiles = this.config.profiles || {};
        if (!builtinProfiles.includes(profileName) && !(profileName in configProfiles)) {
            throw new Error(`Invalid profile: ${profileName}`);
        }
        this.activeProfile = profileName;
        this.performanceBudget = this.getProfileBudget(profileName);
        console.log(`[Performance] Switched to profile: ${profileName}`);
        return this.performanceBudget;
    }

    /**
     * Learn from user feedback; adjusts tolerance and bounds history at 50
     * entries (dropping the oldest 10 when exceeded).
     */
    recordUserFeedback(isPositive, context = {}) {
        if (!this.userPreferences.learningEnabled) return;
        const feedback = {
            positive: isPositive,
            context,
            latency: context.latency || 0,
            timestamp: Date.now()
        };
        this.userPreferences.feedbackHistory.push(feedback);
        // Update tolerance based on feedback
        this.updateUserTolerance(feedback);
        // Keep feedback history manageable
        if (this.userPreferences.feedbackHistory.length > 50) {
            this.userPreferences.feedbackHistory.splice(0, 10);
        }
    }

    /**
     * Update user tolerance based on feedback patterns: positive feedback at
     * >200ms raises tolerance by 0.1, negative feedback at >100ms lowers it,
     * clamped to [0, 1].
     */
    updateUserTolerance(feedback) {
        const recent = this.userPreferences?.feedbackHistory?.slice(-10) || [];
        const positiveCount = recent.filter(f => f.positive).length;
        const negativeCount = recent.length - positiveCount;
        // Ensure userPreferences is initialized
        if (!this.userPreferences) {
            this.userPreferences = {
                toleranceLevel: 0.5,
                learningEnabled: true,
                feedbackHistory: []
            };
        }
        // Adjust tolerance based on feedback patterns
        if (feedback.positive && feedback.latency > 200) {
            // User satisfied with higher latency, increase tolerance
            this.userPreferences.toleranceLevel = Math.min(1.0, this.userPreferences.toleranceLevel + 0.1);
        } else if (!feedback.positive && feedback.latency > 100) {
            // User dissatisfied with latency, decrease tolerance
            this.userPreferences.toleranceLevel = Math.max(0.0, this.userPreferences.toleranceLevel - 0.1);
        }
    }

    /**
     * Get a summary report: active profile, average latencies, degradation
     * count, learned tolerance, per-hook stats, and the active budget.
     */
    getPerformanceReport() {
        const totalRequests = this.metrics.totalLatency.length;
        const avgLatency = totalRequests > 0 ?
            this.metrics.totalLatency.reduce((a, b) => a + b, 0) / totalRequests : 0;
        const hookSummary = {};
        this.metrics.hookLatencies.forEach((latencies, hookName) => {
            const avgHookLatency = latencies.reduce((a, b) => a + b.latency, 0) / latencies.length;
            hookSummary[hookName] = {
                avgLatency: Math.round(avgHookLatency),
                calls: latencies.length,
                tier: latencies[latencies.length - 1]?.tier || 'unknown'
            };
        });
        return {
            profile: this.activeProfile,
            totalRequests,
            avgLatency: Math.round(avgLatency),
            degradationEvents: this.metrics.degradationEvents,
            userTolerance: this.userPreferences.toleranceLevel,
            hookPerformance: hookSummary,
            budget: this.performanceBudget
        };
    }

    /**
     * Reset metrics (useful for testing).
     */
    resetMetrics() {
        this.metrics = {
            totalLatency: [],
            hookLatencies: new Map(),
            userSatisfaction: [],
            degradationEvents: 0
        };
    }
}
module.exports = { PerformanceManager };
```
--------------------------------------------------------------------------------
/scripts/database/simple_timestamp_check.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
"""
Production-ready script to analyze timestamp health in MCP Memory Service databases.
This tool provides comprehensive timestamp analysis for SQLite-based memory storage,
helping identify and diagnose timestamp-related issues that could affect search functionality.
"""
import sys
import sqlite3
import json
import argparse
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(levelname)s: %(message)s'
)
logger = logging.getLogger(__name__)
def analyze_timestamps(db_path: str, output_format: str = 'text', verbose: bool = False) -> Dict[str, Any]:
    """Analyze timestamp fields directly in the database.

    Inspects the ``memories`` table of an MCP Memory Service SQLite database:
    counts rows carrying the float ``created_at`` and/or ISO ``created_at_iso``
    timestamp, lists example rows missing one or both, and derives an overall
    health rating (EXCELLENT/GOOD/WARNING/CRITICAL).

    Args:
        db_path: Path to the SQLite database file
        output_format: Output format ('text', 'json', or 'summary');
            human-readable printing happens only when it is 'text'
        verbose: Enable verbose output (extra per-row detail in 'text' mode)

    Returns:
        Dictionary containing analysis results; on failure it contains
        'error' and 'success': False instead of the analysis keys.
    """
    results = {}
    if output_format == 'text':
        print(f"=== Analyzing timestamps in {db_path} ===")
    # Validate database path before attempting to connect.
    db_file = Path(db_path)
    if not db_file.exists():
        error_msg = f"Database file not found: {db_path}"
        logger.error(error_msg)
        return {'error': error_msg, 'success': False}
    try:
        conn = sqlite3.connect(db_path)
        # Row factory enables name-based access to query columns below.
        conn.row_factory = sqlite3.Row
        # Get basic stats
        cursor = conn.execute("SELECT COUNT(*) as total FROM memories")
        total_count = cursor.fetchone()['total']
        results['total_memories'] = total_count
        if output_format == 'text':
            print(f"📊 Total memories in database: {total_count}")
        # Analyze timestamp fields in a single aggregate pass.
        cursor = conn.execute("""
            SELECT
            COUNT(*) as total,
            COUNT(created_at) as has_created_at,
            COUNT(created_at_iso) as has_created_at_iso,
            COUNT(CASE WHEN created_at IS NULL AND created_at_iso IS NULL THEN 1 END) as missing_both,
            MIN(created_at) as earliest_ts,
            MAX(created_at) as latest_ts
            FROM memories
        """)
        stats = cursor.fetchone()
        # Store results
        results['timestamp_stats'] = {
            'total': stats['total'],
            'has_created_at': stats['has_created_at'],
            'has_created_at_iso': stats['has_created_at_iso'],
            'missing_both': stats['missing_both'],
            'missing_created_at': stats['total'] - stats['has_created_at'],
            'missing_created_at_iso': stats['total'] - stats['has_created_at_iso']
        }
        if output_format == 'text':
            print(f"\n🕐 TIMESTAMP ANALYSIS:")
            print(f" Total entries: {stats['total']}")
            print(f" Has created_at (float): {stats['has_created_at']}")
            print(f" Has created_at_iso (ISO): {stats['has_created_at_iso']}")
            print(f" Missing both timestamps: {stats['missing_both']}")
        if output_format == 'text':
            if stats['has_created_at'] > 0:
                missing_created_at = stats['total'] - stats['has_created_at']
                print(f" Missing created_at: {missing_created_at}")
            if stats['has_created_at_iso'] > 0:
                missing_created_at_iso = stats['total'] - stats['has_created_at_iso']
                print(f" Missing created_at_iso: {missing_created_at_iso}")
        # Show timestamp range
        if stats['earliest_ts'] and stats['latest_ts']:
            # NOTE(review): fromtimestamp() converts to local time; assumed
            # acceptable for a diagnostic tool — confirm if UTC is expected.
            earliest = datetime.fromtimestamp(stats['earliest_ts'])
            latest = datetime.fromtimestamp(stats['latest_ts'])
            results['timestamp_range'] = {
                'earliest': earliest.isoformat(),
                'latest': latest.isoformat(),
                'earliest_float': stats['earliest_ts'],
                'latest_float': stats['latest_ts']
            }
            if output_format == 'text':
                print(f"\n📅 TIMESTAMP RANGE:")
                print(f" Earliest: {earliest} ({stats['earliest_ts']})")
                print(f" Latest: {latest} ({stats['latest_ts']})")
        # Find problematic entries (up to 10 rows missing both timestamps).
        cursor = conn.execute("""
            SELECT id, content_hash, created_at, created_at_iso,
            SUBSTR(content, 1, 100) as content_preview
            FROM memories
            WHERE created_at IS NULL AND created_at_iso IS NULL
            LIMIT 10
        """)
        problematic = cursor.fetchall()
        # Number of example rows fetched (capped at 10 by the query), not
        # the total count of affected rows.
        results['missing_both_examples'] = len(problematic)
        if output_format == 'text' and problematic:
            print(f"\n⚠️ ENTRIES MISSING BOTH TIMESTAMPS ({len(problematic)} shown):")
            for row in problematic:
                print(f" ID {row['id']}: {row['content_preview']}...")
                if verbose:
                    print(f" Hash: {row['content_hash']}")
                    print(f" created_at: {row['created_at']}")
                    print(f" created_at_iso: {row['created_at_iso']}")
                    print()
        # Find entries with only one timestamp type
        cursor = conn.execute("""
            SELECT COUNT(*) as count
            FROM memories
            WHERE (created_at IS NULL AND created_at_iso IS NOT NULL)
            OR (created_at IS NOT NULL AND created_at_iso IS NULL)
        """)
        partial_timestamps = cursor.fetchone()['count']
        results['partial_timestamps'] = partial_timestamps
        if output_format == 'text' and partial_timestamps > 0:
            print(f"\n⚠️ ENTRIES WITH PARTIAL TIMESTAMPS: {partial_timestamps}")
            # Show some examples
            cursor = conn.execute("""
                SELECT id, content_hash, created_at, created_at_iso,
                SUBSTR(content, 1, 60) as content_preview
                FROM memories
                WHERE (created_at IS NULL AND created_at_iso IS NOT NULL)
                OR (created_at IS NOT NULL AND created_at_iso IS NULL)
                LIMIT 5
            """)
            examples = cursor.fetchall()
            if output_format == 'text' and verbose:
                for row in examples:
                    print(f" ID {row['id']}: {row['content_preview']}...")
                    print(f" created_at: {row['created_at']}")
                    print(f" created_at_iso: {row['created_at_iso']}")
                    print()
        # Health assessment: severity is the share of rows missing BOTH
        # timestamp formats (<1% GOOD, <10% WARNING, otherwise CRITICAL).
        health_status = 'EXCELLENT'
        health_message = 'All memories have complete timestamps'
        if stats['missing_both'] > 0:
            if stats['missing_both'] < stats['total'] * 0.01:
                health_status = 'GOOD'
                health_message = f"Only {stats['missing_both']}/{stats['total']} missing all timestamps"
            elif stats['missing_both'] < stats['total'] * 0.1:
                health_status = 'WARNING'
                health_message = f"{stats['missing_both']}/{stats['total']} missing all timestamps"
            else:
                health_status = 'CRITICAL'
                health_message = f"{stats['missing_both']}/{stats['total']} missing all timestamps"
        results['health'] = {
            'status': health_status,
            'message': health_message,
            'partial_timestamps': partial_timestamps
        }
        if output_format == 'text':
            print(f"\n🏥 DATABASE HEALTH:")
            emoji = {'EXCELLENT': '✅', 'GOOD': '✅', 'WARNING': '⚠️', 'CRITICAL': '❌'}
            print(f" {emoji.get(health_status, '?')} {health_status}: {health_message}")
            if partial_timestamps > 0:
                print(f" ⚠️ {partial_timestamps} entries have only partial timestamp data")
            else:
                print(" ✅ All entries with timestamps have both float and ISO formats")
        # Explicit close on the success path; the finally block also calls
        # close(), which sqlite3 tolerates (closing twice is a no-op).
        conn.close()
        results['success'] = True
        return results
    except sqlite3.OperationalError as e:
        # Distinguish "not a memory-service database" from other DB failures.
        if 'no such table: memories' in str(e):
            error_msg = "Database does not contain 'memories' table. Is this a valid MCP Memory Service database?"
        else:
            error_msg = f"Database error: {e}"
        logger.error(error_msg)
        results['error'] = error_msg
        results['success'] = False
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        logger.error(error_msg)
        results['error'] = error_msg
        results['success'] = False
    finally:
        # 'conn' may be unbound if sqlite3.connect() itself raised.
        if 'conn' in locals():
            conn.close()
    return results
def main():
    """Main entry point with CLI argument parsing.

    Runs analyze_timestamps() on the selected database, renders the result
    in the requested format, and exits with a health-encoding status code:

        0 - analysis succeeded, health EXCELLENT or GOOD
        1 - analysis succeeded, health WARNING
        2 - analysis succeeded, health CRITICAL (or unrecognized status)
        3 - the analysis itself failed (bad path, invalid database, ...)
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description='Analyze timestamp health in MCP Memory Service SQLite databases',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                          # Use default database path
  %(prog)s /path/to/database.db     # Analyze specific database
  %(prog)s -f json -o results.json  # Output JSON to file
  %(prog)s --verbose                # Show detailed analysis
  %(prog)s --format summary         # Quick health check only
        """
    )
    # Default database path for macOS
    # NOTE(review): this default is macOS-specific; on other platforms the
    # database path must be passed explicitly — confirm intended audience.
    default_db_path = Path.home() / "Library" / "Application Support" / "mcp-memory" / "sqlite_vec.db"
    parser.add_argument(
        'database',
        nargs='?',
        default=str(default_db_path),
        help=f'Path to SQLite database (default: {default_db_path})'
    )
    parser.add_argument(
        '-f', '--format',
        choices=['text', 'json', 'summary'],
        default='text',
        help='Output format (default: text)'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file path (default: stdout)'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Show verbose output with additional details'
    )
    parser.add_argument(
        '--quiet',
        action='store_true',
        help='Suppress all output except errors'
    )
    args = parser.parse_args()
    # Configure logging based on verbosity (--quiet takes precedence)
    if args.quiet:
        logging.getLogger().setLevel(logging.ERROR)
    elif args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    # Analyze the database ('text' format prints inside analyze_timestamps itself)
    results = analyze_timestamps(args.database, args.format, args.verbose)
    # Handle output
    if args.format == 'json':
        # default=str stringifies anything json can't serialize natively
        output = json.dumps(results, indent=2, default=str)
        if args.output:
            with open(args.output, 'w') as f:
                f.write(output)
            if not args.quiet:
                print(f"Results written to {args.output}")
        else:
            print(output)
    elif args.format == 'summary':
        if results.get('success'):
            health = results.get('health', {})
            print(f"Status: {health.get('status', 'UNKNOWN')}")
            print(f"Message: {health.get('message', 'No health data')}")
            print(f"Total Memories: {results.get('total_memories', 0)}")
            missing = results.get('timestamp_stats', {}).get('missing_both', 0)
            if missing > 0:
                print(f"Missing Timestamps: {missing}")
        else:
            print(f"Error: {results.get('error', 'Unknown error')}")
    # Return appropriate exit code (see docstring for the mapping)
    if results.get('success'):
        health_status = results.get('health', {}).get('status', 'UNKNOWN')
        if health_status in ['EXCELLENT', 'GOOD']:
            sys.exit(0)
        elif health_status == 'WARNING':
            sys.exit(1)
        else:
            sys.exit(2)
    else:
        sys.exit(3)


if __name__ == "__main__":
    main()
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/oauth/registration.py:
--------------------------------------------------------------------------------
```python
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OAuth 2.1 Dynamic Client Registration implementation for MCP Memory Service.
Implements RFC 7591 - OAuth 2.0 Dynamic Client Registration Protocol.
"""
import time
import logging
from typing import List, Optional
from urllib.parse import urlparse, ParseResult
from fastapi import APIRouter, HTTPException, status
from pydantic import ValidationError
from .models import (
ClientRegistrationRequest,
ClientRegistrationResponse,
RegisteredClient
)
from .storage import oauth_storage
logger = logging.getLogger(__name__)
router = APIRouter()
def validate_redirect_uris(redirect_uris: Optional[List[str]]) -> None:
"""
Validate redirect URIs according to OAuth 2.1 security requirements.
Uses proper URL parsing to prevent bypass attacks and validates schemes
against a secure whitelist to prevent dangerous scheme injection.
"""
if not redirect_uris:
return
# Allowed schemes - whitelist approach for security
ALLOWED_SCHEMES = {
'https', # HTTPS (preferred)
'http', # HTTP (localhost only)
# Native app custom schemes (common patterns)
'com.example.app', # Reverse domain notation
'myapp', # Simple custom scheme
# Add more custom schemes as needed, but NEVER allow:
# javascript:, data:, file:, vbscript:, about:, chrome:, etc.
}
# Dangerous schemes that must be blocked
DANGEROUS_SCHEMES = {
'javascript', 'data', 'file', 'vbscript', 'about', 'chrome',
'chrome-extension', 'moz-extension', 'ms-appx', 'blob'
}
for uri in redirect_uris:
uri_str = str(uri).strip()
if not uri_str:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": "Empty redirect URI not allowed"
}
)
try:
# Parse URL using proper URL parser to prevent bypass attacks
parsed: ParseResult = urlparse(uri_str)
if not parsed.scheme:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"Missing scheme in redirect URI: {uri_str}"
}
)
# Check for dangerous schemes first (security)
if parsed.scheme.lower() in DANGEROUS_SCHEMES:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"Dangerous scheme '{parsed.scheme}' not allowed in redirect URI"
}
)
# For HTTP scheme, enforce strict localhost validation
if parsed.scheme.lower() == 'http':
if not parsed.netloc:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"HTTP URI missing host: {uri_str}"
}
)
# Extract hostname from netloc (handles port numbers correctly)
hostname = parsed.hostname
if not hostname:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"Cannot extract hostname from HTTP URI: {uri_str}"
}
)
# Strict localhost validation - only allow exact matches
if hostname.lower() not in ('localhost', '127.0.0.1', '::1'):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"HTTP redirect URIs must use localhost, 127.0.0.1, or ::1. Got: {hostname}"
}
)
# For HTTPS, allow any valid hostname (production requirement)
elif parsed.scheme.lower() == 'https':
if not parsed.netloc:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"HTTPS URI missing host: {uri_str}"
}
)
# For custom schemes (native apps), validate they're in allowed list
elif parsed.scheme.lower() not in [s.lower() for s in ALLOWED_SCHEMES]:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"Unsupported scheme '{parsed.scheme}'. Allowed: {', '.join(sorted(ALLOWED_SCHEMES))}"
}
)
except ValueError as e:
# URL parsing failed
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": "invalid_redirect_uri",
"error_description": f"Invalid URL format: {uri_str}. Error: {str(e)}"
}
)
def validate_grant_types(grant_types: List[str]) -> None:
    """Reject any grant type this server does not implement."""
    supported_grant_types = {"authorization_code", "client_credentials"}
    # Find the first unsupported entry, if any, and fail fast on it.
    offender = next((gt for gt in grant_types if gt not in supported_grant_types), None)
    if offender is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": "invalid_client_metadata",
                "error_description": f"Unsupported grant type: {offender}. Supported: {list(supported_grant_types)}"
            }
        )
def validate_response_types(response_types: List[str]) -> None:
    """Reject any response type this server does not support (only "code")."""
    supported_response_types = {"code"}
    # Collect unsupported entries; report the first one, matching the
    # original first-failure behavior.
    offenders = [rt for rt in response_types if rt not in supported_response_types]
    if offenders:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": "invalid_client_metadata",
                "error_description": f"Unsupported response type: {offenders[0]}. Supported: {list(supported_response_types)}"
            }
        )
@router.post("/register", response_model=ClientRegistrationResponse, status_code=status.HTTP_201_CREATED)
async def register_client(request: ClientRegistrationRequest) -> ClientRegistrationResponse:
    """
    OAuth 2.1 Dynamic Client Registration endpoint (RFC 7591).

    Validates the submitted client metadata, mints fresh credentials,
    persists the client, and returns the registered metadata.
    """
    logger.info("OAuth client registration request received")
    try:
        # Normalize redirect URIs once; validation and storage both use them.
        uris = [str(u) for u in request.redirect_uris] if request.redirect_uris else []
        if uris:
            validate_redirect_uris(uris)
        if request.grant_types:
            validate_grant_types(request.grant_types)
        if request.response_types:
            validate_response_types(request.response_types)

        # Fill in RFC defaults for anything the client omitted.
        grants = request.grant_types or ["authorization_code"]
        responses = request.response_types or ["code"]
        auth_method = request.token_endpoint_auth_method or "client_secret_basic"

        # Mint fresh credentials for this registration.
        new_client_id = oauth_storage.generate_client_id()
        new_client_secret = oauth_storage.generate_client_secret()

        record = RegisteredClient(
            client_id=new_client_id,
            client_secret=new_client_secret,
            redirect_uris=uris,
            grant_types=grants,
            response_types=responses,
            token_endpoint_auth_method=auth_method,
            client_name=request.client_name,
            created_at=time.time()
        )
        await oauth_storage.store_client(record)

        response = ClientRegistrationResponse(
            client_id=new_client_id,
            client_secret=new_client_secret,
            redirect_uris=record.redirect_uris,
            grant_types=grants,
            response_types=responses,
            token_endpoint_auth_method=auth_method,
            client_name=request.client_name
        )
        logger.info(f"OAuth client registered successfully: client_id={new_client_id}, name={request.client_name}")
        return response
    except ValidationError as e:
        logger.warning(f"OAuth client registration validation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": "invalid_client_metadata",
                "error_description": f"Invalid client metadata: {str(e)}"
            }
        )
    except HTTPException:
        # Propagate validation failures untouched.
        raise
    except Exception as e:
        logger.error(f"OAuth client registration error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "error": "server_error",
                "error_description": "Internal server error during client registration"
            }
        )
@router.get("/clients/{client_id}")
async def get_client_info(client_id: str) -> ClientRegistrationResponse:
    """
    Return metadata for a registered client with the secret redacted.

    Extension endpoint (not part of RFC 7591); intended for debugging
    and client management.
    """
    logger.info(f"Client info request for client_id={client_id}")
    record = await oauth_storage.get_client(client_id)
    if not record:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail={
                "error": "invalid_client",
                "error_description": "Client not found"
            }
        )
    return ClientRegistrationResponse(
        client_id=record.client_id,
        client_secret="[REDACTED]",  # never expose the stored secret
        redirect_uris=record.redirect_uris,
        grant_types=record.grant_types,
        response_types=record.response_types,
        token_endpoint_auth_method=record.token_endpoint_auth_method,
        client_name=record.client_name
    )
```
--------------------------------------------------------------------------------
/scripts/migration/migrate_timestamps.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Enhanced SQLite migration script to fix timestamp formats in ChromaDB.
This improved version populates all timestamp columns with appropriate values.
"""
import sqlite3
import logging
import os
import sys
import platform
from pathlib import Path
import json
import datetime
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("sqlite_migration")
def find_claude_chroma_db():
    """
    Locate the Claude desktop ChromaDB storage for the current OS.

    Returns the path to chroma.sqlite3 as a string, or None if no
    candidate location contains the database file.
    """
    system = platform.system()
    home = Path.home()

    # Candidate ChromaDB directories, in priority order per platform.
    candidates = []
    if system == "Darwin":  # macOS
        # Standard iCloud Drive location, then local AppData location.
        candidates.append(home / "Library" / "Mobile Documents" / "com~apple~CloudDocs" / "AI" / "claude-memory" / "chroma_db")
        candidates.append(home / "Library" / "Application Support" / "Claude" / "claude-memory" / "chroma_db")
    elif system == "Windows":
        # Standard Windows location, then a potential OneDrive path.
        candidates.append(Path(os.environ.get("LOCALAPPDATA", "")) / "Claude" / "claude-memory" / "chroma_db")
        candidates.append(home / "OneDrive" / "Documents" / "Claude" / "claude-memory" / "chroma_db")
    elif system == "Linux":
        candidates.append(home / ".config" / "Claude" / "claude-memory" / "chroma_db")

    # The desktop app's config file may point at a custom storage path.
    config_files = []
    if system == "Darwin":
        config_files.append(home / "Library" / "Application Support" / "Claude" / "config.json")
    elif system == "Windows":
        config_files.append(Path(os.environ.get("APPDATA", "")) / "Claude" / "config.json")
    elif system == "Linux":
        config_files.append(home / ".config" / "Claude" / "config.json")

    for config_path in config_files:
        if not config_path.exists():
            continue
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)
            if 'memoryStoragePath' in config:
                mem_path = Path(config['memoryStoragePath']) / "chroma_db"
                candidates.insert(0, mem_path)  # configured path wins
                logger.info(f"Found memory path in config: {mem_path}")
        except Exception as e:
            logger.warning(f"Error reading config file {config_path}: {e}")

    # Return the first candidate that actually contains the database file.
    for folder in candidates:
        db_file = folder / "chroma.sqlite3"
        if db_file.exists():
            logger.info(f"Found ChromaDB at: {db_file}")
            return str(db_file)

    logger.error("Could not find Claude's ChromaDB storage location")
    return None
def get_table_schema(cursor, table_name):
    """
    Return a mapping of column name -> full PRAGMA table_info row
    for *table_name*, so callers can inspect the available columns.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Column name is field 1 of each PRAGMA row.
    return {row[1]: row for row in cursor.fetchall()}
def timestamp_to_all_types(timestamp_value):
    """
    Convert a timestamp to all storage formats used by the metadata table:

    - ``int``: whole-second unix timestamp
    - ``float``: unix timestamp, preserving fractional seconds when the
      input provides them (previously truncated, contradicting this doc)
    - ``string``: ISO-8601 UTC string at second resolution (trailing ``Z``)

    Accepts int/float unix timestamps, numeric strings, ISO-8601 strings,
    and a few common "YYYY-MM-DD HH:MM:SS"-style formats. Naive datetimes
    are interpreted as UTC so the migration result does not depend on the
    machine's local timezone.

    Raises:
        ValueError: if the value cannot be interpreted as a timestamp.
    """
    ts_float = None
    if isinstance(timestamp_value, (int, float)):
        ts_float = float(timestamp_value)
    elif isinstance(timestamp_value, str):
        try:
            # Numeric string (possibly with fractional seconds).
            ts_float = float(timestamp_value)
        except ValueError:
            dt = None
            try:
                # ISO-8601; normalize a trailing Z to an explicit UTC offset.
                dt = datetime.datetime.fromisoformat(timestamp_value.replace('Z', '+00:00'))
            except ValueError:
                # Fall back to a few common explicit formats.
                for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y/%m/%d %H:%M:%S"]:
                    try:
                        dt = datetime.datetime.strptime(timestamp_value, fmt)
                        break
                    except ValueError:
                        continue
            if dt is not None:
                # Interpret naive datetimes as UTC. Without this,
                # .timestamp() applies the local timezone, making the
                # migration output machine-dependent.
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=datetime.timezone.utc)
                ts_float = dt.timestamp()
    if ts_float is None:
        raise ValueError(f"Could not convert timestamp value: {timestamp_value}")
    timestamp_int = int(ts_float)
    # ISO string is kept at whole-second resolution for stable formatting.
    dt_utc = datetime.datetime.fromtimestamp(timestamp_int, tz=datetime.timezone.utc)
    timestamp_str = dt_utc.isoformat().replace('+00:00', 'Z')
    return {
        'int': timestamp_int,
        'float': ts_float,
        'string': timestamp_str
    }
def migrate_timestamps_in_sqlite(db_path):
    """
    Enhanced migration that identifies timestamp data across all columns
    and populates all columns with consistent type values.

    Returns True when every 'timestamp' row ends up with int, float and
    string values populated (or there were no rows at all); False on
    error or when some rows remain incomplete.
    """
    logger.info(f"Connecting to SQLite database at {db_path}")
    if not os.path.exists(db_path):
        logger.error(f"Database file not found: {db_path}")
        return False
    conn = None
    try:
        # Connect to the SQLite database
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        # Check if the embedding_metadata table exists
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='embedding_metadata'")
        if not cursor.fetchone():
            logger.error("Table embedding_metadata not found in database.")
            return False
        # Get table schema to understand column structure
        schema = get_table_schema(cursor, "embedding_metadata")
        logger.info(f"Table schema: {schema}")
        # Find all timestamp entries from any column
        logger.info("Identifying all timestamp entries...")
        cursor.execute("""
            SELECT id, key, string_value, int_value, float_value
            FROM embedding_metadata
            WHERE key = 'timestamp'
        """)
        all_rows = cursor.fetchall()
        if not all_rows:
            logger.warning("No timestamp entries found in the database.")
            return True
        logger.info(f"Found {len(all_rows)} timestamp entries to process")
        # Process each timestamp row
        processed_count = 0
        failed_count = 0
        for row in all_rows:
            id_val, key, string_val, int_val, float_val = row
            # Prefer int, then float, then string as the source of truth.
            source_value = None
            source_type = None
            if int_val is not None:
                source_value = int_val
                source_type = 'int'
            elif float_val is not None:
                source_value = float_val
                source_type = 'float'
            elif string_val is not None:
                source_value = string_val
                source_type = 'string'
            if source_value is None:
                logger.warning(f"Row ID {id_val} has no timestamp value in any column")
                failed_count += 1
                continue
            try:
                # Convert to all types
                logger.info(f"Processing ID {id_val}: {source_type} value {source_value}")
                all_formats = timestamp_to_all_types(source_value)
                # Update the row with all formats
                # (SQLite identifiers are case-insensitive, so KEY == key)
                cursor.execute("""
                    UPDATE embedding_metadata
                    SET int_value = ?, float_value = ?, string_value = ?
                    WHERE id = ? AND KEY ='timestamp'
                """, (all_formats['int'], all_formats['float'], all_formats['string'], id_val))
                processed_count += 1
            except Exception as e:
                logger.error(f"Error processing timestamp for ID {id_val}: {e}")
                failed_count += 1
        # Commit all changes
        conn.commit()
        # Verify the changes: count rows still missing any representation.
        cursor.execute("""
            SELECT COUNT(*)
            FROM embedding_metadata
            WHERE key = 'timestamp' AND (int_value IS NULL OR float_value IS NULL OR string_value IS NULL)
        """)
        incomplete = cursor.fetchone()[0]
        cursor.execute("""
            SELECT COUNT(*)
            FROM embedding_metadata
            WHERE key = 'timestamp' AND int_value IS NOT NULL AND float_value IS NOT NULL AND string_value IS NOT NULL
        """)
        complete = cursor.fetchone()[0]
        logger.info(f"Migration summary:")
        logger.info(f"  - {processed_count} timestamp entries processed successfully")
        logger.info(f"  - {failed_count} timestamp entries failed to process")
        logger.info(f"  - {complete} timestamp entries now have values in all columns")
        logger.info(f"  - {incomplete} timestamp entries still have NULL values in some columns")
        # Show some examples of remaining problematic entries if any
        if incomplete > 0:
            cursor.execute("""
                SELECT id, key, string_value, int_value, float_value
                FROM embedding_metadata
                WHERE key = 'timestamp' AND (int_value IS NULL OR float_value IS NULL OR string_value IS NULL)
                LIMIT 5
            """)
            problem_rows = cursor.fetchall()
            logger.info(f"Examples of incomplete entries: {problem_rows}")
        return incomplete == 0
    except Exception as e:
        logger.error(f"Error during SQLite migration: {e}")
        return False
    finally:
        # Always release the connection, including on unexpected exceptions.
        # The original closed it only on the success/early-return paths,
        # leaking it whenever the except branch was taken.
        if conn is not None:
            conn.close()
def main():
    """CLI entry point: resolve the database path and run the migration."""
    # An explicit path on the command line takes precedence over auto-detection.
    if len(sys.argv) >= 2:
        db_path = sys.argv[1]
    else:
        db_path = find_claude_chroma_db()
        if not db_path:
            print("Could not automatically find Claude's ChromaDB location.")
            print("Please provide the path as a command-line argument:")
            print("python migrate_timestamps.py /path/to/chroma.sqlite3")
            sys.exit(1)
    print(f"Using database: {db_path}")
    if migrate_timestamps_in_sqlite(db_path):
        print("\n✅ Migration completed successfully!")
        print("All timestamps now have consistent values in all columns (int_value, float_value, and string_value).")
        sys.exit(0)
    print("\n⚠️ Migration completed with issues. Check the logs for details.")
    sys.exit(1)


if __name__ == "__main__":
    main()
```