This is page 17 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/docs/troubleshooting/session-end-hooks.md:
--------------------------------------------------------------------------------
```markdown
1 | # SessionEnd Hook Troubleshooting Guide
2 |
3 | ## Overview
4 |
5 | SessionEnd hooks automatically consolidate conversation outcomes when you exit Claude Code. However, many users are confused about **when these hooks actually fire** and why memories might not be created as expected.
6 |
7 | This guide clarifies the session lifecycle and common troubleshooting scenarios.
8 |
9 | ---
10 |
11 | ## Critical Concept: Session Lifecycle
12 |
13 | Claude Code distinguishes between **session pause/suspend** and **session termination**:
14 |
15 | | User Action | Session State | Hook Triggered | Memory Created? |
16 | |-------------|---------------|----------------|-----------------|
17 | | **Ctrl+C (once)** | Interrupt input | None | ❌ No |
18 | | **Ctrl+C (twice)** | Suspend session | None | ❌ No |
19 | | **Resume session** | Continue existing | `SessionStart:resume` | ❌ No (loads existing) |
20 | | **`/exit` command** | Terminate | `SessionEnd` | ✅ **Yes** |
21 | | **Close terminal** | Terminate | `SessionEnd` | ✅ **Yes** |
22 | | **Kill process** | May terminate | `SessionEnd` (if graceful) | ⚠️ Maybe |
23 |
24 | ### Key Takeaway
25 |
26 | **Ctrl+C does NOT trigger SessionEnd hooks.** It suspends the session, which you can later resume. Only actual session termination (e.g., `/exit`) triggers SessionEnd.
27 |
28 | ---
29 |
30 | ## Common Issue: "My Session Didn't Create a Memory"
31 |
32 | ### Symptom
33 |
34 | You exited Claude Code with Ctrl+C (twice), resumed later, and noticed no `session-consolidation` memory was created for your previous session.
35 |
36 | ### Root Cause
37 |
38 | **Ctrl+C suspends the session rather than ending it.** When you resume with `SessionStart:resume`, the session continues from where you left off - no SessionEnd hook fires.
39 |
40 | ### Evidence
41 |
42 | When you resume a session, you'll see:
43 | ```
44 | SessionStart:resume hook success
45 | ```
46 |
47 | This confirms you **resumed** an existing session, not started a new one.
48 |
49 | ### Solution
50 |
51 | **Always use `/exit` to properly terminate sessions** if you want SessionEnd memories created:
52 |
53 | ```bash
54 | # In Claude Code prompt:
55 | /exit
56 | ```
57 |
58 | This triggers graceful shutdown and SessionEnd hook execution.
59 |
60 | ---
61 |
62 | ## Common Issue: Connection Failures (SessionEnd & SessionStart)
63 |
64 | > **Note**: This issue affects both SessionEnd and SessionStart hooks, but with different symptoms:
65 | > - **SessionEnd**: Hard failure - cannot store session memory
66 | > - **SessionStart**: Soft failure - falls back to MCP tools, shows "No relevant memories found"
67 | >
68 | > See [hooks-quick-reference.md](hooks-quick-reference.md#sessionstart-hook-issues) for detailed SessionStart troubleshooting.
69 |
70 | ### Symptom (SessionEnd)
71 |
72 | During SessionEnd, you see:
73 | ```
74 | ⚠️ Memory Connection → Failed to connect using any available protocol
75 | 💾 Storage → 💾 Unknown Storage (http://127.0.0.1:8000)
76 | ```
77 |
78 | ### Symptom (SessionStart)
79 |
80 | Multiple "MCP Fallback" messages and no memories loaded:
81 | ```
82 | ↩️ MCP Fallback → Using standard MCP tools
83 | ↩️ MCP Fallback → Using standard MCP tools
84 | ↩️ MCP Fallback → Using standard MCP tools
85 | 📭 Memory Search → No relevant memories found
86 | ```
87 |
88 | ### Root Cause
89 |
90 | **HTTP/HTTPS protocol mismatch** between hook configuration and memory service.
91 |
92 | **Example**:
93 | - **Server running**: `https://localhost:8000` (HTTPS)
94 | - **Hook configured**: `http://127.0.0.1:8000` (HTTP)
95 |
96 | ### Diagnosis
97 |
98 | Check your server protocol:
99 | ```bash
100 | # Check server status
101 | systemctl --user status mcp-memory-http.service
102 | # Look for: "Uvicorn running on https://0.0.0.0:8000" or "http://..."
103 |
104 | # Or test connection
105 | curl -sk "https://localhost:8000/api/health" # HTTPS
106 | curl -s "http://127.0.0.1:8000/api/health" # HTTP
107 | ```
108 |
109 | Check your hook configuration:
110 | ```bash
111 | grep endpoint ~/.claude/hooks/config.json
112 | # Should show: "endpoint": "https://localhost:8000"
113 | ```
114 |
115 | ### Solution
116 |
117 | Update `~/.claude/hooks/config.json` to match server protocol:
118 |
119 | ```json
120 | {
121 | "memoryService": {
122 | "http": {
123 | "endpoint": "https://localhost:8000", // Match your server
124 | "apiKey": "your-api-key-here"
125 | }
126 | }
127 | }
128 | ```
129 |
130 | **No restart required** - hooks reload config on next execution.
131 |
132 | ---
133 |
134 | ## SessionEnd Requirements
135 |
136 | Even if SessionEnd fires correctly, memory creation requires:
137 |
138 | ### 1. Minimum Session Length
139 | - Default: **100+ characters** total conversation
140 | - Configurable: `sessionAnalysis.minSessionLength` in `config.json`
141 | - Reason: Prevents noise from trivial sessions
142 |
143 | ### 2. Minimum Confidence Score
144 | - Default: **> 0.1** (10% confidence)
145 | - Based on conversation analysis quality
146 | - Low confidence = session too generic to extract insights
147 |
148 | ### 3. Session Consolidation Enabled
149 | ```json
150 | {
151 | "memoryService": {
152 | "enableSessionConsolidation": true // Must be true
153 | }
154 | }
155 | ```
156 |
157 | ### What Gets Extracted
158 |
159 | SessionEnd analyzes your conversation to extract:
160 |
161 | - **Topics**: Keywords like "implementation", "debugging", "architecture", "performance"
162 | - **Decisions**: Phrases like "decided to", "will use", "chose to", "going with"
163 | - **Insights**: Phrases like "learned that", "discovered", "realized"
164 | - **Code Changes**: Phrases like "implemented", "created", "refactored"
165 | - **Next Steps**: Phrases like "next we need", "TODO", "remaining"
166 |
167 | If conversation lacks these patterns, confidence will be low and memory won't be created.
168 |
169 | ---
170 |
171 | ## Verification & Debugging
172 |
173 | ### 1. Check Recent Session Memories
174 |
175 | ```bash
176 | # Search for recent session consolidation memories
177 | curl -sk "https://localhost:8000/api/search/by-tag" \
178 | -H "Content-Type: application/json" \
179 | -d '{"tags": ["session-consolidation"], "limit": 5}' | \
180 | python -m json.tool | grep created_at_iso
181 | ```
182 |
183 | Look for recent timestamps (today/yesterday).
184 |
185 | ### 2. Test SessionEnd Hook Manually
186 |
187 | ```bash
188 | # Run hook with test conversation
189 | node ~/.claude/hooks/core/session-end.js
190 | ```
191 |
192 | Check output for:
193 | - `[Memory Hook] Session ending - consolidating outcomes...`
194 | - `[Memory Hook] Session analysis: X topics, Y decisions, confidence: Z%`
195 | - `[Memory Hook] Session consolidation stored successfully`
196 |
197 | ### 3. Verify Connection
198 |
199 | ```bash
200 | # Test server health
201 | curl -sk "https://localhost:8000/api/health"
202 |
203 | # Check config matches
204 | grep endpoint ~/.claude/hooks/config.json
205 | ```
206 |
207 | ### 4. Check SessionEnd Configuration
208 |
209 | ```bash
210 | # Verify SessionEnd hook is configured
211 | grep -A 10 "SessionEnd" ~/.claude/settings.json
212 |
213 | # Should show:
214 | # "SessionEnd": [
215 | # {
216 | # "hooks": [
217 | # {
218 | # "type": "command",
219 | # "command": "node \"/home/user/.claude/hooks/core/session-end.js\"",
220 | # "timeout": 15
221 | # }
222 | # ]
223 | # }
224 | # ]
225 | ```
226 |
227 | ---
228 |
229 | ## Quick Diagnosis Checklist
230 |
231 | Use this checklist when SessionEnd memories aren't being created:
232 |
233 | - [ ] **Did I use `/exit`** or just Ctrl+C?
234 | - **Fix**: Use `/exit` command for proper termination
235 |
236 | - [ ] **Does `config.json` endpoint match server protocol?**
237 | - **Check**: HTTP vs HTTPS in both config and server
238 | - **Fix**: Update endpoint in `~/.claude/hooks/config.json`
239 |
240 | - [ ] **Is the memory service running?**
241 |   - **Check**: `curl -sk "https://localhost:8000/api/health"` (`-k` needed for self-signed certs)
242 | - **Fix**: Start server with `systemctl --user start mcp-memory-http.service`
243 |
244 | - [ ] **Was conversation meaningful?**
245 | - **Check**: Total length > 100 characters
246 | - **Fix**: Have longer conversations with decisions/insights
247 |
248 | - [ ] **Is session consolidation enabled?**
249 | - **Check**: `enableSessionConsolidation: true` in config
250 | - **Fix**: Update `~/.claude/hooks/config.json`
251 |
252 | - [ ] **Is SessionEnd hook installed?**
253 | - **Check**: `grep SessionEnd ~/.claude/settings.json`
254 | - **Fix**: Run `cd claude-hooks && python install_hooks.py --all`
255 |
256 | ---
257 |
258 | ## Best Practices
259 |
260 | ### For Reliable Memory Consolidation
261 |
262 | 1. **Always use `/exit`** when you want session memories created
263 | 2. **Avoid Ctrl+C for final exit** - Use it only for interrupts/corrections
264 | 3. **Have meaningful conversations** - Include decisions, insights, plans
265 | 4. **Verify endpoint configuration** - HTTP vs HTTPS must match
266 | 5. **Check session memories periodically** - Confirm system is working
267 |
268 | ### For Debugging
269 |
270 | 1. **Check recent memories** - Look for session-consolidation tag
271 | 2. **Test hook manually** - Run `session-end.js` directly
272 | 3. **Verify connection** - Test health endpoint
273 | 4. **Read hook logs** - Look for error messages in terminal
274 | 5. **Consult session requirements** - Length, confidence, enabled settings
275 |
276 | ---
277 |
278 | ## Technical Details
279 |
280 | ### SessionEnd Hook Implementation
281 |
282 | **File**: `~/.claude/hooks/core/session-end.js`
283 |
284 | **Key Code Sections**:
285 | - **Lines 298-365**: Main `onSessionEnd()` function
286 | - **Line 316**: Minimum session length check (100 chars)
287 | - **Line 329**: Minimum confidence check (0.1)
288 | - **Line 305**: Session consolidation enabled check
289 | - **Lines 213-293**: `storeSessionMemory()` - HTTP API call
290 |
291 | ### Configuration Structure
292 |
293 | **File**: `~/.claude/hooks/config.json`
294 |
295 | ```json
296 | {
297 | "memoryService": {
298 | "protocol": "auto",
299 | "preferredProtocol": "http",
300 | "http": {
301 | "endpoint": "https://localhost:8000", // Must match server
302 | "apiKey": "your-api-key",
303 | "healthCheckTimeout": 3000
304 | },
305 | "enableSessionConsolidation": true
306 | },
307 | "sessionAnalysis": {
308 | "extractTopics": true,
309 | "extractDecisions": true,
310 | "extractInsights": true,
311 | "extractCodeChanges": true,
312 | "extractNextSteps": true,
313 | "minSessionLength": 100,
314 | "minConfidence": 0.1
315 | }
316 | }
317 | ```
318 |
319 | ### Hook Settings
320 |
321 | **File**: `~/.claude/settings.json`
322 |
323 | ```json
324 | {
325 | "hooks": {
326 | "SessionEnd": [
327 | {
328 | "hooks": [
329 | {
330 | "type": "command",
331 | "command": "node \"/home/user/.claude/hooks/core/session-end.js\"",
332 | "timeout": 15 // 15 seconds (vs 10s for SessionStart)
333 | }
334 | ]
335 | }
336 | ]
337 | }
338 | }
339 | ```
340 |
341 | ---
342 |
343 | ## Related Documentation
344 |
345 | - **Hook Installation**: `claude-hooks/README.md`
346 | - **Configuration Guide**: `claude-hooks/CONFIGURATION.md`
347 | - **HTTP Server Management**: `docs/http-server-management.md`
348 | - **General Troubleshooting**: `docs/troubleshooting/general.md`
349 | - **SessionStart Windows Bug**: `claude-hooks/WINDOWS-SESSIONSTART-BUG.md`
350 |
351 | ---
352 |
353 | ## Common Questions
354 |
355 | ### Q: Why didn't my session create a memory even though I used `/exit`?
356 |
357 | **A**: Check these conditions:
358 | 1. Conversation was too short (< 100 chars)
359 | 2. Conversation lacked decision/insight patterns (low confidence)
360 | 3. Connection to memory service failed (check endpoint)
361 | 4. Session consolidation disabled in config
362 |
363 | ### Q: Does Ctrl+C ever trigger SessionEnd?
364 |
365 | **A**: No. Ctrl+C sends SIGINT, which interrupts the current operation (suspension is SIGTSTP/Ctrl+Z) but doesn't terminate the session. Use `/exit` for proper termination.
366 |
367 | ### Q: Can I test if SessionEnd will work before exiting?
368 |
369 | **A**: Yes:
370 | ```bash
371 | node ~/.claude/hooks/core/session-end.js
372 | ```
373 |
374 | This runs the hook with a test conversation and shows what would happen.
375 |
376 | ### Q: How do I see all my session consolidation memories?
377 |
378 | **A**:
379 | ```bash
380 | curl -sk "https://localhost:8000/api/search/by-tag" \
381 | -H "Content-Type: application/json" \
382 | -d '{"tags": ["session-consolidation"]}' | \
383 | python -m json.tool
384 | ```
385 |
386 | ### Q: What's the difference between SessionStart and SessionEnd hooks?
387 |
388 | **A**:
389 | - **SessionStart**: Loads and injects memory context at session start
390 | - **SessionEnd**: Analyzes and stores session outcomes at session end
391 | - Both can have connection issues (check endpoint configuration)
392 | - SessionStart has timeout issues on Windows (Ctrl+C hang bug)
393 |
394 | ---
395 |
396 | **Last Updated**: 2025-11-01
397 | **Applies to**: v8.15.1+
398 | **Author**: Community Documentation
399 |
```
--------------------------------------------------------------------------------
/tests/integration/test_api_memories_chronological.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Test chronological ordering and pagination for the /api/memories endpoint.
3 |
4 | Tests verify that the GitHub issue #79 has been properly resolved by ensuring:
5 | 1. Memories are returned in chronological order (newest first)
6 | 2. Pagination works correctly with chronological ordering
7 | 3. All storage backends support the new ordering
8 | """
9 |
10 | import pytest
11 | import asyncio
12 | import time
13 | import tempfile
14 | from datetime import datetime, timedelta
15 | from typing import List, Dict, Any
16 | import os
17 |
18 | # Import project modules
19 | # Note: This assumes the project is installed in editable mode with `pip install -e .`
20 | # or PYTHONPATH is configured correctly for the test environment
21 | from mcp_memory_service.models.memory import Memory
22 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
23 |
24 |
class TestChronologicalOrdering:
    """Test chronological ordering functionality across all storage backends.

    Each test creates an isolated SQLite-vec database in a temporary
    directory via the `_open_storage` helper, so tests never share state.
    """

    async def _open_storage(self, tmp_dir: str) -> SqliteVecMemoryStorage:
        """Create and initialize a fresh SQLite-vec storage rooted in *tmp_dir*."""
        storage = SqliteVecMemoryStorage(os.path.join(tmp_dir, "test.db"))
        await storage.initialize()
        return storage

    @staticmethod
    def _assert_newest_first(memories) -> None:
        """Assert that *memories* are sorted by created_at, newest first."""
        for i in range(len(memories) - 1):
            current_time = memories[i].created_at or 0
            next_time = memories[i + 1].created_at or 0
            assert current_time >= next_time, \
                f"Memory at index {i} is older than memory at index {i + 1}"

    async def create_test_memories(self, storage) -> List[Memory]:
        """Create and store 5 test memories at 10-minute intervals.

        Timestamps start one hour in the past, so "Test memory 5" is the
        newest. Returns the memories in creation (oldest-first) order.
        """
        memories = []
        base_time = time.time() - 3600  # Start 1 hour ago

        for i in range(5):
            timestamp = base_time + (i * 600)  # 10-minute intervals
            memory = Memory(
                content=f"Test memory {i + 1}",
                content_hash=f"hash_{i + 1}",
                tags=[f"tag{i + 1}", "test"],
                memory_type="test",
                metadata={"index": i + 1},
                created_at=timestamp,
                updated_at=timestamp
            )
            memories.append(memory)

            success, message = await storage.store(memory)
            assert success, f"Failed to store memory {i + 1}: {message}"

        return memories

    @pytest.mark.asyncio
    async def test_get_all_memories_chronological_order_sqlite(self):
        """get_all_memories returns memories in chronological order, newest first (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            retrieved_memories = await storage.get_all_memories()

            assert len(retrieved_memories) == 5, f"Expected 5 memories, got {len(retrieved_memories)}"
            self._assert_newest_first(retrieved_memories)

            # Verify the concrete order matches expectations (newest first).
            expected_order = [5, 4, 3, 2, 1]  # Newest to oldest
            actual_order = [int(mem.content.split()[-1]) for mem in retrieved_memories]
            assert actual_order == expected_order, f"Expected order {expected_order}, got {actual_order}"

    @pytest.mark.asyncio
    async def test_pagination_with_chronological_order_sqlite(self):
        """Pagination maintains chronological order across pages (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            # Page through 5 records in chunks of 2.
            first_page = await storage.get_all_memories(limit=2, offset=0)
            assert len(first_page) == 2

            second_page = await storage.get_all_memories(limit=2, offset=2)
            assert len(second_page) == 2

            third_page = await storage.get_all_memories(limit=2, offset=4)
            assert len(third_page) == 1

            # Concatenated pages must still be newest-first overall.
            all_paginated = first_page + second_page + third_page
            self._assert_newest_first(all_paginated)

            expected_content_order = ["Test memory 5", "Test memory 4", "Test memory 3", "Test memory 2", "Test memory 1"]
            actual_content_order = [mem.content for mem in all_paginated]
            assert actual_content_order == expected_content_order

    @pytest.mark.asyncio
    async def test_count_all_memories_sqlite(self):
        """count_all_memories tracks the number of stored memories (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            # Initially should be empty.
            initial_count = await storage.count_all_memories()
            assert initial_count == 0

            await self.create_test_memories(storage)

            # Should now have 5 memories.
            final_count = await storage.count_all_memories()
            assert final_count == 5

    @pytest.mark.asyncio
    async def test_empty_storage_handling_sqlite(self):
        """Empty storage yields empty lists and a zero count (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            memories = await storage.get_all_memories()
            assert memories == []

            paginated = await storage.get_all_memories(limit=10, offset=0)
            assert paginated == []

            count = await storage.count_all_memories()
            assert count == 0

    @pytest.mark.asyncio
    async def test_offset_beyond_total_sqlite(self):
        """An offset past the last record returns an empty list (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            memories = await storage.get_all_memories(limit=10, offset=100)
            assert memories == []

    @pytest.mark.asyncio
    async def test_large_limit_sqlite(self):
        """A limit larger than the record count returns all records (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            memories = await storage.get_all_memories(limit=100, offset=0)
            assert len(memories) == 5  # Should return all 5 memories

    @pytest.mark.asyncio
    async def test_mixed_timestamps_ordering_sqlite(self):
        """Memories inserted out of time order still come back newest-first (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            # Deliberately unsorted timestamps.
            base_time = time.time()
            timestamps = [base_time + 300, base_time + 100, base_time + 500, base_time + 200, base_time + 400]

            for i, timestamp in enumerate(timestamps):
                memory = Memory(
                    content=f"Mixed memory {i + 1}",
                    content_hash=f"mixed_hash_{i + 1}",
                    tags=["mixed", "test"],
                    memory_type="mixed",
                    metadata={"timestamp": timestamp},
                    created_at=timestamp,
                    updated_at=timestamp
                )

                success, message = await storage.store(memory)
                assert success, f"Failed to store mixed memory {i + 1}: {message}"

            memories = await storage.get_all_memories()

            # Should be ordered by timestamp (newest first).
            expected_order = [base_time + 500, base_time + 400, base_time + 300, base_time + 200, base_time + 100]
            actual_timestamps = [mem.created_at for mem in memories]
            assert actual_timestamps == expected_order, f"Expected {expected_order}, got {actual_timestamps}"
213 |
214 |
215 | class TestAPIChronologicalIntegration:
216 | """Integration tests that would test the actual API endpoints.
217 |
218 | These tests are structured to be easily adaptable for testing the actual
219 | FastAPI endpoints when a test client is available.
220 | """
221 |
222 | def test_api_endpoint_structure(self):
223 | """Test that the API endpoint imports and structure are correct."""
224 | # Import the API router to ensure it loads correctly
225 | from mcp_memory_service.web.api.memories import router
226 |
227 | # Verify the router exists and has the expected endpoints
228 | routes = [route.path for route in router.routes]
229 | assert "/memories" in routes
230 | assert "/memories/{content_hash}" in routes
231 |
232 | def test_memory_response_model(self):
233 | """Test that the response models include necessary fields for chronological ordering."""
234 | from mcp_memory_service.web.api.memories import MemoryResponse, MemoryListResponse
235 |
236 | # Verify MemoryResponse has timestamp fields
237 | response_fields = MemoryResponse.__fields__.keys()
238 | assert "created_at" in response_fields
239 | assert "created_at_iso" in response_fields
240 | assert "updated_at" in response_fields
241 | assert "updated_at_iso" in response_fields
242 |
243 | # Verify MemoryListResponse has pagination fields
244 | list_fields = MemoryListResponse.__fields__.keys()
245 | assert "memories" in list_fields
246 | assert "total" in list_fields
247 | assert "page" in list_fields
248 | assert "page_size" in list_fields
249 | assert "has_more" in list_fields
250 |
251 | def test_storage_backend_type_compatibility(self):
252 | """Test that the API endpoints use the correct base storage type."""
253 | from mcp_memory_service.web.api.memories import list_memories
254 | import inspect
255 |
256 | # Get the signature of the list_memories function
257 | sig = inspect.signature(list_memories)
258 | storage_param = sig.parameters['storage']
259 |
260 | # Check that it uses the base MemoryStorage type, not a specific implementation
261 | assert 'MemoryStorage' in str(storage_param.annotation)
262 |
263 |
if __name__ == "__main__":
    # Run tests directly. Propagate pytest's exit status so that a direct
    # invocation (e.g. from CI) fails when the tests fail instead of
    # always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/discovery/client.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Discovery client for MCP Memory Service.
17 |
18 | This module provides a high-level client for discovering and connecting to
19 | MCP Memory Service instances on the local network.
20 | """
21 |
import asyncio
import logging
import time
from dataclasses import dataclass
from typing import List, Optional, Dict, Any

import aiohttp

from .mdns_service import ServiceDiscovery, ServiceDetails
from ..config import MDNS_DISCOVERY_TIMEOUT
30 |
31 | logger = logging.getLogger(__name__)
32 |
33 |
@dataclass
class HealthStatus:
    """Health status of a discovered service.

    Produced by DiscoveryClient.check_service_health(); on a failed probe
    the fields are filled with placeholder values and ``error`` describes
    what went wrong.
    """
    healthy: bool  # True when the /health endpoint answered with HTTP 200
    status: str  # server-reported status; 'error'/'timeout'/'unknown' on probe failure
    backend: str  # storage backend reported by the server ('unknown' on failure)
    statistics: Dict[str, Any]  # server statistics payload ({} on failure)
    response_time_ms: float  # health-request round trip in milliseconds
    error: Optional[str] = None  # failure description; None when healthy
43 |
44 |
class DiscoveryClient:
    """High-level client for discovering and validating MCP Memory Services.

    Wraps mDNS ServiceDiscovery with health probing and preference-based
    selection of the discovered services.
    """

    def __init__(self, discovery_timeout: int = MDNS_DISCOVERY_TIMEOUT):
        """Initialize the client.

        Args:
            discovery_timeout: mDNS discovery window in seconds.
        """
        self.discovery_timeout = discovery_timeout
        self._discovery = ServiceDiscovery(discovery_timeout=discovery_timeout)

    @staticmethod
    def _insecure_session(timeout: float) -> aiohttp.ClientSession:
        """Create an HTTP session that accepts self-signed certificates.

        Locally discovered services commonly serve HTTPS with self-signed
        certs, so TLS verification is disabled for these local probes.
        """
        return aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=timeout),
            # ssl=False replaces the deprecated verify_ssl=False (aiohttp 3.x).
            connector=aiohttp.TCPConnector(ssl=False),
        )

    async def find_best_service(
        self,
        prefer_https: bool = True,
        require_auth: Optional[bool] = None,
        validate_health: bool = True
    ) -> Optional[ServiceDetails]:
        """
        Find the best MCP Memory Service on the network.

        Args:
            prefer_https: Prefer HTTPS services over HTTP
            require_auth: Require (True) or reject (False) services with auth, None for any
            validate_health: Validate service health before returning

        Returns:
            Best service found, or None if no suitable service
        """
        services = await self.discover_services()
        if not services:
            logger.info("No MCP Memory Services found on the network")
            return None

        # Keep only services matching the auth requirement (None accepts any).
        candidates = [
            service for service in services
            if require_auth is None or service.requires_auth == require_auth
        ]
        if not candidates:
            logger.info("No services match the specified requirements")
            return None

        def service_priority(service: ServiceDetails) -> tuple:
            # Score protocol preference; break ties by port for consistency.
            https_score = 1 if service.https else 0
            if not prefer_https:
                https_score = 1 - https_score  # Invert preference
            return (https_score, service.port)

        candidates.sort(key=service_priority, reverse=True)

        if not validate_health:
            # Return the top-ranked service without probing it.
            best_service = candidates[0]
            logger.info(f"Selected service: {best_service.name} at {best_service.url}")
            return best_service

        # Probe candidates in priority order; return the first healthy one.
        for service in candidates:
            health = await self.check_service_health(service)
            if health and health.healthy:
                logger.info(f"Selected healthy service: {service.name} at {service.url}")
                return service
            logger.warning(
                f"Service {service.name} failed health check: "
                f"{health.error if health else 'Unknown error'}"
            )

        logger.warning("No healthy services found")
        return None

    async def discover_services(self) -> List[ServiceDetails]:
        """Discover all MCP Memory Services on the network."""
        logger.info("Discovering MCP Memory Services on the network...")
        services = await self._discovery.discover_services()

        if services:
            logger.info(f"Found {len(services)} MCP Memory Services:")
            for service in services:
                logger.info(f"  - {service.name} at {service.url} (Auth: {service.requires_auth})")
        else:
            logger.info("No MCP Memory Services found")

        return services

    async def check_service_health(
        self,
        service: ServiceDetails,
        timeout: float = 5.0
    ) -> Optional[HealthStatus]:
        """
        Check the health of a discovered service.

        Args:
            service: Service to check
            timeout: Request timeout in seconds

        Returns:
            HealthStatus describing the probe result. Failures (timeouts,
            connection errors, non-200 responses) are reported as an
            unhealthy status rather than raised.
        """
        health_url = f"{service.api_url}/health"
        start_time = time.time()

        try:
            async with self._insecure_session(timeout) as session:
                async with session.get(health_url) as response:
                    response_time = (time.time() - start_time) * 1000  # Convert to ms

                    if response.status == 200:
                        data = await response.json()
                        return HealthStatus(
                            healthy=True,
                            status=data.get('status', 'unknown'),
                            backend=data.get('storage_type', 'unknown'),
                            statistics=data.get('statistics', {}),
                            response_time_ms=response_time
                        )
                    return HealthStatus(
                        healthy=False,
                        status='error',
                        backend='unknown',
                        statistics={},
                        response_time_ms=response_time,
                        error=f"HTTP {response.status}"
                    )

        except asyncio.TimeoutError:
            return HealthStatus(
                healthy=False,
                status='timeout',
                backend='unknown',
                statistics={},
                response_time_ms=timeout * 1000,
                error="Request timeout"
            )
        except Exception as e:
            return HealthStatus(
                healthy=False,
                status='error',
                backend='unknown',
                statistics={},
                response_time_ms=0,
                error=str(e)
            )

    async def get_service_capabilities(
        self,
        service: ServiceDetails,
        api_key: Optional[str] = None,
        timeout: float = 5.0
    ) -> Optional[Dict[str, Any]]:
        """
        Get detailed capabilities of a service via its OpenAPI spec.

        Args:
            service: Service to query
            api_key: API key if required
            timeout: Request timeout

        Returns:
            Parsed OpenAPI document, or None if the request failed
        """
        try:
            headers = {}
            if api_key and service.requires_auth:
                headers['Authorization'] = f'Bearer {api_key}'

            async with self._insecure_session(timeout) as session:
                openapi_url = f"{service.api_url}/openapi.json"
                async with session.get(openapi_url, headers=headers) as response:
                    if response.status == 200:
                        return await response.json()

        except Exception as e:
            logger.error(f"Failed to get service capabilities: {e}")

        return None

    async def find_services_with_health(
        self,
        prefer_https: bool = True,
        require_auth: Optional[bool] = None
    ) -> List[tuple[ServiceDetails, HealthStatus]]:
        """
        Find all services and their health status.

        Returns:
            List of (service, health_status) tuples, sorted by preference
        """
        services = await self.discover_services()
        if not services:
            return []

        # Filter by auth requirement.
        if require_auth is not None:
            services = [s for s in services if s.requires_auth == require_auth]

        # Probe all services concurrently; exceptions become unhealthy entries.
        health_tasks = [self.check_service_health(service) for service in services]
        health_results = await asyncio.gather(*health_tasks, return_exceptions=True)

        service_health_pairs = []
        for service, health_result in zip(services, health_results):
            if isinstance(health_result, Exception):
                health = HealthStatus(
                    healthy=False,
                    status='error',
                    backend='unknown',
                    statistics={},
                    response_time_ms=0,
                    error=str(health_result)
                )
            else:
                health = health_result or HealthStatus(
                    healthy=False,
                    status='unknown',
                    backend='unknown',
                    statistics={},
                    response_time_ms=0,
                    error="No response"
                )

            service_health_pairs.append((service, health))

        # Sort: healthy first, then protocol preference, then fastest response.
        def sort_key(pair: tuple[ServiceDetails, HealthStatus]) -> tuple:
            service, health = pair
            healthy_score = 1 if health.healthy else 0
            https_score = 1 if service.https else 0
            if not prefer_https:
                https_score = 1 - https_score
            response_time = health.response_time_ms if health.healthy else float('inf')
            # Negate response time so reverse=True puts faster services first.
            return (healthy_score, https_score, -response_time)

        service_health_pairs.sort(key=sort_key, reverse=True)
        return service_health_pairs

    async def stop(self) -> None:
        """Stop the underlying mDNS discovery."""
        await self._discovery.stop_discovery()
```
--------------------------------------------------------------------------------
/tests/unit/test_csv_loader.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Unit tests for CSV document loader.
4 | """
5 |
import asyncio
import csv
import io
import tempfile
from pathlib import Path

import pytest

from mcp_memory_service.ingestion.csv_loader import CSVLoader
from mcp_memory_service.ingestion.base import DocumentChunk
from conftest import extract_chunks_from_temp_file
15 |
16 |
class TestCSVLoader:
    """Test suite for CSVLoader class."""

    @staticmethod
    async def _collect_chunks(loader, path, **kwargs):
        """Drain loader.extract_chunks(path, **kwargs) into a list."""
        return [chunk async for chunk in loader.extract_chunks(path, **kwargs)]

    def test_initialization(self):
        """Test basic initialization of CSVLoader."""
        loader = CSVLoader(chunk_size=500, chunk_overlap=50)

        assert loader.chunk_size == 500
        assert loader.chunk_overlap == 50
        assert 'csv' in loader.supported_extensions

    def test_can_handle_file(self):
        """Test file format detection."""
        loader = CSVLoader()

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_file.touch()

            txt_file = Path(tmpdir) / "test.txt"
            txt_file.touch()

            # Supported format
            assert loader.can_handle(csv_file) is True
            # Unsupported format
            assert loader.can_handle(txt_file) is False

    @pytest.mark.asyncio
    async def test_extract_chunks_simple_csv(self):
        """Test extraction from simple CSV file."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_content = """name,age,city
John,25,New York
Jane,30,San Francisco"""
            csv_file.write_text(csv_content)

            chunks = await self._collect_chunks(loader, csv_file)

            # Verify chunks were created
            assert len(chunks) > 0

            # Verify chunk structure
            first_chunk = chunks[0]
            assert isinstance(first_chunk, DocumentChunk)
            assert isinstance(first_chunk.content, str)
            assert first_chunk.source_file == csv_file

            # Verify content contains formatted rows
            content = first_chunk.content
            assert "name: John" in content
            assert "age: 25" in content
            assert "city: New York" in content
            assert "name: Jane" in content
            assert "age: 30" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_csv_with_headers(self):
        """Test extraction from CSV with header detection."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """product,price,category
Widget,19.99,Electronics
Gadget,29.99,Electronics
Book,12.99,Media"""
        chunks = await extract_chunks_from_temp_file(loader, "test.csv", csv_content)

        content = chunks[0].content
        assert "product: Widget" in content
        assert "price: 19.99" in content
        assert "category: Electronics" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_csv_no_headers(self):
        """Test extraction from CSV without headers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """John,25,New York
Jane,30,San Francisco"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            has_header=False
        )

        content = chunks[0].content
        # Should fall back to synthetic col_1, col_2, col_3 headers
        assert "col_1: John" in content
        assert "col_2: 25" in content
        assert "col_3: New York" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_different_delimiters(self):
        """Test extraction with different CSV delimiters."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        # Semicolon-delimited input
        csv_content = "name;age;city\nJohn;25;New York\nJane;30;San Francisco"
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            delimiter=';'
        )

        content = chunks[0].content
        assert "name: John" in content
        assert "age: 25" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_row_numbers(self):
        """Test extraction with row numbers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """name,age
John,25
Jane,30"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            include_row_numbers=True
        )

        content = chunks[0].content
        assert "Row 1:" in content
        assert "Row 2:" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_no_row_numbers(self):
        """Test extraction without row numbers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """name,age
John,25"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            include_row_numbers=False
        )

        content = chunks[0].content
        assert "Row:" in content
        assert "Row 1:" not in content

    @pytest.mark.asyncio
    async def test_extract_chunks_large_file_chunking(self):
        """Test that large CSV files are processed correctly."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "large.csv"
            rows = ["name,value"] + [f"item{i},{i}" for i in range(10)]
            csv_file.write_text("\n".join(rows))

            chunks = await self._collect_chunks(loader, csv_file, max_rows_per_chunk=50)

            # Should create at least one chunk
            assert len(chunks) >= 1

            # Verify all content is included
            all_content = "".join(chunk.content for chunk in chunks)
            assert "item0" in all_content
            assert "item9" in all_content
            assert "name: item0" in all_content
            assert "value: 0" in all_content

    @pytest.mark.asyncio
    async def test_extract_chunks_empty_file(self):
        """Test handling of empty CSV files."""
        loader = CSVLoader()

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "empty.csv"
            csv_file.write_text("")

            # Should not raise but yield no chunks
            chunks = await self._collect_chunks(loader, csv_file)
            assert len(chunks) == 0

    @pytest.mark.asyncio
    async def test_extract_chunks_malformed_csv(self):
        """Test handling of malformed CSV files."""
        loader = CSVLoader()

        # CSV with inconsistent column counts - should still work
        csv_content = """name,age,city
John,25
Jane,30,San Francisco,Extra"""
        chunks = await extract_chunks_from_temp_file(loader, "malformed.csv", csv_content)

        # Should handle gracefully
        assert len(chunks) > 0
        content = chunks[0].content
        assert "name: John" in content
        assert "name: Jane" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_encoding_detection(self):
        """Test automatic encoding detection."""
        loader = CSVLoader()

        # UTF-8 content with non-ASCII characters
        csv_content = """name,city
José,São Paulo
François,Montréal"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "utf8.csv",
            csv_content,
            encoding='utf-8'
        )

        content = chunks[0].content
        assert "José" in content
        assert "São Paulo" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_metadata(self):
        """Test that metadata is properly included."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_content = """name,age
John,25
Jane,30"""
            csv_file.write_text(csv_content)

            chunks = await self._collect_chunks(loader, csv_file)

            first_chunk = chunks[0]
            assert first_chunk.metadata['content_type'] == 'csv'
            assert first_chunk.metadata['has_header'] is True
            assert first_chunk.metadata['column_count'] == 2
            assert first_chunk.metadata['row_count'] == 2
            assert first_chunk.metadata['headers'] == ['name', 'age']
            assert 'file_size' in first_chunk.metadata
            assert first_chunk.metadata['loader_type'] == 'CSVLoader'
287 |
288 |
class TestCSVLoaderRegistry:
    """Test CSV loader registration."""

    def test_loader_registration(self):
        """The ingestion registry should resolve .csv files to a CSVLoader."""
        from mcp_memory_service.ingestion.registry import get_loader_for_file

        import tempfile
        with tempfile.TemporaryDirectory() as tmpdir:
            # Write a minimal CSV file for the registry to inspect.
            sample = Path(tmpdir) / "test.csv"
            sample.write_text("name,value\nJohn,25")

            resolved = get_loader_for_file(sample)

            # The registry must return a CSVLoader instance.
            assert resolved is not None
            assert isinstance(resolved, CSVLoader)
307 |
308 |
309 | class TestCSVDelimiterDetection:
310 | """Test CSV delimiter detection."""
311 |
312 | def test_detect_delimiter_comma(self):
313 | """Test comma delimiter detection."""
314 | loader = CSVLoader()
315 | content = "name,age,city\nJohn,25,New York\nJane,30,San Francisco"
316 | delimiter = loader._detect_delimiter(content)
317 | assert delimiter == ','
318 |
319 | def test_detect_delimiter_semicolon(self):
320 | """Test semicolon delimiter detection."""
321 | loader = CSVLoader()
322 | content = "name;age;city\nJohn;25;New York\nJane;30;San Francisco"
323 | delimiter = loader._detect_delimiter(content)
324 | assert delimiter == ';'
325 |
326 | def test_detect_delimiter_tab(self):
327 | """Test tab delimiter detection."""
328 | loader = CSVLoader()
329 | content = "name\tage\tcity\nJohn\t25\tNew York\nJane\t30\tSan Francisco"
330 | delimiter = loader._detect_delimiter(content)
331 | assert delimiter == '\t'
332 |
333 | def test_detect_delimiter_pipe(self):
334 | """Test pipe delimiter detection."""
335 | loader = CSVLoader()
336 | content = "name|age|city\nJohn|25|New York\nJane|30|San Francisco"
337 | delimiter = loader._detect_delimiter(content)
338 | assert delimiter == '|'
339 |
340 |
341 | if __name__ == '__main__':
342 | pytest.main([__file__, '-v'])
343 |
```
--------------------------------------------------------------------------------
/archive/docs-removed-2025-08-23/windows.md:
--------------------------------------------------------------------------------
```markdown
1 | # Windows Setup Guide
2 |
3 | This guide provides comprehensive instructions for setting up and running the MCP Memory Service on Windows systems, including handling common Windows-specific issues.
4 |
5 | ## Prerequisites
6 |
7 | - **Python 3.10 or newer** (Python 3.11 recommended)
8 | - **Git for Windows** ([download here](https://git-scm.com/download/win))
9 | - **Visual Studio Build Tools** (for PyTorch compilation)
10 | - **PowerShell 5.1+** or **Windows Terminal** (recommended)
11 |
12 | ## Quick Installation
13 |
14 | ### Automatic Installation (Recommended)
15 |
16 | ```powershell
17 | # Clone repository
18 | git clone https://github.com/doobidoo/mcp-memory-service.git
19 | cd mcp-memory-service
20 |
21 | # Run Windows-specific installer
22 | python install.py --windows
23 | ```
24 |
25 | The installer automatically:
26 | - Detects CUDA availability
27 | - Installs the correct PyTorch version
28 | - Configures Windows-specific settings
29 | - Sets up optimal storage backend
30 |
31 | ## Manual Installation
32 |
33 | ### 1. Environment Setup
34 |
35 | ```powershell
36 | # Clone repository
37 | git clone https://github.com/doobidoo/mcp-memory-service.git
38 | cd mcp-memory-service
39 |
40 | # Create virtual environment
41 | python -m venv venv
42 |
43 | # Activate virtual environment
44 | venv\Scripts\activate
45 |
46 | # Upgrade pip
47 | python -m pip install --upgrade pip
48 | ```
49 |
50 | ### 2. Install Dependencies
51 |
52 | #### For CUDA-enabled Systems
53 |
54 | ```powershell
55 | # Install PyTorch with CUDA support
56 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
57 |
58 | # Install other dependencies
59 | pip install -e .
60 | pip install chromadb sentence-transformers
61 | ```
62 |
63 | #### For CPU-only Systems
64 |
65 | ```powershell
66 | # Install CPU-only PyTorch
67 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
68 |
69 | # Install with SQLite-vec backend (recommended for CPU)
70 | pip install -e .
71 | pip install sentence-transformers sqlite-vec
72 | ```
73 |
74 | ### 3. Windows-Specific Installation Script
75 |
76 | If you encounter issues, use the Windows-specific installation script:
77 |
78 | ```powershell
79 | python scripts/install_windows.py
80 | ```
81 |
82 | This script handles:
83 | 1. CUDA detection and appropriate PyTorch installation
84 | 2. Resolving common Windows dependency conflicts
85 | 3. Setting up Windows-specific environment variables
86 | 4. Configuring optimal storage backend based on hardware
87 |
88 | ## Configuration
89 |
90 | ### Environment Variables
91 |
92 | #### For CUDA Systems
93 |
94 | ```powershell
95 | # Set environment variables (PowerShell)
96 | $env:MCP_MEMORY_STORAGE_BACKEND = "chromadb"
97 | $env:MCP_MEMORY_USE_CUDA = "true"
98 | $env:MCP_MEMORY_CHROMA_PATH = "$env:USERPROFILE\.mcp_memory_chroma"
99 |
100 | # Or set permanently
101 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "chromadb", "User")
102 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_USE_CUDA", "true", "User")
103 | ```
104 |
105 | #### For CPU-only Systems
106 |
107 | ```powershell
108 | # Set environment variables (PowerShell)
109 | $env:MCP_MEMORY_STORAGE_BACKEND = "sqlite_vec"
110 | $env:MCP_MEMORY_SQLITE_VEC_PATH = "$env:USERPROFILE\.mcp_memory_sqlite"
111 | $env:MCP_MEMORY_CPU_ONLY = "true"
112 |
113 | # Or set permanently
114 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "sqlite_vec", "User")
115 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_CPU_ONLY", "true", "User")
116 | ```
117 |
118 | ### Windows Batch Scripts
119 |
120 | The repository includes Windows batch scripts for easy startup:
121 |
122 | #### `scripts/run/run-with-uv.bat`
123 |
124 | ```batch
125 | @echo off
126 | cd /d "%~dp0..\.."
127 | call venv\Scripts\activate.bat
128 | python src\mcp_memory_service\server.py
129 | ```
130 |
131 | #### Usage
132 |
133 | ```powershell
134 | # Run the server
135 | .\scripts\run\run-with-uv.bat
136 |
137 | # Or run directly
138 | python src\mcp_memory_service\server.py
139 | ```
140 |
141 | ## Claude Desktop Configuration
142 |
143 | ### Windows Configuration File Location
144 |
145 | Claude Desktop configuration is typically located at:
146 | ```
147 | %APPDATA%\Claude\claude_desktop_config.json
148 | ```
149 |
150 | ### Configuration Examples
151 |
152 | #### For CUDA Systems
153 |
154 | ```json
155 | {
156 | "mcpServers": {
157 | "memory": {
158 | "command": "python",
159 | "args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
160 | "env": {
161 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
162 | "MCP_MEMORY_USE_CUDA": "true",
163 | "PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
164 | }
165 | }
166 | }
167 | }
168 | ```
169 |
170 | #### For CPU-only Systems
171 |
172 | ```json
173 | {
174 | "mcpServers": {
175 | "memory": {
176 | "command": "python",
177 | "args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
178 | "env": {
179 | "MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
180 | "MCP_MEMORY_CPU_ONLY": "true",
181 | "PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
182 | }
183 | }
184 | }
185 | }
186 | ```
187 |
188 | #### Using Batch Script
189 |
190 | ```json
191 | {
192 | "mcpServers": {
193 | "memory": {
194 | "command": "C:\\path\\to\\mcp-memory-service\\scripts\\run\\run-with-uv.bat"
195 | }
196 | }
197 | }
198 | ```
199 |
200 | ## Hardware Detection and Optimization
201 |
202 | ### CUDA Detection
203 |
204 | The installer automatically detects CUDA availability:
205 |
206 | ```python
207 | def detect_cuda():
208 | try:
209 | import torch
210 | return torch.cuda.is_available()
211 | except ImportError:
212 | return False
213 | ```
214 |
215 | ### DirectML Support
216 |
217 | For Windows systems without CUDA but with DirectX 12 compatible GPUs:
218 |
219 | ```powershell
220 | # Install DirectML-enabled PyTorch
221 | pip install torch-directml
222 | ```
223 |
224 | Configure for DirectML:
225 | ```powershell
226 | $env:MCP_MEMORY_USE_DIRECTML = "true"
227 | $env:MCP_MEMORY_DEVICE = "dml"
228 | ```
229 |
230 | ## Windows-Specific Features
231 |
232 | ### Windows Service Installation
233 |
234 | To run MCP Memory Service as a Windows service:
235 |
236 | ```powershell
237 | # Install as Windows service (requires admin privileges)
238 | python scripts/install_windows_service.py install
239 |
240 | # Start service
241 | net start MCPMemoryService
242 |
243 | # Stop service
244 | net stop MCPMemoryService
245 |
246 | # Remove service
247 | python scripts/install_windows_service.py remove
248 | ```
249 |
250 | ### Task Scheduler Integration
251 |
252 | Create a scheduled task to start MCP Memory Service on boot:
253 |
254 | ```powershell
255 | # Create scheduled task
256 | schtasks /create /tn "MCP Memory Service" /tr "C:\path\to\mcp-memory-service\scripts\run\run-with-uv.bat" /sc onlogon /ru "$env:USERNAME"
257 |
258 | # Delete scheduled task
259 | schtasks /delete /tn "MCP Memory Service" /f
260 | ```
261 |
262 | ## Troubleshooting
263 |
264 | ### Common Windows Issues
265 |
266 | #### 1. Path Length Limitations
267 |
268 | **Symptom**: Installation fails with "path too long" errors
269 |
270 | **Solution**: Enable long path support:
271 | ```powershell
272 | # Run as Administrator
273 | New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force
274 | ```
275 |
276 | #### 2. Visual Studio Build Tools Missing
277 |
278 | **Symptom**:
279 | ```
280 | Microsoft Visual C++ 14.0 is required
281 | ```
282 |
283 | **Solution**: Install Visual Studio Build Tools:
284 | ```powershell
285 | # Download and install from:
286 | # https://visualstudio.microsoft.com/visual-cpp-build-tools/
287 |
288 | # Or install via winget
289 | winget install Microsoft.VisualStudio.2022.BuildTools
290 | ```
291 |
292 | #### 3. CUDA Version Mismatch
293 |
294 | **Symptom**: PyTorch CUDA installation issues
295 |
296 | **Solution**: Match PyTorch CUDA version to your installed CUDA:
297 | ```powershell
298 | # Check CUDA version
299 | nvcc --version
300 |
301 | # Install matching PyTorch version
302 | # For CUDA 11.8
303 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
304 |
305 | # For CUDA 12.1
306 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
307 | ```
308 |
309 | #### 4. Permission Issues
310 |
311 | **Symptom**: Access denied errors when installing or running
312 |
313 | **Solution**: Run PowerShell as Administrator and check folder permissions:
314 | ```powershell
315 | # Check current user permissions
316 | whoami /groups
317 |
318 | # Run installation as Administrator if needed
319 | # Or adjust folder permissions
320 | icacls "C:\path\to\mcp-memory-service" /grant "$env:USERNAME:(F)" /t
321 | ```
322 |
323 | #### 5. Windows Defender Issues
324 |
325 | **Symptom**: Installation files deleted or blocked
326 |
327 | **Solution**: Add exclusions to Windows Defender:
328 | ```powershell
329 | # Add folder exclusion (run as Administrator)
330 | Add-MpPreference -ExclusionPath "C:\path\to\mcp-memory-service"
331 |
332 | # Add process exclusion
333 | Add-MpPreference -ExclusionProcess "python.exe"
334 | ```
335 |
336 | ### Diagnostic Commands
337 |
338 | #### System Information
339 |
340 | ```powershell
341 | # Check Python version and location
342 | python --version
343 | Get-Command python
344 |
345 | # Check pip version
346 | pip --version
347 |
348 | # Check CUDA availability
349 | python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
350 |
351 | # Check DirectML (if installed)
352 | python -c "import torch_directml; print('DirectML available')"
353 |
354 | # Check Windows version
355 | Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion
356 | ```
357 |
358 | #### Environment Verification
359 |
360 | ```powershell
361 | # Check environment variables
362 | Get-ChildItem Env: | Where-Object {$_.Name -like "MCP_MEMORY_*"}
363 |
364 | # Check virtual environment
365 | echo $env:VIRTUAL_ENV
366 |
367 | # Verify key packages
368 | python -c "import torch; print(f'PyTorch: {torch.__version__}')"
369 | python -c "import sentence_transformers; print('SentenceTransformers: OK')"
370 | python -c "import chromadb; print('ChromaDB: OK')" # or sqlite_vec
371 | ```
372 |
373 | #### Network and Firewall
374 |
375 | ```powershell
376 | # Check if Windows Firewall is blocking
377 | Get-NetFirewallRule -DisplayName "*Python*" | Format-Table
378 |
379 | # Test network connectivity (if using HTTP mode)
380 | Test-NetConnection -ComputerName localhost -Port 8000
381 | ```
382 |
383 | ### Performance Optimization
384 |
385 | #### Windows-Specific Settings
386 |
387 | ```powershell
388 | # Optimize for machine learning workloads
389 | $env:OMP_NUM_THREADS = [Environment]::ProcessorCount
390 | $env:MKL_NUM_THREADS = [Environment]::ProcessorCount
391 |
392 | # Set Windows-specific memory settings
393 | $env:MCP_MEMORY_WINDOWS_OPTIMIZATION = "true"
394 | $env:MCP_MEMORY_BATCH_SIZE = "32"
395 | ```
396 |
397 | #### Resource Monitoring
398 |
399 | ```powershell
400 | # Monitor memory usage
401 | Get-Process python | Select-Object ProcessName, WorkingSet, CPU
402 |
403 | # Monitor GPU usage (if CUDA)
404 | nvidia-smi
405 |
406 | # Monitor disk I/O
407 | Get-Counter "\PhysicalDisk(_Total)\Disk Reads/sec"
408 | ```
409 |
410 | ## Development on Windows
411 |
412 | ### Setting up Development Environment
413 |
414 | ```powershell
415 | # Clone for development
416 | git clone https://github.com/doobidoo/mcp-memory-service.git
417 | cd mcp-memory-service
418 |
419 | # Create development environment
420 | python -m venv venv-dev
421 | venv-dev\Scripts\activate
422 |
423 | # Install in development mode
424 | pip install -e .
425 | pip install pytest black isort mypy
426 |
427 | # Run tests
428 | pytest tests/
429 | ```
430 |
431 | ### Windows-Specific Testing
432 |
433 | ```powershell
434 | # Run Windows-specific tests
435 | pytest tests/platform/test_windows.py -v
436 |
437 | # Test CUDA functionality (if available)
438 | pytest tests/cuda/ -v
439 |
440 | # Test DirectML functionality (if available)
441 | pytest tests/directml/ -v
442 | ```
443 |
444 | ## Alternative Installation Methods
445 |
446 | ### Using Chocolatey
447 |
448 | ```powershell
449 | # Install Python via Chocolatey
450 | choco install python
451 |
452 | # Install Git
453 | choco install git
454 |
455 | # Then follow standard installation
456 | ```
457 |
458 | ### Using Conda
459 |
460 | ```powershell
461 | # Create conda environment
462 | conda create -n mcp-memory python=3.11
463 | conda activate mcp-memory
464 |
465 | # Install PyTorch via conda
466 | conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
467 |
468 | # Install other dependencies
469 | pip install -e .
470 | ```
471 |
472 | ### Using Docker on Windows
473 |
474 | ```powershell
475 | # Using Docker Desktop
476 | git clone https://github.com/doobidoo/mcp-memory-service.git
477 | cd mcp-memory-service
478 |
479 | # Build Windows container
480 | docker build -f Dockerfile.windows -t mcp-memory-service-windows .
481 |
482 | # Run container
483 | docker run -p 8000:8000 mcp-memory-service-windows
484 | ```
485 |
486 | ## Related Documentation
487 |
488 | - [Installation Guide](../installation/master-guide.md) - General installation instructions
489 | - [Multi-Client Setup](../integration/multi-client.md) - Multi-client configuration
490 | - [Troubleshooting](../troubleshooting/general.md) - Windows-specific troubleshooting
491 | - [Docker Deployment](../deployment/docker.md) - Docker setup on Windows
```
--------------------------------------------------------------------------------
/tests/integration/test_api_tag_time_search.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Integration tests for POST /api/search/by-tag endpoint with time_filter parameter.
3 |
4 | Tests the time_filter functionality added in PR #215 to fix semantic over-filtering bug (issue #214).
5 |
6 | NOTE: These tests currently have SQLite threading issues with TestClient.
7 | The async fixture creates storage in one thread, but TestClient creates its own threads,
8 | causing "SQLite objects created in a thread can only be used in that same thread" errors.
9 |
10 | TODO: Fix by using synchronous fixtures like test_http_api_search_by_tag_endpoint in
11 | tests/integration/test_api_with_memory_service.py (line 670), which creates storage
12 | within the test function rather than in an async fixture.
13 |
14 | For now, comprehensive unit tests in tests/unit/test_tag_time_filtering.py provide
15 | excellent coverage of the tag+time filtering functionality across all storage backends.
16 | """
17 |
18 | import pytest
19 | import pytest_asyncio
20 | import tempfile
21 | import os
22 | import time
23 | from fastapi.testclient import TestClient
24 |
25 | from mcp_memory_service.web.dependencies import set_storage
26 | from mcp_memory_service.services.memory_service import MemoryService
27 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
28 | from mcp_memory_service.models.memory import Memory
29 | from mcp_memory_service.utils.hashing import generate_content_hash
30 |
31 |
32 | @pytest.fixture
33 | def temp_db():
34 | """Create a temporary database for testing."""
35 | with tempfile.TemporaryDirectory() as tmpdir:
36 | db_path = os.path.join(tmpdir, "test_api_tag_time.db")
37 | yield db_path
38 |
39 |
40 | @pytest_asyncio.fixture
41 | async def storage_with_test_data(temp_db):
42 | """Create storage with test memories at different timestamps."""
43 | storage = SqliteVecMemoryStorage(temp_db)
44 | await storage.initialize()
45 |
46 | # Store old memory (2 days ago)
47 | two_days_ago = time.time() - (2 * 24 * 60 * 60)
48 | old_task_content = "Old task from 2 days ago"
49 | old_memory = Memory(
50 | content=old_task_content,
51 | content_hash=generate_content_hash(old_task_content),
52 | tags=["task", "old"],
53 | memory_type="task",
54 | created_at=two_days_ago
55 | )
56 | await storage.store(old_memory)
57 |
58 | # Store recent memory (current time)
59 | recent_task_content = "Recent task from today"
60 | recent_memory = Memory(
61 | content=recent_task_content,
62 | content_hash=generate_content_hash(recent_task_content),
63 | tags=["task", "recent"],
64 | memory_type="task",
65 | created_at=time.time()
66 | )
67 | await storage.store(recent_memory)
68 |
69 | # Store another old memory with different tags
70 | old_note_content = "Old note from 3 days ago"
71 | old_note = Memory(
72 | content=old_note_content,
73 | content_hash=generate_content_hash(old_note_content),
74 | tags=["note", "old"],
75 | memory_type="note",
76 | created_at=time.time() - (3 * 24 * 60 * 60)
77 | )
78 | await storage.store(old_note)
79 |
80 | yield storage
81 |
82 | storage.close()
83 |
84 |
85 | @pytest.mark.asyncio
86 | @pytest.mark.integration
87 | async def test_api_search_by_tag_with_time_filter_recent(storage_with_test_data):
88 | """Test POST /api/search/by-tag with time_filter returns only recent memories."""
89 | from mcp_memory_service.web.app import app
90 | set_storage(storage_with_test_data)
91 |
92 | client = TestClient(app)
93 |
94 | # Search for "task" tag with time_filter = 1 day ago
95 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
96 |
97 | response = client.post(
98 | "/api/search/by-tag",
99 | json={
100 | "tags": ["task"],
101 | "time_filter": one_day_ago_iso,
102 | "limit": 10
103 | }
104 | )
105 |
106 | assert response.status_code == 200
107 | data = response.json()
108 |
109 | # Should only return the recent task (not the 2-day-old task)
110 | assert len(data["memories"]) == 1
111 | assert "recent" in data["memories"][0]["tags"]
112 | assert "Recent task from today" in data["memories"][0]["content"]
113 |
114 |
115 | @pytest.mark.asyncio
116 | @pytest.mark.integration
117 | async def test_api_search_by_tag_with_time_filter_excludes_old(storage_with_test_data):
118 | """Test POST /api/search/by-tag with time_filter excludes old memories."""
119 | from mcp_memory_service.web.app import app
120 | set_storage(storage_with_test_data)
121 |
122 | client = TestClient(app)
123 |
124 | # Search for "old" tag with time_filter = 10 seconds ago
125 | # Should return empty because all "old" memories are > 2 days old
126 | ten_seconds_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - 10))
127 |
128 | response = client.post(
129 | "/api/search/by-tag",
130 | json={
131 | "tags": ["old"],
132 | "time_filter": ten_seconds_ago_iso,
133 | "limit": 10
134 | }
135 | )
136 |
137 | assert response.status_code == 200
138 | data = response.json()
139 |
140 | # Should return empty (all "old" memories are from 2-3 days ago)
141 | assert len(data["memories"]) == 0
142 |
143 |
144 | @pytest.mark.asyncio
145 | @pytest.mark.integration
146 | async def test_api_search_by_tag_without_time_filter_backward_compat(storage_with_test_data):
147 | """Test POST /api/search/by-tag without time_filter returns all matching memories (backward compatibility)."""
148 | from mcp_memory_service.web.app import app
149 | set_storage(storage_with_test_data)
150 |
151 | client = TestClient(app)
152 |
153 | # Search for "task" tag WITHOUT time_filter
154 | response = client.post(
155 | "/api/search/by-tag",
156 | json={
157 | "tags": ["task"],
158 | "limit": 10
159 | }
160 | )
161 |
162 | assert response.status_code == 200
163 | data = response.json()
164 |
165 | # Should return BOTH task memories (old and recent)
166 | assert len(data["memories"]) == 2
167 | tags_list = [tag for mem in data["memories"] for tag in mem["tags"]]
168 | assert "old" in tags_list
169 | assert "recent" in tags_list
170 |
171 |
172 | @pytest.mark.asyncio
173 | @pytest.mark.integration
174 | async def test_api_search_by_tag_with_empty_time_filter(storage_with_test_data):
175 | """Test POST /api/search/by-tag with empty time_filter string is ignored."""
176 | from mcp_memory_service.web.app import app
177 | set_storage(storage_with_test_data)
178 |
179 | client = TestClient(app)
180 |
181 | # Search with empty time_filter (should be treated as no filter)
182 | response = client.post(
183 | "/api/search/by-tag",
184 | json={
185 | "tags": ["task"],
186 | "time_filter": "",
187 | "limit": 10
188 | }
189 | )
190 |
191 | assert response.status_code == 200
192 | data = response.json()
193 |
194 | # Should return both task memories (empty filter ignored)
195 | assert len(data["memories"]) == 2
196 |
197 |
198 | @pytest.mark.asyncio
199 | @pytest.mark.integration
200 | async def test_api_search_by_tag_with_natural_language_time_filter(storage_with_test_data):
201 | """Test POST /api/search/by-tag with natural language time expressions."""
202 | from mcp_memory_service.web.app import app
203 | set_storage(storage_with_test_data)
204 |
205 | client = TestClient(app)
206 |
207 | # Test "yesterday" - should return only recent memories
208 | response = client.post(
209 | "/api/search/by-tag",
210 | json={
211 | "tags": ["task"],
212 | "time_filter": "yesterday",
213 | "limit": 10
214 | }
215 | )
216 |
217 | assert response.status_code == 200
218 | data = response.json()
219 |
220 | # Should return only the recent task (created today, after yesterday)
221 | assert len(data["memories"]) == 1
222 | assert "recent" in data["memories"][0]["tags"]
223 |
224 |
225 | @pytest.mark.asyncio
226 | @pytest.mark.integration
227 | async def test_api_search_by_tag_time_filter_with_multiple_tags(storage_with_test_data):
228 | """Test POST /api/search/by-tag with time_filter and multiple tags."""
229 | from mcp_memory_service.web.app import app
230 | set_storage(storage_with_test_data)
231 |
232 | client = TestClient(app)
233 |
234 | # Search for multiple tags with time filter
235 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
236 |
237 | response = client.post(
238 | "/api/search/by-tag",
239 | json={
240 | "tags": ["task", "recent"], # Both tags
241 | "time_filter": one_day_ago_iso,
242 | "limit": 10
243 | }
244 | )
245 |
246 | assert response.status_code == 200
247 | data = response.json()
248 |
249 | # Should return the recent task memory
250 | assert len(data["memories"]) == 1
251 | assert "recent" in data["memories"][0]["tags"]
252 |
253 |
254 | @pytest.mark.asyncio
255 | @pytest.mark.integration
256 | async def test_api_search_by_tag_time_filter_with_match_all(storage_with_test_data):
257 | """Test POST /api/search/by-tag with time_filter and match_all parameter."""
258 | from mcp_memory_service.web.app import app
259 | set_storage(storage_with_test_data)
260 |
261 | # Store a memory with both "task" and "recent" tags
262 | both_tags_content = "Task that is both task and recent"
263 | both_tags_memory = Memory(
264 | content=both_tags_content,
265 | content_hash=generate_content_hash(both_tags_content),
266 | tags=["task", "recent"],
267 | memory_type="task",
268 | created_at=time.time()
269 | )
270 | await storage_with_test_data.store(both_tags_memory)
271 |
272 | client = TestClient(app)
273 |
274 | # Search with match_all=true and time_filter
275 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
276 |
277 | response = client.post(
278 | "/api/search/by-tag",
279 | json={
280 | "tags": ["task", "recent"],
281 | "match_all": True, # Require BOTH tags
282 | "time_filter": one_day_ago_iso,
283 | "limit": 10
284 | }
285 | )
286 |
287 | assert response.status_code == 200
288 | data = response.json()
289 |
290 | # Should return memories with BOTH tags that are recent
291 | assert len(data["memories"]) >= 1
292 | for mem in data["memories"]:
293 | assert "task" in mem["tags"]
294 | assert "recent" in mem["tags"]
295 |
296 |
297 | @pytest.mark.asyncio
298 | @pytest.mark.integration
299 | async def test_api_search_by_tag_invalid_time_filter_format(storage_with_test_data):
300 | """Test POST /api/search/by-tag with invalid time_filter returns error or empty."""
301 | from mcp_memory_service.web.app import app
302 | set_storage(storage_with_test_data)
303 |
304 | client = TestClient(app)
305 |
306 | # Search with invalid time_filter format
307 | response = client.post(
308 | "/api/search/by-tag",
309 | json={
310 | "tags": ["task"],
311 | "time_filter": "invalid-date-format",
312 | "limit": 10
313 | }
314 | )
315 |
316 | # API should handle gracefully (either 400 error or empty results)
317 | # Depending on implementation, this might return 200 with empty results
318 | # or 400 Bad Request
319 | assert response.status_code in [200, 400]
320 |
321 | if response.status_code == 200:
322 | data = response.json()
323 | # If it returns 200, should return empty or all results
324 | assert "memories" in data
325 |
326 |
327 | @pytest.mark.asyncio
328 | @pytest.mark.integration
329 | async def test_api_search_by_tag_time_filter_performance(storage_with_test_data):
330 | """Test that tag+time filtering maintains good performance (<100ms)."""
331 | from mcp_memory_service.web.app import app
332 | set_storage(storage_with_test_data)
333 |
334 | client = TestClient(app)
335 |
336 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
337 |
338 | start_time = time.time()
339 |
340 | response = client.post(
341 | "/api/search/by-tag",
342 | json={
343 | "tags": ["task"],
344 | "time_filter": one_day_ago_iso,
345 | "limit": 10
346 | }
347 | )
348 |
349 | elapsed_ms = (time.time() - start_time) * 1000
350 |
351 | assert response.status_code == 200
352 |
353 | # Performance target: <100ms for tag+time search
354 | # (may need adjustment based on hardware)
355 | assert elapsed_ms < 200, f"Tag+time search took {elapsed_ms:.2f}ms (expected <200ms)"
356 |
```
--------------------------------------------------------------------------------
/docs/guides/STORAGE_BACKENDS.md:
--------------------------------------------------------------------------------
```markdown
1 | # Storage Backend Comparison and Selection Guide
2 |
3 | **MCP Memory Service** supports two storage backends, each optimized for different use cases and hardware configurations.
4 |
5 | ## Quick Comparison
6 |
7 | | Feature | SQLite-vec 🪶 | ChromaDB 📦 |
8 | |---------|---------------|-------------|
9 | | **Setup Complexity** | ⭐⭐⭐⭐⭐ Simple | ⭐⭐⭐ Moderate |
10 | | **Startup Time** | ⭐⭐⭐⭐⭐ < 3 seconds | ⭐⭐ 15-30 seconds |
11 | | **Memory Usage** | ⭐⭐⭐⭐⭐ < 150MB | ⭐⭐ 500-800MB |
12 | | **Performance** | ⭐⭐⭐⭐ Very fast | ⭐⭐⭐⭐ Fast |
13 | | **Features** | ⭐⭐⭐ Core features | ⭐⭐⭐⭐⭐ Full-featured |
14 | | **Scalability** | ⭐⭐⭐⭐ Up to 100K items | ⭐⭐⭐⭐⭐ Unlimited |
15 | | **Legacy Hardware** | ⭐⭐⭐⭐⭐ Excellent | ⭐ Poor |
16 | | **Production Ready** | ⭐⭐⭐⭐ Yes | ⭐⭐⭐⭐⭐ Yes |
17 |
18 | ## When to Choose SQLite-vec 🪶
19 |
20 | ### Ideal For:
21 | - **Legacy Hardware**: 2015 MacBook Pro, older Intel Macs
22 | - **Resource-Constrained Systems**: < 4GB RAM, limited CPU
23 | - **Quick Setup**: Want to get started immediately
24 | - **Single-File Portability**: Easy backup and sharing
25 | - **Docker/Serverless**: Lightweight deployments
26 | - **Development/Testing**: Rapid prototyping
27 | - **HTTP/SSE API**: New web interface users
28 |
29 | ### Technical Advantages:
30 | - **Lightning Fast Startup**: Database ready in 2-3 seconds
31 | - **Minimal Dependencies**: Just SQLite and sqlite-vec extension
32 | - **Low Memory Footprint**: Typically uses < 150MB RAM
33 | - **Single File Database**: Easy to backup, move, and share
34 | - **ACID Compliance**: SQLite's proven reliability
35 | - **Zero Configuration**: Works out of the box
36 | - **ONNX Compatible**: Runs without PyTorch if needed
37 |
38 | ### Example Use Cases:
39 | ```bash
40 | # 2015 MacBook Pro scenario
41 | python install.py --legacy-hardware
42 | # Result: SQLite-vec + Homebrew PyTorch + ONNX
43 |
44 | # Docker deployment
45 | docker run -e MCP_MEMORY_STORAGE_BACKEND=sqlite_vec ...
46 |
47 | # Quick development setup
48 | python install.py --storage-backend sqlite_vec --dev
49 | ```
50 |
51 | ## When to Choose ChromaDB 📦
52 |
53 | ### Ideal For:
54 | - **Modern Hardware**: M1/M2/M3 Macs, modern Intel systems
55 | - **GPU-Accelerated Systems**: CUDA, MPS, DirectML available
56 | - **Large-Scale Deployments**: > 10,000 memories
57 | - **Advanced Features**: Complex filtering, metadata queries
58 | - **Production Systems**: Established, battle-tested platform
59 | - **Research/ML**: Advanced vector search capabilities
60 |
61 | ### Technical Advantages:
62 | - **Advanced Vector Search**: Multiple distance metrics, filtering
63 | - **Rich Metadata Support**: Complex query capabilities
64 | - **Proven Scalability**: Handles millions of vectors
65 | - **Extensive Ecosystem**: Wide tool integration
66 | - **Advanced Indexing**: HNSW and other optimized indices
67 | - **Multi-Modal Support**: Text, images, and more
68 |
69 | ### Example Use Cases:
70 | ```bash
71 | # Modern Mac with GPU
72 | python install.py # ChromaDB selected automatically
73 |
74 | # Production deployment
75 | python install.py --storage-backend chromadb --production
76 |
77 | # Research environment
78 | python install.py --storage-backend chromadb --enable-advanced-features
79 | ```
80 |
81 | ## Hardware Compatibility Matrix
82 |
83 | ### macOS Intel (2013-2017) - Legacy Hardware
84 | ```
85 | Recommended: SQLite-vec + Homebrew PyTorch + ONNX
86 | Alternative: ChromaDB (may have installation issues)
87 |
88 | Configuration:
89 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
90 | - MCP_MEMORY_USE_ONNX=1
91 | - MCP_MEMORY_USE_HOMEBREW_PYTORCH=1
92 | ```
93 |
94 | ### macOS Intel (2018+) - Modern Hardware
95 | ```
96 | Recommended: ChromaDB (default) or SQLite-vec (lightweight)
97 | Choice: User preference
98 |
99 | Configuration:
100 | - MCP_MEMORY_STORAGE_BACKEND=chromadb (default)
101 | - Hardware acceleration: CPU/MPS
102 | ```
103 |
104 | ### macOS Apple Silicon (M1/M2/M3)
105 | ```
106 | Recommended: ChromaDB with MPS acceleration
107 | Alternative: SQLite-vec for minimal resource usage
108 |
109 | Configuration:
110 | - MCP_MEMORY_STORAGE_BACKEND=chromadb
111 | - PYTORCH_ENABLE_MPS_FALLBACK=1
112 | - Hardware acceleration: MPS
113 | ```
114 |
115 | ### Windows with CUDA GPU
116 | ```
117 | Recommended: ChromaDB with CUDA acceleration
118 | Alternative: SQLite-vec for lighter deployments
119 |
120 | Configuration:
121 | - MCP_MEMORY_STORAGE_BACKEND=chromadb
122 | - CUDA optimization enabled
123 | ```
124 |
125 | ### Windows CPU-only
126 | ```
127 | Recommended: SQLite-vec
128 | Alternative: ChromaDB (higher resource usage)
129 |
130 | Configuration:
131 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
132 | - MCP_MEMORY_USE_ONNX=1 (optional)
133 | ```
134 |
135 | ### Linux Server/Headless
136 | ```
137 | Recommended: SQLite-vec (easier deployment)
138 | Alternative: ChromaDB (if resources available)
139 |
140 | Configuration:
141 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
142 | - Optimized for headless operation
143 | ```
144 |
145 | ## Performance Comparison
146 |
147 | ### Startup Time
148 | ```
149 | SQLite-vec:  2-3 seconds    ███
150 | ChromaDB:    15-30 seconds  ██████████████████████████████
151 | ```
152 |
153 | ### Memory Usage (Idle)
154 | ```
155 | SQLite-vec: ~150MB ██████
156 | ChromaDB: ~600MB ████████████████████████
157 | ```
158 |
159 | ### Search Performance (1,000 items)
160 | ```
161 | SQLite-vec:  50-200ms    █████████████
162 | ChromaDB:    100-300ms   ████████████████████
163 | ```
164 |
165 | ### Storage Efficiency
166 | ```
167 | SQLite-vec: Single .db file, ~50% smaller
168 | ChromaDB: Directory structure, full metadata
169 | ```
170 |
171 | ## Feature Comparison
172 |
173 | ### Core Features (Both Backends)
174 | - ✅ Semantic memory storage and retrieval
175 | - ✅ Tag-based organization
176 | - ✅ Natural language time-based recall
177 | - ✅ Full-text search capabilities
178 | - ✅ Automatic backups
179 | - ✅ Health monitoring
180 | - ✅ Duplicate detection
181 |
182 | ### SQLite-vec Specific Features
183 | - ✅ Single-file portability
184 | - ✅ HTTP/SSE API support
185 | - ✅ ONNX runtime compatibility
186 | - ✅ Homebrew PyTorch integration
187 | - ✅ Ultra-fast startup
188 | - ✅ Minimal resource usage
189 |
190 | ### ChromaDB Specific Features
191 | - ✅ Advanced metadata filtering
192 | - ✅ Multiple distance metrics
193 | - ✅ Collection management
194 | - ✅ Persistent client support
195 | - ✅ Advanced indexing options
196 | - ✅ Rich ecosystem integration
197 |
198 | ## Migration Between Backends
199 |
200 | ### ChromaDB → SQLite-vec Migration
201 |
202 | Perfect for upgrading legacy hardware or simplifying deployments:
203 |
204 | ```bash
205 | # Automated migration
206 | python scripts/migrate_chroma_to_sqlite.py
207 |
208 | # Manual migration with verification
209 | python install.py --migrate-from-chromadb --storage-backend sqlite_vec
210 | ```
211 |
212 | **Migration preserves:**
213 | - All memory content and embeddings
214 | - Tags and metadata
215 | - Timestamps and relationships
216 | - Search functionality
217 |
218 | ### SQLite-vec → ChromaDB Migration
219 |
220 | For scaling up to advanced features:
221 |
222 | ```bash
223 | # Export from SQLite-vec
224 | python scripts/export_sqlite_memories.py
225 |
226 | # Import to ChromaDB
227 | python scripts/import_to_chromadb.py
228 | ```
229 |
230 | ## Intelligent Selection Algorithm
231 |
232 | The installer uses this logic to recommend backends:
233 |
234 | ```python
235 | def recommend_backend(system_info, hardware_info):
236 | # Legacy hardware gets SQLite-vec
237 | if is_legacy_mac(system_info):
238 | return "sqlite_vec"
239 |
240 | # Low-memory systems get SQLite-vec
241 | if hardware_info.memory_gb < 4:
242 | return "sqlite_vec"
243 |
244 | # ChromaDB installation problems on macOS Intel
245 | if system_info.is_macos_intel_problematic:
246 | return "sqlite_vec"
247 |
248 | # Modern hardware with GPU gets ChromaDB
249 | if hardware_info.has_gpu and hardware_info.memory_gb >= 8:
250 | return "chromadb"
251 |
252 | # Default to ChromaDB for feature completeness
253 | return "chromadb"
254 | ```
255 |
256 | ## Configuration Examples
257 |
258 | ### SQLite-vec Configuration
259 | ```bash
260 | # Environment variables
261 | export MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
262 | export MCP_MEMORY_SQLITE_PATH="$HOME/.mcp-memory/memory.db"
263 | export MCP_MEMORY_USE_ONNX=1 # Optional: CPU-only inference
264 |
265 | # Claude Desktop config
266 | {
267 | "mcpServers": {
268 | "memory": {
269 | "command": "uv",
270 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
271 | "env": {
272 | "MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
273 | "MCP_MEMORY_SQLITE_PATH": "/path/to/memory.db"
274 | }
275 | }
276 | }
277 | }
278 | ```
279 |
280 | ### ChromaDB Configuration
281 |
282 | #### Local ChromaDB (Deprecated)
283 | ⚠️ **Note**: Local ChromaDB is deprecated. Consider migrating to SQLite-vec for better performance.
284 |
285 | ```bash
286 | # Environment variables
287 | export MCP_MEMORY_STORAGE_BACKEND=chromadb
288 | export MCP_MEMORY_CHROMA_PATH="$HOME/.mcp-memory/chroma_db"
289 |
290 | # Claude Desktop config
291 | {
292 | "mcpServers": {
293 | "memory": {
294 | "command": "uv",
295 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
296 | "env": {
297 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
298 | "MCP_MEMORY_CHROMA_PATH": "/path/to/chroma_db"
299 | }
300 | }
301 | }
302 | }
303 | ```
304 |
305 | #### Remote ChromaDB (Hosted/Enterprise)
306 | 🌐 **New**: Connect to remote ChromaDB servers, Chroma Cloud, or self-hosted instances.
307 |
308 | ```bash
309 | # Environment variables for remote ChromaDB
310 | export MCP_MEMORY_STORAGE_BACKEND=chromadb
311 | export MCP_MEMORY_CHROMADB_HOST="chroma.example.com"
312 | export MCP_MEMORY_CHROMADB_PORT="8000"
313 | export MCP_MEMORY_CHROMADB_SSL="true"
314 | export MCP_MEMORY_CHROMADB_API_KEY="your-api-key-here"
315 | export MCP_MEMORY_COLLECTION_NAME="my-collection"
316 |
317 | # Claude Desktop config for remote ChromaDB
318 | {
319 | "mcpServers": {
320 | "memory": {
321 | "command": "uv",
322 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
323 | "env": {
324 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
325 | "MCP_MEMORY_CHROMADB_HOST": "chroma.example.com",
326 | "MCP_MEMORY_CHROMADB_PORT": "8000",
327 | "MCP_MEMORY_CHROMADB_SSL": "true",
328 | "MCP_MEMORY_CHROMADB_API_KEY": "your-api-key-here",
329 | "MCP_MEMORY_COLLECTION_NAME": "my-collection"
330 | }
331 | }
332 | }
333 | }
334 | ```
335 |
336 | #### Remote ChromaDB Hosting Options
337 |
338 | **Chroma Cloud (Early Access)**
339 | - Official hosted service by ChromaDB
340 | - Early access available, full launch Q1 2025
341 | - $5 free credits to start
342 | - Visit: [trychroma.com](https://trychroma.com)
343 |
344 | **Self-Hosted Options**
345 | - **Elest.io**: Fully managed ChromaDB deployment
346 | - **AWS**: Use CloudFormation template (requires 2GB+ RAM)
347 | - **Google Cloud Run**: Container-based deployment
348 | - **Docker**: Self-hosted with authentication
349 |
350 | **Example Docker Configuration**
351 | ```bash
352 | # Start ChromaDB server with authentication
353 | docker run -p 8000:8000 \
354 | -e CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER="chromadb.auth.token.TokenConfigServerAuthCredentialsProvider" \
355 | -e CHROMA_SERVER_AUTH_PROVIDER="chromadb.auth.token.TokenAuthServerProvider" \
356 | -e CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER="X_CHROMA_TOKEN" \
357 | -e CHROMA_SERVER_AUTH_CREDENTIALS="test-token" \
358 | -v /path/to/chroma-data:/chroma/chroma \
359 | chromadb/chroma
360 | ```
361 |
362 | ## Decision Flowchart
363 |
364 | ```
365 | Start: Choose Storage Backend
366 | ├── Do you have legacy hardware (2013-2017 Mac)?
367 | │ ├── Yes → SQLite-vec (optimized path)
368 | │ └── No → Continue
369 | ├── Do you have < 4GB RAM?
370 | │ ├── Yes → SQLite-vec (resource efficient)
371 | │ └── No → Continue
372 | ├── Do you need HTTP/SSE API?
373 | │ ├── Yes → SQLite-vec (first-class support)
374 | │ └── No → Continue
375 | ├── Do you want minimal setup?
376 | │ ├── Yes → SQLite-vec (zero config)
377 | │ └── No → Continue
378 | ├── Do you need advanced vector search features?
379 | │ ├── Yes → ChromaDB (full-featured)
380 | │ └── No → Continue
381 | ├── Do you have modern hardware with GPU?
382 | │ ├── Yes → ChromaDB (hardware acceleration)
383 | │ └── No → Continue
384 | └── Default → ChromaDB (established platform)
385 | ```
386 |
387 | ## Getting Help
388 |
389 | ### Backend-Specific Support
390 | - **SQLite-vec issues**: Tag with `sqlite-vec` label
391 | - **ChromaDB issues**: Tag with `chromadb` label
392 | - **Migration issues**: Use `migration` label
393 |
394 | ### Community Resources
395 | - **Backend comparison discussions**: GitHub Discussions
396 | - **Performance benchmarks**: Community wiki
397 | - **Hardware compatibility**: Hardware compatibility matrix
398 |
399 | ### Documentation Links
400 | - [SQLite-vec Backend Guide](../sqlite-vec-backend.md)
401 | - [Migration Guide](migration.md)
402 | - [Legacy Hardware Guide](../platforms/macos-intel.md)
403 | - [Installation Master Guide](../installation/master-guide.md)
```
--------------------------------------------------------------------------------
/.claude/agents/amp-pr-automator.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | name: amp-pr-automator
3 | description: Lightweight PR automation using Amp CLI for code quality checks, test generation, and fix suggestions. Avoids OAuth friction of gemini-pr-automator while providing fast, parallel quality analysis. Uses file-based prompt/response workflow for async execution. Ideal for pre-PR checks and developer-driven automation.
4 | model: sonnet
5 | color: purple
6 | ---
7 |
8 | You are an elite PR Automation Specialist using Amp CLI for lightweight, OAuth-free PR automation. Your mission is to provide fast code quality analysis, test generation, and fix suggestions without the browser authentication interruptions of Gemini CLI.
9 |
10 | ## Core Responsibilities
11 |
12 | 1. **Quality Gate Checks**: Parallel complexity, security, and type hint analysis
13 | 2. **Test Generation**: Create pytest tests for new/modified code
14 | 3. **Fix Suggestions**: Analyze review feedback and suggest improvements
15 | 4. **Breaking Change Detection**: Identify potential API breaking changes
16 | 5. **Result Aggregation**: Collect and summarize Amp analysis results
17 |
18 | ## Problem Statement
19 |
20 | **Gemini CLI Issues**:
21 | - OAuth browser flow interrupts automation
22 | - Sequential processing (slow for multiple checks)
23 | - Rate limiting for complex analysis
24 |
25 | **Amp CLI Solution**:
26 | - File-based prompts (no interactive auth)
27 | - Parallel processing (multiple Amp instances)
28 | - Fast inference with execute mode
29 | - Credit conservation through focused tasks
30 |
31 | ## Amp CLI Integration
32 |
33 | ### File-Based Workflow
34 |
35 | ```
36 | 1. Create prompt → .claude/amp/prompts/pending/{uuid}.json
37 | 2. User runs → amp @.claude/amp/prompts/pending/{uuid}.json
38 | 3. Amp writes → .claude/amp/responses/ready/{uuid}.json
39 | 4. Scripts read → Aggregate results
40 | ```
41 |
42 | ### Parallel Execution Pattern
43 |
44 | ```bash
45 | # Launch multiple Amp tasks in parallel
46 | amp @prompts/pending/complexity-{uuid}.json > /tmp/amp-complexity.log 2>&1 &
47 | amp @prompts/pending/security-{uuid}.json > /tmp/amp-security.log 2>&1 &
48 | amp @prompts/pending/typehints-{uuid}.json > /tmp/amp-typehints.log 2>&1 &
49 |
50 | # Wait for all to complete
51 | wait
52 |
53 | # Collect results
54 | bash scripts/pr/amp_collect_results.sh --timeout 300
55 | ```
56 |
57 | ## Shell Scripts
58 |
59 | ### 1. Quality Gate (Parallel Checks)
60 |
61 | **File**: `scripts/pr/amp_quality_gate.sh`
62 |
63 | Launches parallel Amp instances for:
64 | - Complexity scoring (functions >7)
65 | - Security vulnerabilities (SQL injection, XSS, command injection)
66 | - Type hint coverage
67 | - Import organization
68 |
69 | **Usage**:
70 | ```bash
71 | bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
72 | ```
73 |
74 | **Output**: Quality gate pass/fail with detailed breakdown
75 |
76 | ### 2. Result Collection
77 |
78 | **File**: `scripts/pr/amp_collect_results.sh`
79 |
80 | Polls `.claude/amp/responses/ready/` for completed Amp analyses.
81 |
82 | **Usage**:
83 | ```bash
84 | bash scripts/pr/amp_collect_results.sh --timeout 300 --uuids "uuid1,uuid2,uuid3"
85 | ```
86 |
87 | **Features**:
88 | - Timeout handling (default: 5 minutes)
89 | - Partial results if some tasks fail
90 | - JSON aggregation
91 |
92 | ### 3. Fix Suggestions
93 |
94 | **File**: `scripts/pr/amp_suggest_fixes.sh`
95 |
96 | Analyzes review feedback and generates fix suggestions (no auto-apply).
97 |
98 | **Usage**:
99 | ```bash
100 | bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
101 | ```
102 |
103 | **Output**: Suggested fixes saved to `/tmp/amp_fixes_{PR_NUMBER}.txt`
104 |
105 | ### 4. Test Generation
106 |
107 | **File**: `scripts/pr/amp_generate_tests.sh`
108 |
109 | Creates pytest tests for changed Python files.
110 |
111 | **Usage**:
112 | ```bash
113 | bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
114 | ```
115 |
116 | **Output**: Test files written to `/tmp/amp_tests/test_*.py`
117 |
118 | ### 5. Breaking Change Detection
119 |
120 | **File**: `scripts/pr/amp_detect_breaking_changes.sh`
121 |
122 | Analyzes API changes for breaking modifications.
123 |
124 | **Usage**:
125 | ```bash
126 | bash scripts/pr/amp_detect_breaking_changes.sh <BASE_BRANCH> <HEAD_BRANCH>
127 | ```
128 |
129 | **Output**: Breaking changes report with severity (CRITICAL/HIGH/MEDIUM)
130 |
131 | ### 6. Complete PR Review Workflow
132 |
133 | **File**: `scripts/pr/amp_pr_review.sh`
134 |
135 | Orchestrates full PR review cycle:
136 | 1. Quality gate checks
137 | 2. Test generation
138 | 3. Breaking change detection
139 | 4. Fix suggestions
140 |
141 | **Usage**:
142 | ```bash
143 | bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
144 | ```
145 |
146 | ## Operational Workflows
147 |
148 | ### 1. Pre-PR Quality Check (Developer-Driven)
149 |
150 | ```bash
151 | # Before creating PR, run quality checks
152 | bash scripts/pr/amp_quality_gate.sh 0 # Use 0 for local branch
153 |
154 | # Review results
155 | cat /tmp/amp_quality_results.json | jq '.summary'
156 |
157 | # Address issues before creating PR
158 | ```
159 |
160 | ### 2. Post-PR Analysis (Review Automation)
161 |
162 | ```bash
163 | # After PR created, run complete analysis
164 | bash scripts/pr/amp_pr_review.sh 215
165 |
166 | # Review outputs:
167 | # - /tmp/amp_quality_results.json
168 | # - /tmp/amp_tests/
169 | # - /tmp/amp_fixes_215.txt
170 | # - /tmp/amp_breaking_changes.txt
171 | ```
172 |
173 | ### 3. Incremental Iteration (Fix → Recheck)
174 |
175 | ```bash
176 | # After applying fixes, re-run quality gate
177 | bash scripts/pr/amp_quality_gate.sh 215
178 |
179 | # Compare before/after
180 | diff /tmp/amp_quality_results_v1.json /tmp/amp_quality_results_v2.json
181 | ```
182 |
183 | ## Decision-Making Framework
184 |
185 | ### When to Use amp-pr-automator vs gemini-pr-automator
186 |
187 | | Scenario | Use amp-pr-automator | Use gemini-pr-automator |
188 | |----------|---------------------|------------------------|
189 | | **Pre-PR checks** | ✅ Fast parallel analysis | ❌ OAuth interrupts flow |
190 | | **Developer-driven** | ✅ File-based control | ❌ Requires manual OAuth |
191 | | **CI/CD integration** | ✅ No browser needed | ❌ OAuth not CI-friendly |
192 | | **Auto-fix application** | ❌ Manual fixes only | ✅ Full automation |
193 | | **Inline comment handling** | ❌ No GitHub integration | ✅ GraphQL thread resolution |
194 | | **Complex iteration** | ❌ Manual workflow | ✅ Full review loop |
195 |
196 | **Use amp-pr-automator for**:
197 | - Pre-PR quality checks (before creating PR)
198 | - Developer-driven analysis (you control timing)
199 | - Parallel processing (multiple checks simultaneously)
200 | - OAuth-free automation (CI/CD, scripts)
201 |
202 | **Use gemini-pr-automator for**:
203 | - Full automated review loops
204 | - Auto-fix application
205 | - GitHub inline comment handling
206 | - Continuous watch mode
207 |
208 | ### Hybrid Approach (RECOMMENDED)
209 |
210 | ```bash
211 | # 1. Pre-PR: Use Amp for quality gate
212 | bash scripts/pr/amp_quality_gate.sh 0
213 |
214 | # 2. Create PR (github-release-manager)
215 | gh pr create --title "feat: new feature" --body "..."
216 |
217 | # 3. Post-PR: Use Gemini for automated review
218 | bash scripts/pr/auto_review.sh 215 5 true
219 | ```
220 |
221 | ## Prompt Engineering for Amp
222 |
223 | ### Complexity Analysis Prompt
224 |
225 | ```
226 | Analyze code complexity for each function in this file.
227 |
228 | Rating scale: 1-10 (1=simple, 10=very complex)
229 |
230 | ONLY report functions with score >7 in this exact format:
231 | FunctionName: Score X - Reason
232 |
233 | If all functions score ≤7, respond: "COMPLEXITY_OK"
234 |
235 | File content:
236 | {file_content}
237 | ```
238 |
239 | ### Security Scan Prompt
240 |
241 | ```
242 | Security audit for vulnerabilities:
243 | - SQL injection (raw SQL, string formatting in queries)
244 | - XSS (unescaped HTML output)
245 | - Command injection (os.system, subprocess with shell=True)
246 | - Path traversal (user input in file paths)
247 | - Hardcoded secrets (API keys, passwords)
248 |
249 | IMPORTANT: Output format:
250 | - If ANY vulnerability found: VULNERABILITY_DETECTED: [type]
251 | - If NO vulnerabilities: SECURITY_CLEAN
252 |
253 | File content:
254 | {file_content}
255 | ```
256 |
257 | ### Type Hint Coverage Prompt
258 |
259 | ```
260 | Check type hint coverage for this Python file.
261 |
262 | Report:
263 | 1. Total functions/methods
264 | 2. Functions with complete type hints
265 | 3. Functions missing type hints (list names)
266 | 4. Coverage percentage
267 |
268 | Output format:
269 | COVERAGE: X%
270 | MISSING: function1, function2, ...
271 |
272 | File content:
273 | {file_content}
274 | ```
275 |
276 | ## Integration with Other Agents
277 |
278 | ### github-release-manager
279 | - Creates PRs → amp-pr-automator runs pre-PR checks
280 | - Merges PRs → amp-pr-automator validates quality gates
281 |
282 | ### gemini-pr-automator
283 | - amp-pr-automator runs quality gate first
284 | - If passed, gemini-pr-automator handles review iteration
285 |
286 | ### code-quality-guard
287 | - Pre-commit hooks use Groq/Gemini for local checks
288 | - amp-pr-automator for PR-level analysis
289 |
290 | ## Project-Specific Patterns
291 |
292 | ### MCP Memory Service PR Standards
293 |
294 | **Quality Gate Requirements**:
295 | - ✅ Code complexity ≤7 for all functions
296 | - ✅ No security vulnerabilities
297 | - ✅ Type hints on new functions (80% coverage)
298 | - ✅ Import organization (stdlib → third-party → local)
299 |
300 | **File-Based Workflow Benefits**:
301 | - Developer reviews prompt before running Amp
302 | - Amp responses saved for audit trail
303 | - Easy to re-run specific checks
304 | - No OAuth interruptions during work
305 |
306 | ## Usage Examples
307 |
308 | ### Quick Quality Check
309 |
310 | ```bash
311 | # Run quality gate for PR #215
312 | bash scripts/pr/amp_quality_gate.sh 215
313 |
314 | # Wait for prompts to be created
315 | # Review prompts: ls -la .claude/amp/prompts/pending/
316 |
317 | # Run each Amp task shown in output
318 | amp @.claude/amp/prompts/pending/{complexity-uuid}.json &
319 | amp @.claude/amp/prompts/pending/{security-uuid}.json &
320 | amp @.claude/amp/prompts/pending/{typehints-uuid}.json &
321 |
322 | # Collect results
323 | bash scripts/pr/amp_collect_results.sh --timeout 300
324 | ```
325 |
326 | ### Generate Tests Only
327 |
328 | ```bash
329 | # Generate tests for PR #215
330 | bash scripts/pr/amp_generate_tests.sh 215
331 |
332 | # Run Amp task
333 | amp @.claude/amp/prompts/pending/{tests-uuid}.json
334 |
335 | # Review generated tests
336 | ls -la /tmp/amp_tests/
337 | ```
338 |
339 | ### Breaking Change Detection
340 |
341 | ```bash
342 | # Check for breaking changes
343 | bash scripts/pr/amp_detect_breaking_changes.sh main feature/new-api
344 |
345 | # Run Amp task
346 | amp @.claude/amp/prompts/pending/{breaking-uuid}.json
347 |
348 | # View report
349 | cat /tmp/amp_breaking_changes.txt
350 | ```
351 |
352 | ## Best Practices
353 |
354 | 1. **Review Prompts Before Running**: Inspect `.claude/amp/prompts/pending/` to verify Amp tasks
355 | 2. **Parallel Execution**: Launch multiple Amp instances for speed
356 | 3. **Timeout Handling**: Use `amp_collect_results.sh --timeout` to prevent indefinite waits
357 | 4. **Incremental Checks**: Re-run specific checks (complexity only, security only) as needed
358 | 5. **Audit Trail**: Keep Amp responses in `.claude/amp/responses/consumed/` for review
359 | 6. **Hybrid Workflow**: Use Amp for pre-PR, Gemini for post-PR automation
360 |
361 | ## Limitations
362 |
363 | - **No Auto-Fix**: Amp suggests fixes, manual application required
364 | - **No GitHub Integration**: Cannot resolve PR review threads automatically
365 | - **Manual Workflow**: User must run Amp commands (not fully automated)
366 | - **Credit Consumption**: Still uses Amp API credits (separate from Claude Code)
367 | - **Context Limits**: Large files may need chunking for Amp analysis
368 |
369 | ## Performance Considerations
370 |
371 | - **Parallel Processing**: 3-5 Amp tasks in parallel = ~2-3 minutes total
372 | - **Sequential (Gemini)**: Same checks = ~10-15 minutes
373 | - **Time Savings**: 70-80% faster for quality gate checks
374 | - **Credit Efficiency**: Focused prompts consume fewer tokens
375 |
376 | ## Success Metrics
377 |
378 | - ✅ **Speed**: Quality gate completes in <3 minutes (vs 10-15 with Gemini)
379 | - ✅ **No OAuth**: Zero browser interruptions during PR workflow
380 | - ✅ **Parallel Efficiency**: 5 checks run simultaneously
381 | - ✅ **Developer Control**: File-based workflow allows prompt inspection
382 | - ✅ **Audit Trail**: All prompts/responses saved for review
383 |
384 | ---
385 |
386 | **Quick Reference Card**:
387 |
388 | ```bash
389 | # Quality gate (parallel checks)
390 | bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
391 |
392 | # Collect Amp results
393 | bash scripts/pr/amp_collect_results.sh --timeout 300
394 |
395 | # Generate tests
396 | bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
397 |
398 | # Suggest fixes
399 | bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
400 |
401 | # Breaking changes
402 | bash scripts/pr/amp_detect_breaking_changes.sh <BASE> <HEAD>
403 |
404 | # Complete PR review
405 | bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
406 | ```
407 |
408 | **Workflow Integration**:
409 |
410 | ```bash
411 | # Pre-PR: Quality checks (Amp)
412 | bash scripts/pr/amp_quality_gate.sh 0
413 |
414 | # Create PR
415 | gh pr create --title "feat: X" --body "..."
416 |
417 | # Post-PR: Automated review (Gemini)
418 | bash scripts/pr/auto_review.sh 215 5 true
419 | ```
420 |
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/chunker.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Intelligent text chunking strategies for document ingestion.
17 | """
18 |
19 | import re
20 | import logging
21 | from typing import List, Dict, Any, Optional, Tuple
22 | from dataclasses import dataclass
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 |
@dataclass
class ChunkingStrategy:
    """Tunable parameters that control how text is split into chunks."""
    chunk_size: int = 1000                      # Target number of characters per chunk
    chunk_overlap: int = 200                    # Characters carried over between adjacent chunks
    respect_sentence_boundaries: bool = True    # Prefer breaking at sentence ends
    respect_paragraph_boundaries: bool = True   # Prefer breaking at paragraph ends
    min_chunk_size: int = 100                   # Chunks shorter than this are discarded
35 |
36 |
37 | class TextChunker:
38 | """
39 | Intelligent text chunking that respects document structure.
40 |
41 | Provides multiple chunking strategies:
42 | - Sentence-aware chunking
43 | - Paragraph-aware chunking
44 | - Token-based chunking
45 | - Custom delimiter chunking
46 | """
47 |
48 | def __init__(self, strategy: ChunkingStrategy = None):
49 | """
50 | Initialize text chunker.
51 |
52 | Args:
53 | strategy: Chunking configuration to use
54 | """
55 | self.strategy = strategy or ChunkingStrategy()
56 |
57 | # Sentence boundary patterns
58 | self.sentence_endings = re.compile(r'[.!?]+\s+')
59 | self.paragraph_separator = re.compile(r'\n\s*\n')
60 |
61 | # Common section headers (for structured documents)
62 | self.section_headers = re.compile(
63 | r'^(#{1,6}\s+|Chapter\s+\d+|Section\s+\d+|Part\s+\d+|\d+\.\s+)',
64 | re.MULTILINE | re.IGNORECASE
65 | )
66 |
67 | def chunk_text(self, text: str, metadata: Dict[str, Any] = None) -> List[Tuple[str, Dict[str, Any]]]:
68 | """
69 | Split text into chunks using the configured strategy.
70 |
71 | Args:
72 | text: Text content to chunk
73 | metadata: Base metadata to include with each chunk
74 |
75 | Returns:
76 | List of (chunk_text, chunk_metadata) tuples
77 | """
78 | if not text or len(text.strip()) < self.strategy.min_chunk_size:
79 | return []
80 |
81 | metadata = metadata or {}
82 |
83 | # Try different chunking strategies in order of preference
84 | if self.strategy.respect_paragraph_boundaries:
85 | chunks = self._chunk_by_paragraphs(text)
86 | elif self.strategy.respect_sentence_boundaries:
87 | chunks = self._chunk_by_sentences(text)
88 | else:
89 | chunks = self._chunk_by_characters(text)
90 |
91 | # Add metadata to each chunk
92 | result = []
93 | for i, chunk_text in enumerate(chunks):
94 | chunk_metadata = metadata.copy()
95 | chunk_metadata.update({
96 | 'chunk_index': i,
97 | 'chunk_length': len(chunk_text),
98 | 'total_chunks': len(chunks),
99 | 'chunking_strategy': self._get_strategy_name()
100 | })
101 | result.append((chunk_text, chunk_metadata))
102 |
103 | logger.debug(f"Created {len(result)} chunks from {len(text)} characters")
104 | return result
105 |
106 | def _chunk_by_paragraphs(self, text: str) -> List[str]:
107 | """
108 | Chunk text by paragraph boundaries, respecting size limits.
109 |
110 | Args:
111 | text: Text to chunk
112 |
113 | Returns:
114 | List of text chunks
115 | """
116 | paragraphs = self.paragraph_separator.split(text)
117 | chunks = []
118 | current_chunk = ""
119 |
120 | for paragraph in paragraphs:
121 | paragraph = paragraph.strip()
122 | if not paragraph:
123 | continue
124 |
125 | # If adding this paragraph would exceed chunk size
126 | if (len(current_chunk) + len(paragraph) + 2 > self.strategy.chunk_size
127 | and len(current_chunk) > 0):
128 |
129 | # Finalize current chunk
130 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
131 | chunks.append(current_chunk.strip())
132 |
133 | # Start new chunk with overlap
134 | overlap = self._get_overlap_text(current_chunk)
135 | current_chunk = overlap + paragraph
136 | else:
137 | # Add paragraph to current chunk
138 | if current_chunk:
139 | current_chunk += "\n\n" + paragraph
140 | else:
141 | current_chunk = paragraph
142 |
143 | # Add remaining text
144 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
145 | chunks.append(current_chunk.strip())
146 |
147 | return chunks
148 |
149 | def _chunk_by_sentences(self, text: str) -> List[str]:
150 | """
151 | Chunk text by sentence boundaries, respecting size limits.
152 |
153 | Args:
154 | text: Text to chunk
155 |
156 | Returns:
157 | List of text chunks
158 | """
159 | sentences = self.sentence_endings.split(text)
160 | chunks = []
161 | current_chunk = ""
162 |
163 | for sentence in sentences:
164 | sentence = sentence.strip()
165 | if not sentence:
166 | continue
167 |
168 | # If adding this sentence would exceed chunk size
169 | if (len(current_chunk) + len(sentence) + 1 > self.strategy.chunk_size
170 | and len(current_chunk) > 0):
171 |
172 | # Finalize current chunk
173 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
174 | chunks.append(current_chunk.strip())
175 |
176 | # Start new chunk with overlap
177 | overlap = self._get_overlap_text(current_chunk)
178 | current_chunk = overlap + sentence
179 | else:
180 | # Add sentence to current chunk
181 | if current_chunk:
182 | current_chunk += " " + sentence
183 | else:
184 | current_chunk = sentence
185 |
186 | # Add remaining text
187 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
188 | chunks.append(current_chunk.strip())
189 |
190 | return chunks
191 |
192 | def _chunk_by_characters(self, text: str) -> List[str]:
193 | """
194 | Chunk text by character count with overlap.
195 |
196 | Args:
197 | text: Text to chunk
198 |
199 | Returns:
200 | List of text chunks
201 | """
202 | if len(text) <= self.strategy.chunk_size:
203 | return [text]
204 |
205 | chunks = []
206 | start = 0
207 |
208 | while start < len(text):
209 | end = start + self.strategy.chunk_size
210 |
211 | # If this is not the last chunk, try to find a good break point
212 | if end < len(text):
213 | # Look for space to avoid breaking words
214 | for i in range(end, max(start + self.strategy.min_chunk_size, end - 100), -1):
215 | if text[i].isspace():
216 | end = i
217 | break
218 |
219 | chunk = text[start:end].strip()
220 | if len(chunk) >= self.strategy.min_chunk_size:
221 | chunks.append(chunk)
222 |
223 | # Move start position with overlap
224 | start = max(start + 1, end - self.strategy.chunk_overlap)
225 |
226 | return chunks
227 |
228 | def _get_overlap_text(self, text: str) -> str:
229 | """
230 | Get overlap text from the end of a chunk.
231 |
232 | Args:
233 | text: Text to extract overlap from
234 |
235 | Returns:
236 | Overlap text to include in next chunk
237 | """
238 | if len(text) <= self.strategy.chunk_overlap:
239 | return text + " "
240 |
241 | overlap = text[-self.strategy.chunk_overlap:]
242 |
243 | # Try to start overlap at a sentence boundary
244 | sentences = self.sentence_endings.split(overlap)
245 | if len(sentences) > 1:
246 | overlap = " ".join(sentences[1:])
247 |
248 | return overlap + " " if overlap else ""
249 |
250 | def _get_strategy_name(self) -> str:
251 | """Get human-readable name for current chunking strategy."""
252 | if self.strategy.respect_paragraph_boundaries:
253 | return "paragraph_aware"
254 | elif self.strategy.respect_sentence_boundaries:
255 | return "sentence_aware"
256 | else:
257 | return "character_based"
258 |
259 | def chunk_by_sections(self, text: str, metadata: Dict[str, Any] = None) -> List[Tuple[str, Dict[str, Any]]]:
260 | """
261 | Chunk text by document sections (headers, chapters, etc.).
262 |
263 | Args:
264 | text: Text content to chunk
265 | metadata: Base metadata to include with each chunk
266 |
267 | Returns:
268 | List of (chunk_text, chunk_metadata) tuples
269 | """
270 | metadata = metadata or {}  # treat a missing metadata dict as empty
271 |
272 | # Find section boundaries
273 | section_matches = list(self.section_headers.finditer(text))
274 | if not section_matches:
275 | # No sections found, use regular chunking
276 | return self.chunk_text(text, metadata)
277 |
278 | chunks = []
279 | section_start = 0
280 |
281 | for i, match in enumerate(section_matches):  # NOTE(review): the slice emitted at index i is the text *before* header i, so any preamble gets section_index 0 and header k's body gets index k+1
282 | section_end = match.start()  # current section ends where this header begins
283 |
284 | # Extract previous section if it exists
285 | if section_start < section_end:
286 | section_text = text[section_start:section_end].strip()
287 | if len(section_text) >= self.strategy.min_chunk_size:
288 | section_metadata = metadata.copy()
289 | section_metadata.update({
290 | 'section_index': i,
291 | 'is_section': True,
292 | 'section_start': section_start,
293 | 'section_end': section_end
294 | })
295 |
296 | # If section is too large, sub-chunk it
297 | if len(section_text) > self.strategy.chunk_size * 2:
298 | sub_chunks = self.chunk_text(section_text, section_metadata)
299 | chunks.extend(sub_chunks)
300 | else:
301 | chunks.append((section_text, section_metadata))
302 |
303 | section_start = match.start()  # next section's text starts at this header
304 |
305 | # Handle final section
306 | if section_start < len(text):  # tail from the last header to end-of-text
307 | final_text = text[section_start:].strip()
308 | if len(final_text) >= self.strategy.min_chunk_size:
309 | final_metadata = metadata.copy()
310 | final_metadata.update({
311 | 'section_index': len(section_matches),
312 | 'is_section': True,
313 | 'section_start': section_start,
314 | 'section_end': len(text)
315 | })
316 |
317 | if len(final_text) > self.strategy.chunk_size * 2:
318 | sub_chunks = self.chunk_text(final_text, final_metadata)
319 | chunks.extend(sub_chunks)
320 | else:
321 | chunks.append((final_text, final_metadata))
322 |
323 | return chunks
```
--------------------------------------------------------------------------------
/docs/maintenance/memory-maintenance.md:
--------------------------------------------------------------------------------
```markdown
1 | # Memory Maintenance Guide
2 |
3 | A comprehensive guide for maintaining and organizing your MCP Memory Service knowledge base through systematic review, analysis, and re-categorization processes.
4 |
5 | ## 🎯 Overview
6 |
7 | Memory maintenance is essential for keeping your knowledge base organized, searchable, and valuable over time. This guide provides practical workflows for identifying poorly organized memories and transforming them into a well-structured knowledge system.
8 |
9 | ## 📋 Quick Start
10 |
11 | ### Basic Maintenance Session
12 |
13 | 1. **Identify untagged memories**: `retrieve_memory({"query": "untagged memories", "n_results": 20})`
14 | 2. **Analyze content themes**: Look for projects, technologies, activities, status indicators
15 | 3. **Apply standardized tags**: Use consistent categorization schema
16 | 4. **Replace old memories**: Create new tagged version, delete old untagged version
17 | 5. **Document results**: Store summary of maintenance session
18 |
19 | ### Maintenance Schedule Recommendations
20 |
21 | - **Weekly**: Review memories from past 7 days
22 | - **Monthly**: Comprehensive review of recent memories + spot check older ones
23 | - **Quarterly**: Full database health check and optimization
24 |
25 | ## 🔍 Step-by-Step Maintenance Process
26 |
27 | ### Phase 1: Assessment and Planning
28 |
29 | #### 1.1 Database Health Check
30 |
31 | ```javascript
32 | // Check overall database status
33 | check_database_health()
34 | ```
35 |
36 | **What to look for:**
37 | - Total memory count
38 | - Database health status
39 | - Recent activity patterns
40 | - Error indicators
41 |
42 | #### 1.2 Identify Untagged Memories
43 |
44 | **Search Strategy:**
45 | ```javascript
46 | // Primary search for untagged memories
47 | retrieve_memory({
48 | "n_results": 15,
49 | "query": "untagged memories without tags minimal tags single tag"
50 | })
51 |
52 | // Alternative searches
53 | retrieve_memory({"query": "test memory basic simple concept", "n_results": 20})
54 | recall_memory({"query": "memories from last week", "n_results": 25})
55 | ```
56 |
57 | **Identification Criteria:**
58 | - Memories with no tags
59 | - Memories with only generic tags (`test`, `memory`, `note`)
60 | - Memories with inconsistent tag formats
61 | - Old memories that predate tag standardization
62 |
63 | #### 1.3 Categorize by Priority
64 |
65 | **High Priority:**
66 | - Frequently accessed memories
67 | - Critical project information
68 | - Recent important developments
69 |
70 | **Medium Priority:**
71 | - Historical documentation
72 | - Reference materials
73 | - Tutorial content
74 |
75 | **Low Priority:**
76 | - Test memories (evaluate for deletion)
77 | - Outdated information
78 | - Duplicate content
79 |
80 | ### Phase 2: Analysis and Categorization
81 |
82 | #### 2.1 Content Theme Analysis
83 |
84 | For each identified memory, analyze:
85 |
86 | **Project Context:**
87 | - Which project does this relate to?
88 | - Is it part of a larger initiative?
89 | - What's the project phase/status?
90 |
91 | **Technology Stack:**
92 | - Programming languages mentioned
93 | - Frameworks and libraries
94 | - Tools and platforms
95 | - Databases and services
96 |
97 | **Activity Type:**
98 | - Development work
99 | - Testing and debugging
100 | - Documentation
101 | - Research and planning
102 | - Issue resolution
103 |
104 | **Content Classification:**
105 | - Concept or idea
106 | - Tutorial or guide
107 | - Reference material
108 | - Troubleshooting solution
109 | - Best practice
110 |
111 | #### 2.2 Tag Assignment Strategy
112 |
113 | **Multi-Category Tagging:**
114 | Apply tags from multiple categories for comprehensive organization:
115 |
116 | ```javascript
117 | // Example: Well-tagged memory
118 | {
119 | "tags": [
120 | "mcp-memory-service", // Project
121 | "python", "chromadb", // Technologies
122 | "debugging", "testing", // Activities
123 | "resolved", // Status
124 | "backend", // Domain
125 | "troubleshooting" // Content type
126 | ]
127 | }
128 | ```
129 |
130 | **Tag Selection Guidelines:**
131 |
132 | 1. **Start with Project/Context**: What's the main project or domain?
133 | 2. **Add Technology Tags**: What tools, languages, or frameworks?
134 | 3. **Include Activity Tags**: What was being done?
135 | 4. **Specify Status**: What's the current state?
136 | 5. **Add Content Type**: What kind of information is this?
137 |
138 | ### Phase 3: Implementation
139 |
140 | #### 3.1 Memory Re-tagging Process
141 |
142 | **For each memory to be re-tagged:**
143 |
144 | 1. **Copy Content**: Preserve exact content
145 | 2. **Create New Memory**: With improved tags
146 | 3. **Verify Storage**: Confirm new memory exists
147 | 4. **Delete Old Memory**: Remove untagged version
148 | 5. **Document Change**: Record in maintenance log
149 |
150 | **Example Implementation:**
151 | ```javascript
152 | // Step 1: Create properly tagged memory
153 | store_memory({
154 | "content": "TEST: Timestamp debugging memory created for issue #7 investigation",
155 | "metadata": {
156 | "tags": ["test", "debugging", "issue-7", "timestamp-test", "mcp-memory-service", "verification"],
157 | "type": "debug-test"
158 | }
159 | })
160 |
161 | // Step 2: Delete old untagged memory
162 | delete_memory({
163 | "content_hash": "b3f874baee0c1261907c8f80c3e33d1977485f66c17078ed611b6f1c744cb1f8"
164 | })
165 | ```
166 |
167 | #### 3.2 Batch Processing Tips
168 |
169 | **Efficiency Strategies:**
170 | - Group similar memories for consistent tagging
171 | - Use template patterns for common memory types
172 | - Process one category at a time (e.g., all test memories)
173 | - Take breaks between batches to maintain quality
174 |
175 | **Quality Control:**
176 | - Double-check tag spelling and format
177 | - Verify content hasn't been modified
178 | - Confirm old memory deletion
179 | - Test search functionality with new tags
180 |
181 | ### Phase 4: Verification and Documentation
182 |
183 | #### 4.1 Verification Checklist
184 |
185 | **After each memory:**
186 | - [ ] New memory stored successfully
187 | - [ ] Tags applied correctly
188 | - [ ] Old memory deleted
189 | - [ ] Search returns new memory
190 |
191 | **After maintenance session:**
192 | - [ ] All targeted memories processed
193 | - [ ] Database health check passed
194 | - [ ] No orphaned or broken memories
195 | - [ ] Search functionality improved
196 |
197 | #### 4.2 Session Documentation
198 |
199 | **Create maintenance summary memory:**
200 | ```javascript
201 | store_memory({
202 | "content": "Memory Maintenance Session - [Date]: Successfully processed X memories...",
203 | "metadata": {
204 | "tags": ["memory-maintenance", "session-summary", "tag-management"],
205 | "type": "maintenance-record"
206 | }
207 | })
208 | ```
209 |
210 | **Include in summary:**
211 | - Number of memories processed
212 | - Categories addressed
213 | - Tag patterns applied
214 | - Time investment
215 | - Quality improvements
216 | - Next steps identified
217 |
218 | ## 🎯 Common Maintenance Scenarios
219 |
220 | ### Scenario 1: Test Memory Cleanup
221 |
222 | **Situation**: Numerous test memories from development work
223 |
224 | **Approach:**
225 | 1. Identify all test-related memories
226 | 2. Evaluate each for permanent value
227 | 3. Re-tag valuable tests with specific context
228 | 4. Delete obsolete or redundant tests
229 |
230 | **Example tags for valuable tests:**
231 | ```
232 | ["test", "verification", "issue-7", "timestamp-test", "mcp-memory-service", "quality-assurance"]
233 | ```
234 |
235 | ### Scenario 2: Project Documentation Organization
236 |
237 | **Situation**: Project memories scattered without clear organization
238 |
239 | **Approach:**
240 | 1. Group by project phase (planning, development, deployment)
241 | 2. Add temporal context (month/quarter)
242 | 3. Include status information
243 | 4. Link related memories with consistent tags
244 |
245 | **Tag patterns:**
246 | ```
247 | Project memories: ["project-name", "phase", "technology", "status", "domain"]
248 | Meeting notes: ["meeting", "project-name", "date", "decisions", "action-items"]
249 | ```
250 |
251 | ### Scenario 3: Technical Solution Archive
252 |
253 | **Situation**: Troubleshooting solutions need better organization
254 |
255 | **Approach:**
256 | 1. Categorize by technology/platform
257 | 2. Add problem domain tags
258 | 3. Include resolution status
259 | 4. Tag with difficulty/complexity
260 |
261 | **Example organization:**
262 | ```
263 | ["troubleshooting", "python", "chromadb", "connection-issues", "resolved", "backend"]
264 | ```
265 |
266 | ## 🛠️ Maintenance Tools and Scripts
267 |
268 | ### Helper Queries
269 |
270 | **Find potentially untagged memories:**
271 | ```javascript
272 | // Various search approaches
273 | retrieve_memory({"query": "test simple basic example", "n_results": 20})
274 | recall_memory({"query": "memories from last month", "n_results": 30})
275 | search_by_tag({"tags": ["test"]}) // Review generic tags
276 | ```
277 |
278 | **Content pattern analysis:**
279 | ```javascript
280 | // Look for specific patterns that need organization
281 | retrieve_memory({"query": "TODO FIXME DEBUG ERROR", "n_results": 15})
282 | retrieve_memory({"query": "issue bug problem solution", "n_results": 15})
283 | ```
284 |
285 | ### Batch Processing Templates
286 |
287 | **Standard test memory re-tagging:**
288 | ```javascript
289 | const testMemoryPattern = {
290 | "tags": ["test", "[specific-function]", "[project]", "[domain]", "verification"],
291 | "type": "test-record"
292 | }
293 | ```
294 |
295 | **Documentation memory pattern:**
296 | ```javascript
297 | const documentationPattern = {
298 | "tags": ["documentation", "[project]", "[topic]", "[technology]", "reference"],
299 | "type": "documentation"
300 | }
301 | ```
302 |
303 | ## 📊 Maintenance Metrics
304 |
305 | ### Success Indicators
306 |
307 | **Quantitative Metrics:**
308 | - Percentage of tagged memories
309 | - Search result relevance improvement
310 | - Time to find specific information
311 | - Memory retrieval accuracy
312 |
313 | **Qualitative Metrics:**
314 | - Ease of knowledge discovery
315 | - Consistency of organization
316 | - Usefulness of search results
317 | - Overall system usability
318 |
319 | ### Progress Tracking
320 |
321 | **Session Metrics:**
322 | - Memories processed per hour
323 | - Categories organized
324 | - Tag patterns established
325 | - Quality improvements achieved
326 |
327 | **Long-term Tracking:**
328 | - Monthly maintenance time investment
329 | - Database organization score
330 | - Knowledge retrieval efficiency
331 | - User satisfaction with search
332 |
333 | ## 🔄 Recurring Maintenance
334 |
335 | ### Weekly Maintenance (15-30 minutes)
336 |
337 | ```
338 | Weekly Memory Maintenance:
339 | 1. Recall memories from 'last week'
340 | 2. Identify any untagged or poorly tagged items
341 | 3. Apply quick categorization
342 | 4. Focus on recent work and current projects
343 | 5. Update any status changes (resolved issues, completed tasks)
344 | ```
345 |
346 | ### Monthly Maintenance (1-2 hours)
347 |
348 | ```
349 | Monthly Memory Maintenance:
350 | 1. Comprehensive review of recent memories
351 | 2. Spot check older memories for organization
352 | 3. Update project status tags
353 | 4. Consolidate related memories
354 | 5. Archive or delete obsolete information
355 | 6. Generate maintenance summary report
356 | ```
357 |
358 | ### Quarterly Maintenance (2-4 hours)
359 |
360 | ```
361 | Quarterly Memory Maintenance:
362 | 1. Full database health assessment
363 | 2. Tag schema review and updates
364 | 3. Memory consolidation and cleanup
365 | 4. Performance optimization
366 | 5. Backup and archival processes
367 | 6. Strategic knowledge organization review
368 | ```
369 |
370 | ## 🎯 Best Practices
371 |
372 | ### Do's
373 |
374 | ✅ **Process regularly**: Small, frequent sessions beat large overhauls
375 | ✅ **Use consistent patterns**: Develop standard approaches for common scenarios
376 | ✅ **Document decisions**: Record maintenance choices for future reference
377 | ✅ **Verify thoroughly**: Always confirm changes worked as expected
378 | ✅ **Focus on value**: Prioritize high-impact memories first
379 |
380 | ### Don'ts
381 |
382 | ❌ **Rush the process**: Quality categorization takes time
383 | ❌ **Change content**: Only modify tags and metadata, preserve original content
384 | ❌ **Delete without backup**: Ensure new memory is stored before deleting old
385 | ❌ **Ignore verification**: Always test that maintenance improved functionality
386 | ❌ **Work when tired**: Categorization quality suffers with fatigue
387 |
388 | ## 🚀 Advanced Techniques
389 |
390 | ### Automated Assistance
391 |
392 | **Use semantic search for tag suggestions:**
393 | ```javascript
394 | // Find similar memories for tag pattern ideas
395 | retrieve_memory({"query": "[memory content excerpt]", "n_results": 5})
396 | ```
397 |
398 | **Pattern recognition:**
399 | ```javascript
400 | // Identify common themes for standardization
401 | search_by_tag({"tags": ["technology-name"]}) // See existing patterns
402 | ```
403 |
404 | ### Integration Workflows
405 |
406 | **Connect with external tools:**
407 | - Export tagged memories for documentation systems
408 | - Sync with project management tools
409 | - Generate reports for team sharing
410 | - Create knowledge graphs from tag relationships
411 |
412 | ---
413 |
414 | *This guide provides the foundation for maintaining a professional-grade knowledge management system. Regular maintenance ensures your MCP Memory Service continues to provide maximum value as your knowledge base grows.*
```
--------------------------------------------------------------------------------
/docs/development/release-checklist.md:
--------------------------------------------------------------------------------
```markdown
1 | # Release Checklist
2 |
3 | This checklist ensures that critical bugs like the HTTP-MCP bridge issues are caught before release.
4 |
5 | ## Pre-Release Testing
6 |
7 | ### ✅ Core Functionality Tests
8 | - [ ] **Health Check Endpoints**
9 | - [ ] `/api/health` returns 200 with healthy status
10 | - [ ] `/health` returns 404 (wrong endpoint)
11 | - [ ] Health check works through MCP bridge
12 | - [ ] Health check works with Claude Desktop
13 |
14 | - [ ] **Memory Storage Operations**
15 | - [ ] Store memory returns HTTP 200 with `success: true`
16 | - [ ] Duplicate detection returns HTTP 200 with `success: false`
17 | - [ ] Invalid requests return appropriate error codes
18 | - [ ] All operations work through MCP bridge
19 |
20 | - [ ] **API Endpoint Consistency**
21 | - [ ] All endpoints use `/api/` prefix correctly
22 | - [ ] URL construction doesn't break base paths
23 | - [ ] Bridge correctly appends paths to base URL
24 |
25 | ### ✅ HTTP-MCP Bridge Specific Tests
26 | - [ ] **Status Code Handling**
27 | - [ ] Bridge accepts HTTP 200 responses (not just 201)
28 | - [ ] Bridge checks `success` field for actual result
29 | - [ ] Bridge handles both success and failure in 200 responses
30 |
31 | - [ ] **URL Construction**
32 | - [ ] Bridge preserves `/api` base path in URLs
33 | - [ ] `new URL()` calls don't replace existing paths
34 | - [ ] All API calls reach correct endpoints
35 |
36 | - [ ] **MCP Protocol Compliance**
37 | - [ ] `initialize` method works
38 | - [ ] `tools/list` returns all tools
39 | - [ ] `tools/call` executes correctly
40 | - [ ] Error responses are properly formatted
41 |
42 | ### ✅ End-to-End Testing
43 | - [ ] **Claude Desktop Integration**
44 | - [ ] Memory storage through Claude Desktop works
45 | - [ ] Memory retrieval through Claude Desktop works
46 | - [ ] Health checks show healthy status
47 | - [ ] No "unhealthy" false positives
48 |
49 | - [ ] **Remote Server Testing**
50 | - [ ] Bridge connects to remote server correctly
51 | - [ ] Authentication works with API keys
52 | - [ ] All operations work across network
53 | - [ ] SSL certificates are handled properly
54 |
55 | ### ✅ Contract Validation
56 | - [ ] **API Response Formats**
57 | - [ ] Memory storage responses match documented format
58 | - [ ] Health responses match documented format
59 | - [ ] Error responses match documented format
60 | - [ ] Search responses match documented format
61 |
62 | - [ ] **Backward Compatibility**
63 | - [ ] Existing configurations continue to work
64 | - [ ] No breaking changes to client interfaces
65 | - [ ] Bridge supports both HTTP 200 and 201 responses
66 |
67 | ## Automated Testing Requirements
68 |
69 | ### ✅ Unit Tests
70 | - [ ] HTTP-MCP bridge unit tests pass
71 | - [ ] Mock server responses are realistic
72 | - [ ] All edge cases are covered
73 | - [ ] Error conditions are tested
74 |
75 | ### ✅ Integration Tests
76 | - [ ] Bridge-server integration tests pass
77 | - [ ] Contract tests validate API behavior
78 | - [ ] End-to-end MCP protocol tests pass
79 | - [ ] Real server connectivity tests pass
80 |
81 | ### ✅ CI/CD Pipeline
82 | - [ ] Bridge tests run on every commit
83 | - [ ] Tests block merges if failing
84 | - [ ] Contract validation passes
85 | - [ ] Multiple Node.js versions tested
86 |
87 | ## Manual Testing Checklist
88 |
89 | ### ✅ Critical User Paths
90 | 1. **Claude Desktop User**:
91 | - [ ] Install and configure Claude Desktop with MCP Memory Service
92 | - [ ] Store a memory using Claude Desktop
93 | - [ ] Retrieve memories using Claude Desktop
94 | - [ ] Verify health check shows healthy status
95 | - [ ] Confirm no "unhealthy" warnings appear
96 |
97 | 2. **Remote Server User**:
98 | - [ ] Configure bridge to connect to remote server
99 | - [ ] Test memory operations work correctly
100 | - [ ] Verify all API endpoints are reachable
101 | - [ ] Confirm authentication works
102 |
103 | 3. **API Consumer**:
104 | - [ ] Test direct HTTP API calls work
105 | - [ ] Verify response formats match documentation
106 | - [ ] Test error conditions return expected responses
107 |
108 | ### ✅ Platform Testing
109 | - [ ] **Windows**: Bridge works with Windows Claude Desktop
110 | - [ ] **macOS**: Bridge works with macOS Claude Desktop
111 | - [ ] **Linux**: Bridge works with Linux installations
112 |
113 | ## Code Quality Checks
114 |
115 | ### ✅ Code Review Requirements
116 | - [ ] All HTTP status code assumptions documented
117 | - [ ] URL construction logic reviewed
118 | - [ ] Error handling covers all scenarios
119 | - [ ] No hardcoded endpoints or assumptions
120 |
121 | ### ✅ Documentation Updates
122 | - [ ] API contract documentation updated
123 | - [ ] Bridge usage documentation updated
124 | - [ ] Troubleshooting guides updated
125 | - [ ] Breaking changes documented
126 |
127 | ## Release Process
128 |
129 | ### ✅ Version Management (3-File Procedure)
130 | - [ ] **Update `src/mcp_memory_service/__init__.py`**
131 | - [ ] Update `__version__` string (e.g., `"8.17.0"`)
132 | - [ ] Verify version format follows semantic versioning (MAJOR.MINOR.PATCH)
133 |
134 | - [ ] **Update `pyproject.toml`**
135 | - [ ] Update `version` field in `[project]` section
136 | - [ ] Ensure version matches `__init__.py` exactly
137 |
138 | - [ ] **Lock dependencies**
139 | - [ ] Run `uv lock` to update `uv.lock` file
140 | - [ ] Commit all three files together in version bump commit
141 |
142 | - [ ] **Semantic Versioning Rules**
143 | - [ ] MAJOR: Breaking changes (API changes, removed features)
144 | - [ ] MINOR: New features (backward compatible)
145 | - [ ] PATCH: Bug fixes (no API changes)
146 |
147 | ### ✅ CHANGELOG Quality Gates
148 | - [ ] **Format Validation**
149 | - [ ] Follows [Keep a Changelog](https://keepachangelog.com/) format
150 | - [ ] Version header includes date: `## [8.17.0] - 2025-11-04`
151 | - [ ] Changes categorized: Added/Changed/Fixed/Removed/Deprecated/Security
152 |
153 | - [ ] **Content Requirements**
154 | - [ ] All user-facing changes documented
155 | - [ ] Breaking changes clearly marked with **BREAKING**
156 | - [ ] Performance improvements include metrics (e.g., "50% faster")
157 | - [ ] Bug fixes reference issue numbers (e.g., "Fixes #123")
158 | - [ ] Technical details for maintainers in appropriate sections
159 |
160 | - [ ] **Migration Guidance** (if breaking changes)
161 | - [ ] Before/after code examples provided
162 | - [ ] Environment variable changes documented
163 | - [ ] Database migration scripts linked
164 | - [ ] Deprecation timeline specified
165 |
166 | ### ✅ GitHub Workflow Verification
167 | - [ ] **All Workflows Pass** (check Actions tab)
168 | - [ ] Docker Publish workflow (builds multi-platform images)
169 | - [ ] Publish and Test workflow (PyPI publish + installation tests)
170 | - [ ] HTTP-MCP Bridge Tests (validates MCP protocol compliance)
171 | - [ ] Platform Tests (macOS/Windows/Linux matrix)
172 |
173 | - [ ] **Docker Images Built**
174 | - [ ] `mcp-memory-service:latest` tag updated
175 | - [ ] `mcp-memory-service:v8.x.x` version tag created
176 | - [ ] Multi-platform images (linux/amd64, linux/arm64)
177 |
178 | - [ ] **PyPI Package Published**
179 | - [ ] Package available at https://pypi.org/project/mcp-memory-service/
180 | - [ ] Installation test passes: `pip install mcp-memory-service==8.x.x`
181 |
182 | ### ✅ Git Tag and Release
183 | - [ ] **Create annotated Git tag**
184 | ```bash
185 | git tag -a v8.x.x -m "Release v8.x.x: Brief description"
186 | ```
187 | - [ ] Tag follows `vMAJOR.MINOR.PATCH` format
188 | - [ ] Tag message summarizes key changes
189 |
190 | - [ ] **Push tag to remote**
191 | ```bash
192 | git push origin v8.x.x
193 | ```
194 | - [ ] Tag triggers release workflows
195 |
196 | - [ ] **Create GitHub Release**
197 | - [ ] Title: `vx.x.x - Short Description`
198 | - [ ] Body: Copy relevant CHANGELOG section
199 | - [ ] Mark as pre-release if RC version
200 | - [ ] Attach any release artifacts (if applicable)
201 |
202 | ### ✅ Post-Release Issue Closure
203 | - [ ] **Review Fixed Issues**
204 | - [ ] Search for issues closed by commits in this release
205 | - [ ] Verify each issue is actually resolved
206 |
207 | - [ ] **Close Issues with Context**
208 | ```markdown
209 | Resolved in v8.x.x via #PR_NUMBER
210 |
211 | [Link to CHANGELOG entry]
212 | [Link to relevant Wiki page if applicable]
213 |
214 | Thank you for reporting this issue!
215 | ```
216 | - [ ] Include PR link for traceability
217 | - [ ] Reference CHANGELOG section
218 | - [ ] Tag issues with `released` label
219 |
220 | - [ ] **Update Related Documentation**
221 | - [ ] Wiki pages updated with new features/fixes
222 | - [ ] Troubleshooting guides reflect resolved issues
223 | - [ ] FAQ updated if new common questions emerged
224 |
225 | ### ✅ Communication
226 | - [ ] Release notes highlight critical fixes
227 | - [ ] Breaking changes clearly documented
228 | - [ ] Migration guide provided if needed
229 | - [ ] Users notified of important changes
230 |
231 | ## Post-Release Monitoring
232 |
233 | ### ✅ Health Monitoring
234 | - [ ] Monitor for increased error rates
235 | - [ ] Watch for "unhealthy" status reports
236 | - [ ] Track Claude Desktop connectivity issues
237 | - [ ] Monitor API endpoint usage patterns
238 |
239 | ### ✅ User Feedback
240 | - [ ] Monitor GitHub issues for reports
241 | - [ ] Check community discussions for problems
242 | - [ ] Respond to user reports quickly
243 | - [ ] Document common issues and solutions
244 |
245 | ---
246 |
247 | ## Lessons from HTTP-MCP Bridge Bug
248 |
249 | **Critical Mistakes to Avoid:**
250 | 1. **Never assume status codes** - Always test against actual server responses
251 | 2. **Test critical components** - If users depend on it, it needs comprehensive tests
252 | 3. **Validate URL construction** - `new URL()` behavior with base paths is tricky
253 | 4. **Document actual behavior** - API contracts must match reality, not hopes
254 | 5. **Test end-to-end flows** - Unit tests alone miss integration problems
255 |
256 | **Required for Every Release:**
257 | - [ ] HTTP-MCP bridge tested with real server
258 | - [ ] All assumptions about server behavior validated
259 | - [ ] Critical user paths manually tested
260 | - [ ] API contracts verified against implementation
261 |
262 | **Emergency Response Plan:**
263 | - If critical bugs are found in production:
264 | 1. Create hotfix branch immediately
265 | 2. Write failing test that reproduces the bug
266 | 3. Fix bug and verify test passes
267 | 4. Release hotfix within 24 hours
268 | 5. Post-mortem to prevent similar issues
269 |
270 | ---
271 |
272 | ## Rollback Procedure
273 |
274 | ### ✅ Emergency Rollback (if release breaks production)
275 |
276 | **When to Rollback:**
277 | - Critical functionality broken (storage, retrieval, MCP protocol)
278 | - Data corruption risk identified
279 | - Security vulnerability introduced
280 | - Widespread user-reported failures
281 |
282 | **Rollback Steps:**
283 |
284 | 1. **Immediate Actions**
285 | - [ ] Create GitHub issue documenting the problem
286 | - [ ] Tag issue with `critical`, `rollback-needed`
287 | - [ ] Notify users via GitHub Discussions/Release notes
288 |
289 | 2. **Docker Rollback**
290 | ```bash
291 | # Tag previous version as latest
292 | git checkout vPREVIOUS_VERSION
293 | docker build -t mcp-memory-service:latest .
294 | docker push mcp-memory-service:latest
295 | ```
296 | - [ ] Verify previous Docker image works
297 | - [ ] Update documentation to reference previous version
298 |
299 | 3. **PyPI Rollback** (yank bad version)
300 | ```bash
301 | # Yank the broken version (keeps it available but discourages use).
302 | # Note: twine has no "yank" command. Yank the release via the PyPI web
303 | # interface (project page -> Manage -> Releases -> "Yank release"), per PEP 592.
304 | ```
305 | - [ ] Yank version on PyPI
306 | - [ ] Publish notice in release notes
307 |
308 | 4. **Git Tag Management**
309 | - [ ] Keep the bad tag for history (don't delete)
310 | - [ ] Create new hotfix tag (e.g., `v8.x.x+1`) with fix
311 | - [ ] Mark GitHub Release as "This release has known issues - use the previous release instead" (link the prior version tag explicitly)
312 |
313 | 5. **User Communication**
314 | - [ ] Post issue explaining problem and rollback
315 | - [ ] Update README with rollback instructions
316 | - [ ] Pin issue to repository
317 | - [ ] Post in Discussions with migration path
318 |
319 | 6. **Post-Rollback Analysis**
320 | - [ ] Document what went wrong in post-mortem
321 | - [ ] Add regression test to prevent recurrence
322 | - [ ] Update this checklist with lessons learned
323 | - [ ] Review release testing procedures
324 |
325 | **Recovery Timeline:**
326 | - Hour 1: Identify issue, create GitHub issue, begin rollback
327 | - Hour 2-4: Complete rollback, verify previous version works
328 | - Hour 4-24: Investigate root cause, prepare hotfix
329 | - Day 2: Release hotfix with comprehensive tests
330 | - Week 1: Post-mortem, update testing procedures
331 |
332 | ---
333 |
334 | This checklist must be completed for every release to prevent critical bugs from reaching users.
```
--------------------------------------------------------------------------------
/scripts/sync/sync_memory_backends.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Heinrich Krupp
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """
17 | Bidirectional sync script for MCP Memory Service backends.
18 | Syncs memories between Cloudflare (primary) and SQLite-vec (backup).
19 | """
20 | import sys
21 | import os
22 | import asyncio
23 | import logging
24 | import argparse
25 | import hashlib
26 | from pathlib import Path
27 | from typing import List, Dict, Any, Tuple
28 | from datetime import datetime
29 |
30 | # Add src directory to path so we can import from the mcp_memory_service package
31 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
32 |
33 | from mcp_memory_service.config import (
34 | CLOUDFLARE_API_TOKEN, CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_VECTORIZE_INDEX,
35 | CLOUDFLARE_D1_DATABASE_ID, BASE_DIR
36 | )
37 | from mcp_memory_service.models.memory import Memory
38 | from mcp_memory_service.storage.cloudflare import CloudflareStorage
39 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
40 |
41 | # Configure logging
42 | logging.basicConfig(
43 | level=logging.INFO,
44 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
45 | )
46 | logger = logging.getLogger("memory_sync")
47 |
48 | class MemorySync:
49 | """Handles bidirectional sync between Cloudflare and SQLite-vec backends."""
50 |
51 | def __init__(self, sqlite_path: str = None):  # sqlite_path: optional override for the local backup DB location
52 | """Initialize sync with storage backends."""
53 | self.sqlite_path = sqlite_path or os.path.join(BASE_DIR, 'backup_sqlite_vec.db')  # default: backup DB under the service BASE_DIR
54 |
55 | # Initialize storage backends
56 | self.cloudflare = CloudflareStorage(  # Cloudflare acts as the primary backend (see module docstring)
57 | api_token=CLOUDFLARE_API_TOKEN,
58 | account_id=CLOUDFLARE_ACCOUNT_ID,
59 | vectorize_index=CLOUDFLARE_VECTORIZE_INDEX,
60 | d1_database_id=CLOUDFLARE_D1_DATABASE_ID
61 | )
62 |
63 | self.sqlite_vec = SqliteVecMemoryStorage(self.sqlite_path)  # local SQLite-vec file is the backup backend
64 |
65 | async def get_all_memories_from_backend(self, backend_name: str) -> List[Dict[str, Any]]:
66 | """Get all memories from a specific backend."""
67 | if backend_name == 'cloudflare':
68 | backend = self.cloudflare
69 | elif backend_name == 'sqlite_vec':
70 | backend = self.sqlite_vec
71 | else:
72 | raise ValueError(f"Unknown backend: {backend_name}")
73 |
74 | try:
75 | # Get all memories from the backend
76 | memories_list = await backend.get_all_memories()
77 |
78 | memories = []
79 | for memory in memories_list:
80 | memory_dict = {  # flatten the Memory model into a plain dict for comparison/sync
81 | 'content_hash': memory.content_hash,
82 | 'content': memory.content,
83 | 'metadata': memory.metadata,
84 | 'tags': memory.tags,
85 | 'memory_type': memory.memory_type,
86 | 'created_at': memory.created_at,
87 | 'created_at_iso': memory.created_at_iso,
88 | 'updated_at': memory.updated_at,
89 | 'updated_at_iso': memory.updated_at_iso,
90 | }
91 | memories.append(memory_dict)
92 |
93 | logger.info(f"Retrieved {len(memories)} memories from {backend_name}")
94 | return memories
95 |
96 | except Exception as e:
97 | logger.error(f"Error retrieving memories from {backend_name}: {e}")
98 | return []  # NOTE(review): errors yield an empty list — callers cannot distinguish "empty backend" from "fetch failed"
99 |
100 | def calculate_content_hash(self, content: str, metadata: Dict[str, Any]) -> str:
101 | """Calculate a hash for memory content to detect duplicates."""
102 | # Create a consistent string representation
103 | content_str = f"{content}_{sorted(metadata.items())}"
104 | return hashlib.sha256(content_str.encode()).hexdigest()[:16]
105 |
106 | async def _sync_between_backends(self, source_backend: str, target_backend: str, dry_run: bool = False) -> Tuple[int, int]:
107 | """
108 | Generic method to sync memories between any two backends.
109 |
110 | Args:
111 | source_backend: Backend to sync from ('cloudflare' or 'sqlite_vec')
112 | target_backend: Backend to sync to ('cloudflare' or 'sqlite_vec')
113 | dry_run: If True, only show what would be synced without making changes
114 |
115 | Returns:
116 | Tuple of (added_count, skipped_count)
117 | """
118 | logger.info(f"Starting sync from {source_backend} to {target_backend}...")
119 |
120 | # Get memories from both backends
121 | source_memories = await self.get_all_memories_from_backend(source_backend)
122 | target_memories = await self.get_all_memories_from_backend(target_backend)
123 |
124 | # Create hash sets for quick lookup
125 | target_hashes = {mem['content_hash'] for mem in target_memories if mem.get('content_hash')}
126 | target_content_hashes = {
127 | self.calculate_content_hash(mem['content'], mem['metadata'])
128 | for mem in target_memories
129 | }
130 |
131 | added_count = 0
132 | skipped_count = 0
133 |
134 | # Get target backend instance for storing memories
135 | target_storage = self.cloudflare if target_backend == 'cloudflare' else self.sqlite_vec
136 |
137 | for source_memory in source_memories:
138 | # Check if memory already exists (by hash or content)
139 | content_hash = self.calculate_content_hash(source_memory['content'], source_memory['metadata'])
140 |
141 | if (source_memory.get('content_hash') in target_hashes or
142 | content_hash in target_content_hashes):
143 | skipped_count += 1
144 | continue
145 |
146 | if not dry_run:
147 | try:
148 | memory_obj = Memory(
149 | content=source_memory['content'],
150 | content_hash=source_memory['content_hash'],
151 | tags=source_memory.get('tags', []),
152 | metadata=source_memory.get('metadata', {}),
153 | memory_type=source_memory.get('memory_type'),
154 | created_at=source_memory.get('created_at'),
155 | updated_at=source_memory.get('updated_at'),
156 | )
157 | success, message = await target_storage.store(memory_obj)
158 | if success:
159 | added_count += 1
160 | logger.debug(f"Added memory: {source_memory['content_hash'][:8]}...")
161 | else:
162 | logger.warning(f"Failed to store memory {source_memory['content_hash']}: {message}")
163 | except Exception as e:
164 | logger.error(f"Error storing memory {source_memory['content_hash']}: {e}")
165 | else:
166 | added_count += 1
167 |
168 | logger.info(f"{source_backend} → {target_backend}: {added_count} added, {skipped_count} skipped")
169 | return added_count, skipped_count
170 |
171 | async def sync_cloudflare_to_sqlite(self, dry_run: bool = False) -> Tuple[int, int]:
172 | """Sync memories from Cloudflare to SQLite-vec."""
173 | return await self._sync_between_backends('cloudflare', 'sqlite_vec', dry_run)
174 |
175 | async def sync_sqlite_to_cloudflare(self, dry_run: bool = False) -> Tuple[int, int]:
176 | """Sync memories from SQLite-vec to Cloudflare."""
177 | return await self._sync_between_backends('sqlite_vec', 'cloudflare', dry_run)
178 |
179 | async def bidirectional_sync(self, dry_run: bool = False) -> Dict[str, Tuple[int, int]]:
180 | """Perform bidirectional sync between backends."""
181 | logger.info("Starting bidirectional sync...")
182 |
183 | results = {}
184 |
185 | # Sync Cloudflare → SQLite-vec
186 | cf_to_sqlite = await self.sync_cloudflare_to_sqlite(dry_run)
187 | results['cloudflare_to_sqlite'] = cf_to_sqlite
188 |
189 | # Sync SQLite-vec → Cloudflare
190 | sqlite_to_cf = await self.sync_sqlite_to_cloudflare(dry_run)
191 | results['sqlite_to_cloudflare'] = sqlite_to_cf
192 |
193 | logger.info("Bidirectional sync completed")
194 | return results
195 |
196 | async def get_sync_status(self) -> Dict[str, Any]:
197 | """Get sync status showing memory counts in both backends."""
198 | cf_memories = await self.get_all_memories_from_backend('cloudflare')
199 | sqlite_memories = await self.get_all_memories_from_backend('sqlite_vec')
200 |
201 | status = {
202 | 'cloudflare_count': len(cf_memories),
203 | 'sqlite_vec_count': len(sqlite_memories),
204 | 'sync_time': datetime.now().isoformat(),
205 | 'backends_configured': {
206 | 'cloudflare': bool(CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID),
207 | 'sqlite_vec': os.path.exists(self.sqlite_path) if self.sqlite_path else False
208 | }
209 | }
210 |
211 | return status
212 |
async def main():
    """Main function to run memory sync operations.

    Parses CLI arguments, then either prints sync status (--status) or runs
    the requested sync direction, exiting with status 1 on failure.
    """
    parser = argparse.ArgumentParser(description='Sync memories between Cloudflare and SQLite-vec backends')
    parser.add_argument('--direction', choices=['cf-to-sqlite', 'sqlite-to-cf', 'bidirectional'],
                        default='bidirectional', help='Sync direction')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be synced without actually syncing')
    parser.add_argument('--status', action='store_true', help='Show sync status only')
    parser.add_argument('--sqlite-path', help='Path to SQLite-vec database file')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Initialize sync
    sync = MemorySync(sqlite_path=args.sqlite_path)

    try:
        if args.status:
            status = await sync.get_sync_status()
            print("\n=== Memory Sync Status ===")
            print(f"Cloudflare memories: {status['cloudflare_count']}")
            print(f"SQLite-vec memories: {status['sqlite_vec_count']}")
            print(f"Cloudflare configured: {status['backends_configured']['cloudflare']}")
            print(f"SQLite-vec file exists: {status['backends_configured']['sqlite_vec']}")
            print(f"Last check: {status['sync_time']}")
            return

        logger.info(f"=== Starting memory sync ({args.direction}) ===")
        if args.dry_run:
            logger.info("DRY RUN MODE - No changes will be made")

        if args.direction == 'cf-to-sqlite':
            added, skipped = await sync.sync_cloudflare_to_sqlite(dry_run=args.dry_run)
            print(f"Cloudflare → SQLite-vec: {added} added, {skipped} skipped")
        elif args.direction == 'sqlite-to-cf':
            added, skipped = await sync.sync_sqlite_to_cloudflare(dry_run=args.dry_run)
            print(f"SQLite-vec → Cloudflare: {added} added, {skipped} skipped")
        else:  # bidirectional
            results = await sync.bidirectional_sync(dry_run=args.dry_run)
            cf_to_sqlite = results['cloudflare_to_sqlite']
            sqlite_to_cf = results['sqlite_to_cloudflare']
            print(f"Cloudflare → SQLite-vec: {cf_to_sqlite[0]} added, {cf_to_sqlite[1]} skipped")
            print(f"SQLite-vec → Cloudflare: {sqlite_to_cf[0]} added, {sqlite_to_cf[1]} skipped")

        logger.info("=== Sync completed successfully ===")

    except Exception as e:
        # logger.exception preserves the traceback that plain error() dropped.
        logger.exception("Sync failed: %s", e)
        sys.exit(1)
264 |
if __name__ == "__main__":
    # Script entry point: run the async CLI driver to completion.
    asyncio.run(main())
267 |
```