This is page 17 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/docs/troubleshooting/session-end-hooks.md:
--------------------------------------------------------------------------------
```markdown
1 | # SessionEnd Hook Troubleshooting Guide
2 |
3 | ## Overview
4 |
5 | SessionEnd hooks automatically consolidate conversation outcomes when you exit Claude Code. However, many users are confused about **when these hooks actually fire** and why memories might not be created as expected.
6 |
7 | This guide clarifies the session lifecycle and common troubleshooting scenarios.
8 |
9 | ---
10 |
11 | ## Critical Concept: Session Lifecycle
12 |
13 | Claude Code distinguishes between **session pause/suspend** and **session termination**:
14 |
15 | | User Action | Session State | Hook Triggered | Memory Created? |
16 | |-------------|---------------|----------------|-----------------|
17 | | **Ctrl+C (once)** | Interrupt input | None | ❌ No |
18 | | **Ctrl+C (twice)** | Suspend session | None | ❌ No |
19 | | **Resume session** | Continue existing | `SessionStart:resume` | ❌ No (loads existing) |
20 | | **`/exit` command** | Terminate | `SessionEnd` | ✅ **Yes** |
21 | | **Close terminal** | Terminate | `SessionEnd` | ✅ **Yes** |
22 | | **Kill process** | May terminate | `SessionEnd` (if graceful) | ⚠️ Maybe |
23 |
24 | ### Key Takeaway
25 |
26 | **Ctrl+C does NOT trigger SessionEnd hooks.** It suspends the session, which you can later resume. Only actual session termination (e.g., `/exit`) triggers SessionEnd.
27 |
28 | ---
29 |
30 | ## Common Issue: "My Session Didn't Create a Memory"
31 |
32 | ### Symptom
33 |
34 | You exited Claude Code with Ctrl+C (twice), resumed later, and noticed no `session-consolidation` memory was created for your previous session.
35 |
36 | ### Root Cause
37 |
38 | **Ctrl+C suspends the session rather than ending it.** When you resume with `SessionStart:resume`, the session continues from where you left off - no SessionEnd hook fires.
39 |
40 | ### Evidence
41 |
42 | When you resume a session, you'll see:
43 | ```
44 | SessionStart:resume hook success
45 | ```
46 |
47 | This confirms you **resumed** an existing session, not started a new one.
48 |
49 | ### Solution
50 |
51 | **Always use `/exit` to properly terminate sessions** if you want SessionEnd memories created:
52 |
53 | ```bash
54 | # In Claude Code prompt:
55 | /exit
56 | ```
57 |
58 | This triggers graceful shutdown and SessionEnd hook execution.
59 |
60 | ---
61 |
62 | ## Common Issue: Connection Failures (SessionEnd & SessionStart)
63 |
64 | > **Note**: This issue affects both SessionEnd and SessionStart hooks, but with different symptoms:
65 | > - **SessionEnd**: Hard failure - cannot store session memory
66 | > - **SessionStart**: Soft failure - falls back to MCP tools, shows "No relevant memories found"
67 | >
68 | > See [hooks-quick-reference.md](hooks-quick-reference.md#sessionstart-hook-issues) for detailed SessionStart troubleshooting.
69 |
70 | ### Symptom (SessionEnd)
71 |
72 | During SessionEnd, you see:
73 | ```
74 | ⚠️ Memory Connection → Failed to connect using any available protocol
75 | 💾 Storage → 💾 Unknown Storage (http://127.0.0.1:8000)
76 | ```
77 |
78 | ### Symptom (SessionStart)
79 |
80 | Multiple "MCP Fallback" messages and no memories loaded:
81 | ```
82 | ↩️ MCP Fallback → Using standard MCP tools
83 | ↩️ MCP Fallback → Using standard MCP tools
84 | ↩️ MCP Fallback → Using standard MCP tools
85 | 📭 Memory Search → No relevant memories found
86 | ```
87 |
88 | ### Root Cause
89 |
90 | **HTTP/HTTPS protocol mismatch** between hook configuration and memory service.
91 |
92 | **Example**:
93 | - **Server running**: `https://localhost:8000` (HTTPS)
94 | - **Hook configured**: `http://127.0.0.1:8000` (HTTP)
95 |
96 | ### Diagnosis
97 |
98 | Check your server protocol:
99 | ```bash
100 | # Check server status
101 | systemctl --user status mcp-memory-http.service
102 | # Look for: "Uvicorn running on https://0.0.0.0:8000" or "http://..."
103 |
104 | # Or test connection
105 | curl -sk "https://localhost:8000/api/health" # HTTPS
106 | curl -s "http://127.0.0.1:8000/api/health" # HTTP
107 | ```
108 |
109 | Check your hook configuration:
110 | ```bash
111 | grep endpoint ~/.claude/hooks/config.json
112 | # Should show: "endpoint": "https://localhost:8000"
113 | ```
114 |
115 | ### Solution
116 |
117 | Update `~/.claude/hooks/config.json` to match server protocol:
118 |
119 | ```json
120 | {
121 | "memoryService": {
122 | "http": {
123 | "endpoint": "https://localhost:8000", // Match your server
124 | "apiKey": "your-api-key-here"
125 | }
126 | }
127 | }
128 | ```
129 |
130 | **No restart required** - hooks reload config on next execution.
131 |
132 | ---
133 |
134 | ## SessionEnd Requirements
135 |
136 | Even if SessionEnd fires correctly, memory creation requires:
137 |
138 | ### 1. Minimum Session Length
139 | - Default: **100+ characters** total conversation
140 | - Configurable: `sessionAnalysis.minSessionLength` in `config.json`
141 | - Reason: Prevents noise from trivial sessions
142 |
143 | ### 2. Minimum Confidence Score
144 | - Default: **> 0.1** (10% confidence)
145 | - Based on conversation analysis quality
146 | - Low confidence = session too generic to extract insights
147 |
148 | ### 3. Session Consolidation Enabled
149 | ```json
150 | {
151 | "memoryService": {
152 | "enableSessionConsolidation": true // Must be true
153 | }
154 | }
155 | ```
156 |
157 | ### What Gets Extracted
158 |
159 | SessionEnd analyzes your conversation to extract:
160 |
161 | - **Topics**: Keywords like "implementation", "debugging", "architecture", "performance"
162 | - **Decisions**: Phrases like "decided to", "will use", "chose to", "going with"
163 | - **Insights**: Phrases like "learned that", "discovered", "realized"
164 | - **Code Changes**: Phrases like "implemented", "created", "refactored"
165 | - **Next Steps**: Phrases like "next we need", "TODO", "remaining"
166 |
167 | If conversation lacks these patterns, confidence will be low and memory won't be created.
168 |
169 | ---
170 |
171 | ## Verification & Debugging
172 |
173 | ### 1. Check Recent Session Memories
174 |
175 | ```bash
176 | # Search for recent session consolidation memories
177 | curl -sk "https://localhost:8000/api/search/by-tag" \
178 | -H "Content-Type: application/json" \
179 | -d '{"tags": ["session-consolidation"], "limit": 5}' | \
180 | python -m json.tool | grep created_at_iso
181 | ```
182 |
183 | Look for recent timestamps (today/yesterday).
184 |
185 | ### 2. Test SessionEnd Hook Manually
186 |
187 | ```bash
188 | # Run hook with test conversation
189 | node ~/.claude/hooks/core/session-end.js
190 | ```
191 |
192 | Check output for:
193 | - `[Memory Hook] Session ending - consolidating outcomes...`
194 | - `[Memory Hook] Session analysis: X topics, Y decisions, confidence: Z%`
195 | - `[Memory Hook] Session consolidation stored successfully`
196 |
197 | ### 3. Verify Connection
198 |
199 | ```bash
200 | # Test server health
201 | curl -sk "https://localhost:8000/api/health"
202 |
203 | # Check config matches
204 | grep endpoint ~/.claude/hooks/config.json
205 | ```
206 |
207 | ### 4. Check SessionEnd Configuration
208 |
209 | ```bash
210 | # Verify SessionEnd hook is configured
211 | grep -A 10 "SessionEnd" ~/.claude/settings.json
212 |
213 | # Should show:
214 | # "SessionEnd": [
215 | # {
216 | # "hooks": [
217 | # {
218 | # "type": "command",
219 | # "command": "node \"/home/user/.claude/hooks/core/session-end.js\"",
220 | # "timeout": 15
221 | # }
222 | # ]
223 | # }
224 | # ]
225 | ```
226 |
227 | ---
228 |
229 | ## Quick Diagnosis Checklist
230 |
231 | Use this checklist when SessionEnd memories aren't being created:
232 |
233 | - [ ] **Did I use `/exit`** or just Ctrl+C?
234 | - **Fix**: Use `/exit` command for proper termination
235 |
236 | - [ ] **Does `config.json` endpoint match server protocol?**
237 | - **Check**: HTTP vs HTTPS in both config and server
238 | - **Fix**: Update endpoint in `~/.claude/hooks/config.json`
239 |
240 | - [ ] **Is the memory service running?**
241 |   - **Check**: `curl -sk "https://localhost:8000/api/health"` (`-k` needed for self-signed certs)
242 | - **Fix**: Start server with `systemctl --user start mcp-memory-http.service`
243 |
244 | - [ ] **Was conversation meaningful?**
245 | - **Check**: Total length > 100 characters
246 | - **Fix**: Have longer conversations with decisions/insights
247 |
248 | - [ ] **Is session consolidation enabled?**
249 | - **Check**: `enableSessionConsolidation: true` in config
250 | - **Fix**: Update `~/.claude/hooks/config.json`
251 |
252 | - [ ] **Is SessionEnd hook installed?**
253 | - **Check**: `grep SessionEnd ~/.claude/settings.json`
254 | - **Fix**: Run `cd claude-hooks && python install_hooks.py --all`
255 |
256 | ---
257 |
258 | ## Best Practices
259 |
260 | ### For Reliable Memory Consolidation
261 |
262 | 1. **Always use `/exit`** when you want session memories created
263 | 2. **Avoid Ctrl+C for final exit** - Use it only for interrupts/corrections
264 | 3. **Have meaningful conversations** - Include decisions, insights, plans
265 | 4. **Verify endpoint configuration** - HTTP vs HTTPS must match
266 | 5. **Check session memories periodically** - Confirm system is working
267 |
268 | ### For Debugging
269 |
270 | 1. **Check recent memories** - Look for session-consolidation tag
271 | 2. **Test hook manually** - Run `session-end.js` directly
272 | 3. **Verify connection** - Test health endpoint
273 | 4. **Read hook logs** - Look for error messages in terminal
274 | 5. **Consult session requirements** - Length, confidence, enabled settings
275 |
276 | ---
277 |
278 | ## Technical Details
279 |
280 | ### SessionEnd Hook Implementation
281 |
282 | **File**: `~/.claude/hooks/core/session-end.js`
283 |
284 | **Key Code Sections**:
285 | - **Lines 298-365**: Main `onSessionEnd()` function
286 | - **Line 316**: Minimum session length check (100 chars)
287 | - **Line 329**: Minimum confidence check (0.1)
288 | - **Line 305**: Session consolidation enabled check
289 | - **Lines 213-293**: `storeSessionMemory()` - HTTP API call
290 |
291 | ### Configuration Structure
292 |
293 | **File**: `~/.claude/hooks/config.json`
294 |
295 | ```json
296 | {
297 | "memoryService": {
298 | "protocol": "auto",
299 | "preferredProtocol": "http",
300 | "http": {
301 | "endpoint": "https://localhost:8000", // Must match server
302 | "apiKey": "your-api-key",
303 | "healthCheckTimeout": 3000
304 | },
305 | "enableSessionConsolidation": true
306 | },
307 | "sessionAnalysis": {
308 | "extractTopics": true,
309 | "extractDecisions": true,
310 | "extractInsights": true,
311 | "extractCodeChanges": true,
312 | "extractNextSteps": true,
313 | "minSessionLength": 100,
314 | "minConfidence": 0.1
315 | }
316 | }
317 | ```
318 |
319 | ### Hook Settings
320 |
321 | **File**: `~/.claude/settings.json`
322 |
323 | ```json
324 | {
325 | "hooks": {
326 | "SessionEnd": [
327 | {
328 | "hooks": [
329 | {
330 | "type": "command",
331 | "command": "node \"/home/user/.claude/hooks/core/session-end.js\"",
332 | "timeout": 15 // 15 seconds (vs 10s for SessionStart)
333 | }
334 | ]
335 | }
336 | ]
337 | }
338 | }
339 | ```
340 |
341 | ---
342 |
343 | ## Related Documentation
344 |
345 | - **Hook Installation**: `claude-hooks/README.md`
346 | - **Configuration Guide**: `claude-hooks/CONFIGURATION.md`
347 | - **HTTP Server Management**: `docs/http-server-management.md`
348 | - **General Troubleshooting**: `docs/troubleshooting/general.md`
349 | - **SessionStart Windows Bug**: `claude-hooks/WINDOWS-SESSIONSTART-BUG.md`
350 |
351 | ---
352 |
353 | ## Common Questions
354 |
355 | ### Q: Why didn't my session create a memory even though I used `/exit`?
356 |
357 | **A**: Check these conditions:
358 | 1. Conversation was too short (< 100 chars)
359 | 2. Conversation lacked decision/insight patterns (low confidence)
360 | 3. Connection to memory service failed (check endpoint)
361 | 4. Session consolidation disabled in config
362 |
363 | ### Q: Does Ctrl+C ever trigger SessionEnd?
364 |
365 | **A**: No. Ctrl+C sends SIGINT, which interrupts the current operation (suspension is SIGTSTP/Ctrl+Z) but doesn't terminate the session. Use `/exit` for proper termination.
366 |
367 | ### Q: Can I test if SessionEnd will work before exiting?
368 |
369 | **A**: Yes:
370 | ```bash
371 | node ~/.claude/hooks/core/session-end.js
372 | ```
373 |
374 | This runs the hook with a test conversation and shows what would happen.
375 |
376 | ### Q: How do I see all my session consolidation memories?
377 |
378 | **A**:
379 | ```bash
380 | curl -sk "https://localhost:8000/api/search/by-tag" \
381 | -H "Content-Type: application/json" \
382 | -d '{"tags": ["session-consolidation"]}' | \
383 | python -m json.tool
384 | ```
385 |
386 | ### Q: What's the difference between SessionStart and SessionEnd hooks?
387 |
388 | **A**:
389 | - **SessionStart**: Loads and injects memory context at session start
390 | - **SessionEnd**: Analyzes and stores session outcomes at session end
391 | - Both can have connection issues (check endpoint configuration)
392 | - SessionStart has timeout issues on Windows (Ctrl+C hang bug)
393 |
394 | ---
395 |
396 | **Last Updated**: 2025-11-01
397 | **Applies to**: v8.15.1+
398 | **Author**: Community Documentation
399 |
```
--------------------------------------------------------------------------------
/tests/integration/test_api_memories_chronological.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Test chronological ordering and pagination for the /api/memories endpoint.
3 |
4 | Tests verify that the GitHub issue #79 has been properly resolved by ensuring:
5 | 1. Memories are returned in chronological order (newest first)
6 | 2. Pagination works correctly with chronological ordering
7 | 3. All storage backends support the new ordering
8 | """
9 |
10 | import pytest
11 | import asyncio
12 | import time
13 | import tempfile
14 | from datetime import datetime, timedelta
15 | from typing import List, Dict, Any
16 | import os
17 |
18 | # Import project modules
19 | # Note: This assumes the project is installed in editable mode with `pip install -e .`
20 | # or PYTHONPATH is configured correctly for the test environment
21 | from mcp_memory_service.models.memory import Memory
22 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
23 |
24 |
class TestChronologicalOrdering:
    """Test chronological ordering functionality across all storage backends.

    Each test creates an isolated SQLite-vec database in a temporary
    directory via the `_open_storage` helper, so tests never share state.
    """

    async def _open_storage(self, tmp_dir: str) -> SqliteVecMemoryStorage:
        """Create and initialize a fresh SQLite-vec storage rooted in *tmp_dir*."""
        storage = SqliteVecMemoryStorage(os.path.join(tmp_dir, "test.db"))
        await storage.initialize()
        return storage

    @staticmethod
    def _assert_newest_first(memories) -> None:
        """Assert that *memories* are sorted by created_at, newest first."""
        for i in range(len(memories) - 1):
            current_time = memories[i].created_at or 0
            next_time = memories[i + 1].created_at or 0
            assert current_time >= next_time, \
                f"Memory at index {i} is older than memory at index {i + 1}"

    async def create_test_memories(self, storage) -> List[Memory]:
        """Create and store 5 test memories at 10-minute intervals.

        Timestamps start one hour in the past, so "Test memory 5" is the
        newest. Returns the memories in creation (oldest-first) order.
        """
        memories = []
        base_time = time.time() - 3600  # Start 1 hour ago

        for i in range(5):
            timestamp = base_time + (i * 600)  # 10-minute intervals
            memory = Memory(
                content=f"Test memory {i + 1}",
                content_hash=f"hash_{i + 1}",
                tags=[f"tag{i + 1}", "test"],
                memory_type="test",
                metadata={"index": i + 1},
                created_at=timestamp,
                updated_at=timestamp
            )
            memories.append(memory)

            success, message = await storage.store(memory)
            assert success, f"Failed to store memory {i + 1}: {message}"

        return memories

    @pytest.mark.asyncio
    async def test_get_all_memories_chronological_order_sqlite(self):
        """get_all_memories returns memories in chronological order, newest first (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            retrieved_memories = await storage.get_all_memories()

            assert len(retrieved_memories) == 5, f"Expected 5 memories, got {len(retrieved_memories)}"
            self._assert_newest_first(retrieved_memories)

            # Verify the concrete order matches expectations (newest first).
            expected_order = [5, 4, 3, 2, 1]  # Newest to oldest
            actual_order = [int(mem.content.split()[-1]) for mem in retrieved_memories]
            assert actual_order == expected_order, f"Expected order {expected_order}, got {actual_order}"

    @pytest.mark.asyncio
    async def test_pagination_with_chronological_order_sqlite(self):
        """Pagination maintains chronological order across pages (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            # Page through 5 records in chunks of 2.
            first_page = await storage.get_all_memories(limit=2, offset=0)
            assert len(first_page) == 2

            second_page = await storage.get_all_memories(limit=2, offset=2)
            assert len(second_page) == 2

            third_page = await storage.get_all_memories(limit=2, offset=4)
            assert len(third_page) == 1

            # Concatenated pages must still be newest-first overall.
            all_paginated = first_page + second_page + third_page
            self._assert_newest_first(all_paginated)

            expected_content_order = ["Test memory 5", "Test memory 4", "Test memory 3", "Test memory 2", "Test memory 1"]
            actual_content_order = [mem.content for mem in all_paginated]
            assert actual_content_order == expected_content_order

    @pytest.mark.asyncio
    async def test_count_all_memories_sqlite(self):
        """count_all_memories tracks the number of stored memories (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            # Initially should be empty.
            initial_count = await storage.count_all_memories()
            assert initial_count == 0

            await self.create_test_memories(storage)

            # Should now have 5 memories.
            final_count = await storage.count_all_memories()
            assert final_count == 5

    @pytest.mark.asyncio
    async def test_empty_storage_handling_sqlite(self):
        """Empty storage yields empty lists and a zero count (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            memories = await storage.get_all_memories()
            assert memories == []

            paginated = await storage.get_all_memories(limit=10, offset=0)
            assert paginated == []

            count = await storage.count_all_memories()
            assert count == 0

    @pytest.mark.asyncio
    async def test_offset_beyond_total_sqlite(self):
        """An offset past the last record returns an empty list (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            memories = await storage.get_all_memories(limit=10, offset=100)
            assert memories == []

    @pytest.mark.asyncio
    async def test_large_limit_sqlite(self):
        """A limit larger than the record count returns all records (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)
            await self.create_test_memories(storage)

            memories = await storage.get_all_memories(limit=100, offset=0)
            assert len(memories) == 5  # Should return all 5 memories

    @pytest.mark.asyncio
    async def test_mixed_timestamps_ordering_sqlite(self):
        """Memories inserted out of time order still come back newest-first (SQLite)."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            storage = await self._open_storage(tmp_dir)

            # Deliberately unsorted timestamps.
            base_time = time.time()
            timestamps = [base_time + 300, base_time + 100, base_time + 500, base_time + 200, base_time + 400]

            for i, timestamp in enumerate(timestamps):
                memory = Memory(
                    content=f"Mixed memory {i + 1}",
                    content_hash=f"mixed_hash_{i + 1}",
                    tags=["mixed", "test"],
                    memory_type="mixed",
                    metadata={"timestamp": timestamp},
                    created_at=timestamp,
                    updated_at=timestamp
                )

                success, message = await storage.store(memory)
                assert success, f"Failed to store mixed memory {i + 1}: {message}"

            memories = await storage.get_all_memories()

            # Should be ordered by timestamp (newest first).
            expected_order = [base_time + 500, base_time + 400, base_time + 300, base_time + 200, base_time + 100]
            actual_timestamps = [mem.created_at for mem in memories]
            assert actual_timestamps == expected_order, f"Expected {expected_order}, got {actual_timestamps}"
213 |
214 |
215 | class TestAPIChronologicalIntegration:
216 | """Integration tests that would test the actual API endpoints.
217 |
218 | These tests are structured to be easily adaptable for testing the actual
219 | FastAPI endpoints when a test client is available.
220 | """
221 |
222 | def test_api_endpoint_structure(self):
223 | """Test that the API endpoint imports and structure are correct."""
224 | # Import the API router to ensure it loads correctly
225 | from mcp_memory_service.web.api.memories import router
226 |
227 | # Verify the router exists and has the expected endpoints
228 | routes = [route.path for route in router.routes]
229 | assert "/memories" in routes
230 | assert "/memories/{content_hash}" in routes
231 |
232 | def test_memory_response_model(self):
233 | """Test that the response models include necessary fields for chronological ordering."""
234 | from mcp_memory_service.web.api.memories import MemoryResponse, MemoryListResponse
235 |
236 | # Verify MemoryResponse has timestamp fields
237 | response_fields = MemoryResponse.__fields__.keys()
238 | assert "created_at" in response_fields
239 | assert "created_at_iso" in response_fields
240 | assert "updated_at" in response_fields
241 | assert "updated_at_iso" in response_fields
242 |
243 | # Verify MemoryListResponse has pagination fields
244 | list_fields = MemoryListResponse.__fields__.keys()
245 | assert "memories" in list_fields
246 | assert "total" in list_fields
247 | assert "page" in list_fields
248 | assert "page_size" in list_fields
249 | assert "has_more" in list_fields
250 |
251 | def test_storage_backend_type_compatibility(self):
252 | """Test that the API endpoints use the correct base storage type."""
253 | from mcp_memory_service.web.api.memories import list_memories
254 | import inspect
255 |
256 | # Get the signature of the list_memories function
257 | sig = inspect.signature(list_memories)
258 | storage_param = sig.parameters['storage']
259 |
260 | # Check that it uses the base MemoryStorage type, not a specific implementation
261 | assert 'MemoryStorage' in str(storage_param.annotation)
262 |
263 |
if __name__ == "__main__":
    # Run tests directly. Propagate pytest's exit status so that a direct
    # invocation (e.g. from CI) fails when the tests fail instead of
    # always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/discovery/client.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Discovery client for MCP Memory Service.
17 |
18 | This module provides a high-level client for discovering and connecting to
19 | MCP Memory Service instances on the local network.
20 | """
21 |
import asyncio
import logging
import time
from dataclasses import dataclass
from typing import List, Optional, Dict, Any

import aiohttp

from .mdns_service import ServiceDiscovery, ServiceDetails
from ..config import MDNS_DISCOVERY_TIMEOUT
30 |
31 | logger = logging.getLogger(__name__)
32 |
33 |
@dataclass
class HealthStatus:
    """Health status of a discovered service.

    Produced by DiscoveryClient.check_service_health(); on a failed probe
    the fields are filled with placeholder values and ``error`` describes
    what went wrong.
    """
    healthy: bool  # True when the /health endpoint answered with HTTP 200
    status: str  # server-reported status; 'error'/'timeout'/'unknown' on probe failure
    backend: str  # storage backend reported by the server ('unknown' on failure)
    statistics: Dict[str, Any]  # server statistics payload ({} on failure)
    response_time_ms: float  # health-request round trip in milliseconds
    error: Optional[str] = None  # failure description; None when healthy
43 |
44 |
class DiscoveryClient:
    """High-level client for discovering and validating MCP Memory Services.

    Wraps mDNS ServiceDiscovery with health probing and preference-based
    selection of the discovered services.
    """

    def __init__(self, discovery_timeout: int = MDNS_DISCOVERY_TIMEOUT):
        """Initialize the client.

        Args:
            discovery_timeout: mDNS discovery window in seconds.
        """
        self.discovery_timeout = discovery_timeout
        self._discovery = ServiceDiscovery(discovery_timeout=discovery_timeout)

    @staticmethod
    def _insecure_session(timeout: float) -> aiohttp.ClientSession:
        """Create an HTTP session that accepts self-signed certificates.

        Locally discovered services commonly serve HTTPS with self-signed
        certs, so TLS verification is disabled for these local probes.
        """
        return aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=timeout),
            # ssl=False replaces the deprecated verify_ssl=False (aiohttp 3.x).
            connector=aiohttp.TCPConnector(ssl=False),
        )

    async def find_best_service(
        self,
        prefer_https: bool = True,
        require_auth: Optional[bool] = None,
        validate_health: bool = True
    ) -> Optional[ServiceDetails]:
        """
        Find the best MCP Memory Service on the network.

        Args:
            prefer_https: Prefer HTTPS services over HTTP
            require_auth: Require (True) or reject (False) services with auth, None for any
            validate_health: Validate service health before returning

        Returns:
            Best service found, or None if no suitable service
        """
        services = await self.discover_services()
        if not services:
            logger.info("No MCP Memory Services found on the network")
            return None

        # Keep only services matching the auth requirement (None accepts any).
        candidates = [
            service for service in services
            if require_auth is None or service.requires_auth == require_auth
        ]
        if not candidates:
            logger.info("No services match the specified requirements")
            return None

        def service_priority(service: ServiceDetails) -> tuple:
            # Score protocol preference; break ties by port for consistency.
            https_score = 1 if service.https else 0
            if not prefer_https:
                https_score = 1 - https_score  # Invert preference
            return (https_score, service.port)

        candidates.sort(key=service_priority, reverse=True)

        if not validate_health:
            # Return the top-ranked service without probing it.
            best_service = candidates[0]
            logger.info(f"Selected service: {best_service.name} at {best_service.url}")
            return best_service

        # Probe candidates in priority order; return the first healthy one.
        for service in candidates:
            health = await self.check_service_health(service)
            if health and health.healthy:
                logger.info(f"Selected healthy service: {service.name} at {service.url}")
                return service
            logger.warning(
                f"Service {service.name} failed health check: "
                f"{health.error if health else 'Unknown error'}"
            )

        logger.warning("No healthy services found")
        return None

    async def discover_services(self) -> List[ServiceDetails]:
        """Discover all MCP Memory Services on the network."""
        logger.info("Discovering MCP Memory Services on the network...")
        services = await self._discovery.discover_services()

        if services:
            logger.info(f"Found {len(services)} MCP Memory Services:")
            for service in services:
                logger.info(f"  - {service.name} at {service.url} (Auth: {service.requires_auth})")
        else:
            logger.info("No MCP Memory Services found")

        return services

    async def check_service_health(
        self,
        service: ServiceDetails,
        timeout: float = 5.0
    ) -> Optional[HealthStatus]:
        """
        Check the health of a discovered service.

        Args:
            service: Service to check
            timeout: Request timeout in seconds

        Returns:
            HealthStatus describing the probe result. Failures (timeouts,
            connection errors, non-200 responses) are reported as an
            unhealthy status rather than raised.
        """
        health_url = f"{service.api_url}/health"
        start_time = time.time()

        try:
            async with self._insecure_session(timeout) as session:
                async with session.get(health_url) as response:
                    response_time = (time.time() - start_time) * 1000  # Convert to ms

                    if response.status == 200:
                        data = await response.json()
                        return HealthStatus(
                            healthy=True,
                            status=data.get('status', 'unknown'),
                            backend=data.get('storage_type', 'unknown'),
                            statistics=data.get('statistics', {}),
                            response_time_ms=response_time
                        )
                    return HealthStatus(
                        healthy=False,
                        status='error',
                        backend='unknown',
                        statistics={},
                        response_time_ms=response_time,
                        error=f"HTTP {response.status}"
                    )

        except asyncio.TimeoutError:
            return HealthStatus(
                healthy=False,
                status='timeout',
                backend='unknown',
                statistics={},
                response_time_ms=timeout * 1000,
                error="Request timeout"
            )
        except Exception as e:
            return HealthStatus(
                healthy=False,
                status='error',
                backend='unknown',
                statistics={},
                response_time_ms=0,
                error=str(e)
            )

    async def get_service_capabilities(
        self,
        service: ServiceDetails,
        api_key: Optional[str] = None,
        timeout: float = 5.0
    ) -> Optional[Dict[str, Any]]:
        """
        Get detailed capabilities of a service via its OpenAPI spec.

        Args:
            service: Service to query
            api_key: API key if required
            timeout: Request timeout

        Returns:
            Parsed OpenAPI document, or None if the request failed
        """
        try:
            headers = {}
            if api_key and service.requires_auth:
                headers['Authorization'] = f'Bearer {api_key}'

            async with self._insecure_session(timeout) as session:
                openapi_url = f"{service.api_url}/openapi.json"
                async with session.get(openapi_url, headers=headers) as response:
                    if response.status == 200:
                        return await response.json()

        except Exception as e:
            logger.error(f"Failed to get service capabilities: {e}")

        return None

    async def find_services_with_health(
        self,
        prefer_https: bool = True,
        require_auth: Optional[bool] = None
    ) -> List[tuple[ServiceDetails, HealthStatus]]:
        """
        Find all services and their health status.

        Returns:
            List of (service, health_status) tuples, sorted by preference
        """
        services = await self.discover_services()
        if not services:
            return []

        # Filter by auth requirement.
        if require_auth is not None:
            services = [s for s in services if s.requires_auth == require_auth]

        # Probe all services concurrently; exceptions become unhealthy entries.
        health_tasks = [self.check_service_health(service) for service in services]
        health_results = await asyncio.gather(*health_tasks, return_exceptions=True)

        service_health_pairs = []
        for service, health_result in zip(services, health_results):
            if isinstance(health_result, Exception):
                health = HealthStatus(
                    healthy=False,
                    status='error',
                    backend='unknown',
                    statistics={},
                    response_time_ms=0,
                    error=str(health_result)
                )
            else:
                health = health_result or HealthStatus(
                    healthy=False,
                    status='unknown',
                    backend='unknown',
                    statistics={},
                    response_time_ms=0,
                    error="No response"
                )

            service_health_pairs.append((service, health))

        # Sort: healthy first, then protocol preference, then fastest response.
        def sort_key(pair: tuple[ServiceDetails, HealthStatus]) -> tuple:
            service, health = pair
            healthy_score = 1 if health.healthy else 0
            https_score = 1 if service.https else 0
            if not prefer_https:
                https_score = 1 - https_score
            response_time = health.response_time_ms if health.healthy else float('inf')
            # Negate response time so reverse=True puts faster services first.
            return (healthy_score, https_score, -response_time)

        service_health_pairs.sort(key=sort_key, reverse=True)
        return service_health_pairs

    async def stop(self) -> None:
        """Stop the underlying mDNS discovery."""
        await self._discovery.stop_discovery()
```
--------------------------------------------------------------------------------
/tests/unit/test_csv_loader.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Unit tests for CSV document loader.
4 | """
5 |
import asyncio
import csv
import io
import tempfile
from pathlib import Path

import pytest

from mcp_memory_service.ingestion.csv_loader import CSVLoader
from mcp_memory_service.ingestion.base import DocumentChunk
from conftest import extract_chunks_from_temp_file
15 |
16 |
class TestCSVLoader:
    """Test suite for CSVLoader class."""

    @staticmethod
    async def _collect_chunks(loader, path, **kwargs):
        """Drain loader.extract_chunks(path, **kwargs) into a list."""
        return [chunk async for chunk in loader.extract_chunks(path, **kwargs)]

    def test_initialization(self):
        """Test basic initialization of CSVLoader."""
        loader = CSVLoader(chunk_size=500, chunk_overlap=50)

        assert loader.chunk_size == 500
        assert loader.chunk_overlap == 50
        assert 'csv' in loader.supported_extensions

    def test_can_handle_file(self):
        """Test file format detection."""
        loader = CSVLoader()

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_file.touch()

            txt_file = Path(tmpdir) / "test.txt"
            txt_file.touch()

            # Supported format
            assert loader.can_handle(csv_file) is True
            # Unsupported format
            assert loader.can_handle(txt_file) is False

    @pytest.mark.asyncio
    async def test_extract_chunks_simple_csv(self):
        """Test extraction from simple CSV file."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_content = """name,age,city
John,25,New York
Jane,30,San Francisco"""
            csv_file.write_text(csv_content)

            chunks = await self._collect_chunks(loader, csv_file)

            # Verify chunks were created
            assert len(chunks) > 0

            # Verify chunk structure
            first_chunk = chunks[0]
            assert isinstance(first_chunk, DocumentChunk)
            assert isinstance(first_chunk.content, str)
            assert first_chunk.source_file == csv_file

            # Verify content contains formatted rows
            content = first_chunk.content
            assert "name: John" in content
            assert "age: 25" in content
            assert "city: New York" in content
            assert "name: Jane" in content
            assert "age: 30" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_csv_with_headers(self):
        """Test extraction from CSV with header detection."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """product,price,category
Widget,19.99,Electronics
Gadget,29.99,Electronics
Book,12.99,Media"""
        chunks = await extract_chunks_from_temp_file(loader, "test.csv", csv_content)

        content = chunks[0].content
        assert "product: Widget" in content
        assert "price: 19.99" in content
        assert "category: Electronics" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_csv_no_headers(self):
        """Test extraction from CSV without headers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """John,25,New York
Jane,30,San Francisco"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            has_header=False
        )

        content = chunks[0].content
        # Should fall back to synthetic col_1, col_2, col_3 headers
        assert "col_1: John" in content
        assert "col_2: 25" in content
        assert "col_3: New York" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_different_delimiters(self):
        """Test extraction with different CSV delimiters."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        # Semicolon-delimited input
        csv_content = "name;age;city\nJohn;25;New York\nJane;30;San Francisco"
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            delimiter=';'
        )

        content = chunks[0].content
        assert "name: John" in content
        assert "age: 25" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_row_numbers(self):
        """Test extraction with row numbers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """name,age
John,25
Jane,30"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            include_row_numbers=True
        )

        content = chunks[0].content
        assert "Row 1:" in content
        assert "Row 2:" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_no_row_numbers(self):
        """Test extraction without row numbers."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        csv_content = """name,age
John,25"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "test.csv",
            csv_content,
            include_row_numbers=False
        )

        content = chunks[0].content
        assert "Row:" in content
        assert "Row 1:" not in content

    @pytest.mark.asyncio
    async def test_extract_chunks_large_file_chunking(self):
        """Test that large CSV files are processed correctly."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "large.csv"
            rows = ["name,value"] + [f"item{i},{i}" for i in range(10)]
            csv_file.write_text("\n".join(rows))

            chunks = await self._collect_chunks(loader, csv_file, max_rows_per_chunk=50)

            # Should create at least one chunk
            assert len(chunks) >= 1

            # Verify all content is included
            all_content = "".join(chunk.content for chunk in chunks)
            assert "item0" in all_content
            assert "item9" in all_content
            assert "name: item0" in all_content
            assert "value: 0" in all_content

    @pytest.mark.asyncio
    async def test_extract_chunks_empty_file(self):
        """Test handling of empty CSV files."""
        loader = CSVLoader()

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "empty.csv"
            csv_file.write_text("")

            # Should not raise but yield no chunks
            chunks = await self._collect_chunks(loader, csv_file)
            assert len(chunks) == 0

    @pytest.mark.asyncio
    async def test_extract_chunks_malformed_csv(self):
        """Test handling of malformed CSV files."""
        loader = CSVLoader()

        # CSV with inconsistent column counts - should still work
        csv_content = """name,age,city
John,25
Jane,30,San Francisco,Extra"""
        chunks = await extract_chunks_from_temp_file(loader, "malformed.csv", csv_content)

        # Should handle gracefully
        assert len(chunks) > 0
        content = chunks[0].content
        assert "name: John" in content
        assert "name: Jane" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_encoding_detection(self):
        """Test automatic encoding detection."""
        loader = CSVLoader()

        # UTF-8 content with non-ASCII characters
        csv_content = """name,city
José,São Paulo
François,Montréal"""
        chunks = await extract_chunks_from_temp_file(
            loader,
            "utf8.csv",
            csv_content,
            encoding='utf-8'
        )

        content = chunks[0].content
        assert "José" in content
        assert "São Paulo" in content

    @pytest.mark.asyncio
    async def test_extract_chunks_metadata(self):
        """Test that metadata is properly included."""
        loader = CSVLoader(chunk_size=1000, chunk_overlap=200)

        with tempfile.TemporaryDirectory() as tmpdir:
            csv_file = Path(tmpdir) / "test.csv"
            csv_content = """name,age
John,25
Jane,30"""
            csv_file.write_text(csv_content)

            chunks = await self._collect_chunks(loader, csv_file)

            first_chunk = chunks[0]
            assert first_chunk.metadata['content_type'] == 'csv'
            assert first_chunk.metadata['has_header'] is True
            assert first_chunk.metadata['column_count'] == 2
            assert first_chunk.metadata['row_count'] == 2
            assert first_chunk.metadata['headers'] == ['name', 'age']
            assert 'file_size' in first_chunk.metadata
            assert first_chunk.metadata['loader_type'] == 'CSVLoader'
287 |
288 |
class TestCSVLoaderRegistry:
    """Test CSV loader registration."""

    def test_loader_registration(self):
        """The ingestion registry should resolve .csv files to a CSVLoader."""
        from mcp_memory_service.ingestion.registry import get_loader_for_file

        import tempfile
        with tempfile.TemporaryDirectory() as tmpdir:
            # Write a minimal CSV file for the registry to inspect.
            sample = Path(tmpdir) / "test.csv"
            sample.write_text("name,value\nJohn,25")

            resolved = get_loader_for_file(sample)

            # The registry must return a CSVLoader instance.
            assert resolved is not None
            assert isinstance(resolved, CSVLoader)
307 |
308 |
309 | class TestCSVDelimiterDetection:
310 | """Test CSV delimiter detection."""
311 |
312 | def test_detect_delimiter_comma(self):
313 | """Test comma delimiter detection."""
314 | loader = CSVLoader()
315 | content = "name,age,city\nJohn,25,New York\nJane,30,San Francisco"
316 | delimiter = loader._detect_delimiter(content)
317 | assert delimiter == ','
318 |
319 | def test_detect_delimiter_semicolon(self):
320 | """Test semicolon delimiter detection."""
321 | loader = CSVLoader()
322 | content = "name;age;city\nJohn;25;New York\nJane;30;San Francisco"
323 | delimiter = loader._detect_delimiter(content)
324 | assert delimiter == ';'
325 |
326 | def test_detect_delimiter_tab(self):
327 | """Test tab delimiter detection."""
328 | loader = CSVLoader()
329 | content = "name\tage\tcity\nJohn\t25\tNew York\nJane\t30\tSan Francisco"
330 | delimiter = loader._detect_delimiter(content)
331 | assert delimiter == '\t'
332 |
333 | def test_detect_delimiter_pipe(self):
334 | """Test pipe delimiter detection."""
335 | loader = CSVLoader()
336 | content = "name|age|city\nJohn|25|New York\nJane|30|San Francisco"
337 | delimiter = loader._detect_delimiter(content)
338 | assert delimiter == '|'
339 |
340 |
341 | if __name__ == '__main__':
342 | pytest.main([__file__, '-v'])
343 |
```
--------------------------------------------------------------------------------
/archive/docs-removed-2025-08-23/windows.md:
--------------------------------------------------------------------------------
```markdown
1 | # Windows Setup Guide
2 |
3 | This guide provides comprehensive instructions for setting up and running the MCP Memory Service on Windows systems, including handling common Windows-specific issues.
4 |
5 | ## Prerequisites
6 |
7 | - **Python 3.10 or newer** (Python 3.11 recommended)
8 | - **Git for Windows** ([download here](https://git-scm.com/download/win))
9 | - **Visual Studio Build Tools** (for PyTorch compilation)
10 | - **PowerShell 5.1+** or **Windows Terminal** (recommended)
11 |
12 | ## Quick Installation
13 |
14 | ### Automatic Installation (Recommended)
15 |
16 | ```powershell
17 | # Clone repository
18 | git clone https://github.com/doobidoo/mcp-memory-service.git
19 | cd mcp-memory-service
20 |
21 | # Run Windows-specific installer
22 | python install.py --windows
23 | ```
24 |
25 | The installer automatically:
26 | - Detects CUDA availability
27 | - Installs the correct PyTorch version
28 | - Configures Windows-specific settings
29 | - Sets up optimal storage backend
30 |
31 | ## Manual Installation
32 |
33 | ### 1. Environment Setup
34 |
35 | ```powershell
36 | # Clone repository
37 | git clone https://github.com/doobidoo/mcp-memory-service.git
38 | cd mcp-memory-service
39 |
40 | # Create virtual environment
41 | python -m venv venv
42 |
43 | # Activate virtual environment
44 | venv\Scripts\activate
45 |
46 | # Upgrade pip
47 | python -m pip install --upgrade pip
48 | ```
49 |
50 | ### 2. Install Dependencies
51 |
52 | #### For CUDA-enabled Systems
53 |
54 | ```powershell
55 | # Install PyTorch with CUDA support
56 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
57 |
58 | # Install other dependencies
59 | pip install -e .
60 | pip install chromadb sentence-transformers
61 | ```
62 |
63 | #### For CPU-only Systems
64 |
65 | ```powershell
66 | # Install CPU-only PyTorch
67 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
68 |
69 | # Install with SQLite-vec backend (recommended for CPU)
70 | pip install -e .
71 | pip install sentence-transformers sqlite-vec
72 | ```
73 |
74 | ### 3. Windows-Specific Installation Script
75 |
76 | If you encounter issues, use the Windows-specific installation script:
77 |
78 | ```powershell
79 | python scripts/install_windows.py
80 | ```
81 |
82 | This script handles:
83 | 1. CUDA detection and appropriate PyTorch installation
84 | 2. Resolving common Windows dependency conflicts
85 | 3. Setting up Windows-specific environment variables
86 | 4. Configuring optimal storage backend based on hardware
87 |
88 | ## Configuration
89 |
90 | ### Environment Variables
91 |
92 | #### For CUDA Systems
93 |
94 | ```powershell
95 | # Set environment variables (PowerShell)
96 | $env:MCP_MEMORY_STORAGE_BACKEND = "chromadb"
97 | $env:MCP_MEMORY_USE_CUDA = "true"
98 | $env:MCP_MEMORY_CHROMA_PATH = "$env:USERPROFILE\.mcp_memory_chroma"
99 |
100 | # Or set permanently
101 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "chromadb", "User")
102 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_USE_CUDA", "true", "User")
103 | ```
104 |
105 | #### For CPU-only Systems
106 |
107 | ```powershell
108 | # Set environment variables (PowerShell)
109 | $env:MCP_MEMORY_STORAGE_BACKEND = "sqlite_vec"
110 | $env:MCP_MEMORY_SQLITE_VEC_PATH = "$env:USERPROFILE\.mcp_memory_sqlite"
111 | $env:MCP_MEMORY_CPU_ONLY = "true"
112 |
113 | # Or set permanently
114 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_STORAGE_BACKEND", "sqlite_vec", "User")
115 | [Environment]::SetEnvironmentVariable("MCP_MEMORY_CPU_ONLY", "true", "User")
116 | ```
117 |
118 | ### Windows Batch Scripts
119 |
120 | The repository includes Windows batch scripts for easy startup:
121 |
122 | #### `scripts/run/run-with-uv.bat`
123 |
124 | ```batch
125 | @echo off
126 | cd /d "%~dp0..\.."
127 | call venv\Scripts\activate.bat
128 | python src\mcp_memory_service\server.py
129 | ```
130 |
131 | #### Usage
132 |
133 | ```powershell
134 | # Run the server
135 | .\scripts\run\run-with-uv.bat
136 |
137 | # Or run directly
138 | python src\mcp_memory_service\server.py
139 | ```
140 |
141 | ## Claude Desktop Configuration
142 |
143 | ### Windows Configuration File Location
144 |
145 | Claude Desktop configuration is typically located at:
146 | ```
147 | %APPDATA%\Claude\claude_desktop_config.json
148 | ```
149 |
150 | ### Configuration Examples
151 |
152 | #### For CUDA Systems
153 |
154 | ```json
155 | {
156 | "mcpServers": {
157 | "memory": {
158 | "command": "python",
159 | "args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
160 | "env": {
161 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
162 | "MCP_MEMORY_USE_CUDA": "true",
163 | "PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
164 | }
165 | }
166 | }
167 | }
168 | ```
169 |
170 | #### For CPU-only Systems
171 |
172 | ```json
173 | {
174 | "mcpServers": {
175 | "memory": {
176 | "command": "python",
177 | "args": ["C:\\path\\to\\mcp-memory-service\\src\\mcp_memory_service\\server.py"],
178 | "env": {
179 | "MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
180 | "MCP_MEMORY_CPU_ONLY": "true",
181 | "PATH": "C:\\path\\to\\mcp-memory-service\\venv\\Scripts;%PATH%"
182 | }
183 | }
184 | }
185 | }
186 | ```
187 |
188 | #### Using Batch Script
189 |
190 | ```json
191 | {
192 | "mcpServers": {
193 | "memory": {
194 | "command": "C:\\path\\to\\mcp-memory-service\\scripts\\run\\run-with-uv.bat"
195 | }
196 | }
197 | }
198 | ```
199 |
200 | ## Hardware Detection and Optimization
201 |
202 | ### CUDA Detection
203 |
204 | The installer automatically detects CUDA availability:
205 |
206 | ```python
207 | def detect_cuda():
208 | try:
209 | import torch
210 | return torch.cuda.is_available()
211 | except ImportError:
212 | return False
213 | ```
214 |
215 | ### DirectML Support
216 |
217 | For Windows systems without CUDA but with DirectX 12 compatible GPUs:
218 |
219 | ```powershell
220 | # Install DirectML-enabled PyTorch
221 | pip install torch-directml
222 | ```
223 |
224 | Configure for DirectML:
225 | ```powershell
226 | $env:MCP_MEMORY_USE_DIRECTML = "true"
227 | $env:MCP_MEMORY_DEVICE = "dml"
228 | ```
229 |
230 | ## Windows-Specific Features
231 |
232 | ### Windows Service Installation
233 |
234 | To run MCP Memory Service as a Windows service:
235 |
236 | ```powershell
237 | # Install as Windows service (requires admin privileges)
238 | python scripts/install_windows_service.py install
239 |
240 | # Start service
241 | net start MCPMemoryService
242 |
243 | # Stop service
244 | net stop MCPMemoryService
245 |
246 | # Remove service
247 | python scripts/install_windows_service.py remove
248 | ```
249 |
250 | ### Task Scheduler Integration
251 |
252 | Create a scheduled task to start MCP Memory Service on boot:
253 |
254 | ```powershell
255 | # Create scheduled task
256 | schtasks /create /tn "MCP Memory Service" /tr "C:\path\to\mcp-memory-service\scripts\run\run-with-uv.bat" /sc onlogon /ru "$env:USERNAME"
257 |
258 | # Delete scheduled task
259 | schtasks /delete /tn "MCP Memory Service" /f
260 | ```
261 |
262 | ## Troubleshooting
263 |
264 | ### Common Windows Issues
265 |
266 | #### 1. Path Length Limitations
267 |
268 | **Symptom**: Installation fails with "path too long" errors
269 |
270 | **Solution**: Enable long path support:
271 | ```powershell
272 | # Run as Administrator
273 | New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force
274 | ```
275 |
276 | #### 2. Visual Studio Build Tools Missing
277 |
278 | **Symptom**:
279 | ```
280 | Microsoft Visual C++ 14.0 is required
281 | ```
282 |
283 | **Solution**: Install Visual Studio Build Tools:
284 | ```powershell
285 | # Download and install from:
286 | # https://visualstudio.microsoft.com/visual-cpp-build-tools/
287 |
288 | # Or install via winget
289 | winget install Microsoft.VisualStudio.2022.BuildTools
290 | ```
291 |
292 | #### 3. CUDA Version Mismatch
293 |
294 | **Symptom**: PyTorch CUDA installation issues
295 |
296 | **Solution**: Match PyTorch CUDA version to your installed CUDA:
297 | ```powershell
298 | # Check CUDA version
299 | nvcc --version
300 |
301 | # Install matching PyTorch version
302 | # For CUDA 11.8
303 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
304 |
305 | # For CUDA 12.1
306 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
307 | ```
308 |
309 | #### 4. Permission Issues
310 |
311 | **Symptom**: Access denied errors when installing or running
312 |
313 | **Solution**: Run PowerShell as Administrator and check folder permissions:
314 | ```powershell
315 | # Check current user permissions
316 | whoami /groups
317 |
318 | # Run installation as Administrator if needed
319 | # Or adjust folder permissions
320 | icacls "C:\path\to\mcp-memory-service" /grant "$env:USERNAME:(F)" /t
321 | ```
322 |
323 | #### 5. Windows Defender Issues
324 |
325 | **Symptom**: Installation files deleted or blocked
326 |
327 | **Solution**: Add exclusions to Windows Defender:
328 | ```powershell
329 | # Add folder exclusion (run as Administrator)
330 | Add-MpPreference -ExclusionPath "C:\path\to\mcp-memory-service"
331 |
332 | # Add process exclusion
333 | Add-MpPreference -ExclusionProcess "python.exe"
334 | ```
335 |
336 | ### Diagnostic Commands
337 |
338 | #### System Information
339 |
340 | ```powershell
341 | # Check Python version and location
342 | python --version
343 | Get-Command python
344 |
345 | # Check pip version
346 | pip --version
347 |
348 | # Check CUDA availability
349 | python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
350 |
351 | # Check DirectML (if installed)
352 | python -c "import torch_directml; print('DirectML available')"
353 |
354 | # Check Windows version
355 | Get-ComputerInfo | Select-Object WindowsProductName, WindowsVersion
356 | ```
357 |
358 | #### Environment Verification
359 |
360 | ```powershell
361 | # Check environment variables
362 | Get-ChildItem Env: | Where-Object {$_.Name -like "MCP_MEMORY_*"}
363 |
364 | # Check virtual environment
365 | echo $env:VIRTUAL_ENV
366 |
367 | # Verify key packages
368 | python -c "import torch; print(f'PyTorch: {torch.__version__}')"
369 | python -c "import sentence_transformers; print('SentenceTransformers: OK')"
370 | python -c "import chromadb; print('ChromaDB: OK')" # or sqlite_vec
371 | ```
372 |
373 | #### Network and Firewall
374 |
375 | ```powershell
376 | # Check if Windows Firewall is blocking
377 | Get-NetFirewallRule -DisplayName "*Python*" | Format-Table
378 |
379 | # Test network connectivity (if using HTTP mode)
380 | Test-NetConnection -ComputerName localhost -Port 8000
381 | ```
382 |
383 | ### Performance Optimization
384 |
385 | #### Windows-Specific Settings
386 |
387 | ```powershell
388 | # Optimize for machine learning workloads
389 | $env:OMP_NUM_THREADS = [Environment]::ProcessorCount
390 | $env:MKL_NUM_THREADS = [Environment]::ProcessorCount
391 |
392 | # Set Windows-specific memory settings
393 | $env:MCP_MEMORY_WINDOWS_OPTIMIZATION = "true"
394 | $env:MCP_MEMORY_BATCH_SIZE = "32"
395 | ```
396 |
397 | #### Resource Monitoring
398 |
399 | ```powershell
400 | # Monitor memory usage
401 | Get-Process python | Select-Object ProcessName, WorkingSet, CPU
402 |
403 | # Monitor GPU usage (if CUDA)
404 | nvidia-smi
405 |
406 | # Monitor disk I/O
407 | Get-Counter "\PhysicalDisk(_Total)\Disk Reads/sec"
408 | ```
409 |
410 | ## Development on Windows
411 |
412 | ### Setting up Development Environment
413 |
414 | ```powershell
415 | # Clone for development
416 | git clone https://github.com/doobidoo/mcp-memory-service.git
417 | cd mcp-memory-service
418 |
419 | # Create development environment
420 | python -m venv venv-dev
421 | venv-dev\Scripts\activate
422 |
423 | # Install in development mode
424 | pip install -e .
425 | pip install pytest black isort mypy
426 |
427 | # Run tests
428 | pytest tests/
429 | ```
430 |
431 | ### Windows-Specific Testing
432 |
433 | ```powershell
434 | # Run Windows-specific tests
435 | pytest tests/platform/test_windows.py -v
436 |
437 | # Test CUDA functionality (if available)
438 | pytest tests/cuda/ -v
439 |
440 | # Test DirectML functionality (if available)
441 | pytest tests/directml/ -v
442 | ```
443 |
444 | ## Alternative Installation Methods
445 |
446 | ### Using Chocolatey
447 |
448 | ```powershell
449 | # Install Python via Chocolatey
450 | choco install python
451 |
452 | # Install Git
453 | choco install git
454 |
455 | # Then follow standard installation
456 | ```
457 |
458 | ### Using Conda
459 |
460 | ```powershell
461 | # Create conda environment
462 | conda create -n mcp-memory python=3.11
463 | conda activate mcp-memory
464 |
465 | # Install PyTorch via conda
466 | conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
467 |
468 | # Install other dependencies
469 | pip install -e .
470 | ```
471 |
472 | ### Using Docker on Windows
473 |
474 | ```powershell
475 | # Using Docker Desktop
476 | git clone https://github.com/doobidoo/mcp-memory-service.git
477 | cd mcp-memory-service
478 |
479 | # Build Windows container
480 | docker build -f Dockerfile.windows -t mcp-memory-service-windows .
481 |
482 | # Run container
483 | docker run -p 8000:8000 mcp-memory-service-windows
484 | ```
485 |
486 | ## Related Documentation
487 |
488 | - [Installation Guide](../installation/master-guide.md) - General installation instructions
489 | - [Multi-Client Setup](../integration/multi-client.md) - Multi-client configuration
490 | - [Troubleshooting](../troubleshooting/general.md) - Windows-specific troubleshooting
491 | - [Docker Deployment](../deployment/docker.md) - Docker setup on Windows
```
--------------------------------------------------------------------------------
/tests/integration/test_api_tag_time_search.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Integration tests for POST /api/search/by-tag endpoint with time_filter parameter.
3 |
4 | Tests the time_filter functionality added in PR #215 to fix semantic over-filtering bug (issue #214).
5 |
6 | NOTE: These tests currently have SQLite threading issues with TestClient.
7 | The async fixture creates storage in one thread, but TestClient creates its own threads,
8 | causing "SQLite objects created in a thread can only be used in that same thread" errors.
9 |
10 | TODO: Fix by using synchronous fixtures like test_http_api_search_by_tag_endpoint in
11 | tests/integration/test_api_with_memory_service.py (line 670), which creates storage
12 | within the test function rather than in an async fixture.
13 |
14 | For now, comprehensive unit tests in tests/unit/test_tag_time_filtering.py provide
15 | excellent coverage of the tag+time filtering functionality across all storage backends.
16 | """
17 |
18 | import pytest
19 | import pytest_asyncio
20 | import tempfile
21 | import os
22 | import time
23 | from fastapi.testclient import TestClient
24 |
25 | from mcp_memory_service.web.dependencies import set_storage
26 | from mcp_memory_service.services.memory_service import MemoryService
27 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
28 | from mcp_memory_service.models.memory import Memory
29 | from mcp_memory_service.utils.hashing import generate_content_hash
30 |
31 |
32 | @pytest.fixture
33 | def temp_db():
34 | """Create a temporary database for testing."""
35 | with tempfile.TemporaryDirectory() as tmpdir:
36 | db_path = os.path.join(tmpdir, "test_api_tag_time.db")
37 | yield db_path
38 |
39 |
40 | @pytest_asyncio.fixture
41 | async def storage_with_test_data(temp_db):
42 | """Create storage with test memories at different timestamps."""
43 | storage = SqliteVecMemoryStorage(temp_db)
44 | await storage.initialize()
45 |
46 | # Store old memory (2 days ago)
47 | two_days_ago = time.time() - (2 * 24 * 60 * 60)
48 | old_task_content = "Old task from 2 days ago"
49 | old_memory = Memory(
50 | content=old_task_content,
51 | content_hash=generate_content_hash(old_task_content),
52 | tags=["task", "old"],
53 | memory_type="task",
54 | created_at=two_days_ago
55 | )
56 | await storage.store(old_memory)
57 |
58 | # Store recent memory (current time)
59 | recent_task_content = "Recent task from today"
60 | recent_memory = Memory(
61 | content=recent_task_content,
62 | content_hash=generate_content_hash(recent_task_content),
63 | tags=["task", "recent"],
64 | memory_type="task",
65 | created_at=time.time()
66 | )
67 | await storage.store(recent_memory)
68 |
69 | # Store another old memory with different tags
70 | old_note_content = "Old note from 3 days ago"
71 | old_note = Memory(
72 | content=old_note_content,
73 | content_hash=generate_content_hash(old_note_content),
74 | tags=["note", "old"],
75 | memory_type="note",
76 | created_at=time.time() - (3 * 24 * 60 * 60)
77 | )
78 | await storage.store(old_note)
79 |
80 | yield storage
81 |
82 | storage.close()
83 |
84 |
85 | @pytest.mark.asyncio
86 | @pytest.mark.integration
87 | async def test_api_search_by_tag_with_time_filter_recent(storage_with_test_data):
88 | """Test POST /api/search/by-tag with time_filter returns only recent memories."""
89 | from mcp_memory_service.web.app import app
90 | set_storage(storage_with_test_data)
91 |
92 | client = TestClient(app)
93 |
94 | # Search for "task" tag with time_filter = 1 day ago
95 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
96 |
97 | response = client.post(
98 | "/api/search/by-tag",
99 | json={
100 | "tags": ["task"],
101 | "time_filter": one_day_ago_iso,
102 | "limit": 10
103 | }
104 | )
105 |
106 | assert response.status_code == 200
107 | data = response.json()
108 |
109 | # Should only return the recent task (not the 2-day-old task)
110 | assert len(data["memories"]) == 1
111 | assert "recent" in data["memories"][0]["tags"]
112 | assert "Recent task from today" in data["memories"][0]["content"]
113 |
114 |
115 | @pytest.mark.asyncio
116 | @pytest.mark.integration
117 | async def test_api_search_by_tag_with_time_filter_excludes_old(storage_with_test_data):
118 | """Test POST /api/search/by-tag with time_filter excludes old memories."""
119 | from mcp_memory_service.web.app import app
120 | set_storage(storage_with_test_data)
121 |
122 | client = TestClient(app)
123 |
124 | # Search for "old" tag with time_filter = 10 seconds ago
125 | # Should return empty because all "old" memories are > 2 days old
126 | ten_seconds_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - 10))
127 |
128 | response = client.post(
129 | "/api/search/by-tag",
130 | json={
131 | "tags": ["old"],
132 | "time_filter": ten_seconds_ago_iso,
133 | "limit": 10
134 | }
135 | )
136 |
137 | assert response.status_code == 200
138 | data = response.json()
139 |
140 | # Should return empty (all "old" memories are from 2-3 days ago)
141 | assert len(data["memories"]) == 0
142 |
143 |
144 | @pytest.mark.asyncio
145 | @pytest.mark.integration
146 | async def test_api_search_by_tag_without_time_filter_backward_compat(storage_with_test_data):
147 | """Test POST /api/search/by-tag without time_filter returns all matching memories (backward compatibility)."""
148 | from mcp_memory_service.web.app import app
149 | set_storage(storage_with_test_data)
150 |
151 | client = TestClient(app)
152 |
153 | # Search for "task" tag WITHOUT time_filter
154 | response = client.post(
155 | "/api/search/by-tag",
156 | json={
157 | "tags": ["task"],
158 | "limit": 10
159 | }
160 | )
161 |
162 | assert response.status_code == 200
163 | data = response.json()
164 |
165 | # Should return BOTH task memories (old and recent)
166 | assert len(data["memories"]) == 2
167 | tags_list = [tag for mem in data["memories"] for tag in mem["tags"]]
168 | assert "old" in tags_list
169 | assert "recent" in tags_list
170 |
171 |
172 | @pytest.mark.asyncio
173 | @pytest.mark.integration
174 | async def test_api_search_by_tag_with_empty_time_filter(storage_with_test_data):
175 | """Test POST /api/search/by-tag with empty time_filter string is ignored."""
176 | from mcp_memory_service.web.app import app
177 | set_storage(storage_with_test_data)
178 |
179 | client = TestClient(app)
180 |
181 | # Search with empty time_filter (should be treated as no filter)
182 | response = client.post(
183 | "/api/search/by-tag",
184 | json={
185 | "tags": ["task"],
186 | "time_filter": "",
187 | "limit": 10
188 | }
189 | )
190 |
191 | assert response.status_code == 200
192 | data = response.json()
193 |
194 | # Should return both task memories (empty filter ignored)
195 | assert len(data["memories"]) == 2
196 |
197 |
198 | @pytest.mark.asyncio
199 | @pytest.mark.integration
200 | async def test_api_search_by_tag_with_natural_language_time_filter(storage_with_test_data):
201 | """Test POST /api/search/by-tag with natural language time expressions."""
202 | from mcp_memory_service.web.app import app
203 | set_storage(storage_with_test_data)
204 |
205 | client = TestClient(app)
206 |
207 | # Test "yesterday" - should return only recent memories
208 | response = client.post(
209 | "/api/search/by-tag",
210 | json={
211 | "tags": ["task"],
212 | "time_filter": "yesterday",
213 | "limit": 10
214 | }
215 | )
216 |
217 | assert response.status_code == 200
218 | data = response.json()
219 |
220 | # Should return only the recent task (created today, after yesterday)
221 | assert len(data["memories"]) == 1
222 | assert "recent" in data["memories"][0]["tags"]
223 |
224 |
225 | @pytest.mark.asyncio
226 | @pytest.mark.integration
227 | async def test_api_search_by_tag_time_filter_with_multiple_tags(storage_with_test_data):
228 | """Test POST /api/search/by-tag with time_filter and multiple tags."""
229 | from mcp_memory_service.web.app import app
230 | set_storage(storage_with_test_data)
231 |
232 | client = TestClient(app)
233 |
234 | # Search for multiple tags with time filter
235 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
236 |
237 | response = client.post(
238 | "/api/search/by-tag",
239 | json={
240 | "tags": ["task", "recent"], # Both tags
241 | "time_filter": one_day_ago_iso,
242 | "limit": 10
243 | }
244 | )
245 |
246 | assert response.status_code == 200
247 | data = response.json()
248 |
249 | # Should return the recent task memory
250 | assert len(data["memories"]) == 1
251 | assert "recent" in data["memories"][0]["tags"]
252 |
253 |
254 | @pytest.mark.asyncio
255 | @pytest.mark.integration
256 | async def test_api_search_by_tag_time_filter_with_match_all(storage_with_test_data):
257 | """Test POST /api/search/by-tag with time_filter and match_all parameter."""
258 | from mcp_memory_service.web.app import app
259 | set_storage(storage_with_test_data)
260 |
261 | # Store a memory with both "task" and "recent" tags
262 | both_tags_content = "Task that is both task and recent"
263 | both_tags_memory = Memory(
264 | content=both_tags_content,
265 | content_hash=generate_content_hash(both_tags_content),
266 | tags=["task", "recent"],
267 | memory_type="task",
268 | created_at=time.time()
269 | )
270 | await storage_with_test_data.store(both_tags_memory)
271 |
272 | client = TestClient(app)
273 |
274 | # Search with match_all=true and time_filter
275 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
276 |
277 | response = client.post(
278 | "/api/search/by-tag",
279 | json={
280 | "tags": ["task", "recent"],
281 | "match_all": True, # Require BOTH tags
282 | "time_filter": one_day_ago_iso,
283 | "limit": 10
284 | }
285 | )
286 |
287 | assert response.status_code == 200
288 | data = response.json()
289 |
290 | # Should return memories with BOTH tags that are recent
291 | assert len(data["memories"]) >= 1
292 | for mem in data["memories"]:
293 | assert "task" in mem["tags"]
294 | assert "recent" in mem["tags"]
295 |
296 |
297 | @pytest.mark.asyncio
298 | @pytest.mark.integration
299 | async def test_api_search_by_tag_invalid_time_filter_format(storage_with_test_data):
300 | """Test POST /api/search/by-tag with invalid time_filter returns error or empty."""
301 | from mcp_memory_service.web.app import app
302 | set_storage(storage_with_test_data)
303 |
304 | client = TestClient(app)
305 |
306 | # Search with invalid time_filter format
307 | response = client.post(
308 | "/api/search/by-tag",
309 | json={
310 | "tags": ["task"],
311 | "time_filter": "invalid-date-format",
312 | "limit": 10
313 | }
314 | )
315 |
316 | # API should handle gracefully (either 400 error or empty results)
317 | # Depending on implementation, this might return 200 with empty results
318 | # or 400 Bad Request
319 | assert response.status_code in [200, 400]
320 |
321 | if response.status_code == 200:
322 | data = response.json()
323 | # If it returns 200, should return empty or all results
324 | assert "memories" in data
325 |
326 |
327 | @pytest.mark.asyncio
328 | @pytest.mark.integration
329 | async def test_api_search_by_tag_time_filter_performance(storage_with_test_data):
330 | """Test that tag+time filtering maintains good performance (<100ms)."""
331 | from mcp_memory_service.web.app import app
332 | set_storage(storage_with_test_data)
333 |
334 | client = TestClient(app)
335 |
336 | one_day_ago_iso = time.strftime("%Y-%m-%d", time.gmtime(time.time() - (24 * 60 * 60)))
337 |
338 | start_time = time.time()
339 |
340 | response = client.post(
341 | "/api/search/by-tag",
342 | json={
343 | "tags": ["task"],
344 | "time_filter": one_day_ago_iso,
345 | "limit": 10
346 | }
347 | )
348 |
349 | elapsed_ms = (time.time() - start_time) * 1000
350 |
351 | assert response.status_code == 200
352 |
353 | # Performance target: <100ms for tag+time search
354 | # (may need adjustment based on hardware)
355 | assert elapsed_ms < 200, f"Tag+time search took {elapsed_ms:.2f}ms (expected <200ms)"
356 |
```
--------------------------------------------------------------------------------
/docs/guides/STORAGE_BACKENDS.md:
--------------------------------------------------------------------------------
```markdown
1 | # Storage Backend Comparison and Selection Guide
2 |
3 | **MCP Memory Service** supports two storage backends, each optimized for different use cases and hardware configurations.
4 |
5 | ## Quick Comparison
6 |
7 | | Feature | SQLite-vec 🪶 | ChromaDB 📦 |
8 | |---------|---------------|-------------|
9 | | **Setup Complexity** | ⭐⭐⭐⭐⭐ Simple | ⭐⭐⭐ Moderate |
10 | | **Startup Time** | ⭐⭐⭐⭐⭐ < 3 seconds | ⭐⭐ 15-30 seconds |
11 | | **Memory Usage** | ⭐⭐⭐⭐⭐ < 150MB | ⭐⭐ 500-800MB |
12 | | **Performance** | ⭐⭐⭐⭐ Very fast | ⭐⭐⭐⭐ Fast |
13 | | **Features** | ⭐⭐⭐ Core features | ⭐⭐⭐⭐⭐ Full-featured |
14 | | **Scalability** | ⭐⭐⭐⭐ Up to 100K items | ⭐⭐⭐⭐⭐ Unlimited |
15 | | **Legacy Hardware** | ⭐⭐⭐⭐⭐ Excellent | ⭐ Poor |
16 | | **Production Ready** | ⭐⭐⭐⭐ Yes | ⭐⭐⭐⭐⭐ Yes |
17 |
18 | ## When to Choose SQLite-vec 🪶
19 |
20 | ### Ideal For:
21 | - **Legacy Hardware**: 2015 MacBook Pro, older Intel Macs
22 | - **Resource-Constrained Systems**: < 4GB RAM, limited CPU
23 | - **Quick Setup**: Want to get started immediately
24 | - **Single-File Portability**: Easy backup and sharing
25 | - **Docker/Serverless**: Lightweight deployments
26 | - **Development/Testing**: Rapid prototyping
27 | - **HTTP/SSE API**: New web interface users
28 |
29 | ### Technical Advantages:
30 | - **Lightning Fast Startup**: Database ready in 2-3 seconds
31 | - **Minimal Dependencies**: Just SQLite and sqlite-vec extension
32 | - **Low Memory Footprint**: Typically uses < 150MB RAM
33 | - **Single File Database**: Easy to backup, move, and share
34 | - **ACID Compliance**: SQLite's proven reliability
35 | - **Zero Configuration**: Works out of the box
36 | - **ONNX Compatible**: Runs without PyTorch if needed
37 |
38 | ### Example Use Cases:
39 | ```bash
40 | # 2015 MacBook Pro scenario
41 | python install.py --legacy-hardware
42 | # Result: SQLite-vec + Homebrew PyTorch + ONNX
43 |
44 | # Docker deployment
45 | docker run -e MCP_MEMORY_STORAGE_BACKEND=sqlite_vec ...
46 |
47 | # Quick development setup
48 | python install.py --storage-backend sqlite_vec --dev
49 | ```
50 |
51 | ## When to Choose ChromaDB 📦
52 |
53 | ### Ideal For:
54 | - **Modern Hardware**: M1/M2/M3 Macs, modern Intel systems
55 | - **GPU-Accelerated Systems**: CUDA, MPS, DirectML available
56 | - **Large-Scale Deployments**: > 10,000 memories
57 | - **Advanced Features**: Complex filtering, metadata queries
58 | - **Production Systems**: Established, battle-tested platform
59 | - **Research/ML**: Advanced vector search capabilities
60 |
61 | ### Technical Advantages:
62 | - **Advanced Vector Search**: Multiple distance metrics, filtering
63 | - **Rich Metadata Support**: Complex query capabilities
64 | - **Proven Scalability**: Handles millions of vectors
65 | - **Extensive Ecosystem**: Wide tool integration
66 | - **Advanced Indexing**: HNSW and other optimized indices
67 | - **Multi-Modal Support**: Text, images, and more
68 |
69 | ### Example Use Cases:
70 | ```bash
71 | # Modern Mac with GPU
72 | python install.py # ChromaDB selected automatically
73 |
74 | # Production deployment
75 | python install.py --storage-backend chromadb --production
76 |
77 | # Research environment
78 | python install.py --storage-backend chromadb --enable-advanced-features
79 | ```
80 |
81 | ## Hardware Compatibility Matrix
82 |
83 | ### macOS Intel (2013-2017) - Legacy Hardware
84 | ```
85 | Recommended: SQLite-vec + Homebrew PyTorch + ONNX
86 | Alternative: ChromaDB (may have installation issues)
87 |
88 | Configuration:
89 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
90 | - MCP_MEMORY_USE_ONNX=1
91 | - MCP_MEMORY_USE_HOMEBREW_PYTORCH=1
92 | ```
93 |
94 | ### macOS Intel (2018+) - Modern Hardware
95 | ```
96 | Recommended: ChromaDB (default) or SQLite-vec (lightweight)
97 | Choice: User preference
98 |
99 | Configuration:
100 | - MCP_MEMORY_STORAGE_BACKEND=chromadb (default)
101 | - Hardware acceleration: CPU/MPS
102 | ```
103 |
104 | ### macOS Apple Silicon (M1/M2/M3)
105 | ```
106 | Recommended: ChromaDB with MPS acceleration
107 | Alternative: SQLite-vec for minimal resource usage
108 |
109 | Configuration:
110 | - MCP_MEMORY_STORAGE_BACKEND=chromadb
111 | - PYTORCH_ENABLE_MPS_FALLBACK=1
112 | - Hardware acceleration: MPS
113 | ```
114 |
115 | ### Windows with CUDA GPU
116 | ```
117 | Recommended: ChromaDB with CUDA acceleration
118 | Alternative: SQLite-vec for lighter deployments
119 |
120 | Configuration:
121 | - MCP_MEMORY_STORAGE_BACKEND=chromadb
122 | - CUDA optimization enabled
123 | ```
124 |
125 | ### Windows CPU-only
126 | ```
127 | Recommended: SQLite-vec
128 | Alternative: ChromaDB (higher resource usage)
129 |
130 | Configuration:
131 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
132 | - MCP_MEMORY_USE_ONNX=1 (optional)
133 | ```
134 |
135 | ### Linux Server/Headless
136 | ```
137 | Recommended: SQLite-vec (easier deployment)
138 | Alternative: ChromaDB (if resources available)
139 |
140 | Configuration:
141 | - MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
142 | - Optimized for headless operation
143 | ```
144 |
145 | ## Performance Comparison
146 |
147 | ### Startup Time
148 | ```
149 | SQLite-vec:  2-3 seconds    ███
150 | ChromaDB:    15-30 seconds  ██████████████████████████████
151 | ```
152 |
153 | ### Memory Usage (Idle)
154 | ```
155 | SQLite-vec: ~150MB ██████
156 | ChromaDB: ~600MB ████████████████████████
157 | ```
158 |
159 | ### Search Performance (1,000 items)
160 | ```
161 | SQLite-vec:  50-200ms    █████████████
162 | ChromaDB:    100-300ms   ████████████████████
163 | ```
164 |
165 | ### Storage Efficiency
166 | ```
167 | SQLite-vec: Single .db file, ~50% smaller
168 | ChromaDB: Directory structure, full metadata
169 | ```
170 |
171 | ## Feature Comparison
172 |
173 | ### Core Features (Both Backends)
174 | - ✅ Semantic memory storage and retrieval
175 | - ✅ Tag-based organization
176 | - ✅ Natural language time-based recall
177 | - ✅ Full-text search capabilities
178 | - ✅ Automatic backups
179 | - ✅ Health monitoring
180 | - ✅ Duplicate detection
181 |
182 | ### SQLite-vec Specific Features
183 | - ✅ Single-file portability
184 | - ✅ HTTP/SSE API support
185 | - ✅ ONNX runtime compatibility
186 | - ✅ Homebrew PyTorch integration
187 | - ✅ Ultra-fast startup
188 | - ✅ Minimal resource usage
189 |
190 | ### ChromaDB Specific Features
191 | - ✅ Advanced metadata filtering
192 | - ✅ Multiple distance metrics
193 | - ✅ Collection management
194 | - ✅ Persistent client support
195 | - ✅ Advanced indexing options
196 | - ✅ Rich ecosystem integration
197 |
198 | ## Migration Between Backends
199 |
200 | ### ChromaDB → SQLite-vec Migration
201 |
202 | Perfect for upgrading legacy hardware or simplifying deployments:
203 |
204 | ```bash
205 | # Automated migration
206 | python scripts/migrate_chroma_to_sqlite.py
207 |
208 | # Manual migration with verification
209 | python install.py --migrate-from-chromadb --storage-backend sqlite_vec
210 | ```
211 |
212 | **Migration preserves:**
213 | - All memory content and embeddings
214 | - Tags and metadata
215 | - Timestamps and relationships
216 | - Search functionality
217 |
218 | ### SQLite-vec → ChromaDB Migration
219 |
220 | For scaling up to advanced features:
221 |
222 | ```bash
223 | # Export from SQLite-vec
224 | python scripts/export_sqlite_memories.py
225 |
226 | # Import to ChromaDB
227 | python scripts/import_to_chromadb.py
228 | ```
229 |
230 | ## Intelligent Selection Algorithm
231 |
232 | The installer uses this logic to recommend backends:
233 |
234 | ```python
235 | def recommend_backend(system_info, hardware_info):
236 | # Legacy hardware gets SQLite-vec
237 | if is_legacy_mac(system_info):
238 | return "sqlite_vec"
239 |
240 | # Low-memory systems get SQLite-vec
241 | if hardware_info.memory_gb < 4:
242 | return "sqlite_vec"
243 |
244 | # ChromaDB installation problems on macOS Intel
245 | if system_info.is_macos_intel_problematic:
246 | return "sqlite_vec"
247 |
248 | # Modern hardware with GPU gets ChromaDB
249 | if hardware_info.has_gpu and hardware_info.memory_gb >= 8:
250 | return "chromadb"
251 |
252 | # Default to ChromaDB for feature completeness
253 | return "chromadb"
254 | ```
255 |
256 | ## Configuration Examples
257 |
258 | ### SQLite-vec Configuration
259 | ```bash
260 | # Environment variables
261 | export MCP_MEMORY_STORAGE_BACKEND=sqlite_vec
262 | export MCP_MEMORY_SQLITE_PATH="$HOME/.mcp-memory/memory.db"
263 | export MCP_MEMORY_USE_ONNX=1 # Optional: CPU-only inference
264 |
265 | # Claude Desktop config
266 | {
267 | "mcpServers": {
268 | "memory": {
269 | "command": "uv",
270 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
271 | "env": {
272 | "MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec",
273 | "MCP_MEMORY_SQLITE_PATH": "/path/to/memory.db"
274 | }
275 | }
276 | }
277 | }
278 | ```
279 |
280 | ### ChromaDB Configuration
281 |
282 | #### Local ChromaDB (Deprecated)
283 | ⚠️ **Note**: Local ChromaDB is deprecated. Consider migrating to SQLite-vec for better performance.
284 |
285 | ```bash
286 | # Environment variables
287 | export MCP_MEMORY_STORAGE_BACKEND=chromadb
288 | export MCP_MEMORY_CHROMA_PATH="$HOME/.mcp-memory/chroma_db"
289 |
290 | # Claude Desktop config
291 | {
292 | "mcpServers": {
293 | "memory": {
294 | "command": "uv",
295 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
296 | "env": {
297 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
298 | "MCP_MEMORY_CHROMA_PATH": "/path/to/chroma_db"
299 | }
300 | }
301 | }
302 | }
303 | ```
304 |
305 | #### Remote ChromaDB (Hosted/Enterprise)
306 | 🌐 **New**: Connect to remote ChromaDB servers, Chroma Cloud, or self-hosted instances.
307 |
308 | ```bash
309 | # Environment variables for remote ChromaDB
310 | export MCP_MEMORY_STORAGE_BACKEND=chromadb
311 | export MCP_MEMORY_CHROMADB_HOST="chroma.example.com"
312 | export MCP_MEMORY_CHROMADB_PORT="8000"
313 | export MCP_MEMORY_CHROMADB_SSL="true"
314 | export MCP_MEMORY_CHROMADB_API_KEY="your-api-key-here"
315 | export MCP_MEMORY_COLLECTION_NAME="my-collection"
316 |
317 | # Claude Desktop config for remote ChromaDB
318 | {
319 | "mcpServers": {
320 | "memory": {
321 | "command": "uv",
322 | "args": ["--directory", "/path/to/mcp-memory-service", "run", "memory"],
323 | "env": {
324 | "MCP_MEMORY_STORAGE_BACKEND": "chromadb",
325 | "MCP_MEMORY_CHROMADB_HOST": "chroma.example.com",
326 | "MCP_MEMORY_CHROMADB_PORT": "8000",
327 | "MCP_MEMORY_CHROMADB_SSL": "true",
328 | "MCP_MEMORY_CHROMADB_API_KEY": "your-api-key-here",
329 | "MCP_MEMORY_COLLECTION_NAME": "my-collection"
330 | }
331 | }
332 | }
333 | }
334 | ```
335 |
336 | #### Remote ChromaDB Hosting Options
337 |
338 | **Chroma Cloud (Early Access)**
339 | - Official hosted service by ChromaDB
340 | - Early access available, full launch Q1 2025
341 | - $5 free credits to start
342 | - Visit: [trychroma.com](https://trychroma.com)
343 |
344 | **Self-Hosted Options**
345 | - **Elest.io**: Fully managed ChromaDB deployment
346 | - **AWS**: Use CloudFormation template (requires 2GB+ RAM)
347 | - **Google Cloud Run**: Container-based deployment
348 | - **Docker**: Self-hosted with authentication
349 |
350 | **Example Docker Configuration**
351 | ```bash
352 | # Start ChromaDB server with authentication
353 | docker run -p 8000:8000 \
354 | -e CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER="chromadb.auth.token.TokenConfigServerAuthCredentialsProvider" \
355 | -e CHROMA_SERVER_AUTH_PROVIDER="chromadb.auth.token.TokenAuthServerProvider" \
356 | -e CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER="X_CHROMA_TOKEN" \
357 | -e CHROMA_SERVER_AUTH_CREDENTIALS="test-token" \
358 | -v /path/to/chroma-data:/chroma/chroma \
359 | chromadb/chroma
360 | ```
361 |
362 | ## Decision Flowchart
363 |
364 | ```
365 | Start: Choose Storage Backend
366 | ├── Do you have legacy hardware (2013-2017 Mac)?
367 | │ ├── Yes → SQLite-vec (optimized path)
368 | │ └── No → Continue
369 | ├── Do you have < 4GB RAM?
370 | │ ├── Yes → SQLite-vec (resource efficient)
371 | │ └── No → Continue
372 | ├── Do you need HTTP/SSE API?
373 | │ ├── Yes → SQLite-vec (first-class support)
374 | │ └── No → Continue
375 | ├── Do you want minimal setup?
376 | │ ├── Yes → SQLite-vec (zero config)
377 | │ └── No → Continue
378 | ├── Do you need advanced vector search features?
379 | │ ├── Yes → ChromaDB (full-featured)
380 | │ └── No → Continue
381 | ├── Do you have modern hardware with GPU?
382 | │ ├── Yes → ChromaDB (hardware acceleration)
383 | │ └── No → Continue
384 | └── Default → ChromaDB (established platform)
385 | ```
386 |
387 | ## Getting Help
388 |
389 | ### Backend-Specific Support
390 | - **SQLite-vec issues**: Tag with `sqlite-vec` label
391 | - **ChromaDB issues**: Tag with `chromadb` label
392 | - **Migration issues**: Use `migration` label
393 |
394 | ### Community Resources
395 | - **Backend comparison discussions**: GitHub Discussions
396 | - **Performance benchmarks**: Community wiki
397 | - **Hardware compatibility**: Hardware compatibility matrix
398 |
399 | ### Documentation Links
400 | - [SQLite-vec Backend Guide](../sqlite-vec-backend.md)
401 | - [Migration Guide](migration.md)
402 | - [Legacy Hardware Guide](../platforms/macos-intel.md)
403 | - [Installation Master Guide](../installation/master-guide.md)
```
--------------------------------------------------------------------------------
/.claude/agents/amp-pr-automator.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | name: amp-pr-automator
3 | description: Lightweight PR automation using Amp CLI for code quality checks, test generation, and fix suggestions. Avoids OAuth friction of gemini-pr-automator while providing fast, parallel quality analysis. Uses file-based prompt/response workflow for async execution. Ideal for pre-PR checks and developer-driven automation.
4 | model: sonnet
5 | color: purple
6 | ---
7 |
8 | You are an elite PR Automation Specialist using Amp CLI for lightweight, OAuth-free PR automation. Your mission is to provide fast code quality analysis, test generation, and fix suggestions without the browser authentication interruptions of Gemini CLI.
9 |
10 | ## Core Responsibilities
11 |
12 | 1. **Quality Gate Checks**: Parallel complexity, security, and type hint analysis
13 | 2. **Test Generation**: Create pytest tests for new/modified code
14 | 3. **Fix Suggestions**: Analyze review feedback and suggest improvements
15 | 4. **Breaking Change Detection**: Identify potential API breaking changes
16 | 5. **Result Aggregation**: Collect and summarize Amp analysis results
17 |
18 | ## Problem Statement
19 |
20 | **Gemini CLI Issues**:
21 | - OAuth browser flow interrupts automation
22 | - Sequential processing (slow for multiple checks)
23 | - Rate limiting for complex analysis
24 |
25 | **Amp CLI Solution**:
26 | - File-based prompts (no interactive auth)
27 | - Parallel processing (multiple Amp instances)
28 | - Fast inference with execute mode
29 | - Credit conservation through focused tasks
30 |
31 | ## Amp CLI Integration
32 |
33 | ### File-Based Workflow
34 |
35 | ```
36 | 1. Create prompt → .claude/amp/prompts/pending/{uuid}.json
37 | 2. User runs → amp @.claude/amp/prompts/pending/{uuid}.json
38 | 3. Amp writes → .claude/amp/responses/ready/{uuid}.json
39 | 4. Scripts read → Aggregate results
40 | ```
41 |
42 | ### Parallel Execution Pattern
43 |
44 | ```bash
45 | # Launch multiple Amp tasks in parallel
46 | amp @prompts/pending/complexity-{uuid}.json > /tmp/amp-complexity.log 2>&1 &
47 | amp @prompts/pending/security-{uuid}.json > /tmp/amp-security.log 2>&1 &
48 | amp @prompts/pending/typehints-{uuid}.json > /tmp/amp-typehints.log 2>&1 &
49 |
50 | # Wait for all to complete
51 | wait
52 |
53 | # Collect results
54 | bash scripts/pr/amp_collect_results.sh --timeout 300
55 | ```
56 |
57 | ## Shell Scripts
58 |
59 | ### 1. Quality Gate (Parallel Checks)
60 |
61 | **File**: `scripts/pr/amp_quality_gate.sh`
62 |
63 | Launches parallel Amp instances for:
64 | - Complexity scoring (functions >7)
65 | - Security vulnerabilities (SQL injection, XSS, command injection)
66 | - Type hint coverage
67 | - Import organization
68 |
69 | **Usage**:
70 | ```bash
71 | bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
72 | ```
73 |
74 | **Output**: Quality gate pass/fail with detailed breakdown
75 |
76 | ### 2. Result Collection
77 |
78 | **File**: `scripts/pr/amp_collect_results.sh`
79 |
80 | Polls `.claude/amp/responses/ready/` for completed Amp analyses.
81 |
82 | **Usage**:
83 | ```bash
84 | bash scripts/pr/amp_collect_results.sh --timeout 300 --uuids "uuid1,uuid2,uuid3"
85 | ```
86 |
87 | **Features**:
88 | - Timeout handling (default: 5 minutes)
89 | - Partial results if some tasks fail
90 | - JSON aggregation
91 |
92 | ### 3. Fix Suggestions
93 |
94 | **File**: `scripts/pr/amp_suggest_fixes.sh`
95 |
96 | Analyzes review feedback and generates fix suggestions (no auto-apply).
97 |
98 | **Usage**:
99 | ```bash
100 | bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
101 | ```
102 |
103 | **Output**: Suggested fixes saved to `/tmp/amp_fixes_{PR_NUMBER}.txt`
104 |
105 | ### 4. Test Generation
106 |
107 | **File**: `scripts/pr/amp_generate_tests.sh`
108 |
109 | Creates pytest tests for changed Python files.
110 |
111 | **Usage**:
112 | ```bash
113 | bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
114 | ```
115 |
116 | **Output**: Test files written to `/tmp/amp_tests/test_*.py`
117 |
118 | ### 5. Breaking Change Detection
119 |
120 | **File**: `scripts/pr/amp_detect_breaking_changes.sh`
121 |
122 | Analyzes API changes for breaking modifications.
123 |
124 | **Usage**:
125 | ```bash
126 | bash scripts/pr/amp_detect_breaking_changes.sh <BASE_BRANCH> <HEAD_BRANCH>
127 | ```
128 |
129 | **Output**: Breaking changes report with severity (CRITICAL/HIGH/MEDIUM)
130 |
131 | ### 6. Complete PR Review Workflow
132 |
133 | **File**: `scripts/pr/amp_pr_review.sh`
134 |
135 | Orchestrates full PR review cycle:
136 | 1. Quality gate checks
137 | 2. Test generation
138 | 3. Breaking change detection
139 | 4. Fix suggestions
140 |
141 | **Usage**:
142 | ```bash
143 | bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
144 | ```
145 |
146 | ## Operational Workflows
147 |
148 | ### 1. Pre-PR Quality Check (Developer-Driven)
149 |
150 | ```bash
151 | # Before creating PR, run quality checks
152 | bash scripts/pr/amp_quality_gate.sh 0 # Use 0 for local branch
153 |
154 | # Review results
155 | cat /tmp/amp_quality_results.json | jq '.summary'
156 |
157 | # Address issues before creating PR
158 | ```
159 |
160 | ### 2. Post-PR Analysis (Review Automation)
161 |
162 | ```bash
163 | # After PR created, run complete analysis
164 | bash scripts/pr/amp_pr_review.sh 215
165 |
166 | # Review outputs:
167 | # - /tmp/amp_quality_results.json
168 | # - /tmp/amp_tests/
169 | # - /tmp/amp_fixes_215.txt
170 | # - /tmp/amp_breaking_changes.txt
171 | ```
172 |
173 | ### 3. Incremental Iteration (Fix → Recheck)
174 |
175 | ```bash
176 | # After applying fixes, re-run quality gate
177 | bash scripts/pr/amp_quality_gate.sh 215
178 |
179 | # Compare before/after
180 | diff /tmp/amp_quality_results_v1.json /tmp/amp_quality_results_v2.json
181 | ```
182 |
183 | ## Decision-Making Framework
184 |
185 | ### When to Use amp-pr-automator vs gemini-pr-automator
186 |
187 | | Scenario | Use amp-pr-automator | Use gemini-pr-automator |
188 | |----------|---------------------|------------------------|
189 | | **Pre-PR checks** | ✅ Fast parallel analysis | ❌ OAuth interrupts flow |
190 | | **Developer-driven** | ✅ File-based control | ❌ Requires manual OAuth |
191 | | **CI/CD integration** | ✅ No browser needed | ❌ OAuth not CI-friendly |
192 | | **Auto-fix application** | ❌ Manual fixes only | ✅ Full automation |
193 | | **Inline comment handling** | ❌ No GitHub integration | ✅ GraphQL thread resolution |
194 | | **Complex iteration** | ❌ Manual workflow | ✅ Full review loop |
195 |
196 | **Use amp-pr-automator for**:
197 | - Pre-PR quality checks (before creating PR)
198 | - Developer-driven analysis (you control timing)
199 | - Parallel processing (multiple checks simultaneously)
200 | - OAuth-free automation (CI/CD, scripts)
201 |
202 | **Use gemini-pr-automator for**:
203 | - Full automated review loops
204 | - Auto-fix application
205 | - GitHub inline comment handling
206 | - Continuous watch mode
207 |
208 | ### Hybrid Approach (RECOMMENDED)
209 |
210 | ```bash
211 | # 1. Pre-PR: Use Amp for quality gate
212 | bash scripts/pr/amp_quality_gate.sh 0
213 |
214 | # 2. Create PR (github-release-manager)
215 | gh pr create --title "feat: new feature" --body "..."
216 |
217 | # 3. Post-PR: Use Gemini for automated review
218 | bash scripts/pr/auto_review.sh 215 5 true
219 | ```
220 |
221 | ## Prompt Engineering for Amp
222 |
223 | ### Complexity Analysis Prompt
224 |
225 | ```
226 | Analyze code complexity for each function in this file.
227 |
228 | Rating scale: 1-10 (1=simple, 10=very complex)
229 |
230 | ONLY report functions with score >7 in this exact format:
231 | FunctionName: Score X - Reason
232 |
233 | If all functions score ≤7, respond: "COMPLEXITY_OK"
234 |
235 | File content:
236 | {file_content}
237 | ```
238 |
239 | ### Security Scan Prompt
240 |
241 | ```
242 | Security audit for vulnerabilities:
243 | - SQL injection (raw SQL, string formatting in queries)
244 | - XSS (unescaped HTML output)
245 | - Command injection (os.system, subprocess with shell=True)
246 | - Path traversal (user input in file paths)
247 | - Hardcoded secrets (API keys, passwords)
248 |
249 | IMPORTANT: Output format:
250 | - If ANY vulnerability found: VULNERABILITY_DETECTED: [type]
251 | - If NO vulnerabilities: SECURITY_CLEAN
252 |
253 | File content:
254 | {file_content}
255 | ```
256 |
257 | ### Type Hint Coverage Prompt
258 |
259 | ```
260 | Check type hint coverage for this Python file.
261 |
262 | Report:
263 | 1. Total functions/methods
264 | 2. Functions with complete type hints
265 | 3. Functions missing type hints (list names)
266 | 4. Coverage percentage
267 |
268 | Output format:
269 | COVERAGE: X%
270 | MISSING: function1, function2, ...
271 |
272 | File content:
273 | {file_content}
274 | ```
275 |
276 | ## Integration with Other Agents
277 |
278 | ### github-release-manager
279 | - Creates PRs → amp-pr-automator runs pre-PR checks
280 | - Merges PRs → amp-pr-automator validates quality gates
281 |
282 | ### gemini-pr-automator
283 | - amp-pr-automator runs quality gate first
284 | - If passed, gemini-pr-automator handles review iteration
285 |
286 | ### code-quality-guard
287 | - Pre-commit hooks use Groq/Gemini for local checks
288 | - amp-pr-automator for PR-level analysis
289 |
290 | ## Project-Specific Patterns
291 |
292 | ### MCP Memory Service PR Standards
293 |
294 | **Quality Gate Requirements**:
295 | - ✅ Code complexity ≤7 for all functions
296 | - ✅ No security vulnerabilities
297 | - ✅ Type hints on new functions (80% coverage)
298 | - ✅ Import organization (stdlib → third-party → local)
299 |
300 | **File-Based Workflow Benefits**:
301 | - Developer reviews prompt before running Amp
302 | - Amp responses saved for audit trail
303 | - Easy to re-run specific checks
304 | - No OAuth interruptions during work
305 |
306 | ## Usage Examples
307 |
308 | ### Quick Quality Check
309 |
310 | ```bash
311 | # Run quality gate for PR #215
312 | bash scripts/pr/amp_quality_gate.sh 215
313 |
314 | # Wait for prompts to be created
315 | # Review prompts: ls -la .claude/amp/prompts/pending/
316 |
317 | # Run each Amp task shown in output
318 | amp @.claude/amp/prompts/pending/{complexity-uuid}.json &
319 | amp @.claude/amp/prompts/pending/{security-uuid}.json &
320 | amp @.claude/amp/prompts/pending/{typehints-uuid}.json &
321 |
322 | # Collect results
323 | bash scripts/pr/amp_collect_results.sh --timeout 300
324 | ```
325 |
326 | ### Generate Tests Only
327 |
328 | ```bash
329 | # Generate tests for PR #215
330 | bash scripts/pr/amp_generate_tests.sh 215
331 |
332 | # Run Amp task
333 | amp @.claude/amp/prompts/pending/{tests-uuid}.json
334 |
335 | # Review generated tests
336 | ls -la /tmp/amp_tests/
337 | ```
338 |
339 | ### Breaking Change Detection
340 |
341 | ```bash
342 | # Check for breaking changes
343 | bash scripts/pr/amp_detect_breaking_changes.sh main feature/new-api
344 |
345 | # Run Amp task
346 | amp @.claude/amp/prompts/pending/{breaking-uuid}.json
347 |
348 | # View report
349 | cat /tmp/amp_breaking_changes.txt
350 | ```
351 |
352 | ## Best Practices
353 |
354 | 1. **Review Prompts Before Running**: Inspect `.claude/amp/prompts/pending/` to verify Amp tasks
355 | 2. **Parallel Execution**: Launch multiple Amp instances for speed
356 | 3. **Timeout Handling**: Use `amp_collect_results.sh --timeout` to prevent indefinite waits
357 | 4. **Incremental Checks**: Re-run specific checks (complexity only, security only) as needed
358 | 5. **Audit Trail**: Keep Amp responses in `.claude/amp/responses/consumed/` for review
359 | 6. **Hybrid Workflow**: Use Amp for pre-PR, Gemini for post-PR automation
360 |
361 | ## Limitations
362 |
363 | - **No Auto-Fix**: Amp suggests fixes, manual application required
364 | - **No GitHub Integration**: Cannot resolve PR review threads automatically
365 | - **Manual Workflow**: User must run Amp commands (not fully automated)
366 | - **Credit Consumption**: Still uses Amp API credits (separate from Claude Code)
367 | - **Context Limits**: Large files may need chunking for Amp analysis
368 |
369 | ## Performance Considerations
370 |
371 | - **Parallel Processing**: 3-5 Amp tasks in parallel = ~2-3 minutes total
372 | - **Sequential (Gemini)**: Same checks = ~10-15 minutes
373 | - **Time Savings**: 70-80% faster for quality gate checks
374 | - **Credit Efficiency**: Focused prompts consume fewer tokens
375 |
376 | ## Success Metrics
377 |
378 | - ✅ **Speed**: Quality gate completes in <3 minutes (vs 10-15 with Gemini)
379 | - ✅ **No OAuth**: Zero browser interruptions during PR workflow
380 | - ✅ **Parallel Efficiency**: 5 checks run simultaneously
381 | - ✅ **Developer Control**: File-based workflow allows prompt inspection
382 | - ✅ **Audit Trail**: All prompts/responses saved for review
383 |
384 | ---
385 |
386 | **Quick Reference Card**:
387 |
388 | ```bash
389 | # Quality gate (parallel checks)
390 | bash scripts/pr/amp_quality_gate.sh <PR_NUMBER>
391 |
392 | # Collect Amp results
393 | bash scripts/pr/amp_collect_results.sh --timeout 300
394 |
395 | # Generate tests
396 | bash scripts/pr/amp_generate_tests.sh <PR_NUMBER>
397 |
398 | # Suggest fixes
399 | bash scripts/pr/amp_suggest_fixes.sh <PR_NUMBER>
400 |
401 | # Breaking changes
402 | bash scripts/pr/amp_detect_breaking_changes.sh <BASE> <HEAD>
403 |
404 | # Complete PR review
405 | bash scripts/pr/amp_pr_review.sh <PR_NUMBER>
406 | ```
407 |
408 | **Workflow Integration**:
409 |
410 | ```bash
411 | # Pre-PR: Quality checks (Amp)
412 | bash scripts/pr/amp_quality_gate.sh 0
413 |
414 | # Create PR
415 | gh pr create --title "feat: X" --body "..."
416 |
417 | # Post-PR: Automated review (Gemini)
418 | bash scripts/pr/auto_review.sh 215 5 true
419 | ```
420 |
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/chunker.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Intelligent text chunking strategies for document ingestion.
17 | """
18 |
19 | import re
20 | import logging
21 | from typing import List, Dict, Any, Optional, Tuple
22 | from dataclasses import dataclass
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 |
@dataclass
class ChunkingStrategy:
    """Tunable parameters that control how text is split into chunks."""
    chunk_size: int = 1000                      # Target number of characters per chunk
    chunk_overlap: int = 200                    # Characters carried over between adjacent chunks
    respect_sentence_boundaries: bool = True    # Prefer breaking at sentence ends
    respect_paragraph_boundaries: bool = True   # Prefer breaking at paragraph ends
    min_chunk_size: int = 100                   # Chunks shorter than this are discarded
35 |
36 |
37 | class TextChunker:
38 | """
39 | Intelligent text chunking that respects document structure.
40 |
41 | Provides multiple chunking strategies:
42 | - Sentence-aware chunking
43 | - Paragraph-aware chunking
44 | - Token-based chunking
45 | - Custom delimiter chunking
46 | """
47 |
48 | def __init__(self, strategy: ChunkingStrategy = None):
49 | """
50 | Initialize text chunker.
51 |
52 | Args:
53 | strategy: Chunking configuration to use
54 | """
55 | self.strategy = strategy or ChunkingStrategy()
56 |
57 | # Sentence boundary patterns
58 | self.sentence_endings = re.compile(r'[.!?]+\s+')
59 | self.paragraph_separator = re.compile(r'\n\s*\n')
60 |
61 | # Common section headers (for structured documents)
62 | self.section_headers = re.compile(
63 | r'^(#{1,6}\s+|Chapter\s+\d+|Section\s+\d+|Part\s+\d+|\d+\.\s+)',
64 | re.MULTILINE | re.IGNORECASE
65 | )
66 |
67 | def chunk_text(self, text: str, metadata: Dict[str, Any] = None) -> List[Tuple[str, Dict[str, Any]]]:
68 | """
69 | Split text into chunks using the configured strategy.
70 |
71 | Args:
72 | text: Text content to chunk
73 | metadata: Base metadata to include with each chunk
74 |
75 | Returns:
76 | List of (chunk_text, chunk_metadata) tuples
77 | """
78 | if not text or len(text.strip()) < self.strategy.min_chunk_size:
79 | return []
80 |
81 | metadata = metadata or {}
82 |
83 | # Try different chunking strategies in order of preference
84 | if self.strategy.respect_paragraph_boundaries:
85 | chunks = self._chunk_by_paragraphs(text)
86 | elif self.strategy.respect_sentence_boundaries:
87 | chunks = self._chunk_by_sentences(text)
88 | else:
89 | chunks = self._chunk_by_characters(text)
90 |
91 | # Add metadata to each chunk
92 | result = []
93 | for i, chunk_text in enumerate(chunks):
94 | chunk_metadata = metadata.copy()
95 | chunk_metadata.update({
96 | 'chunk_index': i,
97 | 'chunk_length': len(chunk_text),
98 | 'total_chunks': len(chunks),
99 | 'chunking_strategy': self._get_strategy_name()
100 | })
101 | result.append((chunk_text, chunk_metadata))
102 |
103 | logger.debug(f"Created {len(result)} chunks from {len(text)} characters")
104 | return result
105 |
106 | def _chunk_by_paragraphs(self, text: str) -> List[str]:
107 | """
108 | Chunk text by paragraph boundaries, respecting size limits.
109 |
110 | Args:
111 | text: Text to chunk
112 |
113 | Returns:
114 | List of text chunks
115 | """
116 | paragraphs = self.paragraph_separator.split(text)
117 | chunks = []
118 | current_chunk = ""
119 |
120 | for paragraph in paragraphs:
121 | paragraph = paragraph.strip()
122 | if not paragraph:
123 | continue
124 |
125 | # If adding this paragraph would exceed chunk size
126 | if (len(current_chunk) + len(paragraph) + 2 > self.strategy.chunk_size
127 | and len(current_chunk) > 0):
128 |
129 | # Finalize current chunk
130 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
131 | chunks.append(current_chunk.strip())
132 |
133 | # Start new chunk with overlap
134 | overlap = self._get_overlap_text(current_chunk)
135 | current_chunk = overlap + paragraph
136 | else:
137 | # Add paragraph to current chunk
138 | if current_chunk:
139 | current_chunk += "\n\n" + paragraph
140 | else:
141 | current_chunk = paragraph
142 |
143 | # Add remaining text
144 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
145 | chunks.append(current_chunk.strip())
146 |
147 | return chunks
148 |
149 | def _chunk_by_sentences(self, text: str) -> List[str]:
150 | """
151 | Chunk text by sentence boundaries, respecting size limits.
152 |
153 | Args:
154 | text: Text to chunk
155 |
156 | Returns:
157 | List of text chunks
158 | """
159 | sentences = self.sentence_endings.split(text)
160 | chunks = []
161 | current_chunk = ""
162 |
163 | for sentence in sentences:
164 | sentence = sentence.strip()
165 | if not sentence:
166 | continue
167 |
168 | # If adding this sentence would exceed chunk size
169 | if (len(current_chunk) + len(sentence) + 1 > self.strategy.chunk_size
170 | and len(current_chunk) > 0):
171 |
172 | # Finalize current chunk
173 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
174 | chunks.append(current_chunk.strip())
175 |
176 | # Start new chunk with overlap
177 | overlap = self._get_overlap_text(current_chunk)
178 | current_chunk = overlap + sentence
179 | else:
180 | # Add sentence to current chunk
181 | if current_chunk:
182 | current_chunk += " " + sentence
183 | else:
184 | current_chunk = sentence
185 |
186 | # Add remaining text
187 | if len(current_chunk.strip()) >= self.strategy.min_chunk_size:
188 | chunks.append(current_chunk.strip())
189 |
190 | return chunks
191 |
192 | def _chunk_by_characters(self, text: str) -> List[str]:
193 | """
194 | Chunk text by character count with overlap.
195 |
196 | Args:
197 | text: Text to chunk
198 |
199 | Returns:
200 | List of text chunks
201 | """
202 | if len(text) <= self.strategy.chunk_size:
203 | return [text]
204 |
205 | chunks = []
206 | start = 0
207 |
208 | while start < len(text):
209 | end = start + self.strategy.chunk_size
210 |
211 | # If this is not the last chunk, try to find a good break point
212 | if end < len(text):
213 | # Look for space to avoid breaking words
214 | for i in range(end, max(start + self.strategy.min_chunk_size, end - 100), -1):
215 | if text[i].isspace():
216 | end = i
217 | break
218 |
219 | chunk = text[start:end].strip()
220 | if len(chunk) >= self.strategy.min_chunk_size:
221 | chunks.append(chunk)
222 |
223 | # Move start position with overlap
224 | start = max(start + 1, end - self.strategy.chunk_overlap)
225 |
226 | return chunks
227 |
228 | def _get_overlap_text(self, text: str) -> str:
229 | """
230 | Get overlap text from the end of a chunk.
231 |
232 | Args:
233 | text: Text to extract overlap from
234 |
235 | Returns:
236 | Overlap text to include in next chunk
237 | """
238 | if len(text) <= self.strategy.chunk_overlap:
239 | return text + " "
240 |
241 | overlap = text[-self.strategy.chunk_overlap:]
242 |
243 | # Try to start overlap at a sentence boundary
244 | sentences = self.sentence_endings.split(overlap)
245 | if len(sentences) > 1:
246 | overlap = " ".join(sentences[1:])
247 |
248 | return overlap + " " if overlap else ""
249 |
250 | def _get_strategy_name(self) -> str:
251 | """Get human-readable name for current chunking strategy."""
252 | if self.strategy.respect_paragraph_boundaries:
253 | return "paragraph_aware"
254 | elif self.strategy.respect_sentence_boundaries:
255 | return "sentence_aware"
256 | else:
257 | return "character_based"
258 |
259 | def chunk_by_sections(self, text: str, metadata: Dict[str, Any] = None) -> List[Tuple[str, Dict[str, Any]]]:
260 | """
261 | Chunk text by document sections (headers, chapters, etc.).
262 |
263 | Args:
264 | text: Text content to chunk
265 | metadata: Base metadata to include with each chunk
266 |
267 | Returns:
268 | List of (chunk_text, chunk_metadata) tuples
269 | """
270 | metadata = metadata or {}  # treat a missing metadata dict as empty
271 |
272 | # Find section boundaries
273 | section_matches = list(self.section_headers.finditer(text))
274 | if not section_matches:
275 | # No sections found, use regular chunking
276 | return self.chunk_text(text, metadata)
277 |
278 | chunks = []
279 | section_start = 0
280 |
281 | for i, match in enumerate(section_matches):  # NOTE(review): the slice emitted at index i is the text *before* header i, so any preamble gets section_index 0 and header k's body gets index k+1
282 | section_end = match.start()  # current section ends where this header begins
283 |
284 | # Extract previous section if it exists
285 | if section_start < section_end:
286 | section_text = text[section_start:section_end].strip()
287 | if len(section_text) >= self.strategy.min_chunk_size:
288 | section_metadata = metadata.copy()
289 | section_metadata.update({
290 | 'section_index': i,
291 | 'is_section': True,
292 | 'section_start': section_start,
293 | 'section_end': section_end
294 | })
295 |
296 | # If section is too large, sub-chunk it
297 | if len(section_text) > self.strategy.chunk_size * 2:
298 | sub_chunks = self.chunk_text(section_text, section_metadata)
299 | chunks.extend(sub_chunks)
300 | else:
301 | chunks.append((section_text, section_metadata))
302 |
303 | section_start = match.start()  # next section's text starts at this header
304 |
305 | # Handle final section
306 | if section_start < len(text):  # tail from the last header to end-of-text
307 | final_text = text[section_start:].strip()
308 | if len(final_text) >= self.strategy.min_chunk_size:
309 | final_metadata = metadata.copy()
310 | final_metadata.update({
311 | 'section_index': len(section_matches),
312 | 'is_section': True,
313 | 'section_start': section_start,
314 | 'section_end': len(text)
315 | })
316 |
317 | if len(final_text) > self.strategy.chunk_size * 2:
318 | sub_chunks = self.chunk_text(final_text, final_metadata)
319 | chunks.extend(sub_chunks)
320 | else:
321 | chunks.append((final_text, final_metadata))
322 |
323 | return chunks
```
--------------------------------------------------------------------------------
/docs/maintenance/memory-maintenance.md:
--------------------------------------------------------------------------------
```markdown
1 | # Memory Maintenance Guide
2 |
3 | A comprehensive guide for maintaining and organizing your MCP Memory Service knowledge base through systematic review, analysis, and re-categorization processes.
4 |
5 | ## 🎯 Overview
6 |
7 | Memory maintenance is essential for keeping your knowledge base organized, searchable, and valuable over time. This guide provides practical workflows for identifying poorly organized memories and transforming them into a well-structured knowledge system.
8 |
9 | ## 📋 Quick Start
10 |
11 | ### Basic Maintenance Session
12 |
13 | 1. **Identify untagged memories**: `retrieve_memory({"query": "untagged memories", "n_results": 20})`
14 | 2. **Analyze content themes**: Look for projects, technologies, activities, status indicators
15 | 3. **Apply standardized tags**: Use consistent categorization schema
16 | 4. **Replace old memories**: Create new tagged version, delete old untagged version
17 | 5. **Document results**: Store summary of maintenance session
18 |
19 | ### Maintenance Schedule Recommendations
20 |
21 | - **Weekly**: Review memories from past 7 days
22 | - **Monthly**: Comprehensive review of recent memories + spot check older ones
23 | - **Quarterly**: Full database health check and optimization
24 |
25 | ## 🔍 Step-by-Step Maintenance Process
26 |
27 | ### Phase 1: Assessment and Planning
28 |
29 | #### 1.1 Database Health Check
30 |
31 | ```javascript
32 | // Check overall database status
33 | check_database_health()
34 | ```
35 |
36 | **What to look for:**
37 | - Total memory count
38 | - Database health status
39 | - Recent activity patterns
40 | - Error indicators
41 |
42 | #### 1.2 Identify Untagged Memories
43 |
44 | **Search Strategy:**
45 | ```javascript
46 | // Primary search for untagged memories
47 | retrieve_memory({
48 | "n_results": 15,
49 | "query": "untagged memories without tags minimal tags single tag"
50 | })
51 |
52 | // Alternative searches
53 | retrieve_memory({"query": "test memory basic simple concept", "n_results": 20})
54 | recall_memory({"query": "memories from last week", "n_results": 25})
55 | ```
56 |
57 | **Identification Criteria:**
58 | - Memories with no tags
59 | - Memories with only generic tags (`test`, `memory`, `note`)
60 | - Memories with inconsistent tag formats
61 | - Old memories that predate tag standardization
62 |
63 | #### 1.3 Categorize by Priority
64 |
65 | **High Priority:**
66 | - Frequently accessed memories
67 | - Critical project information
68 | - Recent important developments
69 |
70 | **Medium Priority:**
71 | - Historical documentation
72 | - Reference materials
73 | - Tutorial content
74 |
75 | **Low Priority:**
76 | - Test memories (evaluate for deletion)
77 | - Outdated information
78 | - Duplicate content
79 |
80 | ### Phase 2: Analysis and Categorization
81 |
82 | #### 2.1 Content Theme Analysis
83 |
84 | For each identified memory, analyze:
85 |
86 | **Project Context:**
87 | - Which project does this relate to?
88 | - Is it part of a larger initiative?
89 | - What's the project phase/status?
90 |
91 | **Technology Stack:**
92 | - Programming languages mentioned
93 | - Frameworks and libraries
94 | - Tools and platforms
95 | - Databases and services
96 |
97 | **Activity Type:**
98 | - Development work
99 | - Testing and debugging
100 | - Documentation
101 | - Research and planning
102 | - Issue resolution
103 |
104 | **Content Classification:**
105 | - Concept or idea
106 | - Tutorial or guide
107 | - Reference material
108 | - Troubleshooting solution
109 | - Best practice
110 |
111 | #### 2.2 Tag Assignment Strategy
112 |
113 | **Multi-Category Tagging:**
114 | Apply tags from multiple categories for comprehensive organization:
115 |
116 | ```javascript
117 | // Example: Well-tagged memory
118 | {
119 | "tags": [
120 | "mcp-memory-service", // Project
121 | "python", "chromadb", // Technologies
122 | "debugging", "testing", // Activities
123 | "resolved", // Status
124 | "backend", // Domain
125 | "troubleshooting" // Content type
126 | ]
127 | }
128 | ```
129 |
130 | **Tag Selection Guidelines:**
131 |
132 | 1. **Start with Project/Context**: What's the main project or domain?
133 | 2. **Add Technology Tags**: What tools, languages, or frameworks?
134 | 3. **Include Activity Tags**: What was being done?
135 | 4. **Specify Status**: What's the current state?
136 | 5. **Add Content Type**: What kind of information is this?
137 |
138 | ### Phase 3: Implementation
139 |
140 | #### 3.1 Memory Re-tagging Process
141 |
142 | **For each memory to be re-tagged:**
143 |
144 | 1. **Copy Content**: Preserve exact content
145 | 2. **Create New Memory**: With improved tags
146 | 3. **Verify Storage**: Confirm new memory exists
147 | 4. **Delete Old Memory**: Remove untagged version
148 | 5. **Document Change**: Record in maintenance log
149 |
150 | **Example Implementation:**
151 | ```javascript
152 | // Step 1: Create properly tagged memory
153 | store_memory({
154 | "content": "TEST: Timestamp debugging memory created for issue #7 investigation",
155 | "metadata": {
156 | "tags": ["test", "debugging", "issue-7", "timestamp-test", "mcp-memory-service", "verification"],
157 | "type": "debug-test"
158 | }
159 | })
160 |
161 | // Step 2: Delete old untagged memory
162 | delete_memory({
163 | "content_hash": "b3f874baee0c1261907c8f80c3e33d1977485f66c17078ed611b6f1c744cb1f8"
164 | })
165 | ```
166 |
167 | #### 3.2 Batch Processing Tips
168 |
169 | **Efficiency Strategies:**
170 | - Group similar memories for consistent tagging
171 | - Use template patterns for common memory types
172 | - Process one category at a time (e.g., all test memories)
173 | - Take breaks between batches to maintain quality
174 |
175 | **Quality Control:**
176 | - Double-check tag spelling and format
177 | - Verify content hasn't been modified
178 | - Confirm old memory deletion
179 | - Test search functionality with new tags
180 |
181 | ### Phase 4: Verification and Documentation
182 |
183 | #### 4.1 Verification Checklist
184 |
185 | **After each memory:**
186 | - [ ] New memory stored successfully
187 | - [ ] Tags applied correctly
188 | - [ ] Old memory deleted
189 | - [ ] Search returns new memory
190 |
191 | **After maintenance session:**
192 | - [ ] All targeted memories processed
193 | - [ ] Database health check passed
194 | - [ ] No orphaned or broken memories
195 | - [ ] Search functionality improved
196 |
197 | #### 4.2 Session Documentation
198 |
199 | **Create maintenance summary memory:**
200 | ```javascript
201 | store_memory({
202 | "content": "Memory Maintenance Session - [Date]: Successfully processed X memories...",
203 | "metadata": {
204 | "tags": ["memory-maintenance", "session-summary", "tag-management"],
205 | "type": "maintenance-record"
206 | }
207 | })
208 | ```
209 |
210 | **Include in summary:**
211 | - Number of memories processed
212 | - Categories addressed
213 | - Tag patterns applied
214 | - Time investment
215 | - Quality improvements
216 | - Next steps identified
217 |
218 | ## 🎯 Common Maintenance Scenarios
219 |
220 | ### Scenario 1: Test Memory Cleanup
221 |
222 | **Situation**: Numerous test memories from development work
223 |
224 | **Approach:**
225 | 1. Identify all test-related memories
226 | 2. Evaluate each for permanent value
227 | 3. Re-tag valuable tests with specific context
228 | 4. Delete obsolete or redundant tests
229 |
230 | **Example tags for valuable tests:**
231 | ```
232 | ["test", "verification", "issue-7", "timestamp-test", "mcp-memory-service", "quality-assurance"]
233 | ```
234 |
235 | ### Scenario 2: Project Documentation Organization
236 |
237 | **Situation**: Project memories scattered without clear organization
238 |
239 | **Approach:**
240 | 1. Group by project phase (planning, development, deployment)
241 | 2. Add temporal context (month/quarter)
242 | 3. Include status information
243 | 4. Link related memories with consistent tags
244 |
245 | **Tag patterns:**
246 | ```
247 | Project memories: ["project-name", "phase", "technology", "status", "domain"]
248 | Meeting notes: ["meeting", "project-name", "date", "decisions", "action-items"]
249 | ```
250 |
251 | ### Scenario 3: Technical Solution Archive
252 |
253 | **Situation**: Troubleshooting solutions need better organization
254 |
255 | **Approach:**
256 | 1. Categorize by technology/platform
257 | 2. Add problem domain tags
258 | 3. Include resolution status
259 | 4. Tag with difficulty/complexity
260 |
261 | **Example organization:**
262 | ```
263 | ["troubleshooting", "python", "chromadb", "connection-issues", "resolved", "backend"]
264 | ```
265 |
266 | ## 🛠️ Maintenance Tools and Scripts
267 |
268 | ### Helper Queries
269 |
270 | **Find potentially untagged memories:**
271 | ```javascript
272 | // Various search approaches
273 | retrieve_memory({"query": "test simple basic example", "n_results": 20})
274 | recall_memory({"query": "memories from last month", "n_results": 30})
275 | search_by_tag({"tags": ["test"]}) // Review generic tags
276 | ```
277 |
278 | **Content pattern analysis:**
279 | ```javascript
280 | // Look for specific patterns that need organization
281 | retrieve_memory({"query": "TODO FIXME DEBUG ERROR", "n_results": 15})
282 | retrieve_memory({"query": "issue bug problem solution", "n_results": 15})
283 | ```
284 |
285 | ### Batch Processing Templates
286 |
287 | **Standard test memory re-tagging:**
288 | ```javascript
289 | const testMemoryPattern = {
290 | "tags": ["test", "[specific-function]", "[project]", "[domain]", "verification"],
291 | "type": "test-record"
292 | }
293 | ```
294 |
295 | **Documentation memory pattern:**
296 | ```javascript
297 | const documentationPattern = {
298 | "tags": ["documentation", "[project]", "[topic]", "[technology]", "reference"],
299 | "type": "documentation"
300 | }
301 | ```
302 |
303 | ## 📊 Maintenance Metrics
304 |
305 | ### Success Indicators
306 |
307 | **Quantitative Metrics:**
308 | - Percentage of tagged memories
309 | - Search result relevance improvement
310 | - Time to find specific information
311 | - Memory retrieval accuracy
312 |
313 | **Qualitative Metrics:**
314 | - Ease of knowledge discovery
315 | - Consistency of organization
316 | - Usefulness of search results
317 | - Overall system usability
318 |
319 | ### Progress Tracking
320 |
321 | **Session Metrics:**
322 | - Memories processed per hour
323 | - Categories organized
324 | - Tag patterns established
325 | - Quality improvements achieved
326 |
327 | **Long-term Tracking:**
328 | - Monthly maintenance time investment
329 | - Database organization score
330 | - Knowledge retrieval efficiency
331 | - User satisfaction with search
332 |
333 | ## 🔄 Recurring Maintenance
334 |
335 | ### Weekly Maintenance (15-30 minutes)
336 |
337 | ```
338 | Weekly Memory Maintenance:
339 | 1. Recall memories from 'last week'
340 | 2. Identify any untagged or poorly tagged items
341 | 3. Apply quick categorization
342 | 4. Focus on recent work and current projects
343 | 5. Update any status changes (resolved issues, completed tasks)
344 | ```
345 |
346 | ### Monthly Maintenance (1-2 hours)
347 |
348 | ```
349 | Monthly Memory Maintenance:
350 | 1. Comprehensive review of recent memories
351 | 2. Spot check older memories for organization
352 | 3. Update project status tags
353 | 4. Consolidate related memories
354 | 5. Archive or delete obsolete information
355 | 6. Generate maintenance summary report
356 | ```
357 |
358 | ### Quarterly Maintenance (2-4 hours)
359 |
360 | ```
361 | Quarterly Memory Maintenance:
362 | 1. Full database health assessment
363 | 2. Tag schema review and updates
364 | 3. Memory consolidation and cleanup
365 | 4. Performance optimization
366 | 5. Backup and archival processes
367 | 6. Strategic knowledge organization review
368 | ```
369 |
370 | ## 🎯 Best Practices
371 |
372 | ### Do's
373 |
374 | ✅ **Process regularly**: Small, frequent sessions beat large overhauls
375 | ✅ **Use consistent patterns**: Develop standard approaches for common scenarios
376 | ✅ **Document decisions**: Record maintenance choices for future reference
377 | ✅ **Verify thoroughly**: Always confirm changes worked as expected
378 | ✅ **Focus on value**: Prioritize high-impact memories first
379 |
380 | ### Don'ts
381 |
382 | ❌ **Rush the process**: Quality categorization takes time
383 | ❌ **Change content**: Only modify tags and metadata, preserve original content
384 | ❌ **Delete without backup**: Ensure new memory is stored before deleting old
385 | ❌ **Ignore verification**: Always test that maintenance improved functionality
386 | ❌ **Work when tired**: Categorization quality suffers with fatigue
387 |
388 | ## 🚀 Advanced Techniques
389 |
390 | ### Automated Assistance
391 |
392 | **Use semantic search for tag suggestions:**
393 | ```javascript
394 | // Find similar memories for tag pattern ideas
395 | retrieve_memory({"query": "[memory content excerpt]", "n_results": 5})
396 | ```
397 |
398 | **Pattern recognition:**
399 | ```javascript
400 | // Identify common themes for standardization
401 | search_by_tag({"tags": ["technology-name"]}) // See existing patterns
402 | ```
403 |
404 | ### Integration Workflows
405 |
406 | **Connect with external tools:**
407 | - Export tagged memories for documentation systems
408 | - Sync with project management tools
409 | - Generate reports for team sharing
410 | - Create knowledge graphs from tag relationships
411 |
412 | ---
413 |
414 | *This guide provides the foundation for maintaining a professional-grade knowledge management system. Regular maintenance ensures your MCP Memory Service continues to provide maximum value as your knowledge base grows.*
```
--------------------------------------------------------------------------------
/docs/development/release-checklist.md:
--------------------------------------------------------------------------------
```markdown
1 | # Release Checklist
2 |
3 | This checklist ensures that critical bugs like the HTTP-MCP bridge issues are caught before release.
4 |
5 | ## Pre-Release Testing
6 |
7 | ### ✅ Core Functionality Tests
8 | - [ ] **Health Check Endpoints**
9 | - [ ] `/api/health` returns 200 with healthy status
10 | - [ ] `/health` returns 404 (wrong endpoint)
11 | - [ ] Health check works through MCP bridge
12 | - [ ] Health check works with Claude Desktop
13 |
14 | - [ ] **Memory Storage Operations**
15 | - [ ] Store memory returns HTTP 200 with `success: true`
16 | - [ ] Duplicate detection returns HTTP 200 with `success: false`
17 | - [ ] Invalid requests return appropriate error codes
18 | - [ ] All operations work through MCP bridge
19 |
20 | - [ ] **API Endpoint Consistency**
21 | - [ ] All endpoints use `/api/` prefix correctly
22 | - [ ] URL construction doesn't break base paths
23 | - [ ] Bridge correctly appends paths to base URL
24 |
25 | ### ✅ HTTP-MCP Bridge Specific Tests
26 | - [ ] **Status Code Handling**
27 | - [ ] Bridge accepts HTTP 200 responses (not just 201)
28 | - [ ] Bridge checks `success` field for actual result
29 | - [ ] Bridge handles both success and failure in 200 responses
30 |
31 | - [ ] **URL Construction**
32 | - [ ] Bridge preserves `/api` base path in URLs
33 | - [ ] `new URL()` calls don't replace existing paths
34 | - [ ] All API calls reach correct endpoints
35 |
36 | - [ ] **MCP Protocol Compliance**
37 | - [ ] `initialize` method works
38 | - [ ] `tools/list` returns all tools
39 | - [ ] `tools/call` executes correctly
40 | - [ ] Error responses are properly formatted
41 |
42 | ### ✅ End-to-End Testing
43 | - [ ] **Claude Desktop Integration**
44 | - [ ] Memory storage through Claude Desktop works
45 | - [ ] Memory retrieval through Claude Desktop works
46 | - [ ] Health checks show healthy status
47 | - [ ] No "unhealthy" false positives
48 |
49 | - [ ] **Remote Server Testing**
50 | - [ ] Bridge connects to remote server correctly
51 | - [ ] Authentication works with API keys
52 | - [ ] All operations work across network
53 | - [ ] SSL certificates are handled properly
54 |
55 | ### ✅ Contract Validation
56 | - [ ] **API Response Formats**
57 | - [ ] Memory storage responses match documented format
58 | - [ ] Health responses match documented format
59 | - [ ] Error responses match documented format
60 | - [ ] Search responses match documented format
61 |
62 | - [ ] **Backward Compatibility**
63 | - [ ] Existing configurations continue to work
64 | - [ ] No breaking changes to client interfaces
65 | - [ ] Bridge supports both HTTP 200 and 201 responses
66 |
67 | ## Automated Testing Requirements
68 |
69 | ### ✅ Unit Tests
70 | - [ ] HTTP-MCP bridge unit tests pass
71 | - [ ] Mock server responses are realistic
72 | - [ ] All edge cases are covered
73 | - [ ] Error conditions are tested
74 |
75 | ### ✅ Integration Tests
76 | - [ ] Bridge-server integration tests pass
77 | - [ ] Contract tests validate API behavior
78 | - [ ] End-to-end MCP protocol tests pass
79 | - [ ] Real server connectivity tests pass
80 |
81 | ### ✅ CI/CD Pipeline
82 | - [ ] Bridge tests run on every commit
83 | - [ ] Tests block merges if failing
84 | - [ ] Contract validation passes
85 | - [ ] Multiple Node.js versions tested
86 |
87 | ## Manual Testing Checklist
88 |
89 | ### ✅ Critical User Paths
90 | 1. **Claude Desktop User**:
91 | - [ ] Install and configure Claude Desktop with MCP Memory Service
92 | - [ ] Store a memory using Claude Desktop
93 | - [ ] Retrieve memories using Claude Desktop
94 | - [ ] Verify health check shows healthy status
95 | - [ ] Confirm no "unhealthy" warnings appear
96 |
97 | 2. **Remote Server User**:
98 | - [ ] Configure bridge to connect to remote server
99 | - [ ] Test memory operations work correctly
100 | - [ ] Verify all API endpoints are reachable
101 | - [ ] Confirm authentication works
102 |
103 | 3. **API Consumer**:
104 | - [ ] Test direct HTTP API calls work
105 | - [ ] Verify response formats match documentation
106 | - [ ] Test error conditions return expected responses
107 |
108 | ### ✅ Platform Testing
109 | - [ ] **Windows**: Bridge works with Windows Claude Desktop
110 | - [ ] **macOS**: Bridge works with macOS Claude Desktop
111 | - [ ] **Linux**: Bridge works with Linux installations
112 |
113 | ## Code Quality Checks
114 |
115 | ### ✅ Code Review Requirements
116 | - [ ] All HTTP status code assumptions documented
117 | - [ ] URL construction logic reviewed
118 | - [ ] Error handling covers all scenarios
119 | - [ ] No hardcoded endpoints or assumptions
120 |
121 | ### ✅ Documentation Updates
122 | - [ ] API contract documentation updated
123 | - [ ] Bridge usage documentation updated
124 | - [ ] Troubleshooting guides updated
125 | - [ ] Breaking changes documented
126 |
127 | ## Release Process
128 |
129 | ### ✅ Version Management (3-File Procedure)
130 | - [ ] **Update `src/mcp_memory_service/__init__.py`**
131 | - [ ] Update `__version__` string (e.g., `"8.17.0"`)
132 | - [ ] Verify version format follows semantic versioning (MAJOR.MINOR.PATCH)
133 |
134 | - [ ] **Update `pyproject.toml`**
135 | - [ ] Update `version` field in `[project]` section
136 | - [ ] Ensure version matches `__init__.py` exactly
137 |
138 | - [ ] **Lock dependencies**
139 | - [ ] Run `uv lock` to update `uv.lock` file
140 | - [ ] Commit all three files together in version bump commit
141 |
142 | - [ ] **Semantic Versioning Rules**
143 | - [ ] MAJOR: Breaking changes (API changes, removed features)
144 | - [ ] MINOR: New features (backward compatible)
145 | - [ ] PATCH: Bug fixes (no API changes)
146 |
147 | ### ✅ CHANGELOG Quality Gates
148 | - [ ] **Format Validation**
149 | - [ ] Follows [Keep a Changelog](https://keepachangelog.com/) format
150 | - [ ] Version header includes date: `## [8.17.0] - 2025-11-04`
151 | - [ ] Changes categorized: Added/Changed/Fixed/Removed/Deprecated/Security
152 |
153 | - [ ] **Content Requirements**
154 | - [ ] All user-facing changes documented
155 | - [ ] Breaking changes clearly marked with **BREAKING**
156 | - [ ] Performance improvements include metrics (e.g., "50% faster")
157 | - [ ] Bug fixes reference issue numbers (e.g., "Fixes #123")
158 | - [ ] Technical details for maintainers in appropriate sections
159 |
160 | - [ ] **Migration Guidance** (if breaking changes)
161 | - [ ] Before/after code examples provided
162 | - [ ] Environment variable changes documented
163 | - [ ] Database migration scripts linked
164 | - [ ] Deprecation timeline specified
165 |
166 | ### ✅ GitHub Workflow Verification
167 | - [ ] **All Workflows Pass** (check Actions tab)
168 | - [ ] Docker Publish workflow (builds multi-platform images)
169 | - [ ] Publish and Test workflow (PyPI publish + installation tests)
170 | - [ ] HTTP-MCP Bridge Tests (validates MCP protocol compliance)
171 | - [ ] Platform Tests (macOS/Windows/Linux matrix)
172 |
173 | - [ ] **Docker Images Built**
174 | - [ ] `mcp-memory-service:latest` tag updated
175 | - [ ] `mcp-memory-service:v8.x.x` version tag created
176 | - [ ] Multi-platform images (linux/amd64, linux/arm64)
177 |
178 | - [ ] **PyPI Package Published**
179 | - [ ] Package available at https://pypi.org/project/mcp-memory-service/
180 | - [ ] Installation test passes: `pip install mcp-memory-service==8.x.x`
181 |
182 | ### ✅ Git Tag and Release
183 | - [ ] **Create annotated Git tag**
184 | ```bash
185 | git tag -a v8.x.x -m "Release v8.x.x: Brief description"
186 | ```
187 | - [ ] Tag follows `vMAJOR.MINOR.PATCH` format
188 | - [ ] Tag message summarizes key changes
189 |
190 | - [ ] **Push tag to remote**
191 | ```bash
192 | git push origin v8.x.x
193 | ```
194 | - [ ] Tag triggers release workflows
195 |
196 | - [ ] **Create GitHub Release**
197 | - [ ] Title: `vx.x.x - Short Description`
198 | - [ ] Body: Copy relevant CHANGELOG section
199 | - [ ] Mark as pre-release if RC version
200 | - [ ] Attach any release artifacts (if applicable)
201 |
202 | ### ✅ Post-Release Issue Closure
203 | - [ ] **Review Fixed Issues**
204 | - [ ] Search for issues closed by commits in this release
205 | - [ ] Verify each issue is actually resolved
206 |
207 | - [ ] **Close Issues with Context**
208 | ```markdown
209 | Resolved in v8.x.x via #PR_NUMBER
210 |
211 | [Link to CHANGELOG entry]
212 | [Link to relevant Wiki page if applicable]
213 |
214 | Thank you for reporting this issue!
215 | ```
216 | - [ ] Include PR link for traceability
217 | - [ ] Reference CHANGELOG section
218 | - [ ] Tag issues with `released` label
219 |
220 | - [ ] **Update Related Documentation**
221 | - [ ] Wiki pages updated with new features/fixes
222 | - [ ] Troubleshooting guides reflect resolved issues
223 | - [ ] FAQ updated if new common questions emerged
224 |
225 | ### ✅ Communication
226 | - [ ] Release notes highlight critical fixes
227 | - [ ] Breaking changes clearly documented
228 | - [ ] Migration guide provided if needed
229 | - [ ] Users notified of important changes
230 |
231 | ## Post-Release Monitoring
232 |
233 | ### ✅ Health Monitoring
234 | - [ ] Monitor for increased error rates
235 | - [ ] Watch for "unhealthy" status reports
236 | - [ ] Track Claude Desktop connectivity issues
237 | - [ ] Monitor API endpoint usage patterns
238 |
239 | ### ✅ User Feedback
240 | - [ ] Monitor GitHub issues for reports
241 | - [ ] Check community discussions for problems
242 | - [ ] Respond to user reports quickly
243 | - [ ] Document common issues and solutions
244 |
245 | ---
246 |
247 | ## Lessons from HTTP-MCP Bridge Bug
248 |
249 | **Critical Mistakes to Avoid:**
250 | 1. **Never assume status codes** - Always test against actual server responses
251 | 2. **Test critical components** - If users depend on it, it needs comprehensive tests
252 | 3. **Validate URL construction** - `new URL()` behavior with base paths is tricky
253 | 4. **Document actual behavior** - API contracts must match reality, not hopes
254 | 5. **Test end-to-end flows** - Unit tests alone miss integration problems
255 |
256 | **Required for Every Release:**
257 | - [ ] HTTP-MCP bridge tested with real server
258 | - [ ] All assumptions about server behavior validated
259 | - [ ] Critical user paths manually tested
260 | - [ ] API contracts verified against implementation
261 |
262 | **Emergency Response Plan:**
263 | - If critical bugs are found in production:
264 | 1. Create hotfix branch immediately
265 | 2. Write failing test that reproduces the bug
266 | 3. Fix bug and verify test passes
267 | 4. Release hotfix within 24 hours
268 | 5. Post-mortem to prevent similar issues
269 |
270 | ---
271 |
272 | ## Rollback Procedure
273 |
274 | ### ✅ Emergency Rollback (if release breaks production)
275 |
276 | **When to Rollback:**
277 | - Critical functionality broken (storage, retrieval, MCP protocol)
278 | - Data corruption risk identified
279 | - Security vulnerability introduced
280 | - Widespread user-reported failures
281 |
282 | **Rollback Steps:**
283 |
284 | 1. **Immediate Actions**
285 | - [ ] Create GitHub issue documenting the problem
286 | - [ ] Tag issue with `critical`, `rollback-needed`
287 | - [ ] Notify users via GitHub Discussions/Release notes
288 |
289 | 2. **Docker Rollback**
290 | ```bash
291 | # Tag previous version as latest
292 | git checkout vPREVIOUS_VERSION
293 | docker build -t mcp-memory-service:latest .
294 | docker push mcp-memory-service:latest
295 | ```
296 | - [ ] Verify previous Docker image works
297 | - [ ] Update documentation to reference previous version
298 |
299 | 3. **PyPI Rollback** (yank bad version)
300 | ```bash
301 | # Yank the broken version (keeps it available but discourages use).
302 | # Note: twine has no "yank" command. Yank the release via the PyPI web
303 | # interface (project page -> Manage -> Releases -> "Yank release"), per PEP 592.
304 | ```
305 | - [ ] Yank version on PyPI
306 | - [ ] Publish notice in release notes
307 |
308 | 4. **Git Tag Management**
309 | - [ ] Keep the bad tag for history (don't delete)
310 | - [ ] Create new hotfix tag (e.g., `v8.x.x+1`) with fix
311 | - [ ] Mark GitHub Release as "This release has known issues - use the previous release instead" (link the prior version tag explicitly)
312 |
313 | 5. **User Communication**
314 | - [ ] Post issue explaining problem and rollback
315 | - [ ] Update README with rollback instructions
316 | - [ ] Pin issue to repository
317 | - [ ] Post in Discussions with migration path
318 |
319 | 6. **Post-Rollback Analysis**
320 | - [ ] Document what went wrong in post-mortem
321 | - [ ] Add regression test to prevent recurrence
322 | - [ ] Update this checklist with lessons learned
323 | - [ ] Review release testing procedures
324 |
325 | **Recovery Timeline:**
326 | - Hour 1: Identify issue, create GitHub issue, begin rollback
327 | - Hour 2-4: Complete rollback, verify previous version works
328 | - Hour 4-24: Investigate root cause, prepare hotfix
329 | - Day 2: Release hotfix with comprehensive tests
330 | - Week 1: Post-mortem, update testing procedures
331 |
332 | ---
333 |
334 | This checklist must be completed for every release to prevent critical bugs from reaching users.
```
--------------------------------------------------------------------------------
/scripts/sync/sync_memory_backends.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Heinrich Krupp
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """
17 | Bidirectional sync script for MCP Memory Service backends.
18 | Syncs memories between Cloudflare (primary) and SQLite-vec (backup).
19 | """
20 | import sys
21 | import os
22 | import asyncio
23 | import logging
24 | import argparse
25 | import hashlib
26 | from pathlib import Path
27 | from typing import List, Dict, Any, Tuple
28 | from datetime import datetime
29 |
30 | # Add src directory to path so we can import from the mcp_memory_service package
31 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
32 |
33 | from mcp_memory_service.config import (
34 | CLOUDFLARE_API_TOKEN, CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_VECTORIZE_INDEX,
35 | CLOUDFLARE_D1_DATABASE_ID, BASE_DIR
36 | )
37 | from mcp_memory_service.models.memory import Memory
38 | from mcp_memory_service.storage.cloudflare import CloudflareStorage
39 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
40 |
41 | # Configure logging
42 | logging.basicConfig(
43 | level=logging.INFO,
44 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
45 | )
46 | logger = logging.getLogger("memory_sync")
47 |
48 | class MemorySync:
49 | """Handles bidirectional sync between Cloudflare and SQLite-vec backends."""
50 |
51 | def __init__(self, sqlite_path: str = None):  # sqlite_path: optional override for the local backup DB location
52 | """Initialize sync with storage backends."""
53 | self.sqlite_path = sqlite_path or os.path.join(BASE_DIR, 'backup_sqlite_vec.db')  # default: backup DB under the service BASE_DIR
54 |
55 | # Initialize storage backends
56 | self.cloudflare = CloudflareStorage(  # Cloudflare acts as the primary backend (see module docstring)
57 | api_token=CLOUDFLARE_API_TOKEN,
58 | account_id=CLOUDFLARE_ACCOUNT_ID,
59 | vectorize_index=CLOUDFLARE_VECTORIZE_INDEX,
60 | d1_database_id=CLOUDFLARE_D1_DATABASE_ID
61 | )
62 |
63 | self.sqlite_vec = SqliteVecMemoryStorage(self.sqlite_path)  # local SQLite-vec file is the backup backend
64 |
65 | async def get_all_memories_from_backend(self, backend_name: str) -> List[Dict[str, Any]]:
66 | """Get all memories from a specific backend."""
67 | if backend_name == 'cloudflare':
68 | backend = self.cloudflare
69 | elif backend_name == 'sqlite_vec':
70 | backend = self.sqlite_vec
71 | else:
72 | raise ValueError(f"Unknown backend: {backend_name}")
73 |
74 | try:
75 | # Get all memories from the backend
76 | memories_list = await backend.get_all_memories()
77 |
78 | memories = []
79 | for memory in memories_list:
80 | memory_dict = {  # flatten the Memory model into a plain dict for comparison/sync
81 | 'content_hash': memory.content_hash,
82 | 'content': memory.content,
83 | 'metadata': memory.metadata,
84 | 'tags': memory.tags,
85 | 'memory_type': memory.memory_type,
86 | 'created_at': memory.created_at,
87 | 'created_at_iso': memory.created_at_iso,
88 | 'updated_at': memory.updated_at,
89 | 'updated_at_iso': memory.updated_at_iso,
90 | }
91 | memories.append(memory_dict)
92 |
93 | logger.info(f"Retrieved {len(memories)} memories from {backend_name}")
94 | return memories
95 |
96 | except Exception as e:
97 | logger.error(f"Error retrieving memories from {backend_name}: {e}")
98 | return []  # NOTE(review): errors yield an empty list — callers cannot distinguish "empty backend" from "fetch failed"
99 |
100 | def calculate_content_hash(self, content: str, metadata: Dict[str, Any]) -> str:
101 | """Calculate a hash for memory content to detect duplicates."""
102 | # Create a consistent string representation
103 | content_str = f"{content}_{sorted(metadata.items())}"
104 | return hashlib.sha256(content_str.encode()).hexdigest()[:16]
105 |
106 | async def _sync_between_backends(self, source_backend: str, target_backend: str, dry_run: bool = False) -> Tuple[int, int]:
107 | """
108 | Generic method to sync memories between any two backends.
109 |
110 | Args:
111 | source_backend: Backend to sync from ('cloudflare' or 'sqlite_vec')
112 | target_backend: Backend to sync to ('cloudflare' or 'sqlite_vec')
113 | dry_run: If True, only show what would be synced without making changes
114 |
115 | Returns:
116 | Tuple of (added_count, skipped_count)
117 | """
118 | logger.info(f"Starting sync from {source_backend} to {target_backend}...")
119 |
120 | # Get memories from both backends
121 | source_memories = await self.get_all_memories_from_backend(source_backend)
122 | target_memories = await self.get_all_memories_from_backend(target_backend)
123 |
124 | # Create hash sets for quick lookup
125 | target_hashes = {mem['content_hash'] for mem in target_memories if mem.get('content_hash')}
126 | target_content_hashes = {
127 | self.calculate_content_hash(mem['content'], mem['metadata'])
128 | for mem in target_memories
129 | }
130 |
131 | added_count = 0
132 | skipped_count = 0
133 |
134 | # Get target backend instance for storing memories
135 | target_storage = self.cloudflare if target_backend == 'cloudflare' else self.sqlite_vec
136 |
137 | for source_memory in source_memories:
138 | # Check if memory already exists (by hash or content)
139 | content_hash = self.calculate_content_hash(source_memory['content'], source_memory['metadata'])
140 |
141 | if (source_memory.get('content_hash') in target_hashes or
142 | content_hash in target_content_hashes):
143 | skipped_count += 1
144 | continue
145 |
146 | if not dry_run:
147 | try:
148 | memory_obj = Memory(
149 | content=source_memory['content'],
150 | content_hash=source_memory['content_hash'],
151 | tags=source_memory.get('tags', []),
152 | metadata=source_memory.get('metadata', {}),
153 | memory_type=source_memory.get('memory_type'),
154 | created_at=source_memory.get('created_at'),
155 | updated_at=source_memory.get('updated_at'),
156 | )
157 | success, message = await target_storage.store(memory_obj)
158 | if success:
159 | added_count += 1
160 | logger.debug(f"Added memory: {source_memory['content_hash'][:8]}...")
161 | else:
162 | logger.warning(f"Failed to store memory {source_memory['content_hash']}: {message}")
163 | except Exception as e:
164 | logger.error(f"Error storing memory {source_memory['content_hash']}: {e}")
165 | else:
166 | added_count += 1
167 |
168 | logger.info(f"{source_backend} → {target_backend}: {added_count} added, {skipped_count} skipped")
169 | return added_count, skipped_count
170 |
171 | async def sync_cloudflare_to_sqlite(self, dry_run: bool = False) -> Tuple[int, int]:
172 | """Sync memories from Cloudflare to SQLite-vec."""
173 | return await self._sync_between_backends('cloudflare', 'sqlite_vec', dry_run)
174 |
175 | async def sync_sqlite_to_cloudflare(self, dry_run: bool = False) -> Tuple[int, int]:
176 | """Sync memories from SQLite-vec to Cloudflare."""
177 | return await self._sync_between_backends('sqlite_vec', 'cloudflare', dry_run)
178 |
179 | async def bidirectional_sync(self, dry_run: bool = False) -> Dict[str, Tuple[int, int]]:
180 | """Perform bidirectional sync between backends."""
181 | logger.info("Starting bidirectional sync...")
182 |
183 | results = {}
184 |
185 | # Sync Cloudflare → SQLite-vec
186 | cf_to_sqlite = await self.sync_cloudflare_to_sqlite(dry_run)
187 | results['cloudflare_to_sqlite'] = cf_to_sqlite
188 |
189 | # Sync SQLite-vec → Cloudflare
190 | sqlite_to_cf = await self.sync_sqlite_to_cloudflare(dry_run)
191 | results['sqlite_to_cloudflare'] = sqlite_to_cf
192 |
193 | logger.info("Bidirectional sync completed")
194 | return results
195 |
196 | async def get_sync_status(self) -> Dict[str, Any]:
197 | """Get sync status showing memory counts in both backends."""
198 | cf_memories = await self.get_all_memories_from_backend('cloudflare')
199 | sqlite_memories = await self.get_all_memories_from_backend('sqlite_vec')
200 |
201 | status = {
202 | 'cloudflare_count': len(cf_memories),
203 | 'sqlite_vec_count': len(sqlite_memories),
204 | 'sync_time': datetime.now().isoformat(),
205 | 'backends_configured': {
206 | 'cloudflare': bool(CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID),
207 | 'sqlite_vec': os.path.exists(self.sqlite_path) if self.sqlite_path else False
208 | }
209 | }
210 |
211 | return status
212 |
async def main():
    """Main function to run memory sync operations.

    Parses CLI arguments, then either prints sync status (--status) or runs
    the requested sync direction, exiting with status 1 on failure.
    """
    parser = argparse.ArgumentParser(description='Sync memories between Cloudflare and SQLite-vec backends')
    parser.add_argument('--direction', choices=['cf-to-sqlite', 'sqlite-to-cf', 'bidirectional'],
                        default='bidirectional', help='Sync direction')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be synced without actually syncing')
    parser.add_argument('--status', action='store_true', help='Show sync status only')
    parser.add_argument('--sqlite-path', help='Path to SQLite-vec database file')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Initialize sync
    sync = MemorySync(sqlite_path=args.sqlite_path)

    try:
        if args.status:
            status = await sync.get_sync_status()
            print("\n=== Memory Sync Status ===")
            print(f"Cloudflare memories: {status['cloudflare_count']}")
            print(f"SQLite-vec memories: {status['sqlite_vec_count']}")
            print(f"Cloudflare configured: {status['backends_configured']['cloudflare']}")
            print(f"SQLite-vec file exists: {status['backends_configured']['sqlite_vec']}")
            print(f"Last check: {status['sync_time']}")
            return

        logger.info(f"=== Starting memory sync ({args.direction}) ===")
        if args.dry_run:
            logger.info("DRY RUN MODE - No changes will be made")

        if args.direction == 'cf-to-sqlite':
            added, skipped = await sync.sync_cloudflare_to_sqlite(dry_run=args.dry_run)
            print(f"Cloudflare → SQLite-vec: {added} added, {skipped} skipped")
        elif args.direction == 'sqlite-to-cf':
            added, skipped = await sync.sync_sqlite_to_cloudflare(dry_run=args.dry_run)
            print(f"SQLite-vec → Cloudflare: {added} added, {skipped} skipped")
        else:  # bidirectional
            results = await sync.bidirectional_sync(dry_run=args.dry_run)
            cf_to_sqlite = results['cloudflare_to_sqlite']
            sqlite_to_cf = results['sqlite_to_cloudflare']
            print(f"Cloudflare → SQLite-vec: {cf_to_sqlite[0]} added, {cf_to_sqlite[1]} skipped")
            print(f"SQLite-vec → Cloudflare: {sqlite_to_cf[0]} added, {sqlite_to_cf[1]} skipped")

        logger.info("=== Sync completed successfully ===")

    except Exception as e:
        # logger.exception preserves the traceback that plain error() dropped.
        logger.exception("Sync failed: %s", e)
        sys.exit(1)
264 |
if __name__ == "__main__":
    # Script entry point: run the async CLI driver to completion.
    asyncio.run(main())
267 |
```