This is page 16 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .claude
│   ├── agents
│   │   ├── amp-bridge.md
│   │   ├── amp-pr-automator.md
│   │   ├── code-quality-guard.md
│   │   ├── gemini-pr-automator.md
│   │   └── github-release-manager.md
│   ├── settings.local.json.backup
│   └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   ├── feature_request.yml
│   │   └── performance_issue.yml
│   ├── pull_request_template.md
│   └── workflows
│       ├── bridge-tests.yml
│       ├── CACHE_FIX.md
│       ├── claude-code-review.yml
│       ├── claude.yml
│       ├── cleanup-images.yml.disabled
│       ├── dev-setup-validation.yml
│       ├── docker-publish.yml
│       ├── LATEST_FIXES.md
│       ├── main-optimized.yml.disabled
│       ├── main.yml
│       ├── publish-and-test.yml
│       ├── README_OPTIMIZATION.md
│       ├── release-tag.yml.disabled
│       ├── release.yml
│       ├── roadmap-review-reminder.yml
│       ├── SECRET_CONDITIONAL_FIX.md
│       └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│   ├── .gitignore
│   └── reports
│       └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│   ├── deployment
│   │   ├── deploy_fastmcp_fixed.sh
│   │   ├── deploy_http_with_mcp.sh
│   │   └── deploy_mcp_v4.sh
│   ├── deployment-configs
│   │   ├── empty_config.yml
│   │   └── smithery.yaml
│   ├── development
│   │   └── test_fastmcp.py
│   ├── docs-removed-2025-08-23
│   │   ├── authentication.md
│   │   ├── claude_integration.md
│   │   ├── claude-code-compatibility.md
│   │   ├── claude-code-integration.md
│   │   ├── claude-code-quickstart.md
│   │   ├── claude-desktop-setup.md
│   │   ├── complete-setup-guide.md
│   │   ├── database-synchronization.md
│   │   ├── development
│   │   │   ├── autonomous-memory-consolidation.md
│   │   │   ├── CLEANUP_PLAN.md
│   │   │   ├── CLEANUP_README.md
│   │   │   ├── CLEANUP_SUMMARY.md
│   │   │   ├── dream-inspired-memory-consolidation.md
│   │   │   ├── hybrid-slm-memory-consolidation.md
│   │   │   ├── mcp-milestone.md
│   │   │   ├── multi-client-architecture.md
│   │   │   ├── test-results.md
│   │   │   └── TIMESTAMP_FIX_SUMMARY.md
│   │   ├── distributed-sync.md
│   │   ├── invocation_guide.md
│   │   ├── macos-intel.md
│   │   ├── master-guide.md
│   │   ├── mcp-client-configuration.md
│   │   ├── multi-client-server.md
│   │   ├── service-installation.md
│   │   ├── sessions
│   │   │   └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│   │   ├── UBUNTU_SETUP.md
│   │   ├── ubuntu.md
│   │   ├── windows-setup.md
│   │   └── windows.md
│   ├── docs-root-cleanup-2025-08-23
│   │   ├── AWESOME_LIST_SUBMISSION.md
│   │   ├── CLOUDFLARE_IMPLEMENTATION.md
│   │   ├── DOCUMENTATION_ANALYSIS.md
│   │   ├── DOCUMENTATION_CLEANUP_PLAN.md
│   │   ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│   │   ├── LITESTREAM_SETUP_GUIDE.md
│   │   ├── lm_studio_system_prompt.md
│   │   ├── PYTORCH_DOWNLOAD_FIX.md
│   │   └── README-ORIGINAL-BACKUP.md
│   ├── investigations
│   │   └── MACOS_HOOKS_INVESTIGATION.md
│   ├── litestream-configs-v6.3.0
│   │   ├── install_service.sh
│   │   ├── litestream_master_config_fixed.yml
│   │   ├── litestream_master_config.yml
│   │   ├── litestream_replica_config_fixed.yml
│   │   ├── litestream_replica_config.yml
│   │   ├── litestream_replica_simple.yml
│   │   ├── litestream-http.service
│   │   ├── litestream.service
│   │   └── requirements-cloudflare.txt
│   ├── release-notes
│   │   └── release-notes-v7.1.4.md
│   └── setup-development
│       ├── README.md
│       ├── setup_consolidation_mdns.sh
│       ├── STARTUP_SETUP_GUIDE.md
│       └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│   ├── memory-context.md
│   ├── memory-health.md
│   ├── memory-ingest-dir.md
│   ├── memory-ingest.md
│   ├── memory-recall.md
│   ├── memory-search.md
│   ├── memory-store.md
│   ├── README.md
│   └── session-start.md
├── claude-hooks
│   ├── config.json
│   ├── config.template.json
│   ├── CONFIGURATION.md
│   ├── core
│   │   ├── memory-retrieval.js
│   │   ├── mid-conversation.js
│   │   ├── session-end.js
│   │   ├── session-start.js
│   │   └── topic-change.js
│   ├── debug-pattern-test.js
│   ├── install_claude_hooks_windows.ps1
│   ├── install_hooks.py
│   ├── memory-mode-controller.js
│   ├── MIGRATION.md
│   ├── README-NATURAL-TRIGGERS.md
│   ├── README-phase2.md
│   ├── README.md
│   ├── simple-test.js
│   ├── statusline.sh
│   ├── test-adaptive-weights.js
│   ├── test-dual-protocol-hook.js
│   ├── test-mcp-hook.js
│   ├── test-natural-triggers.js
│   ├── test-recency-scoring.js
│   ├── tests
│   │   ├── integration-test.js
│   │   ├── phase2-integration-test.js
│   │   ├── test-code-execution.js
│   │   ├── test-cross-session.json
│   │   ├── test-session-tracking.json
│   │   └── test-threading.json
│   ├── utilities
│   │   ├── adaptive-pattern-detector.js
│   │   ├── context-formatter.js
│   │   ├── context-shift-detector.js
│   │   ├── conversation-analyzer.js
│   │   ├── dynamic-context-updater.js
│   │   ├── git-analyzer.js
│   │   ├── mcp-client.js
│   │   ├── memory-client.js
│   │   ├── memory-scorer.js
│   │   ├── performance-manager.js
│   │   ├── project-detector.js
│   │   ├── session-tracker.js
│   │   ├── tiered-conversation-monitor.js
│   │   └── version-checker.js
│   └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│   ├── amp-cli-bridge.md
│   ├── api
│   │   ├── code-execution-interface.md
│   │   ├── memory-metadata-api.md
│   │   ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│   │   ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│   │   ├── PHASE2_REPORT.md
│   │   └── tag-standardization.md
│   ├── architecture
│   │   ├── search-enhancement-spec.md
│   │   └── search-examples.md
│   ├── architecture.md
│   ├── archive
│   │   └── obsolete-workflows
│   │       ├── load_memory_context.md
│   │       └── README.md
│   ├── assets
│   │   └── images
│   │       ├── dashboard-v3.3.0-preview.png
│   │       ├── memory-awareness-hooks-example.png
│   │       ├── project-infographic.svg
│   │       └── README.md
│   ├── CLAUDE_CODE_QUICK_REFERENCE.md
│   ├── cloudflare-setup.md
│   ├── deployment
│   │   ├── docker.md
│   │   ├── dual-service.md
│   │   ├── production-guide.md
│   │   └── systemd-service.md
│   ├── development
│   │   ├── ai-agent-instructions.md
│   │   ├── code-quality
│   │   │   ├── phase-2a-completion.md
│   │   │   ├── phase-2a-handle-get-prompt.md
│   │   │   ├── phase-2a-index.md
│   │   │   ├── phase-2a-install-package.md
│   │   │   └── phase-2b-session-summary.md
│   │   ├── code-quality-workflow.md
│   │   ├── dashboard-workflow.md
│   │   ├── issue-management.md
│   │   ├── pr-review-guide.md
│   │   ├── refactoring-notes.md
│   │   ├── release-checklist.md
│   │   └── todo-tracker.md
│   ├── docker-optimized-build.md
│   ├── document-ingestion.md
│   ├── DOCUMENTATION_AUDIT.md
│   ├── enhancement-roadmap-issue-14.md
│   ├── examples
│   │   ├── analysis-scripts.js
│   │   ├── maintenance-session-example.md
│   │   ├── memory-distribution-chart.jsx
│   │   └── tag-schema.json
│   ├── first-time-setup.md
│   ├── glama-deployment.md
│   ├── guides
│   │   ├── advanced-command-examples.md
│   │   ├── chromadb-migration.md
│   │   ├── commands-vs-mcp-server.md
│   │   ├── mcp-enhancements.md
│   │   ├── mdns-service-discovery.md
│   │   ├── memory-consolidation-guide.md
│   │   ├── migration.md
│   │   ├── scripts.md
│   │   └── STORAGE_BACKENDS.md
│   ├── HOOK_IMPROVEMENTS.md
│   ├── hooks
│   │   └── phase2-code-execution-migration.md
│   ├── http-server-management.md
│   ├── ide-compatability.md
│   ├── IMAGE_RETENTION_POLICY.md
│   ├── images
│   │   └── dashboard-placeholder.md
│   ├── implementation
│   │   ├── health_checks.md
│   │   └── performance.md
│   ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│   ├── integration
│   │   ├── homebrew.md
│   │   └── multi-client.md
│   ├── integrations
│   │   ├── gemini.md
│   │   ├── groq-bridge.md
│   │   ├── groq-integration-summary.md
│   │   └── groq-model-comparison.md
│   ├── integrations.md
│   ├── legacy
│   │   └── dual-protocol-hooks.md
│   ├── LM_STUDIO_COMPATIBILITY.md
│   ├── maintenance
│   │   └── memory-maintenance.md
│   ├── mastery
│   │   ├── api-reference.md
│   │   ├── architecture-overview.md
│   │   ├── configuration-guide.md
│   │   ├── local-setup-and-run.md
│   │   ├── testing-guide.md
│   │   └── troubleshooting.md
│   ├── migration
│   │   └── code-execution-api-quick-start.md
│   ├── natural-memory-triggers
│   │   ├── cli-reference.md
│   │   ├── installation-guide.md
│   │   └── performance-optimization.md
│   ├── oauth-setup.md
│   ├── pr-graphql-integration.md
│   ├── quick-setup-cloudflare-dual-environment.md
│   ├── README.md
│   ├── remote-configuration-wiki-section.md
│   ├── research
│   │   ├── code-execution-interface-implementation.md
│   │   └── code-execution-interface-summary.md
│   ├── ROADMAP.md
│   ├── sqlite-vec-backend.md
│   ├── statistics
│   │   ├── charts
│   │   │   ├── activity_patterns.png
│   │   │   ├── contributors.png
│   │   │   ├── growth_trajectory.png
│   │   │   ├── monthly_activity.png
│   │   │   └── october_sprint.png
│   │   ├── data
│   │   │   ├── activity_by_day.csv
│   │   │   ├── activity_by_hour.csv
│   │   │   ├── contributors.csv
│   │   │   └── monthly_activity.csv
│   │   ├── generate_charts.py
│   │   └── REPOSITORY_STATISTICS.md
│   ├── technical
│   │   ├── development.md
│   │   ├── memory-migration.md
│   │   ├── migration-log.md
│   │   ├── sqlite-vec-embedding-fixes.md
│   │   └── tag-storage.md
│   ├── testing
│   │   └── regression-tests.md
│   ├── testing-cloudflare-backend.md
│   ├── troubleshooting
│   │   ├── cloudflare-api-token-setup.md
│   │   ├── cloudflare-authentication.md
│   │   ├── general.md
│   │   ├── hooks-quick-reference.md
│   │   ├── pr162-schema-caching-issue.md
│   │   ├── session-end-hooks.md
│   │   └── sync-issues.md
│   └── tutorials
│       ├── advanced-techniques.md
│       ├── data-analysis.md
│       └── demo-session-walkthrough.md
├── examples
│   ├── claude_desktop_config_template.json
│   ├── claude_desktop_config_windows.json
│   ├── claude-desktop-http-config.json
│   ├── config
│   │   └── claude_desktop_config.json
│   ├── http-mcp-bridge.js
│   ├── memory_export_template.json
│   ├── README.md
│   ├── setup
│   │   └── setup_multi_client_complete.py
│   └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│   ├── .claude
│   │   └── settings.local.json
│   ├── archive
│   │   └── check_missing_timestamps.py
│   ├── backup
│   │   ├── backup_memories.py
│   │   ├── backup_sqlite_vec.sh
│   │   ├── export_distributable_memories.sh
│   │   └── restore_memories.py
│   ├── benchmarks
│   │   ├── benchmark_code_execution_api.py
│   │   ├── benchmark_hybrid_sync.py
│   │   └── benchmark_server_caching.py
│   ├── database
│   │   ├── analyze_sqlite_vec_db.py
│   │   ├── check_sqlite_vec_status.py
│   │   ├── db_health_check.py
│   │   └── simple_timestamp_check.py
│   ├── development
│   │   ├── debug_server_initialization.py
│   │   ├── find_orphaned_files.py
│   │   ├── fix_mdns.sh
│   │   ├── fix_sitecustomize.py
│   │   ├── remote_ingest.sh
│   │   ├── setup-git-merge-drivers.sh
│   │   ├── uv-lock-merge.sh
│   │   └── verify_hybrid_sync.py
│   ├── hooks
│   │   └── pre-commit
│   ├── installation
│   │   ├── install_linux_service.py
│   │   ├── install_macos_service.py
│   │   ├── install_uv.py
│   │   ├── install_windows_service.py
│   │   ├── install.py
│   │   ├── setup_backup_cron.sh
│   │   ├── setup_claude_mcp.sh
│   │   └── setup_cloudflare_resources.py
│   ├── linux
│   │   ├── service_status.sh
│   │   ├── start_service.sh
│   │   ├── stop_service.sh
│   │   ├── uninstall_service.sh
│   │   └── view_logs.sh
│   ├── maintenance
│   │   ├── assign_memory_types.py
│   │   ├── check_memory_types.py
│   │   ├── cleanup_corrupted_encoding.py
│   │   ├── cleanup_memories.py
│   │   ├── cleanup_organize.py
│   │   ├── consolidate_memory_types.py
│   │   ├── consolidation_mappings.json
│   │   ├── delete_orphaned_vectors_fixed.py
│   │   ├── fast_cleanup_duplicates_with_tracking.sh
│   │   ├── find_all_duplicates.py
│   │   ├── find_cloudflare_duplicates.py
│   │   ├── find_duplicates.py
│   │   ├── memory-types.md
│   │   ├── README.md
│   │   ├── recover_timestamps_from_cloudflare.py
│   │   ├── regenerate_embeddings.py
│   │   ├── repair_malformed_tags.py
│   │   ├── repair_memories.py
│   │   ├── repair_sqlite_vec_embeddings.py
│   │   ├── repair_zero_embeddings.py
│   │   ├── restore_from_json_export.py
│   │   └── scan_todos.sh
│   ├── migration
│   │   ├── cleanup_mcp_timestamps.py
│   │   ├── legacy
│   │   │   └── migrate_chroma_to_sqlite.py
│   │   ├── mcp-migration.py
│   │   ├── migrate_sqlite_vec_embeddings.py
│   │   ├── migrate_storage.py
│   │   ├── migrate_tags.py
│   │   ├── migrate_timestamps.py
│   │   ├── migrate_to_cloudflare.py
│   │   ├── migrate_to_sqlite_vec.py
│   │   ├── migrate_v5_enhanced.py
│   │   ├── TIMESTAMP_CLEANUP_README.md
│   │   └── verify_mcp_timestamps.py
│   ├── pr
│   │   ├── amp_collect_results.sh
│   │   ├── amp_detect_breaking_changes.sh
│   │   ├── amp_generate_tests.sh
│   │   ├── amp_pr_review.sh
│   │   ├── amp_quality_gate.sh
│   │   ├── amp_suggest_fixes.sh
│   │   ├── auto_review.sh
│   │   ├── detect_breaking_changes.sh
│   │   ├── generate_tests.sh
│   │   ├── lib
│   │   │   └── graphql_helpers.sh
│   │   ├── quality_gate.sh
│   │   ├── resolve_threads.sh
│   │   ├── run_pyscn_analysis.sh
│   │   ├── run_quality_checks.sh
│   │   ├── thread_status.sh
│   │   └── watch_reviews.sh
│   ├── quality
│   │   ├── fix_dead_code_install.sh
│   │   ├── phase1_dead_code_analysis.md
│   │   ├── phase2_complexity_analysis.md
│   │   ├── README_PHASE1.md
│   │   ├── README_PHASE2.md
│   │   ├── track_pyscn_metrics.sh
│   │   └── weekly_quality_review.sh
│   ├── README.md
│   ├── run
│   │   ├── run_mcp_memory.sh
│   │   ├── run-with-uv.sh
│   │   └── start_sqlite_vec.sh
│   ├── run_memory_server.py
│   ├── server
│   │   ├── check_http_server.py
│   │   ├── check_server_health.py
│   │   ├── memory_offline.py
│   │   ├── preload_models.py
│   │   ├── run_http_server.py
│   │   ├── run_memory_server.py
│   │   ├── start_http_server.bat
│   │   └── start_http_server.sh
│   ├── service
│   │   ├── deploy_dual_services.sh
│   │   ├── install_http_service.sh
│   │   ├── mcp-memory-http.service
│   │   ├── mcp-memory.service
│   │   ├── memory_service_manager.sh
│   │   ├── service_control.sh
│   │   ├── service_utils.py
│   │   └── update_service.sh
│   ├── sync
│   │   ├── check_drift.py
│   │   ├── claude_sync_commands.py
│   │   ├── export_memories.py
│   │   ├── import_memories.py
│   │   ├── litestream
│   │   │   ├── apply_local_changes.sh
│   │   │   ├── enhanced_memory_store.sh
│   │   │   ├── init_staging_db.sh
│   │   │   ├── io.litestream.replication.plist
│   │   │   ├── manual_sync.sh
│   │   │   ├── memory_sync.sh
│   │   │   ├── pull_remote_changes.sh
│   │   │   ├── push_to_remote.sh
│   │   │   ├── README.md
│   │   │   ├── resolve_conflicts.sh
│   │   │   ├── setup_local_litestream.sh
│   │   │   ├── setup_remote_litestream.sh
│   │   │   ├── staging_db_init.sql
│   │   │   ├── stash_local_changes.sh
│   │   │   ├── sync_from_remote_noconfig.sh
│   │   │   └── sync_from_remote.sh
│   │   ├── README.md
│   │   ├── safe_cloudflare_update.sh
│   │   ├── sync_memory_backends.py
│   │   └── sync_now.py
│   ├── testing
│   │   ├── run_complete_test.py
│   │   ├── run_memory_test.sh
│   │   ├── simple_test.py
│   │   ├── test_cleanup_logic.py
│   │   ├── test_cloudflare_backend.py
│   │   ├── test_docker_functionality.py
│   │   ├── test_installation.py
│   │   ├── test_mdns.py
│   │   ├── test_memory_api.py
│   │   ├── test_memory_simple.py
│   │   ├── test_migration.py
│   │   ├── test_search_api.py
│   │   ├── test_sqlite_vec_embeddings.py
│   │   ├── test_sse_events.py
│   │   ├── test-connection.py
│   │   └── test-hook.js
│   ├── utils
│   │   ├── claude_commands_utils.py
│   │   ├── generate_personalized_claude_md.sh
│   │   ├── groq
│   │   ├── groq_agent_bridge.py
│   │   ├── list-collections.py
│   │   ├── memory_wrapper_uv.py
│   │   ├── query_memories.py
│   │   ├── smithery_wrapper.py
│   │   ├── test_groq_bridge.sh
│   │   └── uv_wrapper.py
│   └── validation
│       ├── check_dev_setup.py
│       ├── check_documentation_links.py
│       ├── diagnose_backend_config.py
│       ├── validate_configuration_complete.py
│       ├── validate_memories.py
│       ├── validate_migration.py
│       ├── validate_timestamp_integrity.py
│       ├── verify_environment.py
│       ├── verify_pytorch_windows.py
│       └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│   └── mcp_memory_service
│       ├── __init__.py
│       ├── api
│       │   ├── __init__.py
│       │   ├── client.py
│       │   ├── operations.py
│       │   ├── sync_wrapper.py
│       │   └── types.py
│       ├── backup
│       │   ├── __init__.py
│       │   └── scheduler.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── ingestion.py
│       │   ├── main.py
│       │   └── utils.py
│       ├── config.py
│       ├── consolidation
│       │   ├── __init__.py
│       │   ├── associations.py
│       │   ├── base.py
│       │   ├── clustering.py
│       │   ├── compression.py
│       │   ├── consolidator.py
│       │   ├── decay.py
│       │   ├── forgetting.py
│       │   ├── health.py
│       │   └── scheduler.py
│       ├── dependency_check.py
│       ├── discovery
│       │   ├── __init__.py
│       │   ├── client.py
│       │   └── mdns_service.py
│       ├── embeddings
│       │   ├── __init__.py
│       │   └── onnx_embeddings.py
│       ├── ingestion
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── chunker.py
│       │   ├── csv_loader.py
│       │   ├── json_loader.py
│       │   ├── pdf_loader.py
│       │   ├── registry.py
│       │   ├── semtools_loader.py
│       │   └── text_loader.py
│       ├── lm_studio_compat.py
│       ├── mcp_server.py
│       ├── models
│       │   ├── __init__.py
│       │   └── memory.py
│       ├── server.py
│       ├── services
│       │   ├── __init__.py
│       │   └── memory_service.py
│       ├── storage
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── cloudflare.py
│       │   ├── factory.py
│       │   ├── http_client.py
│       │   ├── hybrid.py
│       │   └── sqlite_vec.py
│       ├── sync
│       │   ├── __init__.py
│       │   ├── exporter.py
│       │   ├── importer.py
│       │   └── litestream_config.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cache_manager.py
│       │   ├── content_splitter.py
│       │   ├── db_utils.py
│       │   ├── debug.py
│       │   ├── document_processing.py
│       │   ├── gpu_detection.py
│       │   ├── hashing.py
│       │   ├── http_server_manager.py
│       │   ├── port_detection.py
│       │   ├── system_detection.py
│       │   └── time_parser.py
│       └── web
│           ├── __init__.py
│           ├── api
│           │   ├── __init__.py
│           │   ├── analytics.py
│           │   ├── backup.py
│           │   ├── consolidation.py
│           │   ├── documents.py
│           │   ├── events.py
│           │   ├── health.py
│           │   ├── manage.py
│           │   ├── mcp.py
│           │   ├── memories.py
│           │   ├── search.py
│           │   └── sync.py
│           ├── app.py
│           ├── dependencies.py
│           ├── oauth
│           │   ├── __init__.py
│           │   ├── authorization.py
│           │   ├── discovery.py
│           │   ├── middleware.py
│           │   ├── models.py
│           │   ├── registration.py
│           │   └── storage.py
│           ├── sse.py
│           └── static
│               ├── app.js
│               ├── index.html
│               ├── README.md
│               ├── sse_test.html
│               └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── test_compact_types.py
│   │   └── test_operations.py
│   ├── bridge
│   │   ├── mock_responses.js
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   └── test_http_mcp_bridge.js
│   ├── conftest.py
│   ├── consolidation
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── test_associations.py
│   │   ├── test_clustering.py
│   │   ├── test_compression.py
│   │   ├── test_consolidator.py
│   │   ├── test_decay.py
│   │   └── test_forgetting.py
│   ├── contracts
│   │   └── api-specification.yml
│   ├── integration
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── test_api_key_fallback.py
│   │   ├── test_api_memories_chronological.py
│   │   ├── test_api_tag_time_search.py
│   │   ├── test_api_with_memory_service.py
│   │   ├── test_bridge_integration.js
│   │   ├── test_cli_interfaces.py
│   │   ├── test_cloudflare_connection.py
│   │   ├── test_concurrent_clients.py
│   │   ├── test_data_serialization_consistency.py
│   │   ├── test_http_server_startup.py
│   │   ├── test_mcp_memory.py
│   │   ├── test_mdns_integration.py
│   │   ├── test_oauth_basic_auth.py
│   │   ├── test_oauth_flow.py
│   │   ├── test_server_handlers.py
│   │   └── test_store_memory.py
│   ├── performance
│   │   ├── test_background_sync.py
│   │   └── test_hybrid_live.py
│   ├── README.md
│   ├── smithery
│   │   └── test_smithery.py
│   ├── sqlite
│   │   └── simple_sqlite_vec_test.py
│   ├── test_client.py
│   ├── test_content_splitting.py
│   ├── test_database.py
│   ├── test_hybrid_cloudflare_limits.py
│   ├── test_hybrid_storage.py
│   ├── test_memory_ops.py
│   ├── test_semantic_search.py
│   ├── test_sqlite_vec_storage.py
│   ├── test_time_parser.py
│   ├── test_timestamp_preservation.py
│   ├── timestamp
│   │   ├── test_hook_vs_manual_storage.py
│   │   ├── test_issue99_final_validation.py
│   │   ├── test_search_retrieval_inconsistency.py
│   │   ├── test_timestamp_issue.py
│   │   └── test_timestamp_simple.py
│   └── unit
│       ├── conftest.py
│       ├── test_cloudflare_storage.py
│       ├── test_csv_loader.py
│       ├── test_fastapi_dependencies.py
│       ├── test_import.py
│       ├── test_json_loader.py
│       ├── test_mdns_simple.py
│       ├── test_mdns.py
│       ├── test_memory_service.py
│       ├── test_memory.py
│       ├── test_semtools_loader.py
│       ├── test_storage_interface_compatibility.py
│       └── test_tag_time_filtering.py
├── tools
│   ├── docker
│   │   ├── DEPRECATED.md
│   │   ├── docker-compose.http.yml
│   │   ├── docker-compose.pythonpath.yml
│   │   ├── docker-compose.standalone.yml
│   │   ├── docker-compose.uv.yml
│   │   ├── docker-compose.yml
│   │   ├── docker-entrypoint-persistent.sh
│   │   ├── docker-entrypoint-unified.sh
│   │   ├── docker-entrypoint.sh
│   │   ├── Dockerfile
│   │   ├── Dockerfile.glama
│   │   ├── Dockerfile.slim
│   │   ├── README.md
│   │   └── test-docker-modes.sh
│   └── README.md
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/archive/docs-removed-2025-08-23/claude-code-integration.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Using MCP Memory Service with Claude Code
  2 | 
  3 | This guide explains how to integrate the MCP Memory Service with Claude Code, providing two powerful approaches for using persistent memory capabilities in the Claude CLI environment.
  4 | 
  5 | ## Prerequisites
  6 | 
  7 | Before you begin, ensure you have:
  8 | 
  9 | 1. Installed [Claude Code](https://www.anthropic.com/news/introducing-claude-code) CLI tool
 10 | 2. Set up the MCP Memory Service on your machine
 11 | 3. Basic familiarity with command-line interfaces
 12 | 
 13 | ## Integration Approaches
 14 | 
 15 | The MCP Memory Service offers **two integration methods** with Claude Code:
 16 | 
 17 | ### 🎯 Method 1: Conversational Commands (Recommended) 
 18 | **New in v2.2.0** - Direct command syntax following the CCPlugins pattern
 19 | 
 20 | ### 🔧 Method 2: MCP Server Registration
 21 | Traditional MCP server approach for deep integration
 22 | 
 23 | ---
 24 | 
 25 | ## Method 1: Conversational Commands (v2.2.0)
 26 | 
 27 | The **conversational commands approach** provides direct memory operations through Claude Code commands that follow the CCPlugins pattern. This is the **recommended approach** for most users as it provides immediate access to memory operations without MCP server configuration.
 28 | 
 29 | ### Installation
 30 | 
 31 | The commands can be installed during the main MCP Memory Service installation:
 32 | 
 33 | ```bash
 34 | # Install with commands (will prompt if Claude Code CLI is detected)
 35 | python install.py
 36 | 
 37 | # Force install commands without prompting
 38 | python install.py --install-claude-commands
 39 | 
 40 | # Skip command installation prompt
 41 | python install.py --skip-claude-commands-prompt
 42 | ```
 43 | 
 44 | Or install them manually:
 45 | 
 46 | ```bash
 47 | # Install commands directly
 48 | python scripts/claude_commands_utils.py
 49 | 
 50 | # Test installation prerequisites
 51 | python scripts/claude_commands_utils.py --test
 52 | 
 53 | # Uninstall commands
 54 | python scripts/claude_commands_utils.py --uninstall
 55 | ```
 56 | 
 57 | ### Available Commands
 58 | 
 59 | #### `/memory-store` - Store Information with Context
 60 | Store information in your memory service with automatic context detection and smart tagging.
 61 | 
 62 | ```bash
 63 | claude /memory-store "Important architectural decision about database backend"
 64 | claude /memory-store --tags "decision,architecture" "We chose SQLite-vec for performance"
 65 | claude /memory-store --type "note" "Remember to update Docker configuration"
 66 | ```
 67 | 
 68 | **Features:**
 69 | - Automatic project context detection from current directory
 70 | - Smart tag generation based on file types and git repository
 71 | - Session context integration
 72 | - Metadata enrichment with timestamps and paths
 73 | 
 74 | #### `/memory-recall` - Time-based Memory Retrieval
 75 | Retrieve memories using natural language time expressions.
 76 | 
 77 | ```bash
 78 | claude /memory-recall "what did we decide about the database last week?"
 79 | claude /memory-recall "yesterday's architectural discussions"
 80 | claude /memory-recall --project "mcp-memory-service" "last month's progress"
 81 | ```
 82 | 
 83 | **Features:**
 84 | - Natural language time parsing ("yesterday", "last Tuesday", "two months ago")
 85 | - Context-aware filtering based on current project
 86 | - Temporal relevance scoring
 87 | - Seasonal and event-based queries
 88 | 
 89 | #### `/memory-search` - Tag and Content Search
 90 | Search through stored memories using tags, content keywords, and semantic similarity.
 91 | 
 92 | ```bash
 93 | claude /memory-search --tags "architecture,database"
 94 | claude /memory-search "SQLite performance optimization"
 95 | claude /memory-search --project "current" --type "decision"
 96 | ```
 97 | 
 98 | **Features:**
 99 | - Tag-based filtering with partial matching
100 | - Semantic content search using embeddings
101 | - Combined query support (tags + content)
102 | - Relevance scoring and ranking
103 | 
104 | #### `/memory-context` - Session Context Integration
105 | Capture the current conversation and project context as a memory.
106 | 
107 | ```bash
108 | claude /memory-context
109 | claude /memory-context --summary "Architecture planning session"
110 | claude /memory-context --include-files --include-commits
111 | ```
112 | 
113 | **Features:**
114 | - Automatic session analysis and summarization
115 | - Git repository state capture
116 | - File change detection and inclusion
117 | - Key decision and insight extraction
118 | 
119 | #### `/memory-health` - Service Health and Diagnostics
120 | Check the health and status of your MCP Memory Service.
121 | 
122 | ```bash
123 | claude /memory-health
124 | claude /memory-health --detailed
125 | claude /memory-health --test-operations
126 | ```
127 | 
128 | **Features:**
129 | - Service connectivity verification
130 | - Database health and statistics
131 | - Performance metrics and diagnostics
132 | - Auto-discovery testing and troubleshooting
133 | 
134 | ### Command Features
135 | 
136 | - **Auto-Discovery**: Commands automatically locate your MCP Memory Service via mDNS
137 | - **Context Awareness**: Understand current project, git repository, and session state
138 | - **Error Recovery**: Graceful handling when memory service is unavailable
139 | - **Cross-Platform**: Full support for Windows, macOS, and Linux
140 | - **Backend Agnostic**: Works with both ChromaDB and SQLite-vec storage backends
141 | 
142 | ### Example Workflow
143 | 
144 | ```bash
145 | # Start a development session
146 | claude /memory-context --summary "Starting work on mDNS integration"
147 | 
148 | # Store important decisions during development
149 | claude /memory-store --tags "mDNS,architecture" "Decided to use zeroconf library for service discovery"
150 | 
151 | # Continue development work...
152 | 
153 | # Later, recall what was decided
154 | claude /memory-recall "what did we decide about mDNS last week?"
155 | 
156 | # Search for related technical information  
157 | claude /memory-search --tags "mDNS,zeroconf"
158 | 
159 | # Check if everything is working correctly
160 | claude /memory-health
161 | ```
162 | 
163 | ---
164 | 
165 | ## Method 2: MCP Server Registration
166 | 
167 | For users who prefer the traditional MCP server approach or need deeper integration, you can register the MCP Memory Service directly with Claude Code.
168 | 
169 | ### Registering the Memory Service with Claude Code
170 | 
171 | You can register the MCP Memory Service to work with Claude Code using the `claude mcp add` command.
172 | 
173 | ### Check Existing MCP Servers
174 | 
175 | To see which MCP servers are already registered with Claude:
176 | 
177 | ```bash
178 | claude mcp list
179 | ```
180 | 
181 | ### Add the Memory Service
182 | 
183 | To add the memory service that's running on your local machine:
184 | 
185 | ```bash
186 | claude mcp add memory-service spawn -- /path/to/your/command
187 | ```
188 | 
189 | For example, if you've installed the memory service using UV (recommended):
190 | 
191 | ```bash
192 | claude mcp add memory-service spawn -- /opt/homebrew/bin/uv --directory /Users/yourusername/path/to/mcp-memory-service run memory
193 | ```
194 | 
195 | Replace the path elements with the actual paths on your system.
196 | 
197 | ## Example Configuration
198 | 
199 | Here's a real-world example of adding the memory service to Claude Code:
200 | 
201 | ```bash
202 | claude mcp add memory-service spawn -- /opt/homebrew/bin/uv --directory /Users/yourusername/Documents/GitHub/mcp-memory-service run memory
203 | ```
204 | 
205 | This command:
206 | 1. Registers a new MCP server named "memory-service"
207 | 2. Uses the "spawn" transport method, which runs the command when needed
208 | 3. Specifies the full path to the UV command
209 | 4. Sets the working directory to your mcp-memory-service location
210 | 5. Runs the "memory" module
211 | 
212 | After running this command, you should see a confirmation message like:
213 | 
214 | ```
215 | Added stdio MCP server memory-service with command: spawn /opt/homebrew/bin/uv --directory /Users/yourusername/Documents/GitHub/mcp-memory-service run memory to local config
216 | ```
217 | 
218 | ## Using Memory Functions in Claude Code
219 | 
220 | Once registered, you can use the memory service directly in your conversations with Claude Code. The memory functions available include:
221 | 
222 | - Storing memories
223 | - Retrieving memories based on semantic search
224 | - Recalling information from specific time periods
225 | - Searching by tags
226 | - And many more
227 | 
228 | ---
229 | 
230 | ## Troubleshooting
231 | 
232 | ### For Conversational Commands (Method 1)
233 | 
234 | If you encounter issues with the commands:
235 | 
236 | 1. **Commands Not Available**:
237 |    - Verify Claude Code CLI is installed: `claude --version`
238 |    - Check commands are installed: `ls ~/.claude/commands/memory-*.md`
239 |    - Reinstall commands: `python scripts/claude_commands_utils.py`
240 | 
241 | 2. **MCP Service Connection Issues**:
242 |    - Verify MCP Memory Service is running: `memory --help`
243 |    - Check service health: `claude /memory-health`
244 |    - Ensure service is discoverable via mDNS
245 | 
246 | 3. **Permission Issues**:
247 |    - Check commands directory permissions: `ls -la ~/.claude/commands/`
248 |    - Ensure write access to the commands directory
249 | 
250 | ### For MCP Server Registration (Method 2)
251 | 
252 | If you encounter issues with MCP server registration:
253 | 
254 | 1. Verify the memory service is running properly as a standalone application
255 | 2. Check that the paths in your `claude mcp add` command are correct
256 | 3. Ensure you have the necessary permissions to execute the specified commands
257 | 4. Try running `claude mcp list` to verify the server was added correctly
258 | 
259 | ## Which Method Should You Use?
260 | 
261 | ### Choose **Conversational Commands (Method 1)** if:
262 | - ✅ You want quick setup with minimal configuration
263 | - ✅ You prefer direct command syntax (`claude /memory-store`)
264 | - ✅ You want automatic service discovery and context awareness
265 | - ✅ You're new to MCP and want the simplest approach
266 | - ✅ You want CCPlugins-compatible command integration
267 | 
268 | ### Choose **MCP Server Registration (Method 2)** if:
269 | - ✅ You need deep integration with Claude Code's MCP system
270 | - ✅ You want to use the service alongside other MCP servers
271 | - ✅ You prefer traditional MCP tool-based interactions
272 | - ✅ You need maximum control over the server configuration
273 | - ✅ You're building complex multi-server MCP workflows
274 | 
275 | ## Benefits of Claude Code Integration
276 | 
277 | Both integration methods provide powerful advantages:
278 | 
279 | ### Core Benefits
280 | 1. **Persistent Memory**: Your conversations and stored information persist across sessions
281 | 2. **Semantic Search**: Claude can retrieve relevant information even when not phrased exactly the same way
282 | 3. **Temporal Recall**: Ask about information from specific time periods (e.g., "last week", "yesterday")
283 | 4. **Organized Knowledge**: Use tags to categorize and later retrieve information by category
284 | 5. **Project Context**: Commands understand your current project and development context
285 | 
286 | ### Method 1 (Commands) Additional Benefits
287 | - **Immediate Access**: Direct command syntax without MCP server complexity
288 | - **Context Integration**: Automatic project and git repository detection
289 | - **Error Recovery**: Graceful fallback when service is unavailable
290 | - **User-Friendly**: Conversational interface following established patterns
291 | 
292 | ### Method 2 (MCP Server) Additional Benefits
293 | - **Deep Integration**: Full MCP protocol support with rich tool interactions
294 | - **Flexible Configuration**: Advanced server configuration options
295 | - **Multi-Server Support**: Seamless integration with other MCP servers
296 | - **Protocol Compliance**: Standard MCP tool-based interactions
297 | 
298 | This integration transforms Claude Code into a powerful knowledge management system with long-term memory capabilities, making your development workflow more efficient and context-aware.
299 | 
```
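
The troubleshooting steps above for Method 1 can be scripted. The following is a minimal sketch (not part of the repository) that checks the same two prerequisites the document lists manually: the `claude` CLI on `PATH` and the command files under `~/.claude/commands/` installed by `scripts/claude_commands_utils.py`.

```python
# Minimal verification sketch, assuming the install locations described above.
import shutil
from pathlib import Path


def check_memory_commands() -> None:
    # 1. Claude Code CLI must be on PATH (the doc's `claude --version` check).
    if shutil.which("claude") is None:
        print("Claude Code CLI not found - install it before the memory commands.")
        return

    # 2. Command files live in ~/.claude/commands/ (the doc's `ls ~/.claude/commands/memory-*.md` check).
    commands_dir = Path.home() / ".claude" / "commands"
    installed = sorted(commands_dir.glob("memory-*.md"))
    if installed:
        print(f"Found {len(installed)} memory command(s):")
        for path in installed:
            print(f"  {path.name}")
    else:
        print("No memory commands found - run: python scripts/claude_commands_utils.py")


if __name__ == "__main__":
    check_memory_commands()
```

If both checks pass but `claude /memory-health` still fails, the issue is more likely on the service side (not running, or not discoverable via mDNS) than on the command installation side.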

--------------------------------------------------------------------------------
/tests/test_content_splitting.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Tests for content splitting and backend-specific length limits.
 17 | 
 18 | Tests cover:
 19 | - Content splitting utility functions
 20 | - Backend limit enforcement
 21 | - Automatic chunking with metadata
 22 | - Boundary preservation (sentences, paragraphs, code blocks)
 23 | - Overlap between chunks for context preservation
 24 | """
 25 | 
 26 | import pytest
 27 | from src.mcp_memory_service.utils.content_splitter import (
 28 |     split_content,
 29 |     estimate_chunks_needed,
 30 |     validate_chunk_lengths,
 31 |     _find_best_split_point
 32 | )
 33 | 
 34 | 
 35 | class TestContentSplitter:
 36 |     """Test the content_splitter utility module."""
 37 | 
 38 |     def test_split_short_content(self):
 39 |         """Content shorter than max_length should not be split."""
 40 |         content = "This is a short sentence."
 41 |         chunks = split_content(content, max_length=100)
 42 | 
 43 |         assert len(chunks) == 1
 44 |         assert chunks[0] == content
 45 | 
 46 |     def test_split_long_content_character_mode(self):
 47 |         """Test character-based splitting without boundary preservation."""
 48 |         content = "a" * 500
 49 |         chunks = split_content(content, max_length=100, preserve_boundaries=False, overlap=10)
 50 | 
 51 |         # Should create multiple chunks
 52 |         assert len(chunks) > 1
 53 |         # All chunks should be <= max_length
 54 |         assert all(len(chunk) <= 100 for chunk in chunks)
 55 |         # Should have overlap
 56 |         assert chunks[1].startswith(chunks[0][-10:])
 57 | 
 58 |     def test_split_preserves_paragraphs(self):
 59 |         """Test that paragraph boundaries are preferred for splitting."""
 60 |         content = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
 61 |         chunks = split_content(content, max_length=30, preserve_boundaries=True)
 62 | 
 63 |         # Should split at paragraph boundaries
 64 |         assert len(chunks) >= 2
 65 |         # Each chunk should end cleanly (no mid-paragraph cuts)
 66 |         for chunk in chunks[:-1]:  # Check all but last chunk
 67 |             assert chunk.strip().endswith('.') or '\n\n' in chunk
 68 | 
 69 |     def test_split_preserves_sentences(self):
 70 |         """Test that sentence boundaries are preferred when paragraphs don't fit."""
 71 |         content = "First sentence. Second sentence. Third sentence. Fourth sentence."
 72 |         chunks = split_content(content, max_length=40, preserve_boundaries=True)
 73 | 
 74 |         # Should split at sentence boundaries
 75 |         assert len(chunks) >= 2
 76 |         # Most chunks should end with period
 77 |         period_endings = sum(1 for chunk in chunks if chunk.strip().endswith('.'))
 78 |         assert period_endings >= len(chunks) - 1
 79 | 
 80 |     def test_split_preserves_words(self):
 81 |         """Test that word boundaries are preferred when sentences don't fit."""
 82 |         content = "word1 word2 word3 word4 word5 word6 word7 word8"
 83 |         chunks = split_content(content, max_length=25, preserve_boundaries=True)
 84 | 
 85 |         # Should split at word boundaries
 86 |         assert len(chunks) >= 2
 87 |         # No chunk should end mid-word (except possibly last)
 88 |         for chunk in chunks[:-1]:
 89 |             # Should not end with partial word (will end with space or be complete)
 90 |             assert chunk.endswith(' ') or chunk == chunks[-1]
 91 | 
 92 |     def test_split_overlap(self):
 93 |         """Test that chunks have proper overlap for context."""
 94 |         content = "The quick brown fox jumps over the lazy dog. " * 10
 95 |         chunks = split_content(content, max_length=100, preserve_boundaries=True, overlap=20)
 96 | 
 97 |         assert len(chunks) > 1
 98 |         # Check that consecutive chunks have overlap
 99 |         for i in range(len(chunks) - 1):
100 |             # The next chunk should contain some content from the end of current chunk
101 |             current_end = chunks[i][-20:]
102 |             assert any(word in chunks[i+1] for word in current_end.split()[:3])
103 | 
104 |     def test_estimate_chunks_needed(self):
105 |         """Test chunk estimation function."""
106 |         # Basic cases without overlap
107 |         assert estimate_chunks_needed(0, 100) == 0
108 |         assert estimate_chunks_needed(100, 100) == 1
109 |         assert estimate_chunks_needed(200, 100) == 2
110 |         assert estimate_chunks_needed(250, 100) == 3
111 | 
112 |         # Cases with overlap
113 |         assert estimate_chunks_needed(100, 100, overlap=10) == 1  # Fits in one chunk
114 |         assert estimate_chunks_needed(150, 100, overlap=10) == 2  # First chunk 100, second chunk covers remaining 50
115 |         assert estimate_chunks_needed(200, 100, overlap=50) == 3  # Effective chunk size is 50
116 | 
117 |         # Edge cases
118 |         assert estimate_chunks_needed(100, 100, overlap=100) == 1  # Invalid overlap, fallback to simple division
119 |         assert estimate_chunks_needed(100, 100, overlap=150) == 1  # Invalid overlap larger than max_length
120 | 
121 |     def test_validate_chunk_lengths(self):
122 |         """Test chunk length validation."""
123 |         valid_chunks = ["short", "also short", "still short"]
124 |         invalid_chunks = ["short", "this is way too long for the limit", "short"]
125 | 
126 |         assert validate_chunk_lengths(valid_chunks, max_length=50) is True
127 |         assert validate_chunk_lengths(invalid_chunks, max_length=20) is False
128 | 
129 |     def test_find_best_split_point_paragraph(self):
130 |         """Test that paragraph breaks are prioritized."""
131 |         text = "First para.\n\nSecond para.\n\nThird para."
132 |         split_point = _find_best_split_point(text, max_length=25)
133 | 
134 |         # Should split at first paragraph break
135 |         assert text[split_point-2:split_point] == '\n\n'
136 | 
137 |     def test_find_best_split_point_sentence(self):
138 |         """Test that sentence boundaries are used when no paragraph breaks."""
139 |         text = "First sentence. Second sentence. Third sentence."
140 |         split_point = _find_best_split_point(text, max_length=30)
141 | 
142 |         # Should split at sentence boundary
143 |         assert '. ' in text[:split_point]
144 | 
145 |     def test_split_empty_content(self):
146 |         """Test handling of empty content."""
147 |         chunks = split_content("", max_length=100)
148 |         assert chunks == []
149 | 
150 |     def test_split_exact_length(self):
151 |         """Test content exactly at max_length."""
152 |         content = "a" * 100
153 |         chunks = split_content(content, max_length=100)
154 | 
155 |         assert len(chunks) == 1
156 |         assert chunks[0] == content
157 | 
158 |     def test_split_code_blocks(self):
159 |         """Test that code blocks are handled reasonably."""
160 |         content = """def function_one():
161 |     return True
162 | 
163 | def function_two():
164 |     return False
165 | 
166 | def function_three():
167 |     return None"""
168 | 
169 |         chunks = split_content(content, max_length=60, preserve_boundaries=True)
170 | 
171 |         # Should split at paragraph/function boundaries
172 |         assert len(chunks) >= 2
173 |         # Each chunk should contain complete functions ideally
174 |         for chunk in chunks:
175 |             # Count function definitions
176 |             if 'def ' in chunk:
177 |                 # If it has a def, it should have a return (complete function)
178 |                 assert 'return' in chunk or chunk == chunks[-1]
179 | 
180 | 
181 | class TestBackendLimits:
182 |     """Test backend-specific content length limits."""
183 | 
184 |     def test_cloudflare_limit(self):
185 |         """Test that Cloudflare backend uses config constant."""
186 |         from src.mcp_memory_service.storage.cloudflare import CloudflareStorage
187 |         from src.mcp_memory_service.config import CLOUDFLARE_MAX_CONTENT_LENGTH
188 | 
189 |         # Verify the class constant matches config
190 |         assert CloudflareStorage._MAX_CONTENT_LENGTH == CLOUDFLARE_MAX_CONTENT_LENGTH
191 | 
192 |     def test_sqlitevec_unlimited(self):
193 |         """Test that SQLite-vec backend uses config constant."""
194 |         from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
195 |         from src.mcp_memory_service.config import SQLITEVEC_MAX_CONTENT_LENGTH
196 | 
197 |         # Create a mock instance to check property
198 |         import tempfile
199 |         import os
200 | 
201 |         with tempfile.TemporaryDirectory() as tmpdir:
202 |             db_path = os.path.join(tmpdir, "test.db")
203 |             storage = SqliteVecMemoryStorage(db_path=db_path)
204 | 
205 |             # Should return configured value (default: None/unlimited)
206 |             assert storage.max_content_length == SQLITEVEC_MAX_CONTENT_LENGTH
207 |             assert storage.supports_chunking is True
208 | 
209 |     def test_hybrid_follows_config(self):
210 |         """Test that Hybrid backend uses config constant."""
211 |         from src.mcp_memory_service.storage.hybrid import HybridMemoryStorage
212 |         from src.mcp_memory_service.config import HYBRID_MAX_CONTENT_LENGTH
213 |         import tempfile
214 |         import os
215 | 
216 |         with tempfile.TemporaryDirectory() as tmpdir:
217 |             db_path = os.path.join(tmpdir, "test.db")
218 |             storage = HybridMemoryStorage(
219 |                 sqlite_db_path=db_path,
220 |                 cloudflare_config=None  # No cloud sync for this test
221 |             )
222 | 
223 |             # Should match configured hybrid limit
224 |             assert storage.max_content_length == HYBRID_MAX_CONTENT_LENGTH
225 |             assert storage.supports_chunking is True
226 | 
227 | 
228 | class TestConfigurationConstants:
229 |     """Test configuration constants for content limits."""
230 | 
231 |     def test_config_constants_exist(self):
232 |         """Test that all content limit constants are defined."""
233 |         from src.mcp_memory_service.config import (
234 |             CLOUDFLARE_MAX_CONTENT_LENGTH,
235 |             SQLITEVEC_MAX_CONTENT_LENGTH,
236 |             HYBRID_MAX_CONTENT_LENGTH,
237 |             ENABLE_AUTO_SPLIT,
238 |             CONTENT_SPLIT_OVERLAP,
239 |             CONTENT_PRESERVE_BOUNDARIES
240 |         )
241 | 
242 |         assert CLOUDFLARE_MAX_CONTENT_LENGTH == 800
243 |         assert SQLITEVEC_MAX_CONTENT_LENGTH is None  # Unlimited
244 |         assert HYBRID_MAX_CONTENT_LENGTH == CLOUDFLARE_MAX_CONTENT_LENGTH
245 |         assert isinstance(ENABLE_AUTO_SPLIT, bool)
246 |         assert isinstance(CONTENT_SPLIT_OVERLAP, int)
247 |         assert isinstance(CONTENT_PRESERVE_BOUNDARIES, bool)
248 | 
249 |     def test_config_validation(self):
250 |         """Test that config values are sensible."""
251 |         from src.mcp_memory_service.config import (
252 |             CLOUDFLARE_MAX_CONTENT_LENGTH,
253 |             CONTENT_SPLIT_OVERLAP
254 |         )
255 | 
256 |         # Limits should be positive
257 |         assert CLOUDFLARE_MAX_CONTENT_LENGTH > 0
258 | 
259 |         # Overlap should be reasonable
260 |         assert 0 <= CONTENT_SPLIT_OVERLAP <= 500
261 | 
262 | 
263 | if __name__ == "__main__":
264 |     pytest.main([__file__, "-v"])
265 | 
```
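
For orientation, here is a small usage sketch of the utilities exercised by these tests. It is not taken from the repository; the call signatures are inferred from the assertions above, and the 800-character limit mirrors `CLOUDFLARE_MAX_CONTENT_LENGTH` as checked in `TestConfigurationConstants`.

```python
# Illustrative sketch only: chunk oversized content for a length-limited backend.
from src.mcp_memory_service.utils.content_splitter import (
    split_content,
    estimate_chunks_needed,
    validate_chunk_lengths,
)


def chunk_for_cloudflare(content: str, max_length: int = 800) -> list[str]:
    """Split oversized content into backend-sized chunks with a small overlap."""
    # Rough upfront estimate (may differ slightly from the actual split).
    expected = estimate_chunks_needed(len(content), max_length, overlap=50)

    chunks = split_content(
        content,
        max_length=max_length,
        preserve_boundaries=True,  # prefer paragraph/sentence/word boundaries
        overlap=50,                # carry context between consecutive chunks
    )

    if not validate_chunk_lengths(chunks, max_length=max_length):
        print("Warning: at least one chunk exceeds the backend limit")
    print(f"Estimated {expected} chunk(s), produced {len(chunks)}")
    return chunks


if __name__ == "__main__":
    sample = "First paragraph of a long note.\n\n" * 60
    for i, chunk in enumerate(chunk_for_cloudflare(sample), start=1):
        print(i, len(chunk))
```

The overlap value is a trade-off: larger overlaps preserve more cross-chunk context for retrieval, but reduce the effective chunk size and increase the chunk count, as the `estimate_chunks_needed` cases above demonstrate.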

--------------------------------------------------------------------------------
/src/mcp_memory_service/api/client.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Storage client wrapper for code execution interface.
 17 | 
 18 | Provides global storage instance management with lazy initialization
 19 | and automatic connection reuse for optimal performance.
 20 | 
 21 | Design Goals:
 22 |     - Single storage instance per process (avoid redundant connections)
 23 |     - Lazy initialization (create on first use)
 24 |     - Thread-safe access to global instance
 25 |     - Automatic cleanup on process exit
 26 |     - Graceful error handling with fallbacks
 27 | 
 28 | Performance:
 29 |     - First call: ~50ms (initialization + connection)
 30 |     - Subsequent calls: ~0ms (reuses connection)
 31 |     - Memory overhead: ~10MB for embedding model cache
 32 | """
 33 | 
 34 | import asyncio
 35 | import logging
 36 | import os
 37 | from typing import Optional
 38 | from ..storage.base import MemoryStorage
 39 | from ..storage.factory import create_storage_instance
 40 | from ..config import DATABASE_PATH, get_base_directory
 41 | 
 42 | logger = logging.getLogger(__name__)
 43 | 
 44 | # Global storage instance (module-level singleton)
 45 | _storage_instance: Optional[MemoryStorage] = None
 46 | _initialization_lock = asyncio.Lock()
 47 | 
 48 | # Global consolidation instances (set by HTTP server)
 49 | _consolidator_instance: Optional["DreamInspiredConsolidator"] = None
 50 | _scheduler_instance: Optional["ConsolidationScheduler"] = None
 51 | 
 52 | 
 53 | async def _get_storage_async() -> MemoryStorage:
 54 |     """
 55 |     Get or create storage backend instance (async version).
 56 | 
 57 |     This function implements lazy initialization with connection reuse:
 58 |     1. Returns existing instance if available
 59 |     2. Creates new instance if none exists
 60 |     3. Initializes storage backend on first call
 61 |     4. Reuses connection for subsequent calls
 62 | 
 63 |     Thread Safety:
 64 |         Uses asyncio.Lock to prevent race conditions during initialization.
 65 | 
 66 |     Returns:
 67 |         Initialized MemoryStorage instance
 68 | 
 69 |     Raises:
 70 |         RuntimeError: If storage initialization fails
 71 |     """
 72 |     global _storage_instance
 73 | 
 74 |     # Fast path: return existing instance
 75 |     if _storage_instance is not None:
 76 |         return _storage_instance
 77 | 
 78 |     # Slow path: create new instance with lock
 79 |     async with _initialization_lock:
 80 |         # Double-check after acquiring lock (another coroutine may have initialized)
 81 |         if _storage_instance is not None:
 82 |             return _storage_instance
 83 | 
 84 |         try:
 85 |             logger.info("Initializing storage backend for code execution API...")
 86 | 
 87 |             # Determine SQLite database path
 88 |             db_path = DATABASE_PATH
 89 |             if not db_path:
 90 |                 # Fallback to cross-platform default path
 91 |                 base_dir = get_base_directory()
 92 |                 db_path = os.path.join(base_dir, "sqlite_vec.db")
 93 |                 logger.warning(f"DATABASE_PATH not configured, using default: {db_path}")
 94 | 
 95 |             # Ensure database directory exists
 96 |             db_dir = os.path.dirname(db_path)
 97 |             if db_dir and not os.path.exists(db_dir):
 98 |                 os.makedirs(db_dir, exist_ok=True)
 99 |                 logger.info(f"Created database directory: {db_dir}")
100 | 
101 |             # Create and initialize storage instance
102 |             _storage_instance = await create_storage_instance(db_path)
103 | 
104 |             logger.info(f"Storage backend initialized: {type(_storage_instance).__name__}")
105 |             return _storage_instance
106 | 
107 |         except Exception as e:
108 |             logger.error(f"Failed to initialize storage backend: {e}")
109 |             raise RuntimeError(f"Storage initialization failed: {e}") from e
110 | 
111 | 
112 | async def get_storage_async() -> MemoryStorage:
113 |     """
114 |     Get storage backend instance (async version).
115 | 
116 |     This is the internal async version that should be used within
117 |     async contexts. For synchronous contexts, the sync_wrapper
118 |     will handle the event loop management.
119 | 
120 |     Returns:
121 |         Initialized MemoryStorage instance
122 | 
123 |     Raises:
124 |         RuntimeError: If storage initialization fails
125 |     """
126 |     return await _get_storage_async()
127 | 
128 | 
129 | def get_storage() -> MemoryStorage:
130 |     """
131 |     Get storage backend instance (synchronous wrapper).
132 | 
133 |     This is the primary entry point for code execution API operations.
134 |     It wraps the async initialization in a synchronous interface for
135 |     ease of use in non-async contexts.
136 | 
137 |     Connection Reuse:
138 |         - First call: ~50ms (initialization)
139 |         - Subsequent calls: ~0ms (returns cached instance)
140 | 
141 |     Returns:
142 |         Initialized MemoryStorage instance
143 | 
144 |     Raises:
145 |         RuntimeError: If storage initialization fails
146 | 
147 |     Example:
148 |         >>> storage = get_storage()
149 |         >>> # Use storage for operations
150 |         >>> results = await storage.retrieve("query", n_results=5)
151 |     """
152 |     global _storage_instance
153 | 
154 |     # Fast path: if already initialized, return immediately
155 |     if _storage_instance is not None:
156 |         return _storage_instance
157 | 
158 |     # Need to initialize - this requires an event loop
159 |     try:
160 |         # Check if we're already in an async context
161 |         try:
162 |             loop = asyncio.get_running_loop()
163 |             # We're in an async context, but we can't use run_until_complete
164 |             # This shouldn't happen in normal usage, but handle it gracefully
165 |             logger.error("get_storage() called from async context - use get_storage_async() instead")
166 |             raise RuntimeError("get_storage() cannot be called from async context")
167 |         except RuntimeError:
168 |             # No running loop, we can proceed with synchronous initialization
169 |             pass
170 | 
171 |         # Get or create event loop
172 |         try:
173 |             loop = asyncio.get_event_loop()
174 |             if loop.is_closed():
175 |                 loop = asyncio.new_event_loop()
176 |                 asyncio.set_event_loop(loop)
177 |         except RuntimeError:
178 |             loop = asyncio.new_event_loop()
179 |             asyncio.set_event_loop(loop)
180 | 
181 |         # Run async initialization
182 |         storage = loop.run_until_complete(_get_storage_async())
183 |         return storage
184 | 
185 |     except Exception as e:
186 |         logger.error(f"Error getting storage instance: {e}")
187 |         raise
188 | 
189 | 
190 | def close() -> None:
191 |     """
192 |     Close and clean up storage resources.
193 | 
194 |     Explicitly closes the storage backend connection and clears
195 |     the global instance. This ensures proper cleanup when the
196 |     process is terminating or when you want to force a reconnection.
197 | 
198 |     After calling close(), the next call to get_storage() will
199 |     create a fresh connection.
200 | 
201 |     Note:
202 |         This synchronous version does not await the storage backend's
203 |         async close() method; it only clears the cached instance. For
204 |         proper async cleanup, use close_async() instead.
205 | 
206 |     Example:
207 |         >>> from mcp_memory_service.api import close
208 |         >>> close()  # Cleanup resources
209 |         >>> # Next get_storage() call will create new connection
210 |     """
211 |     global _storage_instance
212 | 
213 |     if _storage_instance is not None:
214 |         try:
215 |             logger.info("Closing storage instance")
216 |             # Simply clear the instance reference here; proper async
217 |             # cleanup of the backend requires an explicit close_async()
218 |         except Exception as e:
219 |             logger.warning(f"Error closing storage instance: {e}")
220 |         finally:
221 |             _storage_instance = None
222 | 
223 | 
224 | async def close_async() -> None:
225 |     """
226 |     Close and clean up storage resources (async version).
227 | 
228 |     This is the proper way to close storage backends that have
229 |     async cleanup methods. Use this in async contexts.
230 | 
231 |     Example:
232 |         >>> from mcp_memory_service.api import close_async
233 |         >>> await close_async()  # Proper async cleanup
234 |     """
235 |     global _storage_instance
236 | 
237 |     if _storage_instance is not None:
238 |         try:
239 |             logger.info("Closing storage instance (async)")
240 |             # If storage has an async close method, await it
241 |             if hasattr(_storage_instance, 'close') and callable(_storage_instance.close):
242 |                 close_method = _storage_instance.close()
243 |                 # Check if it's a coroutine
244 |                 if hasattr(close_method, '__await__'):
245 |                     await close_method
246 |         except Exception as e:
247 |             logger.warning(f"Error closing storage instance: {e}")
248 |         finally:
249 |             _storage_instance = None
250 | 
251 | 
252 | def reset_storage() -> None:
253 |     """
254 |     Reset global storage instance.
255 | 
256 |     Useful for testing or when configuration changes require
257 |     reinitializing the storage backend.
258 | 
259 |     Warning:
260 |         This closes the existing connection. Subsequent calls to
261 |         get_storage() will create a new instance.
262 | 
263 |     Example:
264 |         >>> reset_storage()  # Close current connection
265 |         >>> storage = get_storage()  # Creates new connection
266 |     """
267 |     close()  # Reuse the close() method for consistency
268 | 
269 | 
270 | # Cleanup on module exit
271 | import atexit
272 | 
273 | 
274 | def _cleanup_storage():
275 |     """Cleanup storage instance on process exit."""
276 |     global _storage_instance
277 |     if _storage_instance is not None:
278 |         logger.info("Cleaning up storage instance on exit")
279 |         _storage_instance = None
280 | 
281 | 
282 | atexit.register(_cleanup_storage)
283 | 
284 | 
285 | def set_consolidator(consolidator: "DreamInspiredConsolidator") -> None:
286 |     """
287 |     Set global consolidator instance (called by HTTP server).
288 | 
289 |     This allows the API to access the consolidator instance
290 |     that's managed by the HTTP server lifecycle.
291 | 
292 |     Args:
293 |         consolidator: DreamInspiredConsolidator instance
294 |     """
295 |     global _consolidator_instance
296 |     _consolidator_instance = consolidator
297 |     logger.info("Global consolidator instance set")
298 | 
299 | 
300 | def set_scheduler(scheduler: "ConsolidationScheduler") -> None:
301 |     """
302 |     Set global scheduler instance (called by HTTP server).
303 | 
304 |     This allows the API to access the scheduler instance
305 |     that's managed by the HTTP server lifecycle.
306 | 
307 |     Args:
308 |         scheduler: ConsolidationScheduler instance
309 |     """
310 |     global _scheduler_instance
311 |     _scheduler_instance = scheduler
312 |     logger.info("Global scheduler instance set")
313 | 
314 | 
315 | def get_consolidator() -> Optional["DreamInspiredConsolidator"]:
316 |     """
317 |     Get global consolidator instance.
318 | 
319 |     Returns:
320 |         DreamInspiredConsolidator instance or None if not set
321 |     """
322 |     return _consolidator_instance
323 | 
324 | 
325 | def get_scheduler() -> Optional["ConsolidationScheduler"]:
326 |     """
327 |     Get global scheduler instance.
328 | 
329 |     Returns:
330 |         ConsolidationScheduler instance or None if not set
331 |     """
332 |     return _scheduler_instance
333 | 
```
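
A minimal usage sketch for the accessor functions above, assuming they are re-exported from `mcp_memory_service.api` as the `close()`/`close_async()` docstring examples indicate; the `retrieve()` call and its parameters are taken from the `get_storage()` docstring.

```python
import asyncio

from mcp_memory_service.api import get_storage, get_storage_async, close_async


def sync_caller():
    # Synchronous contexts use get_storage(); the first call pays the
    # ~50ms initialization cost, later calls return the cached instance.
    return get_storage()


async def async_caller():
    # Async contexts must use the async accessor; get_storage() raises
    # RuntimeError when an event loop is already running.
    storage = await get_storage_async()
    results = await storage.retrieve("deployment notes", n_results=5)
    await close_async()  # awaits the backend's async close(), then clears the cache
    return results


if __name__ == "__main__":
    asyncio.run(async_caller())
```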

--------------------------------------------------------------------------------
/claude-hooks/memory-mode-controller.js:
--------------------------------------------------------------------------------

```javascript
  1 | #!/usr/bin/env node
  2 | 
  3 | /**
  4 |  * Memory Mode Controller
  5 |  * Command-line utility for managing memory hook performance profiles
  6 |  */
  7 | 
  8 | const fs = require('fs').promises;
  9 | const path = require('path');
 10 | 
 11 | class MemoryModeController {
 12 |     constructor(configPath = null) {
 13 |         this.configPath = configPath || path.join(__dirname, 'config.json');
 14 |     }
 15 | 
 16 |     /**
 17 |      * Load current configuration
 18 |      */
 19 |     async loadConfig() {
 20 |         try {
 21 |             const configData = await fs.readFile(this.configPath, 'utf8');
 22 |             return JSON.parse(configData);
 23 |         } catch (error) {
 24 |             throw new Error(`Failed to load config: ${error.message}`);
 25 |         }
 26 |     }
 27 | 
 28 |     /**
 29 |      * Save configuration
 30 |      */
 31 |     async saveConfig(config) {
 32 |         try {
 33 |             await fs.writeFile(this.configPath, JSON.stringify(config, null, 2));
 34 |         } catch (error) {
 35 |             throw new Error(`Failed to save config: ${error.message}`);
 36 |         }
 37 |     }
 38 | 
 39 |     /**
 40 |      * Switch to a performance profile
 41 |      */
 42 |     async switchProfile(profileName) {
 43 |         const config = await this.loadConfig();
 44 | 
 45 |         if (!config.performance?.profiles?.[profileName]) {
 46 |             throw new Error(`Unknown profile: ${profileName}. Available profiles: ${Object.keys(config.performance?.profiles || {}).join(', ')}`);
 47 |         }
 48 | 
 49 |         config.performance.defaultProfile = profileName;
 50 |         await this.saveConfig(config);
 51 | 
 52 |         const profile = config.performance.profiles[profileName];
 53 |         console.log(`✅ Switched to profile: ${profileName}`);
 54 |         console.log(`📊 Description: ${profile.description}`);
 55 |         console.log(`⚡ Max Latency: ${profile.maxLatency || 'adaptive'}ms`);
 56 |         console.log(`🎯 Enabled Tiers: ${profile.enabledTiers?.join(', ') || 'adaptive'}`);
 57 |         console.log(`🔄 Background Processing: ${profile.backgroundProcessing ? 'enabled' : 'disabled'}`);
 58 | 
 59 |         return profile;
 60 |     }
 61 | 
 62 |     /**
 63 |      * Get current status
 64 |      */
 65 |     async getStatus() {
 66 |         const config = await this.loadConfig();
 67 |         const currentProfile = config.performance?.defaultProfile || 'balanced';
 68 |         const profile = config.performance?.profiles?.[currentProfile];
 69 | 
 70 |         console.log('📊 Memory Hook Status');
 71 |         console.log('═'.repeat(50));
 72 |         console.log(`Current Profile: ${currentProfile}`);
 73 |         console.log(`Description: ${profile?.description || 'No description'}`);
 74 |         console.log(`Natural Triggers: ${config.naturalTriggers?.enabled ? 'enabled' : 'disabled'}`);
 75 |         console.log(`Sensitivity: ${config.patternDetector?.sensitivity || 0.7}`);
 76 |         console.log(`Trigger Threshold: ${config.naturalTriggers?.triggerThreshold || 0.6}`);
 77 |         console.log(`Cooldown Period: ${(config.naturalTriggers?.cooldownPeriod || 30000) / 1000}s`);
 78 | 
 79 |         if (profile) {
 80 |             console.log('\n🎯 Performance Settings');
 81 |             console.log('─'.repeat(30));
 82 |             console.log(`Max Latency: ${profile.maxLatency || 'adaptive'}ms`);
 83 |             console.log(`Enabled Tiers: ${profile.enabledTiers?.join(', ') || 'adaptive'}`);
 84 |             console.log(`Background Processing: ${profile.backgroundProcessing ? 'enabled' : 'disabled'}`);
 85 |             console.log(`Degrade Threshold: ${profile.degradeThreshold || 'adaptive'}ms`);
 86 |         }
 87 | 
 88 |         console.log('\n🔧 Available Profiles');
 89 |         console.log('─'.repeat(30));
 90 |         for (const [name, prof] of Object.entries(config.performance?.profiles || {})) {
 91 |             const current = name === currentProfile ? ' (current)' : '';
 92 |             console.log(`${name}${current}: ${prof.description}`);
 93 |         }
 94 | 
 95 |         return {
 96 |             currentProfile,
 97 |             config: config.performance,
 98 |             naturalTriggers: config.naturalTriggers
 99 |         };
100 |     }
101 | 
102 |     /**
103 |      * Update sensitivity
104 |      */
105 |     async updateSensitivity(sensitivity) {
106 |         const config = await this.loadConfig();
107 | 
108 |         if (sensitivity < 0 || sensitivity > 1) {
109 |             throw new Error('Sensitivity must be between 0 and 1');
110 |         }
111 | 
112 |         if (!config.patternDetector) {
113 |             config.patternDetector = {};
114 |         }
115 | 
116 |         config.patternDetector.sensitivity = sensitivity;
117 |         await this.saveConfig(config);
118 | 
119 |         console.log(`✅ Updated sensitivity to ${sensitivity}`);
120 |         return sensitivity;
121 |     }
122 | 
123 |     /**
124 |      * Update trigger threshold
125 |      */
126 |     async updateThreshold(threshold) {
127 |         const config = await this.loadConfig();
128 | 
129 |         if (threshold < 0 || threshold > 1) {
130 |             throw new Error('Threshold must be between 0 and 1');
131 |         }
132 | 
133 |         if (!config.naturalTriggers) {
134 |             config.naturalTriggers = {};
135 |         }
136 | 
137 |         config.naturalTriggers.triggerThreshold = threshold;
138 |         await this.saveConfig(config);
139 | 
140 |         console.log(`✅ Updated trigger threshold to ${threshold}`);
141 |         return threshold;
142 |     }
143 | 
144 |     /**
145 |      * Enable or disable natural triggers
146 |      */
147 |     async toggleNaturalTriggers(enabled = null) {
148 |         const config = await this.loadConfig();
149 | 
150 |         if (!config.naturalTriggers) {
151 |             config.naturalTriggers = {};
152 |         }
153 | 
154 |         if (enabled === null) {
155 |             enabled = !config.naturalTriggers.enabled;
156 |         }
157 | 
158 |         config.naturalTriggers.enabled = enabled;
159 |         await this.saveConfig(config);
160 | 
161 |         console.log(`✅ Natural triggers ${enabled ? 'enabled' : 'disabled'}`);
162 |         return enabled;
163 |     }
164 | 
165 |     /**
166 |      * Reset to default configuration
167 |      */
168 |     async resetToDefaults() {
169 |         const config = await this.loadConfig();
170 | 
171 |         config.performance.defaultProfile = 'balanced';
172 |         config.naturalTriggers = {
173 |             enabled: true,
174 |             triggerThreshold: 0.6,
175 |             cooldownPeriod: 30000,
176 |             maxMemoriesPerTrigger: 5
177 |         };
178 | 
179 |         // Pattern detector defaults
180 |         if (!config.patternDetector) {
181 |             config.patternDetector = {};
182 |         }
183 |         config.patternDetector.sensitivity = 0.7;
184 |         config.patternDetector.adaptiveLearning = true;
185 | 
186 |         await this.saveConfig(config);
187 |         console.log('✅ Reset to default configuration');
188 |         return config;
189 |     }
190 | 
191 |     /**
192 |      * Get performance profiles information
193 |      */
194 |     async listProfiles() {
195 |         const config = await this.loadConfig();
196 |         const profiles = config.performance?.profiles || {};
197 | 
198 |         console.log('📋 Available Performance Profiles');
199 |         console.log('═'.repeat(60));
200 | 
201 |         for (const [name, profile] of Object.entries(profiles)) {
202 |             const current = name === config.performance?.defaultProfile ? ' ⭐' : '';
203 |             console.log(`\n${name}${current}`);
204 |             console.log(`  Description: ${profile.description}`);
205 |             console.log(`  Max Latency: ${profile.maxLatency || 'adaptive'}ms`);
206 |             console.log(`  Enabled Tiers: ${profile.enabledTiers?.join(', ') || 'adaptive'}`);
207 |             console.log(`  Background Processing: ${profile.backgroundProcessing ? 'yes' : 'no'}`);
208 |         }
209 | 
210 |         return profiles;
211 |     }
212 | }
213 | 
214 | /**
215 |  * Command-line interface
216 |  */
217 | async function main() {
218 |     const args = process.argv.slice(2);
219 |     const controller = new MemoryModeController();
220 | 
221 |     try {
222 |         if (args.length === 0 || args[0] === 'status') {
223 |             await controller.getStatus();
224 |             return;
225 |         }
226 | 
227 |         const command = args[0];
228 | 
229 |         switch (command) {
230 |             case 'switch':
231 |             case 'profile':
232 |                 if (!args[1]) {
233 |                     console.error('❌ Please specify a profile name');
234 |                     console.log('Available profiles: speed_focused, balanced, memory_aware, adaptive');
235 |                     process.exit(1);
236 |                 }
237 |                 await controller.switchProfile(args[1]);
238 |                 break;
239 | 
240 |             case 'sensitivity':
241 |                 if (!args[1]) {
242 |                     console.error('❌ Please specify sensitivity value (0-1)');
243 |                     process.exit(1);
244 |                 }
245 |                 const sensitivity = parseFloat(args[1]);
246 |                 await controller.updateSensitivity(sensitivity);
247 |                 break;
248 | 
249 |             case 'threshold':
250 |                 if (!args[1]) {
251 |                     console.error('❌ Please specify threshold value (0-1)');
252 |                     process.exit(1);
253 |                 }
254 |                 const threshold = parseFloat(args[1]);
255 |                 await controller.updateThreshold(threshold);
256 |                 break;
257 | 
258 |             case 'enable':
259 |                 await controller.toggleNaturalTriggers(true);
260 |                 break;
261 | 
262 |             case 'disable':
263 |                 await controller.toggleNaturalTriggers(false);
264 |                 break;
265 | 
266 |             case 'toggle':
267 |                 await controller.toggleNaturalTriggers();
268 |                 break;
269 | 
270 |             case 'reset':
271 |                 await controller.resetToDefaults();
272 |                 break;
273 | 
274 |             case 'list':
275 |             case 'profiles':
276 |                 await controller.listProfiles();
277 |                 break;
278 | 
279 |             case 'help':
280 |             case '-h':
281 |             case '--help':
282 |                 showHelp();
283 |                 break;
284 | 
285 |             default:
286 |                 console.error(`❌ Unknown command: ${command}`);
287 |                 showHelp();
288 |                 process.exit(1);
289 |         }
290 | 
291 |     } catch (error) {
292 |         console.error(`❌ Error: ${error.message}`);
293 |         process.exit(1);
294 |     }
295 | }
296 | 
297 | function showHelp() {
298 |     console.log(`
299 | 🧠 Memory Mode Controller
300 | 
301 | Usage: node memory-mode-controller.js <command> [options]
302 | 
303 | Commands:
304 |   status                    Show current configuration and status
305 |   profile <name>           Switch to performance profile
306 |   sensitivity <0-1>        Set pattern detection sensitivity
307 |   threshold <0-1>          Set trigger threshold
308 |   enable                   Enable natural triggers
309 |   disable                  Disable natural triggers
310 |   toggle                   Toggle natural triggers on/off
311 |   reset                    Reset to default configuration
312 |   list                     List available profiles
313 |   help                     Show this help message
314 | 
315 | Performance Profiles:
316 |   speed_focused           Fastest response, minimal memory (< 100ms)
317 |   balanced               Moderate latency, smart triggers (< 200ms)
318 |   memory_aware           Full awareness, accept latency (< 500ms)
319 |   adaptive               Auto-adjust based on usage patterns
320 | 
321 | Examples:
322 |   node memory-mode-controller.js status
323 |   node memory-mode-controller.js profile balanced
324 |   node memory-mode-controller.js sensitivity 0.8
325 |   node memory-mode-controller.js disable
326 | `);
327 | }
328 | 
329 | // Run if called directly
330 | if (require.main === module) {
331 |     main().catch(error => {
332 |         console.error('❌ Fatal error:', error.message);
333 |         process.exit(1);
334 |     });
335 | }
336 | 
337 | module.exports = { MemoryModeController };
```
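
For reference, a sketch (in Python, to match the other examples in this dump) of the minimal `config.json` shape the controller above reads and updates. The key names are inferred from `loadConfig()`, `switchProfile()`, and `resetToDefaults()`; the tier names are hypothetical placeholders, and the shipped config may contain additional fields.

```python
import json
from pathlib import Path

# Minimal config the controller can load, display, and rewrite.
minimal_config = {
    "performance": {
        "defaultProfile": "balanced",
        "profiles": {
            "balanced": {
                "description": "Moderate latency, smart triggers",
                "maxLatency": 200,
                "enabledTiers": ["cache", "semantic"],  # hypothetical tier names
                "backgroundProcessing": True,
                "degradeThreshold": 400,
            }
        },
    },
    "naturalTriggers": {
        "enabled": True,
        "triggerThreshold": 0.6,
        "cooldownPeriod": 30000,
        "maxMemoriesPerTrigger": 5,
    },
    "patternDetector": {"sensitivity": 0.7, "adaptiveLearning": True},
}

Path("claude-hooks/config.json").write_text(json.dumps(minimal_config, indent=2))
```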

--------------------------------------------------------------------------------
/tests/unit/test_semtools_loader.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Unit tests for Semtools document loader.
  4 | """
  5 | 
  6 | import pytest
  7 | import asyncio
  8 | from pathlib import Path
  9 | from unittest.mock import Mock, patch, AsyncMock, MagicMock
 10 | import shutil
 11 | 
 12 | from mcp_memory_service.ingestion.semtools_loader import SemtoolsLoader
 13 | from mcp_memory_service.ingestion.base import DocumentChunk
 14 | 
 15 | 
 16 | class TestSemtoolsLoader:
 17 |     """Test suite for SemtoolsLoader class."""
 18 | 
 19 |     def test_initialization(self):
 20 |         """Test basic initialization of SemtoolsLoader."""
 21 |         loader = SemtoolsLoader(chunk_size=500, chunk_overlap=50)
 22 | 
 23 |         assert loader.chunk_size == 500
 24 |         assert loader.chunk_overlap == 50
 25 |         assert 'pdf' in loader.supported_extensions
 26 |         assert 'docx' in loader.supported_extensions
 27 |         assert 'pptx' in loader.supported_extensions
 28 | 
 29 |     @patch('shutil.which')
 30 |     def test_semtools_availability_check(self, mock_which):
 31 |         """Test detection of semtools availability."""
 32 |         # Test when semtools is available
 33 |         mock_which.return_value = '/usr/local/bin/semtools'
 34 |         loader = SemtoolsLoader()
 35 |         assert loader._semtools_available is True
 36 | 
 37 |         # Test when semtools is not available
 38 |         mock_which.return_value = None
 39 |         loader = SemtoolsLoader()
 40 |         assert loader._semtools_available is False
 41 | 
 42 |     @patch('shutil.which')
 43 |     def test_can_handle_file(self, mock_which):
 44 |         """Test file format detection."""
 45 |         mock_which.return_value = '/usr/local/bin/semtools'
 46 |         loader = SemtoolsLoader()
 47 | 
 48 |         # Create temporary test files
 49 |         import tempfile
 50 |         with tempfile.TemporaryDirectory() as tmpdir:
 51 |             pdf_file = Path(tmpdir) / "test.pdf"
 52 |             pdf_file.touch()
 53 | 
 54 |             docx_file = Path(tmpdir) / "test.docx"
 55 |             docx_file.touch()
 56 | 
 57 |             txt_file = Path(tmpdir) / "test.txt"
 58 |             txt_file.touch()
 59 | 
 60 |             # Test supported formats
 61 |             assert loader.can_handle(pdf_file) is True
 62 |             assert loader.can_handle(docx_file) is True
 63 | 
 64 |             # Test unsupported formats
 65 |             assert loader.can_handle(txt_file) is False
 66 | 
 67 |     @patch('shutil.which')
 68 |     def test_can_handle_returns_false_when_semtools_unavailable(self, mock_which):
 69 |         """Test that can_handle returns False when semtools is not installed."""
 70 |         mock_which.return_value = None
 71 |         loader = SemtoolsLoader()
 72 | 
 73 |         import tempfile
 74 |         with tempfile.TemporaryDirectory() as tmpdir:
 75 |             pdf_file = Path(tmpdir) / "test.pdf"
 76 |             pdf_file.touch()
 77 | 
 78 |             # Should return False even for supported format when semtools unavailable
 79 |             assert loader.can_handle(pdf_file) is False
 80 | 
 81 |     @pytest.mark.asyncio
 82 |     @patch('shutil.which')
 83 |     async def test_extract_chunks_semtools_unavailable(self, mock_which):
 84 |         """Test that extract_chunks raises error when semtools is unavailable."""
 85 |         mock_which.return_value = None
 86 |         loader = SemtoolsLoader()
 87 | 
 88 |         import tempfile
 89 |         with tempfile.TemporaryDirectory() as tmpdir:
 90 |             pdf_file = Path(tmpdir) / "test.pdf"
 91 |             pdf_file.write_text("dummy content")
 92 | 
 93 |             # When semtools is unavailable, validate_file will fail first
 94 |             with pytest.raises(ValueError, match="File format not supported"):
 95 |                 async for chunk in loader.extract_chunks(pdf_file):
 96 |                     pass
 97 | 
 98 |     @pytest.mark.asyncio
 99 |     @patch('mcp_memory_service.ingestion.semtools_loader.SemtoolsLoader._check_semtools_availability')
100 |     @patch('asyncio.create_subprocess_exec')
101 |     async def test_extract_chunks_success(self, mock_subprocess, mock_check_semtools):
102 |         """Test successful document extraction with semtools."""
103 |         # Force semtools to be "available"
104 |         mock_check_semtools.return_value = True
105 | 
106 |         # Mock successful semtools execution with sufficient content to create chunks
107 |         mock_content = b"# Document Title\n\n" + b"This is a test document with enough content to create chunks. " * 10
108 |         mock_process = AsyncMock()
109 |         mock_process.returncode = 0
110 |         mock_process.communicate = AsyncMock(
111 |             return_value=(mock_content, b"")
112 |         )
113 |         mock_subprocess.return_value = mock_process
114 | 
115 |         loader = SemtoolsLoader(chunk_size=200, chunk_overlap=50)
116 |         loader._semtools_available = True  # Override
117 | 
118 |         import tempfile
119 |         with tempfile.TemporaryDirectory() as tmpdir:
120 |             pdf_file = Path(tmpdir) / "test.pdf"
121 |             pdf_file.write_text("dummy content")
122 | 
123 |             chunks = []
124 |             async for chunk in loader.extract_chunks(pdf_file):
125 |                 chunks.append(chunk)
126 | 
127 |             # Verify chunks were created
128 |             assert len(chunks) > 0
129 | 
130 |             # Verify chunk structure
131 |             first_chunk = chunks[0]
132 |             assert isinstance(first_chunk, DocumentChunk)
133 |             assert isinstance(first_chunk.content, str)
134 |             assert first_chunk.metadata['extraction_method'] == 'semtools'
135 |             assert first_chunk.metadata['parser_backend'] == 'llamaparse'
136 |             assert first_chunk.source_file == pdf_file
137 | 
138 |     @pytest.mark.asyncio
139 |     @patch('mcp_memory_service.ingestion.semtools_loader.SemtoolsLoader._check_semtools_availability')
140 |     @patch('asyncio.create_subprocess_exec')
141 |     @patch.dict('os.environ', {'LLAMAPARSE_API_KEY': 'test-api-key'})
142 |     async def test_extract_chunks_with_api_key(self, mock_subprocess, mock_check_semtools):
143 |         """Test that API key is passed to semtools when available."""
144 |         mock_check_semtools.return_value = True
145 | 
146 |         # Mock with sufficient content to create chunks
147 |         mock_content = b"# Content\n\n" + b"This document has enough content to create chunks. " * 10
148 |         mock_process = AsyncMock()
149 |         mock_process.returncode = 0
150 |         mock_process.communicate = AsyncMock(
151 |             return_value=(mock_content, b"")
152 |         )
153 |         mock_subprocess.return_value = mock_process
154 | 
155 |         # Create loader with API key
156 |         loader = SemtoolsLoader()
157 |         loader._semtools_available = True  # Override
158 | 
159 |         import tempfile
160 |         with tempfile.TemporaryDirectory() as tmpdir:
161 |             pdf_file = Path(tmpdir) / "test.pdf"
162 |             pdf_file.write_text("dummy content")
163 | 
164 |             chunks = []
165 |             async for chunk in loader.extract_chunks(pdf_file):
166 |                 chunks.append(chunk)
167 | 
168 |             # Verify chunks were created and API key was recognized
169 |             assert len(chunks) > 0
170 |             assert chunks[0].metadata['has_api_key'] is True
171 | 
172 |     @pytest.mark.asyncio
173 |     @patch('shutil.which')
174 |     @patch('asyncio.create_subprocess_exec')
175 |     async def test_extract_chunks_semtools_error(self, mock_subprocess, mock_which):
176 |         """Test handling of semtools execution errors."""
177 |         mock_which.return_value = '/usr/local/bin/semtools'
178 | 
179 |         # Mock failed semtools execution
180 |         mock_process = AsyncMock()
181 |         mock_process.returncode = 1
182 |         mock_process.communicate = AsyncMock(
183 |             return_value=(b"", b"Error: Failed to parse document")
184 |         )
185 |         mock_subprocess.return_value = mock_process
186 | 
187 |         loader = SemtoolsLoader()
188 | 
189 |         import tempfile
190 |         with tempfile.TemporaryDirectory() as tmpdir:
191 |             pdf_file = Path(tmpdir) / "test.pdf"
192 |             pdf_file.write_text("dummy content")
193 | 
194 |             with pytest.raises(ValueError, match="Failed to parse document"):
195 |                 async for chunk in loader.extract_chunks(pdf_file):
196 |                     pass
197 | 
198 |     @pytest.mark.asyncio
199 |     @patch('shutil.which')
200 |     @patch('asyncio.create_subprocess_exec')
201 |     @patch('asyncio.wait_for')
202 |     async def test_extract_chunks_timeout(self, mock_wait_for, mock_subprocess, mock_which):
203 |         """Test handling of semtools timeout."""
204 |         mock_which.return_value = '/usr/local/bin/semtools'
205 | 
206 |         # Mock timeout scenario
207 |         mock_wait_for.side_effect = asyncio.TimeoutError()
208 | 
209 |         loader = SemtoolsLoader()
210 | 
211 |         import tempfile
212 |         with tempfile.TemporaryDirectory() as tmpdir:
213 |             pdf_file = Path(tmpdir) / "test.pdf"
214 |             pdf_file.write_text("dummy content")
215 | 
216 |             with pytest.raises(ValueError, match="timed out|Failed to parse"):
217 |                 async for chunk in loader.extract_chunks(pdf_file):
218 |                     pass
219 | 
220 |     @pytest.mark.asyncio
221 |     @patch('shutil.which')
222 |     @patch('asyncio.create_subprocess_exec')
223 |     async def test_extract_chunks_empty_content(self, mock_subprocess, mock_which):
224 |         """Test handling of empty content from semtools."""
225 |         mock_which.return_value = '/usr/local/bin/semtools'
226 | 
227 |         # Mock empty output
228 |         mock_process = AsyncMock()
229 |         mock_process.returncode = 0
230 |         mock_process.communicate = AsyncMock(
231 |             return_value=(b"", b"")  # Empty stdout
232 |         )
233 |         mock_subprocess.return_value = mock_process
234 | 
235 |         loader = SemtoolsLoader()
236 | 
237 |         import tempfile
238 |         with tempfile.TemporaryDirectory() as tmpdir:
239 |             pdf_file = Path(tmpdir) / "test.pdf"
240 |             pdf_file.write_text("dummy content")
241 | 
242 |             with pytest.raises(ValueError, match="empty content|Failed to parse"):
243 |                 async for chunk in loader.extract_chunks(pdf_file):
244 |                     pass
245 | 
246 | 
247 | class TestSemtoolsLoaderRegistry:
248 |     """Test semtools loader registration."""
249 | 
250 |     @patch('shutil.which')
251 |     def test_loader_registration_with_semtools(self, mock_which):
252 |         """Test that semtools loader is registered when available."""
253 |         mock_which.return_value = '/usr/local/bin/semtools'
254 | 
255 |         from mcp_memory_service.ingestion.registry import get_loader_for_file
256 | 
257 |         import tempfile
258 |         with tempfile.TemporaryDirectory() as tmpdir:
259 |             # Test DOCX file (semtools-only format)
260 |             docx_file = Path(tmpdir) / "test.docx"
261 |             docx_file.touch()
262 | 
263 |             loader = get_loader_for_file(docx_file)
264 | 
265 |             # Should get SemtoolsLoader when semtools is available
266 |             assert loader is not None
267 |             assert isinstance(loader, SemtoolsLoader)
268 | 
269 |     @patch('shutil.which')
270 |     def test_loader_registration_without_semtools(self, mock_which):
271 |         """Test that docx files return None when semtools unavailable."""
272 |         mock_which.return_value = None
273 | 
274 |         from mcp_memory_service.ingestion.registry import get_loader_for_file
275 | 
276 |         import tempfile
277 |         with tempfile.TemporaryDirectory() as tmpdir:
278 |             # Test DOCX file (semtools-only format)
279 |             docx_file = Path(tmpdir) / "test.docx"
280 |             docx_file.touch()
281 | 
282 |             loader = get_loader_for_file(docx_file)
283 | 
284 |             # Should return None when semtools is not available
285 |             assert loader is None
286 | 
287 | 
288 | if __name__ == '__main__':
289 |     pytest.main([__file__, '-v'])
290 | 
```
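
Outside the mocked test harness, the loader is consumed as an async generator. A minimal sketch, assuming the `semtools` binary is installed and on PATH; the class, `can_handle()`, `extract_chunks()`, and the chunk attributes are the ones exercised by the tests above, while the sample file path is illustrative.

```python
import asyncio
from pathlib import Path

from mcp_memory_service.ingestion.semtools_loader import SemtoolsLoader


async def ingest(path: Path) -> int:
    # extract_chunks() yields DocumentChunk objects one at a time.
    loader = SemtoolsLoader(chunk_size=500, chunk_overlap=50)
    if not loader.can_handle(path):
        # Either the format is unsupported or the semtools binary is missing.
        return 0
    count = 0
    async for chunk in loader.extract_chunks(path):
        count += 1
        print(chunk.metadata.get("extraction_method"), len(chunk.content))
    return count


if __name__ == "__main__":
    asyncio.run(ingest(Path("docs/example.pdf")))  # illustrative path
```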

--------------------------------------------------------------------------------
/scripts/database/db_health_check.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Comprehensive Database Health Check for MCP Memory Service SQLite-vec Backend
  4 | """
  5 | 
  6 | import asyncio
  7 | import os
  8 | import sys
  9 | import tempfile
 10 | import time
 11 | from pathlib import Path
 12 | 
 13 | # Add src to path
 14 | sys.path.insert(0, 'src')
 15 | 
 16 | # Set environment for sqlite-vec
 17 | os.environ['MCP_MEMORY_STORAGE_BACKEND'] = 'sqlite_vec'
 18 | 
 19 | class HealthChecker:
 20 |     def __init__(self):
 21 |         self.results = []
 22 |         self.errors = []
 23 |     
 24 |     async def test(self, name: str, func):
 25 |         """Run a test and record results."""
 26 |         print(f"🔍 {name}...")
 27 |         try:
 28 |             start_time = time.time()
 29 |             
 30 |             if asyncio.iscoroutinefunction(func):
 31 |                 result = await func()
 32 |             else:
 33 |                 result = func()
 34 |             
 35 |             duration = time.time() - start_time
 36 |             
 37 |             if result:
 38 |                 print(f"   ✅ PASS ({duration:.2f}s)")
 39 |                 self.results.append((name, "PASS", duration))
 40 |             else:
 41 |                 print(f"   ❌ FAIL ({duration:.2f}s)")
 42 |                 self.results.append((name, "FAIL", duration))
 43 |                 
 44 |         except Exception as e:
 45 |             duration = time.time() - start_time
 46 |             print(f"   ❌ ERROR ({duration:.2f}s): {str(e)}")
 47 |             self.results.append((name, "ERROR", duration))
 48 |             self.errors.append((name, str(e)))
 49 |     
 50 |     def test_imports(self):
 51 |         """Test all necessary imports."""
 52 |         try:
 53 |             import sqlite_vec
 54 |             from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
 55 |             from src.mcp_memory_service.models.memory import Memory
 56 |             from src.mcp_memory_service.utils.hashing import generate_content_hash
 57 |             return True
 58 |         except ImportError as e:
 59 |             print(f"      Import error: {e}")
 60 |             return False
 61 |     
 62 |     async def test_database_creation(self):
 63 |         """Test database creation and initialization."""
 64 |         temp_dir = tempfile.mkdtemp()
 65 |         db_path = os.path.join(temp_dir, "health_check.db")
 66 |         
 67 |         try:
 68 |             from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
 69 |             storage = SqliteVecMemoryStorage(db_path)
 70 |             await storage.initialize()
 71 |             
 72 |             # Check tables exist
 73 |             cursor = storage.conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
 74 |             tables = [row[0] for row in cursor.fetchall()]
 75 |             
 76 |             expected_tables = ['memories', 'memory_embeddings']
 77 |             for table in expected_tables:
 78 |                 if table not in tables:
 79 |                     print(f"      Missing table: {table}")
 80 |                     return False
 81 |             
 82 |             storage.close()
 83 |             os.remove(db_path)
 84 |             os.rmdir(temp_dir)
 85 |             return True
 86 |             
 87 |         except Exception as e:
 88 |             print(f"      Database creation error: {e}")
 89 |             return False
 90 |     
 91 |     async def test_memory_operations(self):
 92 |         """Test core memory operations."""
 93 |         temp_dir = tempfile.mkdtemp()
 94 |         db_path = os.path.join(temp_dir, "operations_test.db")
 95 |         
 96 |         try:
 97 |             from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
 98 |             from src.mcp_memory_service.models.memory import Memory
 99 |             from src.mcp_memory_service.utils.hashing import generate_content_hash
100 |             
101 |             storage = SqliteVecMemoryStorage(db_path)
102 |             await storage.initialize()
103 |             
104 |             # Test store
105 |             content = "Health check test memory"
106 |             memory = Memory(
107 |                 content=content,
108 |                 content_hash=generate_content_hash(content),
109 |                 tags=["health", "check"],
110 |                 memory_type="test"
111 |             )
112 |             
113 |             success, message = await storage.store(memory)
114 |             if not success:
115 |                 print(f"      Store failed: {message}")
116 |                 return False
117 |             
118 |             # Test retrieve
119 |             results = await storage.retrieve("health check", n_results=1)
120 |             if not results:
121 |                 print("      Retrieve failed: No results")
122 |                 return False
123 |             
124 |             # Test tag search
125 |             tag_results = await storage.search_by_tag(["health"])
126 |             if not tag_results:
127 |                 print("      Tag search failed: No results")
128 |                 return False
129 |             
130 |             # Test delete
131 |             success, message = await storage.delete(memory.content_hash)
132 |             if not success:
133 |                 print(f"      Delete failed: {message}")
134 |                 return False
135 |             
136 |             storage.close()
137 |             os.remove(db_path)
138 |             os.rmdir(temp_dir)
139 |             return True
140 |             
141 |         except Exception as e:
142 |             print(f"      Memory operations error: {e}")
143 |             return False
144 |     
145 |     async def test_vector_search(self):
146 |         """Test vector similarity search functionality."""
147 |         temp_dir = tempfile.mkdtemp()
148 |         db_path = os.path.join(temp_dir, "vector_test.db")
149 |         
150 |         try:
151 |             from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
152 |             from src.mcp_memory_service.models.memory import Memory
153 |             from src.mcp_memory_service.utils.hashing import generate_content_hash
154 |             
155 |             storage = SqliteVecMemoryStorage(db_path)
156 |             await storage.initialize()
157 |             
158 |             # Store multiple memories
159 |             test_memories = [
160 |                 "Python programming is fun",
161 |                 "JavaScript development tools",
162 |                 "Database design patterns"
163 |             ]
164 |             
165 |             for content in test_memories:
166 |                 memory = Memory(
167 |                     content=content,
168 |                     content_hash=generate_content_hash(content),
169 |                     tags=["programming"],
170 |                     memory_type="test"
171 |                 )
172 |                 await storage.store(memory)
173 |             
174 |             # Test vector search
175 |             results = await storage.retrieve("programming languages", n_results=3)
176 |             if len(results) != 3:
177 |                 print(f"      Expected 3 results, got {len(results)}")
178 |                 return False
179 |             
180 |             # Check relevance scores
181 |             for result in results:
182 |                 if result.relevance_score < 0:
183 |                     print(f"      Invalid relevance score: {result.relevance_score}")
184 |                     return False
185 |             
186 |             storage.close()
187 |             os.remove(db_path)
188 |             os.rmdir(temp_dir)
189 |             return True
190 |             
191 |         except Exception as e:
192 |             print(f"      Vector search error: {e}")
193 |             return False
194 |     
195 |     def test_environment(self):
196 |         """Test environment configuration."""
197 |         required_vars = {
198 |             'MCP_MEMORY_STORAGE_BACKEND': 'sqlite_vec'
199 |         }
200 |         
201 |         for var, expected in required_vars.items():
202 |             actual = os.getenv(var)
203 |             if actual != expected:
204 |                 print(f"      Environment variable {var}: expected '{expected}', got '{actual}'")
205 |                 return False
206 |         
207 |         return True
208 |     
209 |     def test_dependencies(self):
210 |         """Test that all dependencies are available."""
211 |         try:
212 |             import sqlite_vec
213 |             version = getattr(sqlite_vec, '__version__', 'unknown')
214 |             print(f"      sqlite-vec version: {version}")
215 |             
216 |             import sentence_transformers
217 |             print(f"      sentence-transformers available")
218 |             
219 |             import torch
220 |             print(f"      torch available")
221 |             
222 |             return True
223 |         except ImportError as e:
224 |             print(f"      Dependency error: {e}")
225 |             return False
226 |     
227 |     def test_database_path(self):
228 |         """Test database path accessibility."""
229 |         home = str(Path.home())
230 |         db_path = os.path.join(home, '.local', 'share', 'mcp-memory', 'sqlite_vec.db')
231 |         db_dir = os.path.dirname(db_path)
232 |         
233 |         try:
234 |             # Ensure directory exists
235 |             os.makedirs(db_dir, exist_ok=True)
236 |             
237 |             # Test write permission
238 |             test_file = os.path.join(db_dir, '.write_test')
239 |             with open(test_file, 'w') as f:
240 |                 f.write('test')
241 |             os.remove(test_file)
242 |             
243 |             print(f"      Database path: {db_path}")
244 |             print(f"      Directory writable: ✅")
245 |             return True
246 |             
247 |         except Exception as e:
248 |             print(f"      Database path error: {e}")
249 |             return False
250 | 
251 | async def main():
252 |     """Main health check function."""
253 |     print("=" * 60)
254 |     print("🏥 MCP Memory Service - SQLite-vec Database Health Check")
255 |     print("=" * 60)
256 |     
257 |     checker = HealthChecker()
258 |     
259 |     # Run all tests
260 |     await checker.test("Environment Configuration", checker.test_environment)
261 |     await checker.test("Dependencies Check", checker.test_dependencies)
262 |     await checker.test("Import Tests", checker.test_imports)
263 |     await checker.test("Database Path", checker.test_database_path)
264 |     await checker.test("Database Creation", checker.test_database_creation)
265 |     await checker.test("Memory Operations", checker.test_memory_operations)
266 |     await checker.test("Vector Search", checker.test_vector_search)
267 |     
268 |     # Summary
269 |     print("\n" + "=" * 60)
270 |     print("📊 Health Check Summary")
271 |     print("=" * 60)
272 |     
273 |     passed = sum(1 for _, status, _ in checker.results if status == "PASS")
274 |     failed = sum(1 for _, status, _ in checker.results if status in ["FAIL", "ERROR"])
275 |     total = len(checker.results)
276 |     
277 |     for name, status, duration in checker.results:
278 |         status_icon = "✅" if status == "PASS" else "❌"
279 |         print(f"{status_icon} {name}: {status} ({duration:.2f}s)")
280 |     
281 |     print(f"\nResults: {passed}/{total} tests passed")
282 |     
283 |     if checker.errors:
284 |         print("\n❌ Errors Found:")
285 |         for name, error in checker.errors:
286 |             print(f"   • {name}: {error}")
287 |     
288 |     if passed == total:
289 |         print("\n🎉 Database Health Check: PASSED")
290 |         print("   SQLite-vec backend is fully functional and ready for production use!")
291 |         print("\n🚀 Ready for Claude Code integration:")
292 |         print("   - Start server: python -m src.mcp_memory_service.server")
293 |         print("   - Database: ~/.local/share/mcp-memory/sqlite_vec.db")
294 |         print("   - 75% memory reduction vs ChromaDB")
295 |         return 0
296 |     else:
297 |         print("\n💥 Database Health Check: FAILED")
298 |         print("   Please resolve the issues above before using the service.")
299 |         return 1
300 | 
301 | if __name__ == "__main__":
302 |     sys.exit(asyncio.run(main()))
```
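
Because the script exits 0 when every check passes and 1 otherwise, it can be wrapped in a small gate for CI or deployment scripts. A sketch, assuming it is invoked from the repository root:

```python
import os
import subprocess
import sys

# Run the health check as a subprocess and branch on its exit code.
# The script sets MCP_MEMORY_STORAGE_BACKEND itself, but exporting it
# here keeps the intent explicit for CI environments.
env = {**os.environ, "MCP_MEMORY_STORAGE_BACKEND": "sqlite_vec"}
result = subprocess.run(
    [sys.executable, "scripts/database/db_health_check.py"],
    env=env,
)
print("sqlite-vec backend healthy" if result.returncode == 0 else "health check failed")
sys.exit(result.returncode)
```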

--------------------------------------------------------------------------------
/tests/integration/test_concurrent_clients.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Integration tests for concurrent MCP client access to SQLite-vec backend.
  3 | """
  4 | 
  5 | import pytest
  6 | import pytest_asyncio
  7 | import asyncio
  8 | import tempfile
  9 | import os
 10 | import time
 11 | from typing import List
 12 | 
 13 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
 14 | from mcp_memory_service.models.memory import Memory
 15 | from mcp_memory_service.utils.hashing import generate_content_hash
 16 | 
 17 | 
 18 | class TestConcurrentClients:
 19 |     """Test suite for concurrent client access scenarios."""
 20 |     
 21 |     @pytest_asyncio.fixture
 22 |     async def db_path(self):
 23 |         """Create a temporary database path."""
 24 |         with tempfile.NamedTemporaryFile(delete=False, suffix=".db") as tmp:
 25 |             path = tmp.name
 26 |         yield path
 27 |         # Cleanup
 28 |         for ext in ["", "-wal", "-shm"]:
 29 |             try:
 30 |                 os.unlink(path + ext)
 31 |             except OSError:
 32 |                 pass
 33 |     
 34 |     async def create_client(self, db_path: str, client_id: str) -> SqliteVecMemoryStorage:
 35 |         """Create a storage client instance."""
 36 |         storage = SqliteVecMemoryStorage(db_path)
 37 |         await storage.initialize()
 38 |         return storage
 39 |     
 40 |     async def client_writer(self, db_path: str, client_id: str, num_memories: int) -> List[tuple]:
 41 |         """Simulate a client writing memories."""
 42 |         storage = await self.create_client(db_path, client_id)
 43 |         results = []
 44 |         
 45 |         try:
 46 |             for i in range(num_memories):
 47 |                 content = f"Memory from {client_id} - {i}"
 48 |                 memory = Memory(
 49 |                     content=content,
 50 |                     content_hash=generate_content_hash(content),
 51 |                     tags=[client_id, "concurrent"],
 52 |                     memory_type="test",
 53 |                     metadata={"client_id": client_id, "index": i}
 54 |                 )
 55 |                 
 56 |                 success, msg = await storage.store(memory)
 57 |                 results.append((success, msg, memory.content_hash))
 58 |                 
 59 |                 # Small delay to simulate real-world timing
 60 |                 await asyncio.sleep(0.01)
 61 |         finally:
 62 |             storage.close()
 63 |         
 64 |         return results
 65 |     
 66 |     async def client_reader(self, db_path: str, client_id: str, num_reads: int) -> List[int]:
 67 |         """Simulate a client reading memories."""
 68 |         storage = await self.create_client(db_path, client_id)
 69 |         counts = []
 70 |         
 71 |         try:
 72 |             for i in range(num_reads):
 73 |                 # Get all memories with tag "concurrent"
 74 |                 memories = await storage.search_by_tag(["concurrent"])
 75 |                 counts.append(len(memories))
 76 |                 
 77 |                 # Small delay between reads
 78 |                 await asyncio.sleep(0.02)
 79 |         finally:
 80 |             storage.close()
 81 |         
 82 |         return counts
 83 |     
 84 |     @pytest.mark.asyncio
 85 |     async def test_two_clients_concurrent_write(self, db_path):
 86 |         """Test two clients writing memories concurrently."""
 87 |         # Run two clients concurrently
 88 |         results = await asyncio.gather(
 89 |             self.client_writer(db_path, "client1", 10),
 90 |             self.client_writer(db_path, "client2", 10),
 91 |             return_exceptions=True
 92 |         )
 93 |         
 94 |         # Check results
 95 |         assert len(results) == 2
 96 |         assert not isinstance(results[0], Exception), f"Client 1 failed: {results[0]}"
 97 |         assert not isinstance(results[1], Exception), f"Client 2 failed: {results[1]}"
 98 |         
 99 |         client1_results, client2_results = results
100 |         
101 |         # Count successful writes
102 |         client1_success = sum(1 for success, _, _ in client1_results if success)
103 |         client2_success = sum(1 for success, _, _ in client2_results if success)
104 |         
105 |         # Both clients should have written their memories
106 |         assert client1_success == 10, f"Client 1 only wrote {client1_success}/10 memories"
107 |         assert client2_success == 10, f"Client 2 only wrote {client2_success}/10 memories"
108 |         
109 |         # Verify total memories in database
110 |         storage = await self.create_client(db_path, "verifier")
111 |         try:
112 |             all_memories = await storage.search_by_tag(["concurrent"])
113 |             assert len(all_memories) == 20, f"Expected 20 memories, found {len(all_memories)}"
114 |         finally:
115 |             storage.close()
116 |     
117 |     @pytest.mark.asyncio
118 |     async def test_reader_writer_concurrent(self, db_path):
119 |         """Test one client reading while another writes."""
120 |         # Start with some initial data
121 |         initial_storage = await self.create_client(db_path, "initial")
122 |         for i in range(5):
123 |             content = f"Initial memory {i}"
124 |             memory = Memory(
125 |                 content=content,
126 |                 content_hash=generate_content_hash(content),
127 |                 tags=["concurrent", "initial"],
128 |                 memory_type="test"
129 |             )
130 |             await initial_storage.store(memory)
131 |         initial_storage.close()
132 |         
133 |         # Run reader and writer concurrently
134 |         results = await asyncio.gather(
135 |             self.client_reader(db_path, "reader", 10),
136 |             self.client_writer(db_path, "writer", 5),
137 |             return_exceptions=True
138 |         )
139 |         
140 |         assert not isinstance(results[0], Exception), f"Reader failed: {results[0]}"
141 |         assert not isinstance(results[1], Exception), f"Writer failed: {results[1]}"
142 |         
143 |         read_counts, write_results = results
144 |         
145 |         # Reader should see increasing counts as writer adds memories
146 |         assert read_counts[0] >= 5, "Reader should see initial memories"
147 |         assert read_counts[-1] >= read_counts[0], "Read count should not decrease"
148 |         
149 |         # Writer should succeed
150 |         write_success = sum(1 for success, _, _ in write_results if success)
151 |         assert write_success == 5, f"Writer only wrote {write_success}/5 memories"
152 |     
153 |     @pytest.mark.asyncio
154 |     async def test_multiple_readers_one_writer(self, db_path):
155 |         """Test multiple readers accessing while one writer updates."""
156 |         # Create writer and multiple readers
157 |         async def run_scenario():
158 |             tasks = [
159 |                 self.client_writer(db_path, "writer", 10),
160 |                 self.client_reader(db_path, "reader1", 5),
161 |                 self.client_reader(db_path, "reader2", 5),
162 |                 self.client_reader(db_path, "reader3", 5),
163 |             ]
164 |             return await asyncio.gather(*tasks, return_exceptions=True)
165 |         
166 |         results = await run_scenario()
167 |         
168 |         # Check all operations completed without exceptions
169 |         for i, result in enumerate(results):
170 |             assert not isinstance(result, Exception), f"Task {i} failed: {result}"
171 |         
172 |         write_results = results[0]
173 |         write_success = sum(1 for success, _, _ in write_results if success)
174 |         assert write_success == 10, f"Writer only wrote {write_success}/10 memories"
175 |         
176 |         # All readers should have successfully read
177 |         for reader_counts in results[1:]:
178 |             assert len(reader_counts) == 5, "Reader should complete all reads"
179 |             assert all(isinstance(count, int) for count in reader_counts)
180 |     
181 |     @pytest.mark.asyncio
182 |     async def test_rapid_concurrent_access(self, db_path):
183 |         """Test rapid concurrent access with minimal delays."""
184 |         async def rapid_writer(client_id: str):
185 |             storage = await self.create_client(db_path, client_id)
186 |             try:
187 |                 for i in range(20):
188 |                     content = f"Rapid {client_id}-{i}"
189 |                     memory = Memory(
190 |                         content=content,
191 |                         content_hash=generate_content_hash(content),
192 |                         tags=["rapid"],
193 |                         memory_type="test"
194 |                     )
195 |                     await storage.store(memory)
196 |                     # No delay - rapid fire
197 |             finally:
198 |                 storage.close()
199 |         
200 |         async def rapid_reader(client_id: str):
201 |             storage = await self.create_client(db_path, client_id)
202 |             try:
203 |                 for i in range(20):
204 |                     await storage.search_by_tag(["rapid"])
205 |                     # No delay - rapid fire
206 |             finally:
207 |                 storage.close()
208 |         
209 |         # Run multiple clients with rapid access
210 |         tasks = [
211 |             rapid_writer("w1"),
212 |             rapid_writer("w2"),
213 |             rapid_reader("r1"),
214 |             rapid_reader("r2"),
215 |         ]
216 |         
217 |         results = await asyncio.gather(*tasks, return_exceptions=True)
218 |         
219 |         # Check no exceptions occurred
220 |         exceptions = [r for r in results if isinstance(r, Exception)]
221 |         assert len(exceptions) == 0, f"Exceptions occurred: {exceptions}"
222 |     
223 |     @pytest.mark.asyncio
224 |     async def test_database_consistency(self, db_path):
225 |         """Test that database remains consistent under concurrent access."""
226 |         # Write unique memories from multiple clients
227 |         async def write_unique_memories(client_id: str, start_idx: int):
228 |             storage = await self.create_client(db_path, client_id)
229 |             hashes = []
230 |             try:
231 |                 for i in range(10):
232 |                     content = f"Unique memory {start_idx + i}"
233 |                     memory = Memory(
234 |                         content=content,
235 |                         content_hash=generate_content_hash(content),
236 |                         tags=["consistency", client_id],
237 |                         memory_type="test"
238 |                     )
239 |                     success, msg = await storage.store(memory)
240 |                     if success:
241 |                         hashes.append(memory.content_hash)
242 |             finally:
243 |                 storage.close()
244 |             return hashes
245 |         
246 |         # Run concurrent writes
247 |         results = await asyncio.gather(
248 |             write_unique_memories("client1", 0),
249 |             write_unique_memories("client2", 100),
250 |             write_unique_memories("client3", 200),
251 |         )
252 |         
253 |         all_hashes = []
254 |         for client_hashes in results:
255 |             all_hashes.extend(client_hashes)
256 |         
257 |         # Verify all memories are in database and no duplicates
258 |         storage = await self.create_client(db_path, "verifier")
259 |         try:
260 |             memories = await storage.search_by_tag(["consistency"])
261 |             
262 |             # Check count matches
263 |             assert len(memories) == len(all_hashes), f"Memory count mismatch: {len(memories)} vs {len(all_hashes)}"
264 |             
265 |             # Check no duplicates
266 |             db_hashes = {m.content_hash for m in memories}
267 |             assert len(db_hashes) == len(memories), "Duplicate memories found"
268 |             
269 |             # Check all written memories are present
270 |             for hash_val in all_hashes:
271 |                 assert hash_val in db_hashes, f"Memory {hash_val} not found in database"
272 |         finally:
273 |             storage.close()
274 | 
275 | 
276 | if __name__ == "__main__":
277 |     pytest.main([__file__, "-v"])
```
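
The same concurrent-writer pattern can be reproduced outside pytest. A minimal sketch that uses the storage API exactly as the test helpers above do; the database path and memory counts are illustrative.

```python
import asyncio
import tempfile

from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
from mcp_memory_service.models.memory import Memory
from mcp_memory_service.utils.hashing import generate_content_hash


async def writer(db_path: str, client_id: str, count: int) -> None:
    # Each "client" opens its own connection to the shared database file.
    storage = SqliteVecMemoryStorage(db_path)
    await storage.initialize()
    try:
        for i in range(count):
            content = f"Memory from {client_id} - {i}"
            await storage.store(Memory(
                content=content,
                content_hash=generate_content_hash(content),
                tags=[client_id, "concurrent"],
                memory_type="test",
            ))
    finally:
        storage.close()


async def main() -> None:
    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmp.close()
    await asyncio.gather(
        writer(tmp.name, "client1", 10),
        writer(tmp.name, "client2", 10),
    )


if __name__ == "__main__":
    asyncio.run(main())
```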

--------------------------------------------------------------------------------
/scripts/maintenance/repair_sqlite_vec_embeddings.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Repair script to fix existing SQLite-vec databases without full migration.
  4 | 
  5 | This script attempts to repair the database in-place by:
  6 | 1. Checking the current state
  7 | 2. Regenerating missing embeddings
  8 | 3. Fixing dimension mismatches if possible
  9 | """
 10 | 
 11 | import asyncio
 12 | import os
 13 | import sys
 14 | import sqlite3
 15 | import logging
 16 | from typing import List, Tuple
 17 | 
 18 | # Add parent directory to path
 19 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 20 | 
 21 | try:
 22 |     import sqlite_vec
 23 |     from sqlite_vec import serialize_float32
 24 |     SQLITE_VEC_AVAILABLE = True
 25 | except ImportError:
 26 |     SQLITE_VEC_AVAILABLE = False
 27 | 
 28 | try:
 29 |     from sentence_transformers import SentenceTransformer
 30 |     SENTENCE_TRANSFORMERS_AVAILABLE = True
 31 | except ImportError:
 32 |     SENTENCE_TRANSFORMERS_AVAILABLE = False
 33 | 
 34 | # Configure logging
 35 | logging.basicConfig(
 36 |     level=logging.INFO,
 37 |     format='%(asctime)s - %(levelname)s - %(message)s'
 38 | )
 39 | logger = logging.getLogger(__name__)
 40 | 
 41 | 
 42 | class SqliteVecRepair:
 43 |     """Repair SQLite-vec database embeddings."""
 44 |     
 45 |     def __init__(self, db_path: str, model_name: str = "all-MiniLM-L6-v2"):
 46 |         self.db_path = db_path
 47 |         self.model_name = model_name
 48 |         self.conn = None
 49 |         self.model = None
 50 |         self.embedding_dimension = 384  # Default for all-MiniLM-L6-v2
 51 |         
 52 |     def check_dependencies(self):
 53 |         """Check required dependencies."""
 54 |         print("Checking dependencies...")
 55 |         
 56 |         if not SQLITE_VEC_AVAILABLE:
 57 |             print("❌ sqlite-vec not installed. Run: pip install sqlite-vec")
 58 |             return False
 59 |             
 60 |         if not SENTENCE_TRANSFORMERS_AVAILABLE:
 61 |             print("❌ sentence-transformers not installed. Run: pip install sentence-transformers torch")
 62 |             return False
 63 |             
 64 |         print("✅ All dependencies available")
 65 |         return True
 66 |         
 67 |     def connect_database(self):
 68 |         """Connect to the database."""
 69 |         print(f"\nConnecting to database: {self.db_path}")
 70 |         
 71 |         if not os.path.exists(self.db_path):
 72 |             raise FileNotFoundError(f"Database not found: {self.db_path}")
 73 |             
 74 |         self.conn = sqlite3.connect(self.db_path)
 75 |         self.conn.enable_load_extension(True)
 76 |         sqlite_vec.load(self.conn)
 77 |         self.conn.enable_load_extension(False)
 78 |         
 79 |         print("✅ Connected to database")
 80 |         
 81 |     def analyze_database(self) -> dict:
 82 |         """Analyze the current state of the database."""
 83 |         print("\nAnalyzing database...")
 84 |         
 85 |         analysis = {
 86 |             "memory_count": 0,
 87 |             "embedding_count": 0,
 88 |             "missing_embeddings": 0,
 89 |             "embedding_dimension": None,
 90 |             "issues": []
 91 |         }
 92 |         
 93 |         # Count memories
 94 |         cursor = self.conn.execute("SELECT COUNT(*) FROM memories")
 95 |         analysis["memory_count"] = cursor.fetchone()[0]
 96 |         
 97 |         # Count embeddings
 98 |         cursor = self.conn.execute("SELECT COUNT(*) FROM memory_embeddings")
 99 |         analysis["embedding_count"] = cursor.fetchone()[0]
100 |         
101 |         # Check embedding dimension
102 |         cursor = self.conn.execute("""
103 |             SELECT sql FROM sqlite_master 
104 |             WHERE type='table' AND name='memory_embeddings'
105 |         """)
106 |         schema = cursor.fetchone()
107 |         if schema:
108 |             # Extract dimension from schema
109 |             import re
110 |             match = re.search(r'FLOAT\[(\d+)\]', schema[0])
111 |             if match:
112 |                 analysis["embedding_dimension"] = int(match.group(1))
113 |                 
114 |         # Find memories without embeddings
115 |         cursor = self.conn.execute("""
116 |             SELECT COUNT(*) FROM memories m
117 |             WHERE NOT EXISTS (
118 |                 SELECT 1 FROM memory_embeddings e WHERE e.rowid = m.id
119 |             )
120 |         """)
121 |         analysis["missing_embeddings"] = cursor.fetchone()[0]
122 |         
123 |         # Identify issues
124 |         if analysis["memory_count"] != analysis["embedding_count"]:
125 |             analysis["issues"].append(
126 |                 f"Mismatch: {analysis['memory_count']} memories vs {analysis['embedding_count']} embeddings"
127 |             )
128 |             
129 |         if analysis["missing_embeddings"] > 0:
130 |             analysis["issues"].append(
131 |                 f"Missing embeddings: {analysis['missing_embeddings']} memories have no embeddings"
132 |             )
133 |             
134 |         print(f"  Memories: {analysis['memory_count']}")
135 |         print(f"  Embeddings: {analysis['embedding_count']}")
136 |         print(f"  Missing embeddings: {analysis['missing_embeddings']}")
137 |         print(f"  Embedding dimension: {analysis['embedding_dimension']}")
138 |         
139 |         if analysis["issues"]:
140 |             print("\n⚠️  Issues found:")
141 |             for issue in analysis["issues"]:
142 |                 print(f"  - {issue}")
143 |         else:
144 |             print("\n✅ No issues found")
145 |             
146 |         return analysis
147 |         
148 |     def load_model(self):
149 |         """Load the embedding model."""
150 |         print(f"\nLoading embedding model: {self.model_name}")
151 |         
152 |         self.model = SentenceTransformer(self.model_name)
153 |         
154 |         # Get actual dimension
155 |         test_embedding = self.model.encode(["test"], convert_to_numpy=True)
156 |         self.embedding_dimension = test_embedding.shape[1]
157 |         
158 |         print(f"✅ Model loaded (dimension: {self.embedding_dimension})")
159 |         
160 |     def generate_missing_embeddings(self, analysis: dict) -> int:
161 |         """Generate embeddings for memories that don't have them."""
162 |         if analysis["missing_embeddings"] == 0:
163 |             return 0
164 |             
165 |         print(f"\nGenerating {analysis['missing_embeddings']} missing embeddings...")
166 |         
167 |         # Check if dimensions match
168 |         if analysis["embedding_dimension"] and analysis["embedding_dimension"] != self.embedding_dimension:
169 |             print(f"⚠️  WARNING: Database expects dimension {analysis['embedding_dimension']}, "
170 |                   f"but model produces dimension {self.embedding_dimension}")
171 |             print("   This may cause errors. Consider full migration instead.")
172 |             response = input("\nContinue anyway? (y/N): ").strip().lower()
173 |             if response != 'y':
174 |                 return 0
175 |                 
176 |         # Find memories without embeddings
177 |         cursor = self.conn.execute("""
178 |             SELECT m.id, m.content FROM memories m
179 |             WHERE NOT EXISTS (
180 |                 SELECT 1 FROM memory_embeddings e WHERE e.rowid = m.id
181 |             )
182 |         """)
183 |         
184 |         memories_to_fix = cursor.fetchall()
185 |         fixed_count = 0
186 |         
187 |         for memory_id, content in memories_to_fix:
188 |             try:
189 |                 # Generate embedding
190 |                 embedding = self.model.encode([content], convert_to_numpy=True)[0]
191 |                 
192 |                 # Insert embedding
193 |                 self.conn.execute(
194 |                     "INSERT INTO memory_embeddings (rowid, content_embedding) VALUES (?, ?)",
195 |                     (memory_id, serialize_float32(embedding))
196 |                 )
197 |                 
198 |                 fixed_count += 1
199 |                 
200 |                 # Show progress
201 |                 if fixed_count % 10 == 0:
202 |                     print(f"  ... {fixed_count}/{len(memories_to_fix)} embeddings generated")
203 |                     
204 |             except Exception as e:
205 |                 logger.error(f"Failed to generate embedding for memory {memory_id}: {e}")
206 |                 
207 |         self.conn.commit()
208 |         print(f"✅ Generated {fixed_count} embeddings")
209 |         
210 |         return fixed_count
211 |         
212 |     def verify_search(self) -> bool:
213 |         """Test if semantic search works."""
214 |         print("\nTesting semantic search...")
215 |         
216 |         try:
217 |             # Generate a test query embedding
218 |             test_query = "test query"
219 |             query_embedding = self.model.encode([test_query], convert_to_numpy=True)[0]
220 |             
221 |             # Try to search
222 |             cursor = self.conn.execute("""
223 |                 SELECT COUNT(*) FROM memory_embeddings 
224 |                 WHERE content_embedding MATCH ?
225 |                 LIMIT 1
226 |             """, (serialize_float32(query_embedding),))
227 |             
228 |             cursor.fetchone()
229 |             print("✅ Semantic search is working")
230 |             return True
231 |             
232 |         except Exception as e:
233 |             print(f"❌ Semantic search failed: {e}")
234 |             return False
235 |             
236 |     def run_repair(self):
237 |         """Run the repair process."""
238 |         print("\n" + "="*60)
239 |         print("SQLite-vec Embedding Repair Tool")
240 |         print("="*60)
241 |         
242 |         try:
243 |             # Check dependencies
244 |             if not self.check_dependencies():
245 |                 return
246 |                 
247 |             # Connect to database
248 |             self.connect_database()
249 |             
250 |             # Analyze current state
251 |             analysis = self.analyze_database()
252 |             
253 |             if not analysis["issues"]:
254 |                 print("\n✅ Database appears healthy, no repair needed")
255 |                 return
256 |                 
257 |             # Load model
258 |             self.load_model()
259 |             
260 |             # Fix missing embeddings
261 |             fixed = self.generate_missing_embeddings(analysis)
262 |             
263 |             # Verify search works
264 |             self.verify_search()
265 |             
266 |             # Re-analyze
267 |             print("\nRe-analyzing database after repair...")
268 |             new_analysis = self.analyze_database()
269 |             
270 |             print("\n" + "="*60)
271 |             print("Repair Summary")
272 |             print("="*60)
273 |             print(f"Fixed {fixed} missing embeddings")
274 |             
275 |             if new_analysis["issues"]:
276 |                 print("\n⚠️  Some issues remain:")
277 |                 for issue in new_analysis["issues"]:
278 |                     print(f"  - {issue}")
279 |                 print("\nConsider running the full migration script instead.")
280 |             else:
281 |                 print("\n✅ All issues resolved!")
282 |                 
283 |         except Exception as e:
284 |             print(f"\n❌ Repair failed: {e}")
285 |             logger.exception("Repair failed")
286 |             
287 |         finally:
288 |             if self.conn:
289 |                 self.conn.close()
290 |                 
291 | 
292 | def main():
293 |     """Run the repair tool."""
294 |     if len(sys.argv) < 2:
295 |         print("Usage: python repair_sqlite_vec_embeddings.py <database_path>")
296 |         print("\nExample:")
297 |         print("  python repair_sqlite_vec_embeddings.py ~/.mcp_memory/sqlite_vec.db")
298 |         print("\nThis tool will:")
299 |         print("  - Check for missing embeddings")
300 |         print("  - Generate embeddings for memories that don't have them")
301 |         print("  - Verify semantic search functionality")
302 |         print("\nFor more complex issues (dimension mismatches, schema problems),")
303 |         print("use migrate_sqlite_vec_embeddings.py instead.")
304 |         sys.exit(1)
305 |         
306 |     db_path = sys.argv[1]
307 |     
308 |     repair = SqliteVecRepair(db_path)
309 |     repair.run_repair()
310 |     
311 | 
312 | if __name__ == "__main__":
313 |     main()
```
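
The repair flow hinges on the `NOT EXISTS` anti-join used in both `analyze_database()` and `generate_missing_embeddings()`: any row in `memories` without a matching `rowid` in `memory_embeddings` counts as a missing embedding. A minimal sketch of that check on its own, assuming the same table layout and the `sqlite-vec` extension used by the script:

```python
import os
import sqlite3

import sqlite_vec  # needed so the memory_embeddings virtual table can be queried

def count_missing_embeddings(db_path: str) -> int:
    """Count rows in memories that have no matching row in memory_embeddings."""
    conn = sqlite3.connect(os.path.expanduser(db_path))
    try:
        conn.enable_load_extension(True)
        sqlite_vec.load(conn)
        conn.enable_load_extension(False)
        cursor = conn.execute("""
            SELECT COUNT(*) FROM memories m
            WHERE NOT EXISTS (
                SELECT 1 FROM memory_embeddings e WHERE e.rowid = m.id
            )
        """)
        return cursor.fetchone()[0]
    finally:
        conn.close()

# e.g. count_missing_embeddings("~/.mcp_memory/sqlite_vec.db")
```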

--------------------------------------------------------------------------------
/scripts/maintenance/repair_malformed_tags.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | # Copyright 2024 Heinrich Krupp
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Script to repair malformed tags in the memory database.
 18 | 
 19 | Detects and fixes tags that contain JSON serialization artifacts like:
 20 | - Tags with quotes: ["ai" or "bug-fix"
 21 | - Tags with brackets: ["important"] or ["note"]
 22 | - Double-serialized JSON arrays
 23 | 
 24 | Usage:
 25 |     python scripts/maintenance/repair_malformed_tags.py --dry-run  # Preview changes
 26 |     python scripts/maintenance/repair_malformed_tags.py            # Apply fixes
 27 | """
 28 | 
 29 | import sys
 30 | import os
 31 | import json
 32 | import re
 33 | import sqlite3
 34 | import argparse
 35 | import logging
 36 | from pathlib import Path
 37 | from datetime import datetime
 38 | from typing import List, Tuple, Set, Dict
 39 | 
 40 | # Add parent directory to path
 41 | sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 42 | 
 43 | from src.mcp_memory_service.config import SQLITE_VEC_PATH
 44 | 
 45 | # Configure logging
 46 | logging.basicConfig(
 47 |     level=logging.INFO,
 48 |     format='%(asctime)s - %(levelname)s - %(message)s',
 49 | )
 50 | logger = logging.getLogger(__name__)
 51 | 
 52 | 
 53 | def parse_malformed_tag(tag: str) -> List[str]:
 54 |     """
 55 |     Parse a malformed tag that may contain JSON serialization artifacts.
 56 | 
 57 |     Examples:
 58 |         ["ai" -> ["ai"]
 59 |         "bug-fix" -> ["bug-fix"]
 60 |         ["important"] -> ["important"]
 61 |         [\"ai\",\"bug\"] -> ["ai", "bug"]
 62 | 
 63 |     Returns:
 64 |         List of clean tag strings
 65 |     """
 66 |     # Remove leading/trailing whitespace
 67 |     tag = tag.strip()
 68 | 
 69 |     # If tag doesn't contain quotes or brackets, it's clean
 70 |     if '"' not in tag and '[' not in tag and ']' not in tag:
 71 |         return [tag] if tag else []
 72 | 
 73 |     # Try to parse as JSON first
 74 |     try:
 75 |         # Handle cases where tag is a JSON string like ["tag1","tag2"]
 76 |         if tag.startswith('[') and tag.endswith(']'):
 77 |             parsed = json.loads(tag)
 78 |             if isinstance(parsed, list):
 79 |                 # Recursively clean each element
 80 |                 result = []
 81 |                 for item in parsed:
 82 |                     result.extend(parse_malformed_tag(str(item)))
 83 |                 return result
 84 |     except json.JSONDecodeError:
 85 |         pass
 86 | 
 87 |     # Handle escaped quotes like [\"ai\" or \"bug-fix\"
 88 |     if '\\"' in tag or tag.startswith('["') or tag.startswith('"'):
 89 |         # Remove all quotes and brackets
 90 |         cleaned = tag.replace('\\"', '').replace('"', '').replace('[', '').replace(']', '').strip()
 91 |         if cleaned:
 92 |             return [cleaned]
 93 | 
 94 |     # Handle patterns like ["ai" (missing closing bracket/quote)
 95 |     if tag.startswith('["') or tag.startswith('"['):
 96 |         cleaned = tag.replace('[', '').replace('"', '').strip()
 97 |         if cleaned:
 98 |             return [cleaned]
 99 | 
100 |     # If nothing worked, just remove quotes and brackets
101 |     cleaned = tag.replace('"', '').replace('[', '').replace(']', '').strip()
102 |     return [cleaned] if cleaned else []
103 | 
104 | 
105 | def is_malformed_tag(tag: str) -> bool:
106 |     """Check if a tag contains malformed characters."""
107 |     return any(char in tag for char in ['"', '[', ']', '\\'])
108 | 
109 | 
110 | def analyze_tags(db_path: str) -> Tuple[int, int, Set[str], Dict[str, int]]:
111 |     """
112 |     Analyze tags in the database to find malformed entries.
113 | 
114 |     Returns:
115 |         (total_memories, malformed_count, malformed_tags_set, tag_frequency)
116 |     """
117 |     conn = sqlite3.connect(db_path)
118 |     cursor = conn.cursor()
119 | 
120 |     try:
121 |         # Get all memories with their tags
122 |         cursor.execute("SELECT content_hash, tags FROM memories WHERE tags IS NOT NULL AND tags != ''")
123 |         rows = cursor.fetchall()
124 | 
125 |         total_memories = len(rows)
126 |         malformed_count = 0
127 |         malformed_tags = set()
128 |         tag_frequency = {}
129 | 
130 |         for content_hash, tags_str in rows:
131 |             # Parse tags (comma-separated)
132 |             tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
133 | 
134 |             has_malformed = False
135 |             for tag in tags:
136 |                 if is_malformed_tag(tag):
137 |                     has_malformed = True
138 |                     malformed_tags.add(tag)
139 |                     tag_frequency[tag] = tag_frequency.get(tag, 0) + 1
140 | 
141 |             if has_malformed:
142 |                 malformed_count += 1
143 | 
144 |         return total_memories, malformed_count, malformed_tags, tag_frequency
145 | 
146 |     finally:
147 |         conn.close()
148 | 
149 | 
150 | def repair_tags(db_path: str, dry_run: bool = False) -> Tuple[int, int, Dict[str, List[str]]]:
151 |     """
152 |     Repair malformed tags in the database.
153 | 
154 |     Returns:
155 |         (memories_updated, tags_fixed, replacements_dict)
156 |     """
157 |     conn = sqlite3.connect(db_path)
158 |     cursor = conn.cursor()
159 | 
160 |     memories_updated = 0
161 |     tags_fixed = 0
162 |     replacements = {}  # old_tag -> [new_tags]
163 | 
164 |     try:
165 |         # Get all memories with tags
166 |         cursor.execute("SELECT content_hash, tags FROM memories WHERE tags IS NOT NULL AND tags != ''")
167 |         rows = cursor.fetchall()
168 | 
169 |         for content_hash, tags_str in rows:
170 |             # Parse tags (comma-separated)
171 |             original_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
172 | 
173 |             # Check if any tags are malformed
174 |             needs_repair = any(is_malformed_tag(tag) for tag in original_tags)
175 | 
176 |             if needs_repair:
177 |                 # Parse and clean each tag
178 |                 new_tags = []
179 |                 for tag in original_tags:
180 |                     if is_malformed_tag(tag):
181 |                         parsed = parse_malformed_tag(tag)
182 |                         if parsed:
183 |                             new_tags.extend(parsed)
184 |                             replacements[tag] = parsed
185 |                             tags_fixed += 1
186 |                     else:
187 |                         new_tags.append(tag)
188 | 
189 |                 # Remove duplicates while preserving order
190 |                 seen = set()
191 |                 unique_tags = []
192 |                 for tag in new_tags:
193 |                     if tag and tag not in seen:
194 |                         seen.add(tag)
195 |                         unique_tags.append(tag)
196 | 
197 |                 # Update database
198 |                 new_tags_str = ",".join(unique_tags)
199 | 
200 |                 if dry_run:
201 |                     logger.info(f"[DRY RUN] Would update {content_hash[:8]}...")
202 |                     logger.info(f"  Old: {tags_str}")
203 |                     logger.info(f"  New: {new_tags_str}")
204 |                 else:
205 |                     cursor.execute(
206 |                         "UPDATE memories SET tags = ? WHERE content_hash = ?",
207 |                         (new_tags_str, content_hash)
208 |                     )
209 | 
210 |                 memories_updated += 1
211 | 
212 |         if not dry_run:
213 |             conn.commit()
214 |             logger.info(f"✅ Database updated successfully")
215 | 
216 |         return memories_updated, tags_fixed, replacements
217 | 
218 |     finally:
219 |         conn.close()
220 | 
221 | 
222 | def create_backup(db_path: str) -> str:
223 |     """Create a backup of the database before modifications."""
224 |     backup_path = f"{db_path}.backup-{datetime.now().strftime('%Y%m%d_%H%M%S')}"
225 | 
226 |     import shutil
227 |     shutil.copy2(db_path, backup_path)
228 | 
229 |     logger.info(f"✅ Backup created: {backup_path}")
230 |     return backup_path
231 | 
232 | 
233 | def main():
234 |     parser = argparse.ArgumentParser(
235 |         description="Repair malformed tags in the memory database",
236 |         formatter_class=argparse.RawDescriptionHelpFormatter,
237 |         epilog="""
238 | Examples:
239 |   # Preview changes without modifying database
240 |   python repair_malformed_tags.py --dry-run
241 | 
242 |   # Apply fixes (creates backup automatically)
243 |   python repair_malformed_tags.py
244 | 
245 |   # Verbose output with detailed logging
246 |   python repair_malformed_tags.py --verbose
247 | """
248 |     )
249 |     parser.add_argument(
250 |         '--dry-run',
251 |         action='store_true',
252 |         help='Preview changes without modifying the database'
253 |     )
254 |     parser.add_argument(
255 |         '--verbose', '-v',
256 |         action='store_true',
257 |         help='Enable verbose logging'
258 |     )
259 |     parser.add_argument(
260 |         '--db-path',
261 |         type=str,
262 |         default=SQLITE_VEC_PATH,
263 |         help=f'Path to SQLite database (default: {SQLITE_VEC_PATH})'
264 |     )
265 | 
266 |     args = parser.parse_args()
267 | 
268 |     # Set logging level
269 |     if args.verbose:
270 |         logging.getLogger().setLevel(logging.DEBUG)
271 | 
272 |     # Check if database exists
273 |     if not os.path.exists(args.db_path):
274 |         logger.error(f"❌ Database not found: {args.db_path}")
275 |         sys.exit(1)
276 | 
277 |     logger.info("=" * 60)
278 |     logger.info("🔧 Malformed Tag Repair Tool")
279 |     logger.info("=" * 60)
280 |     logger.info(f"Database: {args.db_path}")
281 |     logger.info(f"Mode: {'DRY RUN (no changes)' if args.dry_run else 'REPAIR (will modify database)'}")
282 |     logger.info("")
283 | 
284 |     # Analyze tags
285 |     logger.info("📊 Analyzing tags...")
286 |     total_memories, malformed_count, malformed_tags, tag_frequency = analyze_tags(args.db_path)
287 | 
288 |     logger.info(f"Total memories: {total_memories}")
289 |     logger.info(f"Memories with malformed tags: {malformed_count}")
290 |     logger.info(f"Unique malformed tags: {len(malformed_tags)}")
291 |     logger.info("")
292 | 
293 |     if malformed_count == 0:
294 |         logger.info("✅ No malformed tags found! Database is clean.")
295 |         return
296 | 
297 |     # Show most common malformed tags
298 |     logger.info("🔍 Most common malformed tags:")
299 |     sorted_tags = sorted(tag_frequency.items(), key=lambda x: x[1], reverse=True)
300 |     for tag, count in sorted_tags[:10]:
301 |         logger.info(f"  {tag!r} -> appears {count} times")
302 |         parsed = parse_malformed_tag(tag)
303 |         logger.info(f"    Will become: {parsed}")
304 |     logger.info("")
305 | 
306 |     # Create backup if not dry-run
307 |     if not args.dry_run:
308 |         logger.info("💾 Creating backup...")
309 |         backup_path = create_backup(args.db_path)
310 |         logger.info("")
311 | 
312 |     # Repair tags
313 |     logger.info("🔧 Repairing tags...")
314 |     memories_updated, tags_fixed, replacements = repair_tags(args.db_path, dry_run=args.dry_run)
315 | 
316 |     logger.info("")
317 |     logger.info("=" * 60)
318 |     logger.info("📈 Summary")
319 |     logger.info("=" * 60)
320 |     logger.info(f"Memories updated: {memories_updated}")
321 |     logger.info(f"Tags fixed: {tags_fixed}")
322 |     logger.info("")
323 | 
324 |     if replacements:
325 |         logger.info("🔄 Tag replacements:")
326 |         for old_tag, new_tags in list(replacements.items())[:10]:
327 |             logger.info(f"  {old_tag!r} -> {new_tags}")
328 |         if len(replacements) > 10:
329 |             logger.info(f"  ... and {len(replacements) - 10} more")
330 | 
331 |     logger.info("")
332 |     if args.dry_run:
333 |         logger.info("⚠️  This was a DRY RUN - no changes were made")
334 |         logger.info("   Run without --dry-run to apply fixes")
335 |     else:
336 |         logger.info("✅ Repair completed successfully!")
337 |         logger.info(f"   Backup saved to: {backup_path}")
338 | 
339 |     logger.info("=" * 60)
340 | 
341 | 
342 | if __name__ == "__main__":
343 |     main()
344 | 
```
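
The detection step is a simple character heuristic: any tag containing quotes, brackets, or backslashes is treated as a JSON-serialization artifact and routed through `parse_malformed_tag()`. A small self-contained sketch of that heuristic, with `is_malformed_tag` reproduced from the script and sample tags mirroring its docstring examples:

```python
def is_malformed_tag(tag: str) -> bool:
    # Same heuristic as the script: any JSON-artifact character marks a tag as malformed.
    return any(char in tag for char in ['"', '[', ']', '\\'])

samples = ['ai', '["ai"', '"bug-fix"', '["important"]', 'clean-tag']
for tag in samples:
    print(f"{tag!r:20} malformed={is_malformed_tag(tag)}")
```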

--------------------------------------------------------------------------------
/tests/api/test_compact_types.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Tests for compact data types.
 17 | 
 18 | Validates token efficiency, immutability, and type safety of compact types.
 19 | """
 20 | 
 21 | import pytest
 22 | import time
 23 | from mcp_memory_service.api.types import (
 24 |     CompactMemory,
 25 |     CompactSearchResult,
 26 |     CompactHealthInfo
 27 | )
 28 | 
 29 | 
 30 | class TestCompactMemory:
 31 |     """Tests for CompactMemory type."""
 32 | 
 33 |     def test_compact_memory_creation(self):
 34 |         """Test basic CompactMemory creation."""
 35 |         memory = CompactMemory(
 36 |             hash='abc12345',
 37 |             preview='Test content preview',
 38 |             tags=('test', 'example'),
 39 |             created=time.time(),
 40 |             score=0.95
 41 |         )
 42 | 
 43 |         assert memory.hash == 'abc12345'
 44 |         assert memory.preview == 'Test content preview'
 45 |         assert memory.tags == ('test', 'example')
 46 |         assert memory.score == 0.95
 47 |         assert isinstance(memory.created, float)
 48 | 
 49 |     def test_compact_memory_immutability(self):
 50 |         """Test that CompactMemory is immutable."""
 51 |         memory = CompactMemory(
 52 |             hash='abc12345',
 53 |             preview='Test content',
 54 |             tags=('test',),
 55 |             created=time.time(),
 56 |             score=0.85
 57 |         )
 58 | 
 59 |         # NamedTuple should be immutable
 60 |         with pytest.raises(AttributeError):
 61 |             memory.hash = 'new_hash'  # type: ignore
 62 | 
 63 |     def test_compact_memory_tuple_behavior(self):
 64 |         """Test that CompactMemory behaves like a tuple."""
 65 |         memory = CompactMemory(
 66 |             hash='abc12345',
 67 |             preview='Test content',
 68 |             tags=('test',),
 69 |             created=1730928000.0,
 70 |             score=0.85
 71 |         )
 72 | 
 73 |         # Should support tuple unpacking
 74 |         hash_val, preview, tags, created, score = memory
 75 | 
 76 |         assert hash_val == 'abc12345'
 77 |         assert preview == 'Test content'
 78 |         assert tags == ('test',)
 79 |         assert created == 1730928000.0
 80 |         assert score == 0.85
 81 | 
 82 |     def test_compact_memory_field_access(self):
 83 |         """Test named field access."""
 84 |         memory = CompactMemory(
 85 |             hash='test123',
 86 |             preview='Preview text',
 87 |             tags=('tag1', 'tag2'),
 88 |             created=1730928000.0,
 89 |             score=0.75
 90 |         )
 91 | 
 92 |         # Named access should work
 93 |         assert memory.hash == 'test123'
 94 |         assert memory.tags[0] == 'tag1'
 95 |         assert memory.tags[1] == 'tag2'
 96 | 
 97 |     def test_compact_memory_token_size(self):
 98 |         """Test that CompactMemory achieves target token size."""
 99 |         # Create memory with typical values
100 |         memory = CompactMemory(
101 |             hash='abc12345',
102 |             preview='A' * 200,  # 200 char preview
103 |             tags=('tag1', 'tag2', 'tag3'),
104 |             created=1730928000.0,
105 |             score=0.85
106 |         )
107 | 
108 |         # Convert to string representation (approximates token count)
109 |         repr_str = str(memory)
110 | 
111 |         # Should be much smaller than full Memory object (~820 tokens)
112 |         # Target: ~73 tokens, allow some margin
113 |         # Rough estimate: 1 token ≈ 4 characters
114 |         estimated_tokens = len(repr_str) / 4
115 | 
116 |         assert estimated_tokens < 150, \
117 |             f"CompactMemory too large: {estimated_tokens} tokens (target: <150)"
118 | 
119 | 
120 | class TestCompactSearchResult:
121 |     """Tests for CompactSearchResult type."""
122 | 
123 |     def test_compact_search_result_creation(self):
124 |         """Test basic CompactSearchResult creation."""
125 |         memories = (
126 |             CompactMemory('hash1', 'preview1', ('tag1',), time.time(), 0.95),
127 |             CompactMemory('hash2', 'preview2', ('tag2',), time.time(), 0.85),
128 |         )
129 | 
130 |         result = CompactSearchResult(
131 |             memories=memories,
132 |             total=2,
133 |             query='test query'
134 |         )
135 | 
136 |         assert len(result.memories) == 2
137 |         assert result.total == 2
138 |         assert result.query == 'test query'
139 | 
140 |     def test_compact_search_result_repr(self):
141 |         """Test CompactSearchResult string representation."""
142 |         memories = (
143 |             CompactMemory('hash1', 'preview1', ('tag1',), time.time(), 0.95),
144 |             CompactMemory('hash2', 'preview2', ('tag2',), time.time(), 0.85),
145 |             CompactMemory('hash3', 'preview3', ('tag3',), time.time(), 0.75),
146 |         )
147 | 
148 |         result = CompactSearchResult(
149 |             memories=memories,
150 |             total=3,
151 |             query='architecture'
152 |         )
153 | 
154 |         repr_str = repr(result)
155 |         assert 'found=3' in repr_str
156 |         assert 'shown=3' in repr_str
157 | 
158 |     def test_compact_search_result_empty(self):
159 |         """Test CompactSearchResult with no results."""
160 |         result = CompactSearchResult(
161 |             memories=(),
162 |             total=0,
163 |             query='nonexistent query'
164 |         )
165 | 
166 |         assert len(result.memories) == 0
167 |         assert result.total == 0
168 |         assert repr(result) == 'SearchResult(found=0, shown=0)'
169 | 
170 |     def test_compact_search_result_iteration(self):
171 |         """Test iterating over search results."""
172 |         memories = tuple(
173 |             CompactMemory(f'hash{i}', f'preview{i}', ('tag',), time.time(), 0.9 - i*0.1)
174 |             for i in range(5)
175 |         )
176 | 
177 |         result = CompactSearchResult(
178 |             memories=memories,
179 |             total=5,
180 |             query='test'
181 |         )
182 | 
183 |         # Should be iterable
184 |         for i, memory in enumerate(result.memories):
185 |             assert memory.hash == f'hash{i}'
186 | 
187 |     def test_compact_search_result_token_size(self):
188 |         """Test that CompactSearchResult achieves target token size."""
189 |         # Create result with 5 memories (typical use case)
190 |         memories = tuple(
191 |             CompactMemory(
192 |                 f'hash{i:04d}',
193 |                 'A' * 200,  # 200 char preview
194 |                 ('tag1', 'tag2'),
195 |                 time.time(),
196 |                 0.9 - i*0.05
197 |             )
198 |             for i in range(5)
199 |         )
200 | 
201 |         result = CompactSearchResult(
202 |             memories=memories,
203 |             total=5,
204 |             query='architecture decisions'
205 |         )
206 | 
207 |         # Convert to string representation
208 |         repr_str = str(result.memories)
209 | 
210 |         # Target: ~385 tokens for 5 results (vs ~2,625 for full Memory objects)
211 |         # Allow some margin
212 |         estimated_tokens = len(repr_str) / 4
213 | 
214 |         assert estimated_tokens < 800, \
215 |             f"CompactSearchResult too large: {estimated_tokens} tokens (target: <800 for 5 results)"
216 | 
217 | 
218 | class TestCompactHealthInfo:
219 |     """Tests for CompactHealthInfo type."""
220 | 
221 |     def test_compact_health_info_creation(self):
222 |         """Test basic CompactHealthInfo creation."""
223 |         info = CompactHealthInfo(
224 |             status='healthy',
225 |             count=1247,
226 |             backend='sqlite_vec'
227 |         )
228 | 
229 |         assert info.status == 'healthy'
230 |         assert info.count == 1247
231 |         assert info.backend == 'sqlite_vec'
232 | 
233 |     def test_compact_health_info_status_values(self):
234 |         """Test different status values."""
235 |         statuses = ['healthy', 'degraded', 'error']
236 | 
237 |         for status in statuses:
238 |             info = CompactHealthInfo(
239 |                 status=status,
240 |                 count=100,
241 |                 backend='cloudflare'
242 |             )
243 |             assert info.status == status
244 | 
245 |     def test_compact_health_info_backends(self):
246 |         """Test different backend types."""
247 |         backends = ['sqlite_vec', 'cloudflare', 'hybrid']
248 | 
249 |         for backend in backends:
250 |             info = CompactHealthInfo(
251 |                 status='healthy',
252 |                 count=500,
253 |                 backend=backend
254 |             )
255 |             assert info.backend == backend
256 | 
257 |     def test_compact_health_info_token_size(self):
258 |         """Test that CompactHealthInfo achieves target token size."""
259 |         info = CompactHealthInfo(
260 |             status='healthy',
261 |             count=1247,
262 |             backend='sqlite_vec'
263 |         )
264 | 
265 |         repr_str = str(info)
266 | 
267 |         # Target: ~20 tokens (vs ~125 for full health check)
268 |         estimated_tokens = len(repr_str) / 4
269 | 
270 |         assert estimated_tokens < 50, \
271 |             f"CompactHealthInfo too large: {estimated_tokens} tokens (target: <50)"
272 | 
273 | 
274 | class TestTokenEfficiency:
275 |     """Integration tests for overall token efficiency."""
276 | 
277 |     def test_memory_size_comparison(self):
278 |         """Compare CompactMemory size to full Memory object."""
279 |         from mcp_memory_service.models.memory import Memory
280 | 
281 |         # Create full Memory object
282 |         full_memory = Memory(
283 |             content='A' * 1000,  # Long content
284 |             content_hash='abc123def456' * 5,
285 |             tags=['tag1', 'tag2', 'tag3'],
286 |             memory_type='note',
287 |             metadata={'key': 'value'},
288 |             embedding=[0.1] * 768,  # Full embedding vector
289 |         )
290 | 
291 |         # Create compact version
292 |         compact = CompactMemory(
293 |             hash='abc12345',
294 |             preview='A' * 200,  # First 200 chars only
295 |             tags=('tag1', 'tag2', 'tag3'),
296 |             created=time.time(),
297 |             score=0.95
298 |         )
299 | 
300 |         # Compare sizes
301 |         full_repr = str(full_memory.to_dict())
302 |         compact_repr = str(compact)
303 | 
304 |         # Compact should be significantly smaller
305 |         # Note: String representation is not exact token count, allow some margin
306 |         size_ratio = len(compact_repr) / len(full_repr)
307 | 
308 |         assert size_ratio < 0.30, \
309 |             f"CompactMemory not small enough: {size_ratio:.2%} of full size (target: <30%)"
310 | 
311 |     def test_search_result_size_reduction(self):
312 |         """Validate 85%+ token reduction for search results."""
313 |         # Create 5 compact memories
314 |         memories = tuple(
315 |             CompactMemory(
316 |                 f'hash{i:04d}',
317 |                 'A' * 200,
318 |                 ('tag1', 'tag2'),
319 |                 time.time(),
320 |                 0.9 - i*0.05
321 |             )
322 |             for i in range(5)
323 |         )
324 | 
325 |         result = CompactSearchResult(
326 |             memories=memories,
327 |             total=5,
328 |             query='test'
329 |         )
330 | 
331 |         # Estimate tokens
332 |         repr_str = str(result)
333 |         estimated_tokens = len(repr_str) / 4
334 | 
335 |         # Target: 85% reduction from ~2,625 tokens → ~385 tokens
336 |         # Allow some margin: should be under 600 tokens
337 |         assert estimated_tokens < 600, \
338 |             f"Search result not efficient enough: {estimated_tokens} tokens (target: <600)"
339 | 
340 |         # Verify we're achieving significant reduction
341 |         # Original would be ~2,625 tokens, we should be well under 1000
342 |         reduction_vs_original = 1 - (estimated_tokens / 2625)
343 |         assert reduction_vs_original >= 0.75, \
344 |             f"Token reduction insufficient: {reduction_vs_original:.1%} (target: ≥75%)"
345 | 
```
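
The compact types exercised above are plain `NamedTuple`s, which is what gives the tests their immutability and tuple-unpacking behavior. A minimal reconstruction of the shapes implied by the tests follows; field names and the custom `repr` are inferred from the assertions here, and the authoritative definitions live in `mcp_memory_service/api/types.py`:

```python
from typing import NamedTuple, Tuple

class CompactMemory(NamedTuple):
    hash: str                # short content hash
    preview: str             # first ~200 characters of content
    tags: Tuple[str, ...]    # immutable tuple of tags
    created: float           # Unix timestamp
    score: float             # relevance score

class CompactSearchResult(NamedTuple):
    memories: Tuple[CompactMemory, ...]
    total: int
    query: str

    def __repr__(self) -> str:
        # Matches the repr asserted in test_compact_search_result_repr/_empty.
        return f"SearchResult(found={self.total}, shown={len(self.memories)})"

class CompactHealthInfo(NamedTuple):
    status: str   # 'healthy' | 'degraded' | 'error'
    count: int
    backend: str  # 'sqlite_vec' | 'cloudflare' | 'hybrid'
```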

--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/pdf_loader.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | PDF document loader for extracting text content from PDF files.
 17 | """
 18 | 
 19 | import logging
 20 | from pathlib import Path
 21 | from typing import AsyncGenerator, Dict, Any, Optional
 22 | import asyncio
 23 | 
 24 | from .base import DocumentLoader, DocumentChunk
 25 | from .chunker import TextChunker, ChunkingStrategy
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | 
 29 | # Try to import PDF processing library
 30 | try:
 31 |     import PyPDF2
 32 |     HAS_PYPDF2 = True
 33 | except ImportError:
 34 |     HAS_PYPDF2 = False
 35 |     logger.warning("PyPDF2 not available. PDF support will be limited.")
 36 | 
 37 | try:
 38 |     import pdfplumber
 39 |     HAS_PDFPLUMBER = True
 40 | except ImportError:
 41 |     HAS_PDFPLUMBER = False
 42 |     logger.debug("pdfplumber not available, falling back to PyPDF2")
 43 | 
 44 | 
 45 | class PDFLoader(DocumentLoader):
 46 |     """
 47 |     Document loader for PDF files.
 48 |     
 49 |     Supports multiple PDF processing backends:
 50 |     - pdfplumber (preferred): Better text extraction, table support
 51 |     - PyPDF2 (fallback): Basic text extraction
 52 |     """
 53 |     
 54 |     def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
 55 |         """
 56 |         Initialize PDF loader.
 57 |         
 58 |         Args:
 59 |             chunk_size: Target size for text chunks in characters
 60 |             chunk_overlap: Number of characters to overlap between chunks
 61 |         """
 62 |         super().__init__(chunk_size, chunk_overlap)
 63 |         self.supported_extensions = ['pdf']
 64 |         self.chunker = TextChunker(ChunkingStrategy(
 65 |             chunk_size=chunk_size,
 66 |             chunk_overlap=chunk_overlap,
 67 |             respect_paragraph_boundaries=True
 68 |         ))
 69 |         
 70 |         # Check which PDF backend is available
 71 |         if HAS_PDFPLUMBER:
 72 |             self.backend = 'pdfplumber'
 73 |         elif HAS_PYPDF2:
 74 |             self.backend = 'pypdf2'
 75 |         else:
 76 |             self.backend = None
 77 |             logger.error("No PDF processing library available. Install pypdf2 or pdfplumber.")
 78 |     
 79 |     def can_handle(self, file_path: Path) -> bool:
 80 |         """
 81 |         Check if this loader can handle the given PDF file.
 82 |         
 83 |         Args:
 84 |             file_path: Path to the file to check
 85 |             
 86 |         Returns:
 87 |             True if this loader can process the PDF file
 88 |         """
 89 |         if self.backend is None:
 90 |             return False
 91 |         
 92 |         return (file_path.suffix.lower() == '.pdf' and 
 93 |                 file_path.exists() and 
 94 |                 file_path.is_file())
 95 |     
 96 |     async def extract_chunks(self, file_path: Path, **kwargs) -> AsyncGenerator[DocumentChunk, None]:
 97 |         """
 98 |         Extract text chunks from a PDF file.
 99 |         
100 |         Args:
101 |             file_path: Path to the PDF file
102 |             **kwargs: Additional options:
103 |                 - extract_images: Whether to extract image descriptions (default: False)
104 |                 - extract_tables: Whether to extract table content (default: False)
105 |                 - page_range: Tuple of (start, end) pages to extract (1-indexed)
106 |             
107 |         Yields:
108 |             DocumentChunk objects containing extracted text and metadata
109 |             
110 |         Raises:
111 |             FileNotFoundError: If the PDF file doesn't exist
112 |             ValueError: If the PDF file is invalid or can't be processed
113 |         """
114 |         await self.validate_file(file_path)
115 |         
116 |         if self.backend is None:
117 |             raise ValueError("No PDF processing backend available")
118 |         
119 |         extract_images = kwargs.get('extract_images', False)
120 |         extract_tables = kwargs.get('extract_tables', False)
121 |         page_range = kwargs.get('page_range', None)
122 |         
123 |         logger.info(f"Extracting chunks from PDF: {file_path} using {self.backend}")
124 |         
125 |         try:
126 |             if self.backend == 'pdfplumber':
127 |                 async for chunk in self._extract_with_pdfplumber(
128 |                     file_path, extract_images, extract_tables, page_range
129 |                 ):
130 |                     yield chunk
131 |             else:
132 |                 async for chunk in self._extract_with_pypdf2(
133 |                     file_path, page_range
134 |                 ):
135 |                     yield chunk
136 |                     
137 |         except Exception as e:
138 |             logger.error(f"Error extracting from PDF {file_path}: {str(e)}")
139 |             raise ValueError(f"Failed to extract PDF content: {str(e)}") from e
140 |     
141 |     async def _extract_with_pdfplumber(
142 |         self, 
143 |         file_path: Path, 
144 |         extract_images: bool,
145 |         extract_tables: bool,
146 |         page_range: Optional[tuple]
147 |     ) -> AsyncGenerator[DocumentChunk, None]:
148 |         """
149 |         Extract text using pdfplumber backend.
150 |         
151 |         Args:
152 |             file_path: Path to PDF file
153 |             extract_images: Whether to extract image descriptions
154 |             extract_tables: Whether to extract table content
155 |             page_range: Optional page range to extract
156 |             
157 |         Yields:
158 |             DocumentChunk objects
159 |         """
160 |         def _extract_sync():
161 |             """Synchronous extraction function to run in thread pool."""
162 |             with pdfplumber.open(file_path) as pdf:
163 |                 total_pages = len(pdf.pages)
164 |                 start_page = page_range[0] - 1 if page_range else 0
165 |                 end_page = min(page_range[1], total_pages) if page_range else total_pages
166 |                 
167 |                 for page_num in range(start_page, end_page):
168 |                     page = pdf.pages[page_num]
169 |                     
170 |                     # Extract main text
171 |                     text = page.extract_text() or ""
172 |                     
173 |                     # Extract table content if requested
174 |                     if extract_tables:
175 |                         tables = page.extract_tables()
176 |                         for table in tables or []:
177 |                             table_text = self._format_table(table)
178 |                             text += f"\n\n[TABLE]\n{table_text}\n[/TABLE]"
179 |                     
180 |                     # Note: Image extraction would require additional processing
181 |                     if extract_images:
182 |                         # Placeholder for image extraction
183 |                         images = page.images
184 |                         if images:
185 |                             text += f"\n\n[IMAGES: {len(images)} images found on this page]"
186 |                     
187 |                     if text.strip():
188 |                         yield (page_num + 1, text.strip())
189 |         
190 |         # Run extraction in thread pool to avoid blocking
191 |         loop = asyncio.get_event_loop()
192 |         page_generator = await loop.run_in_executor(None, lambda: list(_extract_sync()))
193 |         
194 |         base_metadata = self.get_base_metadata(file_path)
195 |         chunk_index = 0
196 |         
197 |         for page_num, page_text in page_generator:
198 |             page_metadata = base_metadata.copy()
199 |             page_metadata.update({
200 |                 'page_number': page_num,
201 |                 'extraction_method': 'pdfplumber',
202 |                 'content_type': 'pdf_page'
203 |             })
204 |             
205 |             # Chunk the page text
206 |             chunks = self.chunker.chunk_text(page_text, page_metadata)
207 |             
208 |             for chunk_text, chunk_metadata in chunks:
209 |                 yield DocumentChunk(
210 |                     content=chunk_text,
211 |                     metadata=chunk_metadata,
212 |                     chunk_index=chunk_index,
213 |                     source_file=file_path
214 |                 )
215 |                 chunk_index += 1
216 |     
217 |     async def _extract_with_pypdf2(
218 |         self, 
219 |         file_path: Path,
220 |         page_range: Optional[tuple]
221 |     ) -> AsyncGenerator[DocumentChunk, None]:
222 |         """
223 |         Extract text using PyPDF2 backend.
224 |         
225 |         Args:
226 |             file_path: Path to PDF file
227 |             page_range: Optional page range to extract
228 |             
229 |         Yields:
230 |             DocumentChunk objects
231 |         """
232 |         def _extract_sync():
233 |             """Synchronous extraction function."""
234 |             with open(file_path, 'rb') as file:
235 |                 pdf_reader = PyPDF2.PdfReader(file)
236 |                 total_pages = len(pdf_reader.pages)
237 |                 start_page = page_range[0] - 1 if page_range else 0
238 |                 end_page = min(page_range[1], total_pages) if page_range else total_pages
239 |                 
240 |                 for page_num in range(start_page, end_page):
241 |                     page = pdf_reader.pages[page_num]
242 |                     text = page.extract_text()
243 |                     
244 |                     if text.strip():
245 |                         yield (page_num + 1, text.strip())
246 |         
247 |         # Run extraction in thread pool
248 |         loop = asyncio.get_event_loop()
249 |         page_generator = await loop.run_in_executor(None, lambda: list(_extract_sync()))
250 |         
251 |         base_metadata = self.get_base_metadata(file_path)
252 |         chunk_index = 0
253 |         
254 |         for page_num, page_text in page_generator:
255 |             page_metadata = base_metadata.copy()
256 |             page_metadata.update({
257 |                 'page_number': page_num,
258 |                 'extraction_method': 'pypdf2',
259 |                 'content_type': 'pdf_page'
260 |             })
261 |             
262 |             # Chunk the page text
263 |             chunks = self.chunker.chunk_text(page_text, page_metadata)
264 |             
265 |             for chunk_text, chunk_metadata in chunks:
266 |                 yield DocumentChunk(
267 |                     content=chunk_text,
268 |                     metadata=chunk_metadata,
269 |                     chunk_index=chunk_index,
270 |                     source_file=file_path
271 |                 )
272 |                 chunk_index += 1
273 |     
274 |     def _format_table(self, table: list) -> str:
275 |         """
276 |         Format extracted table data as text.
277 |         
278 |         Args:
279 |             table: Table data as list of rows
280 |             
281 |         Returns:
282 |             Formatted table text
283 |         """
284 |         if not table:
285 |             return ""
286 |         
287 |         # Simple table formatting
288 |         formatted_rows = []
289 |         for row in table:
290 |             if row:  # Skip empty rows
291 |                 cleaned_row = [str(cell).strip() if cell else "" for cell in row]
292 |                 formatted_rows.append(" | ".join(cleaned_row))
293 |         
294 |         return "\n".join(formatted_rows)
295 | 
296 | 
297 | # Register the PDF loader
298 | def _register_pdf_loader():
299 |     """Register PDF loader with the registry."""
300 |     try:
301 |         from .registry import register_loader
302 |         register_loader(PDFLoader, ['pdf'])
303 |         logger.debug("PDF loader registered successfully")
304 |     except ImportError:
305 |         logger.debug("Registry not available during import")
306 | 
307 | 
308 | # Auto-register when module is imported
309 | _register_pdf_loader()
```
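
Callers consume `PDFLoader` as an async generator: `can_handle()` gates on backend availability, then `extract_chunks()` yields `DocumentChunk` objects per page segment. A usage sketch, assuming the package is installed so `mcp_memory_service.ingestion.pdf_loader` is importable and a local PDF path is supplied:

```python
import asyncio
from pathlib import Path

from mcp_memory_service.ingestion.pdf_loader import PDFLoader

async def ingest_pdf(path: str) -> None:
    loader = PDFLoader(chunk_size=1000, chunk_overlap=200)
    pdf_path = Path(path)
    if not loader.can_handle(pdf_path):
        print("No PDF backend available, or not an existing .pdf file")
        return
    # Pages 1-5 only; tables are folded into the text as [TABLE] blocks (pdfplumber backend).
    async for chunk in loader.extract_chunks(pdf_path, page_range=(1, 5), extract_tables=True):
        print(chunk.chunk_index, chunk.metadata.get("page_number"), len(chunk.content))

# asyncio.run(ingest_pdf("example.pdf"))
```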

--------------------------------------------------------------------------------
/claude-hooks/utilities/mcp-client.js:
--------------------------------------------------------------------------------

```javascript
  1 | /**
  2 |  * MCP Client for Memory Hook Integration
  3 |  * Provides MCP protocol communication for memory operations
  4 |  */
  5 | 
  6 | const { spawn } = require('child_process');
  7 | const { EventEmitter } = require('events');
  8 | const fs = require('fs');
  9 | const path = require('path');
 10 | 
 11 | class MCPClient extends EventEmitter {
 12 |     constructor(serverCommand, options = {}) {
 13 |         super();
 14 |         this.serverCommand = serverCommand;
 15 |         this.serverWorkingDir = options.workingDir || process.cwd();
 16 |         this.connectionTimeout = options.connectionTimeout || 5000;
 17 |         this.toolCallTimeout = options.toolCallTimeout || 10000;
 18 |         this.serverProcess = null;
 19 |         this.messageId = 0;
 20 |         this.pendingRequests = new Map();
 21 |         this.connected = false;
 22 |         this.buffer = '';
 23 | 
 24 |         // Load environment variables from .env file
 25 |         this.loadEnvironment();
 26 |     }
 27 | 
 28 |     /**
 29 |      * Load environment variables from .env file
 30 |      */
 31 |     loadEnvironment() {
 32 |         const envPath = path.join(this.serverWorkingDir, '.env');
 33 |         try {
 34 |             if (fs.existsSync(envPath)) {
 35 |                 const envContent = fs.readFileSync(envPath, 'utf8');
 36 |                 const lines = envContent.split('\n');
 37 | 
 38 |                 for (const line of lines) {
 39 |                     const match = line.match(/^([^#\s][^=]*?)=(.*)$/);
 40 |                     if (match) {
 41 |                         const [, key, value] = match;
 42 |                         if (!process.env[key]) {
 43 |                             process.env[key] = value.replace(/^["']|["']$/g, '');
 44 |                         }
 45 |                     }
 46 |                 }
 47 |             }
 48 |         } catch (error) {
 49 |             // Ignore .env loading errors
 50 |         }
 51 |     }
 52 | 
 53 |     /**
 54 |      * Start MCP server and establish connection
 55 |      */
 56 |     async connect() {
 57 |         return new Promise((resolve, reject) => {
 58 |             try {
 59 |                 // Start MCP server process
 60 |                 this.serverProcess = spawn(this.serverCommand[0], this.serverCommand.slice(1), {
 61 |                     cwd: this.serverWorkingDir,
 62 |                     stdio: ['pipe', 'pipe', 'pipe'],
 63 |                     env: { ...process.env }
 64 |                 });
 65 | 
 66 |                 // Handle server output
 67 |                 this.serverProcess.stdout.on('data', (data) => {
 68 |                     this.buffer += data.toString();
 69 |                     this.processMessages();
 70 |                 });
 71 | 
 72 |                 this.serverProcess.stderr.on('data', (data) => {
 73 |                     const error = data.toString();
 74 |                     // Only emit critical errors, ignore warnings and debug info
 75 |                     if (error.includes('FATAL') || error.includes('ExceptionGroup')) {
 76 |                         this.emit('error', new Error(`Server error: ${error.substring(0, 200)}...`));
 77 |                     }
 78 |                 });
 79 | 
 80 |                 this.serverProcess.on('error', (error) => {
 81 |                     if (!this.connected) {
 82 |                         reject(new Error(`Server process error: ${error.message}`));
 83 |                     }
 84 |                 });
 85 | 
 86 |                 this.serverProcess.on('exit', (code) => {
 87 |                     this.connected = false;
 88 |                     if (code !== 0 && !this.connected) {
 89 |                         reject(new Error(`Server failed to start (exit code ${code})`));
 90 |                     }
 91 |                 });
 92 | 
 93 |                 // Initialize MCP connection
 94 |                 this.sendInitialize()
 95 |                     .then(() => {
 96 |                         this.connected = true;
 97 |                         resolve();
 98 |                     })
 99 |                     .catch(reject);
100 | 
101 |                 // Connection timeout
102 |                 setTimeout(() => {
103 |                     if (!this.connected) {
104 |                         reject(new Error('Connection timeout'));
105 |                     }
106 |                 }, this.connectionTimeout);
107 | 
108 |             } catch (error) {
109 |                 reject(error);
110 |             }
111 |         });
112 |     }
113 | 
114 |     /**
115 |      * Process incoming messages from server
116 |      */
117 |     processMessages() {
118 |         const lines = this.buffer.split('\n');
119 |         this.buffer = lines.pop() || ''; // Keep incomplete line in buffer
120 | 
121 |         for (const line of lines) {
122 |             if (line.trim()) {
123 |                 try {
124 |                     const message = JSON.parse(line);
125 |                     this.handleMessage(message);
126 |                 } catch (error) {
127 |                     // Ignore malformed messages
128 |                 }
129 |             }
130 |         }
131 |     }
132 | 
133 |     /**
134 |      * Handle incoming MCP messages
135 |      */
136 |     handleMessage(message) {
137 |         if (message.id && this.pendingRequests.has(message.id)) {
138 |             const { resolve, reject } = this.pendingRequests.get(message.id);
139 |             this.pendingRequests.delete(message.id);
140 | 
141 |             if (message.error) {
142 |                 reject(new Error(message.error.message || 'MCP Error'));
143 |             } else {
144 |                 resolve(message.result);
145 |             }
146 |         }
147 |     }
148 | 
149 |     /**
150 |      * Send MCP initialize request
151 |      */
152 |     async sendInitialize() {
153 |         const message = {
154 |             jsonrpc: '2.0',
155 |             id: ++this.messageId,
156 |             method: 'initialize',
157 |             params: {
158 |                 protocolVersion: '2024-11-05',
159 |                 capabilities: {
160 |                     tools: {}
161 |                 },
162 |                 clientInfo: {
163 |                     name: 'claude-hooks-mcp-client',
164 |                     version: '1.0.0'
165 |                 }
166 |             }
167 |         };
168 | 
169 |         return this.sendMessage(message);
170 |     }
171 | 
172 |     /**
173 |      * Send message to MCP server
174 |      */
175 |     async sendMessage(message) {
176 |         return new Promise((resolve, reject) => {
177 |             if (!this.serverProcess || this.serverProcess.exitCode !== null) {
178 |                 reject(new Error('Server not running'));
179 |                 return;
180 |             }
181 | 
182 |             if (message.id) {
183 |                 this.pendingRequests.set(message.id, { resolve, reject });
184 | 
185 |                 // Timeout for this specific request
186 |                 setTimeout(() => {
187 |                     if (this.pendingRequests.has(message.id)) {
188 |                         this.pendingRequests.delete(message.id);
189 |                         reject(new Error('Request timeout'));
190 |                     }
191 |                 }, this.toolCallTimeout);
192 |             }
193 | 
194 |             try {
195 |                 const messageStr = JSON.stringify(message) + '\n';
196 |                 this.serverProcess.stdin.write(messageStr);
197 | 
198 |                 if (!message.id) {
199 |                     resolve(); // No response expected
200 |                 }
201 |             } catch (error) {
202 |                 if (message.id && this.pendingRequests.has(message.id)) {
203 |                     this.pendingRequests.delete(message.id);
204 |                 }
205 |                 reject(error);
206 |             }
207 |         });
208 |     }
209 | 
210 |     /**
211 |      * Call a tool via MCP
212 |      */
213 |     async callTool(toolName, args = {}) {
214 |         const message = {
215 |             jsonrpc: '2.0',
216 |             id: ++this.messageId,
217 |             method: 'tools/call',
218 |             params: {
219 |                 name: toolName,
220 |                 arguments: args
221 |             }
222 |         };
223 | 
224 |         return this.sendMessage(message);
225 |     }
226 | 
227 |     /**
228 |      * Get memory service health status
229 |      */
230 |     async getHealthStatus() {
231 |         try {
232 |             const result = await this.callTool('check_database_health');
233 |             return {
234 |                 success: true,
235 |                 data: result.content ? this.parseToolResponse(result.content) : result
236 |             };
237 |         } catch (error) {
238 |             return {
239 |                 success: false,
240 |                 error: error.message,
241 |                 fallback: true
242 |             };
243 |         }
244 |     }
245 | 
246 |     /**
247 |      * Query memories using semantic search
248 |      */
249 |     async queryMemories(query, limit = 10) {
250 |         try {
251 |             const result = await this.callTool('retrieve_memory', {
252 |                 query: query,
253 |                 n_results: limit
254 |             });
255 | 
256 |             return this.parseToolResponse(result.content);
257 |         } catch (error) {
258 |             console.warn('[MCP Client] Memory query error:', error.message);
259 |             return [];
260 |         }
261 |     }
262 | 
263 |     /**
264 |      * Query memories by time range
265 |      */
266 |     async queryMemoriesByTime(timeQuery, limit = 10) {
267 |         try {
268 |             const result = await this.callTool('recall_memory', {
269 |                 query: timeQuery,
270 |                 n_results: limit
271 |             });
272 | 
273 |             return this.parseToolResponse(result.content);
274 |         } catch (error) {
275 |             console.warn('[MCP Client] Time-based memory query error:', error.message);
276 |             return [];
277 |         }
278 |     }
279 | 
280 |     /**
281 |      * Parse tool response content
282 |      */
283 |     parseToolResponse(content) {
284 |         if (!content) return [];
285 | 
286 |         // Handle array of content objects
287 |         if (Array.isArray(content)) {
288 |             const textContent = content.find(c => c.type === 'text')?.text || '';
289 |             return this.parseMemoryResults(textContent);
290 |         }
291 | 
292 |         // Handle direct text content
293 |         if (typeof content === 'string') {
294 |             return this.parseMemoryResults(content);
295 |         }
296 | 
297 |         // Handle object with text property
298 |         if (content.text) {
299 |             return this.parseMemoryResults(content.text);
300 |         }
301 | 
302 |         return [];
303 |     }
304 | 
305 |     /**
306 |      * Parse memory results from text response
307 |      */
308 |     parseMemoryResults(textData) {
309 |         try {
310 |             // Handle Python dict format conversion to JSON
311 |             let cleanText = textData
312 |                 .replace(/'/g, '"')
313 |                 .replace(/True/g, 'true')
314 |                 .replace(/False/g, 'false')
315 |                 .replace(/None/g, 'null');
316 | 
317 |             const parsed = JSON.parse(cleanText);
318 |             return parsed.results || parsed.memories || parsed || [];
319 |         } catch (error) {
320 |             // Try to extract JSON from text
321 |             const jsonMatch = textData.match(/\{[\s\S]*\}/);
322 |             if (jsonMatch) {
323 |                 try {
324 |                     const extracted = JSON.parse(jsonMatch[0]);
325 |                     return extracted.results || extracted.memories || [extracted];
326 |                 } catch {}
327 |             }
328 | 
329 |             console.warn('[MCP Client] Could not parse memory results:', error.message);
330 |             return [];
331 |         }
332 |     }
333 | 
334 |     /**
335 |      * Disconnect and cleanup
336 |      */
337 |     async disconnect() {
338 |         this.connected = false;
339 | 
340 |         // Clear pending requests
341 |         for (const [id, { reject }] of this.pendingRequests) {
342 |             reject(new Error('Connection closed'));
343 |         }
344 |         this.pendingRequests.clear();
345 | 
346 |         // Terminate server process
347 |         if (this.serverProcess && this.serverProcess.exitCode === null) {
348 |             this.serverProcess.kill('SIGTERM');
349 | 
350 |             // Force kill if the process doesn't exit gracefully
351 |             setTimeout(() => {
352 |                 if (this.serverProcess && this.serverProcess.exitCode === null) {
353 |                     this.serverProcess.kill('SIGKILL');
354 |                 }
355 |             }, 2000);
356 |         }
357 |     }
358 | }
359 | 
360 | module.exports = { MCPClient };
```
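
For reference, a minimal usage sketch of this client. The constructor options shown here (`serverCommand`, `serverWorkingDir`) and the require path are assumptions based on the fields used above; the actual constructor is defined earlier in the file and is not shown on this page.

```javascript
// Hypothetical usage sketch - option names, launch command, and paths are assumptions,
// not the documented API of this repository.
const { MCPClient } = require('./mcp-client');

async function demo() {
    const client = new MCPClient({
        serverCommand: ['uv', 'run', 'memory', 'server'],  // assumed launch command
        serverWorkingDir: '/path/to/mcp-memory-service'    // assumed working directory
    });

    try {
        await client.connect();                            // spawns the server and sends `initialize`
        const health = await client.getHealthStatus();     // wraps the `check_database_health` tool
        const memories = await client.queryMemories('recent work on hooks', 5);
        console.log('healthy:', health.success, 'memories:', memories.length);
    } finally {
        await client.disconnect();                         // rejects pending requests, SIGTERM then SIGKILL
    }
}

demo().catch(console.error);
```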

--------------------------------------------------------------------------------
/scripts/validation/validate_timestamp_integrity.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Timestamp Integrity Validation Script
  4 | 
  5 | Detects timestamp anomalies that could indicate the regression bug where
  6 | created_at timestamps were being reset during metadata sync operations.
  7 | 
  8 | Checks for:
  9 | 1. Suspicious clusters of recent created_at timestamps
 10 | 2. Created_at timestamps that are newer than they should be
 11 | 3. Memories with identical or very similar created_at timestamps (indicating bulk reset)
 12 | 4. created_at > updated_at (logically impossible)
 13 | """
 14 | 
 15 | import asyncio
 16 | import sys
 17 | import os
 18 | import time
 19 | from datetime import datetime, timedelta
 20 | from pathlib import Path
 21 | from collections import Counter
 22 | from typing import List, Dict, Tuple, Optional
 23 | 
 24 | # Add project root to path
 25 | project_root = Path(__file__).parent.parent.parent
 26 | sys.path.insert(0, str(project_root / "src"))
 27 | 
 28 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
 29 | import mcp_memory_service.config as config_module
 30 | 
 31 | 
 32 | class TimestampIntegrityValidator:
 33 |     """Validator for detecting timestamp integrity issues."""
 34 | 
 35 |     def __init__(self, storage):
 36 |         self.storage = storage
 37 |         self.warnings = []
 38 |         self.errors = []
 39 | 
 40 |     async def validate_all(self) -> Tuple[bool, List[str], List[str]]:
 41 |         """
 42 |         Run all timestamp integrity checks.
 43 | 
 44 |         Returns:
 45 |             Tuple of (is_healthy, warnings, errors)
 46 |         """
 47 |         print("🔍 Running timestamp integrity validation...\n")
 48 | 
 49 |         await self.check_impossible_timestamps()
 50 |         await self.check_suspicious_clusters()
 51 |         await self.check_future_timestamps()
 52 |         await self.check_timestamp_distribution()
 53 | 
 54 |         # Print summary
 55 |         print("\n" + "="*60)
 56 |         print("📊 VALIDATION SUMMARY")
 57 |         print("="*60)
 58 | 
 59 |         if not self.errors and not self.warnings:
 60 |             print("✅ No timestamp integrity issues detected!")
 61 |             return True, [], []
 62 | 
 63 |         if self.errors:
 64 |             print(f"\n❌ ERRORS: {len(self.errors)}")
 65 |             for error in self.errors:
 66 |                 print(f"  - {error}")
 67 | 
 68 |         if self.warnings:
 69 |             print(f"\n⚠️  WARNINGS: {len(self.warnings)}")
 70 |             for warning in self.warnings:
 71 |                 print(f"  - {warning}")
 72 | 
 73 |         return len(self.errors) == 0, self.warnings, self.errors
 74 | 
 75 |     async def check_impossible_timestamps(self):
 76 |         """Check for logically impossible timestamps (created_at > updated_at)."""
 77 |         print("1️⃣ Checking for impossible timestamps (created_at > updated_at)...")
 78 | 
 79 |         if hasattr(self.storage, 'conn'):  # SQLite-vec
 80 |             cursor = self.storage.conn.execute('''
 81 |                 SELECT content_hash, created_at, updated_at,
 82 |                        created_at_iso, updated_at_iso
 83 |                 FROM memories
 84 |                 WHERE created_at > updated_at
 85 |             ''')
 86 | 
 87 |             impossible = cursor.fetchall()
 88 | 
 89 |             if impossible:
 90 |                 self.errors.append(
 91 |                     f"Found {len(impossible)} memories with created_at > updated_at (impossible!)"
 92 |                 )
 93 |                 for row in impossible[:5]:  # Show first 5
 94 |                     content_hash, created_at, updated_at, created_iso, updated_iso = row
 95 |                     print(f"   ❌ {content_hash[:8]}: created={created_iso}, updated={updated_iso}")
 96 |             else:
 97 |                 print("   ✅ No impossible timestamps found")
 98 | 
 99 |         else:
100 |             print("   ⚠️ Skipped (not SQLite backend)")
101 | 
102 |     async def check_suspicious_clusters(self):
103 |         """Check for suspicious clusters of recent created_at timestamps."""
104 |         print("\n2️⃣ Checking for suspicious timestamp clusters...")
105 | 
106 |         if hasattr(self.storage, 'conn'):  # SQLite-vec
107 |             # Get all created_at timestamps
108 |             cursor = self.storage.conn.execute('''
109 |                 SELECT created_at, created_at_iso, COUNT(*) as count
110 |                 FROM memories
111 |                 GROUP BY created_at
112 |                 HAVING COUNT(*) > 1
113 |                 ORDER BY count DESC
114 |                 LIMIT 10
115 |             ''')
116 | 
117 |             clusters = cursor.fetchall()
118 | 
119 |             if clusters:
120 |                 # Check if there are large clusters (> 5 memories with same timestamp)
121 |                 large_clusters = [c for c in clusters if c[2] > 5]
122 | 
123 |                 if large_clusters:
124 |                     self.warnings.append(
125 |                         f"Found {len(large_clusters)} suspicious timestamp clusters "
126 |                         f"(multiple memories with identical created_at)"
127 |                     )
128 |                     print(f"   ⚠️  {len(large_clusters)} suspicious clusters found:")
129 |                     for created_at, created_iso, count in large_clusters[:5]:
130 |                         age_hours = (time.time() - created_at) / 3600
131 |                         print(f"      - {count} memories at {created_iso} ({age_hours:.1f}h ago)")
132 |                 else:
133 |                     print(f"   ✅ No suspicious clusters (some duplicates normal)")
134 | 
135 |             else:
136 |                 print("   ✅ No timestamp clusters found")
137 | 
138 |         else:
139 |             print("   ⚠️ Skipped (not SQLite backend)")
140 | 
141 |     async def check_future_timestamps(self):
142 |         """Check for timestamps in the future."""
143 |         print("\n3️⃣ Checking for future timestamps...")
144 | 
145 |         now = time.time()
146 |         future_threshold = now + 300  # 5 minutes tolerance
147 | 
148 |         if hasattr(self.storage, 'conn'):  # SQLite-vec
149 |             cursor = self.storage.conn.execute('''
150 |                 SELECT content_hash, created_at, updated_at,
151 |                        created_at_iso, updated_at_iso
152 |                 FROM memories
153 |                 WHERE created_at > ? OR updated_at > ?
154 |             ''', (future_threshold, future_threshold))
155 | 
156 |             future_timestamps = cursor.fetchall()
157 | 
158 |             if future_timestamps:
159 |                 self.errors.append(
160 |                     f"Found {len(future_timestamps)} memories with timestamps in the future!"
161 |                 )
162 |                 for row in future_timestamps[:5]:
163 |                     content_hash, created_at, updated_at, created_iso, updated_iso = row
164 |                     if created_at > future_threshold:
165 |                         print(f"   ❌ {content_hash[:8]}: created_at in future: {created_iso}")
166 |                     if updated_at > future_threshold:
167 |                         print(f"   ❌ {content_hash[:8]}: updated_at in future: {updated_iso}")
168 |             else:
169 |                 print("   ✅ No future timestamps found")
170 | 
171 |         else:
172 |             print("   ⚠️ Skipped (not SQLite backend)")
173 | 
174 |     async def check_timestamp_distribution(self):
175 |         """Check timestamp distribution for anomalies (e.g., all recent)."""
176 |         print("\n4️⃣ Checking timestamp distribution...")
177 | 
178 |         if hasattr(self.storage, 'conn'):  # SQLite-vec
179 |             # Get timestamp statistics
180 |             cursor = self.storage.conn.execute('''
181 |                 SELECT
182 |                     COUNT(*) as total,
183 |                     MIN(created_at) as oldest,
184 |                     MAX(created_at) as newest,
185 |                     AVG(created_at) as avg_timestamp
186 |                 FROM memories
187 |             ''')
188 | 
189 |             row = cursor.fetchone()
190 |             if not row or row[0] == 0:
191 |                 print("   ℹ️  No memories to analyze")
192 |                 return
193 | 
194 |             total, oldest, newest, avg_timestamp = row
195 | 
196 |             # Calculate time ranges
197 |             now = time.time()
198 |             oldest_age_days = (now - oldest) / 86400
199 |             newest_age_hours = (now - newest) / 3600
200 | 
201 |             print(f"   📈 Total memories: {total}")
202 |             print(f"   📅 Oldest: {datetime.utcfromtimestamp(oldest).isoformat()}Z ({oldest_age_days:.1f} days ago)")
203 |             print(f"   📅 Newest: {datetime.utcfromtimestamp(newest).isoformat()}Z ({newest_age_hours:.1f} hours ago)")
204 | 
205 |             # Check for anomaly: if > 50% of memories created in last 24 hours
206 |             # but oldest is > 7 days old (indicates bulk timestamp reset)
207 |             cursor = self.storage.conn.execute('''
208 |                 SELECT COUNT(*) FROM memories
209 |                 WHERE created_at > ?
210 |             ''', (now - 86400,))
211 | 
212 |             recent_count = cursor.fetchone()[0]
213 |             recent_percentage = (recent_count / total) * 100
214 | 
215 |             if recent_percentage > 50 and oldest_age_days > 7:
216 |                 self.warnings.append(
217 |                     f"Suspicious: {recent_percentage:.1f}% of memories created in last 24h, "
218 |                     f"but oldest memory is {oldest_age_days:.1f} days old. "
219 |                     f"This could indicate timestamp reset bug!"
220 |                 )
221 |                 print(f"   ⚠️  {recent_percentage:.1f}% created in last 24h (suspicious if many old memories)")
222 | 
223 |             # Check distribution by age buckets
224 |             buckets = [
225 |                 ("Last hour", 3600),
226 |                 ("Last 24 hours", 86400),
227 |                 ("Last week", 604800),
228 |                 ("Last month", 2592000),
229 |                 ("Older", float('inf'))
230 |             ]
231 | 
232 |             print(f"\n   📊 Distribution by age:")
233 |             for label, seconds in buckets:
234 |                 if seconds == float('inf'):
235 |                     cursor = self.storage.conn.execute('''
236 |                         SELECT COUNT(*) FROM memories
237 |                         WHERE created_at < ?
238 |                     ''', (now - 2592000,))
239 |                 else:
240 |                     cursor = self.storage.conn.execute('''
241 |                         SELECT COUNT(*) FROM memories
242 |                         WHERE created_at > ?
243 |                     ''', (now - seconds,))
244 | 
245 |                 count = cursor.fetchone()[0]
246 |                 percentage = (count / total) * 100
247 |                 print(f"      {label:15}: {count:4} ({percentage:5.1f}%)")
248 | 
249 |         else:
250 |             print("   ⚠️ Skipped (not SQLite backend)")
251 | 
252 | 
253 | async def main():
254 |     """Main validation function."""
255 |     print("="*60)
256 |     print("⏰ TIMESTAMP INTEGRITY VALIDATION")
257 |     print("="*60)
258 |     print()
259 | 
260 |     try:
261 |         # Get database path from config
262 |         storage_backend = os.getenv('MCP_MEMORY_STORAGE_BACKEND', 'sqlite_vec')
263 |         db_path = config_module.MEMORY_SQLITE_DB_PATH
264 | 
265 |         print(f"Backend: {storage_backend}")
266 |         print(f"Database: {db_path}")
267 |         print()
268 | 
269 |         # Initialize storage
270 |         storage = SqliteVecMemoryStorage(
271 |             db_path=db_path,
272 |             embedding_model="all-MiniLM-L6-v2"
273 |         )
274 |         await storage.initialize()
275 | 
276 |         # Run validation
277 |         validator = TimestampIntegrityValidator(storage)
278 |         is_healthy, warnings, errors = await validator.validate_all()
279 | 
280 |         # Close storage
281 |         if hasattr(storage, 'close'):
282 |             storage.close()
283 | 
284 |         # Exit with appropriate code
285 |         if errors:
286 |             print("\n❌ Validation FAILED with errors")
287 |             sys.exit(1)
288 |         elif warnings:
289 |             print("\n⚠️  Validation completed with warnings")
290 |             sys.exit(0)
291 |         else:
292 |             print("\n✅ Validation PASSED - Timestamps are healthy!")
293 |             sys.exit(0)
294 | 
295 |     except Exception as e:
296 |         print(f"\n❌ Validation failed with exception: {e}")
297 |         import traceback
298 |         traceback.print_exc()
299 |         sys.exit(1)
300 | 
301 | 
302 | if __name__ == "__main__":
303 |     asyncio.run(main())
304 | 
```
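
The script exits with code 1 only when hard errors are found (impossible or future timestamps, or an unhandled exception); warnings alone still exit 0, so it can gate a CI job or a pre-release check. A possible invocation from the repository root, assuming the SQLite-vec database is configured:

```bash
# Run the integrity check and inspect the exit code
python scripts/validation/validate_timestamp_integrity.py
echo "exit code: $?"   # 0 = healthy or warnings only, 1 = errors or exception
```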