This is page 12 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .claude
│   ├── agents
│   │   ├── amp-bridge.md
│   │   ├── amp-pr-automator.md
│   │   ├── code-quality-guard.md
│   │   ├── gemini-pr-automator.md
│   │   └── github-release-manager.md
│   ├── settings.local.json.backup
│   └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   ├── feature_request.yml
│   │   └── performance_issue.yml
│   ├── pull_request_template.md
│   └── workflows
│       ├── bridge-tests.yml
│       ├── CACHE_FIX.md
│       ├── claude-code-review.yml
│       ├── claude.yml
│       ├── cleanup-images.yml.disabled
│       ├── dev-setup-validation.yml
│       ├── docker-publish.yml
│       ├── LATEST_FIXES.md
│       ├── main-optimized.yml.disabled
│       ├── main.yml
│       ├── publish-and-test.yml
│       ├── README_OPTIMIZATION.md
│       ├── release-tag.yml.disabled
│       ├── release.yml
│       ├── roadmap-review-reminder.yml
│       ├── SECRET_CONDITIONAL_FIX.md
│       └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│   ├── .gitignore
│   └── reports
│       └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│   ├── deployment
│   │   ├── deploy_fastmcp_fixed.sh
│   │   ├── deploy_http_with_mcp.sh
│   │   └── deploy_mcp_v4.sh
│   ├── deployment-configs
│   │   ├── empty_config.yml
│   │   └── smithery.yaml
│   ├── development
│   │   └── test_fastmcp.py
│   ├── docs-removed-2025-08-23
│   │   ├── authentication.md
│   │   ├── claude_integration.md
│   │   ├── claude-code-compatibility.md
│   │   ├── claude-code-integration.md
│   │   ├── claude-code-quickstart.md
│   │   ├── claude-desktop-setup.md
│   │   ├── complete-setup-guide.md
│   │   ├── database-synchronization.md
│   │   ├── development
│   │   │   ├── autonomous-memory-consolidation.md
│   │   │   ├── CLEANUP_PLAN.md
│   │   │   ├── CLEANUP_README.md
│   │   │   ├── CLEANUP_SUMMARY.md
│   │   │   ├── dream-inspired-memory-consolidation.md
│   │   │   ├── hybrid-slm-memory-consolidation.md
│   │   │   ├── mcp-milestone.md
│   │   │   ├── multi-client-architecture.md
│   │   │   ├── test-results.md
│   │   │   └── TIMESTAMP_FIX_SUMMARY.md
│   │   ├── distributed-sync.md
│   │   ├── invocation_guide.md
│   │   ├── macos-intel.md
│   │   ├── master-guide.md
│   │   ├── mcp-client-configuration.md
│   │   ├── multi-client-server.md
│   │   ├── service-installation.md
│   │   ├── sessions
│   │   │   └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│   │   ├── UBUNTU_SETUP.md
│   │   ├── ubuntu.md
│   │   ├── windows-setup.md
│   │   └── windows.md
│   ├── docs-root-cleanup-2025-08-23
│   │   ├── AWESOME_LIST_SUBMISSION.md
│   │   ├── CLOUDFLARE_IMPLEMENTATION.md
│   │   ├── DOCUMENTATION_ANALYSIS.md
│   │   ├── DOCUMENTATION_CLEANUP_PLAN.md
│   │   ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│   │   ├── LITESTREAM_SETUP_GUIDE.md
│   │   ├── lm_studio_system_prompt.md
│   │   ├── PYTORCH_DOWNLOAD_FIX.md
│   │   └── README-ORIGINAL-BACKUP.md
│   ├── investigations
│   │   └── MACOS_HOOKS_INVESTIGATION.md
│   ├── litestream-configs-v6.3.0
│   │   ├── install_service.sh
│   │   ├── litestream_master_config_fixed.yml
│   │   ├── litestream_master_config.yml
│   │   ├── litestream_replica_config_fixed.yml
│   │   ├── litestream_replica_config.yml
│   │   ├── litestream_replica_simple.yml
│   │   ├── litestream-http.service
│   │   ├── litestream.service
│   │   └── requirements-cloudflare.txt
│   ├── release-notes
│   │   └── release-notes-v7.1.4.md
│   └── setup-development
│       ├── README.md
│       ├── setup_consolidation_mdns.sh
│       ├── STARTUP_SETUP_GUIDE.md
│       └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│   ├── memory-context.md
│   ├── memory-health.md
│   ├── memory-ingest-dir.md
│   ├── memory-ingest.md
│   ├── memory-recall.md
│   ├── memory-search.md
│   ├── memory-store.md
│   ├── README.md
│   └── session-start.md
├── claude-hooks
│   ├── config.json
│   ├── config.template.json
│   ├── CONFIGURATION.md
│   ├── core
│   │   ├── memory-retrieval.js
│   │   ├── mid-conversation.js
│   │   ├── session-end.js
│   │   ├── session-start.js
│   │   └── topic-change.js
│   ├── debug-pattern-test.js
│   ├── install_claude_hooks_windows.ps1
│   ├── install_hooks.py
│   ├── memory-mode-controller.js
│   ├── MIGRATION.md
│   ├── README-NATURAL-TRIGGERS.md
│   ├── README-phase2.md
│   ├── README.md
│   ├── simple-test.js
│   ├── statusline.sh
│   ├── test-adaptive-weights.js
│   ├── test-dual-protocol-hook.js
│   ├── test-mcp-hook.js
│   ├── test-natural-triggers.js
│   ├── test-recency-scoring.js
│   ├── tests
│   │   ├── integration-test.js
│   │   ├── phase2-integration-test.js
│   │   ├── test-code-execution.js
│   │   ├── test-cross-session.json
│   │   ├── test-session-tracking.json
│   │   └── test-threading.json
│   ├── utilities
│   │   ├── adaptive-pattern-detector.js
│   │   ├── context-formatter.js
│   │   ├── context-shift-detector.js
│   │   ├── conversation-analyzer.js
│   │   ├── dynamic-context-updater.js
│   │   ├── git-analyzer.js
│   │   ├── mcp-client.js
│   │   ├── memory-client.js
│   │   ├── memory-scorer.js
│   │   ├── performance-manager.js
│   │   ├── project-detector.js
│   │   ├── session-tracker.js
│   │   ├── tiered-conversation-monitor.js
│   │   └── version-checker.js
│   └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│   ├── amp-cli-bridge.md
│   ├── api
│   │   ├── code-execution-interface.md
│   │   ├── memory-metadata-api.md
│   │   ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│   │   ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│   │   ├── PHASE2_REPORT.md
│   │   └── tag-standardization.md
│   ├── architecture
│   │   ├── search-enhancement-spec.md
│   │   └── search-examples.md
│   ├── architecture.md
│   ├── archive
│   │   └── obsolete-workflows
│   │       ├── load_memory_context.md
│   │       └── README.md
│   ├── assets
│   │   └── images
│   │       ├── dashboard-v3.3.0-preview.png
│   │       ├── memory-awareness-hooks-example.png
│   │       ├── project-infographic.svg
│   │       └── README.md
│   ├── CLAUDE_CODE_QUICK_REFERENCE.md
│   ├── cloudflare-setup.md
│   ├── deployment
│   │   ├── docker.md
│   │   ├── dual-service.md
│   │   ├── production-guide.md
│   │   └── systemd-service.md
│   ├── development
│   │   ├── ai-agent-instructions.md
│   │   ├── code-quality
│   │   │   ├── phase-2a-completion.md
│   │   │   ├── phase-2a-handle-get-prompt.md
│   │   │   ├── phase-2a-index.md
│   │   │   ├── phase-2a-install-package.md
│   │   │   └── phase-2b-session-summary.md
│   │   ├── code-quality-workflow.md
│   │   ├── dashboard-workflow.md
│   │   ├── issue-management.md
│   │   ├── pr-review-guide.md
│   │   ├── refactoring-notes.md
│   │   ├── release-checklist.md
│   │   └── todo-tracker.md
│   ├── docker-optimized-build.md
│   ├── document-ingestion.md
│   ├── DOCUMENTATION_AUDIT.md
│   ├── enhancement-roadmap-issue-14.md
│   ├── examples
│   │   ├── analysis-scripts.js
│   │   ├── maintenance-session-example.md
│   │   ├── memory-distribution-chart.jsx
│   │   └── tag-schema.json
│   ├── first-time-setup.md
│   ├── glama-deployment.md
│   ├── guides
│   │   ├── advanced-command-examples.md
│   │   ├── chromadb-migration.md
│   │   ├── commands-vs-mcp-server.md
│   │   ├── mcp-enhancements.md
│   │   ├── mdns-service-discovery.md
│   │   ├── memory-consolidation-guide.md
│   │   ├── migration.md
│   │   ├── scripts.md
│   │   └── STORAGE_BACKENDS.md
│   ├── HOOK_IMPROVEMENTS.md
│   ├── hooks
│   │   └── phase2-code-execution-migration.md
│   ├── http-server-management.md
│   ├── ide-compatability.md
│   ├── IMAGE_RETENTION_POLICY.md
│   ├── images
│   │   └── dashboard-placeholder.md
│   ├── implementation
│   │   ├── health_checks.md
│   │   └── performance.md
│   ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│   ├── integration
│   │   ├── homebrew.md
│   │   └── multi-client.md
│   ├── integrations
│   │   ├── gemini.md
│   │   ├── groq-bridge.md
│   │   ├── groq-integration-summary.md
│   │   └── groq-model-comparison.md
│   ├── integrations.md
│   ├── legacy
│   │   └── dual-protocol-hooks.md
│   ├── LM_STUDIO_COMPATIBILITY.md
│   ├── maintenance
│   │   └── memory-maintenance.md
│   ├── mastery
│   │   ├── api-reference.md
│   │   ├── architecture-overview.md
│   │   ├── configuration-guide.md
│   │   ├── local-setup-and-run.md
│   │   ├── testing-guide.md
│   │   └── troubleshooting.md
│   ├── migration
│   │   └── code-execution-api-quick-start.md
│   ├── natural-memory-triggers
│   │   ├── cli-reference.md
│   │   ├── installation-guide.md
│   │   └── performance-optimization.md
│   ├── oauth-setup.md
│   ├── pr-graphql-integration.md
│   ├── quick-setup-cloudflare-dual-environment.md
│   ├── README.md
│   ├── remote-configuration-wiki-section.md
│   ├── research
│   │   ├── code-execution-interface-implementation.md
│   │   └── code-execution-interface-summary.md
│   ├── ROADMAP.md
│   ├── sqlite-vec-backend.md
│   ├── statistics
│   │   ├── charts
│   │   │   ├── activity_patterns.png
│   │   │   ├── contributors.png
│   │   │   ├── growth_trajectory.png
│   │   │   ├── monthly_activity.png
│   │   │   └── october_sprint.png
│   │   ├── data
│   │   │   ├── activity_by_day.csv
│   │   │   ├── activity_by_hour.csv
│   │   │   ├── contributors.csv
│   │   │   └── monthly_activity.csv
│   │   ├── generate_charts.py
│   │   └── REPOSITORY_STATISTICS.md
│   ├── technical
│   │   ├── development.md
│   │   ├── memory-migration.md
│   │   ├── migration-log.md
│   │   ├── sqlite-vec-embedding-fixes.md
│   │   └── tag-storage.md
│   ├── testing
│   │   └── regression-tests.md
│   ├── testing-cloudflare-backend.md
│   ├── troubleshooting
│   │   ├── cloudflare-api-token-setup.md
│   │   ├── cloudflare-authentication.md
│   │   ├── general.md
│   │   ├── hooks-quick-reference.md
│   │   ├── pr162-schema-caching-issue.md
│   │   ├── session-end-hooks.md
│   │   └── sync-issues.md
│   └── tutorials
│       ├── advanced-techniques.md
│       ├── data-analysis.md
│       └── demo-session-walkthrough.md
├── examples
│   ├── claude_desktop_config_template.json
│   ├── claude_desktop_config_windows.json
│   ├── claude-desktop-http-config.json
│   ├── config
│   │   └── claude_desktop_config.json
│   ├── http-mcp-bridge.js
│   ├── memory_export_template.json
│   ├── README.md
│   ├── setup
│   │   └── setup_multi_client_complete.py
│   └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│   ├── .claude
│   │   └── settings.local.json
│   ├── archive
│   │   └── check_missing_timestamps.py
│   ├── backup
│   │   ├── backup_memories.py
│   │   ├── backup_sqlite_vec.sh
│   │   ├── export_distributable_memories.sh
│   │   └── restore_memories.py
│   ├── benchmarks
│   │   ├── benchmark_code_execution_api.py
│   │   ├── benchmark_hybrid_sync.py
│   │   └── benchmark_server_caching.py
│   ├── database
│   │   ├── analyze_sqlite_vec_db.py
│   │   ├── check_sqlite_vec_status.py
│   │   ├── db_health_check.py
│   │   └── simple_timestamp_check.py
│   ├── development
│   │   ├── debug_server_initialization.py
│   │   ├── find_orphaned_files.py
│   │   ├── fix_mdns.sh
│   │   ├── fix_sitecustomize.py
│   │   ├── remote_ingest.sh
│   │   ├── setup-git-merge-drivers.sh
│   │   ├── uv-lock-merge.sh
│   │   └── verify_hybrid_sync.py
│   ├── hooks
│   │   └── pre-commit
│   ├── installation
│   │   ├── install_linux_service.py
│   │   ├── install_macos_service.py
│   │   ├── install_uv.py
│   │   ├── install_windows_service.py
│   │   ├── install.py
│   │   ├── setup_backup_cron.sh
│   │   ├── setup_claude_mcp.sh
│   │   └── setup_cloudflare_resources.py
│   ├── linux
│   │   ├── service_status.sh
│   │   ├── start_service.sh
│   │   ├── stop_service.sh
│   │   ├── uninstall_service.sh
│   │   └── view_logs.sh
│   ├── maintenance
│   │   ├── assign_memory_types.py
│   │   ├── check_memory_types.py
│   │   ├── cleanup_corrupted_encoding.py
│   │   ├── cleanup_memories.py
│   │   ├── cleanup_organize.py
│   │   ├── consolidate_memory_types.py
│   │   ├── consolidation_mappings.json
│   │   ├── delete_orphaned_vectors_fixed.py
│   │   ├── fast_cleanup_duplicates_with_tracking.sh
│   │   ├── find_all_duplicates.py
│   │   ├── find_cloudflare_duplicates.py
│   │   ├── find_duplicates.py
│   │   ├── memory-types.md
│   │   ├── README.md
│   │   ├── recover_timestamps_from_cloudflare.py
│   │   ├── regenerate_embeddings.py
│   │   ├── repair_malformed_tags.py
│   │   ├── repair_memories.py
│   │   ├── repair_sqlite_vec_embeddings.py
│   │   ├── repair_zero_embeddings.py
│   │   ├── restore_from_json_export.py
│   │   └── scan_todos.sh
│   ├── migration
│   │   ├── cleanup_mcp_timestamps.py
│   │   ├── legacy
│   │   │   └── migrate_chroma_to_sqlite.py
│   │   ├── mcp-migration.py
│   │   ├── migrate_sqlite_vec_embeddings.py
│   │   ├── migrate_storage.py
│   │   ├── migrate_tags.py
│   │   ├── migrate_timestamps.py
│   │   ├── migrate_to_cloudflare.py
│   │   ├── migrate_to_sqlite_vec.py
│   │   ├── migrate_v5_enhanced.py
│   │   ├── TIMESTAMP_CLEANUP_README.md
│   │   └── verify_mcp_timestamps.py
│   ├── pr
│   │   ├── amp_collect_results.sh
│   │   ├── amp_detect_breaking_changes.sh
│   │   ├── amp_generate_tests.sh
│   │   ├── amp_pr_review.sh
│   │   ├── amp_quality_gate.sh
│   │   ├── amp_suggest_fixes.sh
│   │   ├── auto_review.sh
│   │   ├── detect_breaking_changes.sh
│   │   ├── generate_tests.sh
│   │   ├── lib
│   │   │   └── graphql_helpers.sh
│   │   ├── quality_gate.sh
│   │   ├── resolve_threads.sh
│   │   ├── run_pyscn_analysis.sh
│   │   ├── run_quality_checks.sh
│   │   ├── thread_status.sh
│   │   └── watch_reviews.sh
│   ├── quality
│   │   ├── fix_dead_code_install.sh
│   │   ├── phase1_dead_code_analysis.md
│   │   ├── phase2_complexity_analysis.md
│   │   ├── README_PHASE1.md
│   │   ├── README_PHASE2.md
│   │   ├── track_pyscn_metrics.sh
│   │   └── weekly_quality_review.sh
│   ├── README.md
│   ├── run
│   │   ├── run_mcp_memory.sh
│   │   ├── run-with-uv.sh
│   │   └── start_sqlite_vec.sh
│   ├── run_memory_server.py
│   ├── server
│   │   ├── check_http_server.py
│   │   ├── check_server_health.py
│   │   ├── memory_offline.py
│   │   ├── preload_models.py
│   │   ├── run_http_server.py
│   │   ├── run_memory_server.py
│   │   ├── start_http_server.bat
│   │   └── start_http_server.sh
│   ├── service
│   │   ├── deploy_dual_services.sh
│   │   ├── install_http_service.sh
│   │   ├── mcp-memory-http.service
│   │   ├── mcp-memory.service
│   │   ├── memory_service_manager.sh
│   │   ├── service_control.sh
│   │   ├── service_utils.py
│   │   └── update_service.sh
│   ├── sync
│   │   ├── check_drift.py
│   │   ├── claude_sync_commands.py
│   │   ├── export_memories.py
│   │   ├── import_memories.py
│   │   ├── litestream
│   │   │   ├── apply_local_changes.sh
│   │   │   ├── enhanced_memory_store.sh
│   │   │   ├── init_staging_db.sh
│   │   │   ├── io.litestream.replication.plist
│   │   │   ├── manual_sync.sh
│   │   │   ├── memory_sync.sh
│   │   │   ├── pull_remote_changes.sh
│   │   │   ├── push_to_remote.sh
│   │   │   ├── README.md
│   │   │   ├── resolve_conflicts.sh
│   │   │   ├── setup_local_litestream.sh
│   │   │   ├── setup_remote_litestream.sh
│   │   │   ├── staging_db_init.sql
│   │   │   ├── stash_local_changes.sh
│   │   │   ├── sync_from_remote_noconfig.sh
│   │   │   └── sync_from_remote.sh
│   │   ├── README.md
│   │   ├── safe_cloudflare_update.sh
│   │   ├── sync_memory_backends.py
│   │   └── sync_now.py
│   ├── testing
│   │   ├── run_complete_test.py
│   │   ├── run_memory_test.sh
│   │   ├── simple_test.py
│   │   ├── test_cleanup_logic.py
│   │   ├── test_cloudflare_backend.py
│   │   ├── test_docker_functionality.py
│   │   ├── test_installation.py
│   │   ├── test_mdns.py
│   │   ├── test_memory_api.py
│   │   ├── test_memory_simple.py
│   │   ├── test_migration.py
│   │   ├── test_search_api.py
│   │   ├── test_sqlite_vec_embeddings.py
│   │   ├── test_sse_events.py
│   │   ├── test-connection.py
│   │   └── test-hook.js
│   ├── utils
│   │   ├── claude_commands_utils.py
│   │   ├── generate_personalized_claude_md.sh
│   │   ├── groq
│   │   ├── groq_agent_bridge.py
│   │   ├── list-collections.py
│   │   ├── memory_wrapper_uv.py
│   │   ├── query_memories.py
│   │   ├── smithery_wrapper.py
│   │   ├── test_groq_bridge.sh
│   │   └── uv_wrapper.py
│   └── validation
│       ├── check_dev_setup.py
│       ├── check_documentation_links.py
│       ├── diagnose_backend_config.py
│       ├── validate_configuration_complete.py
│       ├── validate_memories.py
│       ├── validate_migration.py
│       ├── validate_timestamp_integrity.py
│       ├── verify_environment.py
│       ├── verify_pytorch_windows.py
│       └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│   └── mcp_memory_service
│       ├── __init__.py
│       ├── api
│       │   ├── __init__.py
│       │   ├── client.py
│       │   ├── operations.py
│       │   ├── sync_wrapper.py
│       │   └── types.py
│       ├── backup
│       │   ├── __init__.py
│       │   └── scheduler.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── ingestion.py
│       │   ├── main.py
│       │   └── utils.py
│       ├── config.py
│       ├── consolidation
│       │   ├── __init__.py
│       │   ├── associations.py
│       │   ├── base.py
│       │   ├── clustering.py
│       │   ├── compression.py
│       │   ├── consolidator.py
│       │   ├── decay.py
│       │   ├── forgetting.py
│       │   ├── health.py
│       │   └── scheduler.py
│       ├── dependency_check.py
│       ├── discovery
│       │   ├── __init__.py
│       │   ├── client.py
│       │   └── mdns_service.py
│       ├── embeddings
│       │   ├── __init__.py
│       │   └── onnx_embeddings.py
│       ├── ingestion
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── chunker.py
│       │   ├── csv_loader.py
│       │   ├── json_loader.py
│       │   ├── pdf_loader.py
│       │   ├── registry.py
│       │   ├── semtools_loader.py
│       │   └── text_loader.py
│       ├── lm_studio_compat.py
│       ├── mcp_server.py
│       ├── models
│       │   ├── __init__.py
│       │   └── memory.py
│       ├── server.py
│       ├── services
│       │   ├── __init__.py
│       │   └── memory_service.py
│       ├── storage
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── cloudflare.py
│       │   ├── factory.py
│       │   ├── http_client.py
│       │   ├── hybrid.py
│       │   └── sqlite_vec.py
│       ├── sync
│       │   ├── __init__.py
│       │   ├── exporter.py
│       │   ├── importer.py
│       │   └── litestream_config.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cache_manager.py
│       │   ├── content_splitter.py
│       │   ├── db_utils.py
│       │   ├── debug.py
│       │   ├── document_processing.py
│       │   ├── gpu_detection.py
│       │   ├── hashing.py
│       │   ├── http_server_manager.py
│       │   ├── port_detection.py
│       │   ├── system_detection.py
│       │   └── time_parser.py
│       └── web
│           ├── __init__.py
│           ├── api
│           │   ├── __init__.py
│           │   ├── analytics.py
│           │   ├── backup.py
│           │   ├── consolidation.py
│           │   ├── documents.py
│           │   ├── events.py
│           │   ├── health.py
│           │   ├── manage.py
│           │   ├── mcp.py
│           │   ├── memories.py
│           │   ├── search.py
│           │   └── sync.py
│           ├── app.py
│           ├── dependencies.py
│           ├── oauth
│           │   ├── __init__.py
│           │   ├── authorization.py
│           │   ├── discovery.py
│           │   ├── middleware.py
│           │   ├── models.py
│           │   ├── registration.py
│           │   └── storage.py
│           ├── sse.py
│           └── static
│               ├── app.js
│               ├── index.html
│               ├── README.md
│               ├── sse_test.html
│               └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── test_compact_types.py
│   │   └── test_operations.py
│   ├── bridge
│   │   ├── mock_responses.js
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   └── test_http_mcp_bridge.js
│   ├── conftest.py
│   ├── consolidation
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── test_associations.py
│   │   ├── test_clustering.py
│   │   ├── test_compression.py
│   │   ├── test_consolidator.py
│   │   ├── test_decay.py
│   │   └── test_forgetting.py
│   ├── contracts
│   │   └── api-specification.yml
│   ├── integration
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── test_api_key_fallback.py
│   │   ├── test_api_memories_chronological.py
│   │   ├── test_api_tag_time_search.py
│   │   ├── test_api_with_memory_service.py
│   │   ├── test_bridge_integration.js
│   │   ├── test_cli_interfaces.py
│   │   ├── test_cloudflare_connection.py
│   │   ├── test_concurrent_clients.py
│   │   ├── test_data_serialization_consistency.py
│   │   ├── test_http_server_startup.py
│   │   ├── test_mcp_memory.py
│   │   ├── test_mdns_integration.py
│   │   ├── test_oauth_basic_auth.py
│   │   ├── test_oauth_flow.py
│   │   ├── test_server_handlers.py
│   │   └── test_store_memory.py
│   ├── performance
│   │   ├── test_background_sync.py
│   │   └── test_hybrid_live.py
│   ├── README.md
│   ├── smithery
│   │   └── test_smithery.py
│   ├── sqlite
│   │   └── simple_sqlite_vec_test.py
│   ├── test_client.py
│   ├── test_content_splitting.py
│   ├── test_database.py
│   ├── test_hybrid_cloudflare_limits.py
│   ├── test_hybrid_storage.py
│   ├── test_memory_ops.py
│   ├── test_semantic_search.py
│   ├── test_sqlite_vec_storage.py
│   ├── test_time_parser.py
│   ├── test_timestamp_preservation.py
│   ├── timestamp
│   │   ├── test_hook_vs_manual_storage.py
│   │   ├── test_issue99_final_validation.py
│   │   ├── test_search_retrieval_inconsistency.py
│   │   ├── test_timestamp_issue.py
│   │   └── test_timestamp_simple.py
│   └── unit
│       ├── conftest.py
│       ├── test_cloudflare_storage.py
│       ├── test_csv_loader.py
│       ├── test_fastapi_dependencies.py
│       ├── test_import.py
│       ├── test_json_loader.py
│       ├── test_mdns_simple.py
│       ├── test_mdns.py
│       ├── test_memory_service.py
│       ├── test_memory.py
│       ├── test_semtools_loader.py
│       ├── test_storage_interface_compatibility.py
│       └── test_tag_time_filtering.py
├── tools
│   ├── docker
│   │   ├── DEPRECATED.md
│   │   ├── docker-compose.http.yml
│   │   ├── docker-compose.pythonpath.yml
│   │   ├── docker-compose.standalone.yml
│   │   ├── docker-compose.uv.yml
│   │   ├── docker-compose.yml
│   │   ├── docker-entrypoint-persistent.sh
│   │   ├── docker-entrypoint-unified.sh
│   │   ├── docker-entrypoint.sh
│   │   ├── Dockerfile
│   │   ├── Dockerfile.glama
│   │   ├── Dockerfile.slim
│   │   ├── README.md
│   │   └── test-docker-modes.sh
│   └── README.md
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/docs/deployment/systemd-service.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Systemd Service Setup for Linux
  2 | 
  3 | This guide explains how to set up the MCP Memory HTTP server as a systemd service on Linux for automatic startup and management.
  4 | 
  5 | ## Overview
  6 | 
  7 | The systemd service provides:
  8 | - ✅ **Automatic startup** on user login
  9 | - ✅ **Persistent operation** even when logged out (with linger enabled)
 10 | - ✅ **Automatic restarts** on failure
 11 | - ✅ **Centralized logging** via journald
 12 | - ✅ **Easy management** via systemctl commands
 13 | 
 14 | ## Installation
 15 | 
 16 | ### Quick Install
 17 | 
 18 | ```bash
 19 | # Run the installation script
 20 | cd /path/to/mcp-memory-service
 21 | bash scripts/service/install_http_service.sh
 22 | ```
 23 | 
 24 | The script will:
 25 | 1. Check prerequisites (.env file, venv)
 26 | 2. Ask whether to install as a user or system service
 27 | 3. Copy service file to appropriate location
 28 | 4. Reload systemd configuration
 29 | 5. Show next steps
 30 | 
 31 | ### Manual Installation
 32 | 
 33 | If you prefer manual installation:
 34 | 
 35 | **1. User Service (Recommended - No sudo required):**
 36 | 
 37 | ```bash
 38 | # Create directory
 39 | mkdir -p ~/.config/systemd/user
 40 | 
 41 | # Copy service file
 42 | cp scripts/service/mcp-memory-http.service ~/.config/systemd/user/
 43 | 
 44 | # Reload systemd
 45 | systemctl --user daemon-reload
 46 | 
 47 | # Start service
 48 | systemctl --user start mcp-memory-http.service
 49 | 
 50 | # Enable auto-start
 51 | systemctl --user enable mcp-memory-http.service
 52 | 
 53 | # Enable linger (runs even when logged out)
 54 | loginctl enable-linger $USER
 55 | ```
 56 | 
 57 | **2. System Service (Requires sudo):**
 58 | 
 59 | ```bash
 60 | # Copy service file
 61 | sudo cp scripts/service/mcp-memory-http.service /etc/systemd/system/
 62 | 
 63 | # Edit to ensure paths are correct
 64 | sudo nano /etc/systemd/system/mcp-memory-http.service
 65 | 
 66 | # Reload systemd
 67 | sudo systemctl daemon-reload
 68 | 
 69 | # Start service
 70 | sudo systemctl start mcp-memory-http.service
 71 | 
 72 | # Enable auto-start
 73 | sudo systemctl enable mcp-memory-http.service
 74 | ```
 75 | 
 76 | ## Service Management
 77 | 
 78 | ### Basic Commands
 79 | 
 80 | ```bash
 81 | # Start service
 82 | systemctl --user start mcp-memory-http.service
 83 | 
 84 | # Stop service
 85 | systemctl --user stop mcp-memory-http.service
 86 | 
 87 | # Restart service
 88 | systemctl --user restart mcp-memory-http.service
 89 | 
 90 | # Check status
 91 | systemctl --user status mcp-memory-http.service
 92 | 
 93 | # Enable auto-start on login
 94 | systemctl --user enable mcp-memory-http.service
 95 | 
 96 | # Disable auto-start
 97 | systemctl --user disable mcp-memory-http.service
 98 | ```
 99 | 
100 | ### Viewing Logs
101 | 
102 | ```bash
103 | # Live logs (follow mode)
104 | journalctl --user -u mcp-memory-http.service -f
105 | 
106 | # Last 50 lines
107 | journalctl --user -u mcp-memory-http.service -n 50
108 | 
109 | # Logs since boot
110 | journalctl --user -u mcp-memory-http.service -b
111 | 
112 | # Logs for specific time range
113 | journalctl --user -u mcp-memory-http.service --since "2 hours ago"
114 | 
115 | # Logs with priority filter (only errors and above)
116 | journalctl --user -u mcp-memory-http.service -p err
117 | ```
118 | 
119 | ## Configuration
120 | 
121 | The service file is located at:
122 | - User service: `~/.config/systemd/user/mcp-memory-http.service`
123 | - System service: `/etc/systemd/system/mcp-memory-http.service`
124 | 
125 | ### Service File Structure
126 | 
127 | ```ini
128 | [Unit]
129 | Description=MCP Memory Service HTTP Server (Hybrid Backend)
130 | Documentation=https://github.com/doobidoo/mcp-memory-service
131 | After=network.target network-online.target
132 | Wants=network-online.target
133 | 
134 | [Service]
135 | Type=simple
136 | WorkingDirectory=/home/hkr/repositories/mcp-memory-service
137 | Environment=PATH=/home/hkr/repositories/mcp-memory-service/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
138 | Environment=PYTHONPATH=/home/hkr/repositories/mcp-memory-service/src
139 | EnvironmentFile=/home/hkr/repositories/mcp-memory-service/.env
140 | ExecStart=/home/hkr/repositories/mcp-memory-service/venv/bin/python /home/hkr/repositories/mcp-memory-service/scripts/server/run_http_server.py
141 | Restart=always
142 | RestartSec=10
143 | StandardOutput=journal
144 | StandardError=journal
145 | SyslogIdentifier=mcp-memory-http
146 | 
147 | # Security hardening
148 | NoNewPrivileges=true
149 | PrivateTmp=true
150 | 
151 | [Install]
152 | WantedBy=default.target
153 | ```
154 | 
155 | ### Important Configuration Points
156 | 
157 | 1. **User Service vs System Service:**
158 |    - User services run as your user (recommended)
159 |    - System services run at boot (before user login)
160 |    - User services can't have `User=` and `Group=` directives
161 |    - User services use `WantedBy=default.target`, not `multi-user.target`
162 | 
163 | 2. **Environment Loading:**
164 |    - Service loads `.env` file via `EnvironmentFile` directive
165 |    - All environment variables are available to the service
166 |    - Changes to `.env` require service restart
167 | 
168 | 3. **Working Directory:**
169 |    - Service runs from project root
170 |    - Relative paths in code work correctly
171 |    - Database paths should be absolute or relative to working directory
172 | 
173 | ## Troubleshooting
174 | 
175 | ### Service Won't Start
176 | 
177 | **Check status for errors:**
178 | ```bash
179 | systemctl --user status mcp-memory-http.service
180 | ```
181 | 
182 | **Common Issues:**
183 | 
184 | 1. **GROUP error (status=216/GROUP):**
185 |    - Remove `User=` and `Group=` directives from user service file
186 |    - These are only for system services
187 | 
188 | 2. **Permission denied:**
189 |    - Check that `.env` file is readable by your user
190 |    - Check that venv and scripts are accessible
191 |    - For system services, ensure files are owned by the service user
192 | 
193 | 3. **Port already in use:**
194 |    ```bash
195 |    lsof -i :8000
196 |    # Kill existing process or change port in .env
197 |    ```
198 | 
199 | 4. **Missing dependencies:**
200 |    ```bash
201 |    # Verify venv is set up
202 |    ls -la venv/bin/python
203 | 
204 |    # Reinstall if needed
205 |    python -m venv venv
206 |    source venv/bin/activate
207 |    pip install -e .
208 |    ```
209 | 
210 | ### Service Fails to Enable
211 | 
212 | **Error:** "Unit is added as a dependency to a non-existent unit"
213 | 
214 | **Solution:** For user services, change `WantedBy=` target:
215 | ```bash
216 | # Edit service file
217 | nano ~/.config/systemd/user/mcp-memory-http.service
218 | 
219 | # Change this:
220 | [Install]
221 | WantedBy=multi-user.target
222 | 
223 | # To this:
224 | [Install]
225 | WantedBy=default.target
226 | 
227 | # Reload and reenable
228 | systemctl --user daemon-reload
229 | systemctl --user reenable mcp-memory-http.service
230 | ```
231 | 
232 | ### Logs Show Configuration Errors
233 | 
234 | **Check environment loading:**
235 | ```bash
236 | # View effective environment
237 | systemctl --user show-environment
238 | 
239 | # Test service startup manually
240 | cd /path/to/mcp-memory-service
241 | source .env
242 | venv/bin/python scripts/server/run_http_server.py
243 | ```
244 | 
245 | ### Service Stops After Logout
246 | 
247 | **Enable linger to keep user services running:**
248 | ```bash
249 | loginctl enable-linger $USER
250 | 
251 | # Verify
252 | loginctl show-user $USER | grep Linger
253 | # Should show: Linger=yes
254 | ```
255 | 
256 | ## Performance Monitoring
257 | 
258 | ```bash
259 | # Check memory usage
260 | systemctl --user status mcp-memory-http.service | grep Memory
261 | 
262 | # Check CPU usage
263 | systemctl --user status mcp-memory-http.service | grep CPU
264 | 
265 | # Monitor in real-time
266 | watch -n 2 'systemctl --user status mcp-memory-http.service | grep -E "Memory|CPU"'
267 | 
268 | # Detailed resource usage
269 | systemd-cgtop --user
270 | ```
271 | 
272 | ## Security Considerations
273 | 
274 | The service includes basic security hardening:
275 | - `NoNewPrivileges=true` - Prevents privilege escalation
276 | - `PrivateTmp=true` - Isolated /tmp directory
277 | - User services run with user permissions (no root access)
278 | 
279 | For system services, consider additional hardening:
280 | - `ProtectSystem=strict` - Read-only access to system directories
281 | - `ProtectHome=read-only` - Limited home directory access
282 | - `ReadWritePaths=` - Explicitly allow write access to database paths
283 | 
284 | **Note:** Some security directives may conflict with application requirements. Test thoroughly when adding restrictions.
285 | 
286 | ## Uninstallation
287 | 
288 | ```bash
289 | # Stop and disable service
290 | systemctl --user stop mcp-memory-http.service
291 | systemctl --user disable mcp-memory-http.service
292 | 
293 | # Remove service file
294 | rm ~/.config/systemd/user/mcp-memory-http.service
295 | 
296 | # Reload systemd
297 | systemctl --user daemon-reload
298 | 
299 | # Optional: Disable linger if no other user services needed
300 | loginctl disable-linger $USER
301 | ```
302 | 
303 | ## See Also
304 | 
305 | - [HTTP Server Management](../http-server-management.md) - General server management
306 | - [Troubleshooting Guide](https://github.com/doobidoo/mcp-memory-service/wiki/07-TROUBLESHOOTING) - Common issues
307 | - [Claude Code Hooks Configuration](../../CLAUDE.md#claude-code-hooks-configuration-) - Hooks setup
308 | - [systemd.service(5)](https://www.freedesktop.org/software/systemd/man/systemd.service.html) - systemd documentation
309 | 
310 | ---
311 | 
312 | **Last Updated**: 2025-10-13
313 | **Version**: 8.5.4
314 | **Tested On**: Ubuntu 22.04, Debian 12, Fedora 38
315 | 
```
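
Once the unit is running, it can help to verify the HTTP endpoint directly rather than relying on `systemctl` status alone. A minimal probe sketch in Python, assuming the server listens on `127.0.0.1:8000` and exposes a `/api/health` route (both assumptions; check your `.env` for the configured host, port, and any API key requirements):

```python
# health_probe.py - minimal sketch; host, port, and the /api/health path are assumptions
import json
import urllib.request

URL = "http://127.0.0.1:8000/api/health"  # adjust to match your .env settings

def probe(url: str = URL, timeout: float = 5.0) -> None:
    """Fetch the health endpoint and print the JSON payload."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            print(f"HTTP {resp.status}:")
            print(json.dumps(json.loads(resp.read().decode("utf-8")), indent=2))
    except Exception as exc:  # connection refused usually means the unit is not running
        print(f"Health probe failed: {exc}")

if __name__ == "__main__":
    probe()
```

If the probe fails while `systemctl --user status` reports the unit as active, the journalctl commands in the guide above are the next stop.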

--------------------------------------------------------------------------------
/src/mcp_memory_service/ingestion/semtools_loader.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Semtools document loader for enhanced text extraction using Rust-based parser.
 17 | 
 18 | Uses semtools CLI (https://github.com/run-llama/semtools) for superior document
 19 | parsing with LlamaParse API integration. Supports PDF, DOCX, PPTX and other formats.
 20 | """
 21 | 
 22 | import logging
 23 | import asyncio
 24 | import os
 25 | from pathlib import Path
 26 | from typing import AsyncGenerator, Dict, Any, Optional
 27 | import shutil
 28 | 
 29 | from .base import DocumentLoader, DocumentChunk
 30 | from .chunker import TextChunker, ChunkingStrategy
 31 | 
 32 | logger = logging.getLogger(__name__)
 33 | 
 34 | 
 35 | class SemtoolsLoader(DocumentLoader):
 36 |     """
 37 |     Document loader using semtools for superior text extraction.
 38 | 
 39 |     Leverages semtools' Rust-based parser with LlamaParse API for:
 40 |     - Advanced OCR capabilities
 41 |     - Table extraction
 42 |     - Multi-format support (PDF, DOCX, PPTX, etc.)
 43 | 
 44 |     Falls back gracefully when semtools is not available.
 45 |     """
 46 | 
 47 |     def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
 48 |         """
 49 |         Initialize Semtools loader.
 50 | 
 51 |         Args:
 52 |             chunk_size: Target size for text chunks in characters
 53 |             chunk_overlap: Number of characters to overlap between chunks
 54 |         """
 55 |         super().__init__(chunk_size, chunk_overlap)
 56 |         self.supported_extensions = ['pdf', 'docx', 'doc', 'pptx', 'xlsx']
 57 |         self.chunker = TextChunker(ChunkingStrategy(
 58 |             chunk_size=chunk_size,
 59 |             chunk_overlap=chunk_overlap,
 60 |             respect_paragraph_boundaries=True
 61 |         ))
 62 | 
 63 |         # Check semtools availability
 64 |         self._semtools_available = self._check_semtools_availability()
 65 | 
 66 |         # Get API key from environment
 67 |         self.api_key = os.getenv('LLAMAPARSE_API_KEY')
 68 |         if self._semtools_available and not self.api_key:
 69 |             logger.warning(
 70 |                 "Semtools is available but LLAMAPARSE_API_KEY not set. "
 71 |                 "Document parsing quality may be limited."
 72 |             )
 73 | 
 74 |     def _check_semtools_availability(self) -> bool:
 75 |         """
 76 |         Check if semtools is installed and available.
 77 | 
 78 |         Returns:
 79 |             True if semtools CLI is available
 80 |         """
 81 |         semtools_path = shutil.which('semtools')
 82 |         if semtools_path:
 83 |             logger.info(f"Semtools found at: {semtools_path}")
 84 |             return True
 85 |         else:
 86 |             logger.debug(
 87 |                 "Semtools not available. Install with: npm i -g @llamaindex/semtools "
 88 |                 "or cargo install semtools"
 89 |             )
 90 |             return False
 91 | 
 92 |     def can_handle(self, file_path: Path) -> bool:
 93 |         """
 94 |         Check if this loader can handle the file.
 95 | 
 96 |         Args:
 97 |             file_path: Path to the file to check
 98 | 
 99 |         Returns:
100 |             True if semtools is available and file format is supported
101 |         """
102 |         if not self._semtools_available:
103 |             return False
104 | 
105 |         return (file_path.suffix.lower().lstrip('.') in self.supported_extensions and
106 |                 file_path.exists() and
107 |                 file_path.is_file())
108 | 
109 |     async def extract_chunks(self, file_path: Path, **kwargs) -> AsyncGenerator[DocumentChunk, None]:
110 |         """
111 |         Extract text chunks from a document using semtools.
112 | 
113 |         Args:
114 |             file_path: Path to the document file
115 |             **kwargs: Additional options (currently unused)
116 | 
117 |         Yields:
118 |             DocumentChunk objects containing parsed content
119 | 
120 |         Raises:
121 |             FileNotFoundError: If the file doesn't exist
122 |             ValueError: If semtools is not available or parsing fails
123 |         """
124 |         await self.validate_file(file_path)
125 | 
126 |         if not self._semtools_available:
127 |             raise ValueError(
128 |                 "Semtools is not available. Install with: npm i -g @llamaindex/semtools"
129 |             )
130 | 
131 |         logger.info(f"Extracting chunks from {file_path} using semtools")
132 | 
133 |         try:
134 |             # Parse document to markdown using semtools
135 |             markdown_content = await self._parse_with_semtools(file_path)
136 | 
137 |             # Get base metadata
138 |             base_metadata = self.get_base_metadata(file_path)
139 |             base_metadata.update({
140 |                 'extraction_method': 'semtools',
141 |                 'parser_backend': 'llamaparse',
142 |                 'content_type': 'markdown',
143 |                 'has_api_key': bool(self.api_key)
144 |             })
145 | 
146 |             # Chunk the markdown content
147 |             chunks = self.chunker.chunk_text(markdown_content, base_metadata)
148 | 
149 |             chunk_index = 0
150 |             for chunk_text, chunk_metadata in chunks:
151 |                 yield DocumentChunk(
152 |                     content=chunk_text,
153 |                     metadata=chunk_metadata,
154 |                     chunk_index=chunk_index,
155 |                     source_file=file_path
156 |                 )
157 |                 chunk_index += 1
158 | 
159 |         except Exception as e:
160 |             logger.error(f"Error processing {file_path} with semtools: {e}")
161 |             raise ValueError(f"Failed to parse document: {str(e)}") from e
162 | 
163 |     async def _parse_with_semtools(self, file_path: Path) -> str:
164 |         """
165 |         Parse document using semtools CLI.
166 | 
167 |         Args:
168 |             file_path: Path to document to parse
169 | 
170 |         Returns:
171 |             Markdown content extracted from document
172 | 
173 |         Raises:
174 |             RuntimeError: If semtools command fails
175 |         """
176 |         # Prepare semtools command
177 |         cmd = ['semtools', 'parse', str(file_path)]
178 | 
179 |         # Set up environment with API key if available
180 |         env = os.environ.copy()
181 |         if self.api_key:
182 |             env['LLAMAPARSE_API_KEY'] = self.api_key
183 | 
184 |         try:
185 |             # Run semtools parse command
186 |             process = await asyncio.create_subprocess_exec(
187 |                 *cmd,
188 |                 stdout=asyncio.subprocess.PIPE,
189 |                 stderr=asyncio.subprocess.PIPE,
190 |                 env=env
191 |             )
192 | 
193 |             # Wait for completion with timeout
194 |             stdout, stderr = await asyncio.wait_for(
195 |                 process.communicate(),
196 |                 timeout=300  # 5 minute timeout for large documents
197 |             )
198 | 
199 |             if process.returncode != 0:
200 |                 error_msg = stderr.decode('utf-8', errors='replace')
201 |                 logger.error(f"Semtools parsing failed: {error_msg}")
202 |                 raise RuntimeError(f"Semtools returned error: {error_msg}")
203 | 
204 |             # Parse markdown output
205 |             markdown_content = stdout.decode('utf-8', errors='replace')
206 | 
207 |             if not markdown_content.strip():
208 |                 logger.warning(f"Semtools returned empty content for {file_path}")
209 |                 raise RuntimeError("Semtools returned empty content")
210 | 
211 |             logger.debug(f"Successfully parsed {file_path}, extracted {len(markdown_content)} characters")
212 |             return markdown_content
213 | 
214 |         except asyncio.TimeoutError:
215 |             logger.error(f"Semtools parsing timed out for {file_path}")
216 |             raise RuntimeError("Document parsing timed out after 5 minutes")
217 |         except Exception as e:
218 |             logger.error(f"Error running semtools: {e}")
219 |             raise
220 | 
221 | 
222 | # Register the semtools loader
223 | def _register_semtools_loader():
224 |     """Register semtools loader with the registry."""
225 |     try:
226 |         from .registry import register_loader
227 |         register_loader(SemtoolsLoader, ['pdf', 'docx', 'doc', 'pptx', 'xlsx'])
228 |         logger.debug("Semtools loader registered successfully")
229 |     except ImportError:
230 |         logger.debug("Registry not available during import")
231 | 
232 | 
233 | # Auto-register when module is imported
234 | _register_semtools_loader()
235 | 
```
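
A short usage sketch for the loader above. The document path is hypothetical, and the snippet assumes the package is installed and semtools is on PATH; otherwise `can_handle()` returns `False` and a different loader from the registry would have to handle the file:

```python
# usage sketch for SemtoolsLoader; "report.pdf" is a hypothetical input file
import asyncio
from pathlib import Path

from mcp_memory_service.ingestion.semtools_loader import SemtoolsLoader

async def main() -> None:
    loader = SemtoolsLoader(chunk_size=1000, chunk_overlap=200)
    doc = Path("report.pdf")

    if not loader.can_handle(doc):
        print("semtools not installed or format unsupported; use another loader")
        return

    # extract_chunks is an async generator yielding DocumentChunk objects
    async for chunk in loader.extract_chunks(doc):
        print(chunk.chunk_index, chunk.metadata["extraction_method"], len(chunk.content))

asyncio.run(main())
```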

--------------------------------------------------------------------------------
/archive/docs-root-cleanup-2025-08-23/lm_studio_system_prompt.md:
--------------------------------------------------------------------------------

```markdown
  1 | # LM Studio System Prompt for MCP Tools
  2 | 
  3 | You are an AI assistant with access to various tools through the Model Context Protocol (MCP). You have access to memory storage, database operations, and other utility functions.
  4 | 
  5 | ## Why This System Prompt Exists
  6 | 
  7 | **Normally, MCP servers provide tool schemas through the `tools/list` endpoint** - the client shouldn't need explicit instructions. However, this system prompt exists because:
  8 | 
  9 | 1. **LM Studio Implementation Gap**: Some MCP clients struggle with complex JSON schema interpretation
 10 | 2. **Model Training Limitation**: The openai/gpt-oss-20b model was failing to generate proper tool calls despite receiving correct schemas
 11 | 3. **Legacy Server Compatibility**: This connects to the legacy MCP Memory Service server with specific parameter expectations
 12 | 
 13 | **This prompt supplements, not replaces, the official MCP tool schemas.** It provides concrete examples when schema interpretation fails.
 14 | 
 15 | ## Available Tool Categories:
 16 | 
 17 | ### Memory Tools (MCP Memory Service):
 18 | - `check_database_health` - Check database status and performance
 19 | - `store_memory` - Store information with tags and metadata
 20 | - `retrieve_memory` - Search and retrieve stored memories
 21 | - `recall_memory` - Time-based memory retrieval with natural language
 22 | - `search_by_tag` - Find memories by specific tags
 23 | - `delete_memory` - Remove specific memories
 24 | - `delete_by_tag` - Bulk delete memories by tags
 25 | - `optimize_db` - Optimize database performance
 26 | 
 27 | ### Other Available Tools:
 28 | - File operations, web search, code analysis, etc. (varies by MCP setup)
 29 | 
 30 | ## Tool Usage Guidelines:
 31 | 
 32 | ### 1. When to Use Tools:
 33 | - **Always use tools** when the user explicitly mentions operations like:
 34 |   - "check database health", "db health", "database status"
 35 |   - "store this information", "remember this", "save to memory"
 36 |   - "search for", "find", "recall", "retrieve"
 37 |   - "delete", "remove", "clear"
 38 | - **Use tools** for data operations, file access, external queries
 39 | - **Respond directly** for general questions, explanations, or conversations
 40 | 
 41 | ### 2. Tool Call Format - CRITICAL:
 42 | When calling a tool, use this EXACT JSON structure:
 43 | 
 44 | **For store_memory (most common):**
 45 | ```json
 46 | {"name": "store_memory", "arguments": {"content": "your text here", "metadata": {"tags": ["tag1", "tag2"], "type": "fact"}}}
 47 | ```
 48 | 
 49 | **IMPORTANT: Parameter Rules for store_memory:**
 50 | - `content` (REQUIRED): String containing the information to store
 51 | - `metadata` (OPTIONAL): Object containing:
 52 |   - `tags` (OPTIONAL): Array of strings - e.g., ["database", "health", "check"] 
 53 |   - `type` (OPTIONAL): String - "note", "fact", "reminder", "decision", etc.
 54 | 
 55 | **NOTE: The MCP server expects tags INSIDE the metadata object, not as a separate parameter!**
 56 | 
 57 | **Other common tool calls:**
 58 | - Database health: `{"name": "check_database_health", "arguments": {}}`
 59 | - Retrieve: `{"name": "retrieve_memory", "arguments": {"query": "search terms"}}`
 60 | - Recall: `{"name": "recall_memory", "arguments": {"query": "last week"}}`
 61 | - Delete: `{"name": "delete_memory", "arguments": {"memory_id": "12345"}}`
 62 | 
 63 | **CRITICAL: JSON Formatting Rules:**
 64 | 1. `tags` must be an ARRAY: `["tag1", "tag2"]` NOT a string `"tag1,tag2"`
 65 | 2. All strings must be properly escaped (use `\"` for quotes inside strings)
 66 | 3. `content` parameter is ALWAYS required for store_memory
 67 | 4. No trailing commas in JSON objects
 68 | 
 69 | ### 3. Interpreting User Requests:
 70 | - "check db health" → use `check_database_health`
 71 | - "remember that X happened" → use `store_memory` with content="X happened"
 72 | - "what do you know about Y" → use `retrieve_memory` with query="Y"
 73 | - "find memories from last week" → use `recall_memory` with query="last week"
 74 | - "delete memories about Z" → use `search_by_tag` first, then `delete_memory`
 75 | 
 76 | ### 3.1. EXACT Examples for Common Requests:
 77 | 
 78 | **"Memorize the database health results":**
 79 | ```json
 80 | {"name": "store_memory", "arguments": {"content": "Database health check completed successfully. SQLite-vec backend is healthy with 439 memories stored (2.36 MB).", "metadata": {"tags": ["database", "health", "status"], "type": "reference"}}}
 81 | ```
 82 | 
 83 | **"Remember that we got Memory MCP running in LMStudio":**
 84 | ```json
 85 | {"name": "store_memory", "arguments": {"content": "Successfully got Memory MCP running in LMStudio. The integration is working properly.", "metadata": {"tags": ["lmstudio", "mcp", "integration", "success"], "type": "fact"}}}
 86 | ```
 87 | 
 88 | **"Store this configuration":**
 89 | ```json
 90 | {"name": "store_memory", "arguments": {"content": "Configuration details: [insert config here]", "metadata": {"tags": ["configuration", "setup"], "type": "note"}}}
 91 | ```
 92 | 
 93 | ### 4. Response Format:
 94 | After calling a tool:
 95 | 1. **Briefly summarize** what you did
 96 | 2. **Present the results** in a clear, user-friendly format
 97 | 3. **Offer follow-up actions** if relevant
 98 | 
 99 | Example response flow:
100 | ```
101 | I'll check the database health for you.
102 | 
103 | {"name": "check_database_health", "arguments": {}}
104 | 
105 | The database is healthy with 439 memories stored (2.36 MB). The SQLite-vec backend is working properly with the all-MiniLM-L6-v2 embedding model.
106 | 
107 | Would you like me to run any other database operations?
108 | ```
109 | 
110 | ### 5. Common Patterns:
111 | - For storage: Always include relevant tags like ["date", "project", "category"]
112 | - For retrieval: Start with broad searches, then narrow down
113 | - For health checks: Run without arguments first, then investigate specific issues
114 | - For deletion: Always search first to confirm what will be deleted
115 | 
116 | ### 6. Error Handling:
117 | - If a tool call fails, explain what went wrong and suggest alternatives
118 | - For missing information, ask the user for clarification
119 | - If unsure which tool to use, describe your options and ask the user
120 | 
121 | ### 7. Common JSON Parsing Errors - AVOID THESE:
122 | 
123 | **❌ WRONG: String instead of array for tags**
124 | ```json
125 | {"name": "store_memory", "arguments": {"content": "text", "metadata": {"tags": "database,health"}}}
126 | ```
127 | 
128 | **✅ CORRECT: Array for tags (inside metadata)**
129 | ```json
130 | {"name": "store_memory", "arguments": {"content": "text", "metadata": {"tags": ["database", "health"]}}}
131 | ```
132 | 
133 | **❌ WRONG: Missing content parameter**
134 | ```json
135 | {"name": "store_memory", "arguments": {"metadata": {"tags": ["database"], "type": "fact"}}}
136 | ```
137 | 
138 | **✅ CORRECT: Content parameter included**
139 | ```json
140 | {"name": "store_memory", "arguments": {"content": "Actual information to store", "metadata": {"tags": ["database"]}}}
141 | ```
142 | 
143 | **❌ WRONG: Tags as separate parameter (wrong for legacy server)**
144 | ```json
145 | {"name": "store_memory", "arguments": {"content": "text", "tags": ["tag1"], "memory_type": "fact"}}
146 | ```
147 | 
148 | **✅ CORRECT: Tags inside metadata object (legacy server format)**
149 | ```json
150 | {"name": "store_memory", "arguments": {"content": "text", "metadata": {"tags": ["tag1"], "type": "fact"}}}
151 | ```
152 | 
153 | ### 8. Debugging Tool Calls:
154 | If a tool call fails with "params requires property 'content'":
155 | 1. Ensure `content` is present and is a string
156 | 2. Check that `tags` is an array of strings, not a string
157 | 3. Verify JSON syntax (no trailing commas, proper escaping)
158 | 4. Use the exact examples above as templates
159 | 
160 | ### 9. COMPLETE WORKING EXAMPLE:
161 | For the request "Memorize the result and the fact that we got the Memory MCP running in LMStudio":
162 | 
163 | **Step 1:** Call check_database_health (if needed)
164 | ```json
165 | {"name": "check_database_health", "arguments": {}}
166 | ```
167 | 
168 | **Step 2:** Store the memory with CORRECT syntax:
169 | ```json
170 | {"name": "store_memory", "arguments": {"content": "Memory MCP is successfully running in LMStudio. Database health check shows SQLite-vec backend is healthy with 439 memories stored (2.36 MB). Integration confirmed working.", "metadata": {"tags": ["lmstudio", "mcp", "integration", "success", "database"], "type": "fact"}}}
171 | ```
172 | 
173 | **✅ This format will work because:**
174 | - `content` is present and contains the actual information
175 | - `metadata.tags` is an array of strings (not a separate parameter)
176 | - `metadata.type` is a string inside the metadata object
177 | - All JSON syntax is correct
178 | - Matches the legacy MCP server schema that LM Studio connects to
179 | 
180 | Remember: **Be proactive with tool use**. When users mention operations that tools can handle, use them immediately rather than just describing what you could do.
```
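
The parameter rules in this prompt are mechanical enough to check programmatically. A small validator sketch (not part of the service; the function name and its error messages are illustrative only):

```python
# validator sketch for the store_memory call shape described above
from typing import Any

def validate_store_memory(call: dict[str, Any]) -> list[str]:
    """Return a list of problems with a store_memory tool call, empty if it looks valid."""
    errors: list[str] = []
    args = call.get("arguments", {})

    if call.get("name") != "store_memory":
        errors.append("name must be 'store_memory'")
    if not isinstance(args.get("content"), str) or not args.get("content"):
        errors.append("'content' is required and must be a non-empty string")
    if "tags" in args:
        errors.append("'tags' must live inside 'metadata', not at the top level")

    tags = args.get("metadata", {}).get("tags")
    if tags is not None and (not isinstance(tags, list) or not all(isinstance(t, str) for t in tags)):
        errors.append("'metadata.tags' must be an array of strings")
    return errors

# example: catches the comma-separated-string mistake from section 7
bad = {"name": "store_memory", "arguments": {"content": "text", "metadata": {"tags": "db,health"}}}
print(validate_store_memory(bad))  # ["'metadata.tags' must be an array of strings"]
```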

--------------------------------------------------------------------------------
/scripts/validation/verify_pytorch_windows.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | # Copyright 2024 Heinrich Krupp
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Verification script for PyTorch installation on Windows.
 18 | This script checks if PyTorch is properly installed and configured for Windows.
 19 | """
 20 | import os
 21 | import sys
 22 | import platform
 23 | import subprocess
 24 | import importlib.util
 25 | 
 26 | def print_header(text):
 27 |     """Print a formatted header."""
 28 |     print("\n" + "=" * 80)
 29 |     print(f" {text}")
 30 |     print("=" * 80)
 31 | 
 32 | def print_info(text):
 33 |     """Print formatted info text."""
 34 |     print(f"  → {text}")
 35 | 
 36 | def print_success(text):
 37 |     """Print formatted success text."""
 38 |     print(f"  ✅ {text}")
 39 | 
 40 | def print_error(text):
 41 |     """Print formatted error text."""
 42 |     print(f"  ❌ ERROR: {text}")
 43 | 
 44 | def print_warning(text):
 45 |     """Print formatted warning text."""
 46 |     print(f"  ⚠️  {text}")
 47 | 
 48 | def check_system():
 49 |     """Check if running on Windows."""
 50 |     system = platform.system().lower()
 51 |     if system != "windows":
 52 |         print_warning(f"This script is designed for Windows, but you're running on {system.capitalize()}")
 53 |     else:
 54 |         print_info(f"Running on {platform.system()} {platform.release()}")
 55 |     
 56 |     print_info(f"Python version: {platform.python_version()}")
 57 |     print_info(f"Architecture: {platform.machine()}")
 58 |     
 59 |     return system == "windows"
 60 | 
 61 | def check_pytorch_installation():
 62 |     """Check if PyTorch is installed and properly configured."""
 63 |     try:
 64 |         import torch
 65 |         print_success(f"PyTorch is installed (version {torch.__version__})")
 66 |         
 67 |         # Check if PyTorch was installed from the correct index URL
 68 |         if hasattr(torch, '_C'):
 69 |             print_success("PyTorch C extensions are available")
 70 |         else:
 71 |             print_warning("PyTorch C extensions might not be properly installed")
 72 |         
 73 |         # Check CUDA availability
 74 |         if torch.cuda.is_available():
 75 |             print_success(f"CUDA is available (version {torch.version.cuda})")
 76 |             print_info(f"GPU: {torch.cuda.get_device_name(0)}")
 77 |             print_info(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
 78 |         else:
 79 |             print_info("CUDA is not available, using CPU only")
 80 |             
 81 |             # Check if DirectML is available
 82 |             try:
 83 |                 import torch_directml
 84 |                 print_success(f"DirectML is available (version {torch_directml.__version__})")
 85 |                 
 86 |                 # Check for Intel ARC GPU
 87 |                 try:
 88 |                     ps_cmd = "Get-WmiObject Win32_VideoController | Select-Object Name | Format-List"
 89 |                     gpu_output = subprocess.check_output(['powershell', '-Command', ps_cmd],
 90 |                                                     stderr=subprocess.DEVNULL,
 91 |                                                     universal_newlines=True)
 92 |                     
 93 |                     if 'Intel(R) Arc(TM)' in gpu_output or 'Intel ARC' in gpu_output:
 94 |                         print_success("Intel ARC GPU detected, DirectML support is available")
 95 |                     elif 'Intel' in gpu_output:
 96 |                         print_success("Intel GPU detected, DirectML support is available")
 97 |                     elif 'AMD' in gpu_output or 'Radeon' in gpu_output:
 98 |                         print_success("AMD GPU detected, DirectML support is available")
 99 |                 except (subprocess.SubprocessError, FileNotFoundError):
100 |                     pass
101 |                 
102 |                 # Test a simple DirectML tensor operation
103 |                 try:
104 |                     dml = torch_directml.device()
105 |                     x_dml = torch.rand(5, 3, device=dml)
106 |                     y_dml = torch.rand(5, 3, device=dml)
107 |                     z_dml = x_dml + y_dml
108 |                     print_success("DirectML tensor operations work correctly")
109 |                 except Exception as e:
110 |                     print_warning(f"DirectML tensor operations failed: {e}")
111 |             except ImportError:
112 |                 print_info("DirectML is not available")
113 |                 
114 |                 # Check for Intel/AMD GPUs that could benefit from DirectML
115 |                 try:
116 |                     ps_cmd = "Get-WmiObject Win32_VideoController | Select-Object Name | Format-List"
117 |                     gpu_output = subprocess.check_output(['powershell', '-Command', ps_cmd],
118 |                                                     stderr=subprocess.DEVNULL,
119 |                                                     universal_newlines=True)
120 |                     
121 |                     if 'Intel(R) Arc(TM)' in gpu_output or 'Intel ARC' in gpu_output:
122 |                         print_warning("Intel ARC GPU detected, but DirectML is not installed")
123 |                         print_info("Consider installing torch-directml for better performance")
124 |                     elif 'Intel' in gpu_output or 'AMD' in gpu_output or 'Radeon' in gpu_output:
125 |                         print_warning("Intel/AMD GPU detected, but DirectML is not installed")
126 |                         print_info("Consider installing torch-directml for better performance")
127 |                 except (subprocess.SubprocessError, FileNotFoundError):
128 |                     pass
129 |         
130 |         # Test a simple tensor operation
131 |         try:
132 |             x = torch.rand(5, 3)
133 |             y = torch.rand(5, 3)
134 |             z = x + y
135 |             print_success("Basic tensor operations work correctly")
136 |         except Exception as e:
137 |             print_error(f"Failed to perform basic tensor operations: {e}")
138 |             return False
139 |         
140 |         return True
141 |     except ImportError:
142 |         print_error("PyTorch is not installed")
143 |         return False
144 |     except Exception as e:
145 |         print_error(f"Error checking PyTorch installation: {e}")
146 |         return False
147 | 
148 | def suggest_installation():
149 |     """Suggest PyTorch installation commands."""
150 |     print_header("Installation Suggestions")
151 |     print_info("To install PyTorch for Windows, use one of the following commands:")
152 |     print_info("\nFor CUDA support (NVIDIA GPUs):")
153 |     print("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
154 |     
155 |     print_info("\nFor CPU-only:")
156 |     print("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
157 |     
158 |     print_info("\nFor DirectML support (AMD/Intel GPUs):")
159 |     print("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
160 |     print("pip install torch-directml>=0.2.0")
161 |     
162 |     print_info("\nFor Intel ARC Pro Graphics:")
163 |     print("pip install torch==2.2.0 torchvision==2.2.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu")
164 |     print("pip install torch-directml>=0.2.0")
165 |     
166 |     print_info("\nFor dual GPU setups (NVIDIA + Intel):")
167 |     print("pip install torch==2.2.0 torchvision==2.2.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118")
168 |     print("pip install torch-directml>=0.2.0")
169 |     
170 |     print_info("\nAfter installing PyTorch, run this script again to verify the installation.")
171 | 
172 | def main():
173 |     """Main function."""
174 |     print_header("PyTorch Windows Installation Verification")
175 |     
176 |     is_windows = check_system()
177 |     if not is_windows:
178 |         print_warning("This script is designed for Windows, but may still provide useful information")
179 |     
180 |     pytorch_installed = check_pytorch_installation()
181 |     
182 |     if not pytorch_installed:
183 |         suggest_installation()
184 |         return 1
185 |     
186 |     print_header("Verification Complete")
187 |     print_success("PyTorch is properly installed and configured for Windows")
188 |     return 0
189 | 
190 | if __name__ == "__main__":
191 |     sys.exit(main())
```
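
A minimal sketch of the acceleration fallback order the script verifies (CUDA first, then DirectML, then CPU). This helper is illustrative only and not part of the repository:

```python
import torch

def pick_device():
    """Return the best available device: CUDA, then DirectML, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    try:
        import torch_directml  # optional dependency; Windows AMD/Intel GPUs
        return torch_directml.device()
    except ImportError:
        return torch.device("cpu")
```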

--------------------------------------------------------------------------------
/scripts/quality/README_PHASE2.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Phase 2 Complexity Reduction - Quick Reference
  2 | 
  3 | ## Overview
  4 | 
  5 | This guide provides a quick reference for implementing Phase 2 complexity reductions identified in `phase2_complexity_analysis.md`.
  6 | 
  7 | ## Quick Stats
  8 | 
  9 | | Metric | Current | Target | Improvement |
 10 | |--------|---------|--------|-------------|
 11 | | **Complexity Score** | 40/100 | 50-55/100 | +10-15 points |
 12 | | **Overall Health** | 63/100 | 66-68/100 | +3-5 points |
 13 | | **Functions Analyzed** | 10 | - | - |
 14 | | **Total Time Estimate** | - | 12-15 hours | - |
 15 | | **Complexity Reduction** | - | -39 points | - |
 16 | 
 17 | ## Priority Matrix
 18 | 
 19 | ### High Priority (Week 1) - 7 hours
 20 | Critical path functions that need careful attention:
 21 | 
 22 | 1. **install.py::configure_paths()** (15 → 5, -10 points, 4h)
 23 |    - Extract platform detection
 24 |    - Extract storage setup
 25 |    - Extract Claude config update
 26 | 
 27 | 2. **cloudflare.py::_search_by_tags_internal()** (13 → 8, -5 points, 1.75h)
 28 |    - Extract tag normalization
 29 |    - Extract SQL query builder
 30 | 
 31 | 3. **consolidator.py::consolidate()** (12 → 8, -4 points, 1.25h)
 32 |    - Extract sync context manager
 33 |    - Extract phase guards
 34 | 
 35 | ### Medium Priority (Week 2) - 2.75 hours
 36 | Analytics functions (non-critical):
 37 | 
 38 | 4. **analytics.py::get_memory_growth()** (11 → 6, -5 points, 1.75h)
 39 |    - Extract period configuration
 40 |    - Extract interval aggregation
 41 | 
 42 | 5. **analytics.py::get_tag_usage_analytics()** (10 → 6, -4 points, 1h)
 43 |    - Extract storage stats retrieval
 44 |    - Extract tag stats calculation
 45 | 
 46 | ### Low Priority (Weeks 2-3) - 4.25 hours
 47 | Quick wins with minimal risk:
 48 | 
 49 | 6. **install.py::detect_gpu()** (10 → 7, -3 points, 1h)
 50 | 7. **cloudflare.py::get_memory_timestamps()** (9 → 7, -2 points, 45m)
 51 | 8. **consolidator.py::_get_memories_for_horizon()** (10 → 8, -2 points, 45m)
 52 | 9. **analytics.py::get_activity_breakdown()** (9 → 7, -2 points, 1h)
 53 | 10. **analytics.py::get_memory_type_distribution()** (9 → 7, -2 points, 45m)
 54 | 
 55 | ## Refactoring Patterns Cheat Sheet
 56 | 
 57 | ### Pattern 1: Extract Method
 58 | **When to use:** Function > 50 lines, nested logic, repeated code
 59 | 
 60 | **Example:**
 61 | ```python
 62 | # Before
 63 | def complex_function():
 64 |     # 20 lines of platform detection
 65 |     # 30 lines of setup logic
 66 |     # 15 lines of validation
 67 | 
 68 | # After
 69 | def detect_platform(): ...
 70 | def setup_system(): ...
 71 | def validate_config(): ...
 72 | 
 73 | def complex_function():
 74 |     platform = detect_platform()
 75 |     setup_system(platform)
 76 |     validate_config()
 77 | ```
 78 | 
 79 | ### Pattern 2: Dict Lookup
 80 | **When to use:** if/elif/else chains with similar structure
 81 | 
 82 | **Example:**
 83 | ```python
 84 | # Before
 85 | if period == "week":
 86 |     days = 7
 87 | elif period == "month":
 88 |     days = 30
 89 | elif period == "year":
 90 |     days = 365
 91 | 
 92 | # After
 93 | PERIOD_DAYS = {"week": 7, "month": 30, "year": 365}
 94 | days = PERIOD_DAYS[period]
 95 | ```
 96 | 
 97 | ### Pattern 3: Guard Clause
 98 | **When to use:** Nested if statements, early validation
 99 | 
100 | **Example:**
101 | ```python
102 | # Before
103 | def process(data):
104 |     if data is not None:
105 |         if data.valid():
106 |             if data.ready():
107 |                 return process_data(data)
108 |     return None
109 | 
110 | # After
111 | def process(data):
112 |     if data is None:
113 |         return None
114 |     if not data.valid():
115 |         return None
116 |     if not data.ready():
117 |         return None
118 |     return process_data(data)
119 | ```
120 | 
121 | ### Pattern 4: Context Manager
122 | **When to use:** Resource management, setup/teardown logic
123 | 
124 | **Example:**
125 | ```python
126 | # Before
127 | def process():
128 |     resource = acquire()
129 |     try:
130 |         do_work(resource)
131 |     finally:
132 |         release(resource)
133 | 
134 | # After
135 | class ResourceManager:
136 |     async def __aenter__(self): ...
137 |     async def __aexit__(self, *args): ...
138 | 
139 | async def process():
140 |     async with ResourceManager() as resource:
141 |         do_work(resource)
142 | ```
143 | 
144 | ### Pattern 5: Configuration Object
145 | **When to use:** Related configuration values, multiple parameters
146 | 
147 | **Example:**
148 | ```python
149 | # Before
150 | def analyze(period, days, interval, format):
151 |     ...
152 | 
153 | # After
154 | @dataclass
155 | class AnalysisConfig:
156 |     period: str
157 |     days: int
158 |     interval: int
159 |     format: str
160 | 
161 | def analyze(config: AnalysisConfig):
162 |     ...
163 | ```
164 | 
165 | ## Testing Checklist
166 | 
167 | For each refactored function:
168 | 
169 | - [ ] **Unit tests pass** - Run `pytest tests/test_<module>.py`
170 | - [ ] **Integration tests pass** - Run `pytest tests/integration/`
171 | - [ ] **No performance regression** - Benchmark before/after
172 | - [ ] **API contracts unchanged** - Check response formats
173 | - [ ] **Edge cases tested** - Null inputs, empty lists, errors
174 | - [ ] **Documentation updated** - Docstrings, comments
175 | 
176 | ## Implementation Order
177 | 
178 | ### Sequential (Single Developer)
179 | 1. Week 1: High priority functions (7h)
180 | 2. Week 2: Medium priority functions (2.75h)
181 | 3. Week 3: Low priority quick wins (4.25h)
182 | 
183 | **Total:** 14 hours over 3 weeks
184 | 
185 | ### Parallel (Multiple Developers)
186 | 1. **Developer A:** configure_paths, detect_gpu (5h)
187 | 2. **Developer B:** cloudflare functions (2.5h)
188 | 3. **Developer C:** consolidator functions (2h)
189 | 4. **Developer D:** analytics functions (4.75h)
190 | 
191 | **Total:** ~5 hours elapsed (longest individual track; 9-10 hours with coordination overhead)
192 | 
193 | ### Prioritized (Critical Path Only)
194 | Focus on high-priority functions only:
195 | 1. configure_paths (4h)
196 | 2. _search_by_tags_internal (1.75h)
197 | 3. consolidate (1.25h)
198 | 
199 | **Total:** 7 hours for core improvements
200 | 
201 | ## Risk Mitigation
202 | 
203 | ### Critical Path Functions
204 | **Extra caution required:**
205 | - _search_by_tags_internal (core search)
206 | - consolidate (memory consolidation)
207 | - _get_memories_for_horizon (consolidation)
208 | 
209 | **Safety measures:**
210 | - Create feature branch for each
211 | - Comprehensive integration tests
212 | - Performance benchmarking
213 | - Staged rollout (dev → staging → production)
214 | 
215 | ### Low-Risk Functions
216 | **Can be batched:**
217 | - All analytics endpoints (read-only)
218 | - Setup functions (non-critical path)
219 | 
220 | **Safety measures:**
221 | - Standard unit testing
222 | - Manual smoke testing
223 | - Can be rolled back easily
224 | 
225 | ## Success Metrics
226 | 
227 | ### Quantitative Goals
228 | - [ ] Complexity score: 40 → 50+ (+10 points minimum)
229 | - [ ] Overall health: 63 → 66+ (+3 points minimum)
230 | - [ ] All 10 functions refactored successfully
231 | - [ ] Zero breaking changes
232 | - [ ] All tests passing
233 | 
234 | ### Qualitative Goals
235 | - [ ] Code easier to understand (peer review)
236 | - [ ] Functions are testable in isolation
237 | - [ ] Better separation of concerns
238 | - [ ] Improved maintainability
239 | 
240 | ## Common Pitfalls to Avoid
241 | 
242 | ### 1. Over-Extraction
243 | **Problem:** Creating too many tiny functions
244 | **Solution:** Extract only when it improves clarity (10+ lines minimum)
245 | 
246 | ### 2. Breaking API Contracts
247 | **Problem:** Changing function signatures
248 | **Solution:** Keep public APIs unchanged, refactor internals only
249 | 
250 | ### 3. Performance Regression
251 | **Problem:** Excessive function calls overhead
252 | **Solution:** Benchmark before/after, inline hot paths if needed
253 | 
254 | ### 4. Incomplete Testing
255 | **Problem:** Missing edge cases
256 | **Solution:** Test error paths, null inputs, boundary conditions
257 | 
258 | ### 5. Rushing Critical Functions
259 | **Problem:** Breaking core functionality
260 | **Solution:** Extra time for testing critical path functions
261 | 
262 | ## Command Reference
263 | 
264 | ### Run Quality Analysis
265 | ```bash
266 | # Run pyscn baseline report
267 | python -m pyscn baseline --output scripts/quality/baseline_report.txt
268 | 
269 | # Check specific function complexity
270 | python -m radon cc src/mcp_memory_service/storage/cloudflare.py -a
271 | 
272 | # Check cyclomatic complexity for all files
273 | python -m radon cc src/ -a
274 | ```
275 | 
276 | ### Run Tests
277 | ```bash
278 | # All tests
279 | pytest tests/
280 | 
281 | # Specific module
282 | pytest tests/test_storage.py
283 | 
284 | # Integration tests only
285 | pytest tests/integration/
286 | 
287 | # With coverage
288 | pytest tests/ --cov=mcp_memory_service --cov-report=html
289 | ```
290 | 
291 | ### Benchmark Performance
292 | ```bash
293 | # Before refactoring
294 | python scripts/benchmarks/run_benchmarks.py --baseline
295 | 
296 | # After refactoring
297 | python scripts/benchmarks/run_benchmarks.py --compare
298 | ```
299 | 
300 | ## Getting Help
301 | 
302 | ### Resources
303 | - **Phase 2 Analysis:** `scripts/quality/phase2_complexity_analysis.md` (detailed proposals)
304 | - **Phase 1 Results:** `scripts/quality/phase1_dead_code_analysis.md` (lessons learned)
305 | - **Complexity Guide:** `scripts/quality/complexity_scoring_guide.md` (understanding metrics)
306 | 
307 | ### Questions?
308 | - Review the detailed analysis for each function
309 | - Check the refactoring pattern examples
310 | - Test incrementally after each change
311 | - Ask for peer review on critical functions
312 | 
313 | ---
314 | 
315 | **Last Updated:** 2024-11-24
316 | **Next Review:** After Phase 2 completion
317 | 
```
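
As a concrete illustration of the "edge cases tested" checklist item above, here is a minimal, self-contained pytest sketch combining the dict-lookup (Pattern 2) and guard-clause (Pattern 3) styles; `resolve_period` is a hypothetical helper, not part of the codebase:

```python
import pytest

PERIOD_DAYS = {"week": 7, "month": 30, "year": 365}

def resolve_period(period):
    # Guard clauses make the error paths explicit instead of
    # letting a bare dict index raise KeyError.
    if period is None:
        raise TypeError("period is required")
    if period not in PERIOD_DAYS:
        raise ValueError(f"unsupported period: {period!r}")
    return PERIOD_DAYS[period]

@pytest.mark.parametrize("period,exc", [
    (None, TypeError),       # null input
    ("", ValueError),        # empty value
    ("decade", ValueError),  # unsupported period
])
def test_resolve_period_edge_cases(period, exc):
    with pytest.raises(exc):
        resolve_period(period)
```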

--------------------------------------------------------------------------------
/scripts/maintenance/restore_from_json_export.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Restore Timestamps from Clean JSON Export
  4 | 
  5 | Recovers corrupted timestamps using the clean export from the other MacBook
  6 | (v8.26, before the hybrid sync bug). Matches memories by content_hash and
  7 | restores their original creation timestamps.
  8 | 
  9 | This script:
 10 | - Reads clean timestamp mapping (content_hash → ISO timestamp)
 11 | - Matches memories in current database by content_hash
 12 | - Updates created_at and created_at_iso with original timestamps
 13 | - Preserves memories not in mapping (created after the clean export)
 14 | 
 15 | Usage:
 16 |     python scripts/maintenance/restore_from_json_export.py [--dry-run|--apply]
 17 | """
 18 | 
 19 | import json
 20 | import sqlite3
 21 | import sys
 22 | from datetime import datetime
 23 | from pathlib import Path
 24 | 
 25 | # Add src to path
 26 | sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
 27 | 
 28 | from mcp_memory_service import config
 29 | 
 30 | 
 31 | def restore_from_json(db_path: str, mapping_file: str, dry_run: bool = True):
 32 |     """
 33 |     Restore timestamps from JSON export mapping.
 34 | 
 35 |     Args:
 36 |         db_path: Path to SQLite database
 37 |         mapping_file: Path to JSON file with content_hash → timestamp mapping
 38 |         dry_run: If True, only show what would be changed
 39 |     """
 40 |     print("=" * 80)
 41 |     print("TIMESTAMP RESTORATION FROM CLEAN JSON EXPORT")
 42 |     print("=" * 80)
 43 |     print(f"Database: {db_path}")
 44 |     print(f"Mapping:  {mapping_file}")
 45 |     print(f"Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE (applying changes)'}")
 46 |     print()
 47 | 
 48 |     # Load clean timestamp mapping
 49 |     print("Loading clean timestamp mapping...")
 50 |     with open(mapping_file, 'r') as f:
 51 |         clean_mapping = json.load(f)
 52 | 
 53 |     print(f"✅ Loaded {len(clean_mapping)} clean timestamps")
 54 |     print()
 55 | 
 56 |     # Connect to database
 57 |     conn = sqlite3.connect(db_path, timeout=30.0)
 58 |     conn.execute('PRAGMA busy_timeout = 30000')
 59 |     cursor = conn.cursor()
 60 | 
 61 |     # Get all memories from current database
 62 |     print("Analyzing current database...")
 63 |     cursor.execute('''
 64 |         SELECT content_hash, created_at, created_at_iso, substr(content, 1, 60)
 65 |         FROM memories
 66 |     ''')
 67 | 
 68 |     current_memories = cursor.fetchall()
 69 |     print(f"✅ Found {len(current_memories)} memories in database")
 70 |     print()
 71 | 
 72 |     # Match and analyze
 73 |     print("=" * 80)
 74 |     print("MATCHING ANALYSIS:")
 75 |     print("=" * 80)
 76 | 
 77 |     matched = []
 78 |     unmatched = []
 79 |     already_correct = []
 80 | 
 81 |     for content_hash, created_at, created_at_iso, content_preview in current_memories:
 82 |         if content_hash in clean_mapping:
 83 |             clean_timestamp = clean_mapping[content_hash]
 84 | 
 85 |             # Check if already correct
 86 |             if created_at_iso == clean_timestamp:
 87 |                 already_correct.append(content_hash)
 88 |             else:
 89 |                 matched.append({
 90 |                     'hash': content_hash,
 91 |                     'current_iso': created_at_iso,
 92 |                     'clean_iso': clean_timestamp,
 93 |                     'content': content_preview
 94 |                 })
 95 |         else:
 96 |             unmatched.append({
 97 |                 'hash': content_hash,
 98 |                 'created_iso': created_at_iso,
 99 |                 'content': content_preview
100 |             })
101 | 
102 |     print(f"✅ Matched (will restore): {len(matched)}")
103 |     print(f"✅ Already correct: {len(already_correct)}")
104 |     print(f"⏭️  Unmatched (keep as-is): {len(unmatched)}")
105 |     print()
106 | 
107 |     # Show samples
108 |     print("=" * 80)
109 |     print("SAMPLE RESTORATIONS (first 10):")
110 |     print("=" * 80)
111 |     for i, mem in enumerate(matched[:10], 1):
112 |         print(f"{i}. Hash: {mem['hash'][:16]}...")
113 |         print(f"   CURRENT: {mem['current_iso']}")
114 |         print(f"   RESTORE: {mem['clean_iso']}")
115 |         print(f"   Content: {mem['content']}...")
116 |         print()
117 | 
118 |     if len(matched) > 10:
119 |         print(f"   ... and {len(matched) - 10} more")
120 |         print()
121 | 
122 |     # Show unmatched samples (new memories)
123 |     if unmatched:
124 |         print("=" * 80)
125 |         print("UNMATCHED MEMORIES (will keep current timestamps):")
126 |         print("=" * 80)
127 |         print(f"Total: {len(unmatched)} memories")
128 |         print("\nSample (first 5):")
129 |         for i, mem in enumerate(unmatched[:5], 1):
130 |             print(f"{i}. Hash: {mem['hash'][:16]}...")
131 |             print(f"   Created: {mem['created_iso']}")
132 |             print(f"   Content: {mem['content']}...")
133 |             print()
134 | 
135 |     if dry_run:
136 |         print("=" * 80)
137 |         print("DRY RUN COMPLETE - No changes made")
138 |         print("=" * 80)
139 |         print(f"Would restore {len(matched)} timestamps")
140 |         print(f"Would preserve {len(unmatched)} new memories")
141 |         print("\nTo apply changes, run with --apply flag")
142 |         conn.close()
143 |         return
144 | 
145 |     # Confirm before proceeding
146 |     print("=" * 80)
147 |     print(f"⚠️  ABOUT TO RESTORE {len(matched)} TIMESTAMPS")
148 |     print("=" * 80)
149 |     response = input("Continue with restoration? [y/N]: ")
150 | 
151 |     if response.lower() != 'y':
152 |         print("Restoration cancelled")
153 |         conn.close()
154 |         return
155 | 
156 |     # Apply restorations
157 |     print("\nRestoring timestamps...")
158 |     restored_count = 0
159 |     failed_count = 0
160 | 
161 |     for mem in matched:
162 |         try:
163 |             content_hash = mem['hash']
164 |             clean_iso = mem['clean_iso']
165 | 
166 |             # Convert ISO to Unix timestamp
167 |             dt = datetime.fromisoformat(clean_iso.replace('Z', '+00:00'))
168 |             clean_unix = dt.timestamp()
169 | 
170 |             # Update database
171 |             cursor.execute('''
172 |                 UPDATE memories
173 |                 SET created_at = ?, created_at_iso = ?
174 |                 WHERE content_hash = ?
175 |             ''', (clean_unix, clean_iso, content_hash))
176 | 
177 |             restored_count += 1
178 | 
179 |             if restored_count % 100 == 0:
180 |                 print(f"  Progress: {restored_count}/{len(matched)} restored...")
181 |                 conn.commit()  # Commit in batches
182 | 
183 |         except Exception as e:
184 |             print(f"  Error restoring {mem['hash'][:16]}: {e}")
185 |             failed_count += 1
186 | 
187 |     # Final commit
188 |     conn.commit()
189 | 
190 |     # Verify results
191 |     cursor.execute('''
192 |         SELECT created_at_iso, COUNT(*) as count
193 |         FROM memories
194 |         GROUP BY DATE(created_at_iso)
195 |         ORDER BY DATE(created_at_iso) DESC
196 |         LIMIT 20
197 |     ''')
198 | 
199 |     print()
200 |     print("=" * 80)
201 |     print("RESTORATION COMPLETE")
202 |     print("=" * 80)
203 |     print(f"✅ Successfully restored: {restored_count}")
204 |     print(f"❌ Failed to restore: {failed_count}")
205 |     print(f"⏭️  Preserved (new memories): {len(unmatched)}")
206 |     print()
207 | 
208 |     # Show date distribution
209 |     print("=" * 80)
210 |     print("TIMESTAMP DISTRIBUTION (After Restoration):")
211 |     print("=" * 80)
212 | 
213 |     from collections import Counter
214 |     cursor.execute('SELECT created_at_iso FROM memories')
215 |     dates = Counter()
216 |     for row in cursor.fetchall():
217 |         date_str = row[0][:10] if row[0] else 'Unknown'
218 |         dates[date_str] += 1
219 | 
220 |     for date, count in dates.most_common(15):
221 |         print(f"  {date}: {count:4} memories")
222 | 
223 |     # Check corruption remaining
224 |     corruption_dates = {'2025-11-16', '2025-11-17', '2025-11-18'}
225 |     corrupted_remaining = sum(count for date, count in dates.items() if date in corruption_dates)
226 | 
227 |     print()
228 |     print(f"Corrupted dates remaining: {corrupted_remaining}")
229 |     print(f"Expected: ~250-400 (legitimately created Nov 16-18)")
230 | 
231 |     conn.close()
232 | 
233 |     if failed_count == 0 and corrupted_remaining < 500:
234 |         print("\n🎉 SUCCESS: Timestamps restored successfully!")
235 |     else:
236 |         print(f"\n⚠️  WARNING: Some issues occurred during restoration")
237 | 
238 | 
239 | if __name__ == "__main__":
240 |     dry_run = '--apply' not in sys.argv
241 | 
242 |     db_path = config.SQLITE_VEC_PATH
243 |     mapping_file = Path(__file__).parent.parent.parent / "clean_timestamp_mapping.json"
244 | 
245 |     if not mapping_file.exists():
246 |         print(f"❌ ERROR: Mapping file not found: {mapping_file}")
247 |         print("Run Phase 1 first to extract the clean timestamp mapping")
248 |         sys.exit(1)
249 | 
250 |     try:
251 |         restore_from_json(str(db_path), str(mapping_file), dry_run=dry_run)
252 |     except KeyboardInterrupt:
253 |         print("\n\nRestoration cancelled by user")
254 |         sys.exit(1)
255 |     except Exception as e:
256 |         print(f"\n❌ Restoration failed: {e}")
257 |         import traceback
258 |         traceback.print_exc()
259 |         sys.exit(1)
260 | 
```
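
The script indexes `clean_mapping` directly by content hash and parses each value with `datetime.fromisoformat`, so the mapping file must be a flat JSON object of content_hash → ISO-8601 timestamp strings. A sketch of producing and validating such a file (hash and timestamp values below are illustrative only):

```python
import json
from datetime import datetime

# Illustrative entries only; real keys are full content hashes.
mapping = {
    "3f9a1c0de5b24a78": "2025-09-14T08:21:07+00:00",
    "a81b7e2c9f03d641": "2025-10-02T17:45:33Z",
}

# Same parse the restore script applies, so bad entries fail fast here.
for content_hash, ts in mapping.items():
    datetime.fromisoformat(ts.replace("Z", "+00:00"))

with open("clean_timestamp_mapping.json", "w") as f:
    json.dump(mapping, f, indent=2)
```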

--------------------------------------------------------------------------------
/scripts/migration/mcp-migration.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | # Copyright 2024 Heinrich Krupp
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Enhanced migration script for MCP Memory Service.
 18 | This script handles migration of memories between different ChromaDB instances,
 19 | with support for both local and remote migrations.
 20 | """
 21 | import sys
 22 | import os
 23 | from dotenv import load_dotenv
 24 | from pathlib import Path
 25 | import chromadb
 26 | from chromadb import HttpClient, Settings
 27 | import json
 28 | import time
 29 | from chromadb.utils import embedding_functions
 30 | 
 31 | # Import our environment verifier
 32 | from verify_environment import EnvironmentVerifier
 33 | 
 34 | def verify_environment():
 35 |     """Verify the environment before proceeding with migration"""
 36 |     verifier = EnvironmentVerifier()
 37 |     verifier.run_verifications()
 38 |     if not verifier.print_results():
 39 |         print("\n⚠️  Environment verification failed! Migration cannot proceed.")
 40 |         sys.exit(1)
 41 |     print("\n✓ Environment verification passed! Proceeding with migration.")
 42 | 
 43 | # Load environment variables
 44 | load_dotenv()
 45 | 
 46 | def get_claude_desktop_chroma_path():
 47 |     """Get ChromaDB path from Claude Desktop config"""
 48 |     base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 49 |     config_path = os.path.join(base_path, 'claude_config', 'mcp-memory', 'chroma_db')
 50 |     print(f"Using ChromaDB path: {config_path}")
 51 |     return config_path
 52 | 
 53 | def migrate_memories(source_type, source_config, target_type, target_config):
 54 |     """
 55 |     Migrate memories between ChromaDB instances.
 56 |     
 57 |     Args:
 58 |         source_type: 'local' or 'remote'
 59 |         source_config: For local: path to ChromaDB, for remote: {'host': host, 'port': port}
 60 |         target_type: 'local' or 'remote'
 61 |         target_config: For local: path to ChromaDB, for remote: {'host': host, 'port': port}
 62 |     """
 63 |     print(f"Starting migration from {source_type} to {target_type}")
 64 |     
 65 |     try:
 66 |         # Set up embedding function
 67 |         embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
 68 |             model_name='all-MiniLM-L6-v2'
 69 |         )
 70 |         
 71 |         # Connect to target ChromaDB
 72 |         if target_type == 'remote':
 73 |             target_client = HttpClient(
 74 |                 host=target_config['host'],
 75 |                 port=target_config['port']
 76 |             )
 77 |             print(f"Connected to remote ChromaDB at {target_config['host']}:{target_config['port']}")
 78 |         else:
 79 |             settings = Settings(
 80 |                 anonymized_telemetry=False,
 81 |                 allow_reset=True,
 82 |                 is_persistent=True,
 83 |                 persist_directory=target_config
 84 |             )
 85 |             target_client = chromadb.Client(settings)
 86 |             print(f"Connected to local ChromaDB at {target_config}")
 87 |         
 88 |         # Get or create collection for imported memories
 89 |         try:
 90 |             target_collection = target_client.get_collection(
 91 |                 name="mcp_imported_memories",
 92 |                 embedding_function=embedding_function
 93 |             )
 94 |             print("Found existing collection 'mcp_imported_memories' on target")
 95 |         except Exception:
 96 |             target_collection = target_client.create_collection(
 97 |                 name="mcp_imported_memories",
 98 |                 metadata={"hnsw:space": "cosine"},
 99 |                 embedding_function=embedding_function
100 |             )
101 |             print("Created new collection 'mcp_imported_memories' on target")
102 |         
103 |         # Connect to source ChromaDB
104 |         if source_type == 'remote':
105 |             source_client = HttpClient(
106 |                 host=source_config['host'],
107 |                 port=source_config['port']
108 |             )
109 |             print(f"Connected to remote ChromaDB at {source_config['host']}:{source_config['port']}")
110 |         else:
111 |             settings = Settings(
112 |                 anonymized_telemetry=False,
113 |                 allow_reset=True,
114 |                 is_persistent=True,
115 |                 persist_directory=source_config
116 |             )
117 |             source_client = chromadb.Client(settings)
118 |             print(f"Connected to local ChromaDB at {source_config}")
119 |         
120 |         # List collections
121 |         collections = source_client.list_collections()
122 |         print(f"Found {len(collections)} collections in source")
123 |         for coll in collections:
124 |             print(f"- {coll.name}")
125 |         
126 |         # Try to get the memory collection
127 |         try:
128 |             source_collection = source_client.get_collection(
129 |                 name="memory_collection",
130 |                 embedding_function=embedding_function
131 |             )
132 |             print("Found source memory collection")
133 |         except Exception as e:
134 |             print(f"Error accessing source collection: {str(e)}")
135 |             return
136 |             
137 |         # Get all memories from source
138 |         print("Fetching source memories...")
139 |         results = source_collection.get()
140 |         
141 |         if not results["ids"]:
142 |             print("No memories found in source collection")
143 |             return
144 |             
145 |         print(f"Found {len(results['ids'])} memories to migrate")
146 |         
147 |         # Check for existing memories in target to avoid duplicates
148 |         target_existing = target_collection.get()
149 |         existing_ids = set(target_existing["ids"])
150 |         
151 |         # Filter out already migrated memories
152 |         new_memories = {
153 |             "ids": [],
154 |             "documents": [],
155 |             "metadatas": []
156 |         }
157 |         
158 |         for i, memory_id in enumerate(results["ids"]):
159 |             if memory_id not in existing_ids:
160 |                 new_memories["ids"].append(memory_id)
161 |                 new_memories["documents"].append(results["documents"][i])
162 |                 new_memories["metadatas"].append(results["metadatas"][i])
163 |         
164 |         if not new_memories["ids"]:
165 |             print("All memories are already migrated!")
166 |             return
167 |             
168 |         print(f"Found {len(new_memories['ids'])} new memories to migrate")
169 |         
170 |         # Import in batches of 10
171 |         batch_size = 10
172 |         for i in range(0, len(new_memories['ids']), batch_size):
173 |             batch_end = min(i + batch_size, len(new_memories['ids']))
174 |             
175 |             batch_ids = new_memories['ids'][i:batch_end]
176 |             batch_documents = new_memories['documents'][i:batch_end]
177 |             batch_metadatas = new_memories['metadatas'][i:batch_end]
178 |             
179 |             print(f"Migrating batch {i//batch_size + 1} ({len(batch_ids)} memories)...")
180 |             
181 |             target_collection.add(
182 |                 documents=batch_documents,
183 |                 metadatas=batch_metadatas,
184 |                 ids=batch_ids
185 |             )
186 |             
187 |             # Small delay between batches
188 |             time.sleep(1)
189 |         
190 |         print("\nMigration complete!")
191 |         
192 |         # Verify migration
193 |         target_results = target_collection.get()
194 |         print(f"Verification: {len(target_results['ids'])} total memories in target collection")
195 |         
196 |     except Exception as e:
197 |         print(f"Error during migration: {str(e)}")
198 |         print("Please ensure both ChromaDB instances are running and accessible")
199 | 
200 | if __name__ == "__main__":
201 |     # First verify the environment
202 |     verify_environment()
203 |     
204 |     # Example usage:
205 |     # Local to remote migration
206 |     migrate_memories(
207 |         source_type='local',
208 |         source_config=get_claude_desktop_chroma_path(),
209 |         target_type='remote',
210 |         target_config={'host': '16.171.169.46', 'port': 8000}
211 |     )
212 |     
213 |     # Remote to local migration
214 |     # migrate_memories(
215 |     #     source_type='remote',
216 |     #     source_config={'host': '16.171.169.46', 'port': 8000},
217 |     #     target_type='local',
218 |     #     target_config=get_claude_desktop_chroma_path()
219 |     # )
220 | 
```
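
Because the source and target endpoints are hard-coded in the `__main__` block, one option is to drive them from the command line instead of editing the script. A sketch, assuming `migrate_memories` and `get_claude_desktop_chroma_path` are importable as defined above:

```python
import argparse

parser = argparse.ArgumentParser(description="ChromaDB memory migration")
parser.add_argument("--target-host", default="localhost")
parser.add_argument("--target-port", type=int, default=8000)
args = parser.parse_args()

# With the functions from mcp-migration.py in scope:
# migrate_memories(
#     source_type="local",
#     source_config=get_claude_desktop_chroma_path(),
#     target_type="remote",
#     target_config={"host": args.target_host, "port": args.target_port},
# )
```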

--------------------------------------------------------------------------------
/scripts/quality/weekly_quality_review.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | # scripts/quality/weekly_quality_review.sh - Weekly code quality review
  3 | #
  4 | # Usage: bash scripts/quality/weekly_quality_review.sh [--create-issue]
  5 | #
  6 | # Features:
  7 | # - Run pyscn analysis
  8 | # - Compare to last week's metrics
  9 | # - Generate markdown trend report
 10 | # - Optionally create GitHub issue if health score dropped >5%
 11 | 
 12 | set -e
 13 | 
 14 | # Colors for output
 15 | RED='\033[0;31m'
 16 | YELLOW='\033[1;33m'
 17 | GREEN='\033[0;32m'
 18 | BLUE='\033[0;34m'
 19 | NC='\033[0m' # No Color
 20 | 
 21 | # Parse arguments
 22 | CREATE_ISSUE=false
 23 | if [ "$1" = "--create-issue" ]; then
 24 |     CREATE_ISSUE=true
 25 | fi
 26 | 
 27 | echo -e "${BLUE}=== Weekly Quality Review ===${NC}"
 28 | echo ""
 29 | 
 30 | # Run metrics tracking
 31 | echo "Running pyscn metrics tracking..."
 32 | if bash scripts/quality/track_pyscn_metrics.sh > /tmp/weekly_review.log 2>&1; then
 33 |     echo -e "${GREEN}✓${NC} Metrics tracking complete"
 34 | else
 35 |     echo -e "${RED}❌ Metrics tracking failed${NC}"
 36 |     cat /tmp/weekly_review.log
 37 |     exit 1
 38 | fi
 39 | 
 40 | # Extract current and previous metrics
 41 | CSV_FILE=".pyscn/history/metrics.csv"
 42 | 
 43 | if [ ! -f "$CSV_FILE" ] || [ $(wc -l < "$CSV_FILE") -lt 2 ]; then
 44 |     echo -e "${YELLOW}⚠️  Insufficient data for weekly review (need at least 1 previous run)${NC}"
 45 |     exit 0
 46 | fi
 47 | 
 48 | # Get current (last line) and previous (second to last) metrics
 49 | CURRENT_LINE=$(tail -1 "$CSV_FILE")
 50 | CURRENT_HEALTH=$(echo "$CURRENT_LINE" | cut -d',' -f3)
 51 | CURRENT_DATE=$(echo "$CURRENT_LINE" | cut -d',' -f2)
 52 | CURRENT_COMPLEXITY=$(echo "$CURRENT_LINE" | cut -d',' -f4)
 53 | CURRENT_DUPLICATION=$(echo "$CURRENT_LINE" | cut -d',' -f6)
 54 | 
 55 | # Find last week's metrics (7+ days ago)
 56 | SEVEN_DAYS_AGO=$(date -v-7d +%Y%m%d 2>/dev/null || date -d "7 days ago" +%Y%m%d)
 57 | PREV_LINE=$(awk -F',' -v cutoff="$SEVEN_DAYS_AGO" '$1 < cutoff {last=$0} END {print last}' "$CSV_FILE")
 58 | 
 59 | if [ -z "$PREV_LINE" ]; then
 60 |     # Fallback to most recent previous entry if no 7-day-old entry exists
 61 |     PREV_LINE=$(tail -2 "$CSV_FILE" | head -1)
 62 | fi
 63 | 
 64 | PREV_HEALTH=$(echo "$PREV_LINE" | cut -d',' -f3)
 65 | PREV_DATE=$(echo "$PREV_LINE" | cut -d',' -f2)
 66 | PREV_COMPLEXITY=$(echo "$PREV_LINE" | cut -d',' -f4)
 67 | PREV_DUPLICATION=$(echo "$PREV_LINE" | cut -d',' -f6)
 68 | 
 69 | # Calculate deltas
 70 | HEALTH_DELTA=$((CURRENT_HEALTH - PREV_HEALTH))
 71 | COMPLEXITY_DELTA=$((CURRENT_COMPLEXITY - PREV_COMPLEXITY))
 72 | DUPLICATION_DELTA=$((CURRENT_DUPLICATION - PREV_DUPLICATION))
 73 | 
 74 | echo ""
 75 | echo -e "${BLUE}=== Weekly Comparison ===${NC}"
 76 | echo "Period: $(echo "$PREV_DATE" | cut -d' ' -f1) → $(echo "$CURRENT_DATE" | cut -d' ' -f1)"
 77 | echo ""
 78 | echo "Health Score:"
 79 | echo "  Previous: $PREV_HEALTH/100"
 80 | echo "  Current:  $CURRENT_HEALTH/100"
 81 | echo "  Change:   $([ $HEALTH_DELTA -ge 0 ] && echo "+")$HEALTH_DELTA points"
 82 | echo ""
 83 | 
 84 | # Determine overall trend
 85 | TREND_EMOJI="➡️"
 86 | TREND_TEXT="Stable"
 87 | 
 88 | if [ $HEALTH_DELTA -gt 5 ]; then
 89 |     TREND_EMOJI="📈"
 90 |     TREND_TEXT="Improving"
 91 | elif [ $HEALTH_DELTA -lt -5 ]; then
 92 |     TREND_EMOJI="📉"
 93 |     TREND_TEXT="Declining"
 94 | fi
 95 | 
 96 | echo -e "${TREND_EMOJI} Trend: ${TREND_TEXT}"
 97 | echo ""
 98 | 
 99 | # Generate markdown report
100 | REPORT_FILE="docs/development/quality-review-$(date +%Y%m%d).md"
101 | mkdir -p docs/development
102 | 
103 | cat > "$REPORT_FILE" <<EOF
104 | # Weekly Quality Review - $(date +"%B %d, %Y")
105 | 
106 | ## Summary
107 | 
108 | **Overall Trend:** ${TREND_EMOJI} ${TREND_TEXT}
109 | 
110 | | Metric | Previous | Current | Change |
111 | |--------|----------|---------|--------|
112 | | Health Score | $PREV_HEALTH/100 | $CURRENT_HEALTH/100 | $([ $HEALTH_DELTA -ge 0 ] && echo "+")$HEALTH_DELTA |
113 | | Complexity | $PREV_COMPLEXITY/100 | $CURRENT_COMPLEXITY/100 | $([ $COMPLEXITY_DELTA -ge 0 ] && echo "+")$COMPLEXITY_DELTA |
114 | | Duplication | $PREV_DUPLICATION/100 | $CURRENT_DUPLICATION/100 | $([ $DUPLICATION_DELTA -ge 0 ] && echo "+")$DUPLICATION_DELTA |
115 | 
116 | ## Analysis Period
117 | 
118 | - **Start**: $(echo "$PREV_DATE" | cut -d' ' -f1)
119 | - **End**: $(echo "$CURRENT_DATE" | cut -d' ' -f1)
120 | - **Duration**: ~7 days
121 | 
122 | ## Status
123 | 
124 | EOF
125 | 
126 | if [ $CURRENT_HEALTH -lt 50 ]; then
127 |     cat >> "$REPORT_FILE" <<EOF
128 | ### 🔴 Critical - Release Blocker
129 | 
130 | Health score below 50 requires immediate action:
131 | - Cannot merge PRs until resolved
132 | - Focus on refactoring high-complexity functions
133 | - Remove dead code
134 | - Address duplication
135 | 
136 | **Action Items:**
137 | 1. Review full pyscn report: \`.pyscn/reports/analyze_*.html\`
138 | 2. Create refactoring tasks for complexity >10 functions
139 | 3. Schedule refactoring sprint (target: 2 weeks)
140 | 4. Track progress in issue #240
141 | 
142 | EOF
143 | elif [ $CURRENT_HEALTH -lt 70 ]; then
144 |     cat >> "$REPORT_FILE" <<EOF
145 | ### ⚠️  Action Required
146 | 
147 | Health score 50-69 indicates technical debt accumulation:
148 | - Plan refactoring sprint within 2 weeks
149 | - Review high-complexity functions
150 | - Track improvement progress
151 | 
152 | **Recommended Actions:**
153 | 1. Identify top 5 complexity hotspots
154 | 2. Create project board for tracking
155 | 3. Allocate 20% of sprint capacity to quality improvements
156 | 
157 | EOF
158 | else
159 |     cat >> "$REPORT_FILE" <<EOF
160 | ### ✅ Acceptable
161 | 
162 | Health score ≥70 indicates good code quality:
163 | - Continue current development practices
164 | - Monitor trends for regressions
165 | - Address new issues proactively
166 | 
167 | **Maintenance:**
168 | - Monthly quality reviews
169 | - Track complexity trends
170 | - Keep health score above 70
171 | 
172 | EOF
173 | fi
174 | 
175 | # Add trend observations
176 | cat >> "$REPORT_FILE" <<EOF
177 | ## Observations
178 | 
179 | EOF
180 | 
181 | if [ $HEALTH_DELTA -gt 5 ]; then
182 |     cat >> "$REPORT_FILE" <<EOF
183 | - ✅ **Health score improved by $HEALTH_DELTA points** - Great progress on code quality
184 | EOF
185 | elif [ $HEALTH_DELTA -lt -5 ]; then
186 |     cat >> "$REPORT_FILE" <<EOF
187 | - ⚠️  **Health score declined by ${HEALTH_DELTA#-} points** - Quality regression detected
188 | EOF
189 | fi
190 | 
191 | if [ $COMPLEXITY_DELTA -gt 0 ]; then
192 |     cat >> "$REPORT_FILE" <<EOF
193 | - ✅ Complexity score improved - Refactoring efforts paying off
194 | EOF
195 | elif [ $COMPLEXITY_DELTA -lt 0 ]; then
196 |     cat >> "$REPORT_FILE" <<EOF
197 | - ⚠️  Complexity score declined - New complex code introduced
198 | EOF
199 | fi
200 | 
201 | if [ $DUPLICATION_DELTA -lt 0 ]; then
202 |     cat >> "$REPORT_FILE" <<EOF
203 | - ⚠️  Code duplication increased - Review for consolidation opportunities
204 | EOF
205 | elif [ $DUPLICATION_DELTA -gt 0 ]; then
206 |     cat >> "$REPORT_FILE" <<EOF
207 | - ✅ Code duplication reduced - Good refactoring work
208 | EOF
209 | fi
210 | 
211 | cat >> "$REPORT_FILE" <<EOF
212 | 
213 | ## Next Steps
214 | 
215 | 1. Review detailed pyscn report for specific issues
216 | 2. Update project board with quality improvement tasks
217 | 3. Schedule next weekly review for $(date -v+7d +"%B %d, %Y" 2>/dev/null || date -d "7 days" +"%B %d, %Y")
218 | 
219 | ## Resources
220 | 
221 | - [Full pyscn Report](.pyscn/reports/)
222 | - [Metrics History](.pyscn/history/metrics.csv)
223 | - [Code Quality Workflow](docs/development/code-quality-workflow.md)
224 | - [Issue #240](https://github.com/doobidoo/mcp-memory-service/issues/240) - Quality improvements tracking
225 | 
226 | EOF
227 | 
228 | echo -e "${GREEN}✓${NC} Report generated: $REPORT_FILE"
229 | echo ""
230 | 
231 | # Create GitHub issue if significant regression and flag enabled
232 | if [ "$CREATE_ISSUE" = true ] && [ $HEALTH_DELTA -lt -5 ]; then
233 |     if command -v gh &> /dev/null; then
234 |         echo -e "${YELLOW}Creating GitHub issue for quality regression...${NC}"
235 | 
236 |         ISSUE_BODY="## Quality Regression Detected
237 | 
238 | Weekly quality review detected a significant health score decline:
239 | 
240 | **Health Score Change:** $PREV_HEALTH → $CURRENT_HEALTH (${HEALTH_DELTA} points)
241 | 
242 | ### Details
243 | 
244 | $(sed -n '/## Summary/,/## Next Steps/p' "$REPORT_FILE" | sed '$d')
245 | 
246 | ### Action Required
247 | 
248 | 1. Review full weekly report: [\`$REPORT_FILE\`]($REPORT_FILE)
249 | 2. Investigate recent changes: \`git log --since='$PREV_DATE'\`
250 | 3. Prioritize quality improvements in next sprint
251 | 
252 | ### Related
253 | 
254 | - Issue #240 - Code Quality Improvements
255 | - [pyscn Report](.pyscn/reports/)
256 | "
257 | 
258 |         gh issue create \
259 |             --title "Weekly Quality Review: Health Score Regression (${HEALTH_DELTA} points)" \
260 |             --body "$ISSUE_BODY" \
261 |             --label "technical-debt,quality"
262 | 
263 |         echo -e "${GREEN}✓${NC} GitHub issue created"
264 |     else
265 |         echo -e "${YELLOW}⚠️  gh CLI not found, skipping issue creation${NC}"
266 |     fi
267 | fi
268 | 
269 | echo ""
270 | echo -e "${BLUE}=== Summary ===${NC}"
271 | echo "Review Period: $(echo "$PREV_DATE" | cut -d' ' -f1) → $(echo "$CURRENT_DATE" | cut -d' ' -f1)"
272 | echo "Health Score: $PREV_HEALTH → $CURRENT_HEALTH ($([ $HEALTH_DELTA -ge 0 ] && echo "+")$HEALTH_DELTA)"
273 | echo "Trend: ${TREND_EMOJI} ${TREND_TEXT}"
274 | echo ""
275 | echo "Report: $REPORT_FILE"
276 | echo ""
277 | echo -e "${GREEN}✓${NC} Weekly review complete"
278 | exit 0
279 | 
```
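
The `cut` and `awk` field numbers imply a column layout for `.pyscn/history/metrics.csv`: field 1 a YYYYMMDD sort key, field 2 the timestamp, field 3 health, field 4 complexity, field 6 duplication. A reader sketch under that inferred layout (verify against `track_pyscn_metrics.sh` before relying on it):

```python
import csv

with open(".pyscn/history/metrics.csv") as f:
    for row in csv.reader(f):
        # Columns inferred from the shell script's cut -f numbers.
        sort_key, timestamp = row[0], row[1]
        health, complexity, duplication = int(row[2]), int(row[3]), int(row[5])
        print(timestamp, health, complexity, duplication)
```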

--------------------------------------------------------------------------------
/src/mcp_memory_service/web/api/health.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | Health check endpoints for the HTTP interface.
 17 | """
 18 | 
 19 | import time
 20 | import platform
 21 | import psutil
 22 | from datetime import datetime, timezone
 23 | from typing import Dict, Any, Optional, TYPE_CHECKING
 24 | 
 25 | from fastapi import APIRouter, Depends
 26 | from pydantic import BaseModel
 27 | 
 28 | from ...storage.base import MemoryStorage
 29 | from ..dependencies import get_storage
 30 | from ... import __version__
 31 | from ...config import OAUTH_ENABLED
 32 | 
 33 | # OAuth authentication imports (conditional)
 34 | if OAUTH_ENABLED or TYPE_CHECKING:
 35 |     from ..oauth.middleware import require_read_access, AuthenticationResult
 36 | else:
 37 |     # Provide type stubs when OAuth is disabled
 38 |     AuthenticationResult = None
 39 |     require_read_access = None
 40 | 
 41 | router = APIRouter()
 42 | 
 43 | 
 44 | class HealthResponse(BaseModel):
 45 |     """Basic health check response."""
 46 |     status: str
 47 |     version: str
 48 |     timestamp: str
 49 |     uptime_seconds: float
 50 | 
 51 | 
 52 | class DetailedHealthResponse(BaseModel):
 53 |     """Detailed health check response."""
 54 |     status: str
 55 |     version: str
 56 |     timestamp: str
 57 |     uptime_seconds: float
 58 |     storage: Dict[str, Any]
 59 |     system: Dict[str, Any]
 60 |     performance: Dict[str, Any]
 61 |     statistics: Optional[Dict[str, Any]] = None
 62 | 
 63 | 
 64 | # Track startup time for uptime calculation
 65 | _startup_time = time.time()
 66 | 
 67 | 
 68 | @router.get("/health", response_model=HealthResponse)
 69 | async def health_check():
 70 |     """Basic health check endpoint."""
 71 |     return HealthResponse(
 72 |         status="healthy",
 73 |         version=__version__,
 74 |         timestamp=datetime.now(timezone.utc).isoformat(),
 75 |         uptime_seconds=time.time() - _startup_time
 76 |     )
 77 | 
 78 | 
 79 | @router.get("/health/detailed", response_model=DetailedHealthResponse)
 80 | async def detailed_health_check(
 81 |     storage: MemoryStorage = Depends(get_storage),
 82 |     user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
 83 | ):
 84 |     """Detailed health check with system and storage information."""
 85 |     
 86 |     # Get system information
 87 |     memory_info = psutil.virtual_memory()
 88 |     disk_info = psutil.disk_usage('/')
 89 |     
 90 |     system_info = {
 91 |         "platform": platform.system(),
 92 |         "platform_version": platform.version(),
 93 |         "python_version": platform.python_version(),
 94 |         "cpu_count": psutil.cpu_count(),
 95 |         "memory_total_gb": round(memory_info.total / (1024**3), 2),
 96 |         "memory_available_gb": round(memory_info.available / (1024**3), 2),
 97 |         "memory_percent": memory_info.percent,
 98 |         "disk_total_gb": round(disk_info.total / (1024**3), 2),
 99 |         "disk_free_gb": round(disk_info.free / (1024**3), 2),
100 |         "disk_percent": round((disk_info.used / disk_info.total) * 100, 2)
101 |     }
102 |     
103 |     # Get storage information (support all storage backends)
104 |     try:
105 |         # Get statistics from storage using universal get_stats() method
106 |         if hasattr(storage, 'get_stats') and callable(getattr(storage, 'get_stats')):
107 |             # All storage backends now have async get_stats()
108 |             stats = await storage.get_stats()
109 |         else:
110 |             stats = {"error": "Storage backend doesn't support statistics"}
111 | 
112 |         if "error" not in stats:
113 |             # Detect backend type from storage class or stats
114 |             backend_name = stats.get("storage_backend", storage.__class__.__name__)
115 |             if "sqlite" in backend_name.lower():
116 |                 backend_type = "sqlite-vec"
117 |             elif "cloudflare" in backend_name.lower():
118 |                 backend_type = "cloudflare"
119 |             elif "hybrid" in backend_name.lower():
120 |                 backend_type = "hybrid"
121 |             else:
122 |                 backend_type = backend_name
123 | 
124 |             storage_info = {
125 |                 "backend": backend_type,
126 |                 "status": "connected",
127 |                 "accessible": True
128 |             }
129 | 
130 |             # Add backend-specific information if available
131 |             if hasattr(storage, 'db_path'):
132 |                 storage_info["database_path"] = storage.db_path
133 |             if hasattr(storage, 'embedding_model_name'):
134 |                 storage_info["embedding_model"] = storage.embedding_model_name
135 | 
136 |             # Add sync status for hybrid backend
137 |             if backend_type == "hybrid" and hasattr(storage, 'get_sync_status'):
138 |                 try:
139 |                     sync_status = await storage.get_sync_status()
140 |                     storage_info["sync_status"] = {
141 |                         "is_running": sync_status.get('is_running', False),
142 |                         "last_sync_time": sync_status.get('last_sync_time', 0),
143 |                         "pending_operations": sync_status.get('pending_operations', 0),
144 |                         "operations_processed": sync_status.get('operations_processed', 0),
145 |                         "operations_failed": sync_status.get('operations_failed', 0),
146 |                         "time_since_last_sync": time.time() - sync_status.get('last_sync_time', 0) if sync_status.get('last_sync_time', 0) > 0 else 0
147 |                     }
148 |                 except Exception as sync_err:
149 |                     storage_info["sync_status"] = {"error": str(sync_err)}
150 | 
151 |             # Merge all stats
152 |             storage_info.update(stats)
153 |         else:
154 |             storage_info = {
155 |                 "backend": storage.__class__.__name__,
156 |                 "status": "error",
157 |                 "accessible": False,
158 |                 "error": stats["error"]
159 |             }
160 | 
161 |     except Exception as e:
162 |         storage_info = {
163 |             "backend": storage.__class__.__name__ if hasattr(storage, '__class__') else "unknown",
164 |             "status": "error",
165 |             "error": str(e)
166 |         }
167 |     
168 |     # Performance metrics (basic for now)
169 |     performance_info = {
170 |         "uptime_seconds": time.time() - _startup_time,
171 |         "uptime_formatted": format_uptime(time.time() - _startup_time)
172 |     }
173 |     
174 |     # Extract statistics for separate field if available
175 |     statistics = {
176 |         "total_memories": storage_info.get("total_memories", 0),
177 |         "unique_tags": storage_info.get("unique_tags", 0),
178 |         "memories_this_week": storage_info.get("memories_this_week", 0),
179 |         "database_size_mb": storage_info.get("database_size_mb", 0),
180 |         "backend": storage_info.get("backend", "sqlite-vec")
181 |     }
182 |     
183 |     return DetailedHealthResponse(
184 |         status="healthy",
185 |         version=__version__,
186 |         timestamp=datetime.now(timezone.utc).isoformat(),
187 |         uptime_seconds=time.time() - _startup_time,
188 |         storage=storage_info,
189 |         system=system_info,
190 |         performance=performance_info,
191 |         statistics=statistics
192 |     )
193 | 
194 | 
195 | @router.get("/health/sync-status")
196 | async def sync_status(
197 |     storage: MemoryStorage = Depends(get_storage),
198 |     user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
199 | ):
200 |     """Get current initial sync status for hybrid storage."""
201 | 
202 |     # Check if this is a hybrid storage that supports sync status
203 |     if hasattr(storage, 'get_initial_sync_status'):
204 |         sync_status = storage.get_initial_sync_status()
205 |         return {
206 |             "sync_supported": True,
207 |             "status": sync_status
208 |         }
209 |     else:
210 |         return {
211 |             "sync_supported": False,
212 |             "status": {
213 |                 "in_progress": False,
214 |                 "total": 0,
215 |                 "completed": 0,
216 |                 "finished": True,
217 |                 "progress_percentage": 100
218 |             }
219 |         }
220 | 
221 | 
222 | def format_uptime(seconds: float) -> str:
223 |     """Format uptime in human-readable format."""
224 |     if seconds < 60:
225 |         return f"{seconds:.1f} seconds"
226 |     elif seconds < 3600:
227 |         return f"{seconds/60:.1f} minutes"
228 |     elif seconds < 86400:
229 |         return f"{seconds/3600:.1f} hours"
230 |     else:
231 |         return f"{seconds/86400:.1f} days"
```
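
A minimal client check of these endpoints; the base URL and the `/api` mount prefix are assumptions, since the router's mount point is not shown on this page:

```python
import httpx

resp = httpx.get("http://localhost:8000/api/health")
resp.raise_for_status()
data = resp.json()
print(data["status"], data["version"], f"uptime {data['uptime_seconds']:.0f}s")
```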

--------------------------------------------------------------------------------
/scripts/migration/migrate_tags.py:
--------------------------------------------------------------------------------

```python
  1 | # Copyright 2024 Heinrich Krupp
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # scripts/migrate_tags.py
 16 | # python scripts/validate_memories.py --db-path /path/to/your/chroma_db
 17 | 
 18 | import asyncio
 19 | import json
 20 | import logging
 21 | from datetime import datetime
 22 | from pathlib import Path
 23 | from mcp_memory_service.storage.chroma import ChromaMemoryStorage
 24 | import argparse 
 25 | 
 26 | logger = logging.getLogger(__name__)
 27 | 
 28 | async def analyze_tag_formats(metadatas):
 29 |     """Analyze the current tag formats in the database"""
 30 |     formats = {
 31 |         "json_string": 0,
 32 |         "raw_list": 0,
 33 |         "comma_string": 0,
 34 |         "empty": 0,
 35 |         "invalid": 0
 36 |     }
 37 |     
 38 |     for meta in metadatas:
 39 |         tags = meta.get("tags")
 40 |         if tags is None:
 41 |             formats["empty"] += 1
 42 |             continue
 43 |             
 44 |         if isinstance(tags, list):
 45 |             formats["raw_list"] += 1
 46 |         elif isinstance(tags, str):
 47 |             try:
 48 |                 parsed = json.loads(tags)
 49 |                 if isinstance(parsed, list):
 50 |                     formats["json_string"] += 1
 51 |                 else:
 52 |                     formats["invalid"] += 1
 53 |             except json.JSONDecodeError:
 54 |                 if "," in tags:
 55 |                     formats["comma_string"] += 1
 56 |                 else:
 57 |                     formats["invalid"] += 1
 58 |         else:
 59 |             formats["invalid"] += 1
 60 |             
 61 |     return formats
 62 | 
 63 | async def find_invalid_tags(metadatas):
 64 |     """Find any invalid tag formats"""
 65 |     invalid_entries = []
 66 |     
 67 |     for i, meta in enumerate(metadatas):
 68 |         tags = meta.get("tags")
 69 |         if tags is None:
 70 |             continue
 71 |             
 72 |         try:
 73 |             if isinstance(tags, str):
 74 |                 json.loads(tags)
 75 |         except json.JSONDecodeError:
 76 |             invalid_entries.append({
 77 |                 "memory_id": meta.get("content_hash", f"index_{i}"),
 78 |                 "tags": tags
 79 |             })
 80 |             
 81 |     return invalid_entries
 82 | 
 83 | async def backup_memories(storage):
 84 |     """Create a backup of all memories"""
 85 |     results = storage.collection.get(include=["metadatas", "documents"])
 86 |     
 87 |     backup_data = {
 88 |         "timestamp": datetime.now().isoformat(),
 89 |         "memories": [{
 90 |             "id": results["ids"][i],
 91 |             "content": results["documents"][i],
 92 |             "metadata": results["metadatas"][i]
 93 |         } for i in range(len(results["ids"]))]
 94 |     }
 95 |     
 96 |     backup_path = Path("backups")
 97 |     backup_path.mkdir(exist_ok=True)
 98 |     
 99 |     backup_file = backup_path / f"memory_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
100 |     with open(backup_file, 'w') as f:
101 |         json.dump(backup_data, f)
102 |     
103 |     return backup_file
104 | 
105 | async def validate_current_state(storage):
106 |     """Validate the current state of the database"""
107 |     results = storage.collection.get(include=["metadatas"])
108 |     return {
109 |         "total_memories": len(results["ids"]),
110 |         "tag_formats": await analyze_tag_formats(results["metadatas"]),
111 |         "invalid_tags": await find_invalid_tags(results["metadatas"])
112 |     }
113 | 
114 | async def migrate_tags(storage):
115 |     """Perform the tag migration"""
116 |     results = storage.collection.get(include=["metadatas", "documents"])
117 |     
118 |     migrated_count = 0
119 |     error_count = 0
120 |     
121 |     for i, meta in enumerate(results["metadatas"]):
122 |         try:
123 |             # Extract current tags
124 |             current_tags = meta.get("tags", "[]")
125 |             
126 |             # Normalize to list format
127 |             if isinstance(current_tags, str):
128 |                 try:
129 |                     # Try parsing as JSON first
130 |                     tags = json.loads(current_tags)
131 |                     if isinstance(tags, str):
132 |                         tags = [t.strip() for t in tags.split(",")]
133 |                     elif isinstance(tags, list):
134 |                         tags = [str(t).strip() for t in tags]
135 |                     else:
136 |                         tags = []
137 |                 except json.JSONDecodeError:
138 |                     # Handle as comma-separated string
139 |                     tags = [t.strip() for t in current_tags.split(",")]
140 |             elif isinstance(current_tags, list):
141 |                 tags = [str(t).strip() for t in current_tags]
142 |             else:
143 |                 tags = []
144 |             
145 |             # Update with normalized format
146 |             new_meta = meta.copy()
147 |             new_meta["tags"] = json.dumps(tags)
148 |             
149 |             # Update memory
150 |             storage.collection.update(
151 |                 ids=[results["ids"][i]],
152 |                 metadatas=[new_meta]
153 |             )
154 |             
155 |             migrated_count += 1
156 |             
157 |         except Exception as e:
158 |             error_count += 1
159 |             logger.error(f"Error migrating memory {results['ids'][i]}: {str(e)}")
160 |             
161 |     return migrated_count, error_count
162 | 
163 | async def verify_migration(storage):
164 |     """Verify the migration was successful"""
165 |     results = storage.collection.get(include=["metadatas"])
166 |     
167 |     verification = {
168 |         "total_memories": len(results["ids"]),
169 |         "tag_formats": await analyze_tag_formats(results["metadatas"]),
170 |         "invalid_tags": await find_invalid_tags(results["metadatas"])
171 |     }
172 |     
173 |     return verification
174 | 
175 | async def rollback_migration(storage, backup_file):
176 |     """Rollback to the backup if needed"""
177 |     with open(backup_file, 'r') as f:
178 |         backup = json.load(f)
179 |         
180 |     for memory in backup["memories"]:
181 |         storage.collection.update(
182 |             ids=[memory["id"]],
183 |             metadatas=[memory["metadata"]],
184 |             documents=[memory["content"]]
185 |         )
186 | 
187 | async def main():
188 |     # Configure logging
189 |     log_level = os.getenv('LOG_LEVEL', 'ERROR').upper()
190 |     logging.basicConfig(
191 |         level=getattr(logging, log_level, logging.ERROR),
192 |         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
193 |         stream=sys.stderr
194 |     )
195 |     
 196 |     
 197 |     # Parse command line arguments
 198 |     parser = argparse.ArgumentParser(
 199 |         description='Migrate memory tags to a normalized JSON list format'
 200 |     )
201 |     parser.add_argument('--db-path', required=True, help='Path to ChromaDB database')
202 |     args = parser.parse_args()
203 |     
204 |     # Initialize storage with provided path
205 |     logger.info(f"Connecting to database at: {args.db_path}")
206 |     storage = ChromaMemoryStorage(args.db_path)
207 | 
208 | 
209 |     # 1. Create backup
210 |     logger.info("Creating backup...")
211 |     backup_file = await backup_memories(storage)
212 |     logger.info(f"Backup created at: {backup_file}")
213 |     
214 |     # 2. Validate current state
215 |     logger.info("Validating current state...")
216 |     current_state = await validate_current_state(storage)
217 |     logger.info("\nCurrent state:")
218 |     logger.info(json.dumps(current_state, indent=2))
219 |     
220 |     # 3. Confirm migration
221 |     proceed = input("\nProceed with migration? (yes/no): ")
222 |     if proceed.lower() == 'yes':
223 |         # 4. Run migration
224 |         logger.info("Running migration...")
225 |         migrated_count, error_count = await migrate_tags(storage)
226 |         logger.info(f"Migration completed. Migrated: {migrated_count}, Errors: {error_count}")
227 |         
228 |         # 5. Verify migration
229 |         logger.info("Verifying migration...")
230 |         verification = await verify_migration(storage)
231 |         logger.info("\nMigration verification:")
232 |         logger.info(json.dumps(verification, indent=2))
233 |         
234 |         # 6. Check if rollback needed
235 |         if error_count > 0:
236 |             rollback = input("\nErrors detected. Rollback to backup? (yes/no): ")
237 |             if rollback.lower() == 'yes':
238 |                 logger.info("Rolling back...")
239 |                 await rollback_migration(storage, backup_file)
240 |                 logger.info("Rollback completed")
241 |     else:
242 |         logger.info("Migration cancelled")
243 | 
244 | if __name__ == "__main__":
245 |     asyncio.run(main())
246 | 
```
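
The normalization branches in `migrate_tags` are easiest to sanity-check in isolation. Below is a minimal sketch with a hypothetical `normalize_tags` helper that mirrors those branches one-for-one (it is not part of the script above, just an extraction for illustration):

```python
import json


def normalize_tags(current_tags):
    """Mirror of the normalization branches in migrate_tags()."""
    if isinstance(current_tags, str):
        try:
            parsed = json.loads(current_tags)
            if isinstance(parsed, str):
                return [t.strip() for t in parsed.split(",")]
            if isinstance(parsed, list):
                return [str(t).strip() for t in parsed]
            return []
        except json.JSONDecodeError:
            # Handle as comma-separated string
            return [t.strip() for t in current_tags.split(",")]
    if isinstance(current_tags, list):
        return [str(t).strip() for t in current_tags]
    return []


# All of these inputs normalize to ["a", "b"]:
for raw in ('["a", "b"]', "a, b", ["a", " b "], '"a,b"'):
    assert normalize_tags(raw) == ["a", "b"]
```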

--------------------------------------------------------------------------------
/tests/integration/test_oauth_flow.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | OAuth 2.1 Dynamic Client Registration integration test.
  4 | 
  5 | Tests the OAuth endpoints for full flow functionality from client registration
  6 | through token acquisition and API access.
  7 | """
  8 | 
  9 | import asyncio
 10 | import json
 11 | import sys
 12 | from typing import Optional
 13 | 
 14 | import httpx
 15 | 
 16 | 
 17 | async def test_oauth_endpoints(base_url: str = "http://localhost:8000") -> bool:
 18 |     """
 19 |     Test OAuth 2.1 endpoints for basic functionality.
 20 | 
 21 |     Returns:
 22 |         True if all tests pass, False otherwise
 23 |     """
 24 |     print(f"Testing OAuth endpoints at {base_url}")
 25 |     print("=" * 50)
 26 | 
 27 |     async with httpx.AsyncClient() as client:
 28 |         try:
 29 |             # Test 1: OAuth Authorization Server Metadata
 30 |             print("1. Testing OAuth Authorization Server Metadata...")
 31 |             response = await client.get(f"{base_url}/.well-known/oauth-authorization-server/mcp")
 32 | 
 33 |             if response.status_code != 200:
 34 |                 print(f"   ❌ Failed: {response.status_code}")
 35 |                 return False
 36 | 
 37 |             metadata = response.json()
 38 |             required_fields = ["issuer", "authorization_endpoint", "token_endpoint", "registration_endpoint"]
 39 | 
 40 |             for field in required_fields:
 41 |                 if field not in metadata:
 42 |                     print(f"   ❌ Missing required field: {field}")
 43 |                     return False
 44 | 
 45 |             print(f"   ✅ Metadata endpoint working")
 46 |             print(f"   📋 Issuer: {metadata.get('issuer')}")
 47 | 
 48 |             # Test 2: Client Registration
 49 |             print("\n2. Testing Dynamic Client Registration...")
 50 | 
 51 |             registration_data = {
 52 |                 "client_name": "Test Client",
 53 |                 "redirect_uris": ["https://example.com/callback"],
 54 |                 "grant_types": ["authorization_code"],
 55 |                 "response_types": ["code"]
 56 |             }
 57 | 
 58 |             response = await client.post(
 59 |                 f"{base_url}/oauth/register",
 60 |                 json=registration_data
 61 |             )
 62 | 
 63 |             if response.status_code != 201:
 64 |                 print(f"   ❌ Registration failed: {response.status_code}")
 65 |                 print(f"   Response: {response.text}")
 66 |                 return False
 67 | 
 68 |             client_info = response.json()
 69 |             client_id = client_info.get("client_id")
 70 |             client_secret = client_info.get("client_secret")
 71 | 
 72 |             if not client_id or not client_secret:
 73 |                 print(f"   ❌ Missing client credentials in response")
 74 |                 return False
 75 | 
 76 |             print(f"   ✅ Client registration successful")
 77 |             print(f"   📋 Client ID: {client_id}")
 78 | 
 79 |             # Test 3: Authorization Endpoint (expect redirect)
 80 |             print("\n3. Testing Authorization Endpoint...")
 81 | 
 82 |             auth_url = f"{base_url}/oauth/authorize"
 83 |             auth_params = {
 84 |                 "response_type": "code",
 85 |                 "client_id": client_id,
 86 |                 "redirect_uri": "https://example.com/callback",
 87 |                 "state": "test_state_123"
 88 |             }
 89 | 
 90 |             response = await client.get(auth_url, params=auth_params, follow_redirects=False)
 91 | 
 92 |             if response.status_code not in [302, 307]:
 93 |                 print(f"   ❌ Authorization failed: {response.status_code}")
 94 |                 print(f"   Response: {response.text}")
 95 |                 return False
 96 | 
 97 |             location = response.headers.get("location", "")
 98 |             if "code=" not in location or "state=test_state_123" not in location:
 99 |                 print(f"   ❌ Invalid redirect: {location}")
100 |                 return False
101 | 
102 |             print(f"   ✅ Authorization endpoint working")
103 |             print(f"   📋 Redirect URL: {location[:100]}...")
104 | 
105 |             # Extract authorization code from redirect
106 |             auth_code = None
107 |             for param in location.split("?")[1].split("&"):
108 |                 if param.startswith("code="):
109 |                     auth_code = param.split("=", 1)[1]
110 |                     break
111 | 
112 |             if not auth_code:
113 |                 print(f"   ❌ No authorization code in redirect")
114 |                 return False
115 | 
116 |             # Test 4: Token Endpoint
117 |             print("\n4. Testing Token Endpoint...")
118 | 
119 |             token_data = {
120 |                 "grant_type": "authorization_code",
121 |                 "code": auth_code,
122 |                 "redirect_uri": "https://example.com/callback",
123 |                 "client_id": client_id,
124 |                 "client_secret": client_secret
125 |             }
126 | 
127 |             response = await client.post(
128 |                 f"{base_url}/oauth/token",
129 |                 data=token_data,
130 |                 headers={"Content-Type": "application/x-www-form-urlencoded"}
131 |             )
132 | 
133 |             if response.status_code != 200:
134 |                 print(f"   ❌ Token request failed: {response.status_code}")
135 |                 print(f"   Response: {response.text}")
136 |                 return False
137 | 
138 |             token_response = response.json()
139 |             access_token = token_response.get("access_token")
140 | 
141 |             if not access_token:
142 |                 print(f"   ❌ No access token in response")
143 |                 return False
144 | 
145 |             print(f"   ✅ Token endpoint working")
146 |             print(f"   📋 Token type: {token_response.get('token_type')}")
147 |             print(f"   📋 Expires in: {token_response.get('expires_in')} seconds")
148 | 
149 |             # Test 5: Protected Resource Access
150 |             print("\n5. Testing Protected API Endpoints...")
151 | 
152 |             headers = {"Authorization": f"Bearer {access_token}"}
153 | 
154 |             # Test health endpoint (should be public, no auth required)
155 |             response = await client.get(f"{base_url}/api/health")
156 |             if response.status_code == 200:
157 |                 print(f"   ✅ Public health endpoint accessible")
158 |             else:
159 |                 print(f"   ❌ Health endpoint failed: {response.status_code}")
160 | 
161 |             # Test protected memories endpoint (requires read access)
162 |             response = await client.get(f"{base_url}/api/memories", headers=headers)
163 |             if response.status_code == 200:
164 |                 print(f"   ✅ Protected memories endpoint accessible with Bearer token")
165 |             else:
166 |                 print(f"   ❌ Protected memories endpoint failed: {response.status_code}")
167 | 
168 |             # Test protected search endpoint (requires read access)
169 |             search_data = {"query": "test search", "n_results": 5}
170 |             response = await client.post(f"{base_url}/api/search", json=search_data, headers=headers)
171 |             if response.status_code in [200, 404]:  # 404 is OK if no memories exist
172 |                 print(f"   ✅ Protected search endpoint accessible with Bearer token")
173 |             else:
174 |                 print(f"   ❌ Protected search endpoint failed: {response.status_code}")
175 | 
176 |             # Test accessing protected endpoint without token (should fail)
177 |             response = await client.get(f"{base_url}/api/memories")
178 |             if response.status_code == 401:
179 |                 print(f"   ✅ Protected endpoint correctly rejects unauthenticated requests")
180 |             else:
181 |                 print(f"   ⚠️  Protected endpoint security test inconclusive: {response.status_code}")
182 | 
183 |             print("\n" + "=" * 50)
184 |             print("🎉 All OAuth 2.1 tests passed!")
185 |             print("✅ Ready for Claude Code HTTP transport integration")
186 |             print("✅ API endpoints properly protected with OAuth authentication")
187 |             return True
188 | 
189 |         except Exception as e:
190 |             print(f"\n❌ Test failed with exception: {e}")
191 |             return False
192 | 
193 | 
194 | async def main():
195 |     """Main test function."""
196 |     if len(sys.argv) > 1:
197 |         base_url = sys.argv[1]
198 |     else:
199 |         base_url = "http://localhost:8000"
200 | 
201 |     print("OAuth 2.1 Dynamic Client Registration Test")
202 |     print("==========================================")
203 |     print(f"Target: {base_url}")
204 |     print()
205 |     print("Make sure the MCP Memory Service is running with OAuth enabled:")
206 |     print("  export MCP_OAUTH_ENABLED=true")
207 |     print("  uv run memory server --http")
208 |     print()
209 | 
210 |     success = await test_oauth_endpoints(base_url)
211 | 
212 |     if success:
213 |         print("\n🚀 OAuth implementation is ready!")
214 |         sys.exit(0)
215 |     else:
216 |         print("\n💥 OAuth tests failed - check implementation")
217 |         sys.exit(1)
218 | 
219 | 
220 | if __name__ == "__main__":
221 |     asyncio.run(main())
```
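
When the full flow fails, it can help to exercise the registration step alone. A minimal synchronous sketch, assuming the service is running locally with `MCP_OAUTH_ENABLED=true` (endpoint path and payload taken from the test above):

```python
import httpx

# Same registration payload the integration test uses.
response = httpx.post(
    "http://localhost:8000/oauth/register",
    json={
        "client_name": "Debug Client",
        "redirect_uris": ["https://example.com/callback"],
        "grant_types": ["authorization_code"],
        "response_types": ["code"],
    },
)
response.raise_for_status()  # expects 201 Created
credentials = response.json()
print("client_id:", credentials["client_id"])
```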

--------------------------------------------------------------------------------
/docs/api/memory-metadata-api.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Memory Metadata Enhancement API
  2 | 
  3 | ## Overview
  4 | 
  5 | The Memory Metadata Enhancement API provides efficient memory metadata updates without requiring complete memory recreation. This addresses the core limitation identified in Issue #10 where updating memory metadata required deleting and recreating entire memory entries.
  6 | 
  7 | ## API Method
  8 | 
  9 | ### `update_memory_metadata`
 10 | 
 11 | Updates memory metadata while preserving the original memory content, embeddings, and optionally timestamps.
 12 | 
 13 | **Signature:**
 14 | ```python
 15 | async def update_memory_metadata(
 16 |     content_hash: str, 
 17 |     updates: Dict[str, Any], 
 18 |     preserve_timestamps: bool = True
 19 | ) -> Tuple[bool, str]
 20 | ```
 21 | 
 22 | **Parameters:**
 23 | - `content_hash` (string, required): The content hash of the memory to update
 24 | - `updates` (object, required): Dictionary of metadata fields to update
 25 | - `preserve_timestamps` (boolean, optional): Whether to preserve the original `created_at` timestamp (default: true)
 26 | 
 27 | **Returns:**
 28 | - `success` (boolean): Whether the update was successful
 29 | - `message` (string): Summary of updated fields or error message
 30 | 
 31 | ## Supported Update Fields
 32 | 
 33 | ### Core Metadata Fields
 34 | 
 35 | 1. **tags** (array of strings)
 36 |    - Replaces existing tags completely
 37 |    - Example: `"tags": ["important", "reference", "new-tag"]`
 38 | 
 39 | 2. **memory_type** (string)
 40 |    - Updates the memory type classification
 41 |    - Example: `"memory_type": "reminder"`
 42 | 
 43 | 3. **metadata** (object)
 44 |    - Merges with existing custom metadata
 45 |    - Example: `"metadata": {"priority": "high", "due_date": "2024-01-15"}`
 46 | 
 47 | ### Custom Fields
 48 | 
 49 | Any other fields not in the protected list can be updated directly:
 50 | - `"priority": "urgent"`
 51 | - `"status": "active"`
 52 | - `"category": "work"`
 53 | - Custom application-specific fields
 54 | 
 55 | ### Protected Fields
 56 | 
 57 | These fields cannot be modified through this API:
 58 | - `content` - Memory content is immutable
 59 | - `content_hash` - Content hash is immutable  
 60 | - `embedding` - Embeddings are preserved automatically
 61 | - `created_at` / `created_at_iso` - Preserved unless `preserve_timestamps=false`
 62 | - Internal timestamp fields (`timestamp`, `timestamp_float`, `timestamp_str`)
 63 | 
 64 | ## Usage Examples
 65 | 
 66 | ### Example 1: Add Tags to Memory
 67 | 
 68 | ```json
 69 | {
 70 |   "content_hash": "abc123def456...",
 71 |   "updates": {
 72 |     "tags": ["important", "reference", "project-alpha"]
 73 |   }
 74 | }
 75 | ```
 76 | 
 77 | ### Example 2: Update Memory Type and Custom Metadata
 78 | 
 79 | ```json
 80 | {
 81 |   "content_hash": "abc123def456...",
 82 |   "updates": {
 83 |     "memory_type": "reminder",
 84 |     "metadata": {
 85 |       "priority": "high",
 86 |       "due_date": "2024-01-15",
 87 |       "assignee": "[email protected]"
 88 |     }
 89 |   }
 90 | }
 91 | ```
 92 | 
 93 | ### Example 3: Update Custom Fields Directly
 94 | 
 95 | ```json
 96 | {
 97 |   "content_hash": "abc123def456...",
 98 |   "updates": {
 99 |     "priority": "urgent",
100 |     "status": "active",
101 |     "category": "work",
102 |     "last_reviewed": "2024-01-10"
103 |   }
104 | }
105 | ```
106 | 
107 | ### Example 4: Update with Timestamp Reset
108 | 
109 | ```json
110 | {
111 |   "content_hash": "abc123def456...",
112 |   "updates": {
113 |     "tags": ["archived", "completed"]
114 |   },
115 |   "preserve_timestamps": false
116 | }
117 | ```
118 | 
119 | ## Timestamp Behavior
120 | 
121 | ### Default Behavior (preserve_timestamps=true)
122 | 
123 | - `created_at` and `created_at_iso` are preserved from original memory
124 | - `updated_at` and `updated_at_iso` are set to current time
125 | - Legacy timestamp fields are updated for backward compatibility
126 | 
127 | ### Reset Behavior (preserve_timestamps=false)
128 | 
129 | - All timestamp fields are set to current time
130 | - Useful for marking memories as "refreshed" or "re-activated"
131 | 
132 | ## Implementation Details
133 | 
134 | ### Storage Layer
135 | 
136 | The API is implemented in the storage abstraction layer:
137 | 
138 | 1. **Base Storage Interface** (`storage/base.py`)
139 |    - Abstract method definition
140 |    - Consistent interface across storage backends
141 | 
142 | 2. **ChromaDB Implementation** (`storage/chroma.py`)
143 |    - Efficient upsert operation preserving embeddings
144 |    - Metadata merging with validation
145 |    - Timestamp synchronization
146 | 
147 | 3. **Future Storage Backends**
148 |    - The sqlite-vec implementation will follow the same interface
149 |    - Other storage backends can implement it consistently
150 | 
151 | ### MCP Protocol Integration
152 | 
153 | The API is exposed via the MCP protocol:
154 | 
155 | 1. **Tool Registration** - Available as `update_memory_metadata` tool
156 | 2. **Input Validation** - Comprehensive parameter validation
157 | 3. **Error Handling** - Clear error messages for debugging
158 | 4. **Logging** - Detailed operation logging for monitoring
159 | 
160 | ## Performance Benefits
161 | 
162 | ### Efficiency Gains
163 | 
164 | 1. **No Content Re-processing**
165 |    - Original content remains unchanged
166 |    - No need to regenerate embeddings
167 |    - Preserves vector database relationships
168 | 
169 | 2. **Minimal Network Transfer**
170 |    - Only metadata changes are transmitted
171 |    - Reduced bandwidth usage
172 |    - Faster operation completion
173 | 
174 | 3. **Database Optimization**
175 |    - Single update operation vs delete+insert
176 |    - Maintains database indices and relationships
177 |    - Reduces transaction overhead
178 | 
179 | ### Resource Savings
180 | 
181 | - **Memory Usage**: No need to load full memory content
182 | - **CPU Usage**: No embedding regeneration required
183 | - **Storage I/O**: Minimal database operations
184 | - **Network**: Reduced data transfer
185 | 
186 | ## Error Handling
187 | 
188 | ### Common Error Scenarios
189 | 
190 | 1. **Memory Not Found**
191 |    ```
192 |    Error: Memory with hash abc123... not found
193 |    ```
194 | 
195 | 2. **Invalid Updates Format**
196 |    ```
197 |    Error: updates must be a dictionary
198 |    ```
199 | 
200 | 3. **Invalid Tags Format**
201 |    ```
202 |    Error: Tags must be provided as a list of strings
203 |    ```
204 | 
205 | 4. **Storage Not Initialized**
206 |    ```
207 |    Error: Collection not initialized, cannot update memory metadata
208 |    ```
209 | 
210 | ### Error Recovery
211 | 
212 | - Detailed error messages for debugging
213 | - Transaction rollback on failures
214 | - Original memory remains unchanged on errors
215 | - Logging for troubleshooting
216 | 
217 | ## Migration and Compatibility
218 | 
219 | ### Backward Compatibility
220 | 
221 | - Existing memories work without modification
222 | - Legacy timestamp fields are maintained
223 | - No breaking changes to existing APIs
224 | 
225 | ### Migration Strategy
226 | 
227 | 1. **Immediate Availability** - API available immediately after deployment
228 | 2. **Gradual Adoption** - Can be adopted incrementally
229 | 3. **Fallback Support** - Original store/delete pattern still works
230 | 4. **Validation** - Comprehensive testing before production use
231 | 
232 | ## Use Cases
233 | 
234 | ### Memory Organization
235 | 
236 | 1. **Tag Management**
237 |    - Add organizational tags over time
238 |    - Categorize memories as understanding improves
239 |    - Apply bulk tagging for organization
240 | 
241 | 2. **Priority Updates**
242 |    - Mark memories as high/low priority
243 |    - Update urgency as contexts change
244 |    - Implement memory lifecycle management
245 | 
246 | 3. **Status Tracking**
247 |    - Track memory processing status
248 |    - Mark memories as reviewed/processed
249 |    - Implement workflow states
250 | 
251 | ### Advanced Features
252 | 
253 | 1. **Memory Linking**
254 |    - Add relationship metadata
255 |    - Create memory hierarchies
256 |    - Implement reference systems
257 | 
258 | 2. **Time-to-Live Management**
259 |    - Add expiration metadata
260 |    - Implement memory aging
261 |    - Schedule automatic cleanup
262 | 
263 | 3. **Access Control**
264 |    - Add ownership metadata
265 |    - Implement sharing controls
266 |    - Track access permissions
267 | 
268 | ## Testing and Validation
269 | 
270 | ### Unit Tests
271 | 
272 | - Comprehensive test coverage for all update scenarios
273 | - Error condition testing
274 | - Timestamp behavior validation
275 | - Metadata merging verification
276 | 
277 | ### Integration Tests
278 | 
279 | - End-to-end MCP protocol testing
280 | - Storage backend compatibility testing
281 | - Performance benchmarking
282 | - Cross-platform validation
283 | 
284 | ### Performance Testing
285 | 
286 | - Large dataset updates
287 | - Concurrent update operations
288 | - Memory usage monitoring
289 | - Response time measurement
290 | 
291 | ## Future Enhancements
292 | 
293 | ### Planned Improvements
294 | 
295 | 1. **Batch Updates** - Update multiple memories in single operation
296 | 2. **Conditional Updates** - Update only if conditions are met  
297 | 3. **Metadata Validation** - Schema validation for metadata fields
298 | 4. **Update History** - Track metadata change history
299 | 5. **Selective Updates** - Update only specific metadata fields
300 | 
301 | ### Storage Backend Support
302 | 
303 | - sqlite-vec implementation (Issue #40)
304 | - Other vector database backends
305 | - Consistent API across all backends
306 | - Performance optimization per backend
307 | 
308 | ## Conclusion
309 | 
310 | The Memory Metadata Enhancement API provides a robust, efficient solution for memory metadata management. It enables sophisticated memory organization features while maintaining excellent performance and backward compatibility.
311 | 
312 | This implementation forms the foundation for advanced memory management features like re-tagging systems (Issue #45) and memory consolidation (Issue #11).
```
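
As a concrete illustration of the API described above, here is a minimal driver sketch against the ChromaDB backend. The database path and content hash are placeholders; the call signature follows the documented `update_memory_metadata` interface:

```python
import asyncio

from mcp_memory_service.storage.chroma import ChromaMemoryStorage


async def tag_as_important(db_path: str, content_hash: str) -> None:
    # db_path and content_hash are placeholders for real values.
    storage = ChromaMemoryStorage(db_path)
    success, message = await storage.update_memory_metadata(
        content_hash=content_hash,
        updates={"tags": ["important", "reference"], "priority": "high"},
        preserve_timestamps=True,  # keep the original created_at
    )
    print("OK" if success else "FAILED", "-", message)


if __name__ == "__main__":
    asyncio.run(tag_as_important("/path/to/chroma_db", "abc123def456..."))
```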

--------------------------------------------------------------------------------
/scripts/installation/setup_cloudflare_resources.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Automated Cloudflare resource setup for MCP Memory Service.
  4 | This script creates the required Cloudflare resources using the HTTP API.
  5 | """
  6 | 
  7 | import os
  8 | import sys
  9 | import asyncio
 10 | import json
 11 | import logging
 12 | from typing import Dict, Any, Optional
 13 | import httpx
 14 | 
 15 | logging.basicConfig(level=logging.INFO)
 16 | logger = logging.getLogger(__name__)
 17 | 
 18 | class CloudflareSetup:
 19 |     def __init__(self, api_token: str, account_id: str):
 20 |         self.api_token = api_token
 21 |         self.account_id = account_id
 22 |         self.base_url = f"https://api.cloudflare.com/client/v4/accounts/{account_id}"
 23 |         self.client = None
 24 |     
 25 |     async def _get_client(self) -> httpx.AsyncClient:
 26 |         if self.client is None:
 27 |             headers = {
 28 |                 "Authorization": f"Bearer {self.api_token}",
 29 |                 "Content-Type": "application/json"
 30 |             }
 31 |             self.client = httpx.AsyncClient(headers=headers, timeout=30.0)
 32 |         return self.client
 33 |     
 34 |     async def _make_request(self, method: str, url: str, **kwargs) -> Dict[str, Any]:
 35 |         """Make authenticated request to Cloudflare API."""
 36 |         client = await self._get_client()
 37 |         response = await client.request(method, url, **kwargs)
 38 |         
 39 |         if response.status_code not in [200, 201]:
 40 |             logger.error(f"API request failed: {response.status_code} {response.text}")
 41 |             response.raise_for_status()
 42 |         
 43 |         return response.json()
 44 |     
 45 |     async def create_vectorize_index(self, name: str = "mcp-memory-index") -> str:
 46 |         """Create Vectorize index and return its ID."""
 47 |         logger.info(f"Creating Vectorize index: {name}")
 48 |         
 49 |         # Check if index already exists
 50 |         try:
 51 |             url = f"{self.base_url}/vectorize/indexes/{name}"
 52 |             result = await self._make_request("GET", url)
 53 |             if result.get("success"):
 54 |                 logger.info(f"Vectorize index {name} already exists")
 55 |                 return name
 56 |         except httpx.HTTPStatusError as e:
 57 |             if e.response.status_code != 404:
 58 |                 raise
 59 |         
 60 |         # Create new index
 61 |         url = f"{self.base_url}/vectorize/indexes"
 62 |         payload = {
 63 |             "name": name,
 64 |             "config": {
 65 |                 "dimensions": 768,
 66 |                 "metric": "cosine"
 67 |             }
 68 |         }
 69 |         
 70 |         result = await self._make_request("POST", url, json=payload)
 71 |         if result.get("success"):
 72 |             logger.info(f"✅ Created Vectorize index: {name}")
 73 |             return name
 74 |         else:
 75 |             raise ValueError(f"Failed to create Vectorize index: {result}")
 76 |     
 77 |     async def create_d1_database(self, name: str = "mcp-memory-db") -> str:
 78 |         """Create D1 database and return its ID."""
 79 |         logger.info(f"Creating D1 database: {name}")
 80 |         
 81 |         # List existing databases to check if it exists
 82 |         url = f"{self.base_url}/d1/database"
 83 |         result = await self._make_request("GET", url)
 84 |         
 85 |         if result.get("success"):
 86 |             for db in result.get("result", []):
 87 |                 if db.get("name") == name:
 88 |                     db_id = db.get("uuid")
 89 |                     logger.info(f"D1 database {name} already exists with ID: {db_id}")
 90 |                     return db_id
 91 |         
 92 |         # Create new database
 93 |         payload = {"name": name}
 94 |         result = await self._make_request("POST", url, json=payload)
 95 |         
 96 |         if result.get("success"):
 97 |             db_id = result["result"]["uuid"]
 98 |             logger.info(f"✅ Created D1 database: {name} (ID: {db_id})")
 99 |             return db_id
100 |         else:
101 |             raise ValueError(f"Failed to create D1 database: {result}")
102 |     
103 |     async def create_r2_bucket(self, name: str = "mcp-memory-content") -> str:
104 |         """Create R2 bucket and return its name."""
105 |         logger.info(f"Creating R2 bucket: {name}")
106 |         
107 |         # Check if bucket already exists
108 |         try:
109 |             url = f"{self.base_url}/r2/buckets/{name}"
110 |             result = await self._make_request("GET", url)
111 |             if result.get("success"):
112 |                 logger.info(f"R2 bucket {name} already exists")
113 |                 return name
114 |         except httpx.HTTPStatusError as e:
115 |             if e.response.status_code != 404:
116 |                 raise
117 |         
118 |         # Create new bucket
119 |         url = f"{self.base_url}/r2/buckets"
120 |         payload = {"name": name}
121 |         
122 |         result = await self._make_request("POST", url, json=payload)
123 |         if result.get("success"):
124 |             logger.info(f"✅ Created R2 bucket: {name}")
125 |             return name
126 |         else:
127 |             raise ValueError(f"Failed to create R2 bucket: {result}")
128 |     
129 |     async def verify_workers_ai_access(self) -> bool:
130 |         """Verify Workers AI access and embedding model."""
131 |         logger.info("Verifying Workers AI access...")
132 |         
133 |         # Test embedding generation
134 |         url = f"{self.base_url}/ai/run/@cf/baai/bge-base-en-v1.5"
135 |         payload = {"text": ["test embedding"]}
136 |         
137 |         try:
138 |             result = await self._make_request("POST", url, json=payload)
139 |             if result.get("success"):
140 |                 logger.info("✅ Workers AI access verified")
141 |                 return True
142 |             else:
143 |                 logger.warning(f"Workers AI test failed: {result}")
144 |                 return False
145 |         except Exception as e:
146 |             logger.warning(f"Workers AI verification failed: {e}")
147 |             return False
148 |     
149 |     async def close(self):
150 |         """Close HTTP client."""
151 |         if self.client:
152 |             await self.client.aclose()
153 | 
154 | async def main():
155 |     """Main setup routine."""
156 |     print("🚀 Cloudflare Backend Setup for MCP Memory Service")
157 |     print("=" * 55)
158 |     
159 |     # Check for required environment variables
160 |     api_token = os.getenv("CLOUDFLARE_API_TOKEN")
161 |     account_id = os.getenv("CLOUDFLARE_ACCOUNT_ID")
162 |     
163 |     if not api_token:
164 |         print("❌ CLOUDFLARE_API_TOKEN environment variable not set")
165 |         print("Please create an API token at: https://dash.cloudflare.com/profile/api-tokens")
166 |         print("Required permissions: Vectorize:Edit, D1:Edit, Workers AI:Edit, R2:Edit")
167 |         return False
168 |     
169 |     if not account_id:
170 |         print("❌ CLOUDFLARE_ACCOUNT_ID environment variable not set")
171 |         print("You can find your account ID in the Cloudflare dashboard sidebar")
172 |         return False
173 |     
174 |     setup = CloudflareSetup(api_token, account_id)
175 |     
176 |     try:
177 |         # Create resources
178 |         vectorize_index = await setup.create_vectorize_index()
179 |         d1_database_id = await setup.create_d1_database()
180 |         
181 |         # R2 bucket is optional
182 |         r2_bucket = None
183 |         create_r2 = input("\n🪣 Create R2 bucket for large content storage? (y/N): ").lower().strip()
184 |         if create_r2 in ['y', 'yes']:
185 |             try:
186 |                 r2_bucket = await setup.create_r2_bucket()
187 |             except Exception as e:
188 |                 logger.warning(f"Failed to create R2 bucket: {e}")
189 |                 logger.warning("Continuing without R2 storage...")
190 |         
191 |         # Verify Workers AI
192 |         ai_available = await setup.verify_workers_ai_access()
193 |         
194 |         print("\n🎉 Setup Complete!")
195 |         print("=" * 20)
196 |         print(f"Vectorize Index: {vectorize_index}")
197 |         print(f"D1 Database ID: {d1_database_id}")
198 |         print(f"R2 Bucket: {r2_bucket or 'Not configured'}")
199 |         print(f"Workers AI: {'Available' if ai_available else 'Limited access'}")
200 |         
201 |         print("\n📝 Environment Variables:")
202 |         print("=" * 25)
203 |         print(f"export CLOUDFLARE_API_TOKEN=\"{api_token[:10]}...\"")
204 |         print(f"export CLOUDFLARE_ACCOUNT_ID=\"{account_id}\"")
205 |         print(f"export CLOUDFLARE_VECTORIZE_INDEX=\"{vectorize_index}\"")
206 |         print(f"export CLOUDFLARE_D1_DATABASE_ID=\"{d1_database_id}\"")
207 |         if r2_bucket:
208 |             print(f"export CLOUDFLARE_R2_BUCKET=\"{r2_bucket}\"")
209 |         print("export MCP_MEMORY_STORAGE_BACKEND=\"cloudflare\"")
210 |         
211 |         print("\n🧪 Test the setup:")
212 |         print("python test_cloudflare_backend.py")
213 |         
214 |         return True
215 |         
216 |     except Exception as e:
217 |         logger.error(f"Setup failed: {e}")
218 |         return False
219 |     
220 |     finally:
221 |         await setup.close()
222 | 
223 | if __name__ == "__main__":
224 |     success = asyncio.run(main())
225 |     sys.exit(0 if success else 1)
```
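
The interactive `main()` above can also be bypassed for CI or scripted provisioning by driving `CloudflareSetup` directly. A minimal sketch, assuming the script is importable from the working directory and both credential variables are set (resource names mirror the defaults above):

```python
import asyncio
import os

# Assumes setup_cloudflare_resources.py is on the import path.
from setup_cloudflare_resources import CloudflareSetup


async def provision() -> None:
    setup = CloudflareSetup(
        api_token=os.environ["CLOUDFLARE_API_TOKEN"],
        account_id=os.environ["CLOUDFLARE_ACCOUNT_ID"],
    )
    try:
        index = await setup.create_vectorize_index("mcp-memory-index")
        db_id = await setup.create_d1_database("mcp-memory-db")
        print(f'export CLOUDFLARE_VECTORIZE_INDEX="{index}"')
        print(f'export CLOUDFLARE_D1_DATABASE_ID="{db_id}"')
    finally:
        await setup.close()


if __name__ == "__main__":
    asyncio.run(provision())
```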

--------------------------------------------------------------------------------
/docs/assets/images/project-infographic.svg:
--------------------------------------------------------------------------------

```
  1 | <svg width="800" height="1200" viewBox="0 0 800 1200" xmlns="http://www.w3.org/2000/svg">
  2 |   <!-- Background -->
  3 |   <rect width="800" height="1200" fill="#f8f9fa"/>
  4 |   
  5 |   <!-- Header -->
  6 |   <rect width="800" height="120" fill="#1a1a1a"/>
  7 |   <text x="400" y="60" font-family="Arial, sans-serif" font-size="36" font-weight="bold" fill="white" text-anchor="middle">MCP Memory Service</text>
  8 |   <text x="400" y="90" font-family="Arial, sans-serif" font-size="18" fill="#888" text-anchor="middle">Production-Ready Knowledge Management Platform</text>
  9 |   
 10 |   <!-- Performance Metrics Section -->
 11 |   <g transform="translate(0, 140)">
 12 |     <text x="400" y="30" font-family="Arial, sans-serif" font-size="24" font-weight="bold" fill="#333" text-anchor="middle">Performance Metrics</text>
 13 |     
 14 |     <!-- Metric Cards -->
 15 |     <g transform="translate(50, 60)">
 16 |       <!-- Card 1 -->
 17 |       <rect x="0" y="0" width="160" height="100" rx="10" fill="#e3f2fd" stroke="#2196f3" stroke-width="2"/>
 18 |       <text x="80" y="35" font-family="Arial, sans-serif" font-size="32" font-weight="bold" fill="#1976d2" text-anchor="middle">319+</text>
 19 |       <text x="80" y="60" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Memories</text>
 20 |       <text x="80" y="80" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Managed</text>
 21 |       
 22 |       <!-- Card 2 -->
 23 |       <rect x="190" y="0" width="160" height="100" rx="10" fill="#e8f5e9" stroke="#4caf50" stroke-width="2"/>
 24 |       <text x="270" y="35" font-family="Arial, sans-serif" font-size="32" font-weight="bold" fill="#388e3c" text-anchor="middle">828ms</text>
 25 |       <text x="270" y="60" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Avg Query</text>
 26 |       <text x="270" y="80" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Time</text>
 27 |       
 28 |       <!-- Card 3 -->
 29 |       <rect x="380" y="0" width="160" height="100" rx="10" fill="#fff3e0" stroke="#ff9800" stroke-width="2"/>
 30 |       <text x="460" y="35" font-family="Arial, sans-serif" font-size="32" font-weight="bold" fill="#f57c00" text-anchor="middle">100%</text>
 31 |       <text x="460" y="60" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Cache Hit</text>
 32 |       <text x="460" y="80" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Ratio</text>
 33 |       
 34 |       <!-- Card 4 -->
 35 |       <rect x="570" y="0" width="160" height="100" rx="10" fill="#fce4ec" stroke="#e91e63" stroke-width="2"/>
 36 |       <text x="650" y="35" font-family="Arial, sans-serif" font-size="32" font-weight="bold" fill="#c2185b" text-anchor="middle">20MB</text>
 37 |       <text x="650" y="60" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Efficient</text>
 38 |       <text x="650" y="80" font-family="Arial, sans-serif" font-size="14" fill="#555" text-anchor="middle">Storage</text>
 39 |     </g>
 40 |   </g>
 41 |   
 42 |   <!-- Features Section -->
 43 |   <g transform="translate(0, 380)">
 44 |     <text x="400" y="30" font-family="Arial, sans-serif" font-size="24" font-weight="bold" fill="#333" text-anchor="middle">16 Comprehensive Operations</text>
 45 |     
 46 |     <!-- Feature Categories -->
 47 |     <g transform="translate(50, 60)">
 48 |       <!-- Memory Operations -->
 49 |       <rect x="0" y="0" width="220" height="180" rx="10" fill="#f5f5f5" stroke="#999" stroke-width="1"/>
 50 |       <text x="110" y="25" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="#333" text-anchor="middle">Memory Operations</text>
 51 |       <text x="15" y="50" font-family="Arial, sans-serif" font-size="14" fill="#555">• store_memory</text>
 52 |       <text x="15" y="70" font-family="Arial, sans-serif" font-size="14" fill="#555">• retrieve_memory</text>
 53 |       <text x="15" y="90" font-family="Arial, sans-serif" font-size="14" fill="#555">• search_by_tag</text>
 54 |       <text x="15" y="110" font-family="Arial, sans-serif" font-size="14" fill="#555">• delete_memory</text>
 55 |       <text x="15" y="130" font-family="Arial, sans-serif" font-size="14" fill="#555">• update_metadata</text>
 56 |       <text x="15" y="150" font-family="Arial, sans-serif" font-size="14" fill="#555">• exact_match_retrieve</text>
 57 |       
 58 |       <!-- Database Management -->
 59 |       <rect x="250" y="0" width="220" height="180" rx="10" fill="#f5f5f5" stroke="#999" stroke-width="1"/>
 60 |       <text x="360" y="25" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="#333" text-anchor="middle">Database Management</text>
 61 |       <text x="265" y="50" font-family="Arial, sans-serif" font-size="14" fill="#555">• create_backup</text>
 62 |       <text x="265" y="70" font-family="Arial, sans-serif" font-size="14" fill="#555">• optimize_db</text>
 63 |       <text x="265" y="90" font-family="Arial, sans-serif" font-size="14" fill="#555">• check_health</text>
 64 |       <text x="265" y="110" font-family="Arial, sans-serif" font-size="14" fill="#555">• get_stats</text>
 65 |       <text x="265" y="130" font-family="Arial, sans-serif" font-size="14" fill="#555">• cleanup_duplicates</text>
 66 |       
 67 |       <!-- Advanced Features -->
 68 |       <rect x="500" y="0" width="200" height="180" rx="10" fill="#f5f5f5" stroke="#999" stroke-width="1"/>
 69 |       <text x="600" y="25" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="#333" text-anchor="middle">Advanced Features</text>
 70 |       <text x="515" y="50" font-family="Arial, sans-serif" font-size="14" fill="#555">• debug_retrieve</text>
 71 |       <text x="515" y="70" font-family="Arial, sans-serif" font-size="14" fill="#555">• recall_memory</text>
 72 |       <text x="515" y="90" font-family="Arial, sans-serif" font-size="14" fill="#555">• delete_by_timeframe</text>
 73 |       <text x="515" y="110" font-family="Arial, sans-serif" font-size="14" fill="#555">• check_embedding</text>
 74 |     </g>
 75 |   </g>
 76 |   
 77 |   <!-- Architecture -->
 78 |   <g transform="translate(0, 650)">
 79 |     <text x="400" y="30" font-family="Arial, sans-serif" font-size="24" font-weight="bold" fill="#333" text-anchor="middle">Architecture Stack</text>
 80 |     
 81 |     <g transform="translate(150, 60)">
 82 |       <!-- Stack layers -->
 83 |       <rect x="0" y="0" width="500" height="50" rx="5" fill="#4a90e2" stroke="#357abd" stroke-width="2"/>
 84 |       <text x="250" y="30" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="white" text-anchor="middle">React Dashboard + Real-time Statistics</text>
 85 |       
 86 |       <rect x="0" y="60" width="500" height="50" rx="5" fill="#5cb85c" stroke="#449d44" stroke-width="2"/>
 87 |       <text x="250" y="90" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="white" text-anchor="middle">MCP Protocol (stdin/stdout)</text>
 88 |       
 89 |       <rect x="0" y="120" width="500" height="50" rx="5" fill="#f0ad4e" stroke="#ec971f" stroke-width="2"/>
 90 |       <text x="250" y="150" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="white" text-anchor="middle">Python Server + Sentence Transformers</text>
 91 |       
 92 |       <rect x="0" y="180" width="500" height="50" rx="5" fill="#d9534f" stroke="#c9302c" stroke-width="2"/>
 93 |       <text x="250" y="210" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="white" text-anchor="middle">ChromaDB Vector Storage</text>
 94 |     </g>
 95 |   </g>
 96 |   
 97 |   <!-- Sponsorship CTA -->
 98 |   <g transform="translate(0, 950)">
 99 |     <rect x="50" y="0" width="700" height="200" rx="15" fill="#1a1a1a"/>
100 |     <text x="400" y="40" font-family="Arial, sans-serif" font-size="28" font-weight="bold" fill="white" text-anchor="middle">Support Open Source Development</text>
101 |     
102 |     <text x="400" y="80" font-family="Arial, sans-serif" font-size="16" fill="#ccc" text-anchor="middle">Your sponsorship enables:</text>
103 |     <text x="200" y="110" font-family="Arial, sans-serif" font-size="14" fill="#aaa">✓ New feature development</text>
104 |     <text x="200" y="135" font-family="Arial, sans-serif" font-size="14" fill="#aaa">✓ Bug fixes &amp; maintenance</text>
105 |     <text x="450" y="110" font-family="Arial, sans-serif" font-size="14" fill="#aaa">✓ Documentation improvements</text>
106 |     <text x="450" y="135" font-family="Arial, sans-serif" font-size="14" fill="#aaa">✓ Community support</text>
107 |     
108 |     <rect x="300" y="155" width="200" height="35" rx="20" fill="#ea4aaa" stroke="none"/>
109 |     <text x="400" y="178" font-family="Arial, sans-serif" font-size="16" font-weight="bold" fill="white" text-anchor="middle">Become a Sponsor</text>
110 |   </g>
111 | </svg>
```

--------------------------------------------------------------------------------
/scripts/development/verify_hybrid_sync.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Comprehensive verification of hybrid storage background sync functionality.
  4 | """
  5 | 
  6 | import asyncio
  7 | import sys
  8 | import tempfile
  9 | import os
 10 | import time
 11 | from unittest.mock import patch
 12 | 
 13 | sys.path.insert(0, 'src')
 14 | 
 15 | from mcp_memory_service.storage.hybrid import HybridMemoryStorage
 16 | from mcp_memory_service.models.memory import Memory
 17 | import hashlib
 18 | 
 19 | 
 20 | class DetailedMockCloudflare:
 21 |     """Detailed mock for tracking sync operations."""
 22 | 
 23 |     def __init__(self, **kwargs):
 24 |         self.memories = {}
 25 |         self.operation_log = []
 26 |         self.initialized = False
 27 |         self.delay = 0.01  # Simulate network delay
 28 | 
 29 |     async def initialize(self):
 30 |         self.initialized = True
 31 |         self.operation_log.append(('init', time.time()))
 32 | 
 33 |     async def store(self, memory):
 34 |         await asyncio.sleep(self.delay)  # Simulate network
 35 |         self.memories[memory.content_hash] = memory
 36 |         self.operation_log.append(('store', memory.content_hash, time.time()))
 37 |         return True, "Stored"
 38 | 
 39 |     async def delete(self, content_hash):
 40 |         await asyncio.sleep(self.delay)
 41 |         if content_hash in self.memories:
 42 |             del self.memories[content_hash]
 43 |         self.operation_log.append(('delete', content_hash, time.time()))
 44 |         return True, "Deleted"
 45 | 
 46 |     async def update_memory_metadata(self, content_hash, updates, preserve_timestamps=True):
 47 |         await asyncio.sleep(self.delay)
 48 |         self.operation_log.append(('update', content_hash, time.time()))
 49 |         return True, "Updated"
 50 | 
 51 |     async def get_stats(self):
 52 |         return {"total": len(self.memories)}
 53 | 
 54 |     async def close(self):
 55 |         self.operation_log.append(('close', time.time()))
 56 | 
 57 | 
 58 | async def verify_sync():
 59 |     print("🔍 HYBRID STORAGE BACKGROUND SYNC VERIFICATION")
 60 |     print("=" * 60)
 61 | 
 62 |     with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
 63 |         db_path = tmp.name
 64 | 
 65 |     try:
 66 |         config = {
 67 |             'api_token': 'test',
 68 |             'account_id': 'test',
 69 |             'vectorize_index': 'test',
 70 |             'd1_database_id': 'test'
 71 |         }
 72 | 
 73 |         with patch('mcp_memory_service.storage.hybrid.CloudflareStorage', DetailedMockCloudflare):
 74 |             # Initialize with short sync interval
 75 |             storage = HybridMemoryStorage(
 76 |                 sqlite_db_path=db_path,
 77 |                 cloudflare_config=config,
 78 |                 sync_interval=0.5,  # 500ms for quick testing
 79 |                 batch_size=2
 80 |             )
 81 | 
 82 |             await storage.initialize()
 83 |             print("✅ Hybrid storage initialized with background sync")
 84 |             print(f"  • Primary: SQLite-vec (local)")
 85 |             print(f"  • Secondary: Mock Cloudflare (simulated)")
 86 |             print(f"  • Sync interval: 0.5 seconds")
 87 |             print(f"  • Batch size: 2 operations")
 88 |             print()
 89 | 
 90 |             # TEST 1: Store operations are queued
 91 |             print("📝 TEST 1: Store Operations Queuing")
 92 |             print("-" * 40)
 93 | 
 94 |             memories = []
 95 |             for i in range(4):
 96 |                 content = f"Sync test memory #{i+1} at {time.time()}"
 97 |                 memory = Memory(
 98 |                     content=content,
 99 |                     content_hash=hashlib.sha256(content.encode()).hexdigest(),
100 |                     tags=['sync-verify'],
101 |                     memory_type='test'
102 |                 )
103 |                 memories.append(memory)
104 | 
105 |                 start = time.time()
106 |                 success, msg = await storage.store(memory)
107 |                 elapsed = (time.time() - start) * 1000
108 |                 print(f"  Memory #{i+1}: ✅ stored in {elapsed:.1f}ms (local)")
109 | 
110 |             # Check initial queue
111 |             status = await storage.sync_service.get_sync_status()
112 |             print(f"\n  📊 Queue status after stores:")
113 |             print(f"     • Queued operations: {status['queue_size']}")
114 |             print(f"     • Processed: {status['stats']['operations_processed']}")
115 | 
116 |             # TEST 2: Wait for automatic background sync
117 |             print("\n⏳ TEST 2: Automatic Background Sync")
118 |             print("-" * 40)
119 |             print("  Waiting 1.5 seconds for automatic sync...")
120 |             await asyncio.sleep(1.5)
121 | 
122 |             status = await storage.sync_service.get_sync_status()
123 |             mock_log = storage.secondary.operation_log
124 | 
125 |             print(f"\n  📊 After automatic sync:")
126 |             print(f"     • Queue remaining: {status['queue_size']}")
127 |             print(f"     • Operations processed: {status['stats']['operations_processed']}")
128 |             print(f"     • Mock Cloudflare received: {len([op for op in mock_log if op[0] == 'store'])} stores")
129 | 
130 |             # TEST 3: Delete operation
131 |             print("\n🗑️ TEST 3: Delete Operation Sync")
132 |             print("-" * 40)
133 | 
134 |             delete_hash = memories[0].content_hash
135 |             success, msg = await storage.delete(delete_hash)
136 |             print(f"  Delete operation: ✅ (local)")
137 | 
138 |             await asyncio.sleep(1)  # Wait for sync
139 | 
140 |             delete_ops = [op for op in mock_log if op[0] == 'delete']
141 |             print(f"  Mock Cloudflare received: {len(delete_ops)} delete operation(s)")
142 | 
143 |             # TEST 4: Force sync
144 |             print("\n🔄 TEST 4: Force Sync")
145 |             print("-" * 40)
146 | 
147 |             # Add more memories
148 |             for i in range(2):
149 |                 content = f"Force sync test #{i+1}"
150 |                 memory = Memory(
151 |                     content=content,
152 |                     content_hash=hashlib.sha256(content.encode()).hexdigest(),
153 |                     tags=['force-sync'],
154 |                     memory_type='test'
155 |                 )
156 |                 await storage.store(memory)
157 | 
158 |             print(f"  Added 2 more memories")
159 | 
160 |             # Force sync
161 |             result = await storage.force_sync()
162 |             print(f"\n  Force sync result:")
163 |             print(f"     • Status: {result['status']}")
164 |             print(f"     • Primary memories: {result['primary_memories']}")
165 |             print(f"     • Synced to secondary: {result['synced_to_secondary']}")
166 |             print(f"     • Duration: {result.get('duration', 0):.3f}s")
167 | 
168 |             # Final verification
169 |             print("\n✅ FINAL VERIFICATION")
170 |             print("-" * 40)
171 | 
172 |             final_status = await storage.sync_service.get_sync_status()
173 |             final_mock_ops = storage.secondary.operation_log
174 | 
175 |             print(f"  Sync service statistics:")
176 |             print(f"     • Total operations processed: {final_status['stats']['operations_processed']}")
177 |             print(f"     • Failed operations: {final_status['stats'].get('operations_failed', 0)}")
178 |             print(f"     • Cloudflare available: {final_status['cloudflare_available']}")
179 | 
180 |             print(f"\n  Mock Cloudflare operations log:")
181 |             store_count = len([op for op in final_mock_ops if op[0] == 'store'])
182 |             delete_count = len([op for op in final_mock_ops if op[0] == 'delete'])
183 |             update_count = len([op for op in final_mock_ops if op[0] == 'update'])
184 | 
185 |             print(f"     • Store operations: {store_count}")
186 |             print(f"     • Delete operations: {delete_count}")
187 |             print(f"     • Update operations: {update_count}")
188 |             print(f"     • Total operations: {len(final_mock_ops) - 2}")  # Exclude init and close
189 | 
190 |             # Verify memory counts match
191 |             primary_count = len(await storage.primary.get_all_memories())
192 |             secondary_count = len(storage.secondary.memories)
193 | 
194 |             print(f"\n  Memory count verification:")
195 |             print(f"     • Primary (SQLite-vec): {primary_count}")
196 |             print(f"     • Secondary (Mock CF): {secondary_count}")
197 |             print(f"     • Match: {'✅ YES' if primary_count == secondary_count else '❌ NO'}")
198 | 
199 |             await storage.close()
200 | 
201 |             print("\n" + "=" * 60)
202 |             print("🎉 BACKGROUND SYNC VERIFICATION COMPLETE")
203 |             print("\nSummary: The hybrid storage backend is working correctly!")
204 |             print("  ✅ Store operations are queued for background sync")
205 |             print("  ✅ Automatic sync processes operations in batches")
206 |             print("  ✅ Delete operations are synced to secondary")
207 |             print("  ✅ Force sync ensures complete synchronization")
208 |             print("  ✅ Both backends maintain consistency")
209 | 
210 |     finally:
211 |         if os.path.exists(db_path):
212 |             os.unlink(db_path)
213 | 
214 | 
215 | if __name__ == "__main__":
216 |     asyncio.run(verify_sync())
```
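
For reference: the tail of this script filters `storage.secondary.operation_log` by `op[0]` and counts `storage.secondary.memories`, so the mock secondary backend (defined earlier in this file, outside the excerpt) must expose roughly the shape sketched below. The class and method names here are illustrative assumptions inferred from usage, not the file's actual definitions:

```python
class MockCloudflareStorage:
    """Minimal sketch (assumed) of the mock secondary backend used above."""

    def __init__(self):
        self.memories = {}       # content_hash -> Memory
        self.operation_log = []  # [('initialize', None), ('store', hash), ...]

    async def initialize(self):
        self.operation_log.append(('initialize', None))

    async def store(self, memory):
        # Record the stored memory and log the operation for later filtering.
        self.memories[memory.content_hash] = memory
        self.operation_log.append(('store', memory.content_hash))

    async def delete(self, content_hash):
        self.memories.pop(content_hash, None)
        self.operation_log.append(('delete', content_hash))

    async def close(self):
        self.operation_log.append(('close', None))
```

Under this shape, the `- 2` in the "Total operations" line above excludes the `initialize` and `close` bookkeeping entries from the sync-traffic count.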

--------------------------------------------------------------------------------
/tests/integration/test_server_handlers.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Integration tests for MCP handler methods in server.py.
  3 | 
  4 | These tests verify that the MCP handlers correctly transform MemoryService
  5 | responses to MCP TextContent format, particularly after the fix for issue #198.
  6 | """
  7 | 
  8 | import pytest
  9 | from mcp import types
 10 | from mcp_memory_service.server import MemoryServer
 11 | 
 12 | 
 13 | class TestHandleStoreMemory:
 14 |     """Test suite for handle_store_memory MCP handler."""
 15 | 
 16 |     @pytest.mark.asyncio
 17 |     async def test_store_memory_success(self):
 18 |         """Test storing a valid memory returns success message with hash."""
 19 |         server = MemoryServer()
 20 | 
 21 |         result = await server.handle_store_memory({
 22 |             "content": "Test memory content for integration test",
 23 |             "metadata": {
 24 |                 "tags": ["test", "integration"],
 25 |                 "type": "note"
 26 |             }
 27 |         })
 28 | 
 29 |         # Verify result structure
 30 |         assert isinstance(result, list)
 31 |         assert len(result) == 1
 32 |         assert isinstance(result[0], types.TextContent)
 33 | 
 34 |         # Verify success message
 35 |         text = result[0].text
 36 |         assert "successfully" in text.lower()
 37 |         assert "hash:" in text.lower()
 38 |         assert "..." in text  # Hash should be truncated
 39 | 
 40 |     @pytest.mark.asyncio
 41 |     async def test_store_memory_chunked(self):
 42 |         """Test storing long content creates multiple chunks."""
 43 |         server = MemoryServer()
 44 | 
 45 |         # Create content that will be auto-split (> 1500 chars)
 46 |         long_content = "This is a very long memory content. " * 100
 47 | 
 48 |         result = await server.handle_store_memory({
 49 |             "content": long_content,
 50 |             "metadata": {"tags": ["test"], "type": "note"}
 51 |         })
 52 | 
 53 |         # Verify result structure
 54 |         assert isinstance(result, list)
 55 |         assert len(result) == 1
 56 |         assert isinstance(result[0], types.TextContent)
 57 | 
 58 |         # Verify chunked message
 59 |         text = result[0].text
 60 |         assert "chunk" in text.lower()
 61 |         assert "successfully" in text.lower()
 62 | 
 63 |     @pytest.mark.asyncio
 64 |     async def test_store_memory_empty_content(self):
 65 |         """Test storing empty content returns error."""
 66 |         server = MemoryServer()
 67 | 
 68 |         result = await server.handle_store_memory({
 69 |             "content": "",
 70 |             "metadata": {}
 71 |         })
 72 | 
 73 |         # Verify error message
 74 |         assert isinstance(result, list)
 75 |         assert len(result) == 1
 76 |         text = result[0].text
 77 |         assert "error" in text.lower()
 78 |         assert "required" in text.lower()
 79 | 
 80 |     @pytest.mark.asyncio
 81 |     async def test_store_memory_missing_content(self):
 82 |         """Test storing without content parameter returns error."""
 83 |         server = MemoryServer()
 84 | 
 85 |         result = await server.handle_store_memory({
 86 |             "metadata": {"tags": ["test"]}
 87 |         })
 88 | 
 89 |         # Verify error message
 90 |         assert isinstance(result, list)
 91 |         assert len(result) == 1
 92 |         text = result[0].text
 93 |         assert "error" in text.lower()
 94 | 
 95 |     @pytest.mark.asyncio
 96 |     async def test_store_memory_with_tags_string(self):
 97 |         """Test storing memory with tags as string (not array)."""
 98 |         server = MemoryServer()
 99 | 
100 |         result = await server.handle_store_memory({
101 |             "content": "Test with string tags",
102 |             "metadata": {
103 |                 "tags": "test,integration,string-tags",
104 |                 "type": "note"
105 |             }
106 |         })
107 | 
108 |         # Should succeed - MemoryService handles string tags
109 |         assert isinstance(result, list)
110 |         assert len(result) == 1
111 |         text = result[0].text
112 |         assert "successfully" in text.lower()
113 | 
114 |     @pytest.mark.asyncio
115 |     async def test_store_memory_default_type(self):
116 |         """Test storing memory without explicit type uses default."""
117 |         server = MemoryServer()
118 | 
119 |         result = await server.handle_store_memory({
120 |             "content": "Memory without explicit type",
121 |             "metadata": {"tags": ["test"]}
122 |         })
123 | 
124 |         # Should succeed with default type
125 |         assert isinstance(result, list)
126 |         assert len(result) == 1
127 |         text = result[0].text
128 |         assert "successfully" in text.lower()
129 | 
130 | 
131 | class TestHandleRetrieveMemory:
132 |     """Test suite for handle_retrieve_memory MCP handler."""
133 | 
134 |     @pytest.mark.asyncio
135 |     async def test_retrieve_memory_success(self):
136 |         """Test retrieving memories with valid query."""
137 |         server = MemoryServer()
138 | 
139 |         # First store a memory
140 |         await server.handle_store_memory({
141 |             "content": "Searchable test memory for retrieval",
142 |             "metadata": {"tags": ["retrieval-test"], "type": "note"}
143 |         })
144 | 
145 |         # Now retrieve it
146 |         result = await server.handle_retrieve_memory({
147 |             "query": "searchable test memory",
148 |             "n_results": 5
149 |         })
150 | 
151 |         # Verify result structure
152 |         assert isinstance(result, list)
153 |         assert len(result) == 1
154 |         assert isinstance(result[0], types.TextContent)
155 | 
156 |         # Should contain memory data (JSON format)
157 |         text = result[0].text
158 |         assert "searchable test memory" in text.lower() or "retrieval-test" in text.lower()
159 | 
160 |     @pytest.mark.asyncio
161 |     async def test_retrieve_memory_missing_query(self):
162 |         """Test retrieving without query parameter returns error."""
163 |         server = MemoryServer()
164 | 
165 |         result = await server.handle_retrieve_memory({
166 |             "n_results": 5
167 |         })
168 | 
169 |         # Verify error message
170 |         assert isinstance(result, list)
171 |         assert len(result) == 1
172 |         text = result[0].text
173 |         assert "error" in text.lower()
174 |         assert "query" in text.lower()
175 | 
176 | 
177 | class TestHandleSearchByTag:
178 |     """Test suite for handle_search_by_tag MCP handler."""
179 | 
180 |     @pytest.mark.asyncio
181 |     async def test_search_by_tag_success(self):
182 |         """Test searching by tag returns matching memories."""
183 |         server = MemoryServer()
184 | 
185 |         # Store a memory with specific tag
186 |         await server.handle_store_memory({
187 |             "content": "Memory with unique tag for search",
188 |             "metadata": {"tags": ["unique-search-tag"], "type": "note"}
189 |         })
190 | 
191 |         # Search by tag
192 |         result = await server.handle_search_by_tag({
193 |             "tags": ["unique-search-tag"]
194 |         })
195 | 
196 |         # Verify result structure
197 |         assert isinstance(result, list)
198 |         assert len(result) == 1
199 |         assert isinstance(result[0], types.TextContent)
200 | 
201 |         # Should contain memory data
202 |         text = result[0].text
203 |         assert "unique-search-tag" in text.lower() or "memory with unique tag" in text.lower()
204 | 
205 |     @pytest.mark.asyncio
206 |     async def test_search_by_tag_missing_tags(self):
207 |         """Test searching without tags parameter returns error."""
208 |         server = MemoryServer()
209 | 
210 |         result = await server.handle_search_by_tag({})
211 | 
212 |         # Verify error message
213 |         assert isinstance(result, list)
214 |         assert len(result) == 1
215 |         text = result[0].text
216 |         assert "error" in text.lower()
217 |         assert "tags" in text.lower()
218 | 
219 | 
220 | # Regression tests for issue #198
221 | class TestIssue198Regression:
222 |     """Regression tests specifically for issue #198 - Response format bug."""
223 | 
224 |     @pytest.mark.asyncio
225 |     async def test_no_keyerror_on_store_success(self):
226 |         """Verify fix for issue #198: No KeyError on successful store."""
227 |         server = MemoryServer()
228 | 
229 |         # This would previously raise KeyError: 'message'
230 |         result = await server.handle_store_memory({
231 |             "content": "Test for issue 198 regression",
232 |             "metadata": {"tags": ["issue-198"], "type": "test"}
233 |         })
234 | 
235 |         # Should return success message without KeyError
236 |         assert isinstance(result, list)
237 |         assert len(result) == 1
238 |         assert "successfully" in result[0].text.lower()
239 |         # Should NOT contain the string "message" (old buggy behavior)
240 |         assert result[0].text != "Error storing memory: 'message'"
241 | 
242 |     @pytest.mark.asyncio
243 |     async def test_error_handling_without_keyerror(self):
244 |         """Verify fix for issue #198: Errors handled without KeyError."""
245 |         server = MemoryServer()
246 | 
247 |         # Store with empty content (triggers error path)
248 |         result = await server.handle_store_memory({
249 |             "content": "",
250 |             "metadata": {}
251 |         })
252 | 
253 |         # Should return error message without KeyError
254 |         assert isinstance(result, list)
255 |         assert len(result) == 1
256 |         assert "error" in result[0].text.lower()
257 |         # Should NOT be KeyError message
258 |         assert "'message'" not in result[0].text
259 | 
```
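
A note on the issue #198 regression tests above: the bug was a handler that assumed a `'message'` key on every MemoryService result, raising `KeyError: 'message'` and surfacing to clients as the literal text `Error storing memory: 'message'`. The sketch below shows the robust pattern these tests lock in; it is illustrative only, and the dict keys `success`, `content_hash`, and `error` are assumptions for the example, not the service's confirmed schema.

```python
from mcp import types

def format_store_result(result: dict) -> list[types.TextContent]:
    """Illustrative sketch, not the repository's actual handler code."""
    if result.get("success"):
        # Pre-fix code did result["message"], which raised KeyError whenever
        # the service omitted that key; .get() with a fallback avoids it.
        content_hash = result.get("content_hash", "")
        text = result.get("message") or (
            f"Memory stored successfully. Hash: {content_hash[:8]}..."
        )
    else:
        text = f"Error storing memory: {result.get('error', 'content is required')}"
    return [types.TextContent(type="text", text=text)]
```

The success branch satisfies the assertions in TestHandleStoreMemory ("successfully", "hash:", and a truncated "..."), and neither branch can reproduce the old KeyError string.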
Page 12/47