This is page 54 of 62. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── commands
│ │ ├── README.md
│ │ ├── refactor-function
│ │ ├── refactor-function-prod
│ │ └── refactor-function.md
│ ├── consolidation-fix-handoff.md
│ ├── consolidation-hang-fix-summary.md
│ ├── directives
│ │ ├── agents.md
│ │ ├── code-quality-workflow.md
│ │ ├── consolidation-details.md
│ │ ├── development-setup.md
│ │ ├── hooks-configuration.md
│ │ ├── memory-first.md
│ │ ├── memory-tagging.md
│ │ ├── pr-workflow.md
│ │ ├── quality-system-details.md
│ │ ├── README.md
│ │ ├── refactoring-checklist.md
│ │ ├── storage-backends.md
│ │ └── version-management.md
│ ├── prompts
│ │ └── hybrid-cleanup-integration.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .coveragerc
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-branch-automation.yml
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── dockerfile-lint.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── publish-dual.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .metrics
│ ├── baseline_cc_install_hooks.txt
│ ├── baseline_mi_install_hooks.txt
│ ├── baseline_nesting_install_hooks.txt
│ ├── BASELINE_REPORT.md
│ ├── COMPLEXITY_COMPARISON.txt
│ ├── QUICK_REFERENCE.txt
│ ├── README.md
│ ├── REFACTORED_BASELINE.md
│ ├── REFACTORING_COMPLETION_REPORT.md
│ └── TRACKING_TABLE.md
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── ai-optimized-tool-descriptions.py
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── auto-capture-hook.js
│ │ ├── auto-capture-hook.ps1
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── permission-request.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-AUTO-CAPTURE.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-PERMISSION-REQUEST.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-permission-request.js
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── auto-capture-patterns.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-cache.json
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ ├── user-override-detector.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── COMMIT_MESSAGE.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── graph-database-design.md
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── demo-recording-script.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-280-post-mortem.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ ├── quality-system-configs.md
│ │ └── tag-schema.json
│ ├── features
│ │ └── association-quality-boost.md
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── memory-quality-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ ├── dashboard-placeholder.md
│ │ └── update-restart-demo.png
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LIGHTWEIGHT_ONNX_SETUP.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ ├── code-execution-api-quick-start.md
│ │ └── graph-migration-guide.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quality-system-ui-implementation.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── refactoring
│ │ └── phase-3-3-analysis.md
│ ├── releases
│ │ └── v8.72.0-testing.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── database-transfer-migration.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── memory-management.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ ├── tutorials
│ │ ├── advanced-techniques.md
│ │ ├── data-analysis.md
│ │ └── demo-session-walkthrough.md
│ ├── wiki-documentation-plan.md
│ └── wiki-Graph-Database-Architecture.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── IMPLEMENTATION_SUMMARY.md
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── PR_DESCRIPTION.md
├── pyproject-lite.toml
├── pyproject.toml
├── pytest.ini
├── README.md
├── release-notes-v8.61.0.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── ci
│ │ ├── check_dockerfile_args.sh
│ │ └── validate_imports.sh
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── add_project_tags.py
│ │ ├── apply_quality_boost_retroactively.py
│ │ ├── assign_memory_types.py
│ │ ├── auto_retag_memory_merge.py
│ │ ├── auto_retag_memory.py
│ │ ├── backfill_graph_table.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_association_memories_hybrid.py
│ │ ├── cleanup_association_memories.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_low_quality.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── delete_test_memories.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ ├── retag_valuable_memories.py
│ │ ├── scan_todos.sh
│ │ ├── soft_delete_test_memories.py
│ │ └── sync_status.py
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── pre_pr_check.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks_on_files.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── bulk_evaluate_onnx.py
│ │ ├── check_test_scores.py
│ │ ├── debug_deberta_scoring.py
│ │ ├── export_deberta_onnx.py
│ │ ├── fix_dead_code_install.sh
│ │ ├── migrate_to_deberta.py
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── rescore_deberta.py
│ │ ├── rescore_fallback.py
│ │ ├── reset_onnx_scores.py
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── memory_wrapper_cleanup.ps1
│ │ ├── memory_wrapper_cleanup.py
│ │ ├── memory_wrapper_cleanup.sh
│ │ ├── README_CLEANUP_WRAPPER.md
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── http_server_manager.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ ├── update_service.sh
│ │ └── windows
│ │ ├── add_watchdog_trigger.ps1
│ │ ├── install_scheduled_task.ps1
│ │ ├── manage_service.ps1
│ │ ├── run_http_server_background.ps1
│ │ ├── uninstall_scheduled_task.ps1
│ │ └── update_and_restart.ps1
│ ├── setup-lightweight.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── update_and_restart.sh
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── detect_platform.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── README_detect_platform.md
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── check_handler_coverage.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_graph_tools.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── _version.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── quality
│ │ ├── __init__.py
│ │ ├── ai_evaluator.py
│ │ ├── async_scorer.py
│ │ ├── config.py
│ │ ├── implicit_signals.py
│ │ ├── metadata_codec.py
│ │ ├── onnx_ranker.py
│ │ └── scorer.py
│ ├── server
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── cache_manager.py
│ │ ├── client_detection.py
│ │ ├── environment.py
│ │ ├── handlers
│ │ │ ├── __init__.py
│ │ │ ├── consolidation.py
│ │ │ ├── documents.py
│ │ │ ├── graph.py
│ │ │ ├── memory.py
│ │ │ ├── quality.py
│ │ │ └── utility.py
│ │ └── logging_config.py
│ ├── server_impl.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── graph.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ ├── migrations
│ │ │ └── 008_add_graph_table.sql
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── directory_ingestion.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── health_check.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── quality_analytics.py
│ │ ├── startup_orchestrator.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── quality.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── i18n
│ │ ├── de.json
│ │ ├── en.json
│ │ ├── es.json
│ │ ├── fr.json
│ │ ├── ja.json
│ │ ├── ko.json
│ │ └── zh.json
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── TESTING_NOTES.md
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ ├── test_forgetting.py
│ │ └── test_graph_modes.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── conftest.py
│ │ ├── HANDLER_COVERAGE_REPORT.md
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_all_memory_handlers.py
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── storage
│ │ ├── conftest.py
│ │ └── test_graph_storage.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_deberta_quality.py
│ ├── test_fallback_quality.py
│ ├── test_graph_traversal.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_lightweight_onnx.py
│ ├── test_memory_ops.py
│ ├── test_memory_wrapper_cleanup.py
│ ├── test_quality_integration.py
│ ├── test_quality_system.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_imports.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ ├── test_tag_time_filtering.py
│ └── test_uv_no_pip_installer_fallback.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
├── uv.lock
└── verify_compression.sh
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/storage/hybrid.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Hybrid memory storage backend for MCP Memory Service.
17 |
18 | This implementation provides the best of both worlds:
19 | - SQLite-vec as primary storage for ultra-fast reads (~5ms)
20 | - Cloudflare as secondary storage for cloud persistence and multi-device sync
21 | - Background synchronization service for seamless integration
22 | - Graceful degradation when cloud services are unavailable
23 | """
24 |
25 | import asyncio
26 | import logging
27 | import time
28 | from typing import List, Dict, Any, Tuple, Optional
29 | from collections import deque
30 | from dataclasses import dataclass
31 | from datetime import date
32 |
33 | from .base import MemoryStorage
34 | from .sqlite_vec import SqliteVecMemoryStorage
35 | from .cloudflare import CloudflareStorage
36 | from ..models.memory import Memory, MemoryQueryResult
37 |
38 | # Import SSE for real-time progress updates
39 | try:
40 | from ..web.sse import sse_manager, create_sync_progress_event, create_sync_completed_event
41 | SSE_AVAILABLE = True
42 | except ImportError:
43 | SSE_AVAILABLE = False
44 |
45 | # Import config to check if limit constants are available
46 | from .. import config as app_config
47 |
48 | # Use getattr to provide fallbacks if attributes don't exist (prevents duplicate defaults)
49 | # Defensive None checks to prevent comparison errors (Issue #316)
50 | _d1_max = getattr(app_config, 'CLOUDFLARE_D1_MAX_SIZE_GB', 10)
51 | CLOUDFLARE_D1_MAX_SIZE_GB = _d1_max if _d1_max is not None else 10
52 |
53 | _vec_max = getattr(app_config, 'CLOUDFLARE_VECTORIZE_MAX_VECTORS', 5_000_000)
54 | CLOUDFLARE_VECTORIZE_MAX_VECTORS = _vec_max if _vec_max is not None else 5_000_000
55 |
56 | _meta_max = getattr(app_config, 'CLOUDFLARE_MAX_METADATA_SIZE_KB', 10)
57 | CLOUDFLARE_MAX_METADATA_SIZE_KB = _meta_max if _meta_max is not None else 10
58 |
59 | _warn_thresh = getattr(app_config, 'CLOUDFLARE_WARNING_THRESHOLD_PERCENT', 80)
60 | CLOUDFLARE_WARNING_THRESHOLD_PERCENT = _warn_thresh if _warn_thresh is not None else 80
61 |
62 | _crit_thresh = getattr(app_config, 'CLOUDFLARE_CRITICAL_THRESHOLD_PERCENT', 95)
63 | CLOUDFLARE_CRITICAL_THRESHOLD_PERCENT = _crit_thresh if _crit_thresh is not None else 95
64 | _sync_startup = getattr(app_config, 'HYBRID_SYNC_ON_STARTUP', True)
65 | HYBRID_SYNC_ON_STARTUP = _sync_startup if _sync_startup is not None else True
66 |
67 | _max_len = getattr(app_config, 'HYBRID_MAX_CONTENT_LENGTH', 800)
68 | HYBRID_MAX_CONTENT_LENGTH = _max_len if _max_len is not None else 800
69 |
70 | _max_empty = getattr(app_config, 'HYBRID_MAX_EMPTY_BATCHES', 20)
71 | HYBRID_MAX_EMPTY_BATCHES = _max_empty if _max_empty is not None else 20
72 |
73 | _min_check = getattr(app_config, 'HYBRID_MIN_CHECK_COUNT', 1000)
74 | HYBRID_MIN_CHECK_COUNT = _min_check if _min_check is not None else 1000
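
# [Editor's note: descriptive comment, not part of the original hybrid.py]
# The two-step pattern above guards against two distinct cases: getattr()
# covers a config attribute that is missing entirely, while the
# `if x is not None else default` check covers an attribute that exists but
# was explicitly set to None, so e.g. CLOUDFLARE_D1_MAX_SIZE_GB = None in
# config still resolves to the usable default of 10.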
75 |
76 | logger = logging.getLogger(__name__)
77 |
78 | @dataclass
79 | class SyncOperation:
80 | """Represents a pending sync operation."""
81 | operation: str # 'store', 'delete', 'update', 'delete_by_timeframe', 'delete_before_date'
82 | memory: Optional[Memory] = None
83 | content_hash: Optional[str] = None
84 | updates: Optional[Dict[str, Any]] = None
85 | preserve_timestamps: bool = True
86 | start_date: Optional[date] = None
87 | end_date: Optional[date] = None
88 | before_date: Optional[date] = None
89 | tag: Optional[str] = None
90 |     timestamp: Optional[float] = None  # set in __post_init__ when omitted
91 | retries: int = 0
92 | max_retries: int = 3
93 |
94 | def __post_init__(self):
95 | if self.timestamp is None:
96 | self.timestamp = time.time()
97 |
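# [Illustrative sketch, not part of the original hybrid.py]
# Enqueueing a store operation, assuming `service` is a running
# BackgroundSyncService (defined below) and `mem` is a Memory instance:
#
#     op = SyncOperation(operation='store', memory=mem)
#     await service.enqueue_operation(op)  # timestamp auto-set by __post_init__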
98 |
99 | def _normalize_metadata_for_cloudflare(updates: Dict[str, Any]) -> Dict[str, Any]:
100 | """
101 | Normalize metadata updates for Cloudflare storage backend.
102 |
103 | Cloudflare expects metadata fields wrapped in a 'metadata' key,
104 | while SQLite-vec accepts flat fields. This function transforms
105 | flat metadata fields into the wrapped format and applies CSV
106 | compression for quality/consolidation metadata (60% size reduction).
107 |
108 | Args:
109 | updates: Raw updates dict (may be flat or already wrapped)
110 |
111 | Returns:
112 | Normalized and compressed updates dict suitable for Cloudflare
113 | """
114 | # Protected top-level keys that Cloudflare recognizes
115 | cloudflare_keys = {"metadata", "memory_type", "tags", "created_at",
116 | "created_at_iso", "updated_at", "updated_at_iso"}
117 |
118 | # If updates already has 'metadata' key, apply compression
119 | if "metadata" in updates:
120 | from ..quality.metadata_codec import compress_metadata_for_sync
121 | compressed = updates.copy()
122 | compressed["metadata"] = compress_metadata_for_sync(updates["metadata"])
123 | return compressed
124 |
125 | # Separate Cloudflare-recognized keys from metadata fields
126 | wrapped_updates = {}
127 | metadata_fields = {}
128 |
129 | for key, value in updates.items():
130 | if key in cloudflare_keys:
131 | wrapped_updates[key] = value
132 | else:
133 | # These are metadata fields that need wrapping
134 | metadata_fields[key] = value
135 |
136 | # Only add metadata wrapper if there are fields to wrap
137 | if metadata_fields:
138 | from ..quality.metadata_codec import compress_metadata_for_sync
139 | wrapped_updates["metadata"] = compress_metadata_for_sync(metadata_fields)
140 |
141 | return wrapped_updates
142 |
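# [Illustrative sketch, not part of the original hybrid.py]
# Wrapping behaviour, using a hypothetical flat field "quality_score":
#
#     _normalize_metadata_for_cloudflare({"memory_type": "note", "quality_score": 0.9})
#     # -> {"memory_type": "note",
#     #     "metadata": compress_metadata_for_sync({"quality_score": 0.9})}
#
# "memory_type" is a recognized top-level key and passes through unchanged;
# the unrecognized field is wrapped under "metadata" and CSV-compressed.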
143 |
144 | class BackgroundSyncService:
145 | """
146 | Handles background synchronization between SQLite-vec and Cloudflare.
147 |
148 | Features:
149 | - Asynchronous operation queue
150 | - Retry logic with exponential backoff
151 | - Health monitoring and error handling
152 | - Configurable sync intervals and batch sizes
153 | - Graceful degradation when cloud is unavailable
154 | """
155 |
156 | def __init__(self,
157 | primary_storage: SqliteVecMemoryStorage,
158 | secondary_storage: CloudflareStorage,
159 | sync_interval: int = None, # Use config default if None
160 | batch_size: int = None, # Use config default if None
161 | max_queue_size: int = None): # Use config default if None
162 | self.primary = primary_storage
163 | self.secondary = secondary_storage
164 |
165 | # Use config values if parameters not provided (for backward compatibility)
166 | # Defensive None checks to prevent Queue initialization with maxsize=None (Issue #316)
167 | sync_int = sync_interval if sync_interval is not None else getattr(app_config, 'HYBRID_SYNC_INTERVAL', 300)
168 | self.sync_interval = sync_int if sync_int is not None else 300
169 |
170 | batch = batch_size if batch_size is not None else getattr(app_config, 'HYBRID_BATCH_SIZE', 100)
171 | self.batch_size = batch if batch is not None else 100
172 |
173 | queue_size = max_queue_size if max_queue_size is not None else getattr(app_config, 'HYBRID_QUEUE_SIZE', 2000)
174 | self.max_queue_size = queue_size if queue_size is not None else 2000
175 |
176 | # Sync queues and state
177 | self.operation_queue = asyncio.Queue(maxsize=self.max_queue_size)
178 | self.failed_operations = deque(maxlen=100) # Keep track of failed operations
179 | self.is_running = False
180 | self.sync_task = None
181 | self.last_sync_time = 0
182 |
183 | # Drift detection state (v8.25.0+)
184 | self.last_drift_check_time = 0
185 | self.drift_check_enabled = getattr(app_config, 'HYBRID_SYNC_UPDATES', True)
186 | self.drift_check_interval = getattr(app_config, 'HYBRID_DRIFT_CHECK_INTERVAL', 3600)
187 |
188 | # Tombstone purge state (v8.64.0+)
189 | self.last_purge_time = 0
190 | self.purge_interval = 86400 # Daily purge check (24 hours)
191 | self.tombstone_retention_days = getattr(app_config, 'TOMBSTONE_RETENTION_DAYS', 30)
192 |
193 | self.sync_stats = {
194 | 'operations_processed': 0,
195 | 'operations_failed': 0,
196 | 'last_sync_duration': 0,
197 | 'cloudflare_available': True,
198 | 'last_drift_check': 0,
199 | 'drift_detected_count': 0,
200 | 'drift_synced_count': 0
201 | }
202 |
203 | # Health monitoring
204 | self.consecutive_failures = 0
205 | self.max_consecutive_failures = 5
206 | self.backoff_time = 60 # Start with 1 minute backoff
207 |
208 | # Cloudflare capacity tracking
209 | self.cloudflare_stats = {
210 | 'vector_count': 0,
211 | 'estimated_d1_size_gb': 0,
212 | 'last_capacity_check': 0,
213 | 'approaching_limits': False,
214 | 'limit_warnings': []
215 | }
216 |
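    # [Illustrative sketch, not part of the original hybrid.py]
    # Typical lifecycle, assuming `primary` is a SqliteVecMemoryStorage and
    # `secondary` a CloudflareStorage instance:
    #
    #     sync = BackgroundSyncService(primary, secondary)  # config defaults apply
    #     await sync.start()                                # spawns _sync_loop()
    #     ...
    #     await sync.stop()   # drains the queue before cancelling the task
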
217 | async def start(self):
218 | """Start the background sync service."""
219 | if self.is_running:
220 | logger.warning("Background sync service is already running")
221 | return
222 |
223 | self.is_running = True
224 | self.sync_task = asyncio.create_task(self._sync_loop())
225 | logger.info(f"Background sync service started with {self.sync_interval}s interval")
226 |
227 | async def stop(self):
228 | """Stop the background sync service and process remaining operations."""
229 | if not self.is_running:
230 | return
231 |
232 | self.is_running = False
233 |
234 | # Process remaining operations in queue
235 | remaining_operations = []
236 | while not self.operation_queue.empty():
237 | try:
238 | operation = self.operation_queue.get_nowait()
239 | remaining_operations.append(operation)
240 | except asyncio.QueueEmpty:
241 | break
242 |
243 | if remaining_operations:
244 | logger.info(f"Processing {len(remaining_operations)} remaining operations before shutdown")
245 | await self._process_operations_batch(remaining_operations)
246 |
247 | # Cancel the sync task
248 | if self.sync_task:
249 | self.sync_task.cancel()
250 | try:
251 | await self.sync_task
252 | except asyncio.CancelledError:
253 | pass
254 |
255 | logger.info("Background sync service stopped")
256 |
257 | async def enqueue_operation(self, operation: SyncOperation):
258 | """Enqueue a sync operation for background processing."""
259 | try:
260 | # Add 5-second timeout to prevent indefinite blocking (v8.47.1)
261 | await asyncio.wait_for(
262 | self.operation_queue.put(operation),
263 | timeout=5.0
264 | )
265 | logger.debug(f"Enqueued {operation.operation} operation")
266 | except asyncio.TimeoutError:
267 | # Queue full and can't add within timeout - fallback to immediate sync
268 | logger.warning("Sync queue full (timeout), processing operation immediately")
269 | await self._process_single_operation(operation)
270 | except asyncio.QueueFull:
271 | # If queue is full, process immediately to avoid blocking
272 | logger.warning("Sync queue full, processing operation immediately")
273 | await self._process_single_operation(operation)
274 |
275 | async def force_sync(self) -> Dict[str, Any]:
276 | """Force an immediate full synchronization between backends."""
277 | logger.info("Starting forced sync between primary and secondary storage")
278 | sync_start_time = time.time()
279 |
280 | try:
281 | # Get all memories from primary storage
282 | primary_memories = await self.primary.get_all_memories()
283 |
284 | # Check Cloudflare availability
285 | try:
286 | await self.secondary.get_stats() # Simple health check
287 | cloudflare_available = True
288 | except Exception as e:
289 | logger.warning(f"Cloudflare not available during force sync: {e}")
290 | cloudflare_available = False
291 | self.sync_stats['cloudflare_available'] = False
292 | return {
293 | 'status': 'partial',
294 | 'cloudflare_available': False,
295 | 'primary_memories': len(primary_memories),
296 | 'synced_to_secondary': 0,
297 | 'duration': time.time() - sync_start_time
298 | }
299 |
300 | # Sync from primary to secondary using concurrent operations
301 | async def sync_memory(memory):
302 | try:
303 | success, message = await self.secondary.store(memory)
304 | if success:
305 | return True, None
306 | else:
307 | logger.debug(f"Failed to sync memory to secondary: {message}")
308 | return False, message
309 | except Exception as e:
310 | logger.debug(f"Exception syncing memory to secondary: {e}")
311 | return False, str(e)
312 |
313 | # Process memories concurrently in batches
314 | synced_count = 0
315 | failed_count = 0
316 |
317 | # Process in batches to avoid overwhelming the system
318 | batch_size = self.batch_size # Use configured batch size
319 | for i in range(0, len(primary_memories), batch_size):
320 | batch = primary_memories[i:i + batch_size]
321 | results = await asyncio.gather(*[sync_memory(m) for m in batch], return_exceptions=True)
322 |
323 | for result in results:
324 | if isinstance(result, Exception):
325 | failed_count += 1
326 | logger.debug(f"Exception in batch sync: {result}")
327 | elif isinstance(result, tuple):
328 | success, _ = result
329 | if success:
330 | synced_count += 1
331 | else:
332 | failed_count += 1
333 |
334 | sync_duration = time.time() - sync_start_time
335 | self.sync_stats['last_sync_duration'] = sync_duration
336 | self.sync_stats['cloudflare_available'] = cloudflare_available
337 |
338 | logger.info(f"Force sync completed: {synced_count} synced, {failed_count} failed in {sync_duration:.2f}s")
339 |
340 | return {
341 | 'status': 'completed',
342 | 'cloudflare_available': cloudflare_available,
343 | 'primary_memories': len(primary_memories),
344 | 'synced_to_secondary': synced_count,
345 | 'failed_operations': failed_count,
346 | 'duration': sync_duration
347 | }
348 |
349 | except Exception as e:
350 | logger.error(f"Error during force sync: {e}")
351 | return {
352 | 'status': 'error',
353 | 'error': str(e),
354 | 'duration': time.time() - sync_start_time
355 | }
356 |
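    # [Illustrative sketch, not part of the original hybrid.py]
    # Forcing a full push of primary memories to Cloudflare:
    #
    #     result = await sync.force_sync()
    #     # result['status'] is 'completed', 'partial' (cloud unreachable), or 'error'
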
357 | async def get_sync_status(self) -> Dict[str, Any]:
358 | """Get current sync service status and statistics."""
359 | queue_size = self.operation_queue.qsize()
360 |
361 | status = {
362 | 'is_running': self.is_running,
363 | 'is_paused': not self.is_running,
364 | 'pending_operations': queue_size,
365 | 'queue_size': queue_size,
366 | 'failed_operations': len(self.failed_operations),
367 | 'last_sync_time': self.last_sync_time,
368 | 'consecutive_failures': self.consecutive_failures,
369 | 'stats': self.sync_stats.copy(),
370 | 'operations_processed': self.sync_stats.get('operations_processed', 0),
371 | 'operations_failed': self.sync_stats.get('operations_failed', 0),
372 | 'cloudflare_available': self.sync_stats['cloudflare_available'],
373 | 'sync_interval': self.sync_interval,
374 | 'next_sync_in': max(0, self.sync_interval - (time.time() - self.last_sync_time)),
375 | 'capacity': {
376 | 'vector_count': self.cloudflare_stats['vector_count'],
377 | 'vector_limit': CLOUDFLARE_VECTORIZE_MAX_VECTORS,
378 | 'approaching_limits': self.cloudflare_stats['approaching_limits'],
379 | 'warnings': self.cloudflare_stats['limit_warnings']
380 | }
381 | }
382 |
383 | return status
384 |
385 | async def validate_memory_for_cloudflare(self, memory: Memory) -> Tuple[bool, Optional[str]]:
386 | """
387 | Validate if a memory can be synced to Cloudflare.
388 |
389 | Returns:
390 | Tuple of (is_valid, error_message)
391 | """
392 | # Check metadata size
393 | if memory.metadata:
394 | import json
395 | metadata_json = json.dumps(memory.metadata)
396 | metadata_size_kb = len(metadata_json.encode('utf-8')) / 1024
397 |
398 | if metadata_size_kb > CLOUDFLARE_MAX_METADATA_SIZE_KB:
399 | return False, f"Metadata size {metadata_size_kb:.2f}KB exceeds Cloudflare limit of {CLOUDFLARE_MAX_METADATA_SIZE_KB}KB"
400 |
401 | # Check if we're approaching vector count limit (defensive None check)
402 | vector_count = self.cloudflare_stats.get('vector_count') or 0
403 | if vector_count >= CLOUDFLARE_VECTORIZE_MAX_VECTORS:
404 | return False, f"Cloudflare vector limit of {CLOUDFLARE_VECTORIZE_MAX_VECTORS} reached"
405 |
406 | return True, None
407 |
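    # [Editor's note: descriptive comment, not part of the original hybrid.py]
    # The size check above JSON-encodes metadata and measures UTF-8 bytes in KB:
    # a 12,000-byte payload is 12000 / 1024, about 11.7KB, and fails against the
    # default 10KB CLOUDFLARE_MAX_METADATA_SIZE_KB limit.
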
408 | async def check_cloudflare_capacity(self) -> Dict[str, Any]:
409 | """
410 | Check remaining Cloudflare capacity and return status.
411 | """
412 | try:
413 | # Get current stats from Cloudflare
414 | cf_stats = await self.secondary.get_stats()
415 |
416 | # Update our tracking (use 'or 0' to handle None values - v8.62.9)
417 | # Try multiple field names for compatibility
418 | vector_count = (cf_stats.get('vector_count') or
419 | cf_stats.get('total_vectors') or
420 | cf_stats.get('total_memories') or 0)
421 | self.cloudflare_stats['vector_count'] = vector_count
422 | self.cloudflare_stats['last_capacity_check'] = time.time()
423 |
424 | # Calculate usage percentages (defensive None check)
425 | vector_usage_percent = (self.cloudflare_stats.get('vector_count', 0) / CLOUDFLARE_VECTORIZE_MAX_VECTORS) * 100
426 |
427 | # Clear previous warnings
428 | self.cloudflare_stats['limit_warnings'] = []
429 |
430 | # Check vector count limits
431 | current_vector_count = self.cloudflare_stats.get('vector_count', 0)
432 | if vector_usage_percent >= CLOUDFLARE_CRITICAL_THRESHOLD_PERCENT:
433 | warning = f"CRITICAL: Vector usage at {vector_usage_percent:.1f}% ({current_vector_count:,}/{CLOUDFLARE_VECTORIZE_MAX_VECTORS:,})"
434 | self.cloudflare_stats['limit_warnings'].append(warning)
435 | logger.error(warning)
436 | self.cloudflare_stats['approaching_limits'] = True
437 | elif vector_usage_percent >= CLOUDFLARE_WARNING_THRESHOLD_PERCENT:
438 | warning = f"WARNING: Vector usage at {vector_usage_percent:.1f}% ({current_vector_count:,}/{CLOUDFLARE_VECTORIZE_MAX_VECTORS:,})"
439 | self.cloudflare_stats['limit_warnings'].append(warning)
440 | logger.warning(warning)
441 | self.cloudflare_stats['approaching_limits'] = True
442 | else:
443 | self.cloudflare_stats['approaching_limits'] = False
444 |
445 | return {
446 | 'vector_count': current_vector_count,
447 | 'vector_limit': CLOUDFLARE_VECTORIZE_MAX_VECTORS,
448 | 'vector_usage_percent': vector_usage_percent,
449 | 'approaching_limits': self.cloudflare_stats.get('approaching_limits', False),
450 | 'warnings': self.cloudflare_stats.get('limit_warnings', [])
451 | }
452 |
453 | except Exception as e:
454 | logger.error(f"Failed to check Cloudflare capacity: {e}")
455 | return {
456 | 'error': str(e),
457 | 'vector_count': self.cloudflare_stats.get('vector_count', 0),
458 | 'vector_limit': CLOUDFLARE_VECTORIZE_MAX_VECTORS,
459 | 'vector_usage_percent': 0,
460 | 'approaching_limits': False,
461 | 'warnings': []
462 | }
463 |
464 | async def _sync_loop(self):
465 | """Main background sync loop."""
466 | logger.info("Background sync loop started")
467 |
468 | while self.is_running:
469 | try:
470 | # Process queued operations
471 | await self._process_operation_queue()
472 |
473 | # Periodic full sync if enough time has passed
474 | current_time = time.time()
475 | if current_time - self.last_sync_time >= self.sync_interval:
476 | await self._periodic_sync()
477 | self.last_sync_time = current_time
478 |
479 | # Daily tombstone purge (v8.64.0+)
480 | if current_time - self.last_purge_time >= self.purge_interval:
481 | await self._purge_old_tombstones()
482 | self.last_purge_time = current_time
483 |
484 | # Sleep before next iteration
485 | await asyncio.sleep(5) # Check every 5 seconds
486 |
487 | except Exception as e:
488 | logger.error(f"Error in sync loop: {e}")
489 | self.consecutive_failures += 1
490 |
491 | if self.consecutive_failures >= self.max_consecutive_failures:
492 | logger.warning(f"Too many consecutive sync failures ({self.consecutive_failures}), backing off for {self.backoff_time}s")
493 | await asyncio.sleep(self.backoff_time)
494 | self.backoff_time = min(self.backoff_time * 2, 1800) # Max 30 minutes
495 | else:
496 | await asyncio.sleep(1)
497 |
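    # [Editor's note: descriptive comment, not part of the original hybrid.py]
    # Implied backoff schedule: after max_consecutive_failures (5), the loop
    # sleeps backoff_time seconds, doubling each round and capping at 30 minutes:
    # 60s -> 120s -> 240s -> 480s -> 960s -> 1800s. A successful operation
    # resets it to 60s (see _process_single_operation).
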
498 | async def _process_operation_queue(self):
499 | """Process operations from the queue in batches."""
500 | operations = []
501 |
502 | # Collect up to batch_size operations
503 | for _ in range(self.batch_size):
504 | try:
505 | operation = self.operation_queue.get_nowait()
506 | operations.append(operation)
507 | except asyncio.QueueEmpty:
508 | break
509 |
510 | if operations:
511 | await self._process_operations_batch(operations)
512 |
513 | async def _purge_old_tombstones(self):
514 | """
515 | Purge soft-deleted memories older than retention period.
516 |
517 | This runs daily to clean up tombstones that have been synced to all devices.
518 | Default retention: 30 days (configurable via TOMBSTONE_RETENTION_DAYS).
519 | """
520 | try:
521 | if hasattr(self.primary, 'purge_deleted'):
522 | purged_count = await self.primary.purge_deleted(
523 | older_than_days=self.tombstone_retention_days
524 | )
525 | if purged_count > 0:
526 | logger.info(f"Purged {purged_count} tombstones older than {self.tombstone_retention_days} days")
527 | self.sync_stats['tombstones_purged'] = self.sync_stats.get('tombstones_purged', 0) + purged_count
528 | else:
529 | logger.debug("Primary storage does not support tombstone purging")
530 | except Exception as e:
531 | logger.error(f"Error purging old tombstones: {e}")
532 |
533 | async def _process_operations_batch(self, operations: List[SyncOperation]):
534 | """Process a batch of sync operations."""
535 | logger.debug(f"Processing batch of {len(operations)} sync operations")
536 |
537 | for operation in operations:
538 | try:
539 | await self._process_single_operation(operation)
540 | self.sync_stats['operations_processed'] += 1
541 |
542 | except Exception as e:
543 | await self._handle_sync_error(e, operation)
544 |
545 | async def _handle_sync_error(self, error: Exception, operation: SyncOperation):
546 | """
547 | Handle sync operation errors with intelligent retry logic.
548 |
549 | Args:
550 | error: The exception that occurred
551 | operation: The failed operation
552 | """
553 | error_str = str(error).lower()
554 |
555 | # Check for specific Cloudflare limit errors
556 | is_limit_error = any(term in error_str for term in [
557 | 'limit exceeded', 'quota exceeded', 'maximum', 'too large',
558 | '413', '507', 'insufficient storage', 'capacity'
559 | ])
560 |
561 | if is_limit_error:
562 | # Don't retry limit errors - they won't succeed
563 | logger.error(f"Cloudflare limit error for {operation.operation}: {error}")
564 | self.sync_stats['operations_failed'] += 1
565 |
566 | # Update capacity tracking
567 | self.cloudflare_stats['approaching_limits'] = True
568 | self.cloudflare_stats['limit_warnings'].append(f"Limit error: {error}")
569 |
570 | # Check capacity to understand the issue
571 | await self.check_cloudflare_capacity()
572 | return
573 |
574 | # Check for temporary/network errors
575 | is_temporary_error = any(term in error_str for term in [
576 | 'timeout', 'connection', 'network', '500', '502', '503', '504',
577 | 'temporarily unavailable', 'retry'
578 | ])
579 |
580 | if is_temporary_error or operation.retries < operation.max_retries:
581 |             # Retry temporary errors, or any error that still has retries remaining
582 | logger.warning(f"Temporary error for {operation.operation} (retry {operation.retries + 1}/{operation.max_retries}): {error}")
583 | operation.retries += 1
584 |
585 | if operation.retries < operation.max_retries:
586 |                 # Back off exponentially, then hold in failed_operations for retry on the next periodic sync
587 | await asyncio.sleep(min(2 ** operation.retries, 60)) # Max 60 second delay
588 | self.failed_operations.append(operation)
589 | else:
590 | logger.error(f"Max retries reached for {operation.operation}")
591 | self.sync_stats['operations_failed'] += 1
592 | else:
593 | # Permanent error - don't retry
594 | logger.error(f"Permanent error for {operation.operation}: {error}")
595 | self.sync_stats['operations_failed'] += 1
596 |
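    # [Illustrative sketch, not part of the original hybrid.py]
    # How _handle_sync_error routes example messages:
    #   "Vectorize quota exceeded"  -> limit error: no retry, capacity re-checked
    #   "connection timeout"        -> temporary: retried with exponential backoff
    #   any other error             -> still retried while retries < max_retries,
    #                                  then counted in operations_failed
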
597 | async def _process_single_operation(self, operation: SyncOperation):
598 | """Process a single sync operation to secondary storage."""
599 | try:
600 | if operation.operation == 'store' and operation.memory:
601 | # Validate memory before syncing
602 | is_valid, validation_error = await self.validate_memory_for_cloudflare(operation.memory)
603 | if not is_valid:
604 | logger.warning(f"Memory validation failed for sync: {validation_error}")
605 | # Don't retry if it's a hard limit
606 | if "exceeds Cloudflare limit" in validation_error or "limit of" in validation_error:
607 | self.sync_stats['operations_failed'] += 1
608 | return # Skip this memory permanently
609 | raise Exception(validation_error)
610 |
611 | success, message = await self.secondary.store(operation.memory)
612 | if not success:
613 | raise Exception(f"Store operation failed: {message}")
614 |
615 | elif operation.operation == 'delete' and operation.content_hash:
616 | success, message = await self.secondary.delete(operation.content_hash)
617 | if not success:
618 | raise Exception(f"Delete operation failed: {message}")
619 |
620 | elif operation.operation == 'update' and operation.content_hash and operation.updates:
621 | # Validate metadata size before syncing to Cloudflare
622 | if 'metadata' in operation.updates:
623 | import json
624 | metadata_json = json.dumps(operation.updates['metadata'])
625 | metadata_size_kb = len(metadata_json.encode('utf-8')) / 1024
626 |
627 | if metadata_size_kb > 9.5: # 9.5KB safety margin (Cloudflare limit is 10KB)
628 | logger.warning(
629 | f"Skipping Cloudflare sync for {operation.content_hash[:16]}: "
630 | f"metadata too large ({metadata_size_kb:.2f}KB > 9.5KB limit)"
631 | )
632 | self.sync_stats['operations_failed'] += 1
633 | return # Skip this update permanently (too large for Cloudflare)
634 |
635 | # Normalize metadata for Cloudflare backend
636 | normalized_updates = _normalize_metadata_for_cloudflare(operation.updates)
637 |
638 | success, message = await self.secondary.update_memory_metadata(
639 | operation.content_hash,
640 | normalized_updates,
641 | preserve_timestamps=operation.preserve_timestamps
642 | )
643 | if not success:
644 | raise Exception(f"Update operation failed: {message}")
645 |
646 | elif operation.operation == 'delete_by_timeframe':
647 | # Delete memories by timeframe in secondary storage
648 | if operation.start_date and operation.end_date:
649 | success, message = await self.secondary.delete_by_timeframe(
650 | operation.start_date,
651 | operation.end_date,
652 | operation.tag
653 | )
654 | if not success:
655 | raise Exception(f"Delete by timeframe failed: {message}")
656 |                     self.sync_stats['operations_synced'] = self.sync_stats.get('operations_synced', 0) + 1  # .get() avoids KeyError: key absent from the initial sync_stats dict
657 | logger.debug(f"Synced delete_by_timeframe: {message}")
658 | else:
659 | raise ValueError("delete_by_timeframe operation missing start_date or end_date")
660 |
661 | elif operation.operation == 'delete_before_date':
662 | # Delete memories before date in secondary storage
663 | if operation.before_date:
664 | success, message = await self.secondary.delete_before_date(
665 | operation.before_date,
666 | operation.tag
667 | )
668 | if not success:
669 | raise Exception(f"Delete before date failed: {message}")
670 |                     self.sync_stats['operations_synced'] = self.sync_stats.get('operations_synced', 0) + 1  # .get() avoids KeyError: key absent from the initial sync_stats dict
671 | logger.debug(f"Synced delete_before_date: {message}")
672 | else:
673 | raise ValueError("delete_before_date operation missing before_date")
674 |
675 | # Reset failure counters on success
676 | self.consecutive_failures = 0
677 | self.backoff_time = 60
678 | self.sync_stats['cloudflare_available'] = True
679 |
680 | except Exception as e:
681 | # Mark Cloudflare as potentially unavailable
682 | self.sync_stats['cloudflare_available'] = False
683 | raise
684 |
685 | async def _periodic_sync(self):
686 | """Perform periodic full synchronization."""
687 | logger.debug("Starting periodic sync")
688 |
689 | try:
690 | # Retry any failed operations first
691 | if self.failed_operations:
692 | retry_operations = list(self.failed_operations)
693 | self.failed_operations.clear()
694 | logger.info(f"Retrying {len(retry_operations)} failed operations")
695 | await self._process_operations_batch(retry_operations)
696 |
697 | # Perform a lightweight health check
698 | try:
699 | stats = await self.secondary.get_stats()
700 | logger.debug(f"Secondary storage health check passed: {stats}")
701 | self.sync_stats['cloudflare_available'] = True
702 |
703 | # Check Cloudflare capacity every periodic sync
704 | capacity_status = await self.check_cloudflare_capacity()
705 | if capacity_status.get('approaching_limits'):
706 | logger.warning("Cloudflare approaching capacity limits")
707 | for warning in capacity_status.get('warnings', []):
708 | logger.warning(warning)
709 |
710 | # Periodic drift detection (v8.25.0+)
711 | if self.drift_check_enabled:
712 | time_since_last_check = time.time() - self.last_drift_check_time
713 | if time_since_last_check >= self.drift_check_interval:
714 | logger.info(f"Running periodic drift check (interval: {self.drift_check_interval}s)")
715 | drift_stats = await self._detect_and_sync_drift()
716 | logger.info(f"Drift check complete: {drift_stats}")
717 |
718 | except Exception as e:
719 | logger.warning(f"Secondary storage health check failed: {e}")
720 | self.sync_stats['cloudflare_available'] = False
721 |
722 | except Exception as e:
723 | logger.error(f"Error during periodic sync: {e}")
724 |
725 | async def _detect_and_sync_drift(self, dry_run: bool = False) -> Dict[str, int]:
726 | """
727 | Detect and sync memories with divergent metadata between backends.
728 |
729 | Compares updated_at timestamps to identify metadata drift (tags, types, custom fields)
730 | and synchronizes changes using "newer timestamp wins" strategy.
731 |
732 | Args:
733 | dry_run: If True, detect drift but don't apply changes (preview mode)
734 |
735 | Returns:
736 | Dictionary with drift detection statistics:
737 | - checked: Number of memories examined
738 | - drift_detected: Number of memories with divergent metadata
739 | - synced: Number of memories synchronized
740 | - failed: Number of sync failures
741 | """
742 | if not self.drift_check_enabled:
743 | return {'checked': 0, 'drift_detected': 0, 'synced': 0, 'failed': 0}
744 |
745 | logger.info(f"Starting drift detection scan (dry_run={dry_run})...")
746 | stats = {'checked': 0, 'drift_detected': 0, 'synced': 0, 'failed': 0}
747 |
748 | try:
749 | # Get batch of recently updated memories from Cloudflare
750 | batch_size = getattr(app_config, 'HYBRID_DRIFT_BATCH_SIZE', 100)
751 |
752 | # Strategy: Check memories updated since last drift check
753 | time_threshold = self.last_drift_check_time or (time.time() - self.drift_check_interval)
754 |
755 | # Get recently updated from Cloudflare
756 | if hasattr(self.secondary, 'get_memories_updated_since'):
757 | cf_updated = await self.secondary.get_memories_updated_since(
758 | time_threshold,
759 | limit=batch_size
760 | )
761 | else:
762 | # Fallback: Get recent memories and filter by timestamp
763 | cf_memories = await self.secondary.get_all_memories(limit=batch_size)
764 | cf_updated = [m for m in cf_memories if m.updated_at and m.updated_at >= time_threshold]
765 |
766 | logger.info(f"Found {len(cf_updated)} memories updated in Cloudflare since last check")
767 |
768 | # Compare with local versions
769 | for cf_memory in cf_updated:
770 | stats['checked'] += 1
771 | try:
772 | local_memory = await self.primary.get_by_hash(cf_memory.content_hash)
773 |
774 | if not local_memory:
775 | # Memory missing locally - sync it
776 | stats['drift_detected'] += 1
777 | logger.debug(f"Memory {cf_memory.content_hash[:8]} missing locally, syncing...")
778 | if not dry_run:
779 | success, _ = await self.primary.store(cf_memory)
780 | if success:
781 | stats['synced'] += 1
782 | else:
783 | logger.info(f"[DRY RUN] Would sync missing memory: {cf_memory.content_hash[:8]}")
784 | stats['synced'] += 1
785 | continue
786 |
787 | # Compare updated_at timestamps
788 | cf_updated_at = cf_memory.updated_at or 0
789 | local_updated_at = local_memory.updated_at or 0
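| # Worked example of the tolerance check below: cf=1717000000.95 vs
| # local=1717000000.10 gives |delta| = 0.85s <= 1.0, so the pair is treated
| # as in sync; a 5-second gap would be flagged as drift and resolved by the
| # "newer timestamp wins" rule that follows.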
790 |
791 | # Allow 1 second tolerance for timestamp precision
792 | if abs(cf_updated_at - local_updated_at) > 1.0:
793 | stats['drift_detected'] += 1
794 | logger.debug(
795 | f"Drift detected for {cf_memory.content_hash[:8]}: "
796 | f"Cloudflare={cf_updated_at:.2f}, Local={local_updated_at:.2f}"
797 | )
798 |
799 | # Use "newer timestamp wins" strategy
800 | if cf_updated_at > local_updated_at:
801 | # Cloudflare is newer - update local
802 | if not dry_run:
803 | success, _ = await self.primary.update_memory_metadata(
804 | cf_memory.content_hash,
805 | {
806 | 'tags': cf_memory.tags,
807 | 'memory_type': cf_memory.memory_type,
808 | 'metadata': cf_memory.metadata,
809 | 'created_at': cf_memory.created_at,
810 | 'created_at_iso': cf_memory.created_at_iso,
811 | 'updated_at': cf_memory.updated_at,
812 | 'updated_at_iso': cf_memory.updated_at_iso,
813 | },
814 | preserve_timestamps=False # Use Cloudflare timestamps
815 | )
816 | if success:
817 | stats['synced'] += 1
818 | logger.info(f"Synced metadata from Cloudflare → local: {cf_memory.content_hash[:8]}")
819 | else:
820 | stats['failed'] += 1
821 | else:
822 | logger.info(f"[DRY RUN] Would sync metadata from Cloudflare → local: {cf_memory.content_hash[:8]}")
823 | stats['synced'] += 1
824 | else:
825 | # Local is newer - update Cloudflare
826 | if not dry_run:
827 | operation = SyncOperation(
828 | operation='update',
829 | content_hash=local_memory.content_hash,
830 | updates={
831 | 'tags': local_memory.tags,
832 | 'memory_type': local_memory.memory_type,
833 | 'metadata': local_memory.metadata,
834 | }
835 | )
836 | await self.enqueue_operation(operation)
837 | stats['synced'] += 1
838 | logger.info(f"Queued metadata sync from local → Cloudflare: {local_memory.content_hash[:8]}")
839 | else:
840 | logger.info(f"[DRY RUN] Would queue metadata sync from local → Cloudflare: {local_memory.content_hash[:8]}")
841 | stats['synced'] += 1
842 |
843 | except Exception as e:
844 | logger.warning(f"Error checking drift for memory: {e}")
845 | stats['failed'] += 1
846 | continue
847 |
848 | # Update tracking
849 | if not dry_run:
850 | self.last_drift_check_time = time.time()
851 | self.sync_stats['last_drift_check'] = self.last_drift_check_time
852 | self.sync_stats['drift_detected_count'] += stats['drift_detected']
853 | self.sync_stats['drift_synced_count'] += stats['synced']
854 |
855 | logger.info(
856 | f"Drift detection complete: checked={stats['checked']}, "
857 | f"drift_detected={stats['drift_detected']}, synced={stats['synced']}, failed={stats['failed']}"
858 | )
859 |
860 | except Exception as e:
861 | logger.error(f"Error during drift detection: {e}")
862 |
863 | return stats
864 |
865 |
866 | class HybridMemoryStorage(MemoryStorage):
867 | """
868 | Hybrid memory storage using SQLite-vec as primary and Cloudflare as secondary.
869 |
870 | This implementation provides:
871 | - Ultra-fast reads and writes (~5ms) via SQLite-vec
872 | - Cloud persistence and multi-device sync via Cloudflare
873 | - Background synchronization with retry logic
874 | - Graceful degradation when cloud services are unavailable
875 | - Full compatibility with the MemoryStorage interface
876 | """
877 |
878 | @property
879 | def max_content_length(self) -> Optional[int]:
880 | """
881 | Maximum content length constrained by Cloudflare secondary storage.
882 | Uses configured hybrid limit (defaults to Cloudflare limit).
883 | """
884 | return HYBRID_MAX_CONTENT_LENGTH
885 |
886 | @property
887 | def supports_chunking(self) -> bool:
888 | """Hybrid backend supports content chunking with metadata linking."""
889 | return True
890 |
891 | def __init__(self,
892 | sqlite_db_path: str,
893 | embedding_model: str = "all-MiniLM-L6-v2",
894 | cloudflare_config: Optional[Dict[str, Any]] = None,
895 | sync_interval: int = 300,
896 | batch_size: int = 50):
897 | """
898 | Initialize hybrid storage with primary SQLite-vec and secondary Cloudflare.
899 |
900 | Args:
901 | sqlite_db_path: Path to SQLite-vec database file
902 | embedding_model: Embedding model name for SQLite-vec
903 | cloudflare_config: Optional Cloudflare configuration dict; when missing or incomplete, runs in SQLite-only mode
904 | sync_interval: Background sync interval in seconds (default: 5 minutes)
905 | batch_size: Batch size for sync operations (default: 50)
906 | """
907 | self.primary = SqliteVecMemoryStorage(
908 | db_path=sqlite_db_path,
909 | embedding_model=embedding_model
910 | )
911 |
912 | # Initialize Cloudflare storage if config provided
913 | self.secondary = None
914 | self.sync_service = None
915 |
916 | if cloudflare_config and all(key in cloudflare_config for key in
917 | ['api_token', 'account_id', 'vectorize_index', 'd1_database_id']):
918 | self.secondary = CloudflareStorage(**cloudflare_config)
919 | else:
920 | logger.warning("Cloudflare config incomplete, running in SQLite-only mode")
921 |
922 | self.sync_interval = sync_interval
923 | self.batch_size = batch_size
924 | self.initialized = False
925 |
926 | # Initial sync status tracking
927 | self.initial_sync_in_progress = False
928 | self.initial_sync_total = 0
929 | self.initial_sync_completed = 0
930 | self.initial_sync_finished = False
931 |
932 | # Pause state tracking (v8.47.1 - fix consolidation hang)
933 | self._sync_paused = False
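| # When True, the write paths (update_memory_metadata, delete_by_timeframe,
| # delete_before_date) skip enqueueing sync operations; pause_sync() sets the
| # flag and resume_sync() clears it.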
934 |
935 | # Track background tasks for proper cleanup (v8.62.9 - fix CI race condition)
936 | self._initial_sync_task: Optional[asyncio.Task] = None
937 |
938 | async def initialize(self) -> None:
939 | """Initialize the hybrid storage system."""
940 | logger.info("Initializing hybrid memory storage...")
941 |
942 | # Always initialize primary storage
943 | await self.primary.initialize()
944 | logger.info("Primary storage (SQLite-vec) initialized")
945 |
946 | # Initialize secondary storage and sync service if available
947 | if self.secondary:
948 | try:
949 | await self.secondary.initialize()
950 | logger.info("Secondary storage (Cloudflare) initialized")
951 |
952 | # Start background sync service
953 | self.sync_service = BackgroundSyncService(
954 | self.primary,
955 | self.secondary,
956 | sync_interval=self.sync_interval,
957 | batch_size=self.batch_size
958 | )
959 | await self.sync_service.start()
960 | logger.info("Background sync service started")
961 |
962 | # Schedule initial sync to run after server startup (non-blocking)
963 | if HYBRID_SYNC_ON_STARTUP:
964 | self._initial_sync_task = asyncio.create_task(self._perform_initial_sync_after_startup())
965 | logger.info("Initial sync scheduled to run after server startup")
966 |
967 | except Exception as e:
968 | logger.warning(f"Failed to initialize secondary storage: {e}")
969 | self.secondary = None
970 |
971 | self.initialized = True
972 | logger.info("Hybrid memory storage initialization completed")
973 |
974 | async def _perform_initial_sync_after_startup(self) -> None:
975 | """
976 | Wrapper for initial sync that waits for server startup to complete.
977 | This allows the web server to be accessible during the sync process.
978 | """
979 | # Wait a bit for server to fully start up
980 | await asyncio.sleep(2)
981 | logger.info("Starting initial sync in background (server is now accessible)")
982 | await self._perform_initial_sync()
983 |
984 | async def _sync_memories_from_cloudflare(
985 | self,
986 | sync_type: str = "initial",
987 | broadcast_sse: bool = True,
988 | enable_drift_check: bool = True
989 | ) -> Dict[str, Any]:
990 | """
991 | Shared logic for syncing memories FROM Cloudflare TO local storage.
992 |
993 | Args:
994 | sync_type: Type of sync ("initial" or "manual") for logging/SSE events
995 | broadcast_sse: Whether to broadcast SSE progress events
996 | enable_drift_check: Whether to check for metadata drift (only for initial sync)
997 |
998 | Returns:
999 | Dict with:
1000 | - success: bool
1001 | - memories_synced: int
1002 | - total_checked: int
1003 | - message: str
1004 | - time_taken_seconds: float
1005 | """
1006 | import time
1007 | sync_start_time = time.time()
1008 |
1009 | try:
1010 | # Get memory count from both storages to compare
1011 | primary_stats = await self.primary.get_stats()
1012 | secondary_stats = await self.secondary.get_stats()
1013 |
1014 | # Use 'or 0' to handle both missing keys AND None values (v8.62.9 - fix CI TypeError)
1015 | primary_count = primary_stats.get('total_memories') or 0
1016 | secondary_count = secondary_stats.get('total_memories') or 0
1017 |
1018 | logger.info(f"{sync_type.capitalize()} sync: Local={primary_count}, Cloudflare={secondary_count}")
1019 |
1020 | if secondary_count <= primary_count:
1021 | logger.info(f"No new memories to sync from Cloudflare ({sync_type} sync)")
1022 | return {
1023 | 'success': True,
1024 | 'memories_synced': 0,
1025 | 'total_checked': 0,
1026 | 'message': 'No new memories to pull from Cloudflare',
1027 | 'time_taken_seconds': round(time.time() - sync_start_time, 3)
1028 | }
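| # Note: the count comparison above is a heuristic. Equal totals can mask sets
| # that differ in both directions (memories present only locally and others
| # present only in Cloudflare), so a count match does not prove convergence.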
1029 |
1030 | # Pull missing memories from Cloudflare using optimized batch processing
1031 | missing_count = secondary_count - primary_count
1032 | synced_count = 0
1033 | batch_size = min(500, self.batch_size * 5) # 5x larger batches for sync
1034 | cursor = None
1035 | processed_count = 0
1036 | consecutive_empty_batches = 0
1037 |
1038 | # Get all local hashes once for O(1) lookup
1039 | local_hashes = await self.primary.get_all_content_hashes()
1040 | logger.info(f"Pulling {missing_count} potential memories from Cloudflare...")
1041 |
1042 | while True:
1043 | try:
1044 | # Get batch from Cloudflare using cursor-based pagination
1045 | logger.debug(f"Fetching batch: cursor={cursor}, batch_size={batch_size}")
1046 |
1047 | if hasattr(self.secondary, 'get_all_memories_cursor'):
1048 | cloudflare_memories = await self.secondary.get_all_memories_cursor(
1049 | limit=batch_size,
1050 | cursor=cursor
1051 | )
1052 | else:
1053 | cloudflare_memories = await self.secondary.get_all_memories(
1054 | limit=batch_size,
1055 | offset=processed_count
1056 | )
1057 |
1058 | if not cloudflare_memories:
1059 | logger.debug(f"No more memories from Cloudflare at cursor {cursor}")
1060 | break
1061 |
1062 | logger.debug(f"Processing batch of {len(cloudflare_memories)} memories")
1063 | batch_checked = 0
1064 | batch_missing = 0
1065 | batch_synced = 0
1066 |
1067 | # Parallel processing with concurrency limit
1068 | semaphore = asyncio.Semaphore(15)
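| # Caps in-flight checks at 15. The nonlocal counters below need no locks:
| # asyncio runs these coroutines on a single thread and each increment
| # completes without an intervening await, so updates cannot interleave.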
1069 |
1070 | async def sync_single_memory(cf_memory):
1071 | """Process a single memory with concurrency control."""
1072 | nonlocal batch_checked, batch_missing, batch_synced, synced_count, processed_count
1073 |
1074 | async with semaphore:
1075 | batch_checked += 1
1076 | processed_count += 1
1077 | try:
1078 | # Fast O(1) existence check
1079 | if cf_memory.content_hash not in local_hashes:
1080 | # Defense-in-depth: Skip if Cloudflare record itself is soft-deleted
1081 | # (should not happen if queries filter correctly, but prevents edge cases)
1082 | cf_deleted_at = getattr(cf_memory, 'deleted_at', None)
1083 | if cf_deleted_at is None and cf_memory.metadata:
1084 | cf_deleted_at = cf_memory.metadata.get('deleted_at')
1085 | if cf_deleted_at is not None:
1086 | logger.debug(f"Memory {cf_memory.content_hash[:8]} is soft-deleted in Cloudflare, skipping")
1087 | return ('skipped_deleted', cf_memory.content_hash)
1088 |
1089 | # Check if memory was soft-deleted locally (tombstone check)
1090 | # This prevents re-syncing memories that were intentionally deleted
1091 | if hasattr(self.primary, 'is_deleted') and await self.primary.is_deleted(cf_memory.content_hash):
1092 | logger.debug(f"Memory {cf_memory.content_hash[:8]} was deleted locally, skipping cloud sync")
1093 | # Propagate deletion to cloud if sync service available
1094 | if self.sync_service:
1095 | operation = SyncOperation(operation='delete', content_hash=cf_memory.content_hash)
1096 | await self.sync_service.enqueue_operation(operation)
1097 | return ('tombstone', cf_memory.content_hash)
1098 |
1099 | batch_missing += 1
1100 | # Memory doesn't exist locally, sync it
1101 | success, message = await self.primary.store(cf_memory)
1102 | if success:
1103 | batch_synced += 1
1104 | synced_count += 1
1105 | local_hashes.add(cf_memory.content_hash) # Update cache
1106 |
1107 | if sync_type == "initial":
1108 | self.initial_sync_completed = synced_count
1109 |
1110 | if synced_count % 10 == 0:
1111 | logger.info(f"{sync_type.capitalize()} sync progress: {synced_count}/{missing_count} memories synced")
1112 |
1113 | # Broadcast SSE progress event
1114 | if broadcast_sse and SSE_AVAILABLE:
1115 | try:
1116 | progress_event = create_sync_progress_event(
1117 | synced_count=synced_count,
1118 | total_count=missing_count,
1119 | sync_type=sync_type
1120 | )
1121 | await sse_manager.broadcast_event(progress_event)
1122 | except Exception as e:
1123 | logger.debug(f"Failed to broadcast SSE progress: {e}")
1124 |
1125 | return ('synced', cf_memory.content_hash)
1126 | else:
1127 | logger.warning(f"Failed to sync memory {cf_memory.content_hash}: {message}")
1128 | return ('failed', cf_memory.content_hash, message)
1129 | elif enable_drift_check and self.sync_service and self.sync_service.drift_check_enabled:
1130 | # Memory exists - check for metadata drift
1131 | existing = await self.primary.get_by_hash(cf_memory.content_hash)
1132 | if existing:
1133 | cf_updated = cf_memory.updated_at or 0
1134 | local_updated = existing.updated_at or 0
1135 |
1136 | # If Cloudflare version is newer, sync metadata
1137 | if cf_updated > local_updated + 1.0:
1138 | logger.debug(f"Metadata drift detected: {cf_memory.content_hash[:8]}")
1139 | success, _ = await self.primary.update_memory_metadata(
1140 | cf_memory.content_hash,
1141 | {
1142 | 'tags': cf_memory.tags,
1143 | 'memory_type': cf_memory.memory_type,
1144 | 'metadata': cf_memory.metadata,
1145 | 'created_at': cf_memory.created_at,
1146 | 'created_at_iso': cf_memory.created_at_iso,
1147 | 'updated_at': cf_memory.updated_at,
1148 | 'updated_at_iso': cf_memory.updated_at_iso,
1149 | },
1150 | preserve_timestamps=False
1151 | )
1152 | if success:
1153 | batch_synced += 1
1154 | synced_count += 1
1155 | logger.debug(f"Synced metadata for: {cf_memory.content_hash[:8]}")
1156 | return ('drift_synced', cf_memory.content_hash)
1157 | return ('skipped', cf_memory.content_hash)
1158 | except Exception as e:
1159 | logger.warning(f"Error syncing memory {cf_memory.content_hash}: {e}")
1160 | return ('error', cf_memory.content_hash, str(e))
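| # Result tags produced above: 'synced', 'failed', 'skipped', 'skipped_deleted',
| # 'tombstone', 'drift_synced', 'error'. Only exceptions surfaced by gather()
| # are logged below; the tags themselves are currently informational.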
1161 |
1162 | # Process batch in parallel
1163 | tasks = [sync_single_memory(mem) for mem in cloudflare_memories]
1164 | results = await asyncio.gather(*tasks, return_exceptions=True)
1165 | for result in results:
1166 | if isinstance(result, Exception):
1167 | logger.error(f"Error during {sync_type} sync batch processing: {result}")
1168 |
1169 | logger.debug(f"Batch complete: checked={batch_checked}, missing={batch_missing}, synced={batch_synced}")
1170 |
1171 | # Track consecutive empty batches
1172 | if batch_synced == 0:
1173 | consecutive_empty_batches += 1
1174 | logger.debug(f"Empty batch: consecutive={consecutive_empty_batches}/{HYBRID_MAX_EMPTY_BATCHES}")
1175 | else:
1176 | consecutive_empty_batches = 0
1177 |
1178 | # Log progress summary
1179 | if processed_count > 0 and processed_count % 100 == 0:
1180 | logger.info(f"Sync progress: processed={processed_count}, synced={synced_count}/{missing_count}")
1181 |
1182 | # Update cursor for next batch
1183 | if cloudflare_memories and hasattr(self.secondary, 'get_all_memories_cursor'):
1184 | cursor = min(memory.created_at for memory in cloudflare_memories if memory.created_at)
1185 | logger.debug(f"Next cursor: {cursor}")
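| # Cursor semantics assumed here: batches arrive ordered by created_at
| # descending, so the oldest created_at in the batch is the next page
| # boundary. If no memory in a batch carries created_at, min() over the empty
| # generator raises ValueError and the except handler below aborts the sync.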
1186 |
1187 | # Early break conditions
1188 | if consecutive_empty_batches >= HYBRID_MAX_EMPTY_BATCHES and synced_count > 0:
1189 | logger.info(f"Completed after {consecutive_empty_batches} empty batches - {synced_count}/{missing_count} synced")
1190 | break
1191 | elif processed_count >= HYBRID_MIN_CHECK_COUNT and synced_count == 0:
1192 | logger.info(f"No missing memories after checking {processed_count} memories")
1193 | break
1194 |
1195 | await asyncio.sleep(0.01)
1196 |
1197 | except Exception as e:
1198 | # Handle Cloudflare D1 errors
1199 | if "400" in str(e) and not hasattr(self.secondary, 'get_all_memories_cursor'):
1200 | logger.error(f"D1 OFFSET limitation at processed_count={processed_count}: {e}")
1201 | logger.warning("Cloudflare D1 OFFSET limits reached - sync incomplete")
1202 | break
1203 | else:
1204 | logger.error(f"Error during {sync_type} sync: {e}")
1205 | break
1206 |
1207 | time_taken = time.time() - sync_start_time
1208 | logger.info(f"{sync_type.capitalize()} sync completed: {synced_count} memories in {time_taken:.2f}s")
1209 |
1210 | # Broadcast SSE completion event
1211 | if broadcast_sse and SSE_AVAILABLE and missing_count > 0:
1212 | try:
1213 | completion_event = create_sync_completed_event(
1214 | synced_count=synced_count,
1215 | total_count=missing_count,
1216 | time_taken_seconds=time_taken,
1217 | sync_type=sync_type
1218 | )
1219 | await sse_manager.broadcast_event(completion_event)
1220 | except Exception as e:
1221 | logger.debug(f"Failed to broadcast SSE completion: {e}")
1222 |
1223 | return {
1224 | 'success': True,
1225 | 'memories_synced': synced_count,
1226 | 'total_checked': processed_count,
1227 | 'message': f'Successfully pulled {synced_count} memories from Cloudflare',
1228 | 'time_taken_seconds': round(time_taken, 3)
1229 | }
1230 |
1231 | except Exception as e:
1232 | logger.error(f"{sync_type.capitalize()} sync failed: {e}")
1233 | return {
1234 | 'success': False,
1235 | 'memories_synced': 0,
1236 | 'total_checked': 0,
1237 | 'message': f'Failed to pull from Cloudflare: {str(e)}',
1238 | 'time_taken_seconds': round(time.time() - sync_start_time, 3)
1239 | }
1240 |
1241 | async def _perform_initial_sync(self) -> None:
1242 | """
1243 | Perform initial sync from Cloudflare to SQLite if enabled.
1244 |
1245 | This downloads all memories from Cloudflare that are missing in local SQLite,
1246 | providing immediate access to existing cloud memories.
1247 | """
1248 | if not HYBRID_SYNC_ON_STARTUP or not self.secondary:
1249 | return
1250 |
1251 | logger.info("Starting initial sync from Cloudflare to SQLite...")
1252 |
1253 | self.initial_sync_in_progress = True
1254 | self.initial_sync_completed = 0
1255 | self.initial_sync_finished = False
1256 |
1257 | try:
1258 | # Set initial_sync_total before calling the shared helper
1259 | primary_stats = await self.primary.get_stats()
1260 | secondary_stats = await self.secondary.get_stats()
1261 | # Use 'or 0' to handle both missing keys AND None values (v8.62.9 - fix CI TypeError)
1262 | primary_count = primary_stats.get('total_memories') or 0
1263 | secondary_count = secondary_stats.get('total_memories') or 0
1264 |
1265 | if secondary_count > primary_count:
1266 | self.initial_sync_total = secondary_count - primary_count
1267 | else:
1268 | self.initial_sync_total = 0
1269 |
1270 | # Call shared helper method with drift checking enabled
1271 | result = await self._sync_memories_from_cloudflare(
1272 | sync_type="initial",
1273 | broadcast_sse=True,
1274 | enable_drift_check=True
1275 | )
1276 |
1277 | synced_count = result['memories_synced']
1278 |
1279 | # Update sync tracking to reflect actual sync completion
1280 | if synced_count == 0 and self.initial_sync_total > 0:
1281 | # All memories were already present - this is a successful "no-op" sync
1282 | self.initial_sync_completed = self.initial_sync_total
1283 | logger.info(f"Sync completed successfully: All {self.initial_sync_total} memories were already present locally")
1284 |
1285 | self.initial_sync_finished = True
1286 |
1287 | except Exception as e:
1288 | logger.error(f"Initial sync failed: {e}")
1289 | # Don't fail initialization if initial sync fails
1290 | logger.warning("Continuing with hybrid storage despite initial sync failure")
1291 | finally:
1292 | self.initial_sync_in_progress = False
1293 |
1294 | def get_initial_sync_status(self) -> Dict[str, Any]:
1295 | """Get current initial sync status for monitoring."""
1296 | return {
1297 | "in_progress": self.initial_sync_in_progress,
1298 | "total": self.initial_sync_total,
1299 | "completed": self.initial_sync_completed,
1300 | "finished": self.initial_sync_finished,
1301 | "progress_percentage": round((self.initial_sync_completed / max(self.initial_sync_total, 1)) * 100, 1) if self.initial_sync_total > 0 else 0
1302 | }
1303 |
1304 | async def store(self, memory: Memory) -> Tuple[bool, str]:
1305 | """Store a memory in primary storage and queue for secondary sync."""
1306 | # Always store in primary first for immediate availability
1307 | success, message = await self.primary.store(memory)
1308 |
1309 | if success and self.sync_service:
1310 | # Queue for background sync to secondary
1311 | operation = SyncOperation(operation='store', memory=memory)
1312 | await self.sync_service.enqueue_operation(operation)
1313 |
1314 | return success, message
1315 |
1316 | async def retrieve(self, query: str, n_results: int = 5) -> List[MemoryQueryResult]:
1317 | """Retrieve memories from primary storage (fast)."""
1318 | return await self.primary.retrieve(query, n_results)
1319 |
1320 | async def search(self, query: str, n_results: int = 5, min_similarity: float = 0.0) -> List[MemoryQueryResult]:
1321 | """Search memories in primary storage."""
1322 | return await self.primary.search(query, n_results)
1323 |
1324 | async def search_by_tag(self, tags: List[str], time_start: Optional[float] = None) -> List[Memory]:
1325 | """Search memories by tags in primary storage with optional time filtering.
1326 |
1327 | This method performs an OR search for tags. The `match_all` (AND) logic
1328 | is handled at the API layer.
1329 |
1330 | Args:
1331 | tags: List of tags to search for
1332 | time_start: Optional Unix timestamp (in seconds) to filter memories created after this time
1333 |
1334 | Returns:
1335 | List of Memory objects matching the tag criteria and time filter
1336 | """
1337 | return await self.primary.search_by_tag(tags, time_start=time_start)
1338 |
1339 | async def search_by_tags(
1340 | self,
1341 | tags: List[str],
1342 | operation: str = "AND",
1343 | time_start: Optional[float] = None,
1344 | time_end: Optional[float] = None
1345 | ) -> List[Memory]:
1346 | """Search memories by tags using consistent operation parameter across backends."""
1347 | normalized_operation = operation.strip().upper() if isinstance(operation, str) else "AND"
1348 | if normalized_operation not in {"AND", "OR"}:
1349 | logger.warning("Unsupported tag operation %s; defaulting to AND", operation)
1350 | normalized_operation = "AND"
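| # e.g. " or " normalizes to "OR"; an unsupported string such as "XOR" logs a
| # warning and falls back to "AND", while a non-string falls back silently.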
1351 |
1352 | return await self.primary.search_by_tags(
1353 | tags,
1354 | operation=normalized_operation,
1355 | time_start=time_start,
1356 | time_end=time_end
1357 | )
1358 |
1359 | async def delete(self, content_hash: str) -> Tuple[bool, str]:
1360 | """Delete a memory from primary storage and queue for secondary sync."""
1361 | success, message = await self.primary.delete(content_hash)
1362 |
1363 | if success and self.sync_service:
1364 | # Queue for background sync to secondary
1365 | operation = SyncOperation(operation='delete', content_hash=content_hash)
1366 | await self.sync_service.enqueue_operation(operation)
1367 |
1368 | return success, message
1369 |
1370 | async def delete_by_tag(self, tag: str) -> Tuple[int, str]:
1371 | """Delete memories by tag from primary storage and queue for secondary sync."""
1372 | # First, get the memories with this tag to get their hashes for sync
1373 | memories_to_delete = await self.primary.search_by_tags([tag])
1374 |
1375 | # Delete from primary
1376 | count_deleted, message = await self.primary.delete_by_tag(tag)
1377 |
1378 | # Queue individual deletes for secondary sync
1379 | if count_deleted > 0 and self.sync_service:
1380 | for memory in memories_to_delete:
1381 | operation = SyncOperation(operation='delete', content_hash=memory.content_hash)
1382 | await self.sync_service.enqueue_operation(operation)
1383 |
1384 | return count_deleted, message
1385 |
1386 | async def delete_by_tags(self, tags: List[str]) -> Tuple[int, str]:
1387 | """
1388 | Delete memories matching ANY of the given tags from primary storage and queue for secondary sync.
1389 |
1390 | Optimized to use primary storage's delete_by_tags if available, otherwise falls back to
1391 | calling delete_by_tag for each tag.
1392 | """
1393 | if not tags:
1394 | return 0, "No tags provided"
1395 |
1396 | # First, get all memories with any of these tags for sync queue
1397 | memories_to_delete = await self.primary.search_by_tags(tags, operation="OR")
1398 |
1399 | # Remove duplicates based on content_hash
1400 | unique_memories = {m.content_hash: m for m in memories_to_delete}.values()
1401 |
1402 | # Delete from primary using optimized method if available
1403 | count_deleted, message = await self.primary.delete_by_tags(tags)
1404 |
1405 | # Queue individual deletes for secondary sync
1406 | if count_deleted > 0 and self.sync_service:
1407 | for memory in unique_memories:
1408 | operation = SyncOperation(operation='delete', content_hash=memory.content_hash)
1409 | await self.sync_service.enqueue_operation(operation)
1410 |
1411 | return count_deleted, message
1412 |
1413 | async def get_by_exact_content(self, content: str) -> List[Memory]:
1414 | """Retrieve memories by exact content match from primary storage."""
1415 | return await self.primary.get_by_exact_content(content)
1416 |
1417 | async def delete_by_timeframe(self, start_date: date, end_date: date, tag: Optional[str] = None) -> Tuple[int, str]:
1418 | """Delete memories within a specific date range in primary storage and queue for secondary sync."""
1419 | count, message = await self.primary.delete_by_timeframe(start_date, end_date, tag)
1420 |
1421 | if count > 0 and self.sync_service and not self._sync_paused:
1422 | # Queue for background sync to secondary
1423 | operation = SyncOperation(
1424 | operation='delete_by_timeframe',
1425 | start_date=start_date,
1426 | end_date=end_date,
1427 | tag=tag
1428 | )
1429 | await self.sync_service.enqueue_operation(operation)
1430 |
1431 | return count, message
1432 |
1433 | async def delete_before_date(self, before_date: date, tag: Optional[str] = None) -> Tuple[int, str]:
1434 | """Delete memories created before a specific date in primary storage and queue for secondary sync."""
1435 | count, message = await self.primary.delete_before_date(before_date, tag)
1436 |
1437 | if count > 0 and self.sync_service and not self._sync_paused:
1438 | # Queue for background sync to secondary
1439 | operation = SyncOperation(
1440 | operation='delete_before_date',
1441 | before_date=before_date,
1442 | tag=tag
1443 | )
1444 | await self.sync_service.enqueue_operation(operation)
1445 |
1446 | return count, message
1447 |
1448 | async def cleanup_duplicates(self) -> Tuple[int, str]:
1449 | """Clean up duplicates in primary storage."""
1450 | # Only cleanup primary, secondary will sync naturally
1451 | return await self.primary.cleanup_duplicates()
1452 |
1453 | async def update_memory_metadata(self, content_hash: str, updates: Dict[str, Any], preserve_timestamps: bool = True) -> Tuple[bool, str]:
1454 | """Update memory metadata in primary storage and queue for secondary sync."""
1455 | success, message = await self.primary.update_memory_metadata(content_hash, updates, preserve_timestamps)
1456 |
1457 | # Skip enqueuing if sync is paused (v8.47.1 - fix consolidation hang)
1458 | # NOTE: operations skipped while paused are not queued for later replay
1459 | if success and self.sync_service and not self._sync_paused:
1460 | # Queue for background sync to secondary
1461 | operation = SyncOperation(
1462 | operation='update',
1463 | content_hash=content_hash,
1464 | updates=updates,
1465 | preserve_timestamps=preserve_timestamps
1466 | )
1467 | await self.sync_service.enqueue_operation(operation)
1468 |
1469 | return success, message
1470 |
1471 | async def update_memories_batch(self, memories: List[Memory]) -> List[bool]:
1472 | """
1473 | Update multiple memories in a batch operation with optimal performance.
1474 |
1475 | Delegates directly to primary storage's optimized batch update method,
1476 | then queues secondary sync operations in background.
1477 |
1478 | Args:
1479 | memories: List of Memory objects with updated fields
1480 |
1481 | Returns:
1482 | List of success booleans, one for each memory in the batch
1483 | """
1484 | # Use primary storage's optimized batch update (single transaction)
1485 | results = await self.primary.update_memories_batch(memories)
1486 |
1487 | # Queue successful updates for background sync to secondary
1488 | if self.sync_service:
1489 | for memory, success in zip(memories, results):
1490 | if success:
1491 | operation = SyncOperation(
1492 | operation='update',
1493 | content_hash=memory.content_hash,
1494 | updates={
1495 | 'tags': memory.tags,
1496 | 'metadata': memory.metadata,
1497 | 'memory_type': memory.memory_type
1498 | }
1499 | )
1500 | # enqueue_operation() only puts the item on the in-memory queue; the actual Cloudflare write happens later in the background worker
1501 | try:
1502 | await self.sync_service.enqueue_operation(operation)
1503 | except Exception as e:
1504 | logger.warning(f"Failed to queue sync for {memory.content_hash}: {e}")
1505 |
1506 | return results
1507 |
1508 | async def get_stats(self) -> Dict[str, Any]:
1509 | """Get comprehensive statistics from both storage backends."""
1510 | # SQLite-vec get_stats is now async
1511 | primary_stats = await self.primary.get_stats()
1512 |
1513 | stats = {
1514 | "storage_backend": "Hybrid (SQLite-vec + Cloudflare)",
1515 | "primary_backend": "SQLite-vec",
1516 | "secondary_backend": "Cloudflare" if self.secondary else "None",
1517 | "total_memories": primary_stats.get("total_memories", 0),
1518 | "unique_tags": primary_stats.get("unique_tags", 0),
1519 | "memories_this_week": primary_stats.get("memories_this_week", 0),
1520 | "database_size_bytes": primary_stats.get("database_size_bytes", 0),
1521 | "database_size_mb": primary_stats.get("database_size_mb", 0),
1522 | "primary_stats": primary_stats,
1523 | "sync_enabled": self.sync_service is not None
1524 | }
1525 |
1526 | # Add sync service statistics if available
1527 | if self.sync_service:
1528 | sync_status = await self.sync_service.get_sync_status()
1529 | stats["sync_status"] = sync_status
1530 |
1531 | # Add secondary stats if available and healthy
1532 | if self.secondary and self.sync_service and self.sync_service.sync_stats['cloudflare_available']:
1533 | try:
1534 | secondary_stats = await self.secondary.get_stats()
1535 | stats["secondary_stats"] = secondary_stats
1536 | except Exception as e:
1537 | stats["secondary_error"] = str(e)
1538 |
1539 | return stats
1540 |
1541 | async def get_all_tags_with_counts(self) -> List[Dict[str, Any]]:
1542 | """Get all tags with their usage counts from primary storage."""
1543 | return await self.primary.get_all_tags_with_counts()
1544 |
1545 | async def get_all_tags(self) -> List[str]:
1546 | """Get all unique tags from primary storage."""
1547 | return await self.primary.get_all_tags()
1548 |
1549 | async def get_recent_memories(self, n: int = 10) -> List[Memory]:
1550 | """Get recent memories from primary storage."""
1551 | return await self.primary.get_recent_memories(n)
1552 |
1553 | async def get_largest_memories(self, n: int = 10) -> List[Memory]:
1554 | """Get largest memories by content length from primary storage."""
1555 | return await self.primary.get_largest_memories(n)
1556 |
1557 | async def get_memory_timestamps(self, days: Optional[int] = None) -> List[float]:
1558 | """
1559 | Get memory creation timestamps only, without loading full memory objects.
1560 |
1561 | Delegates to primary storage (SQLite-vec) for optimal performance.
1562 |
1563 | Args:
1564 | days: Optional filter to only get memories from last N days
1565 |
1566 | Returns:
1567 | List of Unix timestamps (float) in descending order (newest first)
1568 | """
1569 | return await self.primary.get_memory_timestamps(days)
1570 |
1571 | async def recall(self, query: Optional[str] = None, n_results: int = 5, start_timestamp: Optional[float] = None, end_timestamp: Optional[float] = None) -> List[MemoryQueryResult]:
1572 | """
1573 | Retrieve memories with combined time filtering and optional semantic search.
1574 |
1575 | Args:
1576 | query: Optional semantic search query. If None, only time filtering is applied.
1577 | n_results: Maximum number of results to return.
1578 | start_timestamp: Optional start time for filtering.
1579 | end_timestamp: Optional end time for filtering.
1580 |
1581 | Returns:
1582 | List of MemoryQueryResult objects.
1583 | """
1584 | return await self.primary.recall(query=query, n_results=n_results, start_timestamp=start_timestamp, end_timestamp=end_timestamp)
1585 |
1586 | async def recall_memory(self, query: str, n_results: int = 5) -> List[Memory]:
1587 | """Recall memories using natural language time expressions."""
1588 | return await self.primary.recall_memory(query, n_results)
1589 |
1590 | async def get_all_memories(self, limit: Optional[int] = None, offset: int = 0, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> List[Memory]:
1591 | """Get all memories from primary storage."""
1592 | return await self.primary.get_all_memories(limit=limit, offset=offset, memory_type=memory_type, tags=tags)
1593 |
1594 | async def get_by_hash(self, content_hash: str) -> Optional[Memory]:
1595 | """Get a memory by its content hash from primary storage."""
1596 | return await self.primary.get_by_hash(content_hash)
1597 |
1598 | async def count_all_memories(self, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> int:
1599 | """Get total count of memories from primary storage."""
1600 | return await self.primary.count_all_memories(memory_type=memory_type, tags=tags)
1601 |
1602 | async def get_memories_by_time_range(self, start_time: float, end_time: float) -> List[Memory]:
1603 | """Get memories within time range from primary storage."""
1604 | return await self.primary.get_memories_by_time_range(start_time, end_time)
1605 |
1606 | async def close(self):
1607 | """Clean shutdown of hybrid storage system."""
1608 | logger.info("Shutting down hybrid memory storage...")
1609 |
1610 | # Cancel initial sync task if still running (v8.62.9 - fix CI race condition)
1611 | if self._initial_sync_task and not self._initial_sync_task.done():
1612 | self._initial_sync_task.cancel()
1613 | try:
1614 | await self._initial_sync_task
1615 | except asyncio.CancelledError:
1616 | logger.debug("Initial sync task cancelled during shutdown")
1617 | except Exception as e:
1618 | logger.debug(f"Initial sync task error during shutdown: {e}")
1619 |
1620 | # Stop sync service first
1621 | if self.sync_service:
1622 | await self.sync_service.stop()
1623 |
1624 | # Close storage backends (with exception handling to prevent resource leaks)
1625 | if self.primary is not None and hasattr(self.primary, 'close'):
1626 | try:
1627 | if asyncio.iscoroutinefunction(self.primary.close):
1628 | await self.primary.close()
1629 | else:
1630 | self.primary.close()
1631 | except Exception as e:
1632 | logger.error(f"Error closing primary storage: {e}")
1633 |
1634 | if self.secondary and hasattr(self.secondary, 'close'):
1635 | try:
1636 | if asyncio.iscoroutinefunction(self.secondary.close):
1637 | await self.secondary.close()
1638 | else:
1639 | self.secondary.close()
1640 | except Exception as e:
1641 | logger.error(f"Error closing secondary storage: {e}")
1642 |
1643 | logger.info("Hybrid memory storage shutdown completed")
1644 |
1645 | async def force_sync(self) -> Dict[str, Any]:
1646 | """Force immediate synchronization with secondary storage."""
1647 | if not self.sync_service:
1648 | return {
1649 | 'status': 'disabled',
1650 | 'message': 'Background sync service not available'
1651 | }
1652 |
1653 | return await self.sync_service.force_sync()
1654 |
1655 | async def force_pull_sync(self) -> Dict[str, Any]:
1656 | """
1657 | Force immediate pull synchronization FROM Cloudflare TO local SQLite.
1658 |
1659 | This reuses the same pull logic as the initial sync and can be called on demand.
1660 | Useful for manually refreshing local storage with memories stored via MCP.
1661 |
1662 | Returns:
1663 | Dict with sync results including:
1664 | - success: bool
1665 | - message: str
1666 | - memories_pulled: int
1667 | - time_taken_seconds: float
1668 | """
1669 | # Call shared helper method without drift checking (manual sync doesn't need it)
1670 | result = await self._sync_memories_from_cloudflare(
1671 | sync_type="manual",
1672 | broadcast_sse=True,
1673 | enable_drift_check=False
1674 | )
1675 |
1676 | # Map result to expected return format
1677 | return {
1678 | 'success': result['success'],
1679 | 'message': result['message'],
1680 | 'memories_pulled': result['memories_synced'],
1681 | 'time_taken_seconds': result['time_taken_seconds']
1682 | }
1683 |
1684 | async def get_sync_status(self) -> Dict[str, Any]:
1685 | """Get current background sync status and statistics."""
1686 | if not self.sync_service:
1687 | return {
1688 | 'is_running': False,
1689 | 'pending_operations': 0,
1690 | 'operations_processed': 0,
1691 | 'operations_failed': 0,
1692 | 'last_sync_time': 0,
1693 | 'sync_interval': 0
1694 | }
1695 |
1696 | return await self.sync_service.get_sync_status()
1697 |
1698 | async def pause_sync(self) -> Dict[str, Any]:
1699 | """Pause background sync operations for safe database operations."""
1700 | if not self.sync_service:
1701 | return {
1702 | 'success': False,
1703 | 'message': 'Sync service not available'
1704 | }
1705 |
1706 | try:
1707 | if not self.sync_service.is_running:
1708 | self._sync_paused = True # Ensure flag is set even if already paused
1709 | return {
1710 | 'success': True,
1711 | 'message': 'Sync already paused'
1712 | }
1713 |
1714 | # Set pause flag before stopping sync (v8.47.1 - fix consolidation hang)
1715 | self._sync_paused = True
1716 |
1717 | # Stop the sync service
1718 | await self.sync_service.stop()
1719 | logger.info("Background sync paused")
1720 |
1721 | return {
1722 | 'success': True,
1723 | 'message': 'Sync paused successfully'
1724 | }
1725 |
1726 | except Exception as e:
1727 | logger.error(f"Failed to pause sync: {e}")
1728 | return {
1729 | 'success': False,
1730 | 'message': f'Failed to pause sync: {str(e)}'
1731 | }
1732 |
1733 | async def resume_sync(self) -> Dict[str, Any]:
1734 | """Resume background sync operations after pause."""
1735 | if not self.sync_service:
1736 | return {
1737 | 'success': False,
1738 | 'message': 'Sync service not available'
1739 | }
1740 |
1741 | try:
1742 | if self.sync_service.is_running:
1743 | self._sync_paused = False # Ensure flag is cleared even if already running
1744 | return {
1745 | 'success': True,
1746 | 'message': 'Sync already running'
1747 | }
1748 |
1749 | # Clear pause flag before starting sync (v8.47.1 - fix consolidation hang)
1750 | self._sync_paused = False
1751 |
1752 | # Start the sync service
1753 | await self.sync_service.start()
1754 | logger.info("Background sync resumed")
1755 |
1756 | return {
1757 | 'success': True,
1758 | 'message': 'Sync resumed successfully'
1759 | }
1760 |
1761 | except Exception as e:
1762 | logger.error(f"Failed to resume sync: {e}")
1763 | return {
1764 | 'success': False,
1765 | 'message': f'Failed to resume sync: {str(e)}'
1766 | }
1767 |
1768 | async def wait_for_sync_completion(self, timeout: int = 600) -> Dict[str, Any]:
1769 | """Wait for sync queue to drain after bulk operations.
1770 |
1771 | This method is useful for bulk operations like quality evaluation
1772 | that generate many sync operations. It waits for the queue to drain
1773 | and returns statistics about the sync operation.
1774 |
1775 | Args:
1776 | timeout: Maximum seconds to wait (default: 600 = 10 minutes)
1777 |
1778 | Returns:
1779 | dict with sync stats:
1780 | - success_count: Number of operations synced successfully
1781 | - failure_count: Number of operations that failed
1782 | - queue_size: Remaining queue size when timeout/completion occurred
1783 |
1784 | Raises:
1785 | TimeoutError: If sync queue did not drain within timeout
1786 | """
1787 | if not self.sync_service:
1788 | return {
1789 | 'success_count': 0,
1790 | 'failure_count': 0,
1791 | 'queue_size': 0,
1792 | 'message': 'Sync service not available'
1793 | }
1794 |
1795 | import time
1796 | start = time.time()
1797 | initial_processed = self.sync_service.sync_stats.get('operations_processed', 0)
1798 | initial_failed = self.sync_service.sync_stats.get('operations_failed', 0)
1799 |
1800 | while time.time() - start < timeout:
1801 | queue_size = self.sync_service.operation_queue.qsize()
1802 |
1803 | if queue_size == 0:
1804 | # Queue drained, calculate stats
1805 | final_processed = self.sync_service.sync_stats.get('operations_processed', 0)
1806 | final_failed = self.sync_service.sync_stats.get('operations_failed', 0)
1807 |
1808 | return {
1809 | 'success_count': final_processed - initial_processed,
1810 | 'failure_count': final_failed - initial_failed,
1811 | 'queue_size': 0,
1812 | 'message': 'Sync queue drained successfully'
1813 | }
1814 |
1815 | # Wait a bit before checking again
1816 | await asyncio.sleep(1)
1817 |
1818 | # Timeout reached - return partial stats
1819 | final_processed = self.sync_service.sync_stats.get('operations_processed', 0)
1820 | final_failed = self.sync_service.sync_stats.get('operations_failed', 0)
1821 | queue_size = self.sync_service.operation_queue.qsize()
1822 |
1823 | raise TimeoutError(
1824 | f"Sync queue did not drain within {timeout}s. "
1825 | f"Processed: {final_processed - initial_processed}, "
1826 | f"Failed: {final_failed - initial_failed}, "
1827 | f"Remaining in queue: {queue_size}"
1828 | )
1829 |
1830 | def sanitized(self, tags):
1831 | """Sanitize and normalize tags to a JSON string.
1832 |
1833 | This method provides compatibility with the storage interface.
1834 | Delegates to primary storage for consistent tag handling.
1835 | """
1836 | return self.primary.sanitized(tags)
1837 |
```