This is page 42 of 46. Use http://codebase.md/doobidoo/mcp-memory-service?lines=false&page={x}, replacing {x} with a page number from 1 to 46, to view the full context.
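As a convenience, here is a minimal sketch of fetching and concatenating all 46 pages of this dump. It assumes the codebase.md URL above returns each page body as plain markdown text; the response format and the `mcp-memory-service-full.md` output filename are assumptions for illustration only.

```python
# Hypothetical helper: download every page of the codebase.md dump and join them.
# Assumes each page URL returns plain markdown text; adjust if the service
# responds differently.
import requests

BASE_URL = "http://codebase.md/doobidoo/mcp-memory-service"
TOTAL_PAGES = 46  # stated in the header above


def fetch_full_dump() -> str:
    pages = []
    for page in range(1, TOTAL_PAGES + 1):
        resp = requests.get(BASE_URL, params={"lines": "false", "page": page}, timeout=30)
        resp.raise_for_status()  # fail loudly on HTTP errors
        pages.append(resp.text)
    return "\n".join(pages)


if __name__ == "__main__":
    with open("mcp-memory-service-full.md", "w", encoding="utf-8") as fh:
        fh.write(fetch_full_dump())
```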
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── commands
│ │ ├── README.md
│ │ ├── refactor-function
│ │ ├── refactor-function-prod
│ │ └── refactor-function.md
│ ├── consolidation-fix-handoff.md
│ ├── consolidation-hang-fix-summary.md
│ ├── directives
│ │ ├── agents.md
│ │ ├── code-quality-workflow.md
│ │ ├── consolidation-details.md
│ │ ├── development-setup.md
│ │ ├── hooks-configuration.md
│ │ ├── memory-first.md
│ │ ├── memory-tagging.md
│ │ ├── pr-workflow.md
│ │ ├── quality-system-details.md
│ │ ├── README.md
│ │ ├── refactoring-checklist.md
│ │ ├── storage-backends.md
│ │ └── version-management.md
│ ├── prompts
│ │ └── hybrid-cleanup-integration.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .coveragerc
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-branch-automation.yml
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── dockerfile-lint.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── publish-dual.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .metrics
│ ├── baseline_cc_install_hooks.txt
│ ├── baseline_mi_install_hooks.txt
│ ├── baseline_nesting_install_hooks.txt
│ ├── BASELINE_REPORT.md
│ ├── COMPLEXITY_COMPARISON.txt
│ ├── QUICK_REFERENCE.txt
│ ├── README.md
│ ├── REFACTORED_BASELINE.md
│ ├── REFACTORING_COMPLETION_REPORT.md
│ └── TRACKING_TABLE.md
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── ai-optimized-tool-descriptions.py
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── auto-capture-hook.js
│ │ ├── auto-capture-hook.ps1
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── permission-request.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-AUTO-CAPTURE.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-PERMISSION-REQUEST.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-permission-request.js
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── auto-capture-patterns.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-cache.json
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ ├── user-override-detector.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── COMMIT_MESSAGE.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── graph-database-design.md
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── demo-recording-script.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-280-post-mortem.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ ├── quality-system-configs.md
│ │ └── tag-schema.json
│ ├── features
│ │ └── association-quality-boost.md
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── memory-quality-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ ├── dashboard-placeholder.md
│ │ └── update-restart-demo.png
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LIGHTWEIGHT_ONNX_SETUP.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ ├── code-execution-api-quick-start.md
│ │ └── graph-migration-guide.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quality-system-ui-implementation.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── refactoring
│ │ └── phase-3-3-analysis.md
│ ├── releases
│ │ └── v8.72.0-testing.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── database-transfer-migration.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── memory-management.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ ├── tutorials
│ │ ├── advanced-techniques.md
│ │ ├── data-analysis.md
│ │ └── demo-session-walkthrough.md
│ ├── wiki-documentation-plan.md
│ └── wiki-Graph-Database-Architecture.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── IMPLEMENTATION_SUMMARY.md
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── PR_DESCRIPTION.md
├── pyproject-lite.toml
├── pyproject.toml
├── pytest.ini
├── README.md
├── release-notes-v8.61.0.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── ci
│ │ ├── check_dockerfile_args.sh
│ │ └── validate_imports.sh
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── add_project_tags.py
│ │ ├── apply_quality_boost_retroactively.py
│ │ ├── assign_memory_types.py
│ │ ├── auto_retag_memory_merge.py
│ │ ├── auto_retag_memory.py
│ │ ├── backfill_graph_table.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_association_memories_hybrid.py
│ │ ├── cleanup_association_memories.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_low_quality.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── delete_test_memories.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ ├── retag_valuable_memories.py
│ │ ├── scan_todos.sh
│ │ ├── soft_delete_test_memories.py
│ │ └── sync_status.py
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── pre_pr_check.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks_on_files.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── bulk_evaluate_onnx.py
│ │ ├── check_test_scores.py
│ │ ├── debug_deberta_scoring.py
│ │ ├── export_deberta_onnx.py
│ │ ├── fix_dead_code_install.sh
│ │ ├── migrate_to_deberta.py
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── rescore_deberta.py
│ │ ├── rescore_fallback.py
│ │ ├── reset_onnx_scores.py
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── memory_wrapper_cleanup.ps1
│ │ ├── memory_wrapper_cleanup.py
│ │ ├── memory_wrapper_cleanup.sh
│ │ ├── README_CLEANUP_WRAPPER.md
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── http_server_manager.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ ├── update_service.sh
│ │ └── windows
│ │ ├── add_watchdog_trigger.ps1
│ │ ├── install_scheduled_task.ps1
│ │ ├── manage_service.ps1
│ │ ├── run_http_server_background.ps1
│ │ ├── uninstall_scheduled_task.ps1
│ │ └── update_and_restart.ps1
│ ├── setup-lightweight.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── update_and_restart.sh
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── detect_platform.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── README_detect_platform.md
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── check_handler_coverage.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_graph_tools.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── _version.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── quality
│ │ ├── __init__.py
│ │ ├── ai_evaluator.py
│ │ ├── async_scorer.py
│ │ ├── config.py
│ │ ├── implicit_signals.py
│ │ ├── metadata_codec.py
│ │ ├── onnx_ranker.py
│ │ └── scorer.py
│ ├── server
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── cache_manager.py
│ │ ├── client_detection.py
│ │ ├── environment.py
│ │ ├── handlers
│ │ │ ├── __init__.py
│ │ │ ├── consolidation.py
│ │ │ ├── documents.py
│ │ │ ├── graph.py
│ │ │ ├── memory.py
│ │ │ ├── quality.py
│ │ │ └── utility.py
│ │ └── logging_config.py
│ ├── server_impl.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── graph.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ ├── migrations
│ │ │ └── 008_add_graph_table.sql
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── directory_ingestion.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── health_check.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── quality_analytics.py
│ │ ├── startup_orchestrator.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── quality.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── i18n
│ │ ├── de.json
│ │ ├── en.json
│ │ ├── es.json
│ │ ├── fr.json
│ │ ├── ja.json
│ │ ├── ko.json
│ │ └── zh.json
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── TESTING_NOTES.md
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ ├── test_forgetting.py
│ │ └── test_graph_modes.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── conftest.py
│ │ ├── HANDLER_COVERAGE_REPORT.md
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_all_memory_handlers.py
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── storage
│ │ ├── conftest.py
│ │ └── test_graph_storage.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_deberta_quality.py
│ ├── test_fallback_quality.py
│ ├── test_graph_traversal.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_lightweight_onnx.py
│ ├── test_memory_ops.py
│ ├── test_memory_wrapper_cleanup.py
│ ├── test_quality_integration.py
│ ├── test_quality_system.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_imports.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ ├── test_tag_time_filtering.py
│ └── test_uv_no_pip_installer_fallback.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
├── uv.lock
└── verify_compression.sh
```
# Files
--------------------------------------------------------------------------------
/.pyscn/reports/analyze_20251123_214224.html:
--------------------------------------------------------------------------------
```html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>pyscn Analysis Report</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.header {
background: white;
border-radius: 10px;
padding: 30px;
margin-bottom: 20px;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
}
.header h1 {
color: #667eea;
margin-bottom: 10px;
}
.score-badge {
display: inline-block;
padding: 10px 20px;
border-radius: 50px;
font-size: 24px;
font-weight: bold;
margin: 10px 0;
}
.grade-a { background: #4caf50; color: white; }
.grade-b { background: #8bc34a; color: white; }
.grade-c { background: #ff9800; color: white; }
.grade-d { background: #ff5722; color: white; }
.grade-f { background: #f44336; color: white; }
.tabs {
background: white;
border-radius: 10px;
overflow: hidden;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
}
.tab-buttons {
display: flex;
background: #f5f5f5;
}
.tab-button {
flex: 1;
padding: 15px;
border: none;
background: transparent;
cursor: pointer;
font-size: 16px;
transition: all 0.3s;
}
.tab-button.active {
background: white;
color: #667eea;
font-weight: bold;
}
.tab-content {
display: none;
padding: 30px;
}
.tab-content.active {
display: block;
}
.metric-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin: 20px 0;
}
.metric-card {
background: #f8f9fa;
padding: 20px;
border-radius: 8px;
text-align: center;
}
.metric-value {
font-size: 32px;
font-weight: bold;
color: #667eea;
}
.metric-label {
color: #666;
margin-top: 5px;
}
.table {
width: 100%;
border-collapse: collapse;
margin: 20px 0;
}
.table th, .table td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #ddd;
}
.table th {
background: #f8f9fa;
font-weight: 600;
}
.risk-low { color: #4caf50; }
.risk-medium { color: #ff9800; }
.risk-high { color: #f44336; }
.severity-critical { color: #f44336; }
.severity-warning { color: #ff9800; }
.severity-info { color: #2196f3; }
.score-bars {
margin: 20px 0;
}
.score-bar-item {
margin-bottom: 24px;
}
.score-bar-header {
display: flex;
justify-content: space-between;
margin-bottom: 6px;
font-size: 14px;
}
.score-label {
font-weight: 600;
color: #333;
}
.score-value {
font-weight: 700;
color: #667eea;
}
.score-bar-container {
width: 100%;
height: 12px;
background: #e0e0e0;
border-radius: 6px;
overflow: hidden;
box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
}
.score-bar-fill {
height: 100%;
transition: width 0.3s ease;
border-radius: 6px;
}
.score-excellent { background: linear-gradient(90deg, #4caf50, #66bb6a); }
.score-good { background: linear-gradient(90deg, #8bc34a, #9ccc65); }
.score-fair { background: linear-gradient(90deg, #ff9800, #ffa726); }
.score-poor { background: linear-gradient(90deg, #f44336, #ef5350); }
.score-detail {
margin-top: 4px;
font-size: 12px;
color: #666;
}
.tab-header-with-score {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 20px;
padding-bottom: 12px;
border-bottom: 2px solid #e0e0e0;
}
.score-badge-compact {
display: inline-block;
padding: 6px 14px;
border-radius: 16px;
font-size: 13px;
font-weight: 700;
color: white;
white-space: nowrap;
}
.score-badge-compact.score-excellent {
background: linear-gradient(135deg, #4caf50, #66bb6a);
box-shadow: 0 2px 6px rgba(76, 175, 80, 0.4);
}
.score-badge-compact.score-good {
background: linear-gradient(135deg, #8bc34a, #9ccc65);
box-shadow: 0 2px 6px rgba(139, 195, 74, 0.4);
}
.score-badge-compact.score-fair {
background: linear-gradient(135deg, #ff9800, #ffa726);
box-shadow: 0 2px 6px rgba(255, 152, 0, 0.4);
}
.score-badge-compact.score-poor {
background: linear-gradient(135deg, #f44336, #ef5350);
box-shadow: 0 2px 6px rgba(244, 67, 54, 0.4);
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>pyscn Analysis Report</h1>
<p>Generated: 2025-11-23 21:42:24</p>
<div class="score-badge grade-c">
Health Score: 63/100 (Grade: C)
</div>
</div>
<div class="tabs">
<div class="tab-buttons">
<button class="tab-button active" onclick="showTab('summary', this)">Summary</button>
<button class="tab-button" onclick="showTab('complexity', this)">Complexity</button>
<button class="tab-button" onclick="showTab('deadcode', this)">Dead Code</button>
<button class="tab-button" onclick="showTab('clone', this)">Clone Detection</button>
<button class="tab-button" onclick="showTab('cbo', this)">Class Coupling</button>
<button class="tab-button" onclick="showTab('sys-deps', this)">Dependencies</button>
<button class="tab-button" onclick="showTab('sys-arch', this)">Architecture</button>
</div>
<div id="summary" class="tab-content active">
<h2>Analysis Summary</h2>
<h3 style="margin-top: 20px; margin-bottom: 16px; color: #2c3e50;">Quality Scores</h3>
<div class="score-bars">
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Complexity</span>
<span class="score-value">40/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-poor" style="width: 40%"></div>
</div>
<div class="score-detail">Avg: 9.5, High-risk: 28</div>
</div>
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Dead Code</span>
<span class="score-value">70/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-fair" style="width: 70%"></div>
</div>
<div class="score-detail">27 issues, 2 critical</div>
</div>
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Duplication</span>
<span class="score-value">30/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-poor" style="width: 30%"></div>
</div>
<div class="score-detail">6.0% duplication, 18 groups</div>
</div>
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Coupling (CBO)</span>
<span class="score-value">100/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-excellent" style="width: 100%"></div>
</div>
<div class="score-detail">Avg: 1.5, High-coupling: 0/145</div>
</div>
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Dependencies</span>
<span class="score-value">85/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-good" style="width: 85%"></div>
</div>
<div class="score-detail">No cycles, Depth: 7</div>
</div>
<div class="score-bar-item">
<div class="score-bar-header">
<span class="score-label">Architecture</span>
<span class="score-value">75/100</span>
</div>
<div class="score-bar-container">
<div class="score-bar-fill score-good" style="width: 75%"></div>
</div>
<div class="score-detail">76% compliant</div>
</div>
</div>
<h3 style="margin-top: 24px; margin-bottom: 16px; color: #2c3e50;">File Statistics</h3>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">252</div>
<div class="metric-label">Total Files</div>
</div>
<div class="metric-card">
<div class="metric-value">252</div>
<div class="metric-label">Analyzed Files</div>
</div>
<div class="metric-card">
<div class="metric-value">9.52</div>
<div class="metric-label">Avg Complexity</div>
</div>
<div class="metric-card">
<div class="metric-value">27</div>
<div class="metric-label">Dead Code Issues</div>
</div>
<div class="metric-card">
<div class="metric-value">2360</div>
<div class="metric-label">Unique Fragments</div>
</div>
<div class="metric-card">
<div class="metric-value">6.0%</div>
<div class="metric-label">Code Duplication</div>
</div>
<div class="metric-card">
<div class="metric-value">145</div>
<div class="metric-label">Total Classes</div>
</div>
<div class="metric-card">
<div class="metric-value">0</div>
<div class="metric-label">High Coupling (CBO)</div>
</div>
<div class="metric-card">
<div class="metric-value">1.50</div>
<div class="metric-label">Avg CBO</div>
</div>
</div>
<h3 style="margin-top: 16px; color: #2c3e50;">Dependencies</h3>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">252</div>
<div class="metric-label">Total Modules</div>
</div>
<div class="metric-card">
<div class="metric-value">237</div>
<div class="metric-label">Total Dependencies</div>
</div>
<div class="metric-card">
<div class="metric-value">7</div>
<div class="metric-label">Max Depth</div>
</div>
<div class="metric-card">
<div class="metric-value">✅ 0</div>
<div class="metric-label">Circular Dependencies</div>
</div>
</div>
<h3 style="margin-top: 8px; color: #2c3e50;">Architecture</h3>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">58</div>
<div class="metric-label">Violations</div>
</div>
<div class="metric-card">
<div class="metric-value">75.5%</div>
<div class="metric-label">Compliance</div>
</div>
<div class="metric-card">
<div class="metric-value">4</div>
<div class="metric-label">Layers Analyzed</div>
</div>
<div class="metric-card">
<div class="metric-value">237</div>
<div class="metric-label">Total Rules</div>
</div>
</div>
</div>
<div id="complexity" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Complexity Analysis</h2>
<div class="score-badge-compact score-poor">
40/100
</div>
</div>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">567</div>
<div class="metric-label">Total Functions</div>
</div>
<div class="metric-card">
<div class="metric-value">9.52</div>
<div class="metric-label">Average</div>
</div>
<div class="metric-card">
<div class="metric-value">62</div>
<div class="metric-label">Maximum</div>
</div>
</div>
<h3>Top Complex Functions</h3>
<table class="table">
<thead>
<tr>
<th>Function</th>
<th>File</th>
<th>Complexity</th>
<th>Nesting Depth</th>
<th>Risk</th>
</tr>
</thead>
<tbody>
<tr>
<td>main</td>
<td>install.py</td>
<td>62</td>
<td>6</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>__main__</td>
<td>src/mcp_memory_service/config.py</td>
<td>42</td>
<td>0</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>SqliteVecMemoryStorage.initialize</td>
<td>src/mcp_memory_service/storage/sqlite_vec.py</td>
<td>38</td>
<td>10</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>token</td>
<td>src/mcp_memory_service/web/oauth/authorization.py</td>
<td>35</td>
<td>4</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>install_package</td>
<td>scripts/installation/install.py</td>
<td>33</td>
<td>4</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>SqliteVecMemoryStorage._initialize_embedding_model</td>
<td>src/mcp_memory_service/storage/sqlite_vec.py</td>
<td>33</td>
<td>6</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>detect_gpu</td>
<td>scripts/installation/install.py</td>
<td>30</td>
<td>8</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>detect_gpu</td>
<td>install.py</td>
<td>30</td>
<td>8</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>verify_installation</td>
<td>install.py</td>
<td>30</td>
<td>5</td>
<td class="risk-high">high</td>
</tr>
<tr>
<td>test_memory_crud</td>
<td>scripts/testing/test_memory_api.py</td>
<td>30</td>
<td>6</td>
<td class="risk-high">high</td>
</tr>
</tbody>
</table>
<p style="color: #666; margin-top: 10px;">Showing top 10 of 567 functions</p>
</div>
<div id="deadcode" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Dead Code Detection</h2>
<div class="score-badge-compact score-fair">
70/100
</div>
</div>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">27</div>
<div class="metric-label">Total Issues</div>
</div>
<div class="metric-card">
<div class="metric-value">2</div>
<div class="metric-label">Critical</div>
</div>
<div class="metric-card">
<div class="metric-value">25</div>
<div class="metric-label">Warnings</div>
</div>
</div>
<h3>Top Dead Code Issues</h3>
<table class="table">
<thead>
<tr>
<th>File</th>
<th>Function</th>
<th>Lines</th>
<th>Severity</th>
<th>Reason</th>
</tr>
</thead>
<tbody>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1361-1365</td>
<td class="severity-critical">critical</td>
<td>unreachable_after_return</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1367-1436</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1368-1436</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1369-1369</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1371-1371</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1372-1373</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1373-1373</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1376-1377</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1377-1377</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>configure_paths</td>
<td>1380-1388</td>
<td class="severity-warning">warning</td>
<td>unreachable_branch</td>
</tr>
</tbody>
</table>
<p style="color: #666; margin-top: 10px;">Showing top 10 of 27 dead code issues</p>
</div>
<div id="clone" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Clone Detection</h2>
<div class="score-badge-compact score-poor">
30/100
</div>
</div>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">2360</div>
<div class="metric-label">Unique Fragments</div>
</div>
<div class="metric-card">
<div class="metric-value">18</div>
<div class="metric-label">Clone Groups</div>
</div>
<div class="metric-card">
<div class="metric-value">1.00</div>
<div class="metric-label">Avg Similarity</div>
</div>
</div>
<h3>Clone Groups</h3>
<p style="color: #666; margin-bottom: 15px;">Code fragments grouped by similarity</p>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 17 - 5 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>135-146</td>
<td>12 lines</td>
</tr>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>155-168</td>
<td>14 lines</td>
</tr>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>177-189</td>
<td>13 lines</td>
</tr>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>269-282</td>
<td>14 lines</td>
</tr>
<tr>
<td>tests/unit/test_json_loader.py</td>
<td>125-147</td>
<td>23 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 5 - 4 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>1509-1522</td>
<td>14 lines</td>
</tr>
<tr>
<td>scripts/database/db_health_check.py</td>
<td>233-249</td>
<td>17 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>1314-1327</td>
<td>14 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>1334-1347</td>
<td>14 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 7 - 4 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>2862-2873</td>
<td>12 lines</td>
</tr>
<tr>
<td>install.py</td>
<td>2892-2903</td>
<td>12 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>1785-1796</td>
<td>12 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>1817-1828</td>
<td>12 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 16 - 4 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>88-103</td>
<td>16 lines</td>
</tr>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>112-126</td>
<td>15 lines</td>
</tr>
<tr>
<td>tests/unit/test_csv_loader.py</td>
<td>244-260</td>
<td>17 lines</td>
</tr>
<tr>
<td>tests/unit/test_json_loader.py</td>
<td>215-234</td>
<td>20 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 0 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>claude-hooks/install_hooks.py</td>
<td>180-203</td>
<td>24 lines</td>
</tr>
<tr>
<td>scripts/testing/test_memory_simple.py</td>
<td>91-102</td>
<td>12 lines</td>
</tr>
<tr>
<td>scripts/testing/test_search_api.py</td>
<td>79-96</td>
<td>18 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 1 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>327-358</td>
<td>32 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>274-305</td>
<td>32 lines</td>
</tr>
<tr>
<td>scripts/validation/verify_environment.py</td>
<td>126-159</td>
<td>34 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 2 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>344-358</td>
<td>15 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>291-305</td>
<td>15 lines</td>
</tr>
<tr>
<td>scripts/validation/verify_environment.py</td>
<td>144-159</td>
<td>16 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 3 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>363-383</td>
<td>21 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>310-330</td>
<td>21 lines</td>
</tr>
<tr>
<td>scripts/validation/verify_environment.py</td>
<td>162-183</td>
<td>22 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 6 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>2313-2356</td>
<td>44 lines</td>
</tr>
<tr>
<td>install.py</td>
<td>2383-2421</td>
<td>39 lines</td>
</tr>
<tr>
<td>install.py</td>
<td>2425-2463</td>
<td>39 lines</td>
</tr>
</tbody>
</table>
</div>
<div style="background: #f8f9fa; padding: 15px; margin-bottom: 15px; border-radius: 8px; border-left: 4px solid #667eea;">
<h4 style="margin-top: 0; color: #333;">Group 8 - 3 clones (Type Type-1, similarity: 1.00)</h4>
<table class="table" style="margin-bottom: 0;">
<thead>
<tr>
<th>File</th>
<th>Lines</th>
<th>Size</th>
</tr>
</thead>
<tbody>
<tr>
<td>install.py</td>
<td>3124-3151</td>
<td>28 lines</td>
</tr>
<tr>
<td>install.py</td>
<td>3127-3145</td>
<td>19 lines</td>
</tr>
<tr>
<td>scripts/installation/install.py</td>
<td>1927-1945</td>
<td>19 lines</td>
</tr>
</tbody>
</table>
</div>
<p style="color: #666; margin-top: 10px;">Showing top 10 of 18 clone groups</p>
</div>
<div id="cbo" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Class Coupling</h2>
<div class="score-badge-compact score-excellent">
100/100
</div>
</div>
<p style="margin-bottom: 20px; color: #666;">Coupling Between Objects (CBO) metrics</p>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">145</div>
<div class="metric-label">Total Classes</div>
</div>
<div class="metric-card">
<div class="metric-value">0</div>
<div class="metric-label">High Risk Classes</div>
</div>
<div class="metric-card">
<div class="metric-value">1.50</div>
<div class="metric-label">Average CBO</div>
</div>
<div class="metric-card">
<div class="metric-value">4</div>
<div class="metric-label">Max CBO</div>
</div>
</div>
<h3>Most Dependent Classes</h3>
<table class="table">
<thead>
<tr>
<th>Class</th>
<th>File</th>
<th>CBO</th>
<th>Risk Level</th>
<th>Dependent Classes</th>
</tr>
</thead>
<tbody>
<tr>
<td>ConsolidationHealthMonitor</td>
<td>src/mcp_memory_service/consolidation/health.py</td>
<td>4</td>
<td class="risk-low">low</td>
<td>HealthMetric, HealthAlert, Any, HealthStatus</td>
</tr>
<tr>
<td>ControlledForgettingEngine</td>
<td>src/mcp_memory_service/consolidation/forgetting.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>ConsolidationBase, ForgettingCandidate, ForgettingResult</td>
</tr>
<tr>
<td>StorageStats</td>
<td>src/mcp_memory_service/web/api/analytics.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>BaseModel, LargestMemory, GrowthTrendPoint</td>
</tr>
<tr>
<td>HybridMemoryStorage</td>
<td>src/mcp_memory_service/storage/hybrid.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>MemoryStorage, BackgroundSyncService, SyncOperation</td>
</tr>
<tr>
<td>HTTPClientStorage</td>
<td>src/mcp_memory_service/storage/http_client.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>MemoryStorage, Any, Memory</td>
</tr>
<tr>
<td>CloudflareStorage</td>
<td>src/mcp_memory_service/storage/cloudflare.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>Any, Memory, MemoryStorage</td>
</tr>
<tr>
<td>ServiceDiscovery</td>
<td>src/mcp_memory_service/discovery/mdns_service.py</td>
<td>3</td>
<td class="risk-low">low</td>
<td>AsyncZeroconf, AsyncServiceBrowser, DiscoveryListener</td>
</tr>
<tr>
<td>CacheManager</td>
<td>src/mcp_memory_service/utils/cache_manager.py</td>
<td>2</td>
<td class="risk-low">low</td>
<td>Any, CacheStats</td>
</tr>
<tr>
<td>MemoryResult</td>
<td>src/mcp_memory_service/services/memory_service.py</td>
<td>2</td>
<td class="risk-low">low</td>
<td>TypedDict, Any</td>
</tr>
<tr>
<td>SSEManager</td>
<td>src/mcp_memory_service/web/sse.py</td>
<td>2</td>
<td class="risk-low">low</td>
<td>Any, SSEEvent</td>
</tr>
</tbody>
</table>
<p style="color: #666; margin-top: 10px;">Showing top 10 of 145 classes</p>
</div>
<div id="sys-deps" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Module Dependencies</h2>
<div class="score-badge-compact score-good">
85/100
</div>
</div>
<p style="margin-bottom: 20px; color: #666;">Project-wide module dependency graph metrics</p>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">252</div>
<div class="metric-label">Total Modules</div>
</div>
<div class="metric-card">
<div class="metric-value">237</div>
<div class="metric-label">Total Dependencies</div>
</div>
<div class="metric-card">
<div class="metric-value">7</div>
<div class="metric-label">Max Depth</div>
</div>
<div class="metric-card">
<div class="metric-value">✅ 0</div>
<div class="metric-label">Circular Dependencies</div>
</div>
</div>
<h3 style="margin-top: 30px;">Circular Dependencies</h3>
<div style="padding: 20px; background: #d4edda; border-left: 4px solid #28a745; border-radius: 4px; margin: 20px 0;">
<strong style="color: #155724;">✅ No circular dependencies detected</strong>
<p style="color: #155724; margin: 10px 0 0 0;">All modules have acyclic dependency relationships.</p>
</div>
<h3>Longest Dependency Chains</h3>
<table class="table">
<thead>
<tr>
<th>#</th>
<th>Depth</th>
<th>Path</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>6</td>
<td>src.mcp_memory_service.web.app → src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.storage.base → src.mcp_memory_service.models.memory</td>
</tr>
<tr>
<td>2</td>
<td>6</td>
<td>src.mcp_memory_service.discovery → src.mcp_memory_service.api.client → src.mcp_memory_service.storage.factory → src.mcp_memory_service.storage.sqlite_vec → src.mcp_memory_service.consolidation.base → src.mcp_memory_service.models.memory</td>
</tr>
<tr>
<td>3</td>
<td>6</td>
<td>src.mcp_memory_service.web.api.events → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.storage.factory → src.mcp_memory_service.storage.sqlite_vec → src.mcp_memory_service.consolidation.base → src.mcp_memory_service.models.memory</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>src.mcp_memory_service.api.operations → src.mcp_memory_service.api.client → src.mcp_memory_service.storage.factory → src.mcp_memory_service.storage.sqlite_vec → src.mcp_memory_service.config</td>
</tr>
<tr>
<td>5</td>
<td>5</td>
<td>src.mcp_memory_service.discovery → src.mcp_memory_service.api.client → src.mcp_memory_service.storage.factory → src.mcp_memory_service.consolidation.base → src.mcp_memory_service.models.memory</td>
</tr>
<tr>
<td>6</td>
<td>5</td>
<td>src.mcp_memory_service.web.app → src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.storage.base</td>
</tr>
<tr>
<td>7</td>
<td>5</td>
<td>src.mcp_memory_service.web.app → src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.utils.hashing</td>
</tr>
<tr>
<td>8</td>
<td>5</td>
<td>src.mcp_memory_service.web.app → src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.models.memory</td>
</tr>
<tr>
<td>9</td>
<td>5</td>
<td>src.mcp_memory_service.web.app → src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.utils.content_splitter</td>
</tr>
<tr>
<td>10</td>
<td>5</td>
<td>src.mcp_memory_service.web.api.mcp → src.mcp_memory_service.web.dependencies → src.mcp_memory_service.services.memory_service → src.mcp_memory_service.storage.base → src.mcp_memory_service.models.memory</td>
</tr>
</tbody>
</table>
</div>
<div id="sys-arch" class="tab-content">
<div class="tab-header-with-score">
<h2 style="margin: 0;">Architecture Validation</h2>
<div class="score-badge-compact score-good">
75/100
</div>
</div>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">4</div>
<div class="metric-label">Layers Analyzed</div>
</div>
<div class="metric-card">
<div class="metric-value">237</div>
<div class="metric-label">Total Rules</div>
</div>
<div class="metric-card">
<div class="metric-value">58</div>
<div class="metric-label">Violations</div>
</div>
<div class="metric-card">
<div class="metric-value">75.5%</div>
<div class="metric-label">Compliance</div>
</div>
</div>
<h3>Top Rule Violations</h3>
<table class="table">
<thead>
<tr>
<th>Severity</th>
<th>Rule</th>
<th>From</th>
<th>To</th>
</tr>
</thead>
<tbody>
<tr>
<td>error</td>
<td>application -> {application,domain,infrastructure}</td>
<td>scripts.backup.backup_memories</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>infrastructure -> {infrastructure,domain,application}</td>
<td>scripts.backup.restore_memories</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>scripts.maintenance.cleanup_memories</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>application -> {application,domain,infrastructure}</td>
<td>scripts.maintenance.regenerate_embeddings</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>infrastructure -> {infrastructure,domain,application}</td>
<td>scripts.maintenance.repair_malformed_tags</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.api.client</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.api.operations</td>
<td>src.mcp_memory_service.api.sync_wrapper</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.backup.scheduler</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.cli.main</td>
<td>src.mcp_memory_service.server</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.cli.utils</td>
<td>src.mcp_memory_service.config</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.associations</td>
<td>src.mcp_memory_service.consolidation.base</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.clustering</td>
<td>src.mcp_memory_service.consolidation.base</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.compression</td>
<td>src.mcp_memory_service.consolidation.base</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.consolidator</td>
<td>src.mcp_memory_service.consolidation.base</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.consolidation.consolidator</td>
<td>src.mcp_memory_service.consolidation.decay</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.consolidator</td>
<td>src.mcp_memory_service.consolidation.forgetting</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.consolidation.consolidator</td>
<td>src.mcp_memory_service.consolidation.health</td>
</tr>
<tr>
<td>error</td>
<td>application -> {application,domain,infrastructure}</td>
<td>src.mcp_memory_service.consolidation.forgetting</td>
<td>src.mcp_memory_service.consolidation.decay</td>
</tr>
<tr>
<td>error</td>
<td>domain !> application</td>
<td>src.mcp_memory_service.consolidation.scheduler</td>
<td>src.mcp_memory_service.consolidation.base</td>
</tr>
<tr>
<td>error</td>
<td>domain !> presentation</td>
<td>src.mcp_memory_service.discovery.client</td>
<td>src.mcp_memory_service.config</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<script>
function showTab(tabName, el) {
const tabs = document.querySelectorAll('.tab-content');
tabs.forEach(tab => tab.classList.remove('active'));
const buttons = document.querySelectorAll('.tab-button');
buttons.forEach(btn => btn.classList.remove('active'));
document.getElementById(tabName).classList.add('active');
if (el) { el.classList.add('active'); }
}
</script>
</body>
</html>
```
--------------------------------------------------------------------------------
/src/mcp_memory_service/storage/sqlite_vec.py:
--------------------------------------------------------------------------------
```python
# Copyright 2024 Heinrich Krupp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SQLite-vec storage backend for MCP Memory Service.
Provides local vector similarity search using sqlite-vec extension.
"""
import sqlite3
import json
import logging
import traceback
import time
import os
import sys
import platform
import hashlib
import struct
from collections import Counter
from typing import List, Dict, Any, Tuple, Optional, Set, Callable
from datetime import datetime, timezone, timedelta, date
import asyncio
import random
# Import sqlite-vec with fallback
try:
import sqlite_vec
from sqlite_vec import serialize_float32
SQLITE_VEC_AVAILABLE = True
except ImportError:
SQLITE_VEC_AVAILABLE = False
logging.getLogger(__name__).warning("sqlite-vec not available. Install with: pip install sqlite-vec")
# Import sentence transformers with fallback
try:
from sentence_transformers import SentenceTransformer
SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
SENTENCE_TRANSFORMERS_AVAILABLE = False
logging.getLogger(__name__).warning("sentence_transformers not available. Install for embedding support.")
from .base import MemoryStorage
from ..models.memory import Memory, MemoryQueryResult
from ..utils.hashing import generate_content_hash
from ..utils.system_detection import (
get_system_info,
get_optimal_embedding_settings,
get_torch_device,
AcceleratorType
)
from ..config import SQLITEVEC_MAX_CONTENT_LENGTH
logger = logging.getLogger(__name__)
# Global model cache for performance optimization
_MODEL_CACHE = {}
_EMBEDDING_CACHE = {}
def clear_model_caches() -> dict:
"""
Clear embedding model caches to free memory.
This function clears both the model cache (loaded embedding models)
and the embedding cache (computed embeddings). It also triggers
garbage collection to reclaim memory.
Used during graceful shutdown or when memory pressure is detected.
Note: After clearing, models will be reloaded on next use.
Returns:
Dict with counts of cleared items:
- models_cleared: Number of model instances removed
- embeddings_cleared: Number of cached embeddings removed
"""
import gc
global _MODEL_CACHE, _EMBEDDING_CACHE
model_count = len(_MODEL_CACHE)
embedding_count = len(_EMBEDDING_CACHE)
_MODEL_CACHE.clear()
_EMBEDDING_CACHE.clear()
# Force garbage collection to reclaim memory
collected = gc.collect()
logger.info(
f"Model caches cleared - "
f"Models: {model_count}, Embeddings: {embedding_count}, "
f"GC collected: {collected} objects"
)
return {
"models_cleared": model_count,
"embeddings_cleared": embedding_count,
"gc_collected": collected
}
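# Illustrative usage sketch (not part of the original module; the shutdown hook
# shown here is hypothetical, only the return keys documented above are assumed):
#
#   stats = clear_model_caches()
#   logger.info("Freed %d model(s) and %d cached embedding(s)",
#               stats["models_cleared"], stats["embeddings_cleared"])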
def get_model_cache_stats() -> dict:
"""
Get statistics about the model cache.
Returns:
Dict with cache statistics:
- model_count: Number of cached models
- model_keys: List of cached model keys
- embedding_count: Number of cached embeddings
"""
return {
"model_count": len(_MODEL_CACHE),
"model_keys": list(_MODEL_CACHE.keys()),
"embedding_count": len(_EMBEDDING_CACHE)
}
class _HashEmbeddingModel:
"""Deterministic, pure-Python embedding fallback.
This is a last-resort option intended for environments where native DLL-backed
runtimes (onnxruntime/torch) cannot be imported (e.g., WinError 1114).
It enables basic vector storage/search with reduced quality.
"""
def __init__(self, embedding_dimension: int):
self.embedding_dimension = int(embedding_dimension)
def encode(self, texts: List[str], convert_to_numpy: bool = False):
vectors = [self._embed_one(text) for text in texts]
if convert_to_numpy:
try:
import numpy as np
return np.asarray(vectors, dtype=np.float32)
except Exception:
return vectors
return vectors
def _embed_one(self, text: str) -> List[float]:
if not text:
return [0.0] * self.embedding_dimension
# Expand SHA-256 stream deterministically until we have enough bytes
# for `embedding_dimension` float values.
floats: List[float] = []
counter = 0
needed = self.embedding_dimension
text_bytes = text.encode("utf-8", errors="ignore")
while len(floats) < needed:
digest = hashlib.sha256(text_bytes + b"\x1f" + struct.pack("<I", counter)).digest()
counter += 1
# Use 4 bytes -> signed int32 -> map to [-1, 1]
for i in range(0, len(digest) - 3, 4):
(val,) = struct.unpack("<i", digest[i : i + 4])
floats.append(val / 2147483648.0)
if len(floats) >= needed:
break
return floats
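# Behavioural sketch of the fallback model (assumed values, not part of the
# original file): encoding is deterministic, so identical input text always
# yields identical vectors of the configured dimension, with components in [-1, 1].
#
#   model = _HashEmbeddingModel(embedding_dimension=384)
#   vec_a = model.encode(["hello world"])[0]
#   vec_b = model.encode(["hello world"])[0]
#   assert vec_a == vec_b and len(vec_a) == 384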
def deserialize_embedding(blob: bytes) -> Optional[List[float]]:
"""
Deserialize embedding blob from sqlite-vec format to list of floats.
Args:
blob: Binary blob containing serialized float32 array
Returns:
List of floats representing the embedding, or None if deserialization fails
"""
if not blob:
return None
try:
# Import numpy locally to avoid hard dependency
import numpy as np
# sqlite-vec stores embeddings as raw float32 arrays
arr = np.frombuffer(blob, dtype=np.float32)
return arr.tolist()
except Exception as e:
logger.warning(f"Failed to deserialize embedding: {e}")
return None
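# Round-trip sketch (assumes numpy is installed and that sqlite-vec's
# serialize_float32() accepts a plain list of floats; not part of the original file):
#
#   blob = serialize_float32([0.1, 0.2, 0.3])
#   values = deserialize_embedding(blob)   # ~[0.1, 0.2, 0.3] as Python floats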
class SqliteVecMemoryStorage(MemoryStorage):
"""
SQLite-vec based memory storage implementation.
This backend provides local vector similarity search using sqlite-vec
while maintaining the same interface as other storage backends.
"""
@property
def max_content_length(self) -> Optional[int]:
"""SQLite-vec content length limit from configuration (default: unlimited)."""
return SQLITEVEC_MAX_CONTENT_LENGTH
@property
def supports_chunking(self) -> bool:
"""SQLite-vec backend supports content chunking with metadata linking."""
return True
def __init__(self, db_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
"""
Initialize SQLite-vec storage.
Args:
db_path: Path to SQLite database file
embedding_model: Name of sentence transformer model to use
"""
self.db_path = db_path
self.embedding_model_name = embedding_model
self.conn = None
self.embedding_model = None
self.embedding_dimension = 384 # Default for all-MiniLM-L6-v2
self._initialized = False # Track initialization state
# Performance settings
self.enable_cache = True
self.batch_size = 32
# Ensure directory exists
os.makedirs(os.path.dirname(self.db_path) if os.path.dirname(self.db_path) else '.', exist_ok=True)
logger.info(f"Initialized SQLite-vec storage at: {self.db_path}")
def _safe_json_loads(self, json_str: str, context: str = "") -> dict:
"""Safely parse JSON with comprehensive error handling and logging."""
if not json_str:
return {}
try:
result = json.loads(json_str)
if not isinstance(result, dict):
logger.warning(f"Non-dict JSON in {context}: {type(result)}")
return {}
return result
except json.JSONDecodeError as e:
logger.error(f"JSON decode error in {context}: {e}, data: {json_str[:100]}...")
return {}
except TypeError as e:
logger.error(f"JSON type error in {context}: {e}")
return {}
async def _execute_with_retry(self, operation: Callable, max_retries: int = 3, initial_delay: float = 0.1):
"""
Execute a database operation with exponential backoff retry logic.
Args:
operation: The database operation to execute
max_retries: Maximum number of retry attempts
initial_delay: Initial delay in seconds before first retry
Returns:
The result of the operation
Raises:
The last exception if all retries fail
"""
last_exception = None
delay = initial_delay
for attempt in range(max_retries + 1):
try:
return operation()
except sqlite3.OperationalError as e:
last_exception = e
error_msg = str(e).lower()
# Check if error is related to database locking
if "locked" in error_msg or "busy" in error_msg:
if attempt < max_retries:
# Add jitter to prevent thundering herd
jittered_delay = delay * (1 + random.uniform(-0.1, 0.1))
logger.warning(f"Database locked, retrying in {jittered_delay:.2f}s (attempt {attempt + 1}/{max_retries})")
await asyncio.sleep(jittered_delay)
# Exponential backoff
delay *= 2
continue
else:
logger.error(f"Database locked after {max_retries} retries")
else:
# Non-retryable error
raise
except Exception as e:
# Non-SQLite errors are not retried
raise
# If we get here, all retries failed
raise last_exception
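    # Usage pattern sketch (mirrors calls made later in this module): wrap a
    # zero-argument callable so transient "database is locked" errors are
    # retried with exponential backoff and jitter.
    #
    #   await self._execute_with_retry(self.conn.commit)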
async def _persist_access_metadata(self, memory: Memory):
"""
Persist access tracking metadata (access_count, last_accessed_at) to storage.
Args:
memory: Memory object with updated access metadata
"""
def update_metadata():
self.conn.execute('''
UPDATE memories
SET metadata = ?
WHERE content_hash = ?
''', (json.dumps(memory.metadata), memory.content_hash))
self.conn.commit()
await self._execute_with_retry(update_metadata)
def _check_extension_support(self):
"""Check if Python's sqlite3 supports loading extensions."""
test_conn = None
try:
test_conn = sqlite3.connect(":memory:")
if not hasattr(test_conn, 'enable_load_extension'):
return False, "Python sqlite3 module not compiled with extension support"
# Test if we can actually enable extension loading
test_conn.enable_load_extension(True)
test_conn.enable_load_extension(False)
return True, "Extension loading supported"
except AttributeError as e:
return False, f"enable_load_extension not available: {e}"
except sqlite3.OperationalError as e:
return False, f"Extension loading disabled: {e}"
except Exception as e:
return False, f"Extension support check failed: {e}"
finally:
if test_conn:
test_conn.close()
def _check_dependencies(self):
"""Check and validate all required dependencies for initialization."""
if not SQLITE_VEC_AVAILABLE:
raise ImportError("sqlite-vec is not available. Install with: pip install sqlite-vec")
# Embeddings backend is selected/initialized later.
# On some Windows setups, importing onnxruntime/torch can fail with DLL init errors
# (e.g. WinError 1114). We support a pure-Python fallback to keep the service usable.
def _handle_extension_loading_failure(self):
"""Provide detailed error guidance when extension loading is not supported."""
error_msg = "SQLite extension loading not supported"
logger.error(error_msg)
platform_info = f"{platform.system()} {platform.release()}"
solutions = []
if platform.system().lower() == "darwin": # macOS
solutions.extend([
"Install Python via Homebrew: brew install python",
"Use pyenv with extension support: PYTHON_CONFIGURE_OPTS='--enable-loadable-sqlite-extensions' pyenv install 3.12.0",
"Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
])
elif platform.system().lower() == "linux":
solutions.extend([
"Install Python with extension support: apt install python3-dev sqlite3",
"Rebuild Python with: ./configure --enable-loadable-sqlite-extensions",
"Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
])
else: # Windows and others
solutions.extend([
"Use official Python installer from python.org",
"Install Python with conda: conda install python",
"Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
])
detailed_error = f"""
{error_msg}
Platform: {platform_info}
Python Version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}
SOLUTIONS:
{chr(10).join(f" • {solution}" for solution in solutions)}
The sqlite-vec backend requires Python compiled with --enable-loadable-sqlite-extensions.
Consider using the Cloudflare backend as an alternative: it provides cloud-based vector
search without requiring local SQLite extensions.
To switch backends permanently, set: MCP_MEMORY_STORAGE_BACKEND=cloudflare
"""
raise RuntimeError(detailed_error.strip())
def _get_connection_timeout(self) -> float:
"""Calculate database connection timeout from environment or use default."""
timeout_seconds = 15.0 # Default: 15 seconds
custom_pragmas_env = os.environ.get("MCP_MEMORY_SQLITE_PRAGMAS", "")
if "busy_timeout" not in custom_pragmas_env:
return timeout_seconds
# Parse busy_timeout value (in milliseconds, convert to seconds)
for pragma_pair in custom_pragmas_env.split(","):
if "busy_timeout" in pragma_pair and "=" in pragma_pair:
try:
timeout_ms = int(pragma_pair.split("=")[1].strip())
timeout_seconds = timeout_ms / 1000.0
logger.info(f"Using custom timeout: {timeout_seconds}s from MCP_MEMORY_SQLITE_PRAGMAS")
return timeout_seconds
except (ValueError, IndexError) as e:
logger.warning(f"Failed to parse busy_timeout from env: {e}, using default {timeout_seconds}s")
return timeout_seconds
return timeout_seconds
def _load_sqlite_vec_extension(self):
"""Load the sqlite-vec extension with proper error handling."""
try:
self.conn.enable_load_extension(True)
sqlite_vec.load(self.conn)
self.conn.enable_load_extension(False)
logger.info("sqlite-vec extension loaded successfully")
except Exception as e:
error_msg = f"Failed to load sqlite-vec extension: {e}"
logger.error(error_msg)
if self.conn:
self.conn.close()
self.conn = None
# Provide specific guidance based on the error
if "enable_load_extension" in str(e):
detailed_error = f"""
{error_msg}
This error occurs when Python's sqlite3 module is not compiled with extension support.
This is common on macOS with the system Python installation.
RECOMMENDED SOLUTIONS:
• Use Homebrew Python: brew install python && rehash
• Use pyenv with extensions: PYTHON_CONFIGURE_OPTS='--enable-loadable-sqlite-extensions' pyenv install 3.12.0
• Switch to Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare
The Cloudflare backend provides cloud-based vector search without requiring local SQLite extensions.
"""
else:
detailed_error = f"""
{error_msg}
Failed to load the sqlite-vec extension. This could be due to:
• Incompatible sqlite-vec version
• Missing system dependencies
• SQLite version incompatibility
SOLUTIONS:
• Reinstall sqlite-vec: pip install --force-reinstall sqlite-vec
• Switch to Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare
• Check SQLite version: python -c "import sqlite3; print(sqlite3.sqlite_version)"
"""
raise RuntimeError(detailed_error.strip())
def _connect_and_load_extension(self):
"""Connect to database and load the sqlite-vec extension."""
# Calculate timeout and connect
timeout_seconds = self._get_connection_timeout()
self.conn = sqlite3.connect(self.db_path, timeout=timeout_seconds, check_same_thread=False)
# Load extension
self._load_sqlite_vec_extension()
# Apply pragmas for concurrent access (must be done per-connection)
default_pragmas = {
"journal_mode": "WAL",
"busy_timeout": "5000",
"synchronous": "NORMAL",
"cache_size": "10000",
"temp_store": "MEMORY"
}
# Override with custom pragmas from environment
custom_pragmas = os.environ.get("MCP_MEMORY_SQLITE_PRAGMAS", "")
if custom_pragmas:
for pragma_pair in custom_pragmas.split(","):
pragma_pair = pragma_pair.strip()
if "=" in pragma_pair:
pragma_name, pragma_value = pragma_pair.split("=", 1)
default_pragmas[pragma_name.strip()] = pragma_value.strip()
logger.debug(f"Custom pragma: {pragma_name}={pragma_value}")
# Apply all pragmas
for pragma_name, pragma_value in default_pragmas.items():
try:
self.conn.execute(f"PRAGMA {pragma_name}={pragma_value}")
logger.debug(f"Applied pragma: {pragma_name}={pragma_value}")
except sqlite3.Error as e:
logger.warning(f"Failed to apply pragma {pragma_name}: {e}")
async def initialize(self):
"""Initialize the SQLite database with vec0 extension."""
# Return early if already initialized to prevent multiple initialization attempts
if self._initialized:
return
try:
self._check_dependencies()
# Check if extension loading is supported
extension_supported, support_message = self._check_extension_support()
if not extension_supported:
self._handle_extension_loading_failure()
# Connect to database and load extension
self._connect_and_load_extension()
# Check if database is already initialized by another process
# This prevents DDL lock conflicts when multiple servers start concurrently
try:
cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'")
memories_table_exists = cursor.fetchone() is not None
cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memory_embeddings'")
embeddings_table_exists = cursor.fetchone() is not None
if memories_table_exists and embeddings_table_exists:
# Database exists - run migrations for new columns, then skip full DDL
logger.info("Database already initialized, checking for schema migrations...")
# Migration v8.64.0: Add deleted_at column for soft-delete support
try:
cursor = self.conn.execute("PRAGMA table_info(memories)")
columns = [row[1] for row in cursor.fetchall()]
if 'deleted_at' not in columns:
logger.info("Migrating database: Adding deleted_at column for soft-delete support...")
self.conn.execute('ALTER TABLE memories ADD COLUMN deleted_at REAL DEFAULT NULL')
self.conn.execute('CREATE INDEX IF NOT EXISTS idx_deleted_at ON memories(deleted_at)')
self.conn.commit()
logger.info("Migration complete: deleted_at column added")
else:
logger.debug("Migration check: deleted_at column already exists")
except Exception as e:
logger.warning(f"Migration check for deleted_at (non-fatal): {e}")
await self._initialize_embedding_model()
self._initialized = True
logger.info(f"SQLite-vec storage initialized successfully (existing database) with embedding dimension: {self.embedding_dimension}")
return
except sqlite3.Error as e:
# If we can't check tables (e.g., database locked), proceed with normal initialization
logger.debug(f"Could not check existing tables (will attempt full initialization): {e}")
# Apply default pragmas for concurrent access
default_pragmas = {
"journal_mode": "WAL", # Enable WAL mode for concurrent access
"busy_timeout": "5000", # 5 second timeout for locked database
"synchronous": "NORMAL", # Balanced performance/safety
"cache_size": "10000", # Increase cache size
"temp_store": "MEMORY" # Use memory for temp tables
}
# Check for custom pragmas from environment variable
custom_pragmas = os.environ.get("MCP_MEMORY_SQLITE_PRAGMAS", "")
if custom_pragmas:
# Parse custom pragmas (format: "pragma1=value1,pragma2=value2")
for pragma_pair in custom_pragmas.split(","):
pragma_pair = pragma_pair.strip()
if "=" in pragma_pair:
pragma_name, pragma_value = pragma_pair.split("=", 1)
default_pragmas[pragma_name.strip()] = pragma_value.strip()
logger.info(f"Custom pragma from env: {pragma_name}={pragma_value}")
# Apply all pragmas
applied_pragmas = []
for pragma_name, pragma_value in default_pragmas.items():
try:
self.conn.execute(f"PRAGMA {pragma_name}={pragma_value}")
applied_pragmas.append(f"{pragma_name}={pragma_value}")
except sqlite3.Error as e:
logger.warning(f"Failed to set pragma {pragma_name}={pragma_value}: {e}")
logger.info(f"SQLite pragmas applied: {', '.join(applied_pragmas)}")
# Create metadata table for storage configuration
self.conn.execute('''
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)
''')
# Create regular table for memory data
self.conn.execute('''
CREATE TABLE IF NOT EXISTS memories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
content_hash TEXT UNIQUE NOT NULL,
content TEXT NOT NULL,
tags TEXT,
memory_type TEXT,
metadata TEXT,
created_at REAL,
updated_at REAL,
created_at_iso TEXT,
updated_at_iso TEXT,
deleted_at REAL DEFAULT NULL
)
''')
# Migration: Add deleted_at column if table exists but column doesn't (v8.64.0)
try:
cursor = self.conn.execute("PRAGMA table_info(memories)")
columns = [row[1] for row in cursor.fetchall()]
if 'deleted_at' not in columns:
logger.info("Migrating database: Adding deleted_at column for soft-delete support...")
self.conn.execute('ALTER TABLE memories ADD COLUMN deleted_at REAL DEFAULT NULL')
self.conn.commit()
logger.info("Migration complete: deleted_at column added")
except Exception as e:
logger.warning(f"Migration check for deleted_at (non-fatal): {e}")
# Initialize embedding model BEFORE creating vector table
await self._initialize_embedding_model()
# Check if we need to migrate from L2 to cosine distance
# This is a one-time migration - embeddings will be regenerated automatically
try:
# First check if metadata table exists
cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='metadata'")
metadata_exists = cursor.fetchone() is not None
if metadata_exists:
cursor = self.conn.execute("SELECT value FROM metadata WHERE key='distance_metric'")
current_metric = cursor.fetchone()
if not current_metric or current_metric[0] != 'cosine':
logger.info("Migrating embeddings table from L2 to cosine distance...")
logger.info("This is a one-time operation - embeddings will be regenerated automatically")
# Use a timeout and retry logic for DROP TABLE to handle concurrent access
max_retries = 3
retry_delay = 1.0 # seconds
for attempt in range(max_retries):
try:
# Drop old embeddings table (memories table is preserved)
# This may fail if another process has the database locked
self.conn.execute("DROP TABLE IF EXISTS memory_embeddings")
logger.info("Successfully dropped old embeddings table")
break
except sqlite3.OperationalError as drop_error:
if "database is locked" in str(drop_error):
if attempt < max_retries - 1:
logger.warning(f"Database locked during migration (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
else:
# Last attempt failed - check if table exists
# If it doesn't exist, migration was done by another process
cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memory_embeddings'")
if not cursor.fetchone():
logger.info("Embeddings table doesn't exist - migration likely completed by another process")
break
else:
logger.error("Failed to drop embeddings table after retries - will attempt to continue")
# Don't fail initialization, just log the issue
break
else:
raise
else:
# No metadata table means fresh install, no migration needed
logger.debug("Fresh database detected, no migration needed")
except Exception as e:
# If anything goes wrong, log but don't fail initialization
logger.warning(f"Migration check warning (non-fatal): {e}")
# Now create virtual table with correct dimensions using cosine distance
# Cosine similarity is better for text embeddings than L2 distance
self.conn.execute(f'''
CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings USING vec0(
content_embedding FLOAT[{self.embedding_dimension}] distance_metric=cosine
)
''')
# Store metric in metadata for future migrations
self.conn.execute("""
INSERT OR REPLACE INTO metadata (key, value) VALUES ('distance_metric', 'cosine')
""")
# Create indexes for better performance
self.conn.execute('CREATE INDEX IF NOT EXISTS idx_content_hash ON memories(content_hash)')
self.conn.execute('CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)')
self.conn.execute('CREATE INDEX IF NOT EXISTS idx_memory_type ON memories(memory_type)')
self.conn.execute('CREATE INDEX IF NOT EXISTS idx_deleted_at ON memories(deleted_at)')
# Mark as initialized to prevent re-initialization
self._initialized = True
logger.info(f"SQLite-vec storage initialized successfully with embedding dimension: {self.embedding_dimension}")
except Exception as e:
error_msg = f"Failed to initialize SQLite-vec storage: {str(e)}"
logger.error(error_msg)
logger.error(traceback.format_exc())
raise RuntimeError(error_msg)
def _is_docker_environment(self) -> bool:
"""Detect if running inside a Docker container."""
# Check for Docker-specific files/environment
if os.path.exists('/.dockerenv'):
return True
if os.environ.get('DOCKER_CONTAINER'):
return True
# Check if running in common container environments
if any(os.environ.get(var) for var in ['KUBERNETES_SERVICE_HOST', 'MESOS_SANDBOX']):
return True
# Check cgroup for docker/containerd/podman
try:
with open('/proc/self/cgroup', 'r') as f:
return any('docker' in line or 'containerd' in line for line in f)
except (IOError, FileNotFoundError):
pass
return False
async def _initialize_embedding_model(self):
"""Initialize the embedding model (ONNX or SentenceTransformer based on configuration)."""
global _MODEL_CACHE
# Detect if we're in Docker
is_docker = self._is_docker_environment()
if is_docker:
logger.info("🐳 Docker environment detected - adjusting model loading strategy")
try:
# Check if we should use ONNX
use_onnx = os.environ.get('MCP_MEMORY_USE_ONNX', '').lower() in ('1', 'true', 'yes')
if use_onnx:
# Try to use ONNX embeddings
logger.info("Attempting to use ONNX embeddings (PyTorch-free)")
try:
from ..embeddings import get_onnx_embedding_model
# Check cache first
cache_key = f"onnx_{self.embedding_model_name}"
if cache_key in _MODEL_CACHE:
self.embedding_model = _MODEL_CACHE[cache_key]
logger.info(f"Using cached ONNX embedding model: {self.embedding_model_name}")
return
# Create ONNX model
onnx_model = get_onnx_embedding_model(self.embedding_model_name)
if onnx_model:
self.embedding_model = onnx_model
self.embedding_dimension = onnx_model.embedding_dimension
_MODEL_CACHE[cache_key] = onnx_model
logger.info(f"ONNX embedding model loaded successfully. Dimension: {self.embedding_dimension}")
return
else:
logger.warning("ONNX model creation failed, falling back to SentenceTransformer")
except ImportError as e:
logger.warning(f"ONNX dependencies not available: {e}")
except Exception as e:
logger.warning(f"Failed to initialize ONNX embeddings: {e}")
# Fall back to SentenceTransformer
if not SENTENCE_TRANSFORMERS_AVAILABLE:
logger.warning(
"Neither ONNX nor sentence-transformers available; using pure-Python hash embeddings (quality reduced)."
)
self.embedding_model = _HashEmbeddingModel(self.embedding_dimension)
return
# Check cache first
cache_key = self.embedding_model_name
if cache_key in _MODEL_CACHE:
self.embedding_model = _MODEL_CACHE[cache_key]
logger.info(f"Using cached embedding model: {self.embedding_model_name}")
return
# Get system info for optimal settings
system_info = get_system_info()
device = get_torch_device()
logger.info(f"Loading embedding model: {self.embedding_model_name}")
logger.info(f"Using device: {device}")
# Configure for offline mode if models are cached
# Only set offline mode if we detect cached models to prevent initial downloads
hf_home = os.environ.get('HF_HOME', os.path.expanduser("~/.cache/huggingface"))
model_cache_path = os.path.join(hf_home, "hub", f"models--sentence-transformers--{self.embedding_model_name.replace('/', '--')}")
if os.path.exists(model_cache_path):
os.environ['HF_HUB_OFFLINE'] = '1'
os.environ['TRANSFORMERS_OFFLINE'] = '1'
logger.info("📦 Found cached model - enabling offline mode")
# Try to load from cache first, fallback to direct model name
try:
# First try loading from Hugging Face cache
hf_home = os.environ.get('HF_HOME', os.path.expanduser("~/.cache/huggingface"))
cache_path = os.path.join(hf_home, "hub", f"models--sentence-transformers--{self.embedding_model_name.replace('/', '--')}")
if os.path.exists(cache_path):
# Find the snapshot directory
snapshots_path = os.path.join(cache_path, "snapshots")
if os.path.exists(snapshots_path):
snapshot_dirs = [d for d in os.listdir(snapshots_path) if os.path.isdir(os.path.join(snapshots_path, d))]
if snapshot_dirs:
model_path = os.path.join(snapshots_path, snapshot_dirs[0])
logger.info(f"Loading model from cache: {model_path}")
self.embedding_model = SentenceTransformer(model_path, device=device)
else:
raise FileNotFoundError("No snapshot found")
else:
raise FileNotFoundError("No snapshots directory")
else:
raise FileNotFoundError("No cache found")
except FileNotFoundError as cache_error:
logger.warning(f"Model not in cache: {cache_error}")
# Try to download the model (may fail in Docker without network)
try:
logger.info("Attempting to download model from Hugging Face...")
self.embedding_model = SentenceTransformer(self.embedding_model_name, device=device)
except OSError as download_error:
# Check if this is a network connectivity issue
error_msg = str(download_error)
if any(phrase in error_msg.lower() for phrase in ['connection', 'network', 'couldn\'t connect', 'huggingface.co']):
# Provide Docker-specific help
docker_help = self._get_docker_network_help() if is_docker else ""
raise RuntimeError(
f"🔌 Model Download Error: Cannot connect to huggingface.co\n"
f"{'='*60}\n"
f"The model '{self.embedding_model_name}' needs to be downloaded but the connection failed.\n"
f"{docker_help}"
f"\n💡 Solutions:\n"
f"1. Mount pre-downloaded models as a volume:\n"
f" # On host machine, download the model first:\n"
f" python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('{self.embedding_model_name}')\"\n"
f" \n"
f" # Then run container with cache mount:\n"
f" docker run -v ~/.cache/huggingface:/root/.cache/huggingface ...\n"
f"\n"
f"2. Configure Docker network (if behind proxy):\n"
f" docker run -e HTTPS_PROXY=your-proxy -e HTTP_PROXY=your-proxy ...\n"
f"\n"
f"3. Use offline mode with pre-cached models:\n"
f" docker run -e HF_HUB_OFFLINE=1 -e TRANSFORMERS_OFFLINE=1 ...\n"
f"\n"
f"4. Use host network mode (if appropriate for your setup):\n"
f" docker run --network host ...\n"
f"\n"
f"📚 See docs: https://github.com/doobidoo/mcp-memory-service/blob/main/docs/deployment/docker.md#model-download-issues\n"
f"{'='*60}"
) from download_error
else:
# Re-raise if not a network issue
raise
except Exception as cache_error:
logger.warning(f"Failed to load from cache: {cache_error}")
# Fallback to normal loading (may fail if offline)
logger.info("Attempting normal model loading...")
self.embedding_model = SentenceTransformer(self.embedding_model_name, device=device)
# Update embedding dimension based on actual model
test_embedding = self.embedding_model.encode(["test"], convert_to_numpy=True)
self.embedding_dimension = test_embedding.shape[1]
# Cache the model
_MODEL_CACHE[cache_key] = self.embedding_model
logger.info(f"✅ Embedding model loaded successfully. Dimension: {self.embedding_dimension}")
except RuntimeError:
# Re-raise our custom errors with helpful messages
raise
except Exception as e:
logger.error(f"Failed to initialize embedding model: {str(e)}")
logger.error(traceback.format_exc())
logger.warning(
"Falling back to pure-Python hash embeddings due to embedding init failure (quality reduced)."
)
self.embedding_model = _HashEmbeddingModel(self.embedding_dimension)
def _get_docker_network_help(self) -> str:
"""Get Docker-specific network troubleshooting help."""
# Try to detect the Docker platform
docker_platform = "Docker"
if os.environ.get('DOCKER_DESKTOP_VERSION'):
docker_platform = "Docker Desktop"
elif os.path.exists('/proc/version'):
try:
with open('/proc/version', 'r') as f:
version = f.read().lower()
if 'microsoft' in version:
docker_platform = "Docker Desktop for Windows"
except (IOError, FileNotFoundError):
pass
return (
f"\n🐳 Docker Environment Detected ({docker_platform})\n"
f"This appears to be a network connectivity issue common in Docker containers.\n"
)
def _generate_embedding(self, text: str) -> List[float]:
"""Generate embedding for text."""
if not self.embedding_model:
raise RuntimeError("No embedding model available. Ensure sentence-transformers is installed and model is loaded.")
try:
# Check cache first
if self.enable_cache:
cache_key = hash(text)
if cache_key in _EMBEDDING_CACHE:
return _EMBEDDING_CACHE[cache_key]
# Generate embedding
embedding = self.embedding_model.encode([text], convert_to_numpy=True)[0]
if hasattr(embedding, "tolist"):
embedding_list = embedding.tolist()
else:
embedding_list = list(embedding)
# Validate embedding
if not embedding_list:
raise ValueError("Generated embedding is empty")
if len(embedding_list) != self.embedding_dimension:
raise ValueError(f"Embedding dimension mismatch: expected {self.embedding_dimension}, got {len(embedding_list)}")
# Validate values are finite
if not all(isinstance(x, (int, float)) and not (x != x) and x != float('inf') and x != float('-inf') for x in embedding_list):
raise ValueError("Embedding contains invalid values (NaN or infinity)")
# Cache the result
if self.enable_cache:
_EMBEDDING_CACHE[cache_key] = embedding_list
return embedding_list
except Exception as e:
logger.error(f"Failed to generate embedding: {str(e)}")
raise RuntimeError(f"Failed to generate embedding: {str(e)}") from e
async def store(self, memory: Memory) -> Tuple[bool, str]:
"""Store a memory in the SQLite-vec database."""
try:
if not self.conn:
return False, "Database not initialized"
# Check for duplicates (only active memories, not soft-deleted)
cursor = self.conn.execute(
'SELECT content_hash FROM memories WHERE content_hash = ? AND deleted_at IS NULL',
(memory.content_hash,)
)
if cursor.fetchone():
return False, "Duplicate content detected"
# Generate and validate embedding
try:
embedding = self._generate_embedding(memory.content)
except Exception as e:
logger.error(f"Failed to generate embedding for memory {memory.content_hash}: {str(e)}")
return False, f"Failed to generate embedding: {str(e)}"
# Prepare metadata
tags_str = ",".join(memory.tags) if memory.tags else ""
metadata_str = json.dumps(memory.metadata) if memory.metadata else "{}"
# Insert into memories table (metadata) with retry logic
def insert_memory():
cursor = self.conn.execute('''
INSERT INTO memories (
content_hash, content, tags, memory_type,
metadata, created_at, updated_at, created_at_iso, updated_at_iso
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
memory.content_hash,
memory.content,
tags_str,
memory.memory_type,
metadata_str,
memory.created_at,
memory.updated_at,
memory.created_at_iso,
memory.updated_at_iso
))
return cursor.lastrowid
memory_rowid = await self._execute_with_retry(insert_memory)
# Insert into embeddings table with retry logic
def insert_embedding():
# Check if we can insert with specific rowid
try:
self.conn.execute('''
INSERT INTO memory_embeddings (rowid, content_embedding)
VALUES (?, ?)
''', (
memory_rowid,
serialize_float32(embedding)
))
except sqlite3.Error as e:
# If rowid insert fails, try without specifying rowid
logger.warning(f"Failed to insert with rowid {memory_rowid}: {e}. Trying without rowid.")
self.conn.execute('''
INSERT INTO memory_embeddings (content_embedding)
VALUES (?)
''', (
serialize_float32(embedding),
))
await self._execute_with_retry(insert_embedding)
# Commit with retry logic
await self._execute_with_retry(self.conn.commit)
logger.info(f"Successfully stored memory: {memory.content_hash}")
return True, "Memory stored successfully"
except Exception as e:
error_msg = f"Failed to store memory: {str(e)}"
logger.error(error_msg)
logger.error(traceback.format_exc())
return False, error_msg
async def retrieve(self, query: str, n_results: int = 5) -> List[MemoryQueryResult]:
"""Retrieve memories using semantic search."""
try:
if not self.conn:
logger.error("Database not initialized")
return []
if not self.embedding_model:
logger.warning("No embedding model available, cannot perform semantic search")
return []
# Generate query embedding
try:
query_embedding = self._generate_embedding(query)
except Exception as e:
logger.error(f"Failed to generate query embedding: {str(e)}")
return []
# First, check if embeddings table has data
cursor = self.conn.execute('SELECT COUNT(*) FROM memory_embeddings')
embedding_count = cursor.fetchone()[0]
if embedding_count == 0:
logger.warning("No embeddings found in database. Memories may have been stored without embeddings.")
return []
# Perform vector similarity search using JOIN with retry logic
def search_memories():
# Try direct rowid join first - use k=? syntax for sqlite-vec
# Note: ORDER BY distance is implicit with k=? and redundant in subquery
cursor = self.conn.execute('''
SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
e.distance
FROM memories m
INNER JOIN (
SELECT rowid, distance
FROM memory_embeddings
WHERE content_embedding MATCH ? AND k = ?
) e ON m.id = e.rowid
WHERE m.deleted_at IS NULL
ORDER BY e.distance
''', (serialize_float32(query_embedding), n_results))
# Check if we got results
results = cursor.fetchall()
if not results:
# Log debug info
logger.debug("No results from vector search. Checking database state...")
mem_count = self.conn.execute('SELECT COUNT(*) FROM memories').fetchone()[0]
logger.debug(f"Memories table has {mem_count} rows, embeddings table has {embedding_count} rows")
return results
search_results = await self._execute_with_retry(search_memories)
results = []
for row in search_results:
try:
# Parse row data
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso, distance = row[5:]
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
# Create Memory object
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
# Calculate relevance score (lower distance = higher relevance)
# For cosine distance: distance ranges from 0 (identical) to 2 (opposite)
# Convert to similarity score: 1 - (distance/2) gives 0-1 range
relevance_score = max(0.0, 1.0 - (float(distance) / 2.0)) if distance is not None else 0.0
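                    # Worked example of the conversion above: distance 0.0 -> 1.0,
                    # distance 0.3 -> 0.85, distance 2.0 (opposite vectors) -> 0.0.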
# Record access for quality scoring (implicit signals)
memory.record_access(query)
results.append(MemoryQueryResult(
memory=memory,
relevance_score=relevance_score,
debug_info={"distance": distance, "backend": "sqlite-vec"}
))
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
# Persist updated metadata for accessed memories
for result in results:
try:
await self._persist_access_metadata(result.memory)
except Exception as e:
logger.warning(f"Failed to persist access metadata: {e}")
logger.info(f"Retrieved {len(results)} memories for query: {query}")
return results
except Exception as e:
logger.error(f"Failed to retrieve memories: {str(e)}")
logger.error(traceback.format_exc())
return []
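    # End-to-end sketch (hedged: assumes Memory's remaining fields default
    # sensibly and that generate_content_hash(content) is the helper imported
    # above; not taken verbatim from the original file):
    #
    #   text = "Fixed WAL checkpoint bug"
    #   mem = Memory(content=text,
    #                content_hash=generate_content_hash(text),
    #                tags=["bugfix", "sqlite"])
    #   ok, msg = await storage.store(mem)
    #   hits = await storage.retrieve("WAL checkpoint", n_results=3)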
async def search_by_tag(self, tags: List[str], time_start: Optional[float] = None) -> List[Memory]:
"""Search memories by tags with optional time filtering.
Args:
tags: List of tags to search for (OR logic)
time_start: Optional Unix timestamp (in seconds) to filter memories created after this time
Returns:
List of Memory objects matching the tag criteria and time filter
"""
try:
if not self.conn:
logger.error("Database not initialized")
return []
if not tags:
return []
# Build query for tag search (OR logic) with EXACT tag matching
# Uses GLOB for case-sensitive matching (LIKE is case-insensitive in SQLite)
# Pattern: (',' || tags || ',') GLOB '*,tag,*' matches exact tag in comma-separated list
# Strip whitespace from tags to match get_all_tags_with_counts behavior
stripped_tags = [tag.strip() for tag in tags]
tag_conditions = " OR ".join(["(',' || REPLACE(tags, ' ', '') || ',') GLOB ?" for _ in stripped_tags])
tag_params = [f"*,{tag},*" for tag in stripped_tags]
# Add time filter to WHERE clause if provided
# Also exclude soft-deleted memories
where_clause = f"WHERE ({tag_conditions}) AND deleted_at IS NULL"
if time_start is not None:
where_clause += " AND created_at >= ?"
tag_params.append(time_start)
cursor = self.conn.execute(f'''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories
{where_clause}
ORDER BY created_at DESC
''', tag_params)
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
# Parse tags and metadata
memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
memory = Memory(
content=content,
content_hash=content_hash,
tags=memory_tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
results.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Found {len(results)} memories with tags: {tags}")
return results
except Exception as e:
logger.error(f"Failed to search by tags: {str(e)}")
logger.error(traceback.format_exc())
return []
async def search_by_tags(
self,
tags: List[str],
operation: str = "AND",
time_start: Optional[float] = None,
time_end: Optional[float] = None
) -> List[Memory]:
"""Search memories by tags with AND/OR operation and optional time filtering."""
try:
if not self.conn:
logger.error("Database not initialized")
return []
if not tags:
return []
normalized_operation = operation.strip().upper() if isinstance(operation, str) else "AND"
if normalized_operation not in {"AND", "OR"}:
logger.warning("Unsupported tag operation %s; defaulting to AND", operation)
normalized_operation = "AND"
# Use GLOB for case-sensitive exact tag matching
# Pattern: (',' || tags || ',') GLOB '*,tag,*' matches exact tag in comma-separated list
# Strip whitespace from tags to match get_all_tags_with_counts behavior
stripped_tags = [tag.strip() for tag in tags]
comparator = " AND " if normalized_operation == "AND" else " OR "
tag_conditions = comparator.join(["(',' || REPLACE(tags, ' ', '') || ',') GLOB ?" for _ in stripped_tags])
tag_params = [f"*,{tag},*" for tag in stripped_tags]
where_conditions = [f"({tag_conditions})"] if tag_conditions else []
# Always exclude soft-deleted memories
where_conditions.append("deleted_at IS NULL")
if time_start is not None:
where_conditions.append("created_at >= ?")
tag_params.append(time_start)
if time_end is not None:
where_conditions.append("created_at <= ?")
tag_params.append(time_end)
where_clause = f"WHERE {' AND '.join(where_conditions)}" if where_conditions else ""
cursor = self.conn.execute(f'''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories
{where_clause}
ORDER BY updated_at DESC
''', tag_params)
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row
# Parse tags and metadata
memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
memory = Memory(
content=content,
content_hash=content_hash,
tags=memory_tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
results.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Found {len(results)} memories with tags: {tags} (operation: {operation})")
return results
except Exception as e:
logger.error(f"Failed to search by tags with operation {operation}: {str(e)}")
logger.error(traceback.format_exc())
return []
    async def search_by_tag_chronological(self, tags: List[str], limit: Optional[int] = None, offset: int = 0) -> List[Memory]:
"""
Search memories by tags with chronological ordering and database-level pagination.
This method addresses Gemini Code Assist's performance concern by pushing
ordering and pagination to the database level instead of doing it in Python.
Args:
tags: List of tags to search for
limit: Maximum number of memories to return (None for all)
offset: Number of memories to skip (for pagination)
Returns:
List of Memory objects ordered by created_at DESC
"""
try:
if not self.conn:
logger.error("Database not initialized")
return []
if not tags:
return []
# Build query for tag search (OR logic) with database-level ordering and pagination
# Use GLOB for case-sensitive exact tag matching
# Strip whitespace from tags to match get_all_tags_with_counts behavior
stripped_tags = [tag.strip() for tag in tags]
tag_conditions = " OR ".join(["(',' || REPLACE(tags, ' ', '') || ',') GLOB ?" for _ in stripped_tags])
tag_params = [f"*,{tag},*" for tag in stripped_tags]
# Build pagination clauses
limit_clause = f"LIMIT {limit}" if limit is not None else ""
offset_clause = f"OFFSET {offset}" if offset > 0 else ""
query = f'''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories
                WHERE ({tag_conditions}) AND deleted_at IS NULL
ORDER BY created_at DESC
{limit_clause} {offset_clause}
'''
cursor = self.conn.execute(query, tag_params)
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row
# Parse tags and metadata
memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
memory = Memory(
content=content,
content_hash=content_hash,
tags=memory_tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
results.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Found {len(results)} memories with tags: {tags} using database-level pagination (limit={limit}, offset={offset})")
return results
except Exception as e:
logger.error(f"Failed to search by tags chronologically: {str(e)}")
logger.error(traceback.format_exc())
return []
async def delete(self, content_hash: str) -> Tuple[bool, str]:
"""
Soft-delete a memory by setting deleted_at timestamp.
The memory is marked as deleted but retained for sync conflict resolution.
Use purge_deleted() to permanently remove old tombstones.
"""
try:
if not self.conn:
return False, "Database not initialized"
# Get the id first to delete corresponding embedding
cursor = self.conn.execute(
'SELECT id FROM memories WHERE content_hash = ? AND deleted_at IS NULL',
(content_hash,)
)
row = cursor.fetchone()
if row:
memory_id = row[0]
# Delete embedding (won't be needed for search)
self.conn.execute('DELETE FROM memory_embeddings WHERE rowid = ?', (memory_id,))
# Soft-delete: set deleted_at timestamp instead of DELETE
cursor = self.conn.execute(
'UPDATE memories SET deleted_at = ? WHERE content_hash = ? AND deleted_at IS NULL',
(time.time(), content_hash)
)
self.conn.commit()
else:
return False, f"Memory with hash {content_hash} not found"
if cursor.rowcount > 0:
logger.info(f"Soft-deleted memory: {content_hash}")
return True, f"Successfully deleted memory {content_hash}"
else:
return False, f"Memory with hash {content_hash} not found"
except Exception as e:
error_msg = f"Failed to delete memory: {str(e)}"
logger.error(error_msg)
return False, error_msg
async def is_deleted(self, content_hash: str) -> bool:
"""
Check if a memory has been soft-deleted (tombstone exists).
Used by hybrid sync to prevent re-syncing deleted memories from cloud.
"""
try:
if not self.conn:
return False
cursor = self.conn.execute(
'SELECT deleted_at FROM memories WHERE content_hash = ? AND deleted_at IS NOT NULL',
(content_hash,)
)
return cursor.fetchone() is not None
except Exception as e:
logger.error(f"Failed to check if memory is deleted: {str(e)}")
return False
async def purge_deleted(self, older_than_days: int = 30) -> int:
"""
Permanently delete tombstones older than specified days.
This should be called periodically to clean up old soft-deleted records.
Default: 30 days retention to allow all devices to sync deletions.
"""
try:
if not self.conn:
return 0
cutoff = time.time() - (older_than_days * 86400)
cursor = self.conn.execute(
'DELETE FROM memories WHERE deleted_at IS NOT NULL AND deleted_at < ?',
(cutoff,)
)
self.conn.commit()
count = cursor.rowcount
if count > 0:
logger.info(f"Purged {count} tombstones older than {older_than_days} days")
return count
except Exception as e:
logger.error(f"Failed to purge deleted memories: {str(e)}")
return 0
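    # Maintenance sketch (illustrative schedule, not part of the original file):
    # with the default 30-day retention the cutoff is 30 * 86400 = 2,592,000
    # seconds before "now".
    #
    #   purged = await storage.purge_deleted(older_than_days=30)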
async def get_by_hash(self, content_hash: str) -> Optional[Memory]:
"""Get a memory by its content hash."""
try:
if not self.conn:
return None
cursor = self.conn.execute('''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories WHERE content_hash = ? AND deleted_at IS NULL
''', (content_hash,))
row = cursor.fetchone()
if not row:
return None
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_retrieval")
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
return memory
except Exception as e:
logger.error(f"Failed to get memory by hash {content_hash}: {str(e)}")
return None
async def get_all_content_hashes(self, include_deleted: bool = False) -> Set[str]:
"""
Get all content hashes in database for bulk existence checking.
This is optimized for sync operations to avoid individual existence checks.
Returns a set for O(1) lookup performance.
Args:
include_deleted: If True, includes soft-deleted memories. Default False.
Returns:
Set of all content_hash values currently in the database
"""
try:
if not self.conn:
return set()
if include_deleted:
cursor = self.conn.execute('SELECT content_hash FROM memories')
else:
cursor = self.conn.execute('SELECT content_hash FROM memories WHERE deleted_at IS NULL')
return {row[0] for row in cursor.fetchall()}
except Exception as e:
logger.error(f"Failed to get all content hashes: {str(e)}")
return set()
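    # Bulk existence-check sketch (hypothetical sync loop; only the signature
    # above is assumed):
    #
    #   existing = await storage.get_all_content_hashes()
    #   new_memories = [m for m in candidates if m.content_hash not in existing]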
async def delete_by_tag(self, tag: str) -> Tuple[int, str]:
"""Soft-delete memories by tag (exact match only)."""
try:
if not self.conn:
return 0, "Database not initialized"
# Use GLOB for case-sensitive exact tag matching
# Pattern: (',' || tags || ',') GLOB '*,tag,*' matches exact tag in comma-separated list
# Strip whitespace to match get_all_tags_with_counts behavior
stripped_tag = tag.strip()
exact_match_pattern = f"*,{stripped_tag},*"
# Get the ids first to delete corresponding embeddings (only non-deleted)
cursor = self.conn.execute(
"SELECT id FROM memories WHERE (',' || REPLACE(tags, ' ', '') || ',') GLOB ? AND deleted_at IS NULL",
(exact_match_pattern,)
)
memory_ids = [row[0] for row in cursor.fetchall()]
# Delete embeddings (won't be needed for search)
for memory_id in memory_ids:
self.conn.execute('DELETE FROM memory_embeddings WHERE rowid = ?', (memory_id,))
# Soft-delete: set deleted_at timestamp instead of DELETE
cursor = self.conn.execute(
"UPDATE memories SET deleted_at = ? WHERE (',' || REPLACE(tags, ' ', '') || ',') GLOB ? AND deleted_at IS NULL",
(time.time(), exact_match_pattern)
)
self.conn.commit()
count = cursor.rowcount
logger.info(f"Soft-deleted {count} memories with tag: {tag}")
if count > 0:
return count, f"Successfully deleted {count} memories with tag '{tag}'"
else:
return 0, f"No memories found with tag '{tag}'"
except Exception as e:
error_msg = f"Failed to delete by tag: {str(e)}"
logger.error(error_msg)
return 0, error_msg
async def delete_by_tags(self, tags: List[str]) -> Tuple[int, str]:
"""
Soft-delete memories matching ANY of the given tags (optimized single-query version).
Overrides base class implementation for better performance using OR conditions.
"""
try:
if not self.conn:
return 0, "Database not initialized"
if not tags:
return 0, "No tags provided"
# Build OR condition with GLOB for case-sensitive exact tag matching
# Pattern: (',' || tags || ',') GLOB '*,tag,*' matches exact tag in comma-separated list
# Strip whitespace to match get_all_tags_with_counts behavior
stripped_tags = [tag.strip() for tag in tags]
conditions = " OR ".join(["(',' || REPLACE(tags, ' ', '') || ',') GLOB ?" for _ in stripped_tags])
params = [f"*,{tag},*" for tag in stripped_tags]
# Get the ids first to delete corresponding embeddings (only non-deleted)
query = f'SELECT id FROM memories WHERE ({conditions}) AND deleted_at IS NULL'
cursor = self.conn.execute(query, params)
memory_ids = [row[0] for row in cursor.fetchall()]
# Delete from embeddings table using single query with IN clause
if memory_ids:
placeholders = ','.join('?' for _ in memory_ids)
self.conn.execute(f'DELETE FROM memory_embeddings WHERE rowid IN ({placeholders})', memory_ids)
# Soft-delete: set deleted_at timestamp instead of DELETE
update_query = f'UPDATE memories SET deleted_at = ? WHERE ({conditions}) AND deleted_at IS NULL'
cursor = self.conn.execute(update_query, [time.time()] + params)
self.conn.commit()
count = cursor.rowcount
logger.info(f"Soft-deleted {count} memories matching tags: {tags}")
if count > 0:
return count, f"Successfully deleted {count} memories matching {len(tags)} tag(s)"
else:
return 0, f"No memories found matching any of the {len(tags)} tags"
except Exception as e:
error_msg = f"Failed to delete by tags: {str(e)}"
logger.error(error_msg)
return 0, error_msg
async def delete_by_timeframe(self, start_date: date, end_date: date, tag: Optional[str] = None) -> Tuple[int, str]:
"""Delete memories within a specific date range."""
try:
if not self.conn:
return 0, "Database not initialized"
# Convert dates to timestamps
start_ts = datetime.combine(start_date, datetime.min.time()).timestamp()
end_ts = datetime.combine(end_date, datetime.max.time()).timestamp()
if tag:
# Delete with tag filter
cursor = self.conn.execute('''
SELECT content_hash FROM memories
WHERE created_at >= ? AND created_at <= ?
AND (tags LIKE ? OR tags LIKE ? OR tags LIKE ? OR tags = ?)
AND deleted_at IS NULL
''', (start_ts, end_ts, f"{tag},%", f"%,{tag},%", f"%,{tag}", tag))
else:
# Delete all in timeframe
cursor = self.conn.execute('''
SELECT content_hash FROM memories
WHERE created_at >= ? AND created_at <= ?
AND deleted_at IS NULL
''', (start_ts, end_ts))
hashes = [row[0] for row in cursor.fetchall()]
# Use soft-delete for each hash
deleted_count = 0
for content_hash in hashes:
success, _ = await self.delete(content_hash)
if success:
deleted_count += 1
return deleted_count, f"Deleted {deleted_count} memories from {start_date} to {end_date}" + (f" with tag '{tag}'" if tag else "")
except Exception as e:
logger.error(f"Error deleting by timeframe: {str(e)}")
return 0, f"Error: {str(e)}"
async def delete_before_date(self, before_date: date, tag: Optional[str] = None) -> Tuple[int, str]:
"""Delete memories created before a specific date."""
try:
if not self.conn:
return 0, "Database not initialized"
# Convert date to timestamp
before_ts = datetime.combine(before_date, datetime.min.time()).timestamp()
if tag:
# Delete with tag filter
cursor = self.conn.execute('''
SELECT content_hash FROM memories
WHERE created_at < ?
AND (tags LIKE ? OR tags LIKE ? OR tags LIKE ? OR tags = ?)
AND deleted_at IS NULL
''', (before_ts, f"{tag},%", f"%,{tag},%", f"%,{tag}", tag))
else:
# Delete all before date
cursor = self.conn.execute('''
SELECT content_hash FROM memories
WHERE created_at < ?
AND deleted_at IS NULL
''', (before_ts,))
hashes = [row[0] for row in cursor.fetchall()]
# Use soft-delete for each hash
deleted_count = 0
for content_hash in hashes:
success, _ = await self.delete(content_hash)
if success:
deleted_count += 1
return deleted_count, f"Deleted {deleted_count} memories before {before_date}" + (f" with tag '{tag}'" if tag else "")
except Exception as e:
logger.error(f"Error deleting before date: {str(e)}")
return 0, f"Error: {str(e)}"
async def get_by_exact_content(self, content: str) -> List[Memory]:
"""Retrieve memories by exact content match."""
try:
if not self.conn:
return []
cursor = self.conn.execute('''
SELECT content, tags, memory_type, metadata, content_hash,
created_at, created_at_iso, updated_at, updated_at_iso
FROM memories
WHERE content = ? AND deleted_at IS NULL
''', (content,))
memories = []
for row in cursor.fetchall():
content_str, tags_str, memory_type, metadata_str, content_hash, \
created_at, created_at_iso, updated_at, updated_at_iso = row
metadata = self._safe_json_loads(metadata_str, "get_by_exact_content")
tags = [tag.strip() for tag in tags_str.split(',')] if tags_str else []
memory = Memory(
content=content_str,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
created_at_iso=created_at_iso,
updated_at=updated_at,
updated_at_iso=updated_at_iso
)
memories.append(memory)
return memories
except Exception as e:
logger.error(f"Error in exact content match: {str(e)}")
return []
async def cleanup_duplicates(self) -> Tuple[int, str]:
"""Soft-delete duplicate memories based on content hash."""
try:
if not self.conn:
return 0, "Database not initialized"
# Soft delete duplicates (keep the first occurrence by rowid)
cursor = self.conn.execute('''
UPDATE memories
SET deleted_at = ?
WHERE rowid NOT IN (
SELECT MIN(rowid)
FROM memories
WHERE deleted_at IS NULL
GROUP BY content_hash
)
AND deleted_at IS NULL
''', (time.time(),))
self.conn.commit()
count = cursor.rowcount
logger.info(f"Soft-deleted {count} duplicate memories")
if count > 0:
return count, f"Successfully soft-deleted {count} duplicate memories"
else:
return 0, "No duplicate memories found"
except Exception as e:
error_msg = f"Failed to cleanup duplicates: {str(e)}"
logger.error(error_msg)
return 0, error_msg
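    # NOTE: cleanup_duplicates above keeps the earliest row (lowest rowid) per
    # content_hash among non-deleted memories and soft-deletes the rest; the
    # corresponding rows in memory_embeddings are left in place.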
async def update_memory_metadata(self, content_hash: str, updates: Dict[str, Any], preserve_timestamps: bool = True) -> Tuple[bool, str]:
"""Update memory metadata without recreating the entire memory entry."""
try:
if not self.conn:
return False, "Database not initialized"
# Get current memory
cursor = self.conn.execute('''
SELECT content, tags, memory_type, metadata, created_at, created_at_iso
FROM memories WHERE content_hash = ?
''', (content_hash,))
row = cursor.fetchone()
if not row:
return False, f"Memory with hash {content_hash} not found"
content, current_tags, current_type, current_metadata_str, created_at, created_at_iso = row
# Parse current metadata
current_metadata = self._safe_json_loads(current_metadata_str, "update_memory_metadata")
# Apply updates
new_tags = current_tags
new_type = current_type
new_metadata = current_metadata.copy()
# Handle tag updates
if "tags" in updates:
if isinstance(updates["tags"], list):
new_tags = ",".join(updates["tags"])
else:
return False, "Tags must be provided as a list of strings"
# Handle memory type updates
if "memory_type" in updates:
new_type = updates["memory_type"]
# Handle metadata updates
if "metadata" in updates:
if isinstance(updates["metadata"], dict):
new_metadata.update(updates["metadata"])
else:
return False, "Metadata must be provided as a dictionary"
# Handle other custom fields
protected_fields = {
"content", "content_hash", "tags", "memory_type", "metadata",
"embedding", "created_at", "created_at_iso", "updated_at", "updated_at_iso"
}
for key, value in updates.items():
if key not in protected_fields:
new_metadata[key] = value
# Update timestamps
now = time.time()
now_iso = datetime.utcfromtimestamp(now).isoformat() + "Z"
# Handle timestamp updates based on preserve_timestamps flag
if not preserve_timestamps:
# When preserve_timestamps=False, use timestamps from updates dict if provided
# This allows syncing timestamps from source (e.g., Cloudflare → SQLite)
# Always preserve created_at (never reset to current time!)
created_at = updates.get('created_at', created_at)
created_at_iso = updates.get('created_at_iso', created_at_iso)
# Use updated_at from updates or current time
updated_at = updates.get('updated_at', now)
updated_at_iso = updates.get('updated_at_iso', now_iso)
else:
# preserve_timestamps=True: only update updated_at to current time
updated_at = now
updated_at_iso = now_iso
# Update the memory
self.conn.execute('''
UPDATE memories SET
tags = ?, memory_type = ?, metadata = ?,
updated_at = ?, updated_at_iso = ?,
created_at = ?, created_at_iso = ?
WHERE content_hash = ?
''', (
new_tags, new_type, json.dumps(new_metadata),
updated_at, updated_at_iso, created_at, created_at_iso, content_hash
))
self.conn.commit()
# Create summary of updated fields
updated_fields = []
if "tags" in updates:
updated_fields.append("tags")
if "memory_type" in updates:
updated_fields.append("memory_type")
if "metadata" in updates:
updated_fields.append("custom_metadata")
for key in updates.keys():
if key not in protected_fields and key not in ["tags", "memory_type", "metadata"]:
updated_fields.append(key)
updated_fields.append("updated_at")
summary = f"Updated fields: {', '.join(updated_fields)}"
logger.info(f"Successfully updated metadata for memory {content_hash}")
return True, summary
except Exception as e:
error_msg = f"Error updating memory metadata: {str(e)}"
logger.error(error_msg)
logger.error(traceback.format_exc())
return False, error_msg
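    # Usage sketch (illustrative values only) -- update_memory_metadata above recognizes
    # "tags" (list of strings), "memory_type" (string) and "metadata" (dict, merged into
    # the existing metadata); any other non-protected key is stored as a custom metadata field:
    #
    #     await storage.update_memory_metadata(
    #         content_hash="abc123...",          # hypothetical hash
    #         updates={"tags": ["important", "reviewed"], "priority": "high"},
    #         preserve_timestamps=True,          # keep created_at, refresh updated_at
    #     )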
async def update_memories_batch(self, memories: List[Memory]) -> List[bool]:
"""
Update multiple memories in a single database transaction for optimal performance.
This method processes all updates in a single transaction, significantly improving
performance compared to individual update_memory() calls.
Args:
memories: List of Memory objects with updated fields
Returns:
List of success booleans, one for each memory in the batch
"""
if not memories:
return []
try:
if not self.conn:
return [False] * len(memories)
results = [False] * len(memories)
now = time.time()
now_iso = datetime.utcfromtimestamp(now).isoformat() + "Z"
            # All updates below run inside the connection's implicit transaction and are
            # committed together at the end (sqlite3 opens a transaction on the first write)
cursor = self.conn.cursor()
for idx, memory in enumerate(memories):
try:
# Get current memory data
cursor.execute('''
SELECT content, tags, memory_type, metadata, created_at, created_at_iso
FROM memories WHERE content_hash = ?
''', (memory.content_hash,))
row = cursor.fetchone()
if not row:
logger.warning(f"Memory {memory.content_hash} not found during batch update")
continue
content, current_tags, current_type, current_metadata_str, created_at, created_at_iso = row
# Parse current metadata
current_metadata = self._safe_json_loads(current_metadata_str, "update_memories_batch")
# Merge metadata (new metadata takes precedence)
if memory.metadata:
merged_metadata = current_metadata.copy()
merged_metadata.update(memory.metadata)
else:
merged_metadata = current_metadata
# Prepare new values
new_tags = ",".join(memory.tags) if memory.tags else current_tags
new_type = memory.memory_type if memory.memory_type else current_type
# Execute update
cursor.execute('''
UPDATE memories SET
tags = ?, memory_type = ?, metadata = ?,
updated_at = ?, updated_at_iso = ?
WHERE content_hash = ?
''', (
new_tags, new_type, json.dumps(merged_metadata),
now, now_iso, memory.content_hash
))
results[idx] = True
except Exception as e:
logger.warning(f"Failed to update memory {memory.content_hash} in batch: {e}")
continue
# Commit all updates in a single transaction
self.conn.commit()
success_count = sum(results)
logger.info(f"Batch update completed: {success_count}/{len(memories)} memories updated successfully")
return results
except Exception as e:
# Rollback on error
if self.conn:
self.conn.rollback()
logger.error(f"Batch update failed: {e}")
logger.error(traceback.format_exc())
return [False] * len(memories)
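    # NOTE: update_memories_batch above commits all successful row updates in a single
    # transaction; memories that are missing or fail individually are logged, skipped,
    # and reported as False at their original position in the returned list.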
async def get_stats(self) -> Dict[str, Any]:
"""Get storage statistics."""
try:
if not self.conn:
return {"error": "Database not initialized"}
# Exclude soft-deleted memories from all stats
cursor = self.conn.execute('SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL')
total_memories = cursor.fetchone()[0]
# Count unique individual tags (not tag sets)
cursor = self.conn.execute('SELECT tags FROM memories WHERE tags IS NOT NULL AND tags != "" AND deleted_at IS NULL')
unique_tags = len(set(
tag.strip()
for (tag_string,) in cursor
if tag_string
for tag in tag_string.split(",")
if tag.strip()
))
            # Count memories from this week (last 7 days)
            week_ago = time.time() - (7 * 24 * 60 * 60)
cursor = self.conn.execute('SELECT COUNT(*) FROM memories WHERE created_at >= ? AND deleted_at IS NULL', (week_ago,))
memories_this_week = cursor.fetchone()[0]
# Get database file size
file_size = os.path.getsize(self.db_path) if os.path.exists(self.db_path) else 0
return {
"backend": "sqlite-vec",
"total_memories": total_memories,
"unique_tags": unique_tags,
"memories_this_week": memories_this_week,
"database_size_bytes": file_size,
"database_size_mb": round(file_size / (1024 * 1024), 2),
"embedding_model": self.embedding_model_name,
"embedding_dimension": self.embedding_dimension
}
except sqlite3.Error as e:
logger.error(f"Database error getting stats: {str(e)}")
return {"error": f"Database error: {str(e)}"}
except OSError as e:
logger.error(f"File system error getting stats: {str(e)}")
return {"error": f"File system error: {str(e)}"}
except Exception as e:
logger.error(f"Unexpected error getting stats: {str(e)}")
return {"error": f"Unexpected error: {str(e)}"}
def sanitized(self, tags):
"""Sanitize and normalize tags to a JSON string.
This method provides compatibility with the storage backend interface.
"""
if tags is None:
return json.dumps([])
# If we get a string, split it into an array
if isinstance(tags, str):
tags = [tag.strip() for tag in tags.split(",") if tag.strip()]
# If we get an array, use it directly
elif isinstance(tags, list):
tags = [str(tag).strip() for tag in tags if str(tag).strip()]
else:
return json.dumps([])
# Return JSON string representation of the array
return json.dumps(tags)
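    # NOTE: recall() below combines an optional semantic query with time-range filtering
    # and falls back to a purely time-based SELECT if the vector search fails. Unlike
    # most other read paths in this class, its queries do not filter on
    # "deleted_at IS NULL", so soft-deleted memories may still be returned, particularly
    # by the time-based path.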
async def recall(self, query: Optional[str] = None, n_results: int = 5, start_timestamp: Optional[float] = None, end_timestamp: Optional[float] = None) -> List[MemoryQueryResult]:
"""
Retrieve memories with combined time filtering and optional semantic search.
Args:
query: Optional semantic search query. If None, only time filtering is applied.
n_results: Maximum number of results to return.
start_timestamp: Optional start time for filtering.
end_timestamp: Optional end time for filtering.
Returns:
List of MemoryQueryResult objects.
"""
try:
if not self.conn:
logger.error("Database not initialized, cannot retrieve memories")
return []
# Build time filtering WHERE clause
time_conditions = []
params = []
if start_timestamp is not None:
time_conditions.append("created_at >= ?")
params.append(float(start_timestamp))
if end_timestamp is not None:
time_conditions.append("created_at <= ?")
params.append(float(end_timestamp))
time_where = " AND ".join(time_conditions) if time_conditions else ""
logger.info(f"Time filtering conditions: {time_where}, params: {params}")
# Determine whether to use semantic search or just time-based filtering
if query and self.embedding_model:
# Combined semantic search with time filtering
try:
# Generate query embedding
query_embedding = self._generate_embedding(query)
# Build SQL query with time filtering
base_query = '''
SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
e.distance
FROM memories m
JOIN (
SELECT rowid, distance
FROM memory_embeddings
WHERE content_embedding MATCH ? AND k = ?
) e ON m.id = e.rowid
'''
if time_where:
base_query += f" WHERE {time_where}"
base_query += " ORDER BY e.distance"
# Prepare parameters: embedding, limit, then time filter params
query_params = [serialize_float32(query_embedding), n_results] + params
cursor = self.conn.execute(base_query, query_params)
results = []
for row in cursor.fetchall():
try:
# Parse row data
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso, distance = row[5:]
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
# Create Memory object
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
# Calculate relevance score (lower distance = higher relevance)
relevance_score = max(0.0, 1.0 - distance)
results.append(MemoryQueryResult(
memory=memory,
relevance_score=relevance_score,
debug_info={"distance": distance, "backend": "sqlite-vec", "time_filtered": bool(time_where)}
))
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Retrieved {len(results)} memories for semantic query with time filter")
return results
except Exception as query_error:
logger.error(f"Error in semantic search with time filter: {str(query_error)}")
# Fall back to time-based retrieval on error
logger.info("Falling back to time-based retrieval")
# Time-based filtering only (or fallback from failed semantic search)
base_query = '''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories
'''
if time_where:
base_query += f" WHERE {time_where}"
base_query += " ORDER BY created_at DESC LIMIT ?"
# Add limit parameter
params.append(n_results)
cursor = self.conn.execute(base_query, params)
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
# For time-based retrieval, we don't have a relevance score
results.append(MemoryQueryResult(
memory=memory,
relevance_score=None,
debug_info={"backend": "sqlite-vec", "time_filtered": bool(time_where), "query_type": "time_based"}
))
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Retrieved {len(results)} memories for time-based query")
return results
except Exception as e:
logger.error(f"Error in recall: {str(e)}")
logger.error(traceback.format_exc())
return []
async def get_all_memories(self) -> List[Memory]:
"""
Get all memories from the database.
Returns:
List of all Memory objects in the database.
"""
try:
if not self.conn:
logger.error("Database not initialized, cannot retrieve memories")
return []
cursor = self.conn.execute('''
SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
e.content_embedding
FROM memories m
LEFT JOIN memory_embeddings e ON m.id = e.rowid
WHERE m.deleted_at IS NULL
ORDER BY m.created_at DESC
''')
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso = row[5:9]
embedding_blob = row[9] if len(row) > 9 else None
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
# Deserialize embedding if present
embedding = None
if embedding_blob:
embedding = deserialize_embedding(embedding_blob)
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
embedding=embedding,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
results.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Retrieved {len(results)} total memories")
return results
except Exception as e:
logger.error(f"Error getting all memories: {str(e)}")
return []
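    # NOTE: get_all_memories is defined twice in this class; because Python binds the
    # most recent definition, the parameterized version further down (with limit, offset,
    # memory_type and tags arguments) is the one that is actually used at runtime.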
async def get_memories_by_time_range(self, start_time: float, end_time: float) -> List[Memory]:
"""Get memories within a specific time range."""
try:
await self.initialize()
cursor = self.conn.execute('''
SELECT content_hash, content, tags, memory_type, metadata,
created_at, updated_at, created_at_iso, updated_at_iso
FROM memories
WHERE created_at BETWEEN ? AND ?
ORDER BY created_at DESC
''', (start_time, end_time))
results = []
for row in cursor.fetchall():
try:
content_hash, content, tags_str, memory_type, metadata_str = row[:5]
created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
# Parse tags and metadata
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
metadata = self._safe_json_loads(metadata_str, "memory_metadata")
memory = Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
results.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory result: {parse_error}")
continue
logger.info(f"Retrieved {len(results)} memories in time range {start_time}-{end_time}")
return results
except Exception as e:
logger.error(f"Error getting memories by time range: {str(e)}")
return []
async def get_memory_connections(self) -> Dict[str, int]:
"""Get memory connection statistics."""
try:
await self.initialize()
# For now, return basic statistics based on tags and content similarity
cursor = self.conn.execute('''
SELECT tags, COUNT(*) as count
FROM memories
WHERE tags IS NOT NULL AND tags != ''
GROUP BY tags
''')
connections = {}
for row in cursor.fetchall():
tags_str, count = row
if tags_str:
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
for tag in tags:
connections[f"tag:{tag}"] = connections.get(f"tag:{tag}", 0) + count
return connections
except Exception as e:
logger.error(f"Error getting memory connections: {str(e)}")
return {}
async def get_access_patterns(self) -> Dict[str, datetime]:
"""Get memory access pattern statistics."""
try:
await self.initialize()
# Return recent access patterns based on updated_at timestamps
cursor = self.conn.execute('''
SELECT content_hash, updated_at_iso
FROM memories
WHERE updated_at_iso IS NOT NULL
ORDER BY updated_at DESC
LIMIT 100
''')
patterns = {}
for row in cursor.fetchall():
content_hash, updated_at_iso = row
try:
patterns[content_hash] = datetime.fromisoformat(updated_at_iso.replace('Z', '+00:00'))
except Exception:
# Fallback for timestamp parsing issues
patterns[content_hash] = datetime.now()
return patterns
except Exception as e:
logger.error(f"Error getting access patterns: {str(e)}")
return {}
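    # NOTE: get_memories_by_time_range, get_memory_connections and get_access_patterns
    # above query the memories table without a "deleted_at IS NULL" condition, so their
    # results and counts include soft-deleted memories.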
def _row_to_memory(self, row) -> Optional[Memory]:
"""Convert database row to Memory object."""
try:
# Handle both 9-column (without embedding) and 10-column (with embedding) rows
content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row[:9]
embedding_blob = row[9] if len(row) > 9 else None
# Parse tags (comma-separated format)
tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
# Parse metadata
metadata = self._safe_json_loads(metadata_str, "get_by_hash")
# Deserialize embedding if present
embedding = None
if embedding_blob:
embedding = deserialize_embedding(embedding_blob)
return Memory(
content=content,
content_hash=content_hash,
tags=tags,
memory_type=memory_type,
metadata=metadata,
embedding=embedding,
created_at=created_at,
updated_at=updated_at,
created_at_iso=created_at_iso,
updated_at_iso=updated_at_iso
)
except Exception as e:
logger.error(f"Error converting row to memory: {str(e)}")
return None
    async def get_all_memories(self, limit: Optional[int] = None, offset: int = 0, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> List[Memory]:
"""
Get all memories in storage ordered by creation time (newest first).
Args:
limit: Maximum number of memories to return (None for all)
offset: Number of memories to skip (for pagination)
memory_type: Optional filter by memory type
tags: Optional filter by tags (matches ANY of the provided tags)
Returns:
List of Memory objects ordered by created_at DESC, optionally filtered by type and tags
"""
try:
await self.initialize()
# Build query with optional memory_type and tags filters
query = '''
SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
e.content_embedding
FROM memories m
LEFT JOIN memory_embeddings e ON m.id = e.rowid
'''
params = []
where_conditions = []
# Always exclude soft-deleted memories
where_conditions.append('m.deleted_at IS NULL')
# Add memory_type filter if specified
if memory_type is not None:
where_conditions.append('m.memory_type = ?')
params.append(memory_type)
# Add tags filter if specified (using database-level filtering like search_by_tag_chronological)
if tags and len(tags) > 0:
tag_conditions = " OR ".join(["m.tags LIKE ?" for _ in tags])
where_conditions.append(f"({tag_conditions})")
params.extend([f"%{tag}%" for tag in tags])
# Apply WHERE clause
query += ' WHERE ' + ' AND '.join(where_conditions)
query += ' ORDER BY m.created_at DESC'
if limit is not None:
query += ' LIMIT ?'
params.append(limit)
if offset > 0:
query += ' OFFSET ?'
params.append(offset)
cursor = self.conn.execute(query, params)
memories = []
for row in cursor.fetchall():
memory = self._row_to_memory(row)
if memory:
memories.append(memory)
return memories
except Exception as e:
logger.error(f"Error getting all memories: {str(e)}")
return []
async def get_recent_memories(self, n: int = 10) -> List[Memory]:
"""
Get n most recent memories.
Args:
n: Number of recent memories to return
Returns:
List of the n most recent Memory objects
"""
return await self.get_all_memories(limit=n, offset=0)
async def get_largest_memories(self, n: int = 10) -> List[Memory]:
"""
Get n largest memories by content length.
Args:
n: Number of largest memories to return
Returns:
List of the n largest Memory objects ordered by content length descending
"""
try:
await self.initialize()
# Query for largest memories by content length
query = """
SELECT content_hash, content, tags, memory_type, metadata, created_at, updated_at
FROM memories
ORDER BY LENGTH(content) DESC
LIMIT ?
"""
cursor = self.conn.execute(query, (n,))
rows = cursor.fetchall()
memories = []
for row in rows:
try:
                    memory = Memory(
                        content_hash=row[0],
                        content=row[1],
                        # Tags are stored as a comma-separated string (see the other readers above), not JSON
                        tags=[tag.strip() for tag in row[2].split(",") if tag.strip()] if row[2] else [],
                        memory_type=row[3],
                        metadata=self._safe_json_loads(row[4], "get_largest_memories"),
                        created_at=row[5],
                        updated_at=row[6]
                    )
memories.append(memory)
except Exception as parse_error:
logger.warning(f"Failed to parse memory {row[0]}: {parse_error}")
continue
return memories
except Exception as e:
logger.error(f"Error getting largest memories: {e}")
return []
async def get_memory_timestamps(self, days: Optional[int] = None) -> List[float]:
"""
Get memory creation timestamps only, without loading full memory objects.
This is an optimized method for analytics that only needs timestamps,
avoiding the overhead of loading full memory content and embeddings.
Args:
days: Optional filter to only get memories from last N days
Returns:
List of Unix timestamps (float) in descending order (newest first)
"""
try:
await self.initialize()
if days is not None:
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
cutoff_timestamp = cutoff.timestamp()
query = """
SELECT created_at
FROM memories
WHERE created_at >= ?
ORDER BY created_at DESC
"""
cursor = self.conn.execute(query, (cutoff_timestamp,))
else:
query = """
SELECT created_at
FROM memories
ORDER BY created_at DESC
"""
cursor = self.conn.execute(query)
rows = cursor.fetchall()
timestamps = [row[0] for row in rows if row[0] is not None]
return timestamps
except Exception as e:
logger.error(f"Error getting memory timestamps: {e}")
return []
async def count_all_memories(self, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> int:
"""
Get total count of memories in storage.
Args:
memory_type: Optional filter by memory type
tags: Optional filter by tags (memories matching ANY of the tags)
Returns:
Total number of memories, optionally filtered by type and/or tags
"""
try:
await self.initialize()
# Build query with filters
conditions = []
params = []
if memory_type is not None:
conditions.append('memory_type = ?')
params.append(memory_type)
if tags:
# Filter by tags - match ANY tag (OR logic)
tag_conditions = ' OR '.join(['tags LIKE ?' for _ in tags])
conditions.append(f'({tag_conditions})')
# Add each tag with wildcards for LIKE matching
for tag in tags:
params.append(f'%{tag}%')
# Build final query (always exclude soft-deleted)
conditions.append('deleted_at IS NULL')
query = 'SELECT COUNT(*) FROM memories WHERE ' + ' AND '.join(conditions)
cursor = self.conn.execute(query, tuple(params))
result = cursor.fetchone()
return result[0] if result else 0
except Exception as e:
logger.error(f"Error counting memories: {str(e)}")
return 0
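    # NOTE: the tag filters in get_all_memories and count_all_memories above use
    # LIKE '%tag%' patterns, which also match substrings (e.g. filtering on "test"
    # matches memories tagged "testing"). Exact-tag matching would need delimiter-aware
    # patterns like the ones used in delete_by_timeframe.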
async def get_all_tags_with_counts(self) -> List[Dict[str, Any]]:
"""
Get all tags with their usage counts.
Returns:
List of dictionaries with 'tag' and 'count' keys, sorted by count descending
"""
try:
await self.initialize()
# No explicit transaction needed - SQLite in WAL mode handles this automatically
# Get all tags from the database (exclude soft-deleted)
cursor = self.conn.execute('''
SELECT tags
FROM memories
WHERE tags IS NOT NULL AND tags != '' AND deleted_at IS NULL
''')
# Fetch all rows first to avoid holding cursor during processing
rows = cursor.fetchall()
# Yield control to event loop before processing
await asyncio.sleep(0)
# Use Counter with generator expression for memory efficiency
tag_counter = Counter(
tag.strip()
for (tag_string,) in rows
if tag_string
for tag in tag_string.split(",")
if tag.strip()
)
# Return as list of dicts sorted by count descending
return [{"tag": tag, "count": count} for tag, count in tag_counter.most_common()]
except sqlite3.Error as e:
logger.error(f"Database error getting tags with counts: {str(e)}")
return []
except Exception as e:
logger.error(f"Unexpected error getting tags with counts: {str(e)}")
raise
def close(self):
"""Close the database connection."""
if self.conn:
self.conn.close()
self.conn = None
logger.info("SQLite-vec storage connection closed")
```
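
A minimal usage sketch for the storage API in this file. The import path, class name, and constructor arguments are assumptions based on the module layout shown above; only the method calls (`initialize`, `recall`, `get_stats`, `close`) come from the code itself:

```python
import asyncio
import time

# Assumed import path and class name -- adjust to the actual module layout.
from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage


async def main() -> None:
    # Constructor arguments are an assumption; the real signature may differ.
    storage = SqliteVecMemoryStorage(db_path="memory_demo.db")
    await storage.initialize()
    try:
        now = time.time()
        # Semantic recall restricted to the last 24 hours; recall() falls back
        # to a time-only query if the vector search fails.
        results = await storage.recall(
            query="sqlite-vec migration notes",
            n_results=5,
            start_timestamp=now - 24 * 3600,
            end_timestamp=now,
        )
        for result in results:
            print(result.memory.content_hash, result.relevance_score)

        # Aggregate statistics (these exclude soft-deleted memories).
        print(await storage.get_stats())
    finally:
        storage.close()


if __name__ == "__main__":
    asyncio.run(main())
```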