This is page 42 of 47. To view the full context, open http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x}, replacing {x} with the desired page number.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/storage/sqlite_vec.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | SQLite-vec storage backend for MCP Memory Service.
17 | Provides local vector similarity search using sqlite-vec extension.
18 | """
19 |
20 | import sqlite3
21 | import json
22 | import logging
23 | import traceback
24 | import time
25 | import os
26 | import sys
27 | import platform
28 | from collections import Counter
29 | from typing import List, Dict, Any, Tuple, Optional, Set, Callable
30 | from datetime import datetime, timezone, timedelta
31 | import asyncio
32 | import random
33 |
34 | # Import sqlite-vec with fallback
35 | try:
36 | import sqlite_vec
37 | from sqlite_vec import serialize_float32
38 | SQLITE_VEC_AVAILABLE = True
39 | except ImportError:
40 | SQLITE_VEC_AVAILABLE = False
41 | print("WARNING: sqlite-vec not available. Install with: pip install sqlite-vec")
42 |
43 | # Import sentence transformers with fallback
44 | try:
45 | from sentence_transformers import SentenceTransformer
46 | SENTENCE_TRANSFORMERS_AVAILABLE = True
47 | except ImportError:
48 | SENTENCE_TRANSFORMERS_AVAILABLE = False
49 | print("WARNING: sentence_transformers not available. Install for embedding support.")
50 |
51 | from .base import MemoryStorage
52 | from ..models.memory import Memory, MemoryQueryResult
53 | from ..utils.hashing import generate_content_hash
54 | from ..utils.system_detection import (
55 | get_system_info,
56 | get_optimal_embedding_settings,
57 | get_torch_device,
58 | AcceleratorType
59 | )
60 | from ..config import SQLITEVEC_MAX_CONTENT_LENGTH
61 |
62 | logger = logging.getLogger(__name__)
63 |
64 | # Global model cache for performance optimization
65 | _MODEL_CACHE = {}
66 | _EMBEDDING_CACHE = {}
67 |
68 |
69 | def deserialize_embedding(blob: bytes) -> Optional[List[float]]:
70 | """
71 | Deserialize embedding blob from sqlite-vec format to list of floats.
72 |
73 | Args:
74 | blob: Binary blob containing serialized float32 array
75 |
76 | Returns:
77 | List of floats representing the embedding, or None if deserialization fails
78 | """
79 | if not blob:
80 | return None
81 |
82 | try:
83 | # Import numpy locally to avoid hard dependency
84 | import numpy as np
85 | # sqlite-vec stores embeddings as raw float32 arrays
86 | arr = np.frombuffer(blob, dtype=np.float32)
87 | return arr.tolist()
88 | except Exception as e:
89 | logger.warning(f"Failed to deserialize embedding: {e}")
90 | return None
91 |
92 |
93 | class SqliteVecMemoryStorage(MemoryStorage):
94 | """
95 | SQLite-vec based memory storage implementation.
96 |
97 | This backend provides local vector similarity search using sqlite-vec
98 | while maintaining the same interface as other storage backends.
99 | """
100 |
101 | @property
102 | def max_content_length(self) -> Optional[int]:
103 | """SQLite-vec content length limit from configuration (default: unlimited)."""
104 | return SQLITEVEC_MAX_CONTENT_LENGTH
105 |
106 | @property
107 | def supports_chunking(self) -> bool:
108 | """SQLite-vec backend supports content chunking with metadata linking."""
109 | return True
110 |
111 | def __init__(self, db_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
112 | """
113 | Initialize SQLite-vec storage.
114 |
115 | Args:
116 | db_path: Path to SQLite database file
117 | embedding_model: Name of sentence transformer model to use
118 | """
119 | self.db_path = db_path
120 | self.embedding_model_name = embedding_model
121 | self.conn = None
122 | self.embedding_model = None
123 | self.embedding_dimension = 384 # Default for all-MiniLM-L6-v2
124 | self._initialized = False # Track initialization state
125 |
126 | # Performance settings
127 | self.enable_cache = True
128 | self.batch_size = 32
129 |
130 | # Ensure directory exists
131 | os.makedirs(os.path.dirname(self.db_path) if os.path.dirname(self.db_path) else '.', exist_ok=True)
132 |
133 | logger.info(f"Initialized SQLite-vec storage at: {self.db_path}")
134 |
135 | def _safe_json_loads(self, json_str: str, context: str = "") -> dict:
136 | """Safely parse JSON with comprehensive error handling and logging."""
137 | if not json_str:
138 | return {}
139 | try:
140 | result = json.loads(json_str)
141 | if not isinstance(result, dict):
142 | logger.warning(f"Non-dict JSON in {context}: {type(result)}")
143 | return {}
144 | return result
145 | except json.JSONDecodeError as e:
146 | logger.error(f"JSON decode error in {context}: {e}, data: {json_str[:100]}...")
147 | return {}
148 | except TypeError as e:
149 | logger.error(f"JSON type error in {context}: {e}")
150 | return {}
151 |
152 | async def _execute_with_retry(self, operation: Callable, max_retries: int = 3, initial_delay: float = 0.1):
153 | """
154 | Execute a database operation with exponential backoff retry logic.
155 |
156 | Args:
157 | operation: The database operation to execute
158 | max_retries: Maximum number of retry attempts
159 | initial_delay: Initial delay in seconds before first retry
160 |
161 | Returns:
162 | The result of the operation
163 |
164 | Raises:
165 | The last exception if all retries fail
166 | """
167 | last_exception = None
168 | delay = initial_delay
169 |
170 | for attempt in range(max_retries + 1):
171 | try:
172 | return operation()
173 | except sqlite3.OperationalError as e:
174 | last_exception = e
175 | error_msg = str(e).lower()
176 |
177 | # Check if error is related to database locking
178 | if "locked" in error_msg or "busy" in error_msg:
179 | if attempt < max_retries:
180 | # Add jitter to prevent thundering herd
181 | jittered_delay = delay * (1 + random.uniform(-0.1, 0.1))
182 | logger.warning(f"Database locked, retrying in {jittered_delay:.2f}s (attempt {attempt + 1}/{max_retries})")
183 | await asyncio.sleep(jittered_delay)
184 | # Exponential backoff
185 | delay *= 2
186 | continue
187 | else:
188 | logger.error(f"Database locked after {max_retries} retries")
189 | else:
190 | # Non-retryable error
191 | raise
192 | except Exception as e:
193 | # Non-SQLite errors are not retried
194 | raise
195 |
196 | # If we get here, all retries failed
197 | raise last_exception
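# Illustrative usage (minimal sketch): the store() method below wraps its writes
# in this helper in exactly this way, retrying only on "locked"/"busy" errors:
#
#     def insert_memory():
#         return self.conn.execute("INSERT INTO memories (...) VALUES (?, ...)", params).lastrowid
#     rowid = await self._execute_with_retry(insert_memory)
#     await self._execute_with_retry(self.conn.commit)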
198 |
199 | def _check_extension_support(self):
200 | """Check if Python's sqlite3 supports loading extensions."""
201 | test_conn = None
202 | try:
203 | test_conn = sqlite3.connect(":memory:")
204 | if not hasattr(test_conn, 'enable_load_extension'):
205 | return False, "Python sqlite3 module not compiled with extension support"
206 |
207 | # Test if we can actually enable extension loading
208 | test_conn.enable_load_extension(True)
209 | test_conn.enable_load_extension(False)
210 | return True, "Extension loading supported"
211 |
212 | except AttributeError as e:
213 | return False, f"enable_load_extension not available: {e}"
214 | except sqlite3.OperationalError as e:
215 | return False, f"Extension loading disabled: {e}"
216 | except Exception as e:
217 | return False, f"Extension support check failed: {e}"
218 | finally:
219 | if test_conn:
220 | test_conn.close()
221 |
222 | def _check_dependencies(self):
223 | """Check and validate all required dependencies for initialization."""
224 | if not SQLITE_VEC_AVAILABLE:
225 | raise ImportError("sqlite-vec is not available. Install with: pip install sqlite-vec")
226 |
227 | # Check if ONNX embeddings are enabled (preferred for Docker)
228 | from ..config import USE_ONNX
229 | if USE_ONNX:
230 | logger.info("ONNX embeddings enabled - skipping sentence-transformers installation")
231 | return
232 |
233 | # Check sentence-transformers availability (only if ONNX disabled)
234 | global SENTENCE_TRANSFORMERS_AVAILABLE
235 | if not SENTENCE_TRANSFORMERS_AVAILABLE:
236 | raise ImportError("sentence-transformers is not available. Install with: pip install sentence-transformers torch")
237 |
238 | def _handle_extension_loading_failure(self):
239 | """Provide detailed error guidance when extension loading is not supported."""
240 | error_msg = "SQLite extension loading not supported"
241 | logger.error(error_msg)
242 |
243 | platform_info = f"{platform.system()} {platform.release()}"
244 | solutions = []
245 |
246 | if platform.system().lower() == "darwin": # macOS
247 | solutions.extend([
248 | "Install Python via Homebrew: brew install python",
249 | "Use pyenv with extension support: PYTHON_CONFIGURE_OPTS='--enable-loadable-sqlite-extensions' pyenv install 3.12.0",
250 | "Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
251 | ])
252 | elif platform.system().lower() == "linux":
253 | solutions.extend([
254 | "Install Python with extension support: apt install python3-dev sqlite3",
255 | "Rebuild Python with: ./configure --enable-loadable-sqlite-extensions",
256 | "Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
257 | ])
258 | else: # Windows and others
259 | solutions.extend([
260 | "Use official Python installer from python.org",
261 | "Install Python with conda: conda install python",
262 | "Consider using Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare"
263 | ])
264 |
265 | detailed_error = f"""
266 | {error_msg}
267 |
268 | Platform: {platform_info}
269 | Python Version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}
270 |
271 | SOLUTIONS:
272 | {chr(10).join(f" • {solution}" for solution in solutions)}
273 |
274 | The sqlite-vec backend requires Python compiled with --enable-loadable-sqlite-extensions.
275 | Consider using the Cloudflare backend as an alternative: it provides cloud-based vector
276 | search without requiring local SQLite extensions.
277 |
278 | To switch backends permanently, set: MCP_MEMORY_STORAGE_BACKEND=cloudflare
279 | """
280 | raise RuntimeError(detailed_error.strip())
281 |
282 | def _get_connection_timeout(self) -> float:
283 | """Calculate database connection timeout from environment or use default."""
284 | timeout_seconds = 15.0 # Default: 15 seconds
285 | custom_pragmas_env = os.environ.get("MCP_MEMORY_SQLITE_PRAGMAS", "")
286 |
287 | if "busy_timeout" not in custom_pragmas_env:
288 | return timeout_seconds
289 |
290 | # Parse busy_timeout value (in milliseconds, convert to seconds)
291 | for pragma_pair in custom_pragmas_env.split(","):
292 | if "busy_timeout" in pragma_pair and "=" in pragma_pair:
293 | try:
294 | timeout_ms = int(pragma_pair.split("=")[1].strip())
295 | timeout_seconds = timeout_ms / 1000.0
296 | logger.info(f"Using custom timeout: {timeout_seconds}s from MCP_MEMORY_SQLITE_PRAGMAS")
297 | return timeout_seconds
298 | except (ValueError, IndexError) as e:
299 | logger.warning(f"Failed to parse busy_timeout from env: {e}, using default {timeout_seconds}s")
300 | return timeout_seconds
301 |
302 | return timeout_seconds
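# Illustrative configuration (example values, assumed shell syntax): busy_timeout
# is given in milliseconds and converted to seconds above; additional pragmas are
# comma-separated name=value pairs parsed later in initialize():
#
#     export MCP_MEMORY_SQLITE_PRAGMAS="busy_timeout=30000,synchronous=FULL"
#     # -> connection timeout becomes 30.0s; both pragmas are applied at startup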
303 |
304 | def _load_sqlite_vec_extension(self):
305 | """Load the sqlite-vec extension with proper error handling."""
306 | try:
307 | self.conn.enable_load_extension(True)
308 | sqlite_vec.load(self.conn)
309 | self.conn.enable_load_extension(False)
310 | logger.info("sqlite-vec extension loaded successfully")
311 | except Exception as e:
312 | error_msg = f"Failed to load sqlite-vec extension: {e}"
313 | logger.error(error_msg)
314 | if self.conn:
315 | self.conn.close()
316 | self.conn = None
317 |
318 | # Provide specific guidance based on the error
319 | if "enable_load_extension" in str(e):
320 | detailed_error = f"""
321 | {error_msg}
322 |
323 | This error occurs when Python's sqlite3 module is not compiled with extension support.
324 | This is common on macOS with the system Python installation.
325 |
326 | RECOMMENDED SOLUTIONS:
327 | • Use Homebrew Python: brew install python && rehash
328 | • Use pyenv with extensions: PYTHON_CONFIGURE_OPTS='--enable-loadable-sqlite-extensions' pyenv install 3.12.0
329 | • Switch to Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare
330 |
331 | The Cloudflare backend provides cloud-based vector search without requiring local SQLite extensions.
332 | """
333 | else:
334 | detailed_error = f"""
335 | {error_msg}
336 |
337 | Failed to load the sqlite-vec extension. This could be due to:
338 | • Incompatible sqlite-vec version
339 | • Missing system dependencies
340 | • SQLite version incompatibility
341 |
342 | SOLUTIONS:
343 | • Reinstall sqlite-vec: pip install --force-reinstall sqlite-vec
344 | • Switch to Cloudflare backend: export MCP_MEMORY_STORAGE_BACKEND=cloudflare
345 | • Check SQLite version: python -c "import sqlite3; print(sqlite3.sqlite_version)"
346 | """
347 | raise RuntimeError(detailed_error.strip())
348 |
349 | def _connect_and_load_extension(self):
350 | """Connect to database and load the sqlite-vec extension."""
351 | # Calculate timeout and connect
352 | timeout_seconds = self._get_connection_timeout()
353 | self.conn = sqlite3.connect(self.db_path, timeout=timeout_seconds)
354 |
355 | # Load extension
356 | self._load_sqlite_vec_extension()
357 |
358 | async def initialize(self):
359 | """Initialize the SQLite database with vec0 extension."""
360 | # Return early if already initialized to prevent multiple initialization attempts
361 | if self._initialized:
362 | return
363 |
364 | try:
365 | self._check_dependencies()
366 |
367 | # Check if extension loading is supported
368 | extension_supported, support_message = self._check_extension_support()
369 | if not extension_supported:
370 | self._handle_extension_loading_failure()
371 |
372 | # Connect to database and load extension
373 | self._connect_and_load_extension()
374 |
375 | # Check if database is already initialized by another process
376 | # This prevents DDL lock conflicts when multiple servers start concurrently
377 | try:
378 | cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'")
379 | memories_table_exists = cursor.fetchone() is not None
380 |
381 | cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memory_embeddings'")
382 | embeddings_table_exists = cursor.fetchone() is not None
383 |
384 | if memories_table_exists and embeddings_table_exists:
385 | # Database is already initialized, just load the embedding model and mark as initialized
386 | logger.info("Database already initialized by another process, skipping DDL operations")
387 | await self._initialize_embedding_model()
388 | self._initialized = True
389 | logger.info(f"SQLite-vec storage initialized successfully (existing database) with embedding dimension: {self.embedding_dimension}")
390 | return
391 | except sqlite3.Error as e:
392 | # If we can't check tables (e.g., database locked), proceed with normal initialization
393 | logger.debug(f"Could not check existing tables (will attempt full initialization): {e}")
394 |
395 | # Apply default pragmas for concurrent access
396 | default_pragmas = {
397 | "journal_mode": "WAL", # Enable WAL mode for concurrent access
398 | "busy_timeout": "5000", # 5 second timeout for locked database
399 | "synchronous": "NORMAL", # Balanced performance/safety
400 | "cache_size": "10000", # Increase cache size
401 | "temp_store": "MEMORY" # Use memory for temp tables
402 | }
403 |
404 | # Check for custom pragmas from environment variable
405 | custom_pragmas = os.environ.get("MCP_MEMORY_SQLITE_PRAGMAS", "")
406 | if custom_pragmas:
407 | # Parse custom pragmas (format: "pragma1=value1,pragma2=value2")
408 | for pragma_pair in custom_pragmas.split(","):
409 | pragma_pair = pragma_pair.strip()
410 | if "=" in pragma_pair:
411 | pragma_name, pragma_value = pragma_pair.split("=", 1)
412 | default_pragmas[pragma_name.strip()] = pragma_value.strip()
413 | logger.info(f"Custom pragma from env: {pragma_name}={pragma_value}")
414 |
415 | # Apply all pragmas
416 | applied_pragmas = []
417 | for pragma_name, pragma_value in default_pragmas.items():
418 | try:
419 | self.conn.execute(f"PRAGMA {pragma_name}={pragma_value}")
420 | applied_pragmas.append(f"{pragma_name}={pragma_value}")
421 | except sqlite3.Error as e:
422 | logger.warning(f"Failed to set pragma {pragma_name}={pragma_value}: {e}")
423 |
424 | logger.info(f"SQLite pragmas applied: {', '.join(applied_pragmas)}")
425 |
426 | # Create metadata table for storage configuration
427 | self.conn.execute('''
428 | CREATE TABLE IF NOT EXISTS metadata (
429 | key TEXT PRIMARY KEY,
430 | value TEXT NOT NULL
431 | )
432 | ''')
433 |
434 | # Create regular table for memory data
435 | self.conn.execute('''
436 | CREATE TABLE IF NOT EXISTS memories (
437 | id INTEGER PRIMARY KEY AUTOINCREMENT,
438 | content_hash TEXT UNIQUE NOT NULL,
439 | content TEXT NOT NULL,
440 | tags TEXT,
441 | memory_type TEXT,
442 | metadata TEXT,
443 | created_at REAL,
444 | updated_at REAL,
445 | created_at_iso TEXT,
446 | updated_at_iso TEXT
447 | )
448 | ''')
449 |
450 | # Initialize embedding model BEFORE creating vector table
451 | await self._initialize_embedding_model()
452 |
453 | # Check if we need to migrate from L2 to cosine distance
454 | # This is a one-time migration - embeddings will be regenerated automatically
455 | try:
456 | # First check if metadata table exists
457 | cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='metadata'")
458 | metadata_exists = cursor.fetchone() is not None
459 |
460 | if metadata_exists:
461 | cursor = self.conn.execute("SELECT value FROM metadata WHERE key='distance_metric'")
462 | current_metric = cursor.fetchone()
463 |
464 | if not current_metric or current_metric[0] != 'cosine':
465 | logger.info("Migrating embeddings table from L2 to cosine distance...")
466 | logger.info("This is a one-time operation - embeddings will be regenerated automatically")
467 |
468 | # Use a timeout and retry logic for DROP TABLE to handle concurrent access
469 | max_retries = 3
470 | retry_delay = 1.0 # seconds
471 |
472 | for attempt in range(max_retries):
473 | try:
474 | # Drop old embeddings table (memories table is preserved)
475 | # This may fail if another process has the database locked
476 | self.conn.execute("DROP TABLE IF EXISTS memory_embeddings")
477 | logger.info("Successfully dropped old embeddings table")
478 | break
479 | except sqlite3.OperationalError as drop_error:
480 | if "database is locked" in str(drop_error):
481 | if attempt < max_retries - 1:
482 | logger.warning(f"Database locked during migration (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
483 | await asyncio.sleep(retry_delay)
484 | retry_delay *= 2 # Exponential backoff
485 | else:
486 | # Last attempt failed - check if table exists
487 | # If it doesn't exist, migration was done by another process
488 | cursor = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memory_embeddings'")
489 | if not cursor.fetchone():
490 | logger.info("Embeddings table doesn't exist - migration likely completed by another process")
491 | break
492 | else:
493 | logger.error("Failed to drop embeddings table after retries - will attempt to continue")
494 | # Don't fail initialization, just log the issue
495 | break
496 | else:
497 | raise
498 | else:
499 | # No metadata table means fresh install, no migration needed
500 | logger.debug("Fresh database detected, no migration needed")
501 | except Exception as e:
502 | # If anything goes wrong, log but don't fail initialization
503 | logger.warning(f"Migration check warning (non-fatal): {e}")
504 |
505 | # Now create virtual table with correct dimensions using cosine distance
506 | # Cosine similarity is better for text embeddings than L2 distance
507 | self.conn.execute(f'''
508 | CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings USING vec0(
509 | content_embedding FLOAT[{self.embedding_dimension}] distance_metric=cosine
510 | )
511 | ''')
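# With the default all-MiniLM-L6-v2 model (384 dimensions) the statement above
# expands to the following DDL (shown for illustration only):
#
#     CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings USING vec0(
#         content_embedding FLOAT[384] distance_metric=cosine
#     )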
512 |
513 | # Store metric in metadata for future migrations
514 | self.conn.execute("""
515 | INSERT OR REPLACE INTO metadata (key, value) VALUES ('distance_metric', 'cosine')
516 | """)
517 |
518 | # Create indexes for better performance
519 | self.conn.execute('CREATE INDEX IF NOT EXISTS idx_content_hash ON memories(content_hash)')
520 | self.conn.execute('CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)')
521 | self.conn.execute('CREATE INDEX IF NOT EXISTS idx_memory_type ON memories(memory_type)')
522 |
523 | # Mark as initialized to prevent re-initialization
524 | self._initialized = True
525 |
526 | logger.info(f"SQLite-vec storage initialized successfully with embedding dimension: {self.embedding_dimension}")
527 |
528 | except Exception as e:
529 | error_msg = f"Failed to initialize SQLite-vec storage: {str(e)}"
530 | logger.error(error_msg)
531 | logger.error(traceback.format_exc())
532 | raise RuntimeError(error_msg)
533 |
534 | def _is_docker_environment(self) -> bool:
535 | """Detect if running inside a Docker container."""
536 | # Check for Docker-specific files/environment
537 | if os.path.exists('/.dockerenv'):
538 | return True
539 | if os.environ.get('DOCKER_CONTAINER'):
540 | return True
541 | # Check if running in common container environments
542 | if any(os.environ.get(var) for var in ['KUBERNETES_SERVICE_HOST', 'MESOS_SANDBOX']):
543 | return True
544 | # Check cgroup for docker/containerd/podman
545 | try:
546 | with open('/proc/self/cgroup', 'r') as f:
547 | return any('docker' in line or 'containerd' in line for line in f)
548 | except (IOError, FileNotFoundError):
549 | pass
550 | return False
551 |
552 | async def _initialize_embedding_model(self):
553 | """Initialize the embedding model (ONNX or SentenceTransformer based on configuration)."""
554 | global _MODEL_CACHE
555 |
556 | # Detect if we're in Docker
557 | is_docker = self._is_docker_environment()
558 | if is_docker:
559 | logger.info("🐳 Docker environment detected - adjusting model loading strategy")
560 |
561 | try:
562 | # Check if we should use ONNX
563 | use_onnx = os.environ.get('MCP_MEMORY_USE_ONNX', '').lower() in ('1', 'true', 'yes')
564 |
565 | if use_onnx:
566 | # Try to use ONNX embeddings
567 | logger.info("Attempting to use ONNX embeddings (PyTorch-free)")
568 | try:
569 | from ..embeddings import get_onnx_embedding_model
570 |
571 | # Check cache first
572 | cache_key = f"onnx_{self.embedding_model_name}"
573 | if cache_key in _MODEL_CACHE:
574 | self.embedding_model = _MODEL_CACHE[cache_key]
575 | logger.info(f"Using cached ONNX embedding model: {self.embedding_model_name}")
576 | return
577 |
578 | # Create ONNX model
579 | onnx_model = get_onnx_embedding_model(self.embedding_model_name)
580 | if onnx_model:
581 | self.embedding_model = onnx_model
582 | self.embedding_dimension = onnx_model.embedding_dimension
583 | _MODEL_CACHE[cache_key] = onnx_model
584 | logger.info(f"ONNX embedding model loaded successfully. Dimension: {self.embedding_dimension}")
585 | return
586 | else:
587 | logger.warning("ONNX model creation failed, falling back to SentenceTransformer")
588 | except ImportError as e:
589 | logger.warning(f"ONNX dependencies not available: {e}")
590 | except Exception as e:
591 | logger.warning(f"Failed to initialize ONNX embeddings: {e}")
592 |
593 | # Fall back to SentenceTransformer
594 | if not SENTENCE_TRANSFORMERS_AVAILABLE:
595 | raise RuntimeError("Neither ONNX nor sentence-transformers available. Install one: pip install onnxruntime tokenizers OR pip install sentence-transformers torch")
596 |
597 | # Check cache first
598 | cache_key = self.embedding_model_name
599 | if cache_key in _MODEL_CACHE:
600 | self.embedding_model = _MODEL_CACHE[cache_key]
601 | logger.info(f"Using cached embedding model: {self.embedding_model_name}")
602 | return
603 |
604 | # Get system info for optimal settings
605 | system_info = get_system_info()
606 | device = get_torch_device()
607 |
608 | logger.info(f"Loading embedding model: {self.embedding_model_name}")
609 | logger.info(f"Using device: {device}")
610 |
611 | # Configure for offline mode if models are cached
612 | # Only set offline mode if we detect cached models to prevent initial downloads
613 | hf_home = os.environ.get('HF_HOME', os.path.expanduser("~/.cache/huggingface"))
614 | model_cache_path = os.path.join(hf_home, "hub", f"models--sentence-transformers--{self.embedding_model_name.replace('/', '--')}")
615 | if os.path.exists(model_cache_path):
616 | os.environ['HF_HUB_OFFLINE'] = '1'
617 | os.environ['TRANSFORMERS_OFFLINE'] = '1'
618 | logger.info("📦 Found cached model - enabling offline mode")
619 |
620 | # Try to load from cache first, fallback to direct model name
621 | try:
622 | # First try loading from Hugging Face cache
623 | hf_home = os.environ.get('HF_HOME', os.path.expanduser("~/.cache/huggingface"))
624 | cache_path = os.path.join(hf_home, "hub", f"models--sentence-transformers--{self.embedding_model_name.replace('/', '--')}")
625 | if os.path.exists(cache_path):
626 | # Find the snapshot directory
627 | snapshots_path = os.path.join(cache_path, "snapshots")
628 | if os.path.exists(snapshots_path):
629 | snapshot_dirs = [d for d in os.listdir(snapshots_path) if os.path.isdir(os.path.join(snapshots_path, d))]
630 | if snapshot_dirs:
631 | model_path = os.path.join(snapshots_path, snapshot_dirs[0])
632 | logger.info(f"Loading model from cache: {model_path}")
633 | self.embedding_model = SentenceTransformer(model_path, device=device)
634 | else:
635 | raise FileNotFoundError("No snapshot found")
636 | else:
637 | raise FileNotFoundError("No snapshots directory")
638 | else:
639 | raise FileNotFoundError("No cache found")
640 | except FileNotFoundError as cache_error:
641 | logger.warning(f"Model not in cache: {cache_error}")
642 | # Try to download the model (may fail in Docker without network)
643 | try:
644 | logger.info("Attempting to download model from Hugging Face...")
645 | self.embedding_model = SentenceTransformer(self.embedding_model_name, device=device)
646 | except OSError as download_error:
647 | # Check if this is a network connectivity issue
648 | error_msg = str(download_error)
649 | if any(phrase in error_msg.lower() for phrase in ['connection', 'network', 'couldn\'t connect', 'huggingface.co']):
650 | # Provide Docker-specific help
651 | docker_help = self._get_docker_network_help() if is_docker else ""
652 | raise RuntimeError(
653 | f"🔌 Model Download Error: Cannot connect to huggingface.co\n"
654 | f"{'='*60}\n"
655 | f"The model '{self.embedding_model_name}' needs to be downloaded but the connection failed.\n"
656 | f"{docker_help}"
657 | f"\n💡 Solutions:\n"
658 | f"1. Mount pre-downloaded models as a volume:\n"
659 | f" # On host machine, download the model first:\n"
660 | f" python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('{self.embedding_model_name}')\"\n"
661 | f" \n"
662 | f" # Then run container with cache mount:\n"
663 | f" docker run -v ~/.cache/huggingface:/root/.cache/huggingface ...\n"
664 | f"\n"
665 | f"2. Configure Docker network (if behind proxy):\n"
666 | f" docker run -e HTTPS_PROXY=your-proxy -e HTTP_PROXY=your-proxy ...\n"
667 | f"\n"
668 | f"3. Use offline mode with pre-cached models:\n"
669 | f" docker run -e HF_HUB_OFFLINE=1 -e TRANSFORMERS_OFFLINE=1 ...\n"
670 | f"\n"
671 | f"4. Use host network mode (if appropriate for your setup):\n"
672 | f" docker run --network host ...\n"
673 | f"\n"
674 | f"📚 See docs: https://github.com/doobidoo/mcp-memory-service/blob/main/docs/deployment/docker.md#model-download-issues\n"
675 | f"{'='*60}"
676 | ) from download_error
677 | else:
678 | # Re-raise if not a network issue
679 | raise
680 | except Exception as cache_error:
681 | logger.warning(f"Failed to load from cache: {cache_error}")
682 | # Fallback to normal loading (may fail if offline)
683 | logger.info("Attempting normal model loading...")
684 | self.embedding_model = SentenceTransformer(self.embedding_model_name, device=device)
685 |
686 | # Update embedding dimension based on actual model
687 | test_embedding = self.embedding_model.encode(["test"], convert_to_numpy=True)
688 | self.embedding_dimension = test_embedding.shape[1]
689 |
690 | # Cache the model
691 | _MODEL_CACHE[cache_key] = self.embedding_model
692 |
693 | logger.info(f"✅ Embedding model loaded successfully. Dimension: {self.embedding_dimension}")
694 |
695 | except RuntimeError:
696 | # Re-raise our custom errors with helpful messages
697 | raise
698 | except Exception as e:
699 | logger.error(f"Failed to initialize embedding model: {str(e)}")
700 | logger.error(traceback.format_exc())
701 | # Continue without embeddings - some operations may still work
702 | logger.warning("⚠️ Continuing without embedding support - search functionality will be limited")
703 |
704 | def _get_docker_network_help(self) -> str:
705 | """Get Docker-specific network troubleshooting help."""
706 | # Try to detect the Docker platform
707 | docker_platform = "Docker"
708 | if os.environ.get('DOCKER_DESKTOP_VERSION'):
709 | docker_platform = "Docker Desktop"
710 | elif os.path.exists('/proc/version'):
711 | try:
712 | with open('/proc/version', 'r') as f:
713 | version = f.read().lower()
714 | if 'microsoft' in version:
715 | docker_platform = "Docker Desktop for Windows"
716 | except (IOError, FileNotFoundError):
717 | pass
718 |
719 | return (
720 | f"\n🐳 Docker Environment Detected ({docker_platform})\n"
721 | f"This appears to be a network connectivity issue common in Docker containers.\n"
722 | )
723 |
724 | def _generate_embedding(self, text: str) -> List[float]:
725 | """Generate embedding for text."""
726 | if not self.embedding_model:
727 | raise RuntimeError("No embedding model available. Ensure sentence-transformers is installed and model is loaded.")
728 |
729 | try:
730 | # Check cache first
731 | if self.enable_cache:
732 | cache_key = hash(text)
733 | if cache_key in _EMBEDDING_CACHE:
734 | return _EMBEDDING_CACHE[cache_key]
735 |
736 | # Generate embedding
737 | embedding = self.embedding_model.encode([text], convert_to_numpy=True)[0]
738 | embedding_list = embedding.tolist()
739 |
740 | # Validate embedding
741 | if not embedding_list:
742 | raise ValueError("Generated embedding is empty")
743 |
744 | if len(embedding_list) != self.embedding_dimension:
745 | raise ValueError(f"Embedding dimension mismatch: expected {self.embedding_dimension}, got {len(embedding_list)}")
746 |
747 | # Validate values are finite
748 | if not all(isinstance(x, (int, float)) and not (x != x) and x != float('inf') and x != float('-inf') for x in embedding_list):
749 | raise ValueError("Embedding contains invalid values (NaN or infinity)")
750 |
751 | # Cache the result
752 | if self.enable_cache:
753 | _EMBEDDING_CACHE[cache_key] = embedding_list
754 |
755 | return embedding_list
756 |
757 | except Exception as e:
758 | logger.error(f"Failed to generate embedding: {str(e)}")
759 | raise RuntimeError(f"Failed to generate embedding: {str(e)}") from e
760 |
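# Illustrative call pattern for the async API defined below (hypothetical caller
# code, not part of this module):
#
#     storage = SqliteVecMemoryStorage("/path/to/memory.db")
#     await storage.initialize()
#     ok, msg = await storage.store(memory)                      # memory: Memory
#     hits = await storage.retrieve("database locking", n_results=5)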
761 | async def store(self, memory: Memory) -> Tuple[bool, str]:
762 | """Store a memory in the SQLite-vec database."""
763 | try:
764 | if not self.conn:
765 | return False, "Database not initialized"
766 |
767 | # Check for duplicates
768 | cursor = self.conn.execute(
769 | 'SELECT content_hash FROM memories WHERE content_hash = ?',
770 | (memory.content_hash,)
771 | )
772 | if cursor.fetchone():
773 | return False, "Duplicate content detected"
774 |
775 | # Generate and validate embedding
776 | try:
777 | embedding = self._generate_embedding(memory.content)
778 | except Exception as e:
779 | logger.error(f"Failed to generate embedding for memory {memory.content_hash}: {str(e)}")
780 | return False, f"Failed to generate embedding: {str(e)}"
781 |
782 | # Prepare metadata
783 | tags_str = ",".join(memory.tags) if memory.tags else ""
784 | metadata_str = json.dumps(memory.metadata) if memory.metadata else "{}"
785 |
786 | # Insert into memories table (metadata) with retry logic
787 | def insert_memory():
788 | cursor = self.conn.execute('''
789 | INSERT INTO memories (
790 | content_hash, content, tags, memory_type,
791 | metadata, created_at, updated_at, created_at_iso, updated_at_iso
792 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
793 | ''', (
794 | memory.content_hash,
795 | memory.content,
796 | tags_str,
797 | memory.memory_type,
798 | metadata_str,
799 | memory.created_at,
800 | memory.updated_at,
801 | memory.created_at_iso,
802 | memory.updated_at_iso
803 | ))
804 | return cursor.lastrowid
805 |
806 | memory_rowid = await self._execute_with_retry(insert_memory)
807 |
808 | # Insert into embeddings table with retry logic
809 | def insert_embedding():
810 | # Check if we can insert with specific rowid
811 | try:
812 | self.conn.execute('''
813 | INSERT INTO memory_embeddings (rowid, content_embedding)
814 | VALUES (?, ?)
815 | ''', (
816 | memory_rowid,
817 | serialize_float32(embedding)
818 | ))
819 | except sqlite3.Error as e:
820 | # If rowid insert fails, try without specifying rowid
821 | logger.warning(f"Failed to insert with rowid {memory_rowid}: {e}. Trying without rowid.")
822 | self.conn.execute('''
823 | INSERT INTO memory_embeddings (content_embedding)
824 | VALUES (?)
825 | ''', (
826 | serialize_float32(embedding),
827 | ))
828 |
829 | await self._execute_with_retry(insert_embedding)
830 |
831 | # Commit with retry logic
832 | await self._execute_with_retry(self.conn.commit)
833 |
834 | logger.info(f"Successfully stored memory: {memory.content_hash}")
835 | return True, "Memory stored successfully"
836 |
837 | except Exception as e:
838 | error_msg = f"Failed to store memory: {str(e)}"
839 | logger.error(error_msg)
840 | logger.error(traceback.format_exc())
841 | return False, error_msg
842 |
843 | async def retrieve(self, query: str, n_results: int = 5) -> List[MemoryQueryResult]:
844 | """Retrieve memories using semantic search."""
845 | try:
846 | if not self.conn:
847 | logger.error("Database not initialized")
848 | return []
849 |
850 | if not self.embedding_model:
851 | logger.warning("No embedding model available, cannot perform semantic search")
852 | return []
853 |
854 | # Generate query embedding
855 | try:
856 | query_embedding = self._generate_embedding(query)
857 | except Exception as e:
858 | logger.error(f"Failed to generate query embedding: {str(e)}")
859 | return []
860 |
861 | # First, check if embeddings table has data
862 | cursor = self.conn.execute('SELECT COUNT(*) FROM memory_embeddings')
863 | embedding_count = cursor.fetchone()[0]
864 |
865 | if embedding_count == 0:
866 | logger.warning("No embeddings found in database. Memories may have been stored without embeddings.")
867 | return []
868 |
869 | # Perform vector similarity search using JOIN with retry logic
870 | def search_memories():
871 | # Try direct rowid join first
872 | cursor = self.conn.execute('''
873 | SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
874 | m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
875 | e.distance
876 | FROM memories m
877 | INNER JOIN (
878 | SELECT rowid, distance
879 | FROM memory_embeddings
880 | WHERE content_embedding MATCH ?
881 | ORDER BY distance
882 | LIMIT ?
883 | ) e ON m.id = e.rowid
884 | ORDER BY e.distance
885 | ''', (serialize_float32(query_embedding), n_results))
886 |
887 | # Check if we got results
888 | results = cursor.fetchall()
889 | if not results:
890 | # Log debug info
891 | logger.debug("No results from vector search. Checking database state...")
892 | mem_count = self.conn.execute('SELECT COUNT(*) FROM memories').fetchone()[0]
893 | logger.debug(f"Memories table has {mem_count} rows, embeddings table has {embedding_count} rows")
894 |
895 | return results
896 |
897 | search_results = await self._execute_with_retry(search_memories)
898 |
899 | results = []
900 | for row in search_results:
901 | try:
902 | # Parse row data
903 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
904 | created_at, updated_at, created_at_iso, updated_at_iso, distance = row[5:]
905 |
906 | # Parse tags and metadata
907 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
908 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
909 |
910 | # Create Memory object
911 | memory = Memory(
912 | content=content,
913 | content_hash=content_hash,
914 | tags=tags,
915 | memory_type=memory_type,
916 | metadata=metadata,
917 | created_at=created_at,
918 | updated_at=updated_at,
919 | created_at_iso=created_at_iso,
920 | updated_at_iso=updated_at_iso
921 | )
922 |
923 | # Calculate relevance score (lower distance = higher relevance)
924 | # For cosine distance: distance ranges from 0 (identical) to 2 (opposite)
925 | # Convert to similarity score: 1 - (distance/2) gives 0-1 range
926 | relevance_score = max(0.0, 1.0 - (float(distance) / 2.0)) if distance is not None else 0.0
927 |
928 | results.append(MemoryQueryResult(
929 | memory=memory,
930 | relevance_score=relevance_score,
931 | debug_info={"distance": distance, "backend": "sqlite-vec"}
932 | ))
933 |
934 | except Exception as parse_error:
935 | logger.warning(f"Failed to parse memory result: {parse_error}")
936 | continue
937 |
938 | logger.info(f"Retrieved {len(results)} memories for query: {query}")
939 | return results
940 |
941 | except Exception as e:
942 | logger.error(f"Failed to retrieve memories: {str(e)}")
943 | logger.error(traceback.format_exc())
944 | return []
945 |
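    # Usage sketch (illustrative addition): a semantic lookup against an initialized
    # `storage` instance; relevance_score is derived from cosine distance, so values
    # near 1.0 indicate near-identical content:
    #
    #     results = await storage.retrieve("sqlite-vec migration notes", n_results=3)
    #     for r in results:
    #         print(f"{r.relevance_score:.2f}  {r.memory.content_hash}")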
946 | async def search_by_tag(self, tags: List[str], time_start: Optional[float] = None) -> List[Memory]:
947 | """Search memories by tags with optional time filtering.
948 |
949 | Args:
950 | tags: List of tags to search for (OR logic)
951 | time_start: Optional Unix timestamp (in seconds) to filter memories created after this time
952 |
953 | Returns:
954 | List of Memory objects matching the tag criteria and time filter
955 | """
956 | try:
957 | if not self.conn:
958 | logger.error("Database not initialized")
959 | return []
960 |
961 | if not tags:
962 | return []
963 |
964 | # Build query for tag search (OR logic)
965 | tag_conditions = " OR ".join(["tags LIKE ?" for _ in tags])
966 | tag_params = [f"%{tag}%" for tag in tags]
967 |
968 | # Add time filter to WHERE clause if provided
969 | where_clause = f"WHERE ({tag_conditions})"
970 | if time_start is not None:
971 | where_clause += " AND created_at >= ?"
972 | tag_params.append(time_start)
973 |
974 | cursor = self.conn.execute(f'''
975 | SELECT content_hash, content, tags, memory_type, metadata,
976 | created_at, updated_at, created_at_iso, updated_at_iso
977 | FROM memories
978 | {where_clause}
979 | ORDER BY created_at DESC
980 | ''', tag_params)
981 |
982 | results = []
983 | for row in cursor.fetchall():
984 | try:
985 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
986 | created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
987 |
988 | # Parse tags and metadata
989 | memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
990 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
991 |
992 | memory = Memory(
993 | content=content,
994 | content_hash=content_hash,
995 | tags=memory_tags,
996 | memory_type=memory_type,
997 | metadata=metadata,
998 | created_at=created_at,
999 | updated_at=updated_at,
1000 | created_at_iso=created_at_iso,
1001 | updated_at_iso=updated_at_iso
1002 | )
1003 |
1004 | results.append(memory)
1005 |
1006 | except Exception as parse_error:
1007 | logger.warning(f"Failed to parse memory result: {parse_error}")
1008 | continue
1009 |
1010 | logger.info(f"Found {len(results)} memories with tags: {tags}")
1011 | return results
1012 |
1013 | except Exception as e:
1014 | logger.error(f"Failed to search by tags: {str(e)}")
1015 | logger.error(traceback.format_exc())
1016 | return []
1017 |
1018 | async def search_by_tags(
1019 | self,
1020 | tags: List[str],
1021 | operation: str = "AND",
1022 | time_start: Optional[float] = None,
1023 | time_end: Optional[float] = None
1024 | ) -> List[Memory]:
1025 | """Search memories by tags with AND/OR operation and optional time filtering."""
1026 | try:
1027 | if not self.conn:
1028 | logger.error("Database not initialized")
1029 | return []
1030 |
1031 | if not tags:
1032 | return []
1033 |
1034 | normalized_operation = operation.strip().upper() if isinstance(operation, str) else "AND"
1035 | if normalized_operation not in {"AND", "OR"}:
1036 | logger.warning("Unsupported tag operation %s; defaulting to AND", operation)
1037 | normalized_operation = "AND"
1038 |
1039 | comparator = " AND " if normalized_operation == "AND" else " OR "
1040 | tag_conditions = comparator.join(["tags LIKE ?" for _ in tags])
1041 | tag_params = [f"%{tag}%" for tag in tags]
1042 |
1043 | where_conditions = [f"({tag_conditions})"] if tag_conditions else []
1044 | if time_start is not None:
1045 | where_conditions.append("created_at >= ?")
1046 | tag_params.append(time_start)
1047 | if time_end is not None:
1048 | where_conditions.append("created_at <= ?")
1049 | tag_params.append(time_end)
1050 |
1051 | where_clause = f"WHERE {' AND '.join(where_conditions)}" if where_conditions else ""
1052 |
1053 | cursor = self.conn.execute(f'''
1054 | SELECT content_hash, content, tags, memory_type, metadata,
1055 | created_at, updated_at, created_at_iso, updated_at_iso
1056 | FROM memories
1057 | {where_clause}
1058 | ORDER BY updated_at DESC
1059 | ''', tag_params)
1060 |
1061 | results = []
1062 | for row in cursor.fetchall():
1063 | try:
1064 | content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row
1065 |
1066 | # Parse tags and metadata
1067 | memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1068 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1069 |
1070 | memory = Memory(
1071 | content=content,
1072 | content_hash=content_hash,
1073 | tags=memory_tags,
1074 | memory_type=memory_type,
1075 | metadata=metadata,
1076 | created_at=created_at,
1077 | updated_at=updated_at,
1078 | created_at_iso=created_at_iso,
1079 | updated_at_iso=updated_at_iso
1080 | )
1081 |
1082 | results.append(memory)
1083 |
1084 | except Exception as parse_error:
1085 | logger.warning(f"Failed to parse memory result: {parse_error}")
1086 | continue
1087 |
1088 | logger.info(f"Found {len(results)} memories with tags: {tags} (operation: {operation})")
1089 | return results
1090 |
1091 | except Exception as e:
1092 | logger.error(f"Failed to search by tags with operation {operation}: {str(e)}")
1093 | logger.error(traceback.format_exc())
1094 | return []
1095 |
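    # Usage sketch (illustrative addition): AND semantics returns only memories that
    # carry every listed tag, optionally restricted to a Unix-timestamp window:
    #
    #     import time
    #     week_ago = time.time() - 7 * 24 * 3600
    #     hits = await storage.search_by_tags(
    #         ["sqlite-vec", "bugfix"], operation="AND", time_start=week_ago
    #     )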
1096 |     async def search_by_tag_chronological(self, tags: List[str], limit: Optional[int] = None, offset: int = 0) -> List[Memory]:
1097 | """
1098 | Search memories by tags with chronological ordering and database-level pagination.
1099 |
1100 | This method addresses Gemini Code Assist's performance concern by pushing
1101 | ordering and pagination to the database level instead of doing it in Python.
1102 |
1103 | Args:
1104 | tags: List of tags to search for
1105 | limit: Maximum number of memories to return (None for all)
1106 | offset: Number of memories to skip (for pagination)
1107 |
1108 | Returns:
1109 | List of Memory objects ordered by created_at DESC
1110 | """
1111 | try:
1112 | if not self.conn:
1113 | logger.error("Database not initialized")
1114 | return []
1115 |
1116 | if not tags:
1117 | return []
1118 |
1119 | # Build query for tag search (OR logic) with database-level ordering and pagination
1120 | tag_conditions = " OR ".join(["tags LIKE ?" for _ in tags])
1121 | tag_params = [f"%{tag}%" for tag in tags]
1122 |
1123 | # Build pagination clauses
1124 |             limit_clause = f"LIMIT {limit}" if limit is not None else ("LIMIT -1" if offset > 0 else "")  # SQLite requires LIMIT before OFFSET
1125 |             offset_clause = f"OFFSET {offset}" if offset > 0 else ""
1126 |
1127 | query = f'''
1128 | SELECT content_hash, content, tags, memory_type, metadata,
1129 | created_at, updated_at, created_at_iso, updated_at_iso
1130 | FROM memories
1131 | WHERE {tag_conditions}
1132 | ORDER BY created_at DESC
1133 | {limit_clause} {offset_clause}
1134 | '''
1135 |
1136 | cursor = self.conn.execute(query, tag_params)
1137 | results = []
1138 |
1139 | for row in cursor.fetchall():
1140 | try:
1141 | content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row
1142 |
1143 | # Parse tags and metadata
1144 | memory_tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1145 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1146 |
1147 | memory = Memory(
1148 | content=content,
1149 | content_hash=content_hash,
1150 | tags=memory_tags,
1151 | memory_type=memory_type,
1152 | metadata=metadata,
1153 | created_at=created_at,
1154 | updated_at=updated_at,
1155 | created_at_iso=created_at_iso,
1156 | updated_at_iso=updated_at_iso
1157 | )
1158 |
1159 | results.append(memory)
1160 |
1161 | except Exception as parse_error:
1162 | logger.warning(f"Failed to parse memory result: {parse_error}")
1163 | continue
1164 |
1165 | logger.info(f"Found {len(results)} memories with tags: {tags} using database-level pagination (limit={limit}, offset={offset})")
1166 | return results
1167 |
1168 | except Exception as e:
1169 | logger.error(f"Failed to search by tags chronologically: {str(e)}")
1170 | logger.error(traceback.format_exc())
1171 | return []
1172 |
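    # Usage sketch (illustrative addition): paging through tagged memories 20 at a
    # time, with ordering and pagination pushed down to SQLite:
    #
    #     page_three = await storage.search_by_tag_chronological(
    #         ["release"], limit=20, offset=40
    #     )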
1173 | async def delete(self, content_hash: str) -> Tuple[bool, str]:
1174 | """Delete a memory by its content hash."""
1175 | try:
1176 | if not self.conn:
1177 | return False, "Database not initialized"
1178 |
1179 | # Get the id first to delete corresponding embedding
1180 | cursor = self.conn.execute('SELECT id FROM memories WHERE content_hash = ?', (content_hash,))
1181 | row = cursor.fetchone()
1182 |
1183 | if row:
1184 | memory_id = row[0]
1185 | # Delete from both tables
1186 | self.conn.execute('DELETE FROM memory_embeddings WHERE rowid = ?', (memory_id,))
1187 | cursor = self.conn.execute('DELETE FROM memories WHERE content_hash = ?', (content_hash,))
1188 | self.conn.commit()
1189 | else:
1190 | return False, f"Memory with hash {content_hash} not found"
1191 |
1192 | if cursor.rowcount > 0:
1193 | logger.info(f"Deleted memory: {content_hash}")
1194 | return True, f"Successfully deleted memory {content_hash}"
1195 | else:
1196 | return False, f"Memory with hash {content_hash} not found"
1197 |
1198 | except Exception as e:
1199 | error_msg = f"Failed to delete memory: {str(e)}"
1200 | logger.error(error_msg)
1201 | return False, error_msg
1202 |
1203 | async def get_by_hash(self, content_hash: str) -> Optional[Memory]:
1204 | """Get a memory by its content hash."""
1205 | try:
1206 | if not self.conn:
1207 | return None
1208 |
1209 | cursor = self.conn.execute('''
1210 | SELECT content_hash, content, tags, memory_type, metadata,
1211 | created_at, updated_at, created_at_iso, updated_at_iso
1212 | FROM memories WHERE content_hash = ?
1213 | ''', (content_hash,))
1214 |
1215 | row = cursor.fetchone()
1216 | if not row:
1217 | return None
1218 |
1219 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
1220 | created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
1221 |
1222 | # Parse tags and metadata
1223 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1224 | metadata = self._safe_json_loads(metadata_str, "memory_retrieval")
1225 |
1226 | memory = Memory(
1227 | content=content,
1228 | content_hash=content_hash,
1229 | tags=tags,
1230 | memory_type=memory_type,
1231 | metadata=metadata,
1232 | created_at=created_at,
1233 | updated_at=updated_at,
1234 | created_at_iso=created_at_iso,
1235 | updated_at_iso=updated_at_iso
1236 | )
1237 |
1238 | return memory
1239 |
1240 | except Exception as e:
1241 | logger.error(f"Failed to get memory by hash {content_hash}: {str(e)}")
1242 | return None
1243 |
1244 | async def get_all_content_hashes(self) -> Set[str]:
1245 | """
1246 | Get all content hashes in database for bulk existence checking.
1247 |
1248 | This is optimized for sync operations to avoid individual existence checks.
1249 | Returns a set for O(1) lookup performance.
1250 |
1251 | Returns:
1252 | Set of all content_hash values currently in the database
1253 | """
1254 | try:
1255 | if not self.conn:
1256 | return set()
1257 |
1258 | cursor = self.conn.execute('SELECT content_hash FROM memories')
1259 | return {row[0] for row in cursor.fetchall()}
1260 |
1261 | except Exception as e:
1262 | logger.error(f"Failed to get all content hashes: {str(e)}")
1263 | return set()
1264 |
1265 | async def delete_by_tag(self, tag: str) -> Tuple[int, str]:
1266 | """Delete memories by tag."""
1267 | try:
1268 | if not self.conn:
1269 | return 0, "Database not initialized"
1270 |
1271 | # Get the ids first to delete corresponding embeddings
1272 | cursor = self.conn.execute('SELECT id FROM memories WHERE tags LIKE ?', (f"%{tag}%",))
1273 | memory_ids = [row[0] for row in cursor.fetchall()]
1274 |
1275 | # Delete from both tables
1276 | for memory_id in memory_ids:
1277 | self.conn.execute('DELETE FROM memory_embeddings WHERE rowid = ?', (memory_id,))
1278 |
1279 | cursor = self.conn.execute('DELETE FROM memories WHERE tags LIKE ?', (f"%{tag}%",))
1280 | self.conn.commit()
1281 |
1282 | count = cursor.rowcount
1283 | logger.info(f"Deleted {count} memories with tag: {tag}")
1284 |
1285 | if count > 0:
1286 | return count, f"Successfully deleted {count} memories with tag '{tag}'"
1287 | else:
1288 | return 0, f"No memories found with tag '{tag}'"
1289 |
1290 | except Exception as e:
1291 | error_msg = f"Failed to delete by tag: {str(e)}"
1292 | logger.error(error_msg)
1293 | return 0, error_msg
1294 |
1295 | async def delete_by_tags(self, tags: List[str]) -> Tuple[int, str]:
1296 | """
1297 | Delete memories matching ANY of the given tags (optimized single-query version).
1298 |
1299 | Overrides base class implementation for better performance using OR conditions.
1300 | """
1301 | try:
1302 | if not self.conn:
1303 | return 0, "Database not initialized"
1304 |
1305 | if not tags:
1306 | return 0, "No tags provided"
1307 |
1308 | # Build OR condition for all tags
1309 | # Using LIKE for each tag to match partial tag strings (same as delete_by_tag)
1310 | conditions = " OR ".join(["tags LIKE ?" for _ in tags])
1311 | params = [f"%{tag}%" for tag in tags]
1312 |
1313 | # Get the ids first to delete corresponding embeddings
1314 | query = f'SELECT id FROM memories WHERE {conditions}'
1315 | cursor = self.conn.execute(query, params)
1316 | memory_ids = [row[0] for row in cursor.fetchall()]
1317 |
1318 | # Delete from embeddings table using single query with IN clause
1319 | if memory_ids:
1320 | placeholders = ','.join('?' for _ in memory_ids)
1321 | self.conn.execute(f'DELETE FROM memory_embeddings WHERE rowid IN ({placeholders})', memory_ids)
1322 |
1323 | # Delete from memories table
1324 | delete_query = f'DELETE FROM memories WHERE {conditions}'
1325 | cursor = self.conn.execute(delete_query, params)
1326 | self.conn.commit()
1327 |
1328 | count = cursor.rowcount
1329 | logger.info(f"Deleted {count} memories matching tags: {tags}")
1330 |
1331 | if count > 0:
1332 | return count, f"Successfully deleted {count} memories matching {len(tags)} tag(s)"
1333 | else:
1334 | return 0, f"No memories found matching any of the {len(tags)} tags"
1335 |
1336 | except Exception as e:
1337 | error_msg = f"Failed to delete by tags: {str(e)}"
1338 | logger.error(error_msg)
1339 | return 0, error_msg
1340 |
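    # Usage sketch (illustrative addition): removing every memory that carries at
    # least one of the given tags in a single pass:
    #
    #     count, message = await storage.delete_by_tags(["scratch", "temp"])
    #     logger.info(message)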
1341 | async def cleanup_duplicates(self) -> Tuple[int, str]:
1342 | """Remove duplicate memories based on content hash."""
1343 | try:
1344 | if not self.conn:
1345 | return 0, "Database not initialized"
1346 |
1347 | # Find duplicates (keep the first occurrence)
1348 | cursor = self.conn.execute('''
1349 | DELETE FROM memories
1350 | WHERE rowid NOT IN (
1351 | SELECT MIN(rowid)
1352 | FROM memories
1353 | GROUP BY content_hash
1354 | )
1355 | ''')
1356 | self.conn.commit()
1357 |
1358 | count = cursor.rowcount
1359 | logger.info(f"Cleaned up {count} duplicate memories")
1360 |
1361 | if count > 0:
1362 | return count, f"Successfully removed {count} duplicate memories"
1363 | else:
1364 | return 0, "No duplicate memories found"
1365 |
1366 | except Exception as e:
1367 | error_msg = f"Failed to cleanup duplicates: {str(e)}"
1368 | logger.error(error_msg)
1369 | return 0, error_msg
1370 |
1371 | async def update_memory_metadata(self, content_hash: str, updates: Dict[str, Any], preserve_timestamps: bool = True) -> Tuple[bool, str]:
1372 | """Update memory metadata without recreating the entire memory entry."""
1373 | try:
1374 | if not self.conn:
1375 | return False, "Database not initialized"
1376 |
1377 | # Get current memory
1378 | cursor = self.conn.execute('''
1379 | SELECT content, tags, memory_type, metadata, created_at, created_at_iso
1380 | FROM memories WHERE content_hash = ?
1381 | ''', (content_hash,))
1382 |
1383 | row = cursor.fetchone()
1384 | if not row:
1385 | return False, f"Memory with hash {content_hash} not found"
1386 |
1387 | content, current_tags, current_type, current_metadata_str, created_at, created_at_iso = row
1388 |
1389 | # Parse current metadata
1390 | current_metadata = self._safe_json_loads(current_metadata_str, "update_memory_metadata")
1391 |
1392 | # Apply updates
1393 | new_tags = current_tags
1394 | new_type = current_type
1395 | new_metadata = current_metadata.copy()
1396 |
1397 | # Handle tag updates
1398 | if "tags" in updates:
1399 | if isinstance(updates["tags"], list):
1400 | new_tags = ",".join(updates["tags"])
1401 | else:
1402 | return False, "Tags must be provided as a list of strings"
1403 |
1404 | # Handle memory type updates
1405 | if "memory_type" in updates:
1406 | new_type = updates["memory_type"]
1407 |
1408 | # Handle metadata updates
1409 | if "metadata" in updates:
1410 | if isinstance(updates["metadata"], dict):
1411 | new_metadata.update(updates["metadata"])
1412 | else:
1413 | return False, "Metadata must be provided as a dictionary"
1414 |
1415 | # Handle other custom fields
1416 | protected_fields = {
1417 | "content", "content_hash", "tags", "memory_type", "metadata",
1418 | "embedding", "created_at", "created_at_iso", "updated_at", "updated_at_iso"
1419 | }
1420 |
1421 | for key, value in updates.items():
1422 | if key not in protected_fields:
1423 | new_metadata[key] = value
1424 |
1425 | # Update timestamps
1426 | now = time.time()
1427 | now_iso = datetime.utcfromtimestamp(now).isoformat() + "Z"
1428 |
1429 | # Handle timestamp updates based on preserve_timestamps flag
1430 | if not preserve_timestamps:
1431 | # When preserve_timestamps=False, use timestamps from updates dict if provided
1432 | # This allows syncing timestamps from source (e.g., Cloudflare → SQLite)
1433 | # Always preserve created_at (never reset to current time!)
1434 | created_at = updates.get('created_at', created_at)
1435 | created_at_iso = updates.get('created_at_iso', created_at_iso)
1436 | # Use updated_at from updates or current time
1437 | updated_at = updates.get('updated_at', now)
1438 | updated_at_iso = updates.get('updated_at_iso', now_iso)
1439 | else:
1440 | # preserve_timestamps=True: only update updated_at to current time
1441 | updated_at = now
1442 | updated_at_iso = now_iso
1443 |
1444 | # Update the memory
1445 | self.conn.execute('''
1446 | UPDATE memories SET
1447 | tags = ?, memory_type = ?, metadata = ?,
1448 | updated_at = ?, updated_at_iso = ?,
1449 | created_at = ?, created_at_iso = ?
1450 | WHERE content_hash = ?
1451 | ''', (
1452 | new_tags, new_type, json.dumps(new_metadata),
1453 | updated_at, updated_at_iso, created_at, created_at_iso, content_hash
1454 | ))
1455 |
1456 | self.conn.commit()
1457 |
1458 | # Create summary of updated fields
1459 | updated_fields = []
1460 | if "tags" in updates:
1461 | updated_fields.append("tags")
1462 | if "memory_type" in updates:
1463 | updated_fields.append("memory_type")
1464 | if "metadata" in updates:
1465 | updated_fields.append("custom_metadata")
1466 |
1467 | for key in updates.keys():
1468 | if key not in protected_fields and key not in ["tags", "memory_type", "metadata"]:
1469 | updated_fields.append(key)
1470 |
1471 | updated_fields.append("updated_at")
1472 |
1473 | summary = f"Updated fields: {', '.join(updated_fields)}"
1474 | logger.info(f"Successfully updated metadata for memory {content_hash}")
1475 | return True, summary
1476 |
1477 | except Exception as e:
1478 | error_msg = f"Error updating memory metadata: {str(e)}"
1479 | logger.error(error_msg)
1480 | logger.error(traceback.format_exc())
1481 | return False, error_msg
1482 |
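    # Usage sketch (illustrative addition): retagging a memory and attaching extra
    # metadata; with the default preserve_timestamps=True only updated_at changes:
    #
    #     success, summary = await storage.update_memory_metadata(
    #         content_hash,
    #         {"tags": ["reviewed", "keep"], "metadata": {"reviewer": "example-user"}},
    #     )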
1483 | async def update_memories_batch(self, memories: List[Memory]) -> List[bool]:
1484 | """
1485 | Update multiple memories in a single database transaction for optimal performance.
1486 |
1487 | This method processes all updates in a single transaction, significantly improving
1488 | performance compared to individual update_memory() calls.
1489 |
1490 | Args:
1491 | memories: List of Memory objects with updated fields
1492 |
1493 | Returns:
1494 | List of success booleans, one for each memory in the batch
1495 | """
1496 | if not memories:
1497 | return []
1498 |
1499 | try:
1500 | if not self.conn:
1501 | return [False] * len(memories)
1502 |
1503 | results = [False] * len(memories)
1504 | now = time.time()
1505 | now_iso = datetime.utcfromtimestamp(now).isoformat() + "Z"
1506 |
1507 | # Start transaction (will be committed at the end)
1508 |             # Python's sqlite3 opens an implicit transaction on the first write; all updates below commit together at the end
1509 | cursor = self.conn.cursor()
1510 |
1511 | for idx, memory in enumerate(memories):
1512 | try:
1513 | # Get current memory data
1514 | cursor.execute('''
1515 | SELECT content, tags, memory_type, metadata, created_at, created_at_iso
1516 | FROM memories WHERE content_hash = ?
1517 | ''', (memory.content_hash,))
1518 |
1519 | row = cursor.fetchone()
1520 | if not row:
1521 | logger.warning(f"Memory {memory.content_hash} not found during batch update")
1522 | continue
1523 |
1524 | content, current_tags, current_type, current_metadata_str, created_at, created_at_iso = row
1525 |
1526 | # Parse current metadata
1527 | current_metadata = self._safe_json_loads(current_metadata_str, "update_memories_batch")
1528 |
1529 | # Merge metadata (new metadata takes precedence)
1530 | if memory.metadata:
1531 | merged_metadata = current_metadata.copy()
1532 | merged_metadata.update(memory.metadata)
1533 | else:
1534 | merged_metadata = current_metadata
1535 |
1536 | # Prepare new values
1537 | new_tags = ",".join(memory.tags) if memory.tags else current_tags
1538 | new_type = memory.memory_type if memory.memory_type else current_type
1539 |
1540 | # Execute update
1541 | cursor.execute('''
1542 | UPDATE memories SET
1543 | tags = ?, memory_type = ?, metadata = ?,
1544 | updated_at = ?, updated_at_iso = ?
1545 | WHERE content_hash = ?
1546 | ''', (
1547 | new_tags, new_type, json.dumps(merged_metadata),
1548 | now, now_iso, memory.content_hash
1549 | ))
1550 |
1551 | results[idx] = True
1552 |
1553 | except Exception as e:
1554 | logger.warning(f"Failed to update memory {memory.content_hash} in batch: {e}")
1555 | continue
1556 |
1557 | # Commit all updates in a single transaction
1558 | self.conn.commit()
1559 |
1560 | success_count = sum(results)
1561 | logger.info(f"Batch update completed: {success_count}/{len(memories)} memories updated successfully")
1562 |
1563 | return results
1564 |
1565 | except Exception as e:
1566 | # Rollback on error
1567 | if self.conn:
1568 | self.conn.rollback()
1569 | logger.error(f"Batch update failed: {e}")
1570 | logger.error(traceback.format_exc())
1571 | return [False] * len(memories)
1572 |
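    # Usage sketch (illustrative addition): applying edits to several Memory objects
    # in one transaction; the returned booleans line up with the input order:
    #
    #     outcomes = await storage.update_memories_batch(edited_memories)
    #     failed = [m.content_hash for m, ok in zip(edited_memories, outcomes) if not ok]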
1573 | async def get_stats(self) -> Dict[str, Any]:
1574 | """Get storage statistics."""
1575 | try:
1576 | if not self.conn:
1577 | return {"error": "Database not initialized"}
1578 |
1579 | cursor = self.conn.execute('SELECT COUNT(*) FROM memories')
1580 | total_memories = cursor.fetchone()[0]
1581 |
1582 | # Count unique individual tags (not tag sets)
1583 | cursor = self.conn.execute('SELECT tags FROM memories WHERE tags IS NOT NULL AND tags != ""')
1584 | unique_tags = len(set(
1585 | tag.strip()
1586 | for (tag_string,) in cursor
1587 | if tag_string
1588 | for tag in tag_string.split(",")
1589 | if tag.strip()
1590 | ))
1591 |
1592 | # Count memories from this week (last 7 days)
1593 | import time
1594 | week_ago = time.time() - (7 * 24 * 60 * 60)
1595 | cursor = self.conn.execute('SELECT COUNT(*) FROM memories WHERE created_at >= ?', (week_ago,))
1596 | memories_this_week = cursor.fetchone()[0]
1597 |
1598 | # Get database file size
1599 | file_size = os.path.getsize(self.db_path) if os.path.exists(self.db_path) else 0
1600 |
1601 | return {
1602 | "backend": "sqlite-vec",
1603 | "total_memories": total_memories,
1604 | "unique_tags": unique_tags,
1605 | "memories_this_week": memories_this_week,
1606 | "database_size_bytes": file_size,
1607 | "database_size_mb": round(file_size / (1024 * 1024), 2),
1608 | "embedding_model": self.embedding_model_name,
1609 | "embedding_dimension": self.embedding_dimension
1610 | }
1611 |
1612 | except sqlite3.Error as e:
1613 | logger.error(f"Database error getting stats: {str(e)}")
1614 | return {"error": f"Database error: {str(e)}"}
1615 | except OSError as e:
1616 | logger.error(f"File system error getting stats: {str(e)}")
1617 | return {"error": f"File system error: {str(e)}"}
1618 | except Exception as e:
1619 | logger.error(f"Unexpected error getting stats: {str(e)}")
1620 | return {"error": f"Unexpected error: {str(e)}"}
1621 |
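    # Usage sketch (illustrative addition): the stats dict is plain JSON-serializable
    # data, so it can be logged or surfaced from a health endpoint directly:
    #
    #     stats = await storage.get_stats()
    #     print(stats.get("total_memories"), stats.get("database_size_mb"), "MB")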
1622 | def sanitized(self, tags):
1623 | """Sanitize and normalize tags to a JSON string.
1624 |
1625 | This method provides compatibility with the storage backend interface.
1626 | """
1627 | if tags is None:
1628 | return json.dumps([])
1629 |
1630 | # If we get a string, split it into an array
1631 | if isinstance(tags, str):
1632 | tags = [tag.strip() for tag in tags.split(",") if tag.strip()]
1633 | # If we get an array, use it directly
1634 | elif isinstance(tags, list):
1635 | tags = [str(tag).strip() for tag in tags if str(tag).strip()]
1636 | else:
1637 | return json.dumps([])
1638 |
1639 | # Return JSON string representation of the array
1640 | return json.dumps(tags)
1641 |
1642 | async def recall(self, query: Optional[str] = None, n_results: int = 5, start_timestamp: Optional[float] = None, end_timestamp: Optional[float] = None) -> List[MemoryQueryResult]:
1643 | """
1644 | Retrieve memories with combined time filtering and optional semantic search.
1645 |
1646 | Args:
1647 | query: Optional semantic search query. If None, only time filtering is applied.
1648 | n_results: Maximum number of results to return.
1649 | start_timestamp: Optional start time for filtering.
1650 | end_timestamp: Optional end time for filtering.
1651 |
1652 | Returns:
1653 | List of MemoryQueryResult objects.
1654 | """
1655 | try:
1656 | if not self.conn:
1657 | logger.error("Database not initialized, cannot retrieve memories")
1658 | return []
1659 |
1660 | # Build time filtering WHERE clause
1661 | time_conditions = []
1662 | params = []
1663 |
1664 | if start_timestamp is not None:
1665 | time_conditions.append("created_at >= ?")
1666 | params.append(float(start_timestamp))
1667 |
1668 | if end_timestamp is not None:
1669 | time_conditions.append("created_at <= ?")
1670 | params.append(float(end_timestamp))
1671 |
1672 | time_where = " AND ".join(time_conditions) if time_conditions else ""
1673 |
1674 | logger.info(f"Time filtering conditions: {time_where}, params: {params}")
1675 |
1676 | # Determine whether to use semantic search or just time-based filtering
1677 | if query and self.embedding_model:
1678 | # Combined semantic search with time filtering
1679 | try:
1680 | # Generate query embedding
1681 | query_embedding = self._generate_embedding(query)
1682 |
1683 | # Build SQL query with time filtering
1684 | base_query = '''
1685 | SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
1686 | m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
1687 | e.distance
1688 | FROM memories m
1689 | JOIN (
1690 | SELECT rowid, distance
1691 | FROM memory_embeddings
1692 | WHERE content_embedding MATCH ?
1693 | ORDER BY distance
1694 | LIMIT ?
1695 | ) e ON m.id = e.rowid
1696 | '''
1697 |
1698 | if time_where:
1699 | base_query += f" WHERE {time_where}"
1700 |
1701 | base_query += " ORDER BY e.distance"
1702 |
1703 | # Prepare parameters: embedding, limit, then time filter params
1704 | query_params = [serialize_float32(query_embedding), n_results] + params
1705 |
1706 | cursor = self.conn.execute(base_query, query_params)
1707 |
1708 | results = []
1709 | for row in cursor.fetchall():
1710 | try:
1711 | # Parse row data
1712 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
1713 | created_at, updated_at, created_at_iso, updated_at_iso, distance = row[5:]
1714 |
1715 | # Parse tags and metadata
1716 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1717 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1718 |
1719 | # Create Memory object
1720 | memory = Memory(
1721 | content=content,
1722 | content_hash=content_hash,
1723 | tags=tags,
1724 | memory_type=memory_type,
1725 | metadata=metadata,
1726 | created_at=created_at,
1727 | updated_at=updated_at,
1728 | created_at_iso=created_at_iso,
1729 | updated_at_iso=updated_at_iso
1730 | )
1731 |
1732 |                             # Normalize cosine distance (0..2) to a 0..1 relevance score, matching retrieve()
1733 |                             relevance_score = max(0.0, 1.0 - (float(distance) / 2.0)) if distance is not None else 0.0
1734 |
1735 | results.append(MemoryQueryResult(
1736 | memory=memory,
1737 | relevance_score=relevance_score,
1738 | debug_info={"distance": distance, "backend": "sqlite-vec", "time_filtered": bool(time_where)}
1739 | ))
1740 |
1741 | except Exception as parse_error:
1742 | logger.warning(f"Failed to parse memory result: {parse_error}")
1743 | continue
1744 |
1745 | logger.info(f"Retrieved {len(results)} memories for semantic query with time filter")
1746 | return results
1747 |
1748 | except Exception as query_error:
1749 | logger.error(f"Error in semantic search with time filter: {str(query_error)}")
1750 | # Fall back to time-based retrieval on error
1751 | logger.info("Falling back to time-based retrieval")
1752 |
1753 | # Time-based filtering only (or fallback from failed semantic search)
1754 | base_query = '''
1755 | SELECT content_hash, content, tags, memory_type, metadata,
1756 | created_at, updated_at, created_at_iso, updated_at_iso
1757 | FROM memories
1758 | '''
1759 |
1760 | if time_where:
1761 | base_query += f" WHERE {time_where}"
1762 |
1763 | base_query += " ORDER BY created_at DESC LIMIT ?"
1764 |
1765 | # Add limit parameter
1766 | params.append(n_results)
1767 |
1768 | cursor = self.conn.execute(base_query, params)
1769 |
1770 | results = []
1771 | for row in cursor.fetchall():
1772 | try:
1773 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
1774 | created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
1775 |
1776 | # Parse tags and metadata
1777 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1778 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1779 |
1780 | memory = Memory(
1781 | content=content,
1782 | content_hash=content_hash,
1783 | tags=tags,
1784 | memory_type=memory_type,
1785 | metadata=metadata,
1786 | created_at=created_at,
1787 | updated_at=updated_at,
1788 | created_at_iso=created_at_iso,
1789 | updated_at_iso=updated_at_iso
1790 | )
1791 |
1792 | # For time-based retrieval, we don't have a relevance score
1793 | results.append(MemoryQueryResult(
1794 | memory=memory,
1795 | relevance_score=None,
1796 | debug_info={"backend": "sqlite-vec", "time_filtered": bool(time_where), "query_type": "time_based"}
1797 | ))
1798 |
1799 | except Exception as parse_error:
1800 | logger.warning(f"Failed to parse memory result: {parse_error}")
1801 | continue
1802 |
1803 | logger.info(f"Retrieved {len(results)} memories for time-based query")
1804 | return results
1805 |
1806 | except Exception as e:
1807 | logger.error(f"Error in recall: {str(e)}")
1808 | logger.error(traceback.format_exc())
1809 | return []
1810 |
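    # Usage sketch (illustrative addition): combining a semantic query with a time
    # window; passing query=None falls back to pure time-based retrieval:
    #
    #     import time
    #     results = await storage.recall(
    #         "deployment issues", n_results=10,
    #         start_timestamp=time.time() - 7 * 24 * 3600, end_timestamp=time.time()
    #     )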
1811 | async def get_all_memories(self) -> List[Memory]:
1812 | """
1813 | Get all memories from the database.
1814 |
1815 | Returns:
1816 | List of all Memory objects in the database.
1817 | """
1818 | try:
1819 | if not self.conn:
1820 | logger.error("Database not initialized, cannot retrieve memories")
1821 | return []
1822 |
1823 | cursor = self.conn.execute('''
1824 | SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
1825 | m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
1826 | e.content_embedding
1827 | FROM memories m
1828 | LEFT JOIN memory_embeddings e ON m.id = e.rowid
1829 | ORDER BY m.created_at DESC
1830 | ''')
1831 |
1832 | results = []
1833 | for row in cursor.fetchall():
1834 | try:
1835 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
1836 | created_at, updated_at, created_at_iso, updated_at_iso = row[5:9]
1837 | embedding_blob = row[9] if len(row) > 9 else None
1838 |
1839 | # Parse tags and metadata
1840 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1841 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1842 |
1843 | # Deserialize embedding if present
1844 | embedding = None
1845 | if embedding_blob:
1846 | embedding = deserialize_embedding(embedding_blob)
1847 |
1848 | memory = Memory(
1849 | content=content,
1850 | content_hash=content_hash,
1851 | tags=tags,
1852 | memory_type=memory_type,
1853 | metadata=metadata,
1854 | embedding=embedding,
1855 | created_at=created_at,
1856 | updated_at=updated_at,
1857 | created_at_iso=created_at_iso,
1858 | updated_at_iso=updated_at_iso
1859 | )
1860 |
1861 | results.append(memory)
1862 |
1863 | except Exception as parse_error:
1864 | logger.warning(f"Failed to parse memory result: {parse_error}")
1865 | continue
1866 |
1867 | logger.info(f"Retrieved {len(results)} total memories")
1868 | return results
1869 |
1870 | except Exception as e:
1871 | logger.error(f"Error getting all memories: {str(e)}")
1872 | return []
1873 |
1874 | async def get_memories_by_time_range(self, start_time: float, end_time: float) -> List[Memory]:
1875 | """Get memories within a specific time range."""
1876 | try:
1877 | await self.initialize()
1878 | cursor = self.conn.execute('''
1879 | SELECT content_hash, content, tags, memory_type, metadata,
1880 | created_at, updated_at, created_at_iso, updated_at_iso
1881 | FROM memories
1882 | WHERE created_at BETWEEN ? AND ?
1883 | ORDER BY created_at DESC
1884 | ''', (start_time, end_time))
1885 |
1886 | results = []
1887 | for row in cursor.fetchall():
1888 | try:
1889 | content_hash, content, tags_str, memory_type, metadata_str = row[:5]
1890 | created_at, updated_at, created_at_iso, updated_at_iso = row[5:]
1891 |
1892 | # Parse tags and metadata
1893 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1894 | metadata = self._safe_json_loads(metadata_str, "memory_metadata")
1895 |
1896 | memory = Memory(
1897 | content=content,
1898 | content_hash=content_hash,
1899 | tags=tags,
1900 | memory_type=memory_type,
1901 | metadata=metadata,
1902 | created_at=created_at,
1903 | updated_at=updated_at,
1904 | created_at_iso=created_at_iso,
1905 | updated_at_iso=updated_at_iso
1906 | )
1907 |
1908 | results.append(memory)
1909 |
1910 | except Exception as parse_error:
1911 | logger.warning(f"Failed to parse memory result: {parse_error}")
1912 | continue
1913 |
1914 | logger.info(f"Retrieved {len(results)} memories in time range {start_time}-{end_time}")
1915 | return results
1916 |
1917 | except Exception as e:
1918 | logger.error(f"Error getting memories by time range: {str(e)}")
1919 | return []
1920 |
1921 | async def get_memory_connections(self) -> Dict[str, int]:
1922 | """Get memory connection statistics."""
1923 | try:
1924 | await self.initialize()
1925 |             # For now, return basic connection counts derived from shared tags
1926 | cursor = self.conn.execute('''
1927 | SELECT tags, COUNT(*) as count
1928 | FROM memories
1929 | WHERE tags IS NOT NULL AND tags != ''
1930 | GROUP BY tags
1931 | ''')
1932 |
1933 | connections = {}
1934 | for row in cursor.fetchall():
1935 | tags_str, count = row
1936 | if tags_str:
1937 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
1938 | for tag in tags:
1939 | connections[f"tag:{tag}"] = connections.get(f"tag:{tag}", 0) + count
1940 |
1941 | return connections
1942 |
1943 | except Exception as e:
1944 | logger.error(f"Error getting memory connections: {str(e)}")
1945 | return {}
1946 |
1947 | async def get_access_patterns(self) -> Dict[str, datetime]:
1948 | """Get memory access pattern statistics."""
1949 | try:
1950 | await self.initialize()
1951 | # Return recent access patterns based on updated_at timestamps
1952 | cursor = self.conn.execute('''
1953 | SELECT content_hash, updated_at_iso
1954 | FROM memories
1955 | WHERE updated_at_iso IS NOT NULL
1956 | ORDER BY updated_at DESC
1957 | LIMIT 100
1958 | ''')
1959 |
1960 | patterns = {}
1961 | for row in cursor.fetchall():
1962 | content_hash, updated_at_iso = row
1963 | try:
1964 | patterns[content_hash] = datetime.fromisoformat(updated_at_iso.replace('Z', '+00:00'))
1965 | except Exception:
1966 | # Fallback for timestamp parsing issues
1967 | patterns[content_hash] = datetime.now()
1968 |
1969 | return patterns
1970 |
1971 | except Exception as e:
1972 | logger.error(f"Error getting access patterns: {str(e)}")
1973 | return {}
1974 |
1975 | def _row_to_memory(self, row) -> Optional[Memory]:
1976 | """Convert database row to Memory object."""
1977 | try:
1978 | # Handle both 9-column (without embedding) and 10-column (with embedding) rows
1979 | content_hash, content, tags_str, memory_type, metadata_str, created_at, updated_at, created_at_iso, updated_at_iso = row[:9]
1980 | embedding_blob = row[9] if len(row) > 9 else None
1981 |
1982 | # Parse tags (comma-separated format)
1983 | tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()] if tags_str else []
1984 |
1985 | # Parse metadata
1986 | metadata = self._safe_json_loads(metadata_str, "get_by_hash")
1987 |
1988 | # Deserialize embedding if present
1989 | embedding = None
1990 | if embedding_blob:
1991 | embedding = deserialize_embedding(embedding_blob)
1992 |
1993 | return Memory(
1994 | content=content,
1995 | content_hash=content_hash,
1996 | tags=tags,
1997 | memory_type=memory_type,
1998 | metadata=metadata,
1999 | embedding=embedding,
2000 | created_at=created_at,
2001 | updated_at=updated_at,
2002 | created_at_iso=created_at_iso,
2003 | updated_at_iso=updated_at_iso
2004 | )
2005 |
2006 | except Exception as e:
2007 | logger.error(f"Error converting row to memory: {str(e)}")
2008 | return None
2009 |
2010 |     async def get_all_memories(self, limit: Optional[int] = None, offset: int = 0, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> List[Memory]:
2011 | """
2012 | Get all memories in storage ordered by creation time (newest first).
2013 |
2014 | Args:
2015 | limit: Maximum number of memories to return (None for all)
2016 | offset: Number of memories to skip (for pagination)
2017 | memory_type: Optional filter by memory type
2018 | tags: Optional filter by tags (matches ANY of the provided tags)
2019 |
2020 | Returns:
2021 | List of Memory objects ordered by created_at DESC, optionally filtered by type and tags
2022 | """
2023 | try:
2024 | await self.initialize()
2025 |
2026 | # Build query with optional memory_type and tags filters
2027 | query = '''
2028 | SELECT m.content_hash, m.content, m.tags, m.memory_type, m.metadata,
2029 | m.created_at, m.updated_at, m.created_at_iso, m.updated_at_iso,
2030 | e.content_embedding
2031 | FROM memories m
2032 | LEFT JOIN memory_embeddings e ON m.id = e.rowid
2033 | '''
2034 |
2035 | params = []
2036 | where_conditions = []
2037 |
2038 | # Add memory_type filter if specified
2039 | if memory_type is not None:
2040 | where_conditions.append('m.memory_type = ?')
2041 | params.append(memory_type)
2042 |
2043 | # Add tags filter if specified (using database-level filtering like search_by_tag_chronological)
2044 | if tags and len(tags) > 0:
2045 | tag_conditions = " OR ".join(["m.tags LIKE ?" for _ in tags])
2046 | where_conditions.append(f"({tag_conditions})")
2047 | params.extend([f"%{tag}%" for tag in tags])
2048 |
2049 | # Apply WHERE clause if we have any conditions
2050 | if where_conditions:
2051 | query += ' WHERE ' + ' AND '.join(where_conditions)
2052 |
2053 | query += ' ORDER BY m.created_at DESC'
2054 |
2055 |             if limit is not None:
2056 |                 query += ' LIMIT ?'
2057 |                 params.append(limit)
2058 |
2059 |             if offset > 0:
2060 |                 query += (' LIMIT -1' if limit is None else '') + ' OFFSET ?'  # SQLite requires LIMIT before OFFSET
2061 |                 params.append(offset)
2062 |
2063 | cursor = self.conn.execute(query, params)
2064 | memories = []
2065 |
2066 | for row in cursor.fetchall():
2067 | memory = self._row_to_memory(row)
2068 | if memory:
2069 | memories.append(memory)
2070 |
2071 | return memories
2072 |
2073 | except Exception as e:
2074 | logger.error(f"Error getting all memories: {str(e)}")
2075 | return []
2076 |
2077 | async def get_recent_memories(self, n: int = 10) -> List[Memory]:
2078 | """
2079 | Get n most recent memories.
2080 |
2081 | Args:
2082 | n: Number of recent memories to return
2083 |
2084 | Returns:
2085 | List of the n most recent Memory objects
2086 | """
2087 | return await self.get_all_memories(limit=n, offset=0)
2088 |
2089 | async def get_largest_memories(self, n: int = 10) -> List[Memory]:
2090 | """
2091 | Get n largest memories by content length.
2092 |
2093 | Args:
2094 | n: Number of largest memories to return
2095 |
2096 | Returns:
2097 | List of the n largest Memory objects ordered by content length descending
2098 | """
2099 | try:
2100 | await self.initialize()
2101 |
2102 | # Query for largest memories by content length
2103 | query = """
2104 | SELECT content_hash, content, tags, memory_type, metadata, created_at, updated_at
2105 | FROM memories
2106 | ORDER BY LENGTH(content) DESC
2107 | LIMIT ?
2108 | """
2109 |
2110 | cursor = self.conn.execute(query, (n,))
2111 | rows = cursor.fetchall()
2112 |
2113 | memories = []
2114 | for row in rows:
2115 | try:
2116 | memory = Memory(
2117 | content_hash=row[0],
2118 | content=row[1],
2119 |                         tags=[t.strip() for t in row[2].split(",") if t.strip()] if row[2] else [],  # tags are stored comma-separated, not JSON
2120 | memory_type=row[3],
2121 | metadata=json.loads(row[4]) if row[4] else {},
2122 | created_at=row[5],
2123 | updated_at=row[6]
2124 | )
2125 | memories.append(memory)
2126 | except Exception as parse_error:
2127 | logger.warning(f"Failed to parse memory {row[0]}: {parse_error}")
2128 | continue
2129 |
2130 | return memories
2131 |
2132 | except Exception as e:
2133 | logger.error(f"Error getting largest memories: {e}")
2134 | return []
2135 |
2136 | async def get_memory_timestamps(self, days: Optional[int] = None) -> List[float]:
2137 | """
2138 | Get memory creation timestamps only, without loading full memory objects.
2139 |
2140 | This is an optimized method for analytics that only needs timestamps,
2141 | avoiding the overhead of loading full memory content and embeddings.
2142 |
2143 | Args:
2144 | days: Optional filter to only get memories from last N days
2145 |
2146 | Returns:
2147 | List of Unix timestamps (float) in descending order (newest first)
2148 | """
2149 | try:
2150 | await self.initialize()
2151 |
2152 | if days is not None:
2153 | cutoff = datetime.now(timezone.utc) - timedelta(days=days)
2154 | cutoff_timestamp = cutoff.timestamp()
2155 |
2156 | query = """
2157 | SELECT created_at
2158 | FROM memories
2159 | WHERE created_at >= ?
2160 | ORDER BY created_at DESC
2161 | """
2162 | cursor = self.conn.execute(query, (cutoff_timestamp,))
2163 | else:
2164 | query = """
2165 | SELECT created_at
2166 | FROM memories
2167 | ORDER BY created_at DESC
2168 | """
2169 | cursor = self.conn.execute(query)
2170 |
2171 | rows = cursor.fetchall()
2172 | timestamps = [row[0] for row in rows if row[0] is not None]
2173 |
2174 | return timestamps
2175 |
2176 | except Exception as e:
2177 | logger.error(f"Error getting memory timestamps: {e}")
2178 | return []
2179 |
2180 | async def count_all_memories(self, memory_type: Optional[str] = None, tags: Optional[List[str]] = None) -> int:
2181 | """
2182 | Get total count of memories in storage.
2183 |
2184 | Args:
2185 | memory_type: Optional filter by memory type
2186 | tags: Optional filter by tags (memories matching ANY of the tags)
2187 |
2188 | Returns:
2189 | Total number of memories, optionally filtered by type and/or tags
2190 | """
2191 | try:
2192 | await self.initialize()
2193 |
2194 | # Build query with filters
2195 | conditions = []
2196 | params = []
2197 |
2198 | if memory_type is not None:
2199 | conditions.append('memory_type = ?')
2200 | params.append(memory_type)
2201 |
2202 | if tags:
2203 | # Filter by tags - match ANY tag (OR logic)
2204 | tag_conditions = ' OR '.join(['tags LIKE ?' for _ in tags])
2205 | conditions.append(f'({tag_conditions})')
2206 | # Add each tag with wildcards for LIKE matching
2207 | for tag in tags:
2208 | params.append(f'%{tag}%')
2209 |
2210 | # Build final query
2211 | if conditions:
2212 | query = 'SELECT COUNT(*) FROM memories WHERE ' + ' AND '.join(conditions)
2213 | cursor = self.conn.execute(query, tuple(params))
2214 | else:
2215 | cursor = self.conn.execute('SELECT COUNT(*) FROM memories')
2216 |
2217 | result = cursor.fetchone()
2218 | return result[0] if result else 0
2219 |
2220 | except Exception as e:
2221 | logger.error(f"Error counting memories: {str(e)}")
2222 | return 0
2223 |
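    # Usage sketch (illustrative addition): counting is cheap enough to drive
    # pagination without loading any memory rows:
    #
    #     total = await storage.count_all_memories(memory_type="note", tags=["project-x"])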
2224 | async def get_all_tags_with_counts(self) -> List[Dict[str, Any]]:
2225 | """
2226 | Get all tags with their usage counts.
2227 |
2228 | Returns:
2229 | List of dictionaries with 'tag' and 'count' keys, sorted by count descending
2230 | """
2231 | try:
2232 | await self.initialize()
2233 |
2234 | # No explicit transaction needed - SQLite in WAL mode handles this automatically
2235 | # Get all tags from the database
2236 | cursor = self.conn.execute('''
2237 | SELECT tags
2238 | FROM memories
2239 | WHERE tags IS NOT NULL AND tags != ''
2240 | ''')
2241 |
2242 | # Fetch all rows first to avoid holding cursor during processing
2243 | rows = cursor.fetchall()
2244 |
2245 | # Yield control to event loop before processing
2246 | await asyncio.sleep(0)
2247 |
2248 | # Use Counter with generator expression for memory efficiency
2249 | tag_counter = Counter(
2250 | tag.strip()
2251 | for (tag_string,) in rows
2252 | if tag_string
2253 | for tag in tag_string.split(",")
2254 | if tag.strip()
2255 | )
2256 |
2257 | # Return as list of dicts sorted by count descending
2258 | return [{"tag": tag, "count": count} for tag, count in tag_counter.most_common()]
2259 |
2260 | except sqlite3.Error as e:
2261 | logger.error(f"Database error getting tags with counts: {str(e)}")
2262 | return []
2263 | except Exception as e:
2264 | logger.error(f"Unexpected error getting tags with counts: {str(e)}")
2265 | raise
2266 |
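    # Usage sketch (illustrative addition): the returned list is already sorted by
    # count, so a "top tags" view can take a slice directly:
    #
    #     top_tags = (await storage.get_all_tags_with_counts())[:10]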
2267 | def close(self):
2268 | """Close the database connection."""
2269 | if self.conn:
2270 | self.conn.close()
2271 | self.conn = None
2272 | logger.info("SQLite-vec storage connection closed")
2273 |
```