This is page 33 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/consolidation/consolidator.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Main dream-inspired consolidation orchestrator."""
16 |
17 | import asyncio
18 | from typing import List, Dict, Any, Optional, Protocol, Tuple
19 | from datetime import datetime, timedelta, timezone
20 | import logging
21 | import time
22 |
23 | from .base import ConsolidationConfig, ConsolidationReport, ConsolidationError
24 | from .decay import ExponentialDecayCalculator
25 | from .associations import CreativeAssociationEngine
26 | from .clustering import SemanticClusteringEngine
27 | from .compression import SemanticCompressionEngine
28 | from .forgetting import ControlledForgettingEngine
29 | from .health import ConsolidationHealthMonitor
30 | from ..models.memory import Memory
31 |
32 | # Protocol for storage backend interface
33 | class StorageProtocol(Protocol):
34 | async def get_all_memories(self) -> List[Memory]: ...
35 | async def get_memories_by_time_range(self, start_time: float, end_time: float) -> List[Memory]: ...
36 | async def store(self, memory: Memory) -> Tuple[bool, str]: ...
37 | async def update_memory(self, memory: Memory) -> bool: ...
38 | async def delete_memory(self, content_hash: str) -> bool: ...
39 | async def get_memory_connections(self) -> Dict[str, int]: ...
40 | async def get_access_patterns(self) -> Dict[str, datetime]: ...
41 |
42 |
43 | class SyncPauseContext:
44 | """Context manager for pausing/resuming hybrid backend sync."""
45 |
46 | def __init__(self, storage, logger):
47 | self.storage = storage
48 | self.logger = logger
49 | self.is_hybrid = hasattr(storage, 'pause_sync') and hasattr(storage, 'resume_sync')
50 | self.sync_paused = False
51 |
52 | async def __aenter__(self):
53 | if self.is_hybrid:
54 | self.logger.info("Pausing hybrid backend sync during consolidation")
55 | await self.storage.pause_sync()
56 | self.sync_paused = True
57 | return self
58 |
59 | async def __aexit__(self, exc_type, exc_val, exc_tb):
60 | if self.sync_paused:
61 | try:
62 | self.logger.info("Resuming hybrid backend sync after consolidation")
63 | await self.storage.resume_sync()
64 | except Exception as e:
65 | self.logger.error(f"Failed to resume sync: {e}", exc_info=True)
66 |
67 |
68 | def check_horizon_requirements(time_horizon: str, phase_name: str,
69 | enabled_phases: Dict[str, List[str]]) -> bool:
70 | """Check if a consolidation phase should run for the given horizon.
71 |
72 | Args:
73 | time_horizon: Current time horizon (daily, weekly, etc.)
74 | phase_name: Phase identifier (clustering, associations, etc.)
75 | enabled_phases: Dict mapping phase names to applicable horizons
76 |
77 | Returns:
78 | bool: True if phase should run
79 | """
80 | applicable_horizons = enabled_phases.get(phase_name, [])
81 | return time_horizon in applicable_horizons
82 |
83 |
84 | # Horizon configuration
85 | HORIZON_CONFIGS = {
86 | 'daily': {'delta': timedelta(days=1), 'cutoff_days': 2},
87 | 'weekly': {'delta': timedelta(days=7), 'cutoff_days': None},
88 | 'monthly': {'delta': timedelta(days=30), 'cutoff_days': None},
89 | 'quarterly': {'delta': timedelta(days=90), 'cutoff_days': 90},
90 | 'yearly': {'delta': timedelta(days=365), 'cutoff_days': 365}
91 | }
92 |
93 |
94 | def filter_memories_by_age(memories: List[Memory], cutoff_date: datetime) -> List[Memory]:
95 | """Filter memories created before the cutoff date.
96 |
97 | Args:
98 | memories: List of Memory objects
99 | cutoff_date: Only keep memories older than this
100 |
101 | Returns:
102 | Filtered list of memories
103 | """
104 | return [
105 | m for m in memories
106 |         if m.created_at and datetime.fromtimestamp(m.created_at, tz=timezone.utc) < cutoff_date
107 | ]
108 |
109 | class DreamInspiredConsolidator:
110 | """
111 | Main consolidation engine with biologically-inspired processing.
112 |
113 | Orchestrates the full consolidation pipeline including:
114 | - Exponential decay scoring
115 | - Creative association discovery
116 | - Semantic clustering and compression
117 | - Controlled forgetting with archival
118 | """
119 |
120 | # Phase enablement configuration
121 | ENABLED_PHASES = {
122 | 'clustering': ['weekly', 'monthly', 'quarterly'],
123 | 'associations': ['weekly', 'monthly'],
124 | 'compression': ['weekly', 'monthly', 'quarterly'],
125 | 'forgetting': ['monthly', 'quarterly', 'yearly']
126 | }
127 |
128 | def __init__(self, storage: StorageProtocol, config: ConsolidationConfig):
129 | self.storage = storage
130 | self.config = config
131 | self.logger = logging.getLogger(__name__)
132 |
133 | # Initialize component engines
134 | self.decay_calculator = ExponentialDecayCalculator(config)
135 | self.association_engine = CreativeAssociationEngine(config)
136 | self.clustering_engine = SemanticClusteringEngine(config)
137 | self.compression_engine = SemanticCompressionEngine(config)
138 | self.forgetting_engine = ControlledForgettingEngine(config)
139 |
140 | # Initialize health monitoring
141 | self.health_monitor = ConsolidationHealthMonitor(config)
142 |
143 | # Performance tracking
144 | self.last_consolidation_times = {}
145 | self.consolidation_stats = {
146 | 'total_runs': 0,
147 | 'successful_runs': 0,
148 | 'total_memories_processed': 0,
149 | 'total_associations_created': 0,
150 | 'total_clusters_created': 0,
151 | 'total_memories_compressed': 0,
152 | 'total_memories_archived': 0
153 | }
154 |
155 | async def consolidate(self, time_horizon: str, **kwargs) -> ConsolidationReport:
156 | """
157 | Run full consolidation pipeline for given time horizon.
158 |
159 | Args:
160 | time_horizon: 'daily', 'weekly', 'monthly', 'quarterly', 'yearly'
161 | **kwargs: Additional parameters for consolidation
162 |
163 | Returns:
164 | ConsolidationReport with results and performance metrics
165 | """
166 | start_time = datetime.now()
167 | report = ConsolidationReport(
168 | time_horizon=time_horizon,
169 | start_time=start_time,
170 | end_time=start_time, # Will be updated at the end
171 | memories_processed=0
172 | )
173 |
174 | try:
175 | self.logger.info(f"Starting {time_horizon} consolidation - this may take several minutes depending on memory count...")
176 |
177 | # Use context manager for sync pause/resume
178 | async with SyncPauseContext(self.storage, self.logger):
179 | # 1. Retrieve memories for processing
180 | memories = await self._get_memories_for_horizon(time_horizon, **kwargs)
181 | report.memories_processed = len(memories)
182 |
183 | if not memories:
184 | self.logger.info(f"No memories to process for {time_horizon} consolidation")
185 | return self._finalize_report(report, [])
186 |
187 | self.logger.info(f"✓ Found {len(memories)} memories to process")
188 |
189 | # 2. Calculate/update relevance scores
190 | self.logger.info(f"📊 Phase 1/6: Calculating relevance scores for {len(memories)} memories...")
191 | performance_start = time.time()
192 | relevance_scores = await self._update_relevance_scores(memories, time_horizon)
193 | self.logger.info(f"✓ Relevance scoring completed in {time.time() - performance_start:.1f}s")
194 |
195 | # 3. Cluster by semantic similarity (if enabled and appropriate)
196 | clusters = []
197 | if self.config.clustering_enabled and check_horizon_requirements(
198 | time_horizon, 'clustering', self.ENABLED_PHASES
199 | ):
200 | self.logger.info(f"🔗 Phase 2/6: Clustering memories by semantic similarity...")
201 | performance_start = time.time()
202 | clusters = await self.clustering_engine.process(memories)
203 | report.clusters_created = len(clusters)
204 | self.logger.info(f"✓ Clustering completed in {time.time() - performance_start:.1f}s, created {len(clusters)} clusters")
205 |
206 | # 4. Run creative associations (if enabled and appropriate)
207 | associations = []
208 | if self.config.associations_enabled and check_horizon_requirements(
209 | time_horizon, 'associations', self.ENABLED_PHASES
210 | ):
211 | self.logger.info(f"🧠 Phase 3/6: Discovering creative associations...")
212 | performance_start = time.time()
213 | existing_associations = await self._get_existing_associations()
214 | associations = await self.association_engine.process(
215 | memories, existing_associations=existing_associations
216 | )
217 | report.associations_discovered = len(associations)
218 | self.logger.info(f"✓ Association discovery completed in {time.time() - performance_start:.1f}s, found {len(associations)} associations")
219 |
220 | # Store new associations as memories
221 | await self._store_associations_as_memories(associations)
222 |
223 | # 5. Compress clusters (if enabled and clusters exist)
224 | compression_results = []
225 | if self.config.compression_enabled and clusters and check_horizon_requirements(
226 | time_horizon, 'compression', self.ENABLED_PHASES
227 | ):
228 | self.logger.info(f"🗜️ Phase 4/6: Compressing memory clusters...")
229 | performance_start = time.time()
230 | compression_results = await self.compression_engine.process(clusters, memories)
231 | report.memories_compressed = len(compression_results)
232 | self.logger.info(f"✓ Compression completed in {time.time() - performance_start:.1f}s, compressed {len(compression_results)} clusters")
233 |
234 | # Store compressed memories and update originals
235 | await self._handle_compression_results(compression_results)
236 |
237 | # 6. Controlled forgetting (if enabled and appropriate)
238 | forgetting_results = []
239 | if self.config.forgetting_enabled and check_horizon_requirements(
240 | time_horizon, 'forgetting', self.ENABLED_PHASES
241 | ):
242 | self.logger.info(f"🗂️ Phase 5/6: Applying controlled forgetting...")
243 | performance_start = time.time()
244 | access_patterns = await self._get_access_patterns()
245 | forgetting_results = await self.forgetting_engine.process(
246 | memories, relevance_scores,
247 | access_patterns=access_patterns,
248 | time_horizon=time_horizon
249 | )
250 | report.memories_archived = len([r for r in forgetting_results if r.action_taken in ['archived', 'deleted']])
251 | self.logger.info(f"✓ Forgetting completed in {time.time() - performance_start:.1f}s, processed {len(forgetting_results)} candidates")
252 |
253 | # Apply forgetting results to storage
254 | await self._apply_forgetting_results(forgetting_results)
255 |
256 | # 7. Update consolidation statistics
257 | self._update_consolidation_stats(report)
258 |
259 | # 8. Track consolidation timestamp for incremental mode
260 | if self.config.incremental_mode:
261 | await self._update_consolidation_timestamps(memories)
262 |
263 | # 9. Finalize report
264 | return self._finalize_report(report, [])
265 |
266 | except ConsolidationError as e:
267 | # Re-raise configuration and validation errors
268 | self.logger.error(f"Configuration error during {time_horizon} consolidation: {e}")
269 | self.health_monitor.record_error('consolidator', e, {'time_horizon': time_horizon})
270 | raise
271 | except Exception as e:
272 | self.logger.error(f"Error during {time_horizon} consolidation: {e}")
273 | self.health_monitor.record_error('consolidator', e, {'time_horizon': time_horizon})
274 | report.errors.append(str(e))
275 | return self._finalize_report(report, [str(e)])
276 |
277 | async def _get_memories_for_horizon(self, time_horizon: str, **kwargs) -> List[Memory]:
278 | """Get memories appropriate for the given time horizon.
279 |
280 | With incremental mode enabled, returns oldest-first batch of memories
281 | that haven't been recently consolidated.
282 | """
283 | now = datetime.now(timezone.utc)
284 |
285 | # Validate time horizon
286 | if time_horizon not in HORIZON_CONFIGS:
287 | raise ConsolidationError(f"Unknown time horizon: {time_horizon}")
288 |
289 | config = HORIZON_CONFIGS[time_horizon]
290 |
291 | # For daily processing, get recent memories (no change - already efficient)
292 | if time_horizon == 'daily':
293 | cutoff_days = config.get('cutoff_days', 2)
294 | start_time = (now - timedelta(days=cutoff_days)).timestamp()
295 | end_time = now.timestamp()
296 | memories = await self.storage.get_memories_by_time_range(start_time, end_time)
297 | else:
298 | # For longer horizons: incremental oldest-first processing
299 | memories = await self.storage.get_all_memories()
300 |
301 | # Filter by relevance to time horizon (quarterly/yearly still focus on old memories)
302 | cutoff_days = config.get('cutoff_days')
303 | if cutoff_days is not None:
304 | cutoff_date = now - timedelta(days=cutoff_days)
305 | memories = filter_memories_by_age(memories, cutoff_date)
306 |
307 | # Incremental mode: Sort oldest-first and batch
308 | if self.config.incremental_mode:
309 | # Sort by last_consolidated_at (oldest first), fallback to created_at
310 | def get_consolidation_sort_key(memory: Memory) -> float:
311 | # Check metadata for last consolidation timestamp
312 | if memory.metadata and 'last_consolidated_at' in memory.metadata:
313 | return float(memory.metadata['last_consolidated_at'])
314 | # Fall back to created_at (treat never-consolidated as oldest)
315 | return memory.created_at if memory.created_at else 0.0
316 |
317 | memories.sort(key=get_consolidation_sort_key)
318 |
319 | # Limit to batch size
320 | batch_size = self.config.batch_size
321 | if len(memories) > batch_size:
322 | self.logger.info(f"Incremental mode: Processing {batch_size} oldest memories (out of {len(memories)} total)")
323 | memories = memories[:batch_size]
324 |
325 | return memories
326 |
327 | async def _update_relevance_scores(self, memories: List[Memory], time_horizon: str) -> List:
328 | """Calculate and update relevance scores for memories."""
329 | # Get connection and access data
330 | connections = await self._get_memory_connections()
331 | access_patterns = await self._get_access_patterns()
332 |
333 | # Calculate relevance scores
334 | relevance_scores = await self.decay_calculator.process(
335 | memories,
336 | connections=connections,
337 | access_patterns=access_patterns,
338 | reference_time=datetime.now()
339 | )
340 |
341 | # Update memory metadata with relevance scores
342 | for memory in memories:
343 | score = next((s for s in relevance_scores if s.memory_hash == memory.content_hash), None)
344 | if score:
345 | updated_memory = await self.decay_calculator.update_memory_relevance_metadata(memory, score)
346 | await self.storage.update_memory(updated_memory)
347 |
348 | return relevance_scores
349 |
350 | async def _get_memory_connections(self) -> Dict[str, int]:
351 | """Get memory connection counts from storage."""
352 | try:
353 | return await self.storage.get_memory_connections()
354 | except AttributeError:
355 | # Fallback if storage doesn't implement connection tracking
356 | self.logger.warning("Storage backend doesn't support connection tracking")
357 | return {}
358 |
359 | async def _get_access_patterns(self) -> Dict[str, datetime]:
360 | """Get memory access patterns from storage."""
361 | try:
362 | return await self.storage.get_access_patterns()
363 | except AttributeError:
364 | # Fallback if storage doesn't implement access tracking
365 | self.logger.warning("Storage backend doesn't support access pattern tracking")
366 | return {}
367 |
368 | async def _get_existing_associations(self) -> set:
369 | """Get existing memory associations to avoid duplicates."""
370 | try:
371 | # Look for existing association memories
372 | all_memories = await self.storage.get_all_memories()
373 | associations = set()
374 |
375 | for memory in all_memories:
376 | if memory.memory_type == 'association' and 'source_memory_hashes' in memory.metadata:
377 | source_hashes = memory.metadata['source_memory_hashes']
378 | if isinstance(source_hashes, list) and len(source_hashes) >= 2:
379 | # Create canonical pair representation
380 | pair_key = tuple(sorted(source_hashes[:2]))
381 | associations.add(pair_key)
382 |
383 | return associations
384 |
385 | except Exception as e:
386 | self.logger.warning(f"Error getting existing associations: {e}")
387 | return set()
388 |
389 | async def _store_associations_as_memories(self, associations) -> None:
390 | """Store discovered associations as first-class memories."""
391 | for association in associations:
392 | # Create memory content from association
393 | source_hashes = association.source_memory_hashes
394 | similarity = association.similarity_score
395 | connection_type = association.connection_type
396 |
397 | content = f"Association between memories {source_hashes[0][:8]} and {source_hashes[1][:8]}: {connection_type} (similarity: {similarity:.3f})"
398 |
399 | # Create association memory
400 | association_memory = Memory(
401 | content=content,
402 | content_hash=f"assoc_{source_hashes[0][:8]}_{source_hashes[1][:8]}",
403 | tags=['association', 'discovered'] + connection_type.split(', '),
404 | memory_type='association',
405 | metadata={
406 | 'source_memory_hashes': source_hashes,
407 | 'similarity_score': similarity,
408 | 'connection_type': connection_type,
409 | 'discovery_method': association.discovery_method,
410 | 'discovery_date': association.discovery_date.isoformat(),
411 | **association.metadata
412 | },
413 | created_at=datetime.now().timestamp(),
414 | created_at_iso=datetime.now().isoformat() + 'Z'
415 | )
416 |
417 | # Store the association memory
418 | success, _ = await self.storage.store(association_memory)
419 | if not success:
420 |                 self.logger.warning(f"Failed to store association memory for {source_hashes[0][:8]} <-> {source_hashes[1][:8]}")
421 |
422 | async def _handle_compression_results(self, compression_results) -> None:
423 | """Handle storage of compressed memories and linking to originals."""
424 | for result in compression_results:
425 | # Store compressed memory
426 | success, _ = await self.storage.store(result.compressed_memory)
427 | if not success:
428 |                 self.logger.warning("Failed to store compressed memory")
429 |
430 | # Update original memories with compression links
431 | # This could involve adding metadata pointing to the compressed version
432 | # Implementation depends on how the storage backend handles relationships
433 | pass
434 |
435 | async def _apply_forgetting_results(self, forgetting_results) -> None:
436 | """Apply forgetting results to the storage backend."""
437 | for result in forgetting_results:
438 | if result.action_taken == 'deleted':
439 | await self.storage.delete_memory(result.memory_hash)
440 | elif result.action_taken == 'compressed' and result.compressed_version:
441 | # Replace original with compressed version
442 | await self.storage.delete_memory(result.memory_hash)
443 | success, _ = await self.storage.store(result.compressed_version)
444 | if not success:
445 |                     self.logger.warning(f"Failed to store compressed version for {result.memory_hash}")
446 | # 'archived' memories are handled by the forgetting engine
447 |
448 | async def _update_consolidation_timestamps(self, memories: List[Memory]) -> None:
449 | """Mark memories with last_consolidated_at timestamp for incremental mode using batch updates."""
450 | consolidation_time = datetime.now().timestamp()
451 |
452 | self.logger.info(f"Marking {len(memories)} memories with consolidation timestamp (batch mode)")
453 |
454 | # Update all memories in-place
455 | for memory in memories:
456 | if memory.metadata is None:
457 | memory.metadata = {}
458 | memory.metadata['last_consolidated_at'] = consolidation_time
459 |
460 | # Use batch update for optimal performance
461 | try:
462 | results = await self.storage.update_memories_batch(memories)
463 | success_count = sum(results)
464 | self.logger.info(f"Consolidation timestamps updated: {success_count}/{len(memories)} memories")
465 |
466 | if success_count < len(memories):
467 | failed_count = len(memories) - success_count
468 | self.logger.warning(f"{failed_count} memories failed to update during timestamp marking")
469 |
470 | except Exception as e:
471 | self.logger.error(f"Batch timestamp update failed: {e}")
472 | # Fallback to individual updates if batch fails
473 | self.logger.info("Falling back to individual timestamp updates")
474 | success_count = 0
475 | for memory in memories:
476 | try:
477 | success = await self.storage.update_memory(memory)
478 | if success:
479 | success_count += 1
480 | except Exception as mem_error:
481 | self.logger.warning(f"Failed to update consolidation timestamp for {memory.content_hash}: {mem_error}")
482 |
483 | self.logger.info(f"Fallback completed: {success_count}/{len(memories)} memories updated")
484 |
485 | def _update_consolidation_stats(self, report: ConsolidationReport) -> None:
486 | """Update internal consolidation statistics."""
487 | self.consolidation_stats['total_runs'] += 1
488 | if not report.errors:
489 | self.consolidation_stats['successful_runs'] += 1
490 |
491 | self.consolidation_stats['total_memories_processed'] += report.memories_processed
492 | self.consolidation_stats['total_associations_created'] += report.associations_discovered
493 | self.consolidation_stats['total_clusters_created'] += report.clusters_created
494 | self.consolidation_stats['total_memories_compressed'] += report.memories_compressed
495 | self.consolidation_stats['total_memories_archived'] += report.memories_archived
496 |
497 | # Update last consolidation time
498 | self.last_consolidation_times[report.time_horizon] = report.start_time
499 |
500 | def _finalize_report(self, report: ConsolidationReport, errors: List[str]) -> ConsolidationReport:
501 | """Finalize the consolidation report."""
502 | report.end_time = datetime.now()
503 | report.errors.extend(errors)
504 |
505 | # Add performance metrics
506 | duration = (report.end_time - report.start_time).total_seconds()
507 | success = len(errors) == 0
508 | report.performance_metrics = {
509 | 'duration_seconds': duration,
510 | 'memories_per_second': report.memories_processed / duration if duration > 0 else 0,
511 | 'success': success
512 | }
513 |
514 | # Record performance in health monitor
515 | self.health_monitor.record_consolidation_performance(
516 | time_horizon=report.time_horizon,
517 | duration=duration,
518 | memories_processed=report.memories_processed,
519 | success=success,
520 | errors=errors
521 | )
522 |
523 | # Log summary
524 | if errors:
525 | self.logger.error(f"Consolidation {report.time_horizon} completed with errors: {errors}")
526 | else:
527 | self.logger.info(
528 | f"Consolidation {report.time_horizon} completed successfully: "
529 | f"{report.memories_processed} memories, {report.associations_discovered} associations, "
530 | f"{report.clusters_created} clusters, {report.memories_compressed} compressed, "
531 | f"{report.memories_archived} archived in {duration:.2f}s"
532 | )
533 |
534 | return report
535 |
536 | async def health_check(self) -> Dict[str, Any]:
537 | """Perform health check on the consolidation system."""
538 | return await self.health_monitor.check_overall_health()
539 |
540 | async def get_health_summary(self) -> Dict[str, Any]:
541 | """Get a summary of consolidation system health."""
542 | return await self.health_monitor.get_health_summary()
543 |
544 | def get_error_history(self, limit: int = 50) -> List[Dict[str, Any]]:
545 | """Get recent error history."""
546 | return self.health_monitor.error_history[-limit:]
547 |
548 | def get_performance_history(self, limit: int = 100) -> List[Dict[str, Any]]:
549 | """Get recent performance history."""
550 | return self.health_monitor.performance_history[-limit:]
551 |
552 | def resolve_health_alert(self, alert_id: str):
553 | """Resolve a health alert."""
554 | self.health_monitor.resolve_alert(alert_id)
555 |
556 | async def get_consolidation_recommendations(self, time_horizon: str) -> Dict[str, Any]:
557 | """Get recommendations for consolidation based on current memory state."""
558 | try:
559 | memories = await self._get_memories_for_horizon(time_horizon)
560 |
561 | if not memories:
562 | return {
563 | 'recommendation': 'no_action',
564 | 'reason': 'No memories to process',
565 | 'memory_count': 0
566 | }
567 |
568 | # Analyze memory distribution
569 | memory_types = {}
570 | total_size = 0
571 | old_memories = 0
572 | now = datetime.now()
573 |
574 | for memory in memories:
575 | memory_type = memory.memory_type or 'standard'
576 | memory_types[memory_type] = memory_types.get(memory_type, 0) + 1
577 | total_size += len(memory.content)
578 |
579 | if memory.created_at:
580 | age_days = (now - datetime.utcfromtimestamp(memory.created_at)).days
581 | if age_days > 30:
582 | old_memories += 1
583 |
584 | # Generate recommendations
585 | recommendations = []
586 |
587 | if len(memories) > 1000:
588 | recommendations.append("Consider running compression to reduce memory usage")
589 |
590 | if old_memories > len(memories) * 0.5:
591 | recommendations.append("Many old memories present - consider forgetting/archival")
592 |
593 | if len(memories) > 100 and time_horizon in ['weekly', 'monthly']:
594 | recommendations.append("Good candidate for association discovery")
595 |
596 | if not recommendations:
597 | recommendations.append("Memory state looks healthy")
598 |
599 | return {
600 | 'recommendation': 'consolidation_beneficial' if len(recommendations) > 1 else 'optional',
601 | 'reasons': recommendations,
602 | 'memory_count': len(memories),
603 | 'memory_types': memory_types,
604 | 'total_size_bytes': total_size,
605 | 'old_memory_percentage': (old_memories / len(memories)) * 100,
606 | 'estimated_duration_seconds': len(memories) * 0.01 # Rough estimate
607 | }
608 |
609 | except Exception as e:
610 | return {
611 | 'recommendation': 'error',
612 | 'error': str(e),
613 | 'memory_count': 0
614 | }
```
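For orientation, here is a minimal usage sketch of the consolidator above. It is not part of the repository: it assumes the package is installed, that `ConsolidationConfig()` can be constructed with defaults, and that the SQLite-vec backend (initialized the same way as in the integration tests below) satisfies enough of `StorageProtocol` for a run; the consolidator already degrades gracefully when connection or access-pattern tracking is unavailable.

```python
# Illustrative sketch only -- not a file from this repository.
import asyncio

from mcp_memory_service.consolidation.base import ConsolidationConfig
from mcp_memory_service.consolidation.consolidator import DreamInspiredConsolidator
from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage


async def main() -> None:
    # Any backend satisfying StorageProtocol works; sqlite_vec is used here as an example.
    storage = SqliteVecMemoryStorage("memories.db")
    await storage.initialize()

    config = ConsolidationConfig()  # assumption: default settings are acceptable
    consolidator = DreamInspiredConsolidator(storage, config)

    # Optionally inspect recommendations before committing to a full run.
    advice = await consolidator.get_consolidation_recommendations("weekly")
    print(advice["recommendation"], advice["memory_count"])

    # Run the weekly pipeline: relevance scoring, clustering, associations, compression.
    report = await consolidator.consolidate("weekly")
    print(report.performance_metrics)


asyncio.run(main())
```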
--------------------------------------------------------------------------------
/tests/integration/test_api_with_memory_service.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Integration tests for API endpoints using MemoryService.
3 |
4 | These tests verify that the API layer correctly integrates with
5 | MemoryService for all memory operations and maintains consistent behavior.
6 | """
7 |
8 | import pytest
9 | import pytest_asyncio
10 | import tempfile
11 | import os
12 | from unittest.mock import AsyncMock, MagicMock, patch
13 | from fastapi.testclient import TestClient
14 |
15 | from mcp_memory_service.web.dependencies import set_storage, get_memory_service
16 | from mcp_memory_service.services.memory_service import MemoryService
17 | from mcp_memory_service.models.memory import Memory
18 | from mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
19 |
20 |
21 | # Test Fixtures
22 |
23 | @pytest.fixture
24 | def temp_db():
25 | """Create a temporary database for testing."""
26 | with tempfile.TemporaryDirectory() as tmpdir:
27 | db_path = os.path.join(tmpdir, "test.db")
28 | yield db_path
29 |
30 |
31 | @pytest_asyncio.fixture
32 | async def initialized_storage(temp_db):
33 | """Create and initialize a real SQLite storage backend."""
34 | storage = SqliteVecMemoryStorage(temp_db)
35 | await storage.initialize()
36 | yield storage
37 | storage.close()
38 |
39 |
40 | @pytest.fixture
41 | def test_app(initialized_storage):
42 | """Create a FastAPI test application with initialized storage."""
43 | # Import here to avoid circular dependencies
44 | from mcp_memory_service.web.server import app
45 |
46 | # Set storage for the app
47 | set_storage(initialized_storage)
48 |
49 | client = TestClient(app)
50 | yield client
51 |
52 |
53 | @pytest.fixture
54 | def mock_storage():
55 | """Create a mock storage for isolated testing."""
56 | storage = AsyncMock()
57 | return storage
58 |
59 |
60 | @pytest.fixture
61 | def mock_memory_service(mock_storage):
62 | """Create a MemoryService with mock storage."""
63 | return MemoryService(storage=mock_storage)
64 |
65 |
66 | @pytest.fixture
67 | def sample_memory():
68 | """Create a sample memory for testing."""
69 | return Memory(
70 | content="Integration test memory",
71 | content_hash="test_hash_123",
72 | tags=["integration", "test"],
73 | memory_type="note",
74 | metadata={"source": "test"},
75 | created_at=1698765432.0,
76 | updated_at=1698765432.0
77 | )
78 |
79 |
80 | # Test API Store Memory Endpoint
81 |
82 | @pytest.mark.asyncio
83 | async def test_api_store_memory_uses_service(mock_storage):
84 | """Test that POST /api/memories uses MemoryService."""
85 | mock_storage.store.return_value = None
86 |
87 | # Create service
88 | service = MemoryService(storage=mock_storage)
89 |
90 | # Simulate API call through service
91 | result = await service.store_memory(
92 | content="Test API storage",
93 | tags=["api", "test"],
94 | memory_type="note"
95 | )
96 |
97 | assert result["success"] is True
98 | assert "memory" in result
99 | mock_storage.store.assert_called_once()
100 |
101 |
102 | @pytest.mark.asyncio
103 | async def test_api_store_memory_hostname_from_header(mock_storage):
104 | """Test that X-Client-Hostname header is processed correctly."""
105 | mock_storage.store.return_value = None
106 |
107 | service = MemoryService(storage=mock_storage)
108 |
109 | # Simulate API call with hostname
110 | result = await service.store_memory(
111 | content="Test with hostname",
112 | tags=["test"],
113 | client_hostname="client-machine"
114 | )
115 |
116 | # Verify hostname tag was added
117 | stored_memory = mock_storage.store.call_args.args[0]
118 | assert "source:client-machine" in stored_memory.tags
119 | assert stored_memory.metadata["hostname"] == "client-machine"
120 |
121 |
122 | @pytest.mark.asyncio
123 | async def test_api_store_memory_hostname_from_request_body(mock_storage):
124 | """Test that client_hostname in request body works."""
125 | mock_storage.store.return_value = None
126 |
127 | service = MemoryService(storage=mock_storage)
128 |
129 | # Simulate API call with hostname in body
130 | result = await service.store_memory(
131 | content="Test",
132 | client_hostname="body-hostname"
133 | )
134 |
135 | stored_memory = mock_storage.store.call_args.args[0]
136 | assert "source:body-hostname" in stored_memory.tags
137 |
138 |
139 | # Test API List Memories Endpoint
140 |
141 | @pytest.mark.asyncio
142 | async def test_api_list_memories_uses_database_filtering(mock_storage):
143 | """Test that GET /api/memories uses database-level filtering."""
144 | # Setup mock to return limited results
145 | mock_storage.get_all_memories.return_value = []
146 | mock_storage.count_all_memories.return_value = 1000
147 |
148 | service = MemoryService(storage=mock_storage)
149 |
150 | # Request page 1 with 10 items from 1000 total
151 | result = await service.list_memories(page=1, page_size=10)
152 |
153 | # CRITICAL: Verify only 10 items requested, not all 1000
154 | # This proves database-level filtering, not O(n) loading
155 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
156 | assert call_kwargs["limit"] == 10
157 | assert call_kwargs["offset"] == 0
158 | assert result["total"] == 1000
159 | assert result["has_more"] is True
160 |
161 |
162 | @pytest.mark.asyncio
163 | async def test_api_list_memories_pagination_through_service(mock_storage):
164 | """Test end-to-end pagination workflow."""
165 | # Create mock memories
166 | memories = [
167 | Memory(
168 | content=f"Memory {i}",
169 | content_hash=f"hash_{i}",
170 | tags=["test"],
171 | memory_type="note",
172 | metadata={},
173 | created_at=1698765432.0 + i,
174 | updated_at=1698765432.0 + i
175 | )
176 | for i in range(25)
177 | ]
178 |
179 | # Page 1: First 10 memories
180 | mock_storage.get_all_memories.return_value = memories[:10]
181 | mock_storage.count_all_memories.return_value = 25
182 |
183 | service = MemoryService(storage=mock_storage)
184 | page1 = await service.list_memories(page=1, page_size=10)
185 |
186 | assert page1["page"] == 1
187 | assert page1["page_size"] == 10
188 | assert page1["total"] == 25
189 | assert page1["has_more"] is True
190 | assert len(page1["memories"]) == 10
191 |
192 | # Page 2: Next 10 memories
193 | mock_storage.get_all_memories.return_value = memories[10:20]
194 | page2 = await service.list_memories(page=2, page_size=10)
195 |
196 | assert page2["page"] == 2
197 | assert page2["has_more"] is True
198 |
199 | # Page 3: Last 5 memories
200 | mock_storage.get_all_memories.return_value = memories[20:25]
201 | page3 = await service.list_memories(page=3, page_size=10)
202 |
203 | assert page3["page"] == 3
204 | assert page3["has_more"] is False
205 | assert len(page3["memories"]) == 5
206 |
207 |
208 | @pytest.mark.asyncio
209 | async def test_api_list_memories_tag_filter(mock_storage):
210 | """Test filtering by tag through API."""
211 | mock_storage.get_all_memories.return_value = []
212 | mock_storage.count_all_memories.return_value = 0
213 |
214 | service = MemoryService(storage=mock_storage)
215 |
216 | result = await service.list_memories(page=1, page_size=10, tag="important")
217 |
218 | # Verify tag passed to storage as list
219 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
220 | assert call_kwargs["tags"] == ["important"]
221 |
222 |
223 | @pytest.mark.asyncio
224 | async def test_api_list_memories_type_filter(mock_storage):
225 | """Test filtering by memory type through API."""
226 | mock_storage.get_all_memories.return_value = []
227 | mock_storage.count_all_memories.return_value = 0
228 |
229 | service = MemoryService(storage=mock_storage)
230 |
231 | result = await service.list_memories(page=1, page_size=10, memory_type="reference")
232 |
233 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
234 | assert call_kwargs["memory_type"] == "reference"
235 |
236 |
237 | @pytest.mark.asyncio
238 | async def test_api_list_memories_combined_filters(mock_storage):
239 | """Test combining tag and type filters."""
240 | mock_storage.get_all_memories.return_value = []
241 | mock_storage.count_all_memories.return_value = 0
242 |
243 | service = MemoryService(storage=mock_storage)
244 |
245 | result = await service.list_memories(
246 | page=1,
247 | page_size=10,
248 | tag="work",
249 | memory_type="task"
250 | )
251 |
252 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
253 | assert call_kwargs["tags"] == ["work"]
254 | assert call_kwargs["memory_type"] == "task"
255 |
256 |
257 | # Test API Search Endpoints
258 |
259 | @pytest.mark.asyncio
260 | async def test_api_semantic_search_uses_service(mock_storage, sample_memory):
261 | """Test POST /api/search uses MemoryService."""
262 | mock_storage.retrieve.return_value = [sample_memory]
263 |
264 | service = MemoryService(storage=mock_storage)
265 |
266 | result = await service.retrieve_memories(query="test query", n_results=5)
267 |
268 | assert result["query"] == "test query"
269 | assert result["count"] == 1
270 | mock_storage.retrieve.assert_called_once()
271 |
272 |
273 | @pytest.mark.asyncio
274 | async def test_api_tag_search_uses_service(mock_storage, sample_memory):
275 | """Test POST /api/search/by-tag uses MemoryService."""
276 | mock_storage.search_by_tags.return_value = [sample_memory]
277 |
278 | service = MemoryService(storage=mock_storage)
279 |
280 | result = await service.search_by_tag(tags=["test"], match_all=False)
281 |
282 | assert result["tags"] == ["test"]
283 | assert result["match_type"] == "ANY"
284 | assert result["count"] == 1
285 |
286 |
287 | @pytest.mark.asyncio
288 | async def test_api_time_search_uses_service(mock_storage, sample_memory):
289 | """Test POST /api/search/by-time flow (if applicable)."""
290 | # Note: Time search might use retrieve_memories with time filters
291 | mock_storage.retrieve.return_value = [sample_memory]
292 |
293 | service = MemoryService(storage=mock_storage)
294 |
295 | # Simulate time-based search
296 | result = await service.retrieve_memories(query="last week", n_results=10)
297 |
298 | assert "memories" in result
299 |
300 |
301 | # Test API Delete Endpoint
302 |
303 | @pytest.mark.asyncio
304 | async def test_api_delete_memory_uses_service(mock_storage):
305 | """Test DELETE /api/memories/{hash} uses MemoryService."""
306 | mock_storage.delete_memory.return_value = True
307 |
308 | service = MemoryService(storage=mock_storage)
309 |
310 | result = await service.delete_memory("test_hash_123")
311 |
312 | assert result["success"] is True
313 | assert result["content_hash"] == "test_hash_123"
314 | mock_storage.delete_memory.assert_called_once_with("test_hash_123")
315 |
316 |
317 | @pytest.mark.asyncio
318 | async def test_api_delete_memory_not_found(mock_storage):
319 | """Test deleting non-existent memory returns proper response."""
320 | mock_storage.delete_memory.return_value = False
321 |
322 | service = MemoryService(storage=mock_storage)
323 |
324 | result = await service.delete_memory("nonexistent")
325 |
326 | assert result["success"] is False
327 |
328 |
329 | # Test API Get Memory Endpoint
330 |
331 | @pytest.mark.asyncio
332 | async def test_api_get_memory_by_hash_uses_service(mock_storage, sample_memory):
333 | """Test GET /api/memories/{hash} uses MemoryService."""
334 | mock_storage.get_by_hash.return_value = sample_memory
335 |
336 | service = MemoryService(storage=mock_storage)
337 |
338 | result = await service.get_memory_by_hash("test_hash_123")
339 |
340 | assert result["found"] is True
341 | assert result["memory"]["content_hash"] == "test_hash_123"
342 | mock_storage.get_by_hash.assert_called_once_with("test_hash_123")
343 |
344 |
345 | # Test Dependency Injection
346 |
347 | def test_get_memory_service_dependency_injection():
348 | """Test that get_memory_service creates service with correct storage."""
349 | from mcp_memory_service.web.dependencies import get_memory_service
350 |
351 | # Create mock storage
352 | mock_storage = MagicMock()
353 |
354 | # Override dependency
355 | def override_get_storage():
356 | return mock_storage
357 |
358 | # Get service
359 | service = get_memory_service(storage=mock_storage)
360 |
361 | assert isinstance(service, MemoryService)
362 | assert service.storage == mock_storage
363 |
364 |
365 | # Performance and Scaling Tests
366 |
367 | @pytest.mark.asyncio
368 | async def test_list_memories_performance_with_large_dataset(mock_storage):
369 | """
370 | Test that list_memories remains efficient with large datasets.
371 |
372 |     This verifies the fix for the O(n) memory-loading anti-pattern.
373 | """
374 | # Simulate 10,000 memories in database
375 | mock_storage.get_all_memories.return_value = []
376 | mock_storage.count_all_memories.return_value = 10000
377 |
378 | service = MemoryService(storage=mock_storage)
379 |
380 | # Request just 20 items
381 | result = await service.list_memories(page=1, page_size=20)
382 |
383 | # CRITICAL: Verify we only queried for 20 items, not all 10,000
384 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
385 | assert call_kwargs["limit"] == 20
386 | assert call_kwargs["offset"] == 0
387 |
388 | # This proves database-level filtering prevents loading 10,000 records
389 | assert result["total"] == 10000
390 | assert result["has_more"] is True
391 |
392 |
393 | @pytest.mark.asyncio
394 | async def test_tag_filter_performance(mock_storage):
395 | """Test that tag filtering happens at database level."""
396 | mock_storage.get_all_memories.return_value = []
397 | mock_storage.count_all_memories.return_value = 50
398 |
399 | service = MemoryService(storage=mock_storage)
400 |
401 | result = await service.list_memories(page=1, page_size=10, tag="important")
402 |
403 | # Verify tag filter passed to database query
404 | call_kwargs = mock_storage.get_all_memories.call_args.kwargs
405 | assert call_kwargs["tags"] == ["important"]
406 |
407 | # Result should only reflect filtered count
408 | assert result["total"] == 50 # Only memories matching tag
409 |
410 |
411 | # Error Handling Tests
412 |
413 | @pytest.mark.asyncio
414 | async def test_api_handles_storage_errors_gracefully(mock_storage):
415 | """Test that API returns proper errors when storage fails."""
416 | mock_storage.get_all_memories.side_effect = Exception("Database connection lost")
417 |
418 | service = MemoryService(storage=mock_storage)
419 |
420 | result = await service.list_memories(page=1, page_size=10)
421 |
422 | assert result["success"] is False
423 | assert "error" in result
424 | assert "Database connection lost" in result["error"]
425 |
426 |
427 | @pytest.mark.asyncio
428 | async def test_api_validates_input_through_service(mock_storage):
429 | """Test that validation errors from storage are handled."""
430 | mock_storage.store.side_effect = ValueError("Invalid content format")
431 |
432 | service = MemoryService(storage=mock_storage)
433 |
434 | result = await service.store_memory(content="invalid")
435 |
436 | assert result["success"] is False
437 | assert "Invalid memory data" in result["error"]
438 |
439 |
440 | # Consistency Tests
441 |
442 | @pytest.mark.asyncio
443 | async def test_api_and_mcp_use_same_service_logic(mock_storage):
444 | """
445 | Test that API and MCP tools use the same MemoryService logic.
446 |
447 | This verifies the DRY principle - both interfaces share the same
448 | business logic through MemoryService.
449 | """
450 | service = MemoryService(storage=mock_storage)
451 |
452 | # Store through service (used by both API and MCP)
453 | mock_storage.store.return_value = None
454 | result1 = await service.store_memory(content="Test", tags=["shared"])
455 |
456 | # Retrieve through service (used by both API and MCP)
457 | mock_storage.retrieve.return_value = []
458 | result2 = await service.retrieve_memories(query="test")
459 |
460 | # Both operations used the same service
461 | assert result1["success"] is True
462 | assert "memories" in result2
463 |
464 |
465 | @pytest.mark.asyncio
466 | async def test_response_format_consistency(mock_storage, sample_memory):
467 | """Test that all service methods return consistently formatted responses."""
468 | mock_storage.get_all_memories.return_value = [sample_memory]
469 | mock_storage.count_all_memories.return_value = 1
470 | mock_storage.retrieve.return_value = [sample_memory]
471 | mock_storage.search_by_tags.return_value = [sample_memory]
472 |
473 | service = MemoryService(storage=mock_storage)
474 |
475 | # Get responses from different methods
476 | list_result = await service.list_memories(page=1, page_size=10)
477 | retrieve_result = await service.retrieve_memories(query="test")
478 | tag_result = await service.search_by_tag(tags="test")
479 |
480 | # All should have consistently formatted memories
481 | list_memory = list_result["memories"][0]
482 | retrieve_memory = retrieve_result["memories"][0]
483 | tag_memory = tag_result["memories"][0]
484 |
485 | # Verify all have same format
486 | required_fields = ["content", "content_hash", "tags", "memory_type", "created_at"]
487 | for field in required_fields:
488 | assert field in list_memory
489 | assert field in retrieve_memory
490 | assert field in tag_memory
491 |
492 |
493 | # Real Storage Integration Test (End-to-End)
494 |
495 | @pytest.mark.asyncio
496 | @pytest.mark.integration
497 | async def test_end_to_end_workflow_with_real_storage(temp_db):
498 | """
499 | End-to-end test with real SQLite storage (not mocked).
500 |
501 | This verifies the complete integration stack works correctly.
502 | """
503 | # Create real storage
504 | storage = SqliteVecMemoryStorage(temp_db)
505 | await storage.initialize()
506 |
507 | try:
508 | # Create service with real storage
509 | service = MemoryService(storage=storage)
510 |
511 | # Store a memory
512 | store_result = await service.store_memory(
513 | content="End-to-end test memory",
514 | tags=["e2e", "integration"],
515 | memory_type="test"
516 | )
517 | assert store_result["success"] is True
518 |
519 | # List memories
520 | list_result = await service.list_memories(page=1, page_size=10)
521 | assert len(list_result["memories"]) > 0
522 |
523 | # Search by tag
524 | tag_result = await service.search_by_tag(tags="e2e")
525 | assert len(tag_result["memories"]) > 0
526 |
527 | # Get specific memory
528 | content_hash = store_result["memory"]["content_hash"]
529 | get_result = await service.get_memory_by_hash(content_hash)
530 | assert get_result["found"] is True
531 |
532 | # Delete memory
533 | delete_result = await service.delete_memory(content_hash)
534 | assert delete_result["success"] is True
535 |
536 | # Verify deleted
537 | get_after_delete = await service.get_memory_by_hash(content_hash)
538 | assert get_after_delete["found"] is False
539 |
540 | finally:
541 | storage.close()
542 |
543 |
544 | # Real HTTP API Integration Tests with TestClient
545 |
546 | @pytest.mark.asyncio
547 | @pytest.mark.integration
548 | async def test_http_api_store_memory_endpoint(temp_db):
549 | """
550 | Test POST /api/memories endpoint with real HTTP request.
551 |
552 | Uses TestClient to make actual HTTP request to FastAPI app.
553 | """
554 | # Create real storage
555 | storage = SqliteVecMemoryStorage(temp_db)
556 | await storage.initialize()
557 |
558 | try:
559 | # Import app and set storage
560 | from mcp_memory_service.web.app import app
561 | set_storage(storage)
562 |
563 | # Create TestClient
564 | client = TestClient(app)
565 |
566 | # Make HTTP POST request
567 | response = client.post(
568 | "/api/memories",
569 | json={
570 | "content": "HTTP API test memory",
571 | "tags": ["http", "api", "test"],
572 | "memory_type": "note"
573 | }
574 | )
575 |
576 | # Verify response
577 | assert response.status_code == 200
578 | data = response.json()
579 | assert data["success"] is True
580 | assert "memory" in data
581 | assert data["memory"]["content"] == "HTTP API test memory"
582 | assert "http" in data["memory"]["tags"]
583 |
584 | finally:
585 | storage.close()
586 |
587 |
588 | @pytest.mark.asyncio
589 | @pytest.mark.integration
590 | async def test_http_api_list_memories_endpoint(temp_db):
591 | """
592 | Test GET /api/memories endpoint with real HTTP request.
593 |
594 | Verifies pagination and filtering work through HTTP API.
595 | """
596 | storage = SqliteVecMemoryStorage(temp_db)
597 | await storage.initialize()
598 |
599 | try:
600 | from mcp_memory_service.web.app import app
601 | set_storage(storage)
602 |
603 | # Store test memories first
604 | service = MemoryService(storage=storage)
605 | for i in range(5):
606 | await service.store_memory(
607 | content=f"Test memory {i}",
608 | tags=["test"],
609 | memory_type="note"
610 | )
611 |
612 | # Make HTTP GET request
613 | client = TestClient(app)
614 | response = client.get("/api/memories?page=1&page_size=10")
615 |
616 | # Verify response
617 | assert response.status_code == 200
618 | data = response.json()
619 | assert "memories" in data
620 | assert len(data["memories"]) == 5
621 | assert data["total"] == 5
622 | assert data["page"] == 1
623 |
624 | finally:
625 | storage.close()
626 |
627 |
628 | @pytest.mark.asyncio
629 | @pytest.mark.integration
630 | async def test_http_api_search_endpoint(temp_db):
631 | """
632 | Test POST /api/search endpoint with real HTTP request.
633 |
634 | Verifies semantic search works through HTTP API.
635 | """
636 | storage = SqliteVecMemoryStorage(temp_db)
637 | await storage.initialize()
638 |
639 | try:
640 | from mcp_memory_service.web.app import app
641 | set_storage(storage)
642 |
643 | # Store searchable memory
644 | service = MemoryService(storage=storage)
645 | await service.store_memory(
646 | content="Python programming language tutorial",
647 | tags=["python", "tutorial"],
648 | memory_type="reference"
649 | )
650 |
651 | # Make HTTP POST request for search
652 | client = TestClient(app)
653 | response = client.post(
654 | "/api/search",
655 | json={"query": "python tutorial", "limit": 5}
656 | )
657 |
658 | # Verify response
659 | assert response.status_code == 200
660 | data = response.json()
661 | assert "memories" in data
662 | assert data["query"] == "python tutorial"
663 |
664 | finally:
665 | storage.close()
666 |
667 |
668 | @pytest.mark.asyncio
669 | @pytest.mark.integration
670 | async def test_http_api_search_by_tag_endpoint(temp_db):
671 | """
672 | Test POST /api/search/by-tag endpoint with real HTTP request.
673 |
674 | Verifies tag search works through HTTP API.
675 | """
676 | storage = SqliteVecMemoryStorage(temp_db)
677 | await storage.initialize()
678 |
679 | try:
680 | from mcp_memory_service.web.app import app
681 | set_storage(storage)
682 |
683 | # Store memories with tags
684 | service = MemoryService(storage=storage)
685 | await service.store_memory(
686 | content="Important work item",
687 | tags=["important", "work"],
688 | memory_type="task"
689 | )
690 | await service.store_memory(
691 | content="Personal note",
692 | tags=["personal"],
693 | memory_type="note"
694 | )
695 |
696 | # Search by tag via HTTP
697 | client = TestClient(app)
698 | response = client.post(
699 | "/api/search/by-tag",
700 | json={"tags": ["important"], "limit": 10}
701 | )
702 |
703 | # Verify response
704 | assert response.status_code == 200
705 | data = response.json()
706 | assert len(data["memories"]) == 1
707 | assert "important" in data["memories"][0]["tags"]
708 |
709 | finally:
710 | storage.close()
711 |
712 |
713 | @pytest.mark.asyncio
714 | @pytest.mark.integration
715 | async def test_http_api_get_memory_by_hash_endpoint(temp_db):
716 | """
717 | Test GET /api/memories/{hash} endpoint with real HTTP request.
718 |
719 | Verifies retrieving specific memory by hash works.
720 | """
721 | storage = SqliteVecMemoryStorage(temp_db)
722 | await storage.initialize()
723 |
724 | try:
725 | from mcp_memory_service.web.app import app
726 | set_storage(storage)
727 |
728 | # Store a memory
729 | service = MemoryService(storage=storage)
730 | store_result = await service.store_memory(
731 | content="Memory to retrieve",
732 | tags=["test"],
733 | memory_type="note"
734 | )
735 | content_hash = store_result["memory"]["content_hash"]
736 |
737 | # Retrieve via HTTP
738 | client = TestClient(app)
739 | response = client.get(f"/api/memories/{content_hash}")
740 |
741 | # Verify response
742 | assert response.status_code == 200
743 | data = response.json()
744 | assert data["content"] == "Memory to retrieve"
745 | assert data["content_hash"] == content_hash
746 |
747 | finally:
748 | storage.close()
749 |
750 |
751 | @pytest.mark.asyncio
752 | @pytest.mark.integration
753 | async def test_http_api_delete_memory_endpoint(temp_db):
754 | """
755 | Test DELETE /api/memories/{hash} endpoint with real HTTP request.
756 |
757 | Verifies deletion works through HTTP API.
758 | """
759 | storage = SqliteVecMemoryStorage(temp_db)
760 | await storage.initialize()
761 |
762 | try:
763 | from mcp_memory_service.web.app import app
764 | set_storage(storage)
765 |
766 | # Store a memory
767 | service = MemoryService(storage=storage)
768 | store_result = await service.store_memory(
769 | content="Memory to delete",
770 | tags=["test"],
771 | memory_type="note"
772 | )
773 | content_hash = store_result["memory"]["content_hash"]
774 |
775 | # Delete via HTTP
776 | client = TestClient(app)
777 | response = client.delete(f"/api/memories/{content_hash}")
778 |
779 | # Verify response
780 | assert response.status_code == 200
781 | data = response.json()
782 | assert data["success"] is True
783 |
784 | # Verify memory is gone
785 | get_response = client.get(f"/api/memories/{content_hash}")
786 | assert get_response.status_code == 404
787 |
788 | finally:
789 | storage.close()
790 |
791 |
792 | @pytest.mark.asyncio
793 | @pytest.mark.integration
794 | async def test_http_api_pagination_with_real_data(temp_db):
795 | """
796 | Test pagination through HTTP API with multiple pages.
797 |
798 | Verifies database-level pagination prevents O(n) loading.
799 | """
800 | storage = SqliteVecMemoryStorage(temp_db)
801 | await storage.initialize()
802 |
803 | try:
804 | from mcp_memory_service.web.app import app
805 | set_storage(storage)
806 |
807 | # Store 25 memories
808 | service = MemoryService(storage=storage)
809 | for i in range(25):
810 | await service.store_memory(
811 | content=f"Pagination test {i}",
812 | tags=["pagination"],
813 | memory_type="note"
814 | )
815 |
816 | client = TestClient(app)
817 |
818 | # Page 1: First 10
819 | response1 = client.get("/api/memories?page=1&page_size=10")
820 | assert response1.status_code == 200
821 | data1 = response1.json()
822 | assert len(data1["memories"]) == 10
823 | assert data1["total"] == 25
824 | assert data1["has_more"] is True
825 |
826 | # Page 2: Next 10
827 | response2 = client.get("/api/memories?page=2&page_size=10")
828 | data2 = response2.json()
829 | assert len(data2["memories"]) == 10
830 | assert data2["has_more"] is True
831 |
832 | # Page 3: Last 5
833 | response3 = client.get("/api/memories?page=3&page_size=10")
834 | data3 = response3.json()
835 | assert len(data3["memories"]) == 5
836 | assert data3["has_more"] is False
837 |
838 | finally:
839 | storage.close()
840 |
841 |
842 | @pytest.mark.asyncio
843 | @pytest.mark.integration
844 | async def test_http_api_error_handling_invalid_json(temp_db):
845 | """
846 | Test that HTTP API handles malformed JSON gracefully.
847 |
848 | This would have caught v8.12.0 syntax errors.
849 | """
850 | storage = SqliteVecMemoryStorage(temp_db)
851 | await storage.initialize()
852 |
853 | try:
854 | from mcp_memory_service.web.app import app
855 | set_storage(storage)
856 |
857 | client = TestClient(app)
858 |
859 | # Send malformed JSON
860 | response = client.post(
861 | "/api/memories",
862 | data="{'this': 'is not valid json}", # Missing quote
863 | headers={"Content-Type": "application/json"}
864 | )
865 |
866 | # Should return 400 or 422, not 500
867 | assert response.status_code in [400, 422]
868 |
869 | finally:
870 | storage.close()
871 |
872 |
873 | @pytest.mark.asyncio
874 | @pytest.mark.integration
875 | async def test_http_api_client_hostname_header(temp_db):
876 | """
877 | Test that X-Client-Hostname header is processed correctly.
878 |
879 | Verifies hostname tagging works through real HTTP request.
880 | """
881 | storage = SqliteVecMemoryStorage(temp_db)
882 | await storage.initialize()
883 |
884 | try:
885 | from mcp_memory_service.web.app import app
886 | set_storage(storage)
887 |
888 | client = TestClient(app)
889 |
890 | # Send request with hostname header
891 | response = client.post(
892 | "/api/memories",
893 | json={
894 | "content": "Test with hostname",
895 | "tags": ["test"]
896 | },
897 | headers={"X-Client-Hostname": "test-machine"}
898 | )
899 |
900 | # Verify hostname tag added
901 | assert response.status_code == 200
902 | data = response.json()
903 | assert "source:test-machine" in data["memory"]["tags"]
904 | assert data["memory"]["metadata"]["hostname"] == "test-machine"
905 |
906 | finally:
907 | storage.close()
908 |
909 |
910 | @pytest.mark.asyncio
911 | @pytest.mark.integration
912 | async def test_http_api_complete_crud_workflow(temp_db):
913 | """
914 | Complete end-to-end CRUD workflow through real HTTP API.
915 |
916 | This verifies the entire HTTP API stack works correctly.
917 | """
918 | storage = SqliteVecMemoryStorage(temp_db)
919 | await storage.initialize()
920 |
921 | try:
922 | from mcp_memory_service.web.app import app
923 | set_storage(storage)
924 |
925 | client = TestClient(app)
926 |
927 | # CREATE: Store a memory
928 | create_response = client.post(
929 | "/api/memories",
930 | json={
931 | "content": "CRUD test memory",
932 | "tags": ["crud", "test"],
933 | "memory_type": "note"
934 | }
935 | )
936 | assert create_response.status_code == 200
937 | content_hash = create_response.json()["memory"]["content_hash"]
938 |
939 | # READ: List all memories
940 | list_response = client.get("/api/memories")
941 | assert list_response.status_code == 200
942 | assert len(list_response.json()["memories"]) > 0
943 |
944 | # READ: Get specific memory
945 | get_response = client.get(f"/api/memories/{content_hash}")
946 | assert get_response.status_code == 200
947 | assert get_response.json()["content"] == "CRUD test memory"
948 |
949 |         # SEARCH: Find the memory via semantic search (no update endpoint in this workflow)
950 | search_response = client.post(
951 | "/api/search",
952 | json={"query": "CRUD test", "limit": 5}
953 | )
954 | assert search_response.status_code == 200
955 | assert len(search_response.json()["memories"]) > 0
956 |
957 | # DELETE: Remove memory
958 | delete_response = client.delete(f"/api/memories/{content_hash}")
959 | assert delete_response.status_code == 200
960 | assert delete_response.json()["success"] is True
961 |
962 | # VERIFY: Memory is gone
963 | verify_response = client.get(f"/api/memories/{content_hash}")
964 | assert verify_response.status_code == 404
965 |
966 | finally:
967 | storage.close()
968 |
```
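The pagination and performance tests above pin down a simple contract: the service asks storage for exactly one page (limit equals page_size, offset equals (page - 1) * page_size) and derives has_more from the total count, so a 10,000-row table is never loaded for a 20-item page. The sketch below restates that arithmetic with a hypothetical helper name; it only illustrates what the assertions check and is not code from the repository.

```python
# Minimal sketch (assumed helper, not part of the test suite): the pagination
# arithmetic asserted by test_list_memories_performance_with_large_dataset
# and test_http_api_pagination_with_real_data.
def paginate_params(page: int, page_size: int, total: int) -> dict:
    """Compute the limit/offset passed to storage and the has_more flag."""
    offset = (page - 1) * page_size           # page 1 -> offset 0
    return {
        "limit": page_size,                   # never fetch more than one page
        "offset": offset,
        "has_more": offset + page_size < total,
    }

# Values matching the assertions above:
assert paginate_params(1, 20, 10_000) == {"limit": 20, "offset": 0, "has_more": True}
assert paginate_params(3, 10, 25)["has_more"] is False   # last page holds 5 of 25
```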
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/api/documents.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Document Upload API Endpoints
17 |
18 | Provides REST API endpoints for document ingestion through the web dashboard.
19 | Supports single file upload, batch upload, progress tracking, and upload history.
20 | """
21 |
22 | import os
23 | import re
24 | import uuid
25 | import asyncio
26 | import logging
27 | import tempfile
28 | from typing import List, Dict, Any, Optional
29 | from datetime import datetime
30 | from pathlib import Path
31 | from urllib.parse import urlparse, unquote
32 |
33 | from fastapi import APIRouter, UploadFile, File, Form, HTTPException, BackgroundTasks
34 | from fastapi.responses import JSONResponse
35 | from pydantic import BaseModel
36 |
37 | from ...ingestion import get_loader_for_file, SUPPORTED_FORMATS
38 | from ...models.memory import Memory
39 | from ...utils import create_memory_from_chunk, _process_and_store_chunk
40 | from ..dependencies import get_storage
41 | from ...utils.hashing import generate_content_hash  # used by process_single_file_upload
42 | logger = logging.getLogger(__name__)
43 |
44 | router = APIRouter()
45 |
46 | # Constants
47 | MAX_TAG_LENGTH = 100
48 |
49 |
50 | def parse_and_validate_tags(tags: str) -> List[str]:
51 | """
52 | Parse and validate tags from user input.
53 |
54 | Handles comma-separated and space-separated tags, removes file:// prefixes,
55 | sanitizes path separators, and validates tag lengths.
56 |
57 | Args:
58 | tags: Raw tag string (comma or space separated)
59 |
60 | Returns:
61 | List of cleaned and validated tags
62 |
63 | Raises:
64 | HTTPException: If any tag exceeds MAX_TAG_LENGTH
65 | """
66 | if not tags or not tags.strip():
67 | return []
68 |
69 | # Split by comma OR space
70 | raw_tags = tags.replace(',', ' ').split()
71 | tag_list = []
72 |
73 | for tag in raw_tags:
74 | clean_tag = tag.strip()
75 | if not clean_tag:
76 | continue
77 |
78 | # Remove file:// protocol prefixes and extract filename
79 | # Uses urllib.parse for robust handling of URL-encoded chars and different path formats
80 | if clean_tag.startswith('file://'):
81 | path_str = unquote(urlparse(clean_tag).path)
82 | # On Windows, urlparse may add a leading slash (e.g., /C:/...), which needs to be removed
83 | if os.name == 'nt' and path_str.startswith('/') and len(path_str) > 2 and path_str[2] == ':':
84 | path_str = path_str[1:]
85 | clean_tag = Path(path_str).name
86 |
87 | # Remove common path separators to create clean tag names
88 | clean_tag = re.sub(r'[/\\]', '_', clean_tag)
89 |
90 | # Validate tag length - raise error instead of silently dropping
91 | if len(clean_tag) > MAX_TAG_LENGTH:
92 | raise HTTPException(
93 | status_code=400,
94 | detail=f"Tag '{clean_tag[:100]}...' exceeds maximum length of {MAX_TAG_LENGTH} characters. "
95 | f"Please use shorter, more descriptive tags."
96 | )
97 |
98 | tag_list.append(clean_tag)
99 |
100 | return tag_list
101 |
102 |
103 | async def ensure_storage_initialized():
104 | """Ensure storage is initialized for web API usage."""
105 | logger.info("🔍 Checking storage availability...")
106 | try:
107 | # Try to get storage
108 | storage = get_storage()
109 | logger.info("✅ Storage already available")
110 | return storage
111 | except Exception as e:
112 | logger.warning(f"⚠️ Storage not available ({e}), attempting to initialize...")
113 | try:
114 | # Import and initialize storage
115 | from ..dependencies import create_storage_backend, set_storage
116 | logger.info("🏗️ Creating storage backend...")
117 | storage = await create_storage_backend()
118 | set_storage(storage)
119 | logger.info("✅ Storage initialized successfully in API context")
120 | return storage
121 | except Exception as init_error:
122 | logger.error(f"❌ Failed to initialize storage: {init_error}")
123 | logger.error(f"Full error: {str(init_error)}")
124 | import traceback
125 | logger.error(f"Traceback: {traceback.format_exc()}")
126 | # Don't raise HTTPException here since this is called from background tasks
127 | raise init_error
128 |
129 | # In-memory storage for upload tracking (in production, use database)
130 | upload_sessions = {}
131 |
132 | # Note: UploadRequest and BatchUploadRequest models removed - not used
133 | # Endpoints read parameters directly from form data
134 |
135 | class UploadStatus(BaseModel):
136 | upload_id: str
137 | status: str # queued, processing, completed, failed
138 | filename: str = ""
139 | file_size: int = 0
140 | chunks_processed: int = 0
141 | chunks_stored: int = 0
142 | total_chunks: int = 0
143 | progress: float = 0.0
144 | errors: List[str] = []
145 | created_at: datetime
146 | completed_at: Optional[datetime] = None
147 |
148 | @router.post("/upload", response_model=Dict[str, Any])
149 | async def upload_document(
150 | background_tasks: BackgroundTasks,
151 | file: UploadFile = File(...),
152 | tags: str = Form(""),
153 | chunk_size: int = Form(1000),
154 | chunk_overlap: int = Form(200),
155 | memory_type: str = Form("document")
156 | ):
157 | """
158 | Upload and ingest a single document.
159 |
160 | Args:
161 | file: The document file to upload
162 | tags: Comma-separated list of tags
163 | chunk_size: Target chunk size in characters
164 | chunk_overlap: Chunk overlap in characters
165 | memory_type: Type label for memories
166 |
167 | Returns:
168 | Upload session information with ID for tracking
169 |
170 | Uses FastAPI BackgroundTasks for proper async processing.
171 | """
172 | logger.info(f"🚀 Document upload endpoint called with file: {file.filename}")
173 | try:
174 | # Read file content
175 | file_content = await file.read()
176 | file_size = len(file_content)
177 | logger.info(f"File content length: {file_size} bytes")
178 |
179 | # Validate file type
180 | file_ext = Path(file.filename).suffix.lower().lstrip('.')
181 | if file_ext not in SUPPORTED_FORMATS:
182 | supported = ", ".join(f".{ext}" for ext in SUPPORTED_FORMATS.keys())
183 | raise HTTPException(
184 | status_code=400,
185 | detail=f"Unsupported file type: .{file_ext}. Supported: {supported}"
186 | )
187 |
188 | # Parse and validate tags
189 | tag_list = parse_and_validate_tags(tags)
190 |
191 | # Create upload session
192 | upload_id = str(uuid.uuid4())
193 |
194 | # Create secure temporary file (avoids path traversal vulnerability)
195 | # Extract safe file extension for suffix
196 | file_ext = Path(file.filename).suffix if file.filename else ""
197 | temp_file = tempfile.NamedTemporaryFile(
198 | delete=False,
199 | prefix=f"{upload_id}_",
200 | suffix=file_ext
201 | )
202 | temp_path = temp_file.name
203 |
204 | # Save uploaded file temporarily
205 | with temp_file:
206 | temp_file.write(file_content)
207 |
208 | # Initialize upload session
209 | session = UploadStatus(
210 | upload_id=upload_id,
211 | status="queued",
212 | filename=file.filename,
213 | file_size=file_size,
214 | created_at=datetime.now()
215 | )
216 | upload_sessions[upload_id] = session
217 |
218 | # Start background processing
219 | background_tasks.add_task(
220 | process_single_file_upload,
221 | upload_id,
222 | temp_path,
223 | file.filename,
224 | tag_list,
225 | chunk_size,
226 | chunk_overlap,
227 | memory_type
228 | )
229 |
230 | return {
231 | "upload_id": upload_id,
232 | "status": "queued",
233 | "message": f"Document {file.filename} queued for processing"
234 | }
235 |
236 | except Exception as e:
237 | logger.error(f"Upload error: {str(e)}")
238 | raise HTTPException(status_code=500, detail=str(e))
239 |
240 | @router.post("/batch-upload", response_model=Dict[str, Any])
241 | async def batch_upload_documents(
242 | background_tasks: BackgroundTasks,
243 | files: List[UploadFile] = File(...),
244 | tags: str = Form(""),
245 | chunk_size: int = Form(1000),
246 | chunk_overlap: int = Form(200),
247 | memory_type: str = Form("document")
248 | ):
249 | """
250 | Upload and ingest multiple documents in batch.
251 |
252 | Args:
253 | files: List of document files to upload
254 | tags: Comma-separated list of tags
255 | chunk_size: Target chunk size in characters
256 | chunk_overlap: Chunk overlap in characters
257 | memory_type: Type label for memories
258 |
259 | Returns:
260 | Batch upload session information
261 | """
262 | try:
263 | if not files:
264 | raise HTTPException(status_code=400, detail="No files provided")
265 |
266 | # Parse and validate tags
267 | tag_list = parse_and_validate_tags(tags)
268 |
269 | # Create batch upload session
270 | batch_id = str(uuid.uuid4())
271 | temp_paths = []
272 |
273 | # Validate and save all files
274 | for file in files:
275 | file_ext = Path(file.filename).suffix.lower().lstrip('.')
276 | if file_ext not in SUPPORTED_FORMATS:
277 | supported = ", ".join(f".{ext}" for ext in SUPPORTED_FORMATS.keys())
278 | raise HTTPException(
279 | status_code=400,
280 | detail=f"Unsupported file type for {file.filename}: {file_ext}. Supported: {supported}"
281 | )
282 |
283 | # Create secure temporary file (avoids path traversal vulnerability)
284 | content = await file.read()
285 | safe_ext = Path(file.filename).suffix.lower() if file.filename else ""
286 | temp_file = tempfile.NamedTemporaryFile(
287 | delete=False,
288 | prefix=f"{batch_id}_",
289 | suffix=safe_ext
290 | )
291 | temp_path = temp_file.name
292 | with temp_file:
293 | temp_file.write(content)
294 | temp_paths.append((file.filename, temp_path, len(content)))
295 |
296 | # Calculate total file size for the batch
297 | total_file_size = sum(file_size for _, _, file_size in temp_paths)
298 |
299 | # Initialize batch session
300 | session = UploadStatus(
301 | upload_id=batch_id,
302 | status="queued",
303 | filename=f"Batch ({len(files)} files)",
304 | file_size=total_file_size,
305 | created_at=datetime.now()
306 | )
307 | upload_sessions[batch_id] = session
308 |
309 | # Start background processing
310 | background_tasks.add_task(
311 | process_batch_upload,
312 | batch_id,
313 | temp_paths,
314 | tag_list,
315 | chunk_size,
316 | chunk_overlap,
317 | memory_type
318 | )
319 |
320 | return {
321 | "upload_id": batch_id,
322 | "status": "queued",
323 | "message": f"Batch of {len(files)} documents queued for processing"
324 | }
325 |
326 | except Exception as e:
327 | logger.error(f"Batch upload error: {str(e)}")
328 | raise HTTPException(status_code=500, detail=str(e))
329 |
330 | @router.get("/status/{upload_id}", response_model=UploadStatus)
331 | async def get_upload_status(upload_id: str):
332 | """
333 | Get the status of an upload session.
334 |
335 | Args:
336 | upload_id: The upload session ID
337 |
338 | Returns:
339 | Current upload status
340 | """
341 | if upload_id not in upload_sessions:
342 | raise HTTPException(status_code=404, detail="Upload session not found")
343 |
344 | return upload_sessions[upload_id]
345 |
346 | @router.get("/history", response_model=Dict[str, List[Dict[str, Any]]])
347 | async def get_upload_history():
348 | """
349 | Get the history of all uploads.
350 |
351 | Returns:
352 | List of completed uploads with metadata
353 | """
354 | logger.info("Documents history endpoint called")
355 | try:
356 |         # History is built from the in-memory upload_sessions dict, so it resets on restart;
357 |         # in production this would query a persistent database instead
358 | history = []
359 | for session in upload_sessions.values():
360 | if session.status in ["completed", "failed"]:
361 | history.append({
362 | "upload_id": session.upload_id,
363 | "filename": session.filename,
364 | "file_size": session.file_size,
365 | "status": session.status,
366 | "chunks_processed": session.chunks_processed,
367 | "chunks_stored": session.chunks_stored,
368 | "progress": session.progress,
369 | "errors": session.errors,
370 | "created_at": session.created_at.isoformat(),
371 | "completed_at": session.completed_at.isoformat() if session.completed_at else None
372 | })
373 |
374 | # Sort by creation time, most recent first
375 | history.sort(key=lambda x: x["created_at"], reverse=True)
376 |
377 | return {"uploads": history}
378 | except Exception as e:
379 | logger.error(f"Error in get_upload_history: {e}")
380 | # Return empty history on error so the UI doesn't break
381 | return {"uploads": []}
382 |
383 | async def process_single_file_upload(
384 | upload_id: str,
385 | file_path: str,
386 | filename: str,
387 | tags: List[str],
388 | chunk_size: int,
389 | chunk_overlap: int,
390 | memory_type: str
391 | ):
392 | """Background task to process a single document upload."""
393 | try:
394 | logger.info(f"Starting document processing: {upload_id} - {filename}")
395 | session = upload_sessions[upload_id]
396 | session.status = "processing"
397 |
398 | # Get storage
399 | storage = await ensure_storage_initialized()
400 |
401 | # Get appropriate loader
402 | file_path_obj = Path(file_path)
403 | loader = get_loader_for_file(file_path_obj)
404 | if loader is None:
405 | raise ValueError(f"No loader available for file: {filename}")
406 |
407 | # Configure loader
408 | loader.chunk_size = chunk_size
409 | loader.chunk_overlap = chunk_overlap
410 |
411 | chunks_processed = 0
412 | chunks_stored = 0
413 |
414 | # Process chunks from the file
415 | async for chunk in loader.extract_chunks(file_path_obj):
416 | chunks_processed += 1
417 |
418 | try:
419 | # Add file-specific tags
420 | all_tags = tags.copy()
421 | all_tags.append(f"source_file:{filename}")
422 | all_tags.append(f"file_type:{file_path_obj.suffix.lstrip('.')}")
423 | all_tags.append(f"upload_id:{upload_id}")
424 |
425 | if chunk.metadata.get('tags'):
426 | # Handle tags from chunk metadata (can be string or list)
427 | chunk_tags = chunk.metadata['tags']
428 | if isinstance(chunk_tags, str):
429 | # Split comma-separated string into list
430 | chunk_tags = [tag.strip() for tag in chunk_tags.split(',') if tag.strip()]
431 | all_tags.extend(chunk_tags)
432 |
433 | # Add upload_id to metadata
434 | chunk_metadata = chunk.metadata.copy() if chunk.metadata else {}
435 | chunk_metadata['upload_id'] = upload_id
436 | chunk_metadata['source_file'] = filename
437 |
438 | # Create memory object
439 | memory = Memory(
440 | content=chunk.content,
441 | content_hash=generate_content_hash(chunk.content, chunk_metadata),
442 | tags=list(set(all_tags)), # Remove duplicates
443 | memory_type=memory_type,
444 | metadata=chunk_metadata
445 | )
446 |
447 | # Store the memory
448 | success, error = await storage.store(memory)
449 | if success:
450 | chunks_stored += 1
451 | else:
452 | session.errors.append(f"Chunk {chunk.chunk_index}: {error}")
453 |
454 | except Exception as e:
455 | session.errors.append(f"Chunk {chunk.chunk_index}: {str(e)}")
456 |
457 | # Update progress
458 | session.chunks_processed = chunks_processed
459 | session.chunks_stored = chunks_stored
460 |             session.progress = (chunks_processed / max(chunks_processed, 1)) * 100  # always 100% once a chunk is processed; total chunk count is unknown while streaming
461 |
462 | # Mark as completed
463 | session.status = "completed"
464 | session.completed_at = datetime.now()
465 | session.progress = 100.0
466 |
467 | logger.info(f"Document processing completed: {upload_id}, {chunks_stored}/{chunks_processed} chunks")
468 | return {"chunks_processed": chunks_processed, "chunks_stored": chunks_stored}
469 |
470 | except Exception as e:
471 | logger.error(f"Document processing error: {str(e)}")
472 | session = upload_sessions.get(upload_id)
473 | if session:
474 | session.status = "failed"
475 | session.errors.append(str(e))
476 | session.completed_at = datetime.now()
477 | finally:
478 | # Clean up temp file (always executed)
479 | try:
480 | os.unlink(file_path)
481 | except Exception as cleanup_error:
482 | logger.debug(f"Could not delete temp file {file_path}: {cleanup_error}")
483 |
484 |
485 | async def process_batch_upload(
486 | batch_id: str,
487 | file_info: List[tuple], # (filename, temp_path, size)
488 | tags: List[str],
489 | chunk_size: int,
490 | chunk_overlap: int,
491 | memory_type: str
492 | ):
493 | """Background task to process a batch document upload."""
494 | try:
495 | logger.info(f"Starting batch processing: {batch_id}")
496 | session = upload_sessions[batch_id]
497 | session.status = "processing"
498 |
499 | # Get storage
500 | storage = await ensure_storage_initialized()
501 |
502 | total_files = len(file_info)
503 | processed_files = 0
504 | total_chunks_processed = 0
505 | total_chunks_stored = 0
506 | all_errors = []
507 |
508 | for filename, temp_path, file_size in file_info:
509 | try:
510 | # Get appropriate loader
511 | file_path_obj = Path(temp_path)
512 | loader = get_loader_for_file(file_path_obj)
513 | if loader is None:
514 | all_errors.append(f"{filename}: No loader available")
515 | processed_files += 1
516 | continue
517 |
518 | # Configure loader
519 | loader.chunk_size = chunk_size
520 | loader.chunk_overlap = chunk_overlap
521 |
522 | file_chunks_processed = 0
523 | file_chunks_stored = 0
524 |
525 | # Process chunks from this file
526 | async for chunk in loader.extract_chunks(file_path_obj):
527 | file_chunks_processed += 1
528 | total_chunks_processed += 1
529 |
530 | # Process and store the chunk
531 | success, error = await _process_and_store_chunk(
532 | chunk,
533 | storage,
534 | filename,
535 | base_tags=tags.copy(),
536 | memory_type=memory_type,
537 | context_tags={
538 | "source_file": filename,
539 | "file_type": file_path_obj.suffix.lstrip('.'),
540 | "upload_id": batch_id
541 | },
542 | extra_metadata={
543 | "upload_id": batch_id,
544 | "source_file": filename
545 | }
546 | )
547 |
548 | if success:
549 | file_chunks_stored += 1
550 | total_chunks_stored += 1
551 | else:
552 | all_errors.append(error)
553 |
554 | processed_files += 1
555 |
556 | except Exception as e:
557 | all_errors.append(f"{filename}: {str(e)}")
558 | processed_files += 1
559 |
560 | finally:
561 | # Clean up temp file (always executed)
562 | try:
563 | os.unlink(temp_path)
564 | except Exception as cleanup_error:
565 | logger.debug(f"Could not delete temp file {temp_path}: {cleanup_error}")
566 |
567 | # Finalize batch
568 | session.status = "completed" if total_chunks_stored > 0 else "failed"
569 | session.completed_at = datetime.now()
570 | session.chunks_processed = total_chunks_processed
571 | session.chunks_stored = total_chunks_stored
572 | session.progress = 100.0
573 | session.errors = all_errors
574 |
575 | logger.info(f"Batch processing completed: {batch_id}, {total_chunks_stored}/{total_chunks_processed} chunks")
576 |
577 | except Exception as e:
578 | logger.error(f"Batch processing error: {str(e)}")
579 | session = upload_sessions.get(batch_id)
580 | if session:
581 | session.status = "failed"
582 | session.errors.append(str(e))
583 | session.completed_at = datetime.now()
584 | # Note: send_progress_update removed - progress tracking via polling instead
585 |
586 | # Clean up old completed sessions periodically
587 | @router.on_event("startup") # TODO: Migrate to lifespan context manager in app.py (FastAPI 0.109+)
588 | async def cleanup_old_sessions():
589 | """Clean up old completed upload sessions."""
590 | async def cleanup():
591 | while True:
592 | await asyncio.sleep(3600) # Clean up every hour
593 | current_time = datetime.now()
594 | to_remove = []
595 |
596 | for upload_id, session in upload_sessions.items():
597 | if session.status in ["completed", "failed"]:
598 | # Keep sessions for 24 hours after completion
599 | if session.completed_at and (current_time - session.completed_at).total_seconds() > 86400:
600 | to_remove.append(upload_id)
601 |
602 | for upload_id in to_remove:
603 | del upload_sessions[upload_id]
604 | logger.debug(f"Cleaned up old upload session: {upload_id}")
605 |
606 | asyncio.create_task(cleanup())
607 |
608 | @router.delete("/remove/{upload_id}")
609 | async def remove_document(upload_id: str, remove_from_memory: bool = True):
610 | """
611 | Remove a document and optionally its memories.
612 |
613 | Args:
614 | upload_id: The upload session ID
615 | remove_from_memory: Whether to delete associated memories (default: True)
616 |
617 | Returns:
618 | Removal status with count of memories deleted
619 | """
620 | logger.info(f"Remove document request for upload_id: {upload_id}, remove_from_memory: {remove_from_memory}")
621 |
622 | # Get session info if available (may not exist after server restart)
623 | session = upload_sessions.get(upload_id)
624 | filename = session.filename if session else "Unknown file"
625 | memories_deleted = 0
626 |
627 | try:
628 | if remove_from_memory:
629 | # Get storage
630 | storage = get_storage()
631 |
632 | # Search by tag pattern: upload_id:{upload_id}
633 | upload_tag = f"upload_id:{upload_id}"
634 | logger.info(f"Searching for memories with tag: {upload_tag}")
635 |
636 | try:
637 | # Delete all memories with this upload_id tag
638 | count, _ = await storage.delete_by_tags([upload_tag])
639 | memories_deleted = count
640 | logger.info(f"Deleted {memories_deleted} memories with tag {upload_tag}")
641 |
642 |                 # Without session info the original filename cannot be recovered
643 |                 if memories_deleted > 0 and not session:
644 |                     # The matching memories were already deleted above, so fall
645 |                     # back to a generic label derived from the upload ID
646 |                     filename = f"Document (upload_id: {upload_id[:8]}...)"
647 |
648 | except Exception as e:
649 | logger.warning(f"Could not delete memories by tag: {e}")
650 | # If deletion fails and we don't know about this upload, return 404
651 | if not session:
652 | raise HTTPException(
653 | status_code=404,
654 | detail=f"Upload ID not found and no memories with tag '{upload_tag}'"
655 | )
656 | memories_deleted = 0
657 |
658 | # Remove upload session if it exists
659 | if session:
660 | del upload_sessions[upload_id]
661 |
662 | return {
663 | "status": "success",
664 | "upload_id": upload_id,
665 | "filename": filename,
666 | "memories_deleted": memories_deleted,
667 | "message": f"Document '{filename}' removed successfully"
668 | }
669 |
670 | except HTTPException:
671 | raise
672 | except Exception as e:
673 | logger.error(f"Error removing document: {str(e)}")
674 | import traceback
675 | logger.error(f"Traceback: {traceback.format_exc()}")
676 | raise HTTPException(status_code=500, detail=f"Failed to remove document: {str(e)}")
677 |
678 | @router.delete("/remove-by-tags")
679 | async def remove_documents_by_tags(tags: List[str]):
680 | """
681 | Remove documents by their tags.
682 |
683 | Args:
684 | tags: List of tags to search for
685 |
686 | Returns:
687 | Removal status with affected upload IDs and memory counts
688 | """
689 | logger.info(f"Remove documents by tags request: {tags}")
690 |
691 | try:
692 | # Get storage
693 | storage = get_storage()
694 |
695 | # Delete memories by tags
696 |         # delete_by_tags returns a (count, message) tuple, matching its use in remove_document above
697 |         memories_deleted, _ = await storage.delete_by_tags(tags)
698 |
699 | # Find and remove affected upload sessions
700 | affected_sessions = []
701 | to_remove = []
702 |
703 | for upload_id, session in upload_sessions.items():
704 | # Check if any of the document's tags match
705 | # This requires storing tags in the session object
706 | # For now, just track all sessions (placeholder)
707 | pass
708 |
709 | return {
710 | "status": "success",
711 | "tags": tags,
712 | "memories_deleted": memories_deleted,
713 | "affected_uploads": affected_sessions,
714 | "message": f"Deleted {memories_deleted} memories matching tags"
715 | }
716 |
717 | except Exception as e:
718 | logger.error(f"Error removing documents by tags: {str(e)}")
719 | raise HTTPException(status_code=500, detail=f"Failed to remove documents: {str(e)}")
720 |
721 | @router.get("/search-content/{upload_id}")
722 | async def search_document_content(upload_id: str, limit: int = 1000):
723 | """
724 | Search for all memories associated with an upload.
725 |
726 | Args:
727 | upload_id: The upload session ID
728 | limit: Maximum number of results to return (default: 1000)
729 |
730 | Returns:
731 | List of memories with their content and metadata
732 | """
733 | logger.info(f"Search document content for upload_id: {upload_id}, limit: {limit}")
734 |
735 | # Get session info if available (may not exist after server restart)
736 | session = upload_sessions.get(upload_id)
737 |
738 | # If no session, we'll still try to find memories by upload_id tag
739 | if not session:
740 | logger.info(f"No upload session found for {upload_id}, searching by tag only")
741 |
742 | try:
743 | # Get storage
744 | storage = get_storage()
745 |
746 | # Search for memories with upload_id tag
747 | upload_tag = f"upload_id:{upload_id}"
748 | logger.info(f"Searching for memories with tag: {upload_tag}")
749 |
750 | # Use tag search (search_by_tags doesn't support limit parameter)
751 | all_memories = await storage.search_by_tags([upload_tag])
752 |
753 | # If no memories found and no session, this upload_id doesn't exist
754 | if not all_memories and not session:
755 | raise HTTPException(status_code=404, detail=f"No memories found for upload_id: {upload_id}")
756 |
757 | # Apply limit after retrieval
758 | memories = all_memories[:limit] if limit and limit > 0 else all_memories
759 |
760 | # Format results
761 | results = []
762 | for memory in memories:
763 | # Handle created_at (stored as float timestamp)
764 | created_at_str = None
765 | if memory.created_at:
766 | if isinstance(memory.created_at, float):
767 | created_at_str = datetime.fromtimestamp(memory.created_at).isoformat()
768 | elif hasattr(memory.created_at, 'isoformat'):
769 | created_at_str = memory.created_at.isoformat()
770 |
771 | results.append({
772 | "content_hash": memory.content_hash,
773 | "content": memory.content,
774 | "tags": memory.tags,
775 | "metadata": memory.metadata,
776 | "created_at": created_at_str,
777 | "chunk_index": memory.metadata.get('chunk_index', 0) if memory.metadata else 0,
778 | "page": memory.metadata.get('page', None) if memory.metadata else None
779 | })
780 |
781 | # Sort by chunk index
782 | results.sort(key=lambda x: x.get('chunk_index', 0))
783 |
784 | # Get filename from session or from first memory's metadata
785 | filename = session.filename if session else None
786 | if not filename and results:
787 | # Try to get from first memory's metadata
788 | first_memory_metadata = results[0].get('metadata', {})
789 | filename = first_memory_metadata.get('source_file', f"Document (upload_id: {upload_id[:8]}...)")
790 |
791 | return {
792 | "status": "success",
793 | "upload_id": upload_id,
794 | "filename": filename or "Unknown Document",
795 | "total_found": len(results),
796 | "memories": results
797 | }
798 |
799 | except Exception as e:
800 | logger.error(f"Error searching document content: {str(e)}")
801 | # Get filename from session if available
802 | filename = session.filename if session else f"Document (upload_id: {upload_id[:8]}...)"
803 | # Return empty results instead of error to avoid breaking UI
804 | return {
805 | "status": "partial",
806 | "upload_id": upload_id,
807 | "filename": filename,
808 | "total_found": 0,
809 | "memories": [],
810 | "error": str(e)
811 | }
812 |
```
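The document API above follows a simple upload lifecycle: queue a file, let the background task chunk and store it, poll /status/{upload_id}, then inspect or remove the stored chunks via the upload_id tag. The sketch below shows that flow from the client side using the same TestClient pattern as the integration tests; the /api/documents prefix is an assumption (the real mount prefix is set where app.py includes this router and is not visible in this file), and the example file type must be one of SUPPORTED_FORMATS.

```python
# Minimal client-side sketch of the upload lifecycle; route prefix is assumed.
from fastapi.testclient import TestClient
from mcp_memory_service.web.app import app

client = TestClient(app)

# 1. Upload a document (multipart form, matching the File/Form parameters above).
with open("notes.md", "rb") as fh:  # file extension must be in SUPPORTED_FORMATS
    resp = client.post(
        "/api/documents/upload",
        files={"file": ("notes.md", fh, "text/markdown")},
        data={"tags": "docs,example", "chunk_size": "1000", "memory_type": "document"},
    )
upload_id = resp.json()["upload_id"]

# 2. Poll the background task (status: queued -> processing -> completed/failed).
status = client.get(f"/api/documents/status/{upload_id}").json()

# 3. Inspect the stored chunks, then remove the document and its memories.
chunks = client.get(f"/api/documents/search-content/{upload_id}").json()
client.delete(f"/api/documents/remove/{upload_id}")
```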
--------------------------------------------------------------------------------
/tests/test_sqlite_vec_storage.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Comprehensive tests for SQLite-vec storage backend.
3 | """
4 |
5 | import pytest
6 | import pytest_asyncio
7 | import asyncio
8 | import tempfile
9 | import os
10 | import shutil
11 | import json
12 | from unittest.mock import Mock, patch
13 | import time
14 |
15 | # Skip tests if sqlite-vec is not available
16 | try:
17 | import sqlite_vec
18 | SQLITE_VEC_AVAILABLE = True
19 | except ImportError:
20 | SQLITE_VEC_AVAILABLE = False
21 |
22 | from src.mcp_memory_service.models.memory import Memory, MemoryQueryResult
23 | from src.mcp_memory_service.utils.hashing import generate_content_hash
24 |
25 | if SQLITE_VEC_AVAILABLE:
26 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
27 |
28 | # Skip all tests if sqlite-vec is not available
29 | pytestmark = pytest.mark.skipif(not SQLITE_VEC_AVAILABLE, reason="sqlite-vec not available")
30 |
31 |
32 | class TestSqliteVecStorage:
33 | """Test suite for SQLite-vec storage functionality."""
34 |
35 | @pytest_asyncio.fixture
36 | async def storage(self):
37 | """Create a test storage instance."""
38 | temp_dir = tempfile.mkdtemp()
39 | db_path = os.path.join(temp_dir, "test_memory.db")
40 |
41 | storage = SqliteVecMemoryStorage(db_path)
42 | await storage.initialize()
43 |
44 | yield storage
45 |
46 | # Cleanup
47 | if storage.conn:
48 | storage.conn.close()
49 | shutil.rmtree(temp_dir, ignore_errors=True)
50 |
51 | @pytest.fixture
52 | def sample_memory(self):
53 | """Create a sample memory for testing."""
54 | content = "This is a test memory for SQLite-vec storage"
55 | return Memory(
56 | content=content,
57 | content_hash=generate_content_hash(content),
58 | tags=["test", "sqlite-vec"],
59 | memory_type="note",
60 | metadata={"priority": "medium", "category": "testing"}
61 | )
62 |
63 | @pytest.mark.asyncio
64 | async def test_initialization(self):
65 | """Test storage initialization."""
66 | temp_dir = tempfile.mkdtemp()
67 | db_path = os.path.join(temp_dir, "test_init.db")
68 |
69 | try:
70 | storage = SqliteVecMemoryStorage(db_path)
71 | await storage.initialize()
72 |
73 | # Check that database file was created
74 | assert os.path.exists(db_path)
75 |
76 | # Check that connection is established
77 | assert storage.conn is not None
78 |
79 | # Check that table was created
80 | cursor = storage.conn.execute('''
81 | SELECT name FROM sqlite_master
82 | WHERE type='table' AND name='memories'
83 | ''')
84 | assert cursor.fetchone() is not None
85 |
86 | storage.close()
87 |
88 | finally:
89 | shutil.rmtree(temp_dir, ignore_errors=True)
90 |
91 | @pytest.mark.asyncio
92 | async def test_store_memory(self, storage, sample_memory):
93 | """Test storing a memory."""
94 | success, message = await storage.store(sample_memory)
95 |
96 | assert success
97 | assert "successfully" in message.lower()
98 |
99 | # Verify memory was stored
100 | cursor = storage.conn.execute(
101 | 'SELECT content_hash FROM memories WHERE content_hash = ?',
102 | (sample_memory.content_hash,)
103 | )
104 | result = cursor.fetchone()
105 | assert result is not None
106 | assert result[0] == sample_memory.content_hash
107 |
108 | @pytest.mark.asyncio
109 | async def test_store_duplicate_memory(self, storage, sample_memory):
110 | """Test that duplicate memories are rejected."""
111 | # Store the memory first time
112 | success, message = await storage.store(sample_memory)
113 | assert success
114 |
115 | # Try to store the same memory again
116 | success, message = await storage.store(sample_memory)
117 | assert not success
118 | assert "duplicate" in message.lower()
119 |
120 | @pytest.mark.asyncio
121 | async def test_retrieve_memory(self, storage, sample_memory):
122 | """Test retrieving memories using semantic search."""
123 | # Store the memory
124 | await storage.store(sample_memory)
125 |
126 | # Retrieve using semantic search
127 | results = await storage.retrieve("test memory sqlite", n_results=5)
128 |
129 | assert len(results) > 0
130 | assert isinstance(results[0], MemoryQueryResult)
131 | assert results[0].memory.content_hash == sample_memory.content_hash
132 | assert results[0].relevance_score >= 0.0
133 | assert results[0].debug_info["backend"] == "sqlite-vec"
134 |
135 | @pytest.mark.asyncio
136 | async def test_retrieve_no_results(self, storage):
137 | """Test retrieving when no memories match."""
138 | results = await storage.retrieve("nonexistent query", n_results=5)
139 | assert len(results) == 0
140 |
141 | @pytest.mark.asyncio
142 | async def test_search_by_tag(self, storage, sample_memory):
143 | """Test searching memories by tags."""
144 | # Store the memory
145 | await storage.store(sample_memory)
146 |
147 | # Search by existing tag
148 | results = await storage.search_by_tag(["test"])
149 | assert len(results) == 1
150 | assert results[0].content_hash == sample_memory.content_hash
151 |
152 | # Search by non-existent tag
153 | results = await storage.search_by_tag(["nonexistent"])
154 | assert len(results) == 0
155 |
156 | # Search by multiple tags
157 | results = await storage.search_by_tag(["test", "sqlite-vec"])
158 | assert len(results) == 1
159 |
160 | @pytest.mark.asyncio
161 | async def test_search_by_empty_tags(self, storage):
162 | """Test searching with empty tags list."""
163 | results = await storage.search_by_tag([])
164 | assert len(results) == 0
165 |
166 | @pytest.mark.asyncio
167 | async def test_delete_memory(self, storage, sample_memory):
168 | """Test deleting a memory by content hash."""
169 | # Store the memory
170 | await storage.store(sample_memory)
171 |
172 | # Delete the memory
173 | success, message = await storage.delete(sample_memory.content_hash)
174 | assert success
175 | assert sample_memory.content_hash in message
176 |
177 | # Verify memory was deleted
178 | cursor = storage.conn.execute(
179 | 'SELECT content_hash FROM memories WHERE content_hash = ?',
180 | (sample_memory.content_hash,)
181 | )
182 | assert cursor.fetchone() is None
183 |
184 | @pytest.mark.asyncio
185 | async def test_delete_nonexistent_memory(self, storage):
186 | """Test deleting a non-existent memory."""
187 | nonexistent_hash = "nonexistent123456789"
188 | success, message = await storage.delete(nonexistent_hash)
189 | assert not success
190 | assert "not found" in message.lower()
191 |
192 | @pytest.mark.asyncio
193 | async def test_delete_by_tag(self, storage):
194 | """Test deleting memories by tag."""
195 | # Store multiple memories with different tags
196 | memory1 = Memory(
197 | content="Memory 1",
198 | content_hash=generate_content_hash("Memory 1"),
199 | tags=["tag1", "shared"]
200 | )
201 | memory2 = Memory(
202 | content="Memory 2",
203 | content_hash=generate_content_hash("Memory 2"),
204 | tags=["tag2", "shared"]
205 | )
206 | memory3 = Memory(
207 | content="Memory 3",
208 | content_hash=generate_content_hash("Memory 3"),
209 | tags=["tag3"]
210 | )
211 |
212 | await storage.store(memory1)
213 | await storage.store(memory2)
214 | await storage.store(memory3)
215 |
216 | # Delete by shared tag
217 | count, message = await storage.delete_by_tag("shared")
218 | assert count == 2
219 | assert "deleted 2 memories" in message.lower()
220 |
221 | # Verify correct memories were deleted
222 | remaining = await storage.search_by_tag(["tag3"])
223 | assert len(remaining) == 1
224 | assert remaining[0].content_hash == memory3.content_hash
225 |
226 | @pytest.mark.asyncio
227 | async def test_delete_by_nonexistent_tag(self, storage):
228 | """Test deleting by a non-existent tag."""
229 | count, message = await storage.delete_by_tag("nonexistent")
230 | assert count == 0
231 | assert "no memories found" in message.lower()
232 |
233 | @pytest.mark.asyncio
234 | async def test_cleanup_duplicates(self, storage):
235 | """Test cleaning up duplicate memories."""
236 | # Create memory
237 | content = "Duplicate test memory"
238 | memory = Memory(
239 | content=content,
240 | content_hash=generate_content_hash(content),
241 | tags=["duplicate"]
242 | )
243 |
244 | # Store the memory
245 | await storage.store(memory)
246 |
247 | # Manually insert a duplicate (bypassing duplicate check)
248 | embedding = storage._generate_embedding(content)
249 | storage.conn.execute('''
250 | INSERT INTO memories (
251 | content_embedding, content_hash, content, tags, memory_type,
252 | metadata, created_at, updated_at, created_at_iso, updated_at_iso
253 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
254 | ''', (
255 | sqlite_vec.serialize_float32(embedding),
256 | memory.content_hash,
257 | content,
258 | "duplicate",
259 | None,
260 | "{}",
261 | time.time(),
262 | time.time(),
263 | "2024-01-01T00:00:00Z",
264 | "2024-01-01T00:00:00Z"
265 | ))
266 | storage.conn.commit()
267 |
268 | # Clean up duplicates
269 | count, message = await storage.cleanup_duplicates()
270 | assert count == 1
271 | assert "removed 1 duplicate" in message.lower()
272 |
273 | # Verify only one copy remains
274 | cursor = storage.conn.execute(
275 | 'SELECT COUNT(*) FROM memories WHERE content_hash = ?',
276 | (memory.content_hash,)
277 | )
278 | assert cursor.fetchone()[0] == 1
279 |
280 | @pytest.mark.asyncio
281 | async def test_cleanup_no_duplicates(self, storage, sample_memory):
282 | """Test cleanup when no duplicates exist."""
283 | await storage.store(sample_memory)
284 |
285 | count, message = await storage.cleanup_duplicates()
286 | assert count == 0
287 | assert "no duplicate memories found" in message.lower()
288 |
289 | @pytest.mark.asyncio
290 | async def test_update_memory_metadata(self, storage, sample_memory):
291 | """Test updating memory metadata."""
292 | # Store the memory
293 | await storage.store(sample_memory)
294 |
295 | # Update metadata
296 | updates = {
297 | "tags": ["updated", "test"],
298 | "memory_type": "reminder",
299 | "metadata": {"priority": "high", "due_date": "2024-01-15"},
300 | "status": "active"
301 | }
302 |
303 | success, message = await storage.update_memory_metadata(
304 | content_hash=sample_memory.content_hash,
305 | updates=updates
306 | )
307 |
308 | assert success
309 | assert "updated fields" in message.lower()
310 |
311 | # Verify updates
312 | cursor = storage.conn.execute('''
313 | SELECT tags, memory_type, metadata
314 | FROM memories WHERE content_hash = ?
315 | ''', (sample_memory.content_hash,))
316 |
317 | row = cursor.fetchone()
318 | assert row is not None
319 |
320 | tags_str, memory_type, metadata_str = row
321 | metadata = json.loads(metadata_str)
322 |
323 | assert tags_str == "updated,test"
324 | assert memory_type == "reminder"
325 | assert metadata["priority"] == "high"
326 | assert metadata["due_date"] == "2024-01-15"
327 | assert metadata["status"] == "active"
328 |
329 | @pytest.mark.asyncio
330 | async def test_update_nonexistent_memory(self, storage):
331 | """Test updating metadata for non-existent memory."""
332 | nonexistent_hash = "nonexistent123456789"
333 | success, message = await storage.update_memory_metadata(
334 | content_hash=nonexistent_hash,
335 | updates={"tags": ["test"]}
336 | )
337 |
338 | assert not success
339 | assert "not found" in message.lower()
340 |
341 | @pytest.mark.asyncio
342 | async def test_update_memory_with_invalid_tags(self, storage, sample_memory):
343 | """Test updating memory with invalid tags format."""
344 | await storage.store(sample_memory)
345 |
346 | success, message = await storage.update_memory_metadata(
347 | content_hash=sample_memory.content_hash,
348 | updates={"tags": "not_a_list"}
349 | )
350 |
351 | assert not success
352 | assert "list of strings" in message.lower()
353 |
354 | @pytest.mark.asyncio
355 | async def test_update_memory_with_invalid_metadata(self, storage, sample_memory):
356 | """Test updating memory with invalid metadata format."""
357 | await storage.store(sample_memory)
358 |
359 | success, message = await storage.update_memory_metadata(
360 | content_hash=sample_memory.content_hash,
361 | updates={"metadata": "not_a_dict"}
362 | )
363 |
364 | assert not success
365 | assert "dictionary" in message.lower()
366 |
367 | @pytest.mark.asyncio
368 | async def test_update_memory_preserve_timestamps(self, storage, sample_memory):
369 | """Test updating memory while preserving timestamps."""
370 | await storage.store(sample_memory)
371 |
372 | # Get original timestamps
373 | cursor = storage.conn.execute('''
374 | SELECT created_at, created_at_iso FROM memories WHERE content_hash = ?
375 | ''', (sample_memory.content_hash,))
376 | original_created_at, original_created_at_iso = cursor.fetchone()
377 |
378 | # Wait a moment
379 | await asyncio.sleep(0.1)
380 |
381 | # Update with preserve_timestamps=True
382 | success, message = await storage.update_memory_metadata(
383 | content_hash=sample_memory.content_hash,
384 | updates={"tags": ["updated"]},
385 | preserve_timestamps=True
386 | )
387 |
388 | assert success
389 |
390 | # Check timestamps
391 | cursor = storage.conn.execute('''
392 | SELECT created_at, created_at_iso, updated_at FROM memories WHERE content_hash = ?
393 | ''', (sample_memory.content_hash,))
394 | created_at, created_at_iso, updated_at = cursor.fetchone()
395 |
396 | # created_at should be preserved
397 | assert abs(created_at - original_created_at) < 0.01
398 | assert created_at_iso == original_created_at_iso
399 |
400 | # updated_at should be newer
401 | assert updated_at > original_created_at
402 |
403 | @pytest.mark.asyncio
404 | async def test_update_memory_reset_timestamps(self, storage, sample_memory):
405 | """Test updating memory with timestamp reset."""
406 | await storage.store(sample_memory)
407 |
408 | # Get original timestamps
409 | cursor = storage.conn.execute('''
410 | SELECT created_at FROM memories WHERE content_hash = ?
411 | ''', (sample_memory.content_hash,))
412 | original_created_at = cursor.fetchone()[0]
413 |
414 | # Wait a moment
415 | await asyncio.sleep(0.1)
416 |
417 | # Update with preserve_timestamps=False
418 | success, message = await storage.update_memory_metadata(
419 | content_hash=sample_memory.content_hash,
420 | updates={"tags": ["updated"]},
421 | preserve_timestamps=False
422 | )
423 |
424 | assert success
425 |
426 | # Check timestamps
427 | cursor = storage.conn.execute('''
428 | SELECT created_at FROM memories WHERE content_hash = ?
429 | ''', (sample_memory.content_hash,))
430 | created_at = cursor.fetchone()[0]
431 |
432 | # created_at should be updated (newer)
433 | assert created_at > original_created_at
434 |
435 | def test_get_stats(self, storage):
436 | """Test getting storage statistics."""
437 | stats = storage.get_stats()
438 |
439 | assert isinstance(stats, dict)
440 | assert stats["backend"] == "sqlite-vec"
441 | assert "total_memories" in stats
442 | assert "database_size_bytes" in stats
443 | assert "embedding_model" in stats
444 | assert "embedding_dimension" in stats
445 |
446 | @pytest.mark.asyncio
447 | async def test_get_stats_with_data(self, storage, sample_memory):
448 | """Test getting statistics with data."""
449 | await storage.store(sample_memory)
450 |
451 | stats = storage.get_stats()
452 |
453 | assert stats["total_memories"] >= 1
454 | assert stats["database_size_bytes"] > 0
455 | assert stats["embedding_dimension"] == storage.embedding_dimension
456 |
457 | def test_close_connection(self, storage):
458 | """Test closing the database connection."""
459 | assert storage.conn is not None
460 |
461 | storage.close()
462 |
463 | assert storage.conn is None
464 |
465 | @pytest.mark.asyncio
466 | async def test_multiple_memories_retrieval(self, storage):
467 | """Test retrieving multiple memories with ranking."""
468 | # Store multiple memories
469 | memories = []
470 | for i in range(5):
471 | content = f"Test memory {i} with different content and keywords"
472 | memory = Memory(
473 | content=content,
474 | content_hash=generate_content_hash(content),
475 | tags=[f"tag{i}"],
476 | memory_type="note"
477 | )
478 | memories.append(memory)
479 | await storage.store(memory)
480 |
481 | # Retrieve memories
482 | results = await storage.retrieve("test memory content", n_results=3)
483 |
484 | assert len(results) <= 3
485 | assert len(results) > 0
486 |
487 | # Check that results are properly ranked (higher relevance first)
488 | for i in range(len(results) - 1):
489 | assert results[i].relevance_score >= results[i + 1].relevance_score
490 |
491 | @pytest.mark.asyncio
492 | async def test_embedding_generation(self, storage):
493 | """Test embedding generation functionality."""
494 | test_text = "This is a test for embedding generation"
495 |
496 | embedding = storage._generate_embedding(test_text)
497 |
498 | assert isinstance(embedding, list)
499 | assert len(embedding) == storage.embedding_dimension
500 | assert all(isinstance(x, float) for x in embedding)
501 |
502 | @pytest.mark.asyncio
503 | async def test_memory_with_complex_metadata(self, storage):
504 | """Test storing and retrieving memory with complex metadata."""
505 | content = "Memory with complex metadata"
506 | memory = Memory(
507 | content=content,
508 | content_hash=generate_content_hash(content),
509 | tags=["complex", "metadata", "test"],
510 | memory_type="structured",
511 | metadata={
512 | "nested": {"level1": {"level2": "value"}},
513 | "array": [1, 2, 3, "four"],
514 | "boolean": True,
515 | "null_value": None,
516 | "unicode": "测试中文 🚀"
517 | }
518 | )
519 |
520 | # Store the memory
521 | success, message = await storage.store(memory)
522 | assert success
523 |
524 | # Retrieve and verify
525 | results = await storage.retrieve("complex metadata", n_results=1)
526 | assert len(results) == 1
527 |
528 | retrieved_memory = results[0].memory
529 | assert retrieved_memory.metadata["nested"]["level1"]["level2"] == "value"
530 | assert retrieved_memory.metadata["array"] == [1, 2, 3, "four"]
531 | assert retrieved_memory.metadata["boolean"] is True
532 | assert retrieved_memory.metadata["unicode"] == "测试中文 🚀"
533 |
534 | @pytest.mark.asyncio
535 | async def test_concurrent_operations(self, storage):
536 | """Test concurrent storage operations."""
537 | # Create multiple memories
538 | memories = []
539 | for i in range(10):
540 | content = f"Concurrent test memory {i}"
541 | memory = Memory(
542 | content=content,
543 | content_hash=generate_content_hash(content),
544 | tags=[f"concurrent{i}"]
545 | )
546 | memories.append(memory)
547 |
548 | # Store memories concurrently
549 | tasks = [storage.store(memory) for memory in memories]
550 | results = await asyncio.gather(*tasks)
551 |
552 | # All should succeed
553 | assert all(success for success, _ in results)
554 |
555 | # Verify all were stored
556 | for memory in memories:
557 | cursor = storage.conn.execute(
558 | 'SELECT content_hash FROM memories WHERE content_hash = ?',
559 | (memory.content_hash,)
560 | )
561 | assert cursor.fetchone() is not None
562 |
563 | @pytest.mark.asyncio
564 | async def test_get_memories_by_time_range_basic(self, storage):
565 | """Test basic time range filtering."""
566 | # Store memories at different times
567 | now = time.time()
568 |
569 | # Memory from 1 hour ago
570 | memory1 = Memory(
571 | content="Memory from 1 hour ago",
572 | content_hash=generate_content_hash("Memory from 1 hour ago"),
573 | tags=["timerange"],
574 | created_at=now - 3600
575 | )
576 |
577 | # Memory from 30 minutes ago
578 | memory2 = Memory(
579 | content="Memory from 30 minutes ago",
580 | content_hash=generate_content_hash("Memory from 30 minutes ago"),
581 | tags=["timerange"],
582 | created_at=now - 1800
583 | )
584 |
585 | # Memory from now
586 | memory3 = Memory(
587 | content="Memory from now",
588 | content_hash=generate_content_hash("Memory from now"),
589 | tags=["timerange"],
590 | created_at=now
591 | )
592 |
593 | await storage.store(memory1)
594 | await storage.store(memory2)
595 | await storage.store(memory3)
596 |
597 | # Get memories from last 45 minutes (should get memory2 and memory3)
598 | results = await storage.get_memories_by_time_range(now - 2700, now + 100)
599 | assert len(results) == 2
600 | contents = [m.content for m in results]
601 | assert "Memory from 30 minutes ago" in contents
602 | assert "Memory from now" in contents
603 | assert "Memory from 1 hour ago" not in contents
604 |
605 | @pytest.mark.asyncio
606 | async def test_get_memories_by_time_range_empty(self, storage):
607 | """Test time range with no matching memories."""
608 | # Store one memory now
609 | memory = Memory(
610 | content="Current memory",
611 | content_hash=generate_content_hash("Current memory"),
612 | tags=["test"]
613 | )
614 | await storage.store(memory)
615 |
616 | # Query for memories from far in the past
617 | now = time.time()
618 | results = await storage.get_memories_by_time_range(now - 86400, now - 7200)
619 | assert len(results) == 0
620 |
621 | @pytest.mark.asyncio
622 | async def test_get_memories_by_time_range_boundaries(self, storage):
623 | """Test inclusive boundaries of time range."""
624 | now = time.time()
625 |
626 | # Memory exactly at start boundary
627 | memory_start = Memory(
628 | content="At start boundary",
629 | content_hash=generate_content_hash("At start boundary"),
630 | tags=["boundary"],
631 | created_at=now - 1000
632 | )
633 |
634 | # Memory exactly at end boundary
635 | memory_end = Memory(
636 | content="At end boundary",
637 | content_hash=generate_content_hash("At end boundary"),
638 | tags=["boundary"],
639 | created_at=now
640 | )
641 |
642 | # Memory just before start
643 | memory_before = Memory(
644 | content="Before start",
645 | content_hash=generate_content_hash("Before start"),
646 | tags=["boundary"],
647 | created_at=now - 1001
648 | )
649 |
650 | # Memory just after end
651 | memory_after = Memory(
652 | content="After end",
653 | content_hash=generate_content_hash("After end"),
654 | tags=["boundary"],
655 | created_at=now + 1
656 | )
657 |
658 | await storage.store(memory_start)
659 | await storage.store(memory_end)
660 | await storage.store(memory_before)
661 | await storage.store(memory_after)
662 |
663 | # Query with inclusive boundaries
664 | results = await storage.get_memories_by_time_range(now - 1000, now)
665 | assert len(results) == 2
666 | contents = [m.content for m in results]
667 | assert "At start boundary" in contents
668 | assert "At end boundary" in contents
669 | assert "Before start" not in contents
670 | assert "After end" not in contents
671 |
672 | @pytest.mark.asyncio
673 | async def test_get_memories_by_time_range_ordering(self, storage):
674 | """Test that results are ordered by created_at DESC."""
675 | now = time.time()
676 |
677 | # Store three memories in random order
678 | memory1 = Memory(
679 | content="First",
680 | content_hash=generate_content_hash("First"),
681 | tags=["order"],
682 | created_at=now - 300
683 | )
684 | memory2 = Memory(
685 | content="Second",
686 | content_hash=generate_content_hash("Second"),
687 | tags=["order"],
688 | created_at=now - 200
689 | )
690 | memory3 = Memory(
691 | content="Third",
692 | content_hash=generate_content_hash("Third"),
693 | tags=["order"],
694 | created_at=now - 100
695 | )
696 |
697 | await storage.store(memory3) # Store in non-chronological order
698 | await storage.store(memory1)
699 | await storage.store(memory2)
700 |
701 | # Get all three
702 | results = await storage.get_memories_by_time_range(now - 400, now)
703 | assert len(results) == 3
704 |
705 | # Should be ordered newest first (DESC)
706 | assert results[0].content == "Third"
707 | assert results[1].content == "Second"
708 | assert results[2].content == "First"
709 |
710 |
711 | class TestSqliteVecStorageWithoutEmbeddings:
712 | """Test SQLite-vec storage when sentence transformers is not available."""
713 |
714 | @pytest.mark.asyncio
715 | async def test_initialization_without_embeddings(self):
716 | """Test that storage can initialize without sentence transformers."""
717 | temp_dir = tempfile.mkdtemp()
718 | db_path = os.path.join(temp_dir, "test_no_embeddings.db")
719 |
720 | try:
721 | with patch('src.mcp_memory_service.storage.sqlite_vec.SENTENCE_TRANSFORMERS_AVAILABLE', False):
722 | storage = SqliteVecMemoryStorage(db_path)
723 | await storage.initialize()
724 |
725 | assert storage.conn is not None
726 | assert storage.embedding_model is None
727 |
728 | storage.close()
729 |
730 | finally:
731 | shutil.rmtree(temp_dir, ignore_errors=True)
732 |
733 | @pytest.mark.asyncio
734 | async def test_operations_without_embeddings(self):
735 | """Test basic operations without embeddings."""
736 | temp_dir = tempfile.mkdtemp()
737 | db_path = os.path.join(temp_dir, "test_no_embeddings.db")
738 |
739 | try:
740 | with patch('src.mcp_memory_service.storage.sqlite_vec.SENTENCE_TRANSFORMERS_AVAILABLE', False):
741 | storage = SqliteVecMemoryStorage(db_path)
742 | await storage.initialize()
743 |
744 | # Store should work (with zero embeddings)
745 | content = "Test without embeddings"
746 | memory = Memory(
747 | content=content,
748 | content_hash=generate_content_hash(content),
749 | tags=["no-embeddings"]
750 | )
751 |
752 | success, message = await storage.store(memory)
753 | assert success
754 |
755 | # Tag search should work
756 | results = await storage.search_by_tag(["no-embeddings"])
757 | assert len(results) == 1
758 |
759 | # Semantic search won't work well but shouldn't crash
760 | results = await storage.retrieve("test", n_results=1)
761 | # May or may not return results, but shouldn't crash
762 |
763 | storage.close()
764 |
765 | finally:
766 | shutil.rmtree(temp_dir, ignore_errors=True)
767 |
768 |
769 | if __name__ == "__main__":
770 | # Run basic tests when executed directly
771 | async def run_basic_tests():
772 | """Run basic tests to verify functionality."""
773 | if not SQLITE_VEC_AVAILABLE:
774 | print("⚠️ sqlite-vec not available, skipping tests")
775 | return
776 |
777 | print("Running basic SQLite-vec storage tests...")
778 |
779 | temp_dir = tempfile.mkdtemp()
780 | db_path = os.path.join(temp_dir, "test_basic.db")
781 |
782 | try:
783 | # Test basic functionality
784 | storage = SqliteVecMemoryStorage(db_path)
785 | await storage.initialize()
786 |
787 | # Store a memory
788 | content = "Test memory for basic validation"
789 | memory = Memory(
790 | content=content,
791 | content_hash=generate_content_hash(content),
792 | tags=["test", "basic"]
793 | )
794 |
795 | success, message = await storage.store(memory)
796 | print(f"Store: {success}, {message}")
797 |
798 | # Retrieve the memory
799 | results = await storage.retrieve("test memory", n_results=1)
800 | print(f"Retrieve: Found {len(results)} results")
801 |
802 | if results:
803 | print(f"Content: {results[0].memory.content}")
804 | print(f"Relevance: {results[0].relevance_score}")
805 |
806 | # Search by tag
807 | tag_results = await storage.search_by_tag(["test"])
808 | print(f"Tag search: Found {len(tag_results)} results")
809 |
810 | # Get stats
811 | stats = storage.get_stats()
812 | print(f"Stats: {stats['total_memories']} memories, {stats['database_size_mb']} MB")
813 |
814 | storage.close()
815 | print("✅ Basic tests passed!")
816 |
817 | except Exception as e:
818 | print(f"❌ Basic tests failed: {e}")
819 |
820 | finally:
821 | shutil.rmtree(temp_dir, ignore_errors=True)
822 |
823 | # Run the basic tests
824 | asyncio.run(run_basic_tests())
```
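The suite above relies on the `@pytest.mark.asyncio` marker, so it is normally driven through pytest with an asyncio plugin rather than the ad-hoc `run_basic_tests()` harness at the bottom of the file. A minimal runner sketch, assuming the file is saved as `tests/test_sqlite_vec_storage.py` and `pytest-asyncio` is installed (both are assumptions, not confirmed by this listing):

```python
# Hypothetical helper for running the sqlite-vec storage tests.
# The test path and the presence of pytest-asyncio are assumptions.
import sys

import pytest

if __name__ == "__main__":
    # -v prints one line per test; pytest-asyncio executes the
    # @pytest.mark.asyncio coroutines on an event loop.
    sys.exit(pytest.main(["-v", "tests/test_sqlite_vec_storage.py"]))
```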
--------------------------------------------------------------------------------
/src/mcp_memory_service/utils/time_parser.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Natural language time expression parser for MCP Memory Service.
17 |
18 | This module provides utilities to parse and understand various time expressions
19 | for retrieving memories based on when they were stored.
20 | """
21 | import re
22 | import logging
23 | from datetime import datetime, timedelta, date, time
24 | from typing import Tuple, Optional, Dict, Any, List
25 |
26 | logger = logging.getLogger(__name__)
27 |
28 | # Named time periods and their approximate date ranges
29 | NAMED_PERIODS = {
30 | # Holidays (US/Western-centric, would need localization for global use)
31 | "christmas": {"month": 12, "day": 25, "window": 3},
32 | "new year": {"month": 1, "day": 1, "window": 3},
33 | "valentine": {"month": 2, "day": 14, "window": 1},
34 | "halloween": {"month": 10, "day": 31, "window": 3},
35 | "thanksgiving": {"month": 11, "day": -1, "window": 3}, # -1 means fourth Thursday
36 |
37 | # Seasons (Northern Hemisphere)
38 | "spring": {"start_month": 3, "start_day": 20, "end_month": 6, "end_day": 20},
39 | "summer": {"start_month": 6, "start_day": 21, "end_month": 9, "end_day": 22},
40 | "fall": {"start_month": 9, "start_day": 23, "end_month": 12, "end_day": 20},
41 | "autumn": {"start_month": 9, "start_day": 23, "end_month": 12, "end_day": 20},
42 | "winter": {"start_month": 12, "start_day": 21, "end_month": 3, "end_day": 19},
43 | }
44 |
45 | # Time of day mappings (24-hour format)
46 | TIME_OF_DAY = {
47 | "morning": (5, 11), # 5:00 AM - 11:59 AM
48 | "noon": (12, 12), # 12:00 PM
49 | "afternoon": (13, 17), # 1:00 PM - 5:59 PM
50 | "evening": (18, 21), # 6:00 PM - 9:59 PM
51 | "night": (22, 4), # 10:00 PM - 4:59 AM (wraps around midnight)
52 | "midnight": (0, 0), # 12:00 AM
53 | }
54 |
55 | # Regular expressions for various time patterns
56 | PATTERNS = {
57 | "relative_days": re.compile(r'(?:(\d+)\s+days?\s+ago)|(?:yesterday)|(?:today)'),
58 | "relative_weeks": re.compile(r'(\d+)\s+weeks?\s+ago'),
59 | "relative_months": re.compile(r'(\d+)\s+months?\s+ago'),
60 | "relative_years": re.compile(r'(\d+)\s+years?\s+ago'),
61 | "last_period": re.compile(r'last\s+(day|week|month|year|summer|spring|winter|fall|autumn)'),
62 | "this_period": re.compile(r'this\s+(day|week|month|year|summer|spring|winter|fall|autumn)'),
63 | "month_name": re.compile(r'(january|february|march|april|may|june|july|august|september|october|november|december)'),
64 | "date_range": re.compile(r'between\s+(.+?)\s+and\s+(.+?)(?:\s|$)'),
65 | "time_of_day": re.compile(r'(morning|afternoon|evening|night|noon|midnight)'),
66 | "recent": re.compile(r'recent|lately|recently'),
67 | "specific_date": re.compile(r'(\d{1,2})[/-](\d{1,2})(?:[/-](\d{2,4}))?'),
68 | "full_date": re.compile(r'(\d{4})-(\d{1,2})-(\d{1,2})'),
69 | "named_period": re.compile(r'(spring|summer|winter|fall|autumn|christmas|new\s*year|valentine|halloween|thanksgiving|spring\s*break|summer\s*break|winter\s*break)'), "half_year": re.compile(r'(first|second)\s+half\s+of\s+(\d{4})'),
70 | "quarter": re.compile(r'(first|second|third|fourth|1st|2nd|3rd|4th)\s+quarter(?:\s+of\s+(\d{4}))?'),
71 | }
72 |
73 | def _calculate_season_date_range(
74 | period: str,
75 | season_info: Dict[str, int],
76 | base_year: int,
77 | current_month: Optional[int] = None
78 | ) -> Tuple[datetime, datetime]:
79 | """
80 | Calculate start and end dates for a season, handling winter's year boundary.
81 |
82 | Args:
83 | period: Season name ("winter", "spring", "summer", "fall"/"autumn")
84 | season_info: Dictionary with start_month, start_day, end_month, end_day
85 | base_year: The year to use as reference for calculation
86 | current_month: Current month (1-12) for context-aware winter calculation (optional)
87 |
88 | Returns:
89 | Tuple of (start_datetime, end_datetime) for the season
90 | """
91 | if period == "winter":
92 | # Winter spans year boundary (Dec -> Mar)
93 | # Determine start year based on current month context
94 | if current_month is not None and current_month <= 3:
95 | # We're in Jan-Mar, so winter started the previous year
96 | start_year = base_year - 1
97 | end_year = base_year
98 | else:
99 | # We're in any other month, winter starts this year
100 | start_year = base_year
101 | end_year = base_year + 1
102 |
103 | start_dt = datetime(start_year, season_info["start_month"], season_info["start_day"])
104 | end_dt = datetime(end_year, season_info["end_month"], season_info["end_day"], 23, 59, 59)
105 | else:
106 | # All other seasons fall within a single calendar year
107 | start_dt = datetime(base_year, season_info["start_month"], season_info["start_day"])
108 | end_dt = datetime(base_year, season_info["end_month"], season_info["end_day"], 23, 59, 59)
109 |
110 | return start_dt, end_dt
111 |
112 | def parse_time_expression(query: str) -> Tuple[Optional[float], Optional[float]]:
113 | """
114 | Parse a natural language time expression and return timestamp range.
115 |
116 | Args:
117 | query: A natural language query with time expressions
118 |
119 | Returns:
120 | Tuple of (start_timestamp, end_timestamp), either may be None
121 | """
122 | query = query.lower().strip()
123 |
124 | # Check for multiple patterns in a single query
125 | try:
126 | # First check for date ranges like "between X and Y"
127 | date_range_match = PATTERNS["date_range"].search(query)
128 | if date_range_match:
129 | start_expr = date_range_match.group(1)
130 | end_expr = date_range_match.group(2)
131 | start_ts, _ = parse_time_expression(start_expr)
132 | _, end_ts = parse_time_expression(end_expr)
133 | return start_ts, end_ts
134 |
135 | # Check for full ISO dates (YYYY-MM-DD) FIRST
136 | full_date_match = PATTERNS["full_date"].search(query)
137 | if full_date_match:
138 | year, month, day = full_date_match.groups()
139 | try:
140 | specific_date = date(int(year), int(month), int(day))
141 | start_dt = datetime.combine(specific_date, time.min)
142 | end_dt = datetime.combine(specific_date, time.max)
143 | return start_dt.timestamp(), end_dt.timestamp()
144 | except ValueError as e:
145 | logger.warning(f"Invalid date: {e}")
146 | return None, None
147 |
148 | # Check for specific dates (MM/DD/YYYY)
149 | specific_date_match = PATTERNS["specific_date"].search(query)
150 | if specific_date_match:
151 | month, day, year = specific_date_match.groups()
152 | month = int(month)
153 | day = int(day)
154 | current_year = datetime.now().year
155 | year = int(year) if year else current_year
156 | # Handle 2-digit years
157 | if year and year < 100:
158 | year = 2000 + year if year < 50 else 1900 + year
159 |
160 | try:
161 | specific_date = date(year, month, day)
162 | start_dt = datetime.combine(specific_date, time.min)
163 | end_dt = datetime.combine(specific_date, time.max)
164 | return start_dt.timestamp(), end_dt.timestamp()
165 | except ValueError as e:
166 | logger.warning(f"Invalid date: {e}")
167 | return None, None
168 |
169 | # Relative days: "X days ago", "yesterday", "today"
170 | days_ago_match = PATTERNS["relative_days"].search(query)
171 | if days_ago_match:
172 | if "yesterday" in query:
173 | days = 1
174 | elif "today" in query:
175 | days = 0
176 | else:
177 | days = int(days_ago_match.group(1))
178 |
179 | target_date = date.today() - timedelta(days=days)
180 |
181 | # Check for time of day modifiers
182 | time_of_day_match = PATTERNS["time_of_day"].search(query)
183 | if time_of_day_match:
184 | # Narrow the range based on time of day
185 | return get_time_of_day_range(target_date, time_of_day_match.group(1))
186 | else:
187 | # Return the full day
188 | start_dt = datetime.combine(target_date, time.min)
189 | end_dt = datetime.combine(target_date, time.max)
190 | return start_dt.timestamp(), end_dt.timestamp()
191 |
192 | # Relative weeks: "X weeks ago"
193 | weeks_ago_match = PATTERNS["relative_weeks"].search(query)
194 | if weeks_ago_match:
195 | weeks = int(weeks_ago_match.group(1))
196 | target_date = date.today() - timedelta(weeks=weeks)
197 | # Get the start of the week (Monday)
198 | start_date = target_date - timedelta(days=target_date.weekday())
199 | end_date = start_date + timedelta(days=6)
200 | start_dt = datetime.combine(start_date, time.min)
201 | end_dt = datetime.combine(end_date, time.max)
202 | return start_dt.timestamp(), end_dt.timestamp()
203 |
204 | # Relative months: "X months ago"
205 | months_ago_match = PATTERNS["relative_months"].search(query)
206 | if months_ago_match:
207 | months = int(months_ago_match.group(1))
208 | current = datetime.now()
209 | # Calculate target month
210 | year = current.year
211 | month = current.month - months
212 |
213 | # Adjust year if month goes negative
214 | while month <= 0:
215 | year -= 1
216 | month += 12
217 |
218 | # Get first and last day of the month
219 | first_day = date(year, month, 1)
220 | if month == 12:
221 | last_day = date(year + 1, 1, 1) - timedelta(days=1)
222 | else:
223 | last_day = date(year, month + 1, 1) - timedelta(days=1)
224 |
225 | start_dt = datetime.combine(first_day, time.min)
226 | end_dt = datetime.combine(last_day, time.max)
227 | return start_dt.timestamp(), end_dt.timestamp()
228 |
229 | # Relative years: "X years ago"
230 | years_ago_match = PATTERNS["relative_years"].search(query)
231 | if years_ago_match:
232 | years = int(years_ago_match.group(1))
233 | current_year = datetime.now().year
234 | target_year = current_year - years
235 | start_dt = datetime(target_year, 1, 1, 0, 0, 0)
236 | end_dt = datetime(target_year, 12, 31, 23, 59, 59)
237 | return start_dt.timestamp(), end_dt.timestamp()
238 |
239 | # "Last X" expressions
240 | last_period_match = PATTERNS["last_period"].search(query)
241 | if last_period_match:
242 | period = last_period_match.group(1)
243 | return get_last_period_range(period)
244 |
245 | # "This X" expressions
246 | this_period_match = PATTERNS["this_period"].search(query)
247 | if this_period_match:
248 | period = this_period_match.group(1)
249 | return get_this_period_range(period)
250 |
251 | # Month names
252 | month_match = PATTERNS["month_name"].search(query)
253 | if month_match:
254 | month_name = month_match.group(1)
255 | return get_month_range(month_name)
256 |
257 | # Named periods (holidays, etc.)
258 | named_period_match = PATTERNS["named_period"].search(query)
259 | if named_period_match:
260 |             period_name = named_period_match.group(1)  # Use the matched text as-is; get_named_period_range() normalizes it
261 | return get_named_period_range(period_name)
262 |
263 | # Half year expressions
264 | half_year_match = PATTERNS["half_year"].search(query)
265 | if half_year_match:
266 | half = half_year_match.group(1)
267 | year_str = half_year_match.group(2)
268 | year = int(year_str) if year_str else datetime.now().year
269 |
270 | if half.lower() == "first":
271 | start_dt = datetime(year, 1, 1, 0, 0, 0)
272 | end_dt = datetime(year, 6, 30, 23, 59, 59)
273 | else: # "second"
274 | start_dt = datetime(year, 7, 1, 0, 0, 0)
275 | end_dt = datetime(year, 12, 31, 23, 59, 59)
276 |
277 | return start_dt.timestamp(), end_dt.timestamp()
278 |
279 | # Quarter expressions
280 | quarter_match = PATTERNS["quarter"].search(query)
281 | if quarter_match:
282 | quarter = quarter_match.group(1).lower()
283 | year_str = quarter_match.group(2)
284 | year = int(year_str) if year_str else datetime.now().year
285 |
286 | # Map textual quarter to number
287 | quarter_num = {"first": 1, "1st": 1, "second": 2, "2nd": 2,
288 | "third": 3, "3rd": 3, "fourth": 4, "4th": 4}[quarter]
289 |
290 | # Calculate quarter start and end dates
291 | quarter_month = (quarter_num - 1) * 3 + 1
292 | start_dt = datetime(year, quarter_month, 1, 0, 0, 0)
293 |
294 | if quarter_month + 3 > 12:
295 | end_dt = datetime(year + 1, 1, 1, 0, 0, 0) - timedelta(seconds=1)
296 | else:
297 | end_dt = datetime(year, quarter_month + 3, 1, 0, 0, 0) - timedelta(seconds=1)
298 |
299 | return start_dt.timestamp(), end_dt.timestamp()
300 |
301 | # Recent/fuzzy time expressions
302 | recent_match = PATTERNS["recent"].search(query)
303 | if recent_match:
304 | # Default to last 7 days for "recent"
305 | end_dt = datetime.now()
306 | start_dt = end_dt - timedelta(days=7)
307 | return start_dt.timestamp(), end_dt.timestamp()
308 |
309 | # If no time expression is found, return None for both timestamps
310 | return None, None
311 |
312 | except Exception as e:
313 | logger.error(f"Error parsing time expression: {e}")
314 | return None, None
315 |
316 | def get_time_of_day_range(target_date: date, time_period: str) -> Tuple[float, float]:
317 | """Get timestamp range for a specific time of day on a given date."""
318 | if time_period in TIME_OF_DAY:
319 | start_hour, end_hour = TIME_OF_DAY[time_period]
320 |
321 | # Handle periods that wrap around midnight
322 | if start_hour > end_hour: # e.g., "night" = (22, 4)
323 | # For periods that span midnight, we need to handle specially
324 | if time_period == "night":
325 | start_dt = datetime.combine(target_date, time(start_hour, 0))
326 | end_dt = datetime.combine(target_date + timedelta(days=1), time(end_hour, 59, 59))
327 | else:
328 | # Default handling for other wrapping periods
329 | start_dt = datetime.combine(target_date, time(start_hour, 0))
330 | end_dt = datetime.combine(target_date + timedelta(days=1), time(end_hour, 59, 59))
331 | else:
332 | # Normal periods within a single day
333 | start_dt = datetime.combine(target_date, time(start_hour, 0))
334 | if end_hour == start_hour: # For noon, midnight (specific hour)
335 | end_dt = datetime.combine(target_date, time(end_hour, 59, 59))
336 | else:
337 | end_dt = datetime.combine(target_date, time(end_hour, 59, 59))
338 |
339 | return start_dt.timestamp(), end_dt.timestamp()
340 | else:
341 | # Fallback to full day
342 | start_dt = datetime.combine(target_date, time.min)
343 | end_dt = datetime.combine(target_date, time.max)
344 | return start_dt.timestamp(), end_dt.timestamp()
345 |
346 | def get_last_period_range(period: str) -> Tuple[float, float]:
347 | """Get timestamp range for 'last X' expressions."""
348 | now = datetime.now()
349 | today = date.today()
350 |
351 | if period == "day":
352 | # Last day = yesterday
353 | yesterday = today - timedelta(days=1)
354 | start_dt = datetime.combine(yesterday, time.min)
355 | end_dt = datetime.combine(yesterday, time.max)
356 | elif period == "week":
357 | # Last week = previous calendar week (Mon-Sun)
358 | # Find last Monday
359 | last_monday = today - timedelta(days=today.weekday() + 7)
360 | # Find last Sunday
361 | last_sunday = last_monday + timedelta(days=6)
362 | start_dt = datetime.combine(last_monday, time.min)
363 | end_dt = datetime.combine(last_sunday, time.max)
364 | elif period == "month":
365 | # Last month = previous calendar month
366 | first_of_this_month = date(today.year, today.month, 1)
367 | if today.month == 1:
368 | last_month = 12
369 | last_month_year = today.year - 1
370 | else:
371 | last_month = today.month - 1
372 | last_month_year = today.year
373 |
374 | first_of_last_month = date(last_month_year, last_month, 1)
375 | last_of_last_month = first_of_this_month - timedelta(days=1)
376 |
377 | start_dt = datetime.combine(first_of_last_month, time.min)
378 | end_dt = datetime.combine(last_of_last_month, time.max)
379 | elif period == "year":
380 | # Last year = previous calendar year
381 | last_year = today.year - 1
382 | start_dt = datetime(last_year, 1, 1, 0, 0, 0)
383 | end_dt = datetime(last_year, 12, 31, 23, 59, 59)
384 | elif period in ["summer", "spring", "winter", "fall", "autumn"]:
385 | # Last season
386 | season_info = NAMED_PERIODS[period]
387 | current_year = today.year
388 |
389 | # Determine if we're currently in this season
390 | current_month = today.month
391 | current_day = today.day
392 | is_current_season = False
393 |
394 | # Check if today falls within the season's date range
395 | if period in ["winter"]: # Winter spans year boundary
396 | if (current_month >= season_info["start_month"] or
397 | (current_month <= season_info["end_month"] and
398 | current_day <= season_info["end_day"])):
399 | is_current_season = True
400 | else:
401 | if (current_month >= season_info["start_month"] and current_month <= season_info["end_month"] and
402 | current_day >= season_info["start_day"] if current_month == season_info["start_month"] else True and
403 | current_day <= season_info["end_day"] if current_month == season_info["end_month"] else True):
404 | is_current_season = True
405 |
406 | # If we're currently in the season, get last year's season
407 | if is_current_season:
408 | year = current_year - 1
409 | else:
410 | year = current_year
411 |
412 | # Calculate season date range (handles winter's year boundary)
413 | context_month = current_month if is_current_season else None
414 | start_dt, end_dt = _calculate_season_date_range(period, season_info, year, context_month)
415 | else:
416 | # Fallback - last 24 hours
417 | end_dt = now
418 | start_dt = end_dt - timedelta(days=1)
419 |
420 | return start_dt.timestamp(), end_dt.timestamp()
421 |
422 | def get_this_period_range(period: str) -> Tuple[float, float]:
423 | """Get timestamp range for 'this X' expressions."""
424 | now = datetime.now()
425 | today = date.today()
426 |
427 | if period == "day":
428 | # This day = today
429 | start_dt = datetime.combine(today, time.min)
430 | end_dt = datetime.combine(today, time.max)
431 | elif period == "week":
432 | # This week = current calendar week (Mon-Sun)
433 | # Find this Monday
434 | monday = today - timedelta(days=today.weekday())
435 | sunday = monday + timedelta(days=6)
436 | start_dt = datetime.combine(monday, time.min)
437 | end_dt = datetime.combine(sunday, time.max)
438 | elif period == "month":
439 | # This month = current calendar month
440 | first_of_month = date(today.year, today.month, 1)
441 | if today.month == 12:
442 | first_of_next_month = date(today.year + 1, 1, 1)
443 | else:
444 | first_of_next_month = date(today.year, today.month + 1, 1)
445 |
446 | last_of_month = first_of_next_month - timedelta(days=1)
447 |
448 | start_dt = datetime.combine(first_of_month, time.min)
449 | end_dt = datetime.combine(last_of_month, time.max)
450 | elif period == "year":
451 | # This year = current calendar year
452 | start_dt = datetime(today.year, 1, 1, 0, 0, 0)
453 | end_dt = datetime(today.year, 12, 31, 23, 59, 59)
454 | elif period in ["summer", "spring", "winter", "fall", "autumn"]:
455 | # This season
456 | season_info = NAMED_PERIODS[period]
457 | current_year = today.year
458 |
459 | # Calculate season date range (handles winter's year boundary)
460 | start_dt, end_dt = _calculate_season_date_range(period, season_info, current_year, today.month)
461 | else:
462 | # Fallback - current 24 hours
463 | end_dt = now
464 | start_dt = datetime.combine(today, time.min)
465 |
466 | return start_dt.timestamp(), end_dt.timestamp()
467 |
468 | def get_month_range(month_name: str) -> Tuple[float, float]:
469 | """Get timestamp range for a named month."""
470 | # Map month name to number
471 | month_map = {
472 | "january": 1, "february": 2, "march": 3, "april": 4,
473 | "may": 5, "june": 6, "july": 7, "august": 8,
474 | "september": 9, "october": 10, "november": 11, "december": 12
475 | }
476 |
477 | if month_name in month_map:
478 | month_num = month_map[month_name]
479 | current_year = datetime.now().year
480 |
481 | # If the month is in the future for this year, use last year
482 | current_month = datetime.now().month
483 | year = current_year if month_num <= current_month else current_year - 1
484 |
485 | # Get first and last day of the month
486 | first_day = date(year, month_num, 1)
487 | if month_num == 12:
488 | last_day = date(year + 1, 1, 1) - timedelta(days=1)
489 | else:
490 | last_day = date(year, month_num + 1, 1) - timedelta(days=1)
491 |
492 | start_dt = datetime.combine(first_day, time.min)
493 | end_dt = datetime.combine(last_day, time.max)
494 | return start_dt.timestamp(), end_dt.timestamp()
495 | else:
496 | return None, None
497 |
498 | def get_named_period_range(period_name: str) -> Tuple[Optional[float], Optional[float]]:
499 | """Get timestamp range for named periods like holidays."""
500 | period_name = period_name.lower().replace("_", " ")
501 | current_year = datetime.now().year
502 | current_month = datetime.now().month
503 | current_day = datetime.now().day
504 |
505 | if period_name in NAMED_PERIODS:
506 | info = NAMED_PERIODS[period_name]
507 | # Found matching period
508 | # Determine if the period is in the past or future for this year
509 | if "month" in info and "day" in info:
510 | # Simple fixed-date holiday
511 | month = info["month"]
512 | day = info["day"]
513 | window = info.get("window", 1) # Default 1-day window
514 |
515 | # Special case for Thanksgiving (fourth Thursday in November)
516 | if day == -1 and month == 11: # Thanksgiving
517 | # Find the fourth Thursday in November
518 | first_day = date(current_year, 11, 1)
519 | # Find first Thursday
520 | first_thursday = first_day + timedelta(days=((3 - first_day.weekday()) % 7))
521 | # Fourth Thursday is 3 weeks later
522 | thanksgiving = first_thursday + timedelta(weeks=3)
523 | day = thanksgiving.day
524 |
525 | # Check if the holiday has passed this year
526 | is_past = (current_month > month or
527 | (current_month == month and current_day > day + window))
528 |
529 | year = current_year if not is_past else current_year - 1
530 | target_date = date(year, month, day)
531 |
532 | # Create date range with window
533 | start_date = target_date - timedelta(days=window)
534 | end_date = target_date + timedelta(days=window)
535 |
536 | start_dt = datetime.combine(start_date, time.min)
537 | end_dt = datetime.combine(end_date, time.max)
538 | return start_dt.timestamp(), end_dt.timestamp()
539 |
540 | elif "start_month" in info and "end_month" in info:
541 | # Season or date range
542 | start_month = info["start_month"]
543 | start_day = info["start_day"]
544 | end_month = info["end_month"]
545 | end_day = info["end_day"]
546 |
547 | # Determine year based on current date
548 | if start_month > end_month: # Period crosses year boundary
549 | if current_month < end_month or (current_month == end_month and current_day <= end_day):
550 | # We're in the end part of the period that started last year
551 | start_dt = datetime(current_year - 1, start_month, start_day)
552 | end_dt = datetime(current_year, end_month, end_day, 23, 59, 59)
553 | else:
554 | # The period is either coming up this year or happened earlier this year
555 | if current_month > start_month or (current_month == start_month and current_day >= start_day):
556 | # Period already started this year
557 | start_dt = datetime(current_year, start_month, start_day)
558 | end_dt = datetime(current_year + 1, end_month, end_day, 23, 59, 59)
559 | else:
560 | # Period from last year
561 | start_dt = datetime(current_year - 1, start_month, start_day)
562 | end_dt = datetime(current_year, end_month, end_day, 23, 59, 59)
563 | else:
564 | # Period within a single year
565 | # Check if period has already occurred this year
566 | if (current_month > end_month or
567 | (current_month == end_month and current_day > end_day)):
568 | # Period already passed this year
569 | start_dt = datetime(current_year, start_month, start_day)
570 | end_dt = datetime(current_year, end_month, end_day, 23, 59, 59)
571 | else:
572 | # Check if current date is within the period
573 | is_within_period = (
574 | (current_month > start_month or
575 | (current_month == start_month and current_day >= start_day))
576 | and
577 | (current_month < end_month or
578 | (current_month == end_month and current_day <= end_day))
579 | )
580 |
581 | if is_within_period:
582 | # We're in the period this year
583 | start_dt = datetime(current_year, start_month, start_day)
584 | end_dt = datetime(current_year, end_month, end_day, 23, 59, 59)
585 | else:
586 | # Period from last year
587 | start_dt = datetime(current_year - 1, start_month, start_day)
588 | end_dt = datetime(current_year - 1, end_month, end_day, 23, 59, 59)
589 |
590 | return start_dt.timestamp(), end_dt.timestamp()
591 |
592 | # If no match found
593 | return None, None
594 |
595 | # Helper function to detect time expressions in a general query
596 | def extract_time_expression(query: str) -> Tuple[str, Tuple[Optional[float], Optional[float]]]:
597 | """
598 | Extract time-related expressions from a query and return the timestamps.
599 |
600 | Args:
601 | query: A natural language query that may contain time expressions
602 |
603 | Returns:
604 | Tuple of (cleaned_query, (start_timestamp, end_timestamp))
605 | The cleaned_query has time expressions removed
606 | """
607 | # Check for time expressions
608 | time_expressions = [
609 | r'\b\d+\s+days?\s+ago\b',
610 | r'\byesterday\b',
611 | r'\btoday\b',
612 | r'\b\d+\s+weeks?\s+ago\b',
613 | r'\b\d+\s+months?\s+ago\b',
614 | r'\b\d+\s+years?\s+ago\b',
615 | r'\blast\s+(day|week|month|year|summer|spring|winter|fall|autumn)\b',
616 | r'\bthis\s+(day|week|month|year|summer|spring|winter|fall|autumn)\b',
617 | r'\b(january|february|march|april|may|june|july|august|september|october|november|december)\b',
618 | r'\bbetween\s+.+?\s+and\s+.+?(?:\s|$)',
619 | r'\bin\s+the\s+(morning|afternoon|evening|night|noon|midnight)\b',
620 | r'\brecent|lately|recently\b',
621 | r'\b\d{1,2}[/-]\d{1,2}(?:[/-]\d{2,4})?\b',
622 | r'\b\d{4}-\d{1,2}-\d{1,2}\b',
623 | r'\b(spring|summer|winter|fall|autumn|christmas|new\s*year|valentine|halloween|thanksgiving|spring\s*break|summer\s*break|winter\s*break)\b',
624 | r'\b(first|second)\s+half\s+of\s+\d{4}\b',
625 | r'\b(first|second|third|fourth|1st|2nd|3rd|4th)\s+quarter(?:\s+of\s+\d{4})?\b',
626 | r'\bfrom\s+.+\s+to\s+.+\b'
627 | ]
628 |
629 | # Combine all patterns
630 | combined_pattern = '|'.join(f'({expr})' for expr in time_expressions)
631 | combined_regex = re.compile(combined_pattern, re.IGNORECASE)
632 |
633 | # Find all matches
634 | matches = list(combined_regex.finditer(query))
635 | if not matches:
636 | return query, (None, None)
637 |
638 | # Extract the time expressions
639 | time_expressions = []
640 | for match in matches:
641 | span = match.span()
642 | expression = query[span[0]:span[1]]
643 | time_expressions.append(expression)
644 |
645 | # Parse time expressions to get timestamps
646 | full_time_expression = ' '.join(time_expressions)
647 | start_ts, end_ts = parse_time_expression(full_time_expression)
648 |
649 | # Remove time expressions from the query
650 | cleaned_query = query
651 | for expr in time_expressions:
652 | cleaned_query = cleaned_query.replace(expr, '')
653 |
654 | # Clean up multiple spaces
655 | cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()
656 |
657 | return cleaned_query, (start_ts, end_ts)
```
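A short usage sketch of the parser above. The import path assumes the package is installed from the `src/` layout shown in this listing, and the query strings are illustrative only:

```python
# Illustrative use of parse_time_expression / extract_time_expression.
# Import path assumes the mcp_memory_service package is installed (src/ layout).
from datetime import datetime

from mcp_memory_service.utils.time_parser import (
    extract_time_expression,
    parse_time_expression,
)

# A pure time expression maps to a (start, end) pair of Unix timestamps.
start_ts, end_ts = parse_time_expression("2 days ago")
print(datetime.fromtimestamp(start_ts), "->", datetime.fromtimestamp(end_ts))

# A mixed query is split: the time phrase is stripped from the text and
# its timestamp range is returned alongside (previous Monday..Sunday here).
cleaned, (start_ts, end_ts) = extract_time_expression("docker setup notes from last week")
print(cleaned)           # -> "docker setup notes from"
print(start_ts, end_ts)  # -> bounds of the previous calendar week
```

If no recognizable time expression is present, both helpers return `None` timestamps, so callers can treat the time filter as optional.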