This is page 44 of 62. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── commands
│ │ ├── README.md
│ │ ├── refactor-function
│ │ ├── refactor-function-prod
│ │ └── refactor-function.md
│ ├── consolidation-fix-handoff.md
│ ├── consolidation-hang-fix-summary.md
│ ├── directives
│ │ ├── agents.md
│ │ ├── code-quality-workflow.md
│ │ ├── consolidation-details.md
│ │ ├── development-setup.md
│ │ ├── hooks-configuration.md
│ │ ├── memory-first.md
│ │ ├── memory-tagging.md
│ │ ├── pr-workflow.md
│ │ ├── quality-system-details.md
│ │ ├── README.md
│ │ ├── refactoring-checklist.md
│ │ ├── storage-backends.md
│ │ └── version-management.md
│ ├── prompts
│ │ └── hybrid-cleanup-integration.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .coveragerc
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-branch-automation.yml
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── dockerfile-lint.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── publish-dual.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .metrics
│ ├── baseline_cc_install_hooks.txt
│ ├── baseline_mi_install_hooks.txt
│ ├── baseline_nesting_install_hooks.txt
│ ├── BASELINE_REPORT.md
│ ├── COMPLEXITY_COMPARISON.txt
│ ├── QUICK_REFERENCE.txt
│ ├── README.md
│ ├── REFACTORED_BASELINE.md
│ ├── REFACTORING_COMPLETION_REPORT.md
│ └── TRACKING_TABLE.md
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── ai-optimized-tool-descriptions.py
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── auto-capture-hook.js
│ │ ├── auto-capture-hook.ps1
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── permission-request.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-AUTO-CAPTURE.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-PERMISSION-REQUEST.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-permission-request.js
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── auto-capture-patterns.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-cache.json
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ ├── user-override-detector.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── COMMIT_MESSAGE.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── graph-database-design.md
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── demo-recording-script.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-280-post-mortem.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ ├── quality-system-configs.md
│ │ └── tag-schema.json
│ ├── features
│ │ └── association-quality-boost.md
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── memory-quality-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ ├── dashboard-placeholder.md
│ │ └── update-restart-demo.png
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LIGHTWEIGHT_ONNX_SETUP.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ ├── code-execution-api-quick-start.md
│ │ └── graph-migration-guide.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quality-system-ui-implementation.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── refactoring
│ │ └── phase-3-3-analysis.md
│ ├── releases
│ │ └── v8.72.0-testing.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── database-transfer-migration.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── memory-management.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ ├── tutorials
│ │ ├── advanced-techniques.md
│ │ ├── data-analysis.md
│ │ └── demo-session-walkthrough.md
│ ├── wiki-documentation-plan.md
│ └── wiki-Graph-Database-Architecture.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── IMPLEMENTATION_SUMMARY.md
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── PR_DESCRIPTION.md
├── pyproject-lite.toml
├── pyproject.toml
├── pytest.ini
├── README.md
├── release-notes-v8.61.0.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── ci
│ │ ├── check_dockerfile_args.sh
│ │ └── validate_imports.sh
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── add_project_tags.py
│ │ ├── apply_quality_boost_retroactively.py
│ │ ├── assign_memory_types.py
│ │ ├── auto_retag_memory_merge.py
│ │ ├── auto_retag_memory.py
│ │ ├── backfill_graph_table.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_association_memories_hybrid.py
│ │ ├── cleanup_association_memories.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_low_quality.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── delete_test_memories.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ ├── retag_valuable_memories.py
│ │ ├── scan_todos.sh
│ │ ├── soft_delete_test_memories.py
│ │ └── sync_status.py
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── pre_pr_check.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks_on_files.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── bulk_evaluate_onnx.py
│ │ ├── check_test_scores.py
│ │ ├── debug_deberta_scoring.py
│ │ ├── export_deberta_onnx.py
│ │ ├── fix_dead_code_install.sh
│ │ ├── migrate_to_deberta.py
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── rescore_deberta.py
│ │ ├── rescore_fallback.py
│ │ ├── reset_onnx_scores.py
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── memory_wrapper_cleanup.ps1
│ │ ├── memory_wrapper_cleanup.py
│ │ ├── memory_wrapper_cleanup.sh
│ │ ├── README_CLEANUP_WRAPPER.md
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── http_server_manager.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ ├── update_service.sh
│ │ └── windows
│ │ ├── add_watchdog_trigger.ps1
│ │ ├── install_scheduled_task.ps1
│ │ ├── manage_service.ps1
│ │ ├── run_http_server_background.ps1
│ │ ├── uninstall_scheduled_task.ps1
│ │ └── update_and_restart.ps1
│ ├── setup-lightweight.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── update_and_restart.sh
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── detect_platform.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── README_detect_platform.md
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── check_handler_coverage.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_graph_tools.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── _version.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── quality
│ │ ├── __init__.py
│ │ ├── ai_evaluator.py
│ │ ├── async_scorer.py
│ │ ├── config.py
│ │ ├── implicit_signals.py
│ │ ├── metadata_codec.py
│ │ ├── onnx_ranker.py
│ │ └── scorer.py
│ ├── server
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── cache_manager.py
│ │ ├── client_detection.py
│ │ ├── environment.py
│ │ ├── handlers
│ │ │ ├── __init__.py
│ │ │ ├── consolidation.py
│ │ │ ├── documents.py
│ │ │ ├── graph.py
│ │ │ ├── memory.py
│ │ │ ├── quality.py
│ │ │ └── utility.py
│ │ └── logging_config.py
│ ├── server_impl.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── graph.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ ├── migrations
│ │ │ └── 008_add_graph_table.sql
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── directory_ingestion.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── health_check.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── quality_analytics.py
│ │ ├── startup_orchestrator.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── quality.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── i18n
│ │ ├── de.json
│ │ ├── en.json
│ │ ├── es.json
│ │ ├── fr.json
│ │ ├── ja.json
│ │ ├── ko.json
│ │ └── zh.json
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── TESTING_NOTES.md
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ ├── test_forgetting.py
│ │ └── test_graph_modes.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── conftest.py
│ │ ├── HANDLER_COVERAGE_REPORT.md
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_all_memory_handlers.py
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── storage
│ │ ├── conftest.py
│ │ └── test_graph_storage.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_deberta_quality.py
│ ├── test_fallback_quality.py
│ ├── test_graph_traversal.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_lightweight_onnx.py
│ ├── test_memory_ops.py
│ ├── test_memory_wrapper_cleanup.py
│ ├── test_quality_integration.py
│ ├── test_quality_system.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_imports.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ ├── test_tag_time_filtering.py
│ └── test_uv_no_pip_installer_fallback.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
├── uv.lock
└── verify_compression.sh
```
# Files
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/api/documents.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Document Upload API Endpoints
17 |
18 | Provides REST API endpoints for document ingestion through the web dashboard.
19 | Supports single file upload, batch upload, progress tracking, and upload history.
20 | """
21 |
22 | import os
23 | import re
24 | import uuid
25 | import asyncio
26 | import logging
27 | import tempfile
28 | from typing import List, Dict, Any, Optional
29 | from datetime import datetime
30 | from pathlib import Path
31 | from urllib.parse import urlparse, unquote
32 |
33 | from fastapi import APIRouter, UploadFile, File, Form, HTTPException, BackgroundTasks
34 | from fastapi.responses import JSONResponse
35 | from pydantic import BaseModel
36 |
37 | from ...ingestion import get_loader_for_file, SUPPORTED_FORMATS
38 | from ...models.memory import Memory
39 | from ...utils import create_memory_from_chunk, _process_and_store_chunk, generate_content_hash
40 | from ..dependencies import get_storage
41 |
42 | logger = logging.getLogger(__name__)
43 |
44 | router = APIRouter()
45 |
46 | # Constants
47 | MAX_TAG_LENGTH = 100
48 |
49 |
50 | def parse_and_validate_tags(tags: str) -> List[str]:
51 | """
52 | Parse and validate tags from user input.
53 |
54 | Handles comma-separated and space-separated tags, removes file:// prefixes,
55 | sanitizes path separators, and validates tag lengths.
56 |
57 | Args:
58 | tags: Raw tag string (comma or space separated)
59 |
60 | Returns:
61 | List of cleaned and validated tags
62 |
63 | Raises:
64 | HTTPException: If any tag exceeds MAX_TAG_LENGTH
65 | """
66 | if not tags or not tags.strip():
67 | return []
68 |
69 | # Split by comma OR space
70 | raw_tags = tags.replace(',', ' ').split()
71 | tag_list = []
72 |
73 | for tag in raw_tags:
74 | clean_tag = tag.strip()
75 | if not clean_tag:
76 | continue
77 |
78 | # Remove file:// protocol prefixes and extract filename
79 | # Uses urllib.parse for robust handling of URL-encoded chars and different path formats
80 | if clean_tag.startswith('file://'):
81 | path_str = unquote(urlparse(clean_tag).path)
82 | # On Windows, urlparse may add a leading slash (e.g., /C:/...), which needs to be removed
83 | if os.name == 'nt' and path_str.startswith('/') and len(path_str) > 2 and path_str[2] == ':':
84 | path_str = path_str[1:]
85 | clean_tag = Path(path_str).name
86 |
87 | # Remove common path separators to create clean tag names
88 | clean_tag = re.sub(r'[/\\]', '_', clean_tag)
89 |
90 | # Validate tag length - raise error instead of silently dropping
91 | if len(clean_tag) > MAX_TAG_LENGTH:
92 | raise HTTPException(
93 | status_code=400,
94 | detail=f"Tag '{clean_tag[:100]}...' exceeds maximum length of {MAX_TAG_LENGTH} characters. "
95 | f"Please use shorter, more descriptive tags."
96 | )
97 |
98 | tag_list.append(clean_tag)
99 |
100 | return tag_list
101 |
102 |
103 | async def ensure_storage_initialized():
104 | """Ensure storage is initialized for web API usage."""
105 | logger.info("🔍 Checking storage availability...")
106 | try:
107 | # Try to get storage
108 | storage = get_storage()
109 | logger.info("✅ Storage already available")
110 | return storage
111 | except Exception as e:
112 | logger.warning(f"⚠️ Storage not available ({e}), attempting to initialize...")
113 | try:
114 | # Import and initialize storage
115 | from ..dependencies import create_storage_backend, set_storage
116 | logger.info("🏗️ Creating storage backend...")
117 | storage = await create_storage_backend()
118 | set_storage(storage)
119 | logger.info("✅ Storage initialized successfully in API context")
120 | return storage
121 | except Exception as init_error:
122 | logger.error(f"❌ Failed to initialize storage: {init_error}")
123 | logger.error(f"Full error: {str(init_error)}")
124 | import traceback
125 | logger.error(f"Traceback: {traceback.format_exc()}")
126 | # Don't raise HTTPException here since this is called from background tasks
127 | raise init_error
128 |
129 | # In-memory storage for upload tracking (in production, use database)
130 | upload_sessions = {}
131 |
132 | # Note: UploadRequest and BatchUploadRequest models removed - not used
133 | # Endpoints read parameters directly from form data
134 |
135 | class UploadStatus(BaseModel):
136 | upload_id: str
137 | status: str # queued, processing, completed, failed
138 | filename: str = ""
139 | file_size: int = 0
140 | chunks_processed: int = 0
141 | chunks_stored: int = 0
142 | total_chunks: int = 0
143 | progress: float = 0.0
144 | errors: List[str] = []
145 | created_at: datetime
146 | completed_at: Optional[datetime] = None
147 |
148 | @router.post("/upload", response_model=Dict[str, Any])
149 | async def upload_document(
150 | background_tasks: BackgroundTasks,
151 | file: UploadFile = File(...),
152 | tags: str = Form(""),
153 | chunk_size: int = Form(1000),
154 | chunk_overlap: int = Form(200),
155 | memory_type: str = Form("document")
156 | ):
157 | """
158 | Upload and ingest a single document.
159 |
160 | Args:
161 | file: The document file to upload
162 | tags: Comma-separated list of tags
163 | chunk_size: Target chunk size in characters
164 | chunk_overlap: Chunk overlap in characters
165 | memory_type: Type label for memories
166 |
167 | Returns:
168 | Upload session information with ID for tracking
169 |
170 | Uses FastAPI BackgroundTasks for proper async processing.
171 | """
172 | logger.info(f"🚀 Document upload endpoint called with file: {file.filename}")
173 | try:
174 | # Read file content
175 | file_content = await file.read()
176 | file_size = len(file_content)
177 | logger.info(f"File content length: {file_size} bytes")
178 |
179 | # Validate file type
180 | file_ext = Path(file.filename).suffix.lower().lstrip('.')
181 | if file_ext not in SUPPORTED_FORMATS:
182 | supported = ", ".join(f".{ext}" for ext in SUPPORTED_FORMATS.keys())
183 | raise HTTPException(
184 | status_code=400,
185 | detail=f"Unsupported file type: .{file_ext}. Supported: {supported}"
186 | )
187 |
188 | # Parse and validate tags
189 | tag_list = parse_and_validate_tags(tags)
190 |
191 | # Create upload session
192 | upload_id = str(uuid.uuid4())
193 |
194 | # Create secure temporary file (avoids path traversal vulnerability)
195 | # Extract safe file extension for suffix
196 | file_ext = Path(file.filename).suffix if file.filename else ""
197 | temp_file = tempfile.NamedTemporaryFile(
198 | delete=False,
199 | prefix=f"{upload_id}_",
200 | suffix=file_ext
201 | )
202 | temp_path = temp_file.name
203 |
204 | # Save uploaded file temporarily
205 | with temp_file:
206 | temp_file.write(file_content)
207 |
208 | # Initialize upload session
209 | session = UploadStatus(
210 | upload_id=upload_id,
211 | status="queued",
212 | filename=file.filename,
213 | file_size=file_size,
214 | created_at=datetime.now()
215 | )
216 | upload_sessions[upload_id] = session
217 |
218 | # Start background processing
219 | background_tasks.add_task(
220 | process_single_file_upload,
221 | upload_id,
222 | temp_path,
223 | file.filename,
224 | tag_list,
225 | chunk_size,
226 | chunk_overlap,
227 | memory_type
228 | )
229 |
230 | return {
231 | "upload_id": upload_id,
232 | "status": "queued",
233 | "message": f"Document {file.filename} queued for processing"
234 | }
235 |
236 | except Exception as e:
237 | logger.error(f"Upload error: {str(e)}")
238 | raise HTTPException(status_code=500, detail=str(e))
239 |
240 | @router.post("/batch-upload", response_model=Dict[str, Any])
241 | async def batch_upload_documents(
242 | background_tasks: BackgroundTasks,
243 | files: List[UploadFile] = File(...),
244 | tags: str = Form(""),
245 | chunk_size: int = Form(1000),
246 | chunk_overlap: int = Form(200),
247 | memory_type: str = Form("document")
248 | ):
249 | """
250 | Upload and ingest multiple documents in batch.
251 |
252 | Args:
253 | files: List of document files to upload
254 | tags: Comma-separated list of tags
255 | chunk_size: Target chunk size in characters
256 | chunk_overlap: Chunk overlap in characters
257 | memory_type: Type label for memories
258 |
259 | Returns:
260 | Batch upload session information
261 | """
262 | try:
263 | if not files:
264 | raise HTTPException(status_code=400, detail="No files provided")
265 |
266 | # Parse and validate tags
267 | tag_list = parse_and_validate_tags(tags)
268 |
269 | # Create batch upload session
270 | batch_id = str(uuid.uuid4())
271 | temp_paths = []
272 |
273 | # Validate and save all files
274 | for file in files:
275 | file_ext = Path(file.filename).suffix.lower().lstrip('.')
276 | if file_ext not in SUPPORTED_FORMATS:
277 | supported = ", ".join(f".{ext}" for ext in SUPPORTED_FORMATS.keys())
278 | raise HTTPException(
279 | status_code=400,
280 | detail=f"Unsupported file type for {file.filename}: {file_ext}. Supported: {supported}"
281 | )
282 |
283 | # Create secure temporary file (avoids path traversal vulnerability)
284 | content = await file.read()
285 | safe_ext = Path(file.filename).suffix.lower() if file.filename else ""
286 | temp_file = tempfile.NamedTemporaryFile(
287 | delete=False,
288 | prefix=f"{batch_id}_",
289 | suffix=safe_ext
290 | )
291 | temp_path = temp_file.name
292 | with temp_file:
293 | temp_file.write(content)
294 | temp_paths.append((file.filename, temp_path, len(content)))
295 |
296 | # Calculate total file size for the batch
297 | total_file_size = sum(file_size for _, _, file_size in temp_paths)
298 |
299 | # Initialize batch session
300 | session = UploadStatus(
301 | upload_id=batch_id,
302 | status="queued",
303 | filename=f"Batch ({len(files)} files)",
304 | file_size=total_file_size,
305 | created_at=datetime.now()
306 | )
307 | upload_sessions[batch_id] = session
308 |
309 | # Start background processing
310 | background_tasks.add_task(
311 | process_batch_upload,
312 | batch_id,
313 | temp_paths,
314 | tag_list,
315 | chunk_size,
316 | chunk_overlap,
317 | memory_type
318 | )
319 |
320 | return {
321 | "upload_id": batch_id,
322 | "status": "queued",
323 | "message": f"Batch of {len(files)} documents queued for processing"
324 | }
325 |
326 | except Exception as e:
327 | logger.error(f"Batch upload error: {str(e)}")
328 | raise HTTPException(status_code=500, detail=str(e))
329 |
330 | @router.get("/status/{upload_id}", response_model=UploadStatus)
331 | async def get_upload_status(upload_id: str):
332 | """
333 | Get the status of an upload session.
334 |
335 | Args:
336 | upload_id: The upload session ID
337 |
338 | Returns:
339 | Current upload status
340 | """
341 | if upload_id not in upload_sessions:
342 | raise HTTPException(status_code=404, detail="Upload session not found")
343 |
344 | return upload_sessions[upload_id]
345 |
346 | @router.get("/history", response_model=Dict[str, List[Dict[str, Any]]])
347 | async def get_upload_history():
348 | """
349 | Get the history of all uploads.
350 |
351 | Returns:
352 | List of completed uploads with metadata
353 | """
354 | logger.info("Documents history endpoint called")
355 | try:
356 | # For now, return empty history since storage might not be initialized
357 | # In production, this would query a database
358 | history = []
359 | for session in upload_sessions.values():
360 | if session.status in ["completed", "failed"]:
361 | history.append({
362 | "upload_id": session.upload_id,
363 | "filename": session.filename,
364 | "file_size": session.file_size,
365 | "status": session.status,
366 | "chunks_processed": session.chunks_processed,
367 | "chunks_stored": session.chunks_stored,
368 | "progress": session.progress,
369 | "errors": session.errors,
370 | "created_at": session.created_at.isoformat(),
371 | "completed_at": session.completed_at.isoformat() if session.completed_at else None
372 | })
373 |
374 | # Sort by creation time, most recent first
375 | history.sort(key=lambda x: x["created_at"], reverse=True)
376 |
377 | return {"uploads": history}
378 | except Exception as e:
379 | logger.error(f"Error in get_upload_history: {e}")
380 | # Return empty history on error so the UI doesn't break
381 | return {"uploads": []}
382 |
383 | async def process_single_file_upload(
384 | upload_id: str,
385 | file_path: str,
386 | filename: str,
387 | tags: List[str],
388 | chunk_size: int,
389 | chunk_overlap: int,
390 | memory_type: str
391 | ):
392 | """Background task to process a single document upload."""
393 | try:
394 | logger.info(f"Starting document processing: {upload_id} - {filename}")
395 | session = upload_sessions[upload_id]
396 | session.status = "processing"
397 |
398 | # Get storage
399 | storage = await ensure_storage_initialized()
400 |
401 | # Get appropriate loader
402 | file_path_obj = Path(file_path)
403 | loader = get_loader_for_file(file_path_obj)
404 | if loader is None:
405 | raise ValueError(f"No loader available for file: {filename}")
406 |
407 | # Configure loader
408 | loader.chunk_size = chunk_size
409 | loader.chunk_overlap = chunk_overlap
410 |
411 | chunks_processed = 0
412 | chunks_stored = 0
413 |
414 | # Process chunks from the file
415 | async for chunk in loader.extract_chunks(file_path_obj):
416 | chunks_processed += 1
417 |
418 | try:
419 | # Add file-specific tags
420 | all_tags = tags.copy()
421 | all_tags.append(f"source_file:{filename}")
422 | all_tags.append(f"file_type:{file_path_obj.suffix.lstrip('.')}")
423 | all_tags.append(f"upload_id:{upload_id}")
424 |
425 |                 if chunk.metadata and chunk.metadata.get('tags'):  # metadata may be None (guarded below at line 434)
426 | # Handle tags from chunk metadata (can be string or list)
427 | chunk_tags = chunk.metadata['tags']
428 | if isinstance(chunk_tags, str):
429 | # Split comma-separated string into list
430 | chunk_tags = [tag.strip() for tag in chunk_tags.split(',') if tag.strip()]
431 | all_tags.extend(chunk_tags)
432 |
433 | # Add upload_id to metadata
434 | chunk_metadata = chunk.metadata.copy() if chunk.metadata else {}
435 | chunk_metadata['upload_id'] = upload_id
436 | chunk_metadata['source_file'] = filename
437 |
438 | # Create memory object
439 | memory = Memory(
440 | content=chunk.content,
441 | content_hash=generate_content_hash(chunk.content, chunk_metadata),
442 | tags=list(set(all_tags)), # Remove duplicates
443 | memory_type=memory_type,
444 | metadata=chunk_metadata
445 | )
446 |
447 | # Store the memory
448 | success, error = await storage.store(memory)
449 | if success:
450 | chunks_stored += 1
451 | else:
452 | session.errors.append(f"Chunk {chunk.chunk_index}: {error}")
453 |
454 | except Exception as e:
455 | session.errors.append(f"Chunk {chunk.chunk_index}: {str(e)}")
456 |
457 | # Update progress
458 | session.chunks_processed = chunks_processed
459 | session.chunks_stored = chunks_stored
460 |             session.progress = (chunks_processed / max(chunks_processed, 1)) * 100  # always 100% once any chunk is processed; total chunk count unknown while streaming
461 |
462 | # Mark as completed
463 | session.status = "completed"
464 | session.completed_at = datetime.now()
465 | session.progress = 100.0
466 |
467 | logger.info(f"Document processing completed: {upload_id}, {chunks_stored}/{chunks_processed} chunks")
468 | return {"chunks_processed": chunks_processed, "chunks_stored": chunks_stored}
469 |
470 | except Exception as e:
471 | logger.error(f"Document processing error: {str(e)}")
472 | session = upload_sessions.get(upload_id)
473 | if session:
474 | session.status = "failed"
475 | session.errors.append(str(e))
476 | session.completed_at = datetime.now()
477 | finally:
478 | # Clean up temp file (always executed)
479 | try:
480 | os.unlink(file_path)
481 | except Exception as cleanup_error:
482 | logger.debug(f"Could not delete temp file {file_path}: {cleanup_error}")
483 |
484 |
485 | async def process_batch_upload(
486 | batch_id: str,
487 | file_info: List[tuple], # (filename, temp_path, size)
488 | tags: List[str],
489 | chunk_size: int,
490 | chunk_overlap: int,
491 | memory_type: str
492 | ):
493 | """Background task to process a batch document upload."""
494 | try:
495 | logger.info(f"Starting batch processing: {batch_id}")
496 | session = upload_sessions[batch_id]
497 | session.status = "processing"
498 |
499 | # Get storage
500 | storage = await ensure_storage_initialized()
501 |
502 | total_files = len(file_info)
503 | processed_files = 0
504 | total_chunks_processed = 0
505 | total_chunks_stored = 0
506 | all_errors = []
507 |
508 | for filename, temp_path, file_size in file_info:
509 | try:
510 | # Get appropriate loader
511 | file_path_obj = Path(temp_path)
512 | loader = get_loader_for_file(file_path_obj)
513 | if loader is None:
514 | all_errors.append(f"{filename}: No loader available")
515 | processed_files += 1
516 | continue
517 |
518 | # Configure loader
519 | loader.chunk_size = chunk_size
520 | loader.chunk_overlap = chunk_overlap
521 |
522 | file_chunks_processed = 0
523 | file_chunks_stored = 0
524 |
525 | # Process chunks from this file
526 | async for chunk in loader.extract_chunks(file_path_obj):
527 | file_chunks_processed += 1
528 | total_chunks_processed += 1
529 |
530 | # Process and store the chunk
531 | success, error = await _process_and_store_chunk(
532 | chunk,
533 | storage,
534 | filename,
535 | base_tags=tags.copy(),
536 | memory_type=memory_type,
537 | context_tags={
538 | "source_file": filename,
539 | "file_type": file_path_obj.suffix.lstrip('.'),
540 | "upload_id": batch_id
541 | },
542 | extra_metadata={
543 | "upload_id": batch_id,
544 | "source_file": filename
545 | }
546 | )
547 |
548 | if success:
549 | file_chunks_stored += 1
550 | total_chunks_stored += 1
551 | else:
552 | all_errors.append(error)
553 |
554 | processed_files += 1
555 |
556 | except Exception as e:
557 | all_errors.append(f"{filename}: {str(e)}")
558 | processed_files += 1
559 |
560 | finally:
561 | # Clean up temp file (always executed)
562 | try:
563 | os.unlink(temp_path)
564 | except Exception as cleanup_error:
565 | logger.debug(f"Could not delete temp file {temp_path}: {cleanup_error}")
566 |
567 | # Finalize batch
568 | session.status = "completed" if total_chunks_stored > 0 else "failed"
569 | session.completed_at = datetime.now()
570 | session.chunks_processed = total_chunks_processed
571 | session.chunks_stored = total_chunks_stored
572 | session.progress = 100.0
573 | session.errors = all_errors
574 |
575 | logger.info(f"Batch processing completed: {batch_id}, {total_chunks_stored}/{total_chunks_processed} chunks")
576 |
577 | except Exception as e:
578 | logger.error(f"Batch processing error: {str(e)}")
579 | session = upload_sessions.get(batch_id)
580 | if session:
581 | session.status = "failed"
582 | session.errors.append(str(e))
583 | session.completed_at = datetime.now()
584 | # Note: send_progress_update removed - progress tracking via polling instead
585 |
586 | # Clean up old completed sessions periodically
587 | @router.on_event("startup") # TODO: Migrate to lifespan context manager in app.py (FastAPI 0.109+)
588 | async def cleanup_old_sessions():
589 | """Clean up old completed upload sessions."""
590 | async def cleanup():
591 | while True:
592 | await asyncio.sleep(3600) # Clean up every hour
593 | current_time = datetime.now()
594 | to_remove = []
595 |
596 | for upload_id, session in upload_sessions.items():
597 | if session.status in ["completed", "failed"]:
598 | # Keep sessions for 24 hours after completion
599 | if session.completed_at and (current_time - session.completed_at).total_seconds() > 86400:
600 | to_remove.append(upload_id)
601 |
602 | for upload_id in to_remove:
603 | del upload_sessions[upload_id]
604 | logger.debug(f"Cleaned up old upload session: {upload_id}")
605 |
606 | asyncio.create_task(cleanup())
607 |
608 | @router.delete("/remove/{upload_id}")
609 | async def remove_document(upload_id: str, remove_from_memory: bool = True):
610 | """
611 | Remove a document and optionally its memories.
612 |
613 | Args:
614 | upload_id: The upload session ID
615 | remove_from_memory: Whether to delete associated memories (default: True)
616 |
617 | Returns:
618 | Removal status with count of memories deleted
619 | """
620 | logger.info(f"Remove document request for upload_id: {upload_id}, remove_from_memory: {remove_from_memory}")
621 |
622 | # Get session info if available (may not exist after server restart)
623 | session = upload_sessions.get(upload_id)
624 | filename = session.filename if session else "Unknown file"
625 | memories_deleted = 0
626 |
627 | try:
628 | if remove_from_memory:
629 | # Get storage
630 | storage = get_storage()
631 |
632 | # Search by tag pattern: upload_id:{upload_id}
633 | upload_tag = f"upload_id:{upload_id}"
634 | logger.info(f"Searching for memories with tag: {upload_tag}")
635 |
636 | try:
637 | # Delete all memories with this upload_id tag
638 | count, _ = await storage.delete_by_tags([upload_tag])
639 | memories_deleted = count
640 | logger.info(f"Deleted {memories_deleted} memories with tag {upload_tag}")
641 |
642 | # If we deleted memories but don't have session info, try to get filename from first memory
643 | if memories_deleted > 0 and not session:
644 | # Try to get source_file from metadata by checking remaining memories
645 | # (we already deleted them, so we'll use a generic message)
646 | filename = f"Document (upload_id: {upload_id[:8]}...)"
647 |
648 | except Exception as e:
649 | logger.warning(f"Could not delete memories by tag: {e}")
650 | # If deletion fails and we don't know about this upload, return 404
651 | if not session:
652 | raise HTTPException(
653 | status_code=404,
654 | detail=f"Upload ID not found and no memories with tag '{upload_tag}'"
655 | )
656 | memories_deleted = 0
657 |
658 | # Remove upload session if it exists
659 | if session:
660 | del upload_sessions[upload_id]
661 |
662 | return {
663 | "status": "success",
664 | "upload_id": upload_id,
665 | "filename": filename,
666 | "memories_deleted": memories_deleted,
667 | "message": f"Document '{filename}' removed successfully"
668 | }
669 |
670 | except HTTPException:
671 | raise
672 | except Exception as e:
673 | logger.error(f"Error removing document: {str(e)}")
674 | import traceback
675 | logger.error(f"Traceback: {traceback.format_exc()}")
676 | raise HTTPException(status_code=500, detail=f"Failed to remove document: {str(e)}")
677 |
678 | @router.delete("/remove-by-tags")
679 | async def remove_documents_by_tags(tags: List[str]):
680 | """
681 | Remove documents by their tags.
682 |
683 | Args:
684 | tags: List of tags to search for
685 |
686 | Returns:
687 | Removal status with affected upload IDs and memory counts
688 | """
689 | logger.info(f"Remove documents by tags request: {tags}")
690 |
691 | try:
692 | # Get storage
693 | storage = get_storage()
694 |
695 | # Delete memories by tags
696 |         # delete_by_tags returns a (count, message) tuple (matching remove_document above)
697 |         memories_deleted, _ = await storage.delete_by_tags(tags)
698 |
699 | # Find and remove affected upload sessions
700 | affected_sessions = []
701 | to_remove = []
702 |
703 | for upload_id, session in upload_sessions.items():
704 | # Check if any of the document's tags match
705 | # This requires storing tags in the session object
706 | # For now, just track all sessions (placeholder)
707 | pass
708 |
709 | return {
710 | "status": "success",
711 | "tags": tags,
712 | "memories_deleted": memories_deleted,
713 | "affected_uploads": affected_sessions,
714 | "message": f"Deleted {memories_deleted} memories matching tags"
715 | }
716 |
717 | except Exception as e:
718 | logger.error(f"Error removing documents by tags: {str(e)}")
719 | raise HTTPException(status_code=500, detail=f"Failed to remove documents: {str(e)}")
720 |
721 | @router.get("/search-content/{upload_id}")
722 | async def search_document_content(upload_id: str, limit: int = 1000):
723 | """
724 | Search for all memories associated with an upload.
725 |
726 | Args:
727 | upload_id: The upload session ID
728 | limit: Maximum number of results to return (default: 1000)
729 |
730 | Returns:
731 | List of memories with their content and metadata
732 | """
733 | logger.info(f"Search document content for upload_id: {upload_id}, limit: {limit}")
734 |
735 | # Get session info if available (may not exist after server restart)
736 | session = upload_sessions.get(upload_id)
737 |
738 | # If no session, we'll still try to find memories by upload_id tag
739 | if not session:
740 | logger.info(f"No upload session found for {upload_id}, searching by tag only")
741 |
742 | try:
743 | # Get storage
744 | storage = get_storage()
745 |
746 | # Search for memories with upload_id tag
747 | upload_tag = f"upload_id:{upload_id}"
748 | logger.info(f"Searching for memories with tag: {upload_tag}")
749 |
750 | # Use tag search (search_by_tags doesn't support limit parameter)
751 | all_memories = await storage.search_by_tags([upload_tag])
752 |
753 | # If no memories found and no session, this upload_id doesn't exist
754 | if not all_memories and not session:
755 | raise HTTPException(status_code=404, detail=f"No memories found for upload_id: {upload_id}")
756 |
757 | # Apply limit after retrieval
758 | memories = all_memories[:limit] if limit and limit > 0 else all_memories
759 |
760 | # Format results
761 | results = []
762 | for memory in memories:
763 | # Handle created_at (stored as float timestamp)
764 | created_at_str = None
765 | if memory.created_at:
766 | if isinstance(memory.created_at, float):
767 | created_at_str = datetime.fromtimestamp(memory.created_at).isoformat()
768 | elif hasattr(memory.created_at, 'isoformat'):
769 | created_at_str = memory.created_at.isoformat()
770 |
771 | results.append({
772 | "content_hash": memory.content_hash,
773 | "content": memory.content,
774 | "tags": memory.tags,
775 | "metadata": memory.metadata,
776 | "created_at": created_at_str,
777 | "chunk_index": memory.metadata.get('chunk_index', 0) if memory.metadata else 0,
778 | "page": memory.metadata.get('page', None) if memory.metadata else None
779 | })
780 |
781 | # Sort by chunk index
782 | results.sort(key=lambda x: x.get('chunk_index', 0))
783 |
784 | # Get filename from session or from first memory's metadata
785 | filename = session.filename if session else None
786 | if not filename and results:
787 | # Try to get from first memory's metadata
788 | first_memory_metadata = results[0].get('metadata', {})
789 | filename = first_memory_metadata.get('source_file', f"Document (upload_id: {upload_id[:8]}...)")
790 |
791 | return {
792 | "status": "success",
793 | "upload_id": upload_id,
794 | "filename": filename or "Unknown Document",
795 | "total_found": len(results),
796 | "memories": results
797 | }
798 |     except HTTPException: raise  # propagate the 404 raised above instead of masking it as a "partial" result
799 | except Exception as e:
800 | logger.error(f"Error searching document content: {str(e)}")
801 | # Get filename from session if available
802 | filename = session.filename if session else f"Document (upload_id: {upload_id[:8]}...)"
803 | # Return empty results instead of error to avoid breaking UI
804 | return {
805 | "status": "partial",
806 | "upload_id": upload_id,
807 | "filename": filename,
808 | "total_found": 0,
809 | "memories": [],
810 | "error": str(e)
811 | }
812 |
```
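
A minimal client-side sketch of the upload flow implemented above, assuming the server runs at `http://localhost:8000` and this router is mounted under an `/api/documents` prefix (the actual mount point is set where the router is included in the app, which is not shown on this page):

```python
# Hypothetical usage sketch; the base URL and route prefix are assumptions.
import time
import requests

BASE = "http://localhost:8000/api/documents"  # assumed mount point

# POST /upload: multipart file plus form fields matching upload_document()
with open("notes.pdf", "rb") as f:
    resp = requests.post(
        f"{BASE}/upload",
        files={"file": ("notes.pdf", f)},
        data={"tags": "project-x, research", "chunk_size": 1000,
              "chunk_overlap": 200, "memory_type": "document"},
    )
resp.raise_for_status()
upload_id = resp.json()["upload_id"]

# Processing runs in a BackgroundTask; poll GET /status/{upload_id} until done.
while True:
    status = requests.get(f"{BASE}/status/{upload_id}").json()
    if status["status"] in ("completed", "failed"):
        print(f"{status['chunks_stored']}/{status['chunks_processed']} chunks stored")
        break
    time.sleep(1)
```

Polling `/status` matches the module's design: progress is tracked in the in-memory `upload_sessions` dict rather than pushed to the client, so upload sessions do not survive a server restart.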
--------------------------------------------------------------------------------
/tests/unit/test_mdns.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Unit tests for mDNS service discovery functionality.
17 | """
18 |
19 | import pytest
20 | import asyncio
21 | import socket
22 | from unittest.mock import Mock, AsyncMock, patch, MagicMock
23 | from zeroconf import ServiceInfo, Zeroconf
24 |
25 | # Import the modules under test
26 | from mcp_memory_service.discovery.mdns_service import (
27 | ServiceAdvertiser,
28 | ServiceDiscovery,
29 | DiscoveryListener,
30 | ServiceDetails
31 | )
32 | from mcp_memory_service.discovery.client import DiscoveryClient, HealthStatus
33 |
34 |
35 | class TestServiceDetails:
36 | """Test ServiceDetails dataclass."""
37 |
38 | def test_service_details_creation(self):
39 | """Test ServiceDetails creation with basic parameters."""
40 | service_info = Mock()
41 | details = ServiceDetails(
42 | name="Test Service",
43 | host="192.168.1.100",
44 | port=8000,
45 | https=False,
46 | api_version="2.1.0",
47 | requires_auth=True,
48 | service_info=service_info
49 | )
50 |
51 | assert details.name == "Test Service"
52 | assert details.host == "192.168.1.100"
53 | assert details.port == 8000
54 | assert details.https is False
55 | assert details.api_version == "2.1.0"
56 | assert details.requires_auth is True
57 | assert details.service_info == service_info
58 |
59 | def test_service_details_url_http(self):
60 | """Test URL generation for HTTP service."""
61 | details = ServiceDetails(
62 | name="Test Service",
63 | host="192.168.1.100",
64 | port=8000,
65 | https=False,
66 | api_version="2.1.0",
67 | requires_auth=False,
68 | service_info=Mock()
69 | )
70 |
71 | assert details.url == "http://192.168.1.100:8000"
72 | assert details.api_url == "http://192.168.1.100:8000/api"
73 |
74 | def test_service_details_url_https(self):
75 | """Test URL generation for HTTPS service."""
76 | details = ServiceDetails(
77 | name="Test Service",
78 | host="192.168.1.100",
79 | port=8443,
80 | https=True,
81 | api_version="2.1.0",
82 | requires_auth=True,
83 | service_info=Mock()
84 | )
85 |
86 | assert details.url == "https://192.168.1.100:8443"
87 | assert details.api_url == "https://192.168.1.100:8443/api"
88 |
89 |
90 | class TestServiceAdvertiser:
91 | """Test ServiceAdvertiser class."""
92 |
93 | def test_init_default_parameters(self):
94 | """Test ServiceAdvertiser initialization with default parameters."""
95 | advertiser = ServiceAdvertiser()
96 |
97 | assert advertiser.service_name == "MCP Memory Service"
98 | assert advertiser.service_type == "_mcp-memory._tcp.local."
99 | assert advertiser.host == "0.0.0.0"
100 | assert advertiser.port == 8000
101 | assert advertiser.https_enabled is False
102 | assert advertiser.api_key_required is False
103 | assert advertiser._registered is False
104 |
105 | def test_init_custom_parameters(self):
106 | """Test ServiceAdvertiser initialization with custom parameters."""
107 | advertiser = ServiceAdvertiser(
108 | service_name="Custom Service",
109 | service_type="_custom._tcp.local.",
110 | host="192.168.1.100",
111 | port=8443,
112 | https_enabled=True,
113 | api_key_required=True
114 | )
115 |
116 | assert advertiser.service_name == "Custom Service"
117 | assert advertiser.service_type == "_custom._tcp.local."
118 | assert advertiser.host == "192.168.1.100"
119 | assert advertiser.port == 8443
120 | assert advertiser.https_enabled is True
121 | assert advertiser.api_key_required is True
122 |
123 | @patch('socket.socket')
124 | def test_get_local_ip(self, mock_socket):
125 | """Test local IP address detection."""
126 | mock_sock_instance = Mock()
127 | mock_sock_instance.getsockname.return_value = ("192.168.1.100", 12345)
128 | mock_socket.return_value.__enter__.return_value = mock_sock_instance
129 |
130 | advertiser = ServiceAdvertiser()
131 | ip = advertiser._get_local_ip()
132 |
133 | assert ip == "192.168.1.100"
134 | mock_sock_instance.connect.assert_called_once_with(("8.8.8.8", 80))
135 |
136 | @patch('socket.socket')
137 | def test_get_local_ip_fallback(self, mock_socket):
138 | """Test local IP address detection fallback."""
139 | mock_socket.side_effect = Exception("Network error")
140 |
141 | advertiser = ServiceAdvertiser()
142 | ip = advertiser._get_local_ip()
143 |
144 | assert ip == "127.0.0.1"
145 |
146 | @patch('socket.inet_aton')
147 | @patch.object(ServiceAdvertiser, '_get_local_ip')
148 | def test_create_service_info(self, mock_get_ip, mock_inet_aton):
149 | """Test ServiceInfo creation."""
150 | mock_get_ip.return_value = "192.168.1.100"
151 | mock_inet_aton.return_value = b'\xc0\xa8\x01\x64' # 192.168.1.100
152 |
153 | advertiser = ServiceAdvertiser(
154 | service_name="Test Service",
155 | https_enabled=True,
156 | api_key_required=True
157 | )
158 |
159 | service_info = advertiser._create_service_info()
160 |
161 | assert service_info.type == "_mcp-memory._tcp.local."
162 | assert service_info.name == "Test Service._mcp-memory._tcp.local."
163 | assert service_info.port == 8000
164 | assert service_info.server == "test-service.local."
165 |
166 | # Check properties
167 | properties = service_info.properties
168 | assert properties[b'https'] == b'True'
169 | assert properties[b'auth_required'] == b'True'
170 | assert properties[b'api_path'] == b'/api'
171 |
172 | @pytest.mark.asyncio
173 | async def test_start_success(self):
174 | """Test successful service advertisement start."""
175 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class:
176 | mock_zeroconf = AsyncMock()
177 | mock_zeroconf_class.return_value = mock_zeroconf
178 |
179 | advertiser = ServiceAdvertiser()
180 |
181 | with patch.object(advertiser, '_create_service_info') as mock_create_info:
182 | mock_service_info = Mock()
183 | mock_create_info.return_value = mock_service_info
184 |
185 | result = await advertiser.start()
186 |
187 | assert result is True
188 | assert advertiser._registered is True
189 | mock_zeroconf.async_register_service.assert_called_once_with(mock_service_info)
190 |
191 | @pytest.mark.asyncio
192 | async def test_start_already_registered(self):
193 | """Test starting advertisement when already registered."""
194 | advertiser = ServiceAdvertiser()
195 | advertiser._registered = True
196 |
197 | result = await advertiser.start()
198 |
199 | assert result is True # Should return True but log warning
200 |
201 | @pytest.mark.asyncio
202 | async def test_start_failure(self):
203 | """Test service advertisement start failure."""
204 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class:
205 | mock_zeroconf = AsyncMock()
206 | mock_zeroconf.async_register_service.side_effect = Exception("Registration failed")
207 | mock_zeroconf_class.return_value = mock_zeroconf
208 |
209 | advertiser = ServiceAdvertiser()
210 |
211 | with patch.object(advertiser, '_create_service_info'):
212 | result = await advertiser.start()
213 |
214 | assert result is False
215 | assert advertiser._registered is False
216 |
217 | @pytest.mark.asyncio
218 | async def test_stop_success(self):
219 | """Test successful service advertisement stop."""
220 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class:
221 | mock_zeroconf = AsyncMock()
222 | mock_zeroconf_class.return_value = mock_zeroconf
223 |
224 | advertiser = ServiceAdvertiser()
225 | advertiser._registered = True
226 | advertiser._zeroconf = mock_zeroconf
227 | advertiser._service_info = Mock()
228 |
229 | await advertiser.stop()
230 |
231 | assert advertiser._registered is False
232 | mock_zeroconf.async_unregister_service.assert_called_once()
233 | mock_zeroconf.async_close.assert_called_once()
234 |
235 | @pytest.mark.asyncio
236 | async def test_stop_not_registered(self):
237 | """Test stopping advertisement when not registered."""
238 | advertiser = ServiceAdvertiser()
239 |
240 | # Should not raise exception
241 | await advertiser.stop()
242 |
243 |
244 | class TestDiscoveryListener:
245 | """Test DiscoveryListener class."""
246 |
247 | def test_init_no_callback(self):
248 | """Test DiscoveryListener initialization without callback."""
249 | listener = DiscoveryListener()
250 |
251 | assert listener.callback is None
252 | assert len(listener.services) == 0
253 |
254 | def test_init_with_callback(self):
255 | """Test DiscoveryListener initialization with callback."""
256 | callback = Mock()
257 | listener = DiscoveryListener(callback)
258 |
259 | assert listener.callback == callback
260 |
261 | @patch('socket.inet_ntoa')
262 | def test_parse_service_info(self, mock_inet_ntoa):
263 | """Test parsing ServiceInfo into ServiceDetails."""
264 | mock_inet_ntoa.return_value = "192.168.1.100"
265 |
266 | # Create mock ServiceInfo
267 | service_info = Mock()
268 | service_info.name = "Test Service._mcp-memory._tcp.local."
269 | service_info.type = "_mcp-memory._tcp.local."
270 | service_info.port = 8000
271 | service_info.addresses = [b'\xc0\xa8\x01\x64'] # 192.168.1.100
272 | service_info.properties = {
273 | b'https': b'true',
274 | b'api_version': b'2.1.0',
275 | b'auth_required': b'false'
276 | }
277 |
278 | listener = DiscoveryListener()
279 | details = listener._parse_service_info(service_info)
280 |
281 | assert details.name == "Test Service"
282 | assert details.host == "192.168.1.100"
283 | assert details.port == 8000
284 | assert details.https is True
285 | assert details.api_version == "2.1.0"
286 | assert details.requires_auth is False
287 |
288 | @patch('socket.inet_ntoa')
289 | def test_parse_service_info_no_addresses(self, mock_inet_ntoa):
290 | """Test parsing ServiceInfo with no addresses."""
291 | service_info = Mock()
292 | service_info.name = "Test Service._mcp-memory._tcp.local."
293 | service_info.type = "_mcp-memory._tcp.local."
294 | service_info.port = 8000
295 | service_info.addresses = []
296 | service_info.properties = {}
297 |
298 | listener = DiscoveryListener()
299 | details = listener._parse_service_info(service_info)
300 |
301 | assert details.host == "localhost"
302 |
303 | def test_add_service_success(self):
304 | """Test successful service addition."""
305 | callback = Mock()
306 | listener = DiscoveryListener(callback)
307 |
308 | # Mock zeroconf and service info
309 | mock_zc = Mock()
310 | mock_service_info = Mock()
311 | mock_zc.get_service_info.return_value = mock_service_info
312 |
313 | with patch.object(listener, '_parse_service_info') as mock_parse:
314 | mock_details = Mock()
315 | mock_details.name = "Test Service"
316 | mock_parse.return_value = mock_details
317 |
318 | listener.add_service(mock_zc, "_mcp-memory._tcp.local.", "test-service")
319 |
320 | assert "test-service" in listener.services
321 | callback.assert_called_once_with(mock_details)
322 |
323 | def test_add_service_no_info(self):
324 | """Test service addition when no service info available."""
325 | listener = DiscoveryListener()
326 |
327 | mock_zc = Mock()
328 | mock_zc.get_service_info.return_value = None
329 |
330 | listener.add_service(mock_zc, "_mcp-memory._tcp.local.", "test-service")
331 |
332 | assert "test-service" not in listener.services
333 |
334 | def test_remove_service(self):
335 | """Test service removal."""
336 | listener = DiscoveryListener()
337 | mock_details = Mock()
338 | mock_details.name = "Test Service"
339 | listener.services["test-service"] = mock_details
340 |
341 | listener.remove_service(Mock(), "_mcp-memory._tcp.local.", "test-service")
342 |
343 | assert "test-service" not in listener.services
344 |
345 |
346 | class TestServiceDiscovery:
347 | """Test ServiceDiscovery class."""
348 |
349 | def test_init_default_parameters(self):
350 | """Test ServiceDiscovery initialization with defaults."""
351 | discovery = ServiceDiscovery()
352 |
353 | assert discovery.service_type == "_mcp-memory._tcp.local."
354 | assert discovery.discovery_timeout == 5
355 | assert discovery._discovering is False
356 |
357 | def test_init_custom_parameters(self):
358 | """Test ServiceDiscovery initialization with custom parameters."""
359 | discovery = ServiceDiscovery(
360 | service_type="_custom._tcp.local.",
361 | discovery_timeout=10
362 | )
363 |
364 | assert discovery.service_type == "_custom._tcp.local."
365 | assert discovery.discovery_timeout == 10
366 |
367 | @pytest.mark.asyncio
368 | async def test_discover_services_success(self):
369 | """Test successful service discovery."""
370 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class, \
371 | patch('mcp_memory_service.discovery.mdns_service.AsyncServiceBrowser') as mock_browser_class, \
372 | patch('mcp_memory_service.discovery.mdns_service.DiscoveryListener') as mock_listener_class:
373 |
374 | # Setup AsyncZeroconf mock with async context manager support
375 | mock_zeroconf = AsyncMock()
376 | mock_zeroconf.__aenter__ = AsyncMock(return_value=mock_zeroconf)
377 | mock_zeroconf.__aexit__ = AsyncMock(return_value=None)
378 | mock_zeroconf.async_close = AsyncMock()
379 | mock_zeroconf.zeroconf = Mock()
380 | mock_zeroconf_class.return_value = mock_zeroconf
381 |
382 | # Setup AsyncServiceBrowser mock with async cancel method
383 | mock_browser = Mock()
384 | mock_browser.async_cancel = AsyncMock()
385 | mock_browser_class.return_value = mock_browser
386 |
387 | # Mock discovered services via listener
388 | mock_service = Mock()
389 | mock_service.name = "Test Service"
390 | mock_listener = Mock()
391 | mock_listener.services = {"test-service": mock_service}
392 | mock_listener_class.return_value = mock_listener
393 |
394 | discovery = ServiceDiscovery(discovery_timeout=0.1) # Very short timeout for testing
395 |
396 | services = await discovery.discover_services()
397 |
398 | assert len(services) == 1
399 | assert services[0] == mock_service
400 |
401 | @pytest.mark.asyncio
402 | async def test_discover_services_already_discovering(self):
403 | """Test discovery when already in progress."""
404 | discovery = ServiceDiscovery()
405 | discovery._discovering = True
406 |
407 | # Mock existing services
408 | mock_listener = Mock()
409 | mock_service = Mock()
410 | mock_listener.services = {"test-service": mock_service}
411 | discovery._listener = mock_listener
412 |
413 | services = await discovery.discover_services()
414 |
415 | assert len(services) == 1
416 | assert services[0] == mock_service
417 |
418 | @pytest.mark.asyncio
419 | async def test_start_continuous_discovery(self):
420 | """Test starting continuous service discovery."""
421 | callback = Mock()
422 |
423 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class, \
424 | patch('mcp_memory_service.discovery.mdns_service.AsyncServiceBrowser') as mock_browser_class, \
425 | patch('mcp_memory_service.discovery.mdns_service.DiscoveryListener') as mock_listener_class:
426 |
427 | # Setup AsyncZeroconf mock with async context manager support
428 | mock_zeroconf = AsyncMock()
429 | mock_zeroconf.__aenter__ = AsyncMock(return_value=mock_zeroconf)
430 | mock_zeroconf.__aexit__ = AsyncMock(return_value=None)
431 | mock_zeroconf.async_close = AsyncMock()
432 | mock_zeroconf.zeroconf = Mock()
433 | mock_zeroconf_class.return_value = mock_zeroconf
434 |
435 | # Setup AsyncServiceBrowser mock with async cancel method
436 | mock_browser = Mock()
437 | mock_browser.async_cancel = AsyncMock()
438 | mock_browser_class.return_value = mock_browser
439 |
440 | # Setup listener mock
441 | mock_listener = Mock()
442 | mock_listener_class.return_value = mock_listener
443 |
444 | discovery = ServiceDiscovery()
445 |
446 | result = await discovery.start_continuous_discovery(callback)
447 |
448 | assert result is True
449 | assert discovery._discovering is True
450 |
451 | @pytest.mark.asyncio
452 | async def test_start_continuous_discovery_already_started(self):
453 | """Test starting continuous discovery when already started."""
454 | discovery = ServiceDiscovery()
455 | discovery._discovering = True
456 |
457 | result = await discovery.start_continuous_discovery(Mock())
458 |
459 | assert result is False
460 |
461 | @pytest.mark.asyncio
462 | async def test_stop_discovery(self):
463 | """Test stopping service discovery."""
464 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class, \
465 | patch('mcp_memory_service.discovery.mdns_service.AsyncServiceBrowser') as mock_browser_class:
466 |
467 | # Setup AsyncZeroconf mock with async methods
468 | mock_zeroconf = AsyncMock()
469 | mock_zeroconf.async_close = AsyncMock()
470 |
471 | # Setup AsyncServiceBrowser mock with async cancel method
472 | mock_browser = Mock()
473 | mock_browser.async_cancel = AsyncMock()
474 |
475 | discovery = ServiceDiscovery()
476 | discovery._discovering = True
477 | discovery._zeroconf = mock_zeroconf
478 | discovery._browser = mock_browser
479 |
480 | await discovery.stop_discovery()
481 |
482 | assert discovery._discovering is False
483 | mock_browser.async_cancel.assert_called_once()
484 | mock_zeroconf.async_close.assert_called_once()
485 |
486 | def test_get_discovered_services(self):
487 | """Test getting discovered services."""
488 | discovery = ServiceDiscovery()
489 |
490 | # No listener
491 | services = discovery.get_discovered_services()
492 | assert len(services) == 0
493 |
494 | # With listener
495 | mock_listener = Mock()
496 | mock_service = Mock()
497 | mock_listener.services = {"test": mock_service}
498 | discovery._listener = mock_listener
499 |
500 | services = discovery.get_discovered_services()
501 | assert len(services) == 1
502 | assert services[0] == mock_service
503 |
504 |
505 | class TestDiscoveryClient:
506 | """Test DiscoveryClient class."""
507 |
508 | def test_init_default_timeout(self):
509 | """Test DiscoveryClient initialization with default timeout."""
510 | client = DiscoveryClient()
511 |
512 | assert client.discovery_timeout == 5
513 |
514 | def test_init_custom_timeout(self):
515 | """Test DiscoveryClient initialization with custom timeout."""
516 | client = DiscoveryClient(discovery_timeout=10)
517 |
518 | assert client.discovery_timeout == 10
519 |
520 | @pytest.mark.asyncio
521 | async def test_discover_services(self):
522 | """Test service discovery."""
523 | client = DiscoveryClient()
524 |
525 | mock_service = Mock()
526 | mock_service.name = "Test Service"
527 | mock_service.url = "http://192.168.1.100:8000"
528 | mock_service.requires_auth = False
529 |
530 | with patch.object(client._discovery, 'discover_services', return_value=[mock_service]):
531 | services = await client.discover_services()
532 |
533 | assert len(services) == 1
534 | assert services[0] == mock_service
535 |
536 | @pytest.mark.asyncio
537 | async def test_find_best_service_no_services(self):
538 | """Test finding best service when no services available."""
539 | client = DiscoveryClient()
540 |
541 | with patch.object(client, 'discover_services', return_value=[]):
542 | service = await client.find_best_service()
543 |
544 | assert service is None
545 |
546 | @pytest.mark.asyncio
547 | async def test_find_best_service_with_validation(self):
548 | """Test finding best service with health validation."""
549 | client = DiscoveryClient()
550 |
551 | # Create mock services
552 | http_service = Mock()
553 | http_service.https = False
554 | http_service.requires_auth = False
555 | http_service.port = 8000
556 | http_service.name = "HTTP Service"
557 | http_service.url = "http://192.168.1.100:8000"
558 |
559 | https_service = Mock()
560 | https_service.https = True
561 | https_service.requires_auth = False
562 | https_service.port = 8443
563 | https_service.name = "HTTPS Service"
564 | https_service.url = "https://192.168.1.100:8443"
565 |
566 | with patch.object(client, 'discover_services', return_value=[http_service, https_service]), \
567 | patch.object(client, 'check_service_health') as mock_health:
568 |
569 | # Mock health check results
570 | def health_side_effect(service):
571 | if service.https:
572 | return HealthStatus(
573 | healthy=True, status='ok', backend='sqlite_vec',
574 | statistics={}, response_time_ms=50.0
575 | )
576 | else:
577 | return HealthStatus(
578 | healthy=False, status='error', backend='unknown',
579 | statistics={}, response_time_ms=0, error='Connection failed'
580 | )
581 |
582 | mock_health.side_effect = health_side_effect
583 |
584 | service = await client.find_best_service(prefer_https=True)
585 |
586 | assert service == https_service
587 |
588 | @pytest.mark.asyncio
589 | async def test_check_service_health_success(self):
590 | """Test successful service health check."""
591 | client = DiscoveryClient()
592 |
593 | mock_service = Mock()
594 | mock_service.api_url = "http://192.168.1.100:8000/api"
595 |
596 | # Setup response mock with async context manager support
597 | mock_response = Mock()
598 | mock_response.status = 200
599 | mock_response.json = AsyncMock(return_value={
600 | 'status': 'healthy',
601 | 'storage_type': 'sqlite_vec',
602 | 'statistics': {'memory_count': 100}
603 | })
604 | mock_response.__aenter__ = AsyncMock(return_value=mock_response)
605 | mock_response.__aexit__ = AsyncMock(return_value=None)
606 |
607 | with patch('aiohttp.ClientSession') as mock_session_class:
608 | # Setup session mock with async context manager support
609 | mock_session = Mock()
610 | mock_session.__aenter__ = AsyncMock(return_value=mock_session)
611 | mock_session.__aexit__ = AsyncMock(return_value=None)
612 |
613 | # Setup get method to return response with async context manager
614 | mock_get_result = Mock()
615 | mock_get_result.__aenter__ = AsyncMock(return_value=mock_response)
616 | mock_get_result.__aexit__ = AsyncMock(return_value=None)
617 | mock_session.get = Mock(return_value=mock_get_result)
618 |
619 | mock_session_class.return_value = mock_session
620 |
621 | health = await client.check_service_health(mock_service)
622 |
623 | assert health is not None
624 | assert health.healthy is True
625 | assert health.status == 'healthy'
626 | assert health.backend == 'sqlite_vec'
627 | assert health.statistics == {'memory_count': 100}
628 |
629 | @pytest.mark.asyncio
630 | async def test_check_service_health_failure(self):
631 | """Test service health check failure."""
632 | client = DiscoveryClient()
633 |
634 | mock_service = Mock()
635 | mock_service.api_url = "http://192.168.1.100:8000/api"
636 |
637 | with patch('aiohttp.ClientSession') as mock_session_class:
638 | mock_session_class.side_effect = Exception("Connection failed")
639 |
640 | health = await client.check_service_health(mock_service)
641 |
642 | assert health is not None
643 | assert health.healthy is False
644 | assert health.error == "Connection failed"
645 |
646 | @pytest.mark.asyncio
647 | async def test_find_services_with_health(self):
648 | """Test finding services with health status."""
649 | client = DiscoveryClient()
650 |
651 | # Create mock services
652 | service1 = Mock()
653 | service1.https = True
654 | service1.requires_auth = False
655 |
656 | service2 = Mock()
657 | service2.https = False
658 | service2.requires_auth = False
659 |
660 | health1 = HealthStatus(
661 | healthy=True, status='ok', backend='sqlite_vec',
662 | statistics={}, response_time_ms=50.0
663 | )
664 |
665 | health2 = HealthStatus(
666 | healthy=False, status='error', backend='unknown',
667 | statistics={}, response_time_ms=0, error='Connection failed'
668 | )
669 |
670 | with patch.object(client, 'discover_services', return_value=[service1, service2]), \
671 | patch.object(client, 'check_service_health', side_effect=[health1, health2]):
672 |
673 | services_with_health = await client.find_services_with_health()
674 |
675 | assert len(services_with_health) == 2
676 | # Should be sorted with healthy services first
677 | assert services_with_health[0][1].healthy is True
678 | assert services_with_health[1][1].healthy is False
679 |
680 | @pytest.mark.asyncio
681 | async def test_stop(self):
682 | """Test stopping the discovery client."""
683 | client = DiscoveryClient()
684 |
685 | with patch.object(client._discovery, 'stop_discovery') as mock_stop:
686 | await client.stop()
687 | mock_stop.assert_called_once()
688 |
689 |
690 | class TestHealthStatus:
691 | """Test HealthStatus dataclass."""
692 |
693 | def test_health_status_creation(self):
694 | """Test HealthStatus creation."""
695 | health = HealthStatus(
696 | healthy=True,
697 | status='ok',
698 | backend='sqlite_vec',
699 | statistics={'memory_count': 100},
700 | response_time_ms=50.0,
701 | error=None
702 | )
703 |
704 | assert health.healthy is True
705 | assert health.status == 'ok'
706 | assert health.backend == 'sqlite_vec'
707 | assert health.statistics == {'memory_count': 100}
708 | assert health.response_time_ms == 50.0
709 | assert health.error is None
710 |
711 | def test_health_status_with_error(self):
712 | """Test HealthStatus creation with error."""
713 | health = HealthStatus(
714 | healthy=False,
715 | status='error',
716 | backend='unknown',
717 | statistics={},
718 | response_time_ms=0,
719 | error='Connection timeout'
720 | )
721 |
722 | assert health.healthy is False
723 | assert health.error == 'Connection timeout'
724 |
725 |
726 | # Integration tests that can run without actual network
727 | class TestMDNSIntegration:
728 | """Integration tests for mDNS functionality."""
729 |
730 | @pytest.mark.asyncio
731 | async def test_advertiser_discovery_integration(self):
732 | """Test integration between advertiser and discovery (mocked)."""
733 | # This test uses mocks to simulate the integration without actual network traffic
734 |
735 | with patch('mcp_memory_service.discovery.mdns_service.AsyncZeroconf') as mock_zeroconf_class, \
736 | patch('mcp_memory_service.discovery.mdns_service.AsyncServiceBrowser') as mock_browser_class, \
737 | patch('mcp_memory_service.discovery.mdns_service.DiscoveryListener') as mock_listener_class:
738 |
739 | # Setup AsyncZeroconf mock with async context manager support
740 | mock_zeroconf = AsyncMock()
741 | mock_zeroconf.__aenter__ = AsyncMock(return_value=mock_zeroconf)
742 | mock_zeroconf.__aexit__ = AsyncMock(return_value=None)
743 | mock_zeroconf.async_close = AsyncMock()
744 | mock_zeroconf.async_unregister_service = AsyncMock()
745 | mock_zeroconf.zeroconf = Mock()
746 | mock_zeroconf_class.return_value = mock_zeroconf
747 |
748 | # Setup AsyncServiceBrowser mock with async cancel method
749 | mock_browser = Mock()
750 | mock_browser.async_cancel = AsyncMock()
751 | mock_browser_class.return_value = mock_browser
752 |
753 | # Mock discovered service via listener
754 | mock_service = ServiceDetails(
755 | name="Test Service",
756 | host="192.168.1.100",
757 | port=8000,
758 | https=False,
759 | api_version="2.1.0",
760 | requires_auth=False,
761 | service_info=Mock()
762 | )
763 | mock_listener = Mock()
764 | mock_listener.services = {"test-service": mock_service}
765 | mock_listener_class.return_value = mock_listener
766 |
767 | # Start advertiser
768 | advertiser = ServiceAdvertiser(service_name="Test Service")
769 |
770 | with patch.object(advertiser, '_create_service_info'):
771 | await advertiser.start()
772 | assert advertiser._registered is True
773 |
774 | # Start discovery
775 | discovery = ServiceDiscovery(discovery_timeout=0.1)
776 |
777 | services = await discovery.discover_services()
778 |
779 | assert len(services) == 1
780 | assert services[0].name == "Test Service"
781 |
782 | # Clean up
783 | await advertiser.stop()
784 | await discovery.stop_discovery()
785 |
786 |
787 | if __name__ == '__main__':
788 | pytest.main([__file__])
```
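
Taken together, the tests above pin down the public discovery API. As a quick orientation, here is a minimal sketch of how an advertiser and the discovery client could be wired together in application code, assuming only the constructor and method signatures exercised by the tests (`ServiceAdvertiser.start/stop`, `DiscoveryClient.find_best_service`, `check_service_health`); treat it as illustrative rather than canonical.

```python
# Minimal sketch, based only on the API surface exercised by the tests above.
import asyncio

from mcp_memory_service.discovery.mdns_service import ServiceAdvertiser
from mcp_memory_service.discovery.client import DiscoveryClient


async def main():
    # Advertise this instance on the local network via mDNS.
    advertiser = ServiceAdvertiser(service_name="MCP Memory Service", port=8000)
    if not await advertiser.start():
        raise RuntimeError("mDNS registration failed")

    try:
        # Consumer side: pick the healthiest discovered endpoint.
        client = DiscoveryClient(discovery_timeout=5)
        service = await client.find_best_service(prefer_https=True)
        if service is not None:
            health = await client.check_service_health(service)
            print(service.url, "healthy" if health and health.healthy else "unhealthy")
        await client.stop()
    finally:
        await advertiser.stop()


if __name__ == "__main__":
    asyncio.run(main())
```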
--------------------------------------------------------------------------------
/examples/http-mcp-bridge.js:
--------------------------------------------------------------------------------
```javascript
1 | #!/usr/bin/env node
2 | /**
3 | * HTTP-to-MCP Bridge for MCP Memory Service
4 | *
5 | * This bridge allows MCP clients (like Claude Desktop) to connect to a remote
6 | * MCP Memory Service HTTP server instead of running a local instance.
7 | *
8 | * Features:
9 | * - Automatic service discovery via mDNS (Bonjour/Zeroconf)
10 | * - Manual endpoint configuration fallback
11 | * - HTTPS support with self-signed certificate handling
12 | * - API key authentication
13 | *
14 | * Usage in Claude Desktop config:
15 | *
16 | * Option 1: Auto-discovery (recommended for local networks)
17 | * {
18 | * "mcpServers": {
19 | * "memory": {
20 | * "command": "node",
21 | * "args": ["/path/to/http-mcp-bridge.js"],
22 | * "env": {
23 | * "MCP_MEMORY_AUTO_DISCOVER": "true",
24 | * "MCP_MEMORY_PREFER_HTTPS": "true",
25 | * "MCP_MEMORY_API_KEY": "your-api-key"
26 | * }
27 | * }
28 | * }
29 | * }
30 | *
31 | * Option 2: Manual configuration
32 | * {
33 | * "mcpServers": {
34 | * "memory": {
35 | * "command": "node",
36 | * "args": ["/path/to/http-mcp-bridge.js"],
37 | * "env": {
38 | * "MCP_MEMORY_HTTP_ENDPOINT": "https://your-server:8000/api",
39 | * "MCP_MEMORY_API_KEY": "your-api-key"
40 | * }
41 | * }
42 | * }
43 | * }
44 | */
45 |
46 | const http = require('http');
47 | const https = require('https');
48 | const { URL } = require('url');
49 | const dgram = require('dgram');
50 | const dns = require('dns');
51 | const tls = require('tls');
52 |
53 | /**
54 | * Simple mDNS service discovery implementation
55 | */
56 | class MDNSDiscovery {
57 | constructor() {
58 | this.services = new Map();
59 | }
60 |
61 | /**
62 | * Discover MCP Memory Services using mDNS
63 | */
64 | async discoverServices(timeout = 5000) {
65 | return new Promise((resolve) => {
66 | const socket = dgram.createSocket('udp4');
67 | const services = [];
68 |
69 | // mDNS query for _mcp-memory._tcp.local
70 | const query = this.createMDNSQuery('_mcp-memory._tcp.local');
71 |
72 | socket.on('message', (msg, rinfo) => {
73 | try {
74 | const service = this.parseMDNSResponse(msg, rinfo);
75 | if (service) {
76 | services.push(service);
77 | }
78 | } catch (error) {
79 | // Ignore parsing errors
80 | }
81 | });
82 |
83 | socket.bind(() => {
84 | socket.addMembership('224.0.0.251');
85 | socket.send(query, 5353, '224.0.0.251');
86 | });
87 |
88 | setTimeout(() => {
89 | socket.close();
90 | resolve(services);
91 | }, timeout);
92 | });
93 | }
94 |
95 | createMDNSQuery(serviceName) {
96 | // Simplified mDNS query creation
97 | // This is a basic implementation - in production, use a proper mDNS library
98 | const header = Buffer.alloc(12);
99 | header.writeUInt16BE(0, 0); // Transaction ID
100 | header.writeUInt16BE(0, 2); // Flags
101 | header.writeUInt16BE(1, 4); // Questions
102 | header.writeUInt16BE(0, 6); // Answer RRs
103 | header.writeUInt16BE(0, 8); // Authority RRs
104 | header.writeUInt16BE(0, 10); // Additional RRs
105 |
106 | // Question section (simplified)
107 | const nameLabels = serviceName.split('.');
108 | let nameBuffer = Buffer.alloc(0);
109 |
110 | for (const label of nameLabels) {
111 | if (label) {
112 | const labelBuffer = Buffer.alloc(1 + label.length);
113 | labelBuffer.writeUInt8(label.length, 0);
114 | labelBuffer.write(label, 1);
115 | nameBuffer = Buffer.concat([nameBuffer, labelBuffer]);
116 | }
117 | }
118 |
119 | const endBuffer = Buffer.alloc(5);
120 | endBuffer.writeUInt8(0, 0); // End of name
121 | endBuffer.writeUInt16BE(12, 1); // Type PTR
122 | endBuffer.writeUInt16BE(1, 3); // Class IN
123 |
124 | return Buffer.concat([header, nameBuffer, endBuffer]);
125 | }
126 |
127 | parseMDNSResponse(msg, rinfo) {
128 | // Simplified mDNS response parsing
129 | // This is a basic implementation - in production, use a proper mDNS library
130 | try {
131 | // Look for MCP Memory Service indicators in the response
132 | const msgStr = msg.toString('ascii', 0, Math.min(msg.length, 512));
133 | if (msgStr.includes('mcp-memory') || msgStr.includes('MCP Memory')) {
134 | // Simplified: this parser does not decode SRV records, so assume
135 | // the service's default HTTP port rather than probing alternatives
136 | const host = rinfo.address;
137 | const port = 8000;
138 | 
139 | return {
140 | name: 'MCP Memory Service',
141 | host: host,
142 | port: port,
143 | https: false,
144 | discovered: true
145 | };
146 | 
147 | }
148 | } catch (error) {
149 | // Ignore parsing errors
150 | }
151 | return null;
152 | }
153 | }
154 |
155 | class HTTPMCPBridge {
156 | constructor() {
157 | this.endpoint = process.env.MCP_MEMORY_HTTP_ENDPOINT;
158 | this.apiKey = process.env.MCP_MEMORY_API_KEY;
159 | this.autoDiscover = process.env.MCP_MEMORY_AUTO_DISCOVER === 'true';
160 | this.preferHttps = process.env.MCP_MEMORY_PREFER_HTTPS !== 'false';
161 | this.requestId = 0;
162 | this.discovery = new MDNSDiscovery();
163 | this.discoveredEndpoint = null;
164 | }
165 |
166 | /**
167 | * Initialize the bridge by discovering or configuring the endpoint
168 | */
169 | async initialize() {
170 | if (this.endpoint) {
171 | // Manual configuration takes precedence
172 | console.error(`Using manual endpoint: ${this.endpoint}`);
173 | return true;
174 | }
175 |
176 | if (this.autoDiscover) {
177 | console.error('Attempting to discover MCP Memory Service via mDNS...');
178 | try {
179 | const services = await this.discovery.discoverServices();
180 |
181 | if (services.length > 0) {
182 | // Sort services by preference (HTTPS first if preferred)
183 | services.sort((a, b) => {
184 | if (this.preferHttps) {
185 | if (a.https !== b.https) return b.https - a.https;
186 | }
187 | return a.port - b.port; // Prefer standard ports
188 | });
189 |
190 | const service = services[0];
191 | const protocol = service.https ? 'https' : 'http';
192 | this.discoveredEndpoint = `${protocol}://${service.host}:${service.port}/api`;
193 | this.endpoint = this.discoveredEndpoint;
194 |
195 | console.error(`Discovered service: ${this.endpoint}`);
196 |
197 | // Test the discovered endpoint
198 | const healthy = await this.testEndpoint(this.endpoint);
199 | if (!healthy) {
200 | console.error('Discovered endpoint failed health check, trying alternatives...');
201 |
202 | // Try other discovered services
203 | for (let i = 1; i < services.length; i++) {
204 | const altService = services[i];
205 | const altProtocol = altService.https ? 'https' : 'http';
206 | const altEndpoint = `${altProtocol}://${altService.host}:${altService.port}/api`;
207 |
208 | if (await this.testEndpoint(altEndpoint)) {
209 | this.endpoint = altEndpoint;
210 | console.error(`Using alternative endpoint: ${this.endpoint}`);
211 | return true;
212 | }
213 | }
214 |
215 | console.error('No healthy services found');
216 | return false;
217 | }
218 |
219 | return true;
220 | } else {
221 | console.error('No MCP Memory Services discovered');
222 | return false;
223 | }
224 | } catch (error) {
225 | console.error(`Discovery failed: ${error.message}`);
226 | return false;
227 | }
228 | }
229 |
230 | // Default fallback
231 | this.endpoint = 'http://localhost:8000/api';
232 | console.error(`Using default endpoint: ${this.endpoint}`);
233 | return true;
234 | }
235 |
236 | /**
237 | * Test if an endpoint is healthy
238 | */
239 | async testEndpoint(endpoint) {
240 | try {
241 | const healthUrl = `${endpoint}/health`; // endpoint already includes the /api base
242 | const response = await this.makeRequestInternal(healthUrl, 'GET', null, 3000); // 3 second timeout
243 | return response.statusCode === 200;
244 | } catch (error) {
245 | return false;
246 | }
247 | }
248 |
249 | /**
250 | * Make HTTP request to the MCP Memory Service with retry logic
251 | */
252 | async makeRequest(path, method = 'GET', data = null, maxRetries = 3) {
253 | let lastError;
254 |
255 | for (let attempt = 1; attempt <= maxRetries; attempt++) {
256 | try {
257 | console.error(`Attempt ${attempt}/${maxRetries} for ${method} ${path}`);
258 | const result = await this.makeRequestInternal(path, method, data);
259 |
260 | if (attempt > 1) {
261 | console.error(`Request succeeded on attempt ${attempt}`);
262 | }
263 |
264 | return result;
265 | } catch (error) {
266 | lastError = error;
267 | console.error(`Attempt ${attempt} failed: ${error.message}`);
268 |
269 | if (attempt < maxRetries) {
270 | const delay = Math.min(1000 * Math.pow(2, attempt - 1), 5000); // Exponential backoff, max 5s
271 | console.error(`Retrying in ${delay}ms...`);
272 | await new Promise(resolve => setTimeout(resolve, delay));
273 | } else {
274 | console.error(`All ${maxRetries} attempts failed. Last error: ${error.message}`);
275 | }
276 | }
277 | }
278 |
279 | throw lastError;
280 | }
281 |
282 | /**
283 | * Internal HTTP request method with timeout support and comprehensive logging
284 | */
285 | async makeRequestInternal(path, method = 'GET', data = null, timeout = 10000) {
286 | const startTime = Date.now();
287 | const requestId = Math.random().toString(36).slice(2, 11);
288 |
289 | console.error(`[${requestId}] Starting ${method} request to ${path}`);
290 |
291 | return new Promise((resolve, reject) => {
292 | // Use URL constructor's built-in path resolution to avoid duplicate base paths
293 | // Ensure endpoint has trailing slash for proper relative path resolution
294 | const baseUrl = this.endpoint.endsWith('/') ? this.endpoint : this.endpoint + '/';
295 | const url = new URL(path, baseUrl);
296 | const protocol = url.protocol === 'https:' ? https : http;
297 |
298 | console.error(`[${requestId}] Full URL: ${url.toString()}`);
299 | console.error(`[${requestId}] Using protocol: ${url.protocol}`);
300 |
301 | const options = {
302 | hostname: url.hostname,
303 | port: url.port || (url.protocol === 'https:' ? 443 : 80),
304 | path: url.pathname + url.search,
305 | method: method,
306 | headers: {
307 | 'Content-Type': 'application/json',
308 | 'User-Agent': 'MCP-HTTP-Bridge/2.0',
309 | 'Connection': 'close'
310 | },
311 | timeout: timeout,
312 | keepAlive: false
313 | };
314 |
315 | // For HTTPS, create a custom agent that accepts self-signed certificates (default TLS settings)
316 | if (url.protocol === 'https:') {
317 | const agent = new https.Agent({
318 | rejectUnauthorized: false,
319 | requestCert: false,
320 | checkServerIdentity: () => undefined,
321 | keepAlive: false
322 | });
323 | options.agent = agent;
324 | console.error(`[${requestId}] Using custom HTTPS agent with default TLS settings`);
325 | }
326 |
327 | if (this.apiKey) {
328 | options.headers['Authorization'] = `Bearer ${this.apiKey}`;
329 | console.error(`[${requestId}] API key added to headers`);
330 | }
331 |
332 | if (data) {
333 | const postData = JSON.stringify(data);
334 | options.headers['Content-Length'] = Buffer.byteLength(postData);
335 | console.error(`[${requestId}] Request body size: ${Buffer.byteLength(postData)} bytes`);
336 | }
337 |
338 | console.error(`[${requestId}] Request options:`, JSON.stringify(options, null, 2));
339 |
340 | const req = protocol.request(options, (res) => {
341 | const responseStartTime = Date.now();
342 | console.error(`[${requestId}] Response received after ${responseStartTime - startTime}ms`);
343 | console.error(`[${requestId}] Status code: ${res.statusCode}`);
344 | console.error(`[${requestId}] Response headers:`, JSON.stringify(res.headers, null, 2));
345 |
346 | let responseData = '';
347 |
348 | res.on('data', (chunk) => {
349 | responseData += chunk;
350 | console.error(`[${requestId}] Received ${chunk.length} bytes`);
351 | });
352 |
353 | res.on('end', () => {
354 | const endTime = Date.now();
355 | console.error(`[${requestId}] Response completed after ${endTime - startTime}ms total`);
356 | console.error(`[${requestId}] Response body: ${responseData}`);
357 |
358 | try {
359 | const result = JSON.parse(responseData);
360 | resolve({ statusCode: res.statusCode, data: result });
361 | } catch (error) {
362 | console.error(`[${requestId}] JSON parse error: ${error.message}`);
363 | reject(new Error(`Invalid JSON response: ${responseData}`));
364 | }
365 | });
366 | });
367 |
368 | req.on('error', (error) => {
369 | const errorTime = Date.now();
370 | console.error(`[${requestId}] Request error after ${errorTime - startTime}ms: ${error.message}`);
371 | console.error(`[${requestId}] Error details:`, error);
372 | reject(error);
373 | });
374 |
375 | req.on('timeout', () => {
376 | const timeoutTime = Date.now();
377 | console.error(`[${requestId}] Request timeout after ${timeoutTime - startTime}ms (limit: ${timeout}ms)`);
378 | req.destroy();
379 | reject(new Error(`Request timeout after ${timeout}ms`));
380 | });
381 |
382 | console.error(`[${requestId}] Sending request...`);
383 |
384 | if (data) {
385 | const postData = JSON.stringify(data);
386 | console.error(`[${requestId}] Writing request body: ${postData}`);
387 | req.write(postData);
388 | }
389 |
390 | req.end();
391 | console.error(`[${requestId}] Request sent, waiting for response...`);
392 | });
393 | }
394 |
395 | /**
396 | * Handle MCP store_memory operation
397 | */
398 | async storeMemory(params) {
399 | try {
400 | const response = await this.makeRequest('memories', 'POST', {
401 | content: params.content,
402 | tags: params.metadata?.tags || [],
403 | memory_type: params.metadata?.type || 'note',
404 | metadata: params.metadata || {}
405 | });
406 |
407 | if (response.statusCode === 200 || response.statusCode === 201) {
408 | // Server returns 200 with success field indicating actual result
409 | if (response.data.success) {
410 | return { success: true, message: response.data.message || 'Memory stored successfully' };
411 | } else {
412 | return { success: false, message: response.data.message || response.data.detail || 'Failed to store memory' };
413 | }
414 | } else {
415 | return { success: false, message: response.data.detail || 'Failed to store memory' };
416 | }
417 | } catch (error) {
418 | return { success: false, message: error.message };
419 | }
420 | }
421 |
422 | /**
423 | * Handle MCP retrieve_memory operation
424 | */
425 | async retrieveMemory(params) {
426 | try {
427 | const response = await this.makeRequest('search', 'POST', {
428 | query: params.query,
429 | n_results: params.n_results || 5,
430 | similarity_threshold: params.similarity_threshold || null
431 | });
432 |
433 | if (response.statusCode === 200) {
434 | return {
435 | memories: response.data.results.map(result => ({
436 | content: result.memory.content,
437 | metadata: {
438 | tags: result.memory.tags,
439 | type: result.memory.memory_type,
440 | created_at: result.memory.created_at_iso,
441 | relevance_score: result.relevance_score || result.similarity_score
442 | }
443 | }))
444 | };
445 | } else {
446 | return { memories: [] };
447 | }
448 | } catch (error) {
449 | return { memories: [] };
450 | }
451 | }
452 |
453 | /**
454 | * Handle MCP search_by_tag operation
455 | */
456 | async searchByTag(params) {
457 | try {
458 | const tags = Array.isArray(params.tags) ? params.tags : [params.tags];
459 |
460 | const response = await this.makeRequest('search/by-tag', 'POST', {
461 | tags: tags,
462 | match_all: params.match_all || false,
463 | time_filter: params.time_filter || null
464 | });
465 |
466 | if (response.statusCode === 200) {
467 | return {
468 | memories: response.data.results.map(result => ({
469 | content: result.memory.content,
470 | metadata: {
471 | tags: result.memory.tags,
472 | type: result.memory.memory_type,
473 | created_at: result.memory.created_at_iso
474 | }
475 | }))
476 | };
477 | } else {
478 | return { memories: [] };
479 | }
480 | } catch (error) {
481 | return { memories: [] };
482 | }
483 | }
484 |
485 | /**
486 | * Handle MCP delete_memory operation
487 | */
488 | async deleteMemory(params) {
489 | try {
490 | const response = await this.makeRequest(`memories/${params.content_hash}`, 'DELETE');
491 |
492 | if (response.statusCode === 200) {
493 | return { success: true, message: 'Memory deleted successfully' };
494 | } else {
495 | return { success: false, message: response.data.detail || 'Failed to delete memory' };
496 | }
497 | } catch (error) {
498 | return { success: false, message: error.message };
499 | }
500 | }
501 |
502 | /**
503 | * Handle MCP check_database_health operation
504 | */
505 | async checkHealth(params = {}) {
506 | try {
507 | const response = await this.makeRequest('health', 'GET');
508 |
509 | if (response.statusCode === 200) {
510 | return {
511 | status: response.data.status,
512 | backend: response.data.storage_type,
513 | statistics: response.data.statistics || {}
514 | };
515 | } else {
516 | return { status: 'unhealthy', backend: 'unknown', statistics: {} };
517 | }
518 | } catch (error) {
519 | // Handle errors that may not have a message property (like ECONNREFUSED)
520 | const errorMessage = error.message || error.code || error.toString() || 'Unknown error';
521 | return { status: 'error', backend: 'unknown', statistics: {}, error: errorMessage };
522 | }
523 | }
524 |
525 | /**
526 | * Process MCP JSON-RPC request
527 | */
528 | async processRequest(request) {
529 | const { method, params, id } = request;
530 |
531 | let result;
532 | try {
533 | switch (method) {
534 | case 'initialize':
535 | result = {
536 | protocolVersion: "2024-11-05",
537 | capabilities: {
538 | tools: {
539 | listChanged: false
540 | }
541 | },
542 | serverInfo: {
543 | name: "mcp-memory-service",
544 | version: "2.0.0"
545 | }
546 | };
547 | break;
548 | case 'notifications/initialized':
549 | // No response needed for notifications
550 | return null;
551 | case 'tools/list':
552 | result = {
553 | tools: [
554 | {
555 | name: "store_memory",
556 | description: "Store a memory with content and optional metadata",
557 | inputSchema: {
558 | type: "object",
559 | properties: {
560 | content: { type: "string", description: "The content to store" },
561 | metadata: {
562 | type: "object",
563 | properties: {
564 | tags: { type: "array", items: { type: "string" } },
565 | type: { type: "string" }
566 | }
567 | }
568 | },
569 | required: ["content"]
570 | }
571 | },
572 | {
573 | name: "retrieve_memory",
574 | description: "Retrieve memories based on a query",
575 | inputSchema: {
576 | type: "object",
577 | properties: {
578 | query: { type: "string", description: "Search query" },
579 | n_results: { type: "integer", description: "Number of results to return" }
580 | },
581 | required: ["query"]
582 | }
583 | },
584 | {
585 | name: "search_by_tag",
586 | description: "Search memories by tags",
587 | inputSchema: {
588 | type: "object",
589 | properties: {
590 | tags: {
591 | oneOf: [
592 | { type: "string" },
593 | { type: "array", items: { type: "string" } }
594 | ]
595 | }
596 | },
597 | required: ["tags"]
598 | }
599 | },
600 | {
601 | name: "delete_memory",
602 | description: "Delete a memory by content hash",
603 | inputSchema: {
604 | type: "object",
605 | properties: {
606 | content_hash: { type: "string", description: "Hash of the content to delete" }
607 | },
608 | required: ["content_hash"]
609 | }
610 | },
611 | {
612 | name: "check_database_health",
613 | description: "Check the health of the memory database",
614 | inputSchema: {
615 | type: "object",
616 | properties: {}
617 | }
618 | }
619 | ]
620 | };
621 | break;
622 | case 'tools/call':
623 | const toolName = params.name;
624 | const toolParams = params.arguments || {};
625 |
626 | console.error(`Processing tool call: ${toolName} with params:`, JSON.stringify(toolParams));
627 |
628 | let toolResult;
629 | switch (toolName) {
630 | case 'store_memory':
631 | toolResult = await this.storeMemory(toolParams);
632 | break;
633 | case 'retrieve_memory':
634 | toolResult = await this.retrieveMemory(toolParams);
635 | break;
636 | case 'search_by_tag':
637 | toolResult = await this.searchByTag(toolParams);
638 | break;
639 | case 'delete_memory':
640 | toolResult = await this.deleteMemory(toolParams);
641 | break;
642 | case 'check_database_health':
643 | toolResult = await this.checkHealth(toolParams);
644 | break;
645 | default:
646 | throw new Error(`Unknown tool: ${toolName}`);
647 | }
648 |
649 | console.error(`Tool result:`, JSON.stringify(toolResult));
650 |
651 | return {
652 | jsonrpc: "2.0",
653 | id: id,
654 | result: {
655 | content: [
656 | {
657 | type: "text",
658 | text: JSON.stringify(toolResult, null, 2)
659 | }
660 | ]
661 | }
662 | };
663 | case 'store_memory':
664 | result = await this.storeMemory(params);
665 | break;
666 | case 'retrieve_memory':
667 | result = await this.retrieveMemory(params);
668 | break;
669 | case 'search_by_tag':
670 | result = await this.searchByTag(params);
671 | break;
672 | case 'delete_memory':
673 | result = await this.deleteMemory(params);
674 | break;
675 | case 'check_database_health':
676 | result = await this.checkHealth(params);
677 | break;
678 | default:
679 | throw new Error(`Unknown method: ${method}`);
680 | }
681 |
682 | return {
683 | jsonrpc: "2.0",
684 | id: id,
685 | result: result
686 | };
687 | } catch (error) {
688 | return {
689 | jsonrpc: "2.0",
690 | id: id,
691 | error: {
692 | code: -32000,
693 | message: error.message
694 | }
695 | };
696 | }
697 | }
698 |
699 | /**
700 | * Start the bridge server
701 | */
702 | async start() {
703 | console.error(`MCP HTTP Bridge starting...`);
704 |
705 | // Initialize the bridge (discovery or manual config)
706 | const initialized = await this.initialize();
707 | if (!initialized) {
708 | console.error('Failed to initialize bridge - no endpoint available');
709 | process.exit(1);
710 | }
711 |
712 | console.error(`Endpoint: ${this.endpoint}`);
713 | console.error(`API Key: ${this.apiKey ? '[SET]' : '[NOT SET]'}`);
714 | console.error(`Auto-discovery: ${this.autoDiscover ? 'ENABLED' : 'DISABLED'}`);
715 | console.error(`Prefer HTTPS: ${this.preferHttps ? 'YES' : 'NO'}`);
716 |
717 | if (this.discoveredEndpoint) {
718 | console.error(`Service discovered automatically via mDNS`);
719 | }
720 |
721 | let buffer = '';
722 |
723 | process.stdin.on('data', async (chunk) => {
724 | buffer += chunk.toString();
725 |
726 | // Process complete JSON-RPC messages
727 | let newlineIndex;
728 | while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
729 | const line = buffer.slice(0, newlineIndex).trim();
730 | buffer = buffer.slice(newlineIndex + 1);
731 |
732 | if (line) {
733 | try {
734 | const request = JSON.parse(line);
735 | const response = await this.processRequest(request);
736 | if (response !== null) console.log(JSON.stringify(response)); // notifications return null and need no reply
737 | } catch (error) {
738 | console.error(`Error processing request: ${error.message}`);
739 | console.log(JSON.stringify({
740 | jsonrpc: "2.0",
741 | id: null,
742 | error: {
743 | code: -32700,
744 | message: "Parse error"
745 | }
746 | }));
747 | }
748 | }
749 | }
750 | });
751 |
752 | process.stdin.on('end', () => {
753 | process.exit(0);
754 | });
755 |
756 | // Handle graceful shutdown
757 | process.on('SIGINT', () => {
758 | console.error('Shutting down HTTP Bridge...');
759 | process.exit(0);
760 | });
761 |
762 | process.on('SIGTERM', () => {
763 | console.error('Shutting down HTTP Bridge...');
764 | process.exit(0);
765 | });
766 | }
767 | }
768 |
769 | // Start the bridge if this file is run directly
770 | if (require.main === module) {
771 | const bridge = new HTTPMCPBridge();
772 | bridge.start().catch(error => {
773 | console.error(`Failed to start bridge: ${error.message}`);
774 | process.exit(1);
775 | });
776 | }
777 |
778 | module.exports = HTTPMCPBridge;
```
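
The bridge speaks newline-delimited JSON-RPC on stdin/stdout, so it can be smoke-tested without Claude Desktop. Below is a minimal sketch of such a test in Python; the script path and endpoint are illustrative assumptions, and the response shapes follow the `processRequest` handler above.

```python
# Hedged smoke test: drives the stdio bridge with one JSON-RPC message per line.
import json
import os
import subprocess

BRIDGE = "examples/http-mcp-bridge.js"  # assumed path within this repository

proc = subprocess.Popen(
    ["node", BRIDGE],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    env={**os.environ, "MCP_MEMORY_HTTP_ENDPOINT": "http://localhost:8000/api"},
    text=True,
)

def rpc(method, params=None, msg_id=1):
    # The bridge's stdin loop expects exactly one JSON object per line.
    request = {"jsonrpc": "2.0", "id": msg_id, "method": method, "params": params or {}}
    proc.stdin.write(json.dumps(request) + "\n")
    proc.stdin.flush()
    return json.loads(proc.stdout.readline())

print(rpc("initialize"))            # serverInfo and capabilities
print(rpc("tools/list", msg_id=2))  # the five tools registered above
proc.terminate()
```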
--------------------------------------------------------------------------------
/tests/test_quality_system.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Comprehensive unit tests for the quality scoring system.
3 | Tests ONNX ranker, implicit signals, AI evaluator, and composite scorer.
4 | """
5 |
6 | import pytest
7 | import time
8 | import asyncio
9 | from unittest.mock import Mock, patch, AsyncMock
10 | from pathlib import Path
11 |
12 | from src.mcp_memory_service.quality.config import QualityConfig
13 | from src.mcp_memory_service.quality.onnx_ranker import ONNXRankerModel, get_onnx_ranker_model
14 | from src.mcp_memory_service.quality.implicit_signals import ImplicitSignalsEvaluator
15 | from src.mcp_memory_service.quality.ai_evaluator import QualityEvaluator
16 | from src.mcp_memory_service.quality.scorer import QualityScorer
17 | from src.mcp_memory_service.models.memory import Memory
18 |
19 |
20 | class TestQualityConfig:
21 | """Test quality configuration."""
22 |
23 | def test_default_config(self):
24 | """Test default configuration values."""
25 | config = QualityConfig()
26 | assert config.enabled is True
27 | assert config.ai_provider == 'local'
28 | assert config.local_model == 'nvidia-quality-classifier-deberta' # Updated to current default
29 | assert config.local_device == 'auto'
30 | assert config.boost_enabled is False
31 | assert config.boost_weight == 0.3
32 |
33 | def test_config_from_env(self, monkeypatch):
34 | """Test loading configuration from environment variables."""
35 | monkeypatch.setenv('MCP_QUALITY_SYSTEM_ENABLED', 'false')
36 | monkeypatch.setenv('MCP_QUALITY_AI_PROVIDER', 'groq')
37 | monkeypatch.setenv('MCP_QUALITY_BOOST_ENABLED', 'true')
38 | monkeypatch.setenv('MCP_QUALITY_BOOST_WEIGHT', '0.5')
39 | monkeypatch.setenv('GROQ_API_KEY', 'test-key')
40 |
41 | config = QualityConfig.from_env()
42 | assert config.enabled is False
43 | assert config.ai_provider == 'groq'
44 | assert config.boost_enabled is True
45 | assert config.boost_weight == 0.5
46 | assert config.groq_api_key == 'test-key'
47 |
48 | def test_config_validation(self):
49 | """Test configuration validation."""
50 | config = QualityConfig(ai_provider='local')
51 | assert config.validate() is True
52 |
53 | # Invalid provider
54 | config = QualityConfig(ai_provider='invalid')
55 | with pytest.raises(ValueError, match="Invalid ai_provider"):
56 | config.validate()
57 |
58 | # Invalid boost weight
59 | config = QualityConfig(boost_weight=1.5)
60 | with pytest.raises(ValueError, match="boost_weight must be between"):
61 | config.validate()
62 |
63 | # Groq provider without API key
64 | config = QualityConfig(ai_provider='groq')
65 | with pytest.raises(ValueError, match="GROQ_API_KEY not set"):
66 | config.validate()
67 |
68 | def test_config_helpers(self):
69 | """Test configuration helper properties."""
70 | config = QualityConfig(ai_provider='local')
71 | assert config.use_local_only is True
72 | assert config.can_use_groq is False
73 |
74 | config = QualityConfig(ai_provider='groq', groq_api_key='test')
75 | assert config.use_local_only is False
76 | assert config.can_use_groq is True
77 |
78 |
79 | class TestImplicitSignalsEvaluator:
80 | """Test implicit signals-based quality evaluation."""
81 |
82 | def test_evaluate_new_memory(self):
83 | """Test evaluating a new memory with no access history."""
84 | evaluator = ImplicitSignalsEvaluator()
85 | memory = Memory(
86 | content="Test content",
87 | content_hash="test_hash",
88 | metadata={}
89 | )
90 |
91 | score = evaluator.evaluate_quality(memory)
92 | # New memory should have low but non-zero score
93 | assert 0.0 < score < 0.5
94 |
95 | def test_evaluate_frequently_accessed_memory(self):
96 | """Test evaluating a frequently accessed memory."""
97 | evaluator = ImplicitSignalsEvaluator()
98 | memory = Memory(
99 | content="Popular content",
100 | content_hash="popular_hash",
101 | metadata={
102 | 'access_count': 50,
103 | 'last_accessed_at': time.time(),
104 | 'avg_ranking': 0.1 # Top result
105 | }
106 | )
107 |
108 | score = evaluator.evaluate_quality(memory)
109 | # Frequently accessed, recent, top-ranked memory should have high score
110 | assert score > 0.7
111 |
112 | def test_evaluate_old_memory(self):
113 | """Test evaluating a memory that hasn't been accessed recently."""
114 | evaluator = ImplicitSignalsEvaluator()
115 | thirty_days_ago = time.time() - (30 * 24 * 3600)
116 | memory = Memory(
117 | content="Old content",
118 | content_hash="old_hash",
119 | metadata={
120 | 'access_count': 10,
121 | 'last_accessed_at': thirty_days_ago,
122 | 'avg_ranking': 0.5
123 | }
124 | )
125 |
126 | score = evaluator.evaluate_quality(memory)
127 | # Old memory should have lower recency score
128 | assert score < 0.5
129 |
130 | def test_update_ranking_signal(self):
131 | """Test updating average ranking signal."""
132 | evaluator = ImplicitSignalsEvaluator()
133 | memory = Memory(
134 | content="Test content",
135 | content_hash="test_hash",
136 | metadata={'avg_ranking': 0.5}
137 | )
138 |
139 | # Memory appears as top result
140 | evaluator.update_ranking_signal(memory, position=0, total_results=10)
141 | new_ranking = memory.metadata['avg_ranking']
142 | assert new_ranking < 0.5 # Should improve (lower is better)
143 |
144 | # Memory appears as bottom result
145 | evaluator.update_ranking_signal(memory, position=9, total_results=10)
146 | newer_ranking = memory.metadata['avg_ranking']
147 | assert newer_ranking > new_ranking # Should worsen
148 |
149 | def test_get_signal_components(self):
150 | """Test getting detailed signal breakdown."""
151 | evaluator = ImplicitSignalsEvaluator()
152 | memory = Memory(
153 | content="Test content",
154 | content_hash="test_hash",
155 | metadata={
156 | 'access_count': 25,
157 | 'last_accessed_at': time.time(),
158 | 'avg_ranking': 0.2
159 | }
160 | )
161 |
162 | components = evaluator.get_signal_components(memory)
163 | assert 'access_score' in components
164 | assert 'recency_score' in components
165 | assert 'ranking_score' in components
166 | assert 'composite_score' in components
167 | assert components['access_count'] == 25
168 |
169 |
170 | @pytest.mark.skipif(
171 | not Path.home().joinpath(".cache/mcp_memory/onnx_models/ms-marco-MiniLM-L-6-v2/onnx/model.onnx").exists(),
172 | reason="ONNX model not downloaded"
173 | )
174 | class TestONNXRankerModel:
175 | """Test ONNX-based cross-encoder model."""
176 |
177 | def test_model_initialization(self):
178 | """Test ONNX ranker model initialization."""
179 | model = get_onnx_ranker_model(device='cpu')
180 | assert model is not None
181 | assert model._model is not None
182 | assert model._tokenizer is not None
183 |
184 | def test_score_quality_relevant(self):
185 | """Test scoring a highly relevant memory."""
186 | model = get_onnx_ranker_model(device='cpu')
187 | if model is None:
188 | pytest.skip("ONNX ranker not available")
189 |
190 | query = "How to implement a binary search tree"
191 | memory_content = "A binary search tree is a data structure where each node has at most two children. Implementation requires insert, delete, and search operations."
192 |
193 | score = model.score_quality(query, memory_content)
194 | # Highly relevant content should score high
195 | assert 0.5 < score <= 1.0
196 |
197 | def test_score_quality_irrelevant(self):
198 | """Test scoring an irrelevant memory."""
199 | model = get_onnx_ranker_model(device='cpu')
200 | if model is None:
201 | pytest.skip("ONNX ranker not available")
202 |
203 | query = "Python programming tutorial"
204 | memory_content = "Recipe for chocolate chip cookies with butter and sugar."
205 |
206 | score = model.score_quality(query, memory_content)
207 | # Irrelevant content should score low
208 | assert 0.0 <= score < 0.5
209 |
210 | def test_score_quality_empty_input(self):
211 | """Test handling empty query or content."""
212 | model = get_onnx_ranker_model(device='cpu')
213 | if model is None:
214 | pytest.skip("ONNX ranker not available")
215 |
216 | assert model.score_quality("", "content") == 0.0
217 | assert model.score_quality("query", "") == 0.0
218 | assert model.score_quality("", "") == 0.0
219 |
220 | def test_gpu_provider_detection(self):
221 | """Test GPU provider detection."""
222 | # Just test that the method runs without error
223 | model = get_onnx_ranker_model(device='auto')
224 | if model is not None:
225 | # Check that at least CPU provider is available
226 | assert 'CPUExecutionProvider' in model._preferred_providers
227 |
228 |
229 | class TestQualityEvaluator:
230 | """Test multi-tier AI quality evaluator."""
231 |
232 | @pytest.mark.asyncio
233 | async def test_local_only_evaluation(self):
234 | """Test evaluation using local ONNX model only."""
235 | config = QualityConfig(ai_provider='local')
236 | evaluator = QualityEvaluator(config)
237 |
238 | memory = Memory(
239 | content="Python is a high-level programming language",
240 | content_hash="python_hash",
241 | metadata={}
242 | )
243 |
244 | # Mock the ONNX ranker to return a fixed score
245 | mock_ranker = Mock()
246 | mock_ranker.score_quality.return_value = 0.85
247 |
248 | # Inject mock directly into evaluator
249 | evaluator._onnx_ranker = mock_ranker
250 | evaluator._initialized = True
251 |
252 | score = await evaluator.evaluate_quality("Python programming", memory)
253 |
254 | assert score == 0.85
255 | assert memory.metadata['quality_provider'] == 'onnx_local'
256 | mock_ranker.score_quality.assert_called_once()
257 |
258 | @pytest.mark.asyncio
259 | async def test_fallback_to_implicit_signals(self):
260 | """Test fallback to implicit signals when ONNX fails."""
261 | config = QualityConfig(ai_provider='local')
262 | evaluator = QualityEvaluator(config)
263 |
264 | memory = Memory(
265 | content="Test content",
266 | content_hash="test_hash",
267 | metadata={
268 | 'access_count': 10,
269 | 'last_accessed_at': time.time()
270 | }
271 | )
272 |
273 | with patch('src.mcp_memory_service.quality.onnx_ranker.get_onnx_ranker_model', return_value=None):
274 | score = await evaluator.evaluate_quality("test query", memory)
275 |
276 | # Should fall back to implicit signals
277 | assert 0.0 < score <= 1.0
278 | assert memory.metadata['quality_provider'] == 'implicit_signals'
279 |
280 | @pytest.mark.asyncio
281 | async def test_disabled_quality_system(self):
282 | """Test behavior when quality system is disabled."""
283 | config = QualityConfig(enabled=False)
284 | evaluator = QualityEvaluator(config)
285 |
286 | memory = Memory(
287 | content="Test content",
288 | content_hash="test_hash",
289 | metadata={}
290 | )
291 |
292 | score = await evaluator.evaluate_quality("test query", memory)
293 | # Should return neutral score when disabled
294 | assert score == 0.5
295 |
296 |
297 | class TestQualityScorer:
298 | """Test composite quality scorer."""
299 |
300 | @pytest.mark.asyncio
301 | async def test_calculate_quality_score_with_boost(self):
302 | """Test composite scoring with boost enabled."""
303 | config = QualityConfig(boost_enabled=True, boost_weight=0.3)
304 | scorer = QualityScorer(config)
305 |
306 | memory = Memory(
307 | content="Test content",
308 | content_hash="test_hash",
309 | metadata={
310 | 'access_count': 20,
311 | 'last_accessed_at': time.time()
312 | }
313 | )
314 |
315 | # Mock AI evaluator to return a fixed score
316 | with patch.object(scorer._ai_evaluator, 'evaluate_quality', return_value=0.8):
317 | score = await scorer.calculate_quality_score(memory, "test query")
318 |
319 | # Score should be weighted combination of AI (0.7 * 0.8) + implicit (0.3 * ~0.5)
320 | assert 0.5 < score < 1.0
321 | assert 'quality_score' in memory.metadata
322 | assert 'quality_components' in memory.metadata
323 |
324 | @pytest.mark.asyncio
325 | async def test_calculate_quality_score_no_boost(self):
326 | """Test scoring without boost (AI only)."""
327 | config = QualityConfig(boost_enabled=False)
328 | scorer = QualityScorer(config)
329 |
330 | memory = Memory(
331 | content="Test content",
332 | content_hash="test_hash",
333 | metadata={}
334 | )
335 |
336 | with patch.object(scorer._ai_evaluator, 'evaluate_quality', return_value=0.75):
337 | score = await scorer.calculate_quality_score(memory, "test query")
338 |
339 | # Should use AI score directly
340 | assert score == 0.75
341 |
342 | @pytest.mark.asyncio
343 | async def test_score_batch(self):
344 | """Test batch scoring of multiple memories."""
345 | config = QualityConfig()
346 | scorer = QualityScorer(config)
347 |
348 | memories = [
349 | Memory(content=f"Content {i}", content_hash=f"hash_{i}", metadata={})
350 | for i in range(5)
351 | ]
352 |
353 | with patch.object(scorer._ai_evaluator, 'evaluate_quality', return_value=0.6):
354 | scores = await scorer.score_batch(memories, "test query")
355 |
356 | assert len(scores) == 5
357 | assert all(0.0 <= s <= 1.0 for s in scores)
358 |
359 | def test_get_score_breakdown(self):
360 | """Test getting detailed score breakdown."""
361 | config = QualityConfig()
362 | scorer = QualityScorer(config)
363 |
364 | memory = Memory(
365 | content="Test content",
366 | content_hash="test_hash",
367 | metadata={
368 | 'quality_score': 0.75,
369 | 'quality_provider': 'onnx_local',
370 | 'access_count': 15,
371 | 'last_accessed_at': time.time()
372 | }
373 | )
374 |
375 | breakdown = scorer.get_score_breakdown(memory)
376 |
377 | assert breakdown['quality_score'] == 0.75
378 | assert breakdown['quality_provider'] == 'onnx_local'
379 | assert breakdown['access_count'] == 15
380 | assert 'implicit_signals' in breakdown
381 |
382 |
383 | class TestMemoryAccessTracking:
384 | """Test memory access tracking integration."""
385 |
386 | def test_record_access(self):
387 | """Test recording memory access."""
388 | memory = Memory(
389 | content="Test content",
390 | content_hash="test_hash",
391 | metadata={}
392 | )
393 |
394 | # Record first access
395 | memory.record_access("first query")
396 | assert memory.access_count == 1
397 | assert memory.last_accessed_at is not None
398 | assert len(memory.metadata.get('access_queries', [])) == 1
399 |
400 | # Record second access
401 | time.sleep(0.01) # Ensure different timestamp
402 | memory.record_access("second query")
403 | assert memory.access_count == 2
404 | assert len(memory.metadata.get('access_queries', [])) == 2
405 |
406 | def test_quality_score_property(self):
407 | """Test quality score property on Memory."""
408 | memory = Memory(
409 | content="Test content",
410 | content_hash="test_hash",
411 | metadata={'quality_score': 0.85}
412 | )
413 |
414 | assert memory.quality_score == 0.85
415 |
416 | # Default value when not set
417 | memory2 = Memory(
418 | content="Test content 2",
419 | content_hash="test_hash_2",
420 | metadata={}
421 | )
422 | assert memory2.quality_score == 0.5
423 |
424 | def test_quality_provider_property(self):
425 | """Test quality provider property on Memory."""
426 | memory = Memory(
427 | content="Test content",
428 | content_hash="test_hash",
429 | metadata={'quality_provider': 'onnx_local'}
430 | )
431 |
432 | assert memory.quality_provider == 'onnx_local'
433 |
434 |
435 | # Performance benchmarks
436 | class TestQualitySystemPerformance:
437 | """Performance benchmarks for quality scoring."""
438 |
439 | @pytest.mark.skip(reason="Issue #316: Missing pytest-benchmark fixture - requires pytest-benchmark plugin")
440 | @pytest.mark.benchmark
441 | def test_implicit_signals_performance(self, benchmark):
442 | """Benchmark implicit signals evaluation."""
443 | evaluator = ImplicitSignalsEvaluator()
444 | memory = Memory(
445 | content="Test content",
446 | content_hash="test_hash",
447 | metadata={
448 | 'access_count': 25,
449 | 'last_accessed_at': time.time(),
450 | 'avg_ranking': 0.3
451 | }
452 | )
453 |
454 | result = benchmark(evaluator.evaluate_quality, memory)
455 | # Target: <10ms for implicit signals
456 | assert result > 0.0
457 |
458 | @pytest.mark.benchmark
459 | @pytest.mark.skipif(
460 | not Path.home().joinpath(".cache/mcp_memory/onnx_models/ms-marco-MiniLM-L-6-v2/onnx/model.onnx").exists(),
461 | reason="ONNX model not downloaded"
462 | )
463 | def test_onnx_ranker_performance(self, benchmark):
464 | """Benchmark ONNX ranker scoring."""
465 | model = get_onnx_ranker_model(device='cpu')
466 | if model is None:
467 | pytest.skip("ONNX ranker not available")
468 |
469 | query = "Python programming tutorial"
470 | content = "Learn Python basics with examples and exercises"
471 |
472 | result = benchmark(model.score_quality, query, content)
473 | # Target: <100ms on CPU
474 | assert 0.0 <= result <= 1.0
475 |
476 |
477 | class TestQualityAPILayer:
478 | """Integration tests for quality API layer (MCP tools and HTTP endpoints)."""
479 |
480 | @pytest.mark.asyncio
481 | async def test_rate_memory_mcp_tool(self):
482 | """Test rate_memory MCP tool."""
483 | from src.mcp_memory_service.server import MemoryServer
484 | from src.mcp_memory_service.models.memory import Memory
485 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
486 | import tempfile
487 | import os
488 |
489 | # Create temporary database
490 | with tempfile.TemporaryDirectory() as tmpdir:
491 | db_path = os.path.join(tmpdir, "test.db")
492 | storage = SqliteVecMemoryStorage(db_path)
493 | await storage.initialize()
494 |
495 | # Create and store a test memory
496 | test_memory = Memory(
497 | content="Test content for rating",
498 | content_hash="test_rating_hash",
499 | metadata={}
500 | )
501 | await storage.store(test_memory)
502 |
503 | # Create server instance
504 | server = MemoryServer()
505 | server.storage = storage
506 | server._storage_initialized = True
507 |
508 | # Test rating with thumbs up
509 | result = await server.handle_rate_memory({
510 | "content_hash": "test_rating_hash",
511 | "rating": 1,
512 | "feedback": "Very useful information"
513 | })
514 |
515 | assert len(result) > 0
516 | assert "rated successfully" in result[0].text.lower()
517 | assert "thumbs up" in result[0].text.lower()
518 |
519 | # Verify quality score was updated
520 | updated_memory = await storage.get_by_hash("test_rating_hash")
521 | assert updated_memory.metadata['user_rating'] == 1
522 | assert updated_memory.metadata['user_feedback'] == "Very useful information"
523 | assert 'quality_score' in updated_memory.metadata
524 |
525 | @pytest.mark.asyncio
526 | async def test_get_memory_quality_mcp_tool(self):
527 | """Test get_memory_quality MCP tool."""
528 | from src.mcp_memory_service.server import MemoryServer
529 | from src.mcp_memory_service.models.memory import Memory
530 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
531 | import tempfile
532 | import os
533 |
534 | # Create temporary database
535 | with tempfile.TemporaryDirectory() as tmpdir:
536 | db_path = os.path.join(tmpdir, "test.db")
537 | storage = SqliteVecMemoryStorage(db_path)
538 | await storage.initialize()
539 |
540 | # Create and store a test memory with quality metadata
541 | test_memory = Memory(
542 | content="Test content with quality data",
543 | content_hash="test_quality_hash",
544 | metadata={
545 | 'quality_score': 0.85,
546 | 'quality_provider': 'onnx_local',
547 | 'access_count': 10,
548 | 'last_accessed_at': time.time()
549 | }
550 | )
551 | await storage.store(test_memory)
552 |
553 | # Create server instance
554 | server = MemoryServer()
555 | server.storage = storage
556 | server._storage_initialized = True
557 |
558 | # Get quality metrics
559 | result = await server.handle_get_memory_quality({
560 | "content_hash": "test_quality_hash"
561 | })
562 |
563 | assert len(result) > 0
564 | response_text = result[0].text
565 | assert "Quality Score: 0.850" in response_text
566 | assert "onnx_local" in response_text
567 | assert "Access Count: 10" in response_text
568 |
569 | @pytest.mark.asyncio
570 | async def test_analyze_quality_distribution_mcp_tool(self):
571 | """Test analyze_quality_distribution MCP tool."""
572 | from src.mcp_memory_service.server import MemoryServer
573 | from src.mcp_memory_service.models.memory import Memory
574 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
575 | import tempfile
576 | import os
577 |
578 | # Create temporary database
579 | with tempfile.TemporaryDirectory() as tmpdir:
580 | db_path = os.path.join(tmpdir, "test.db")
581 | storage = SqliteVecMemoryStorage(db_path)
582 | await storage.initialize()
583 |
584 | # Store memories with different quality scores
585 | test_memories = [
586 | Memory(content=f"High quality {i}", content_hash=f"high_{i}",
587 | metadata={'quality_score': 0.8 + i * 0.02})
588 | for i in range(5)
589 | ] + [
590 | Memory(content=f"Low quality {i}", content_hash=f"low_{i}",
591 | metadata={'quality_score': 0.2 + i * 0.02})
592 | for i in range(5)
593 | ]
594 |
595 | for mem in test_memories:
596 | await storage.store(mem)
597 |
598 | # Create server instance
599 | server = MemoryServer()
600 | server.storage = storage
601 | server._storage_initialized = True
602 |
603 | # Analyze distribution
604 | result = await server.handle_analyze_quality_distribution({
605 | "min_quality": 0.0,
606 | "max_quality": 1.0
607 | })
608 |
609 | assert len(result) > 0
610 | response_text = result[0].text
611 | assert "Total Memories: 10" in response_text
612 | assert "High Quality" in response_text
613 | assert "Low Quality" in response_text
614 |
615 | @pytest.mark.asyncio
616 | async def test_rate_memory_http_endpoint(self):
617 | """Test POST /api/quality/memories/{hash}/rate HTTP endpoint."""
618 | import httpx
619 | from src.mcp_memory_service.web.app import app
620 | from src.mcp_memory_service.web.dependencies import get_storage
621 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
622 | from src.mcp_memory_service.models.memory import Memory
623 | import tempfile
624 | import os
625 |
626 | # Create temporary database
627 | with tempfile.TemporaryDirectory() as tmpdir:
628 | db_path = os.path.join(tmpdir, "test.db")
629 | storage = SqliteVecMemoryStorage(db_path)
630 | await storage.initialize()
631 |
632 | # Store test memory
633 | test_memory = Memory(
634 | content="Test HTTP rating",
635 | content_hash="http_test_hash",
636 | metadata={}
637 | )
638 | await storage.store(test_memory)
639 |
640 | # Override get_storage dependency to use test storage
641 | async def override_get_storage():
642 | return storage
643 |
644 | app.dependency_overrides[get_storage] = override_get_storage
645 |
646 | try:
647 | # Use async client for proper async/await support
648 | async with httpx.AsyncClient(transport=httpx.ASGITransport(app=app), base_url="http://test") as client:
649 | response = await client.post(
650 | "/api/quality/memories/http_test_hash/rate",
651 | json={"rating": 1, "feedback": "Excellent"}
652 | )
653 |
654 | assert response.status_code == 200
655 | data = response.json()
656 | assert data["success"] is True
657 | assert data["content_hash"] == "http_test_hash"
658 | assert "new_quality_score" in data
659 | finally:
660 | # Clean up dependency override
661 | app.dependency_overrides.clear()
662 |
663 | @pytest.mark.asyncio
664 | async def test_get_quality_http_endpoint(self):
665 | """Test GET /api/quality/memories/{hash} HTTP endpoint."""
666 | import httpx
667 | from src.mcp_memory_service.web.app import app
668 | from src.mcp_memory_service.web.dependencies import get_storage
669 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
670 | from src.mcp_memory_service.models.memory import Memory
671 | import tempfile
672 | import os
673 |
674 | # Create temporary database
675 | with tempfile.TemporaryDirectory() as tmpdir:
676 | db_path = os.path.join(tmpdir, "test.db")
677 | storage = SqliteVecMemoryStorage(db_path)
678 | await storage.initialize()
679 |
680 | # Store test memory with quality data
681 | test_memory = Memory(
682 | content="Test HTTP quality retrieval",
683 | content_hash="http_quality_hash",
684 | metadata={
685 | 'quality_score': 0.75,
686 | 'quality_provider': 'implicit_signals',
687 | 'access_count': 5
688 | }
689 | )
690 | await storage.store(test_memory)
691 |
692 | # Override get_storage dependency to use test storage
693 | async def override_get_storage():
694 | return storage
695 |
696 | app.dependency_overrides[get_storage] = override_get_storage
697 |
698 | try:
699 | # Use async client for proper async/await support
700 | async with httpx.AsyncClient(transport=httpx.ASGITransport(app=app), base_url="http://test") as client:
701 | response = await client.get("/api/quality/memories/http_quality_hash")
702 |
703 | assert response.status_code == 200
704 | data = response.json()
705 | assert data["content_hash"] == "http_quality_hash"
706 | assert data["quality_score"] == 0.75
707 | assert data["quality_provider"] == "implicit_signals"
708 | assert data["access_count"] == 5
709 | finally:
710 | # Clean up dependency override
711 | app.dependency_overrides.clear()
712 |
713 | @pytest.mark.asyncio
714 | async def test_distribution_http_endpoint(self):
715 | """Test GET /api/quality/distribution HTTP endpoint."""
716 | import httpx
717 | from src.mcp_memory_service.web.app import app
718 | from src.mcp_memory_service.web.dependencies import get_storage
719 | from src.mcp_memory_service.storage.sqlite_vec import SqliteVecMemoryStorage
720 | from src.mcp_memory_service.models.memory import Memory
721 | import tempfile
722 | import os
723 |
724 | # Create temporary database
725 | with tempfile.TemporaryDirectory() as tmpdir:
726 | db_path = os.path.join(tmpdir, "test.db")
727 | storage = SqliteVecMemoryStorage(db_path)
728 | await storage.initialize()
729 |
730 | # Store memories with varied quality
731 | for i in range(20):
732 | score = 0.3 + (i / 20) * 0.6 # Range from 0.3 to 0.9
733 | memory = Memory(
734 | content=f"Memory {i}",
735 | content_hash=f"mem_hash_{i}",
736 | metadata={'quality_score': score}
737 | )
738 | await storage.store(memory)
739 |
740 | # Override get_storage dependency to use test storage
741 | async def override_get_storage():
742 | return storage
743 |
744 | app.dependency_overrides[get_storage] = override_get_storage
745 |
746 | try:
747 | # Use async client for proper async/await support
748 | async with httpx.AsyncClient(transport=httpx.ASGITransport(app=app), base_url="http://test") as client:
749 | response = await client.get("/api/quality/distribution?min_quality=0.0&max_quality=1.0")
750 |
751 | assert response.status_code == 200
752 | data = response.json()
753 | assert data["total_memories"] == 20
754 | assert "high_quality_count" in data
755 | assert "medium_quality_count" in data
756 | assert "low_quality_count" in data
757 | assert "average_score" in data
758 | assert len(data["top_memories"]) <= 10
759 | assert len(data["bottom_memories"]) <= 10
760 |
761 | # Verify memory objects include required fields for UI display
762 | for memory_list_key in ["top_memories", "bottom_memories"]:
763 | for memory in data[memory_list_key]:
764 | assert "content_hash" in memory, "content_hash required for ID display"
765 | assert "created_at" in memory, "created_at required for date display"
766 | assert "memory_type" in memory, "memory_type required for type display"
767 | assert "quality_score" in memory
768 | finally:
769 | # Clean up dependency override
770 | app.dependency_overrides.clear()
771 |
772 | @pytest.mark.asyncio
773 | async def test_async_background_scoring(self):
774 | """Test async quality scoring doesn't block."""
775 | from src.mcp_memory_service.quality.async_scorer import AsyncQualityScorer
776 | from src.mcp_memory_service.models.memory import Memory
777 | import time
778 |
779 | scorer = AsyncQualityScorer()
780 | await scorer.start()
781 |
782 | try:
783 | # Queue multiple memories for scoring
784 | start_time = time.time()
785 | memories = [
786 | Memory(content=f"Test {i}", content_hash=f"hash_{i}", metadata={})
787 | for i in range(10)
788 | ]
789 |
790 | for memory in memories:
791 | await scorer.score_memory(memory, "test query")
792 |
793 | # Should return immediately (non-blocking)
794 | elapsed = time.time() - start_time
795 | assert elapsed < 0.1 # Should be very fast (just queuing, not scoring)
796 |
797 | # Give worker time to process
798 | await asyncio.sleep(1.0)
799 |
800 | # Check stats
801 | stats = scorer.get_stats()
802 | assert stats["total_queued"] == 10
803 | assert stats["is_running"] is True
804 |
805 | finally:
806 | await scorer.stop()
807 |
808 |
809 | if __name__ == "__main__":
810 | pytest.main([__file__, "-v", "--tb=short"])
811 |
```
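
The assertions in `TestQualityScorer` pin down the composite formula: with boost enabled, the final score blends the AI score with the implicit-signals score using `boost_weight`; with boost disabled, the AI score passes through unchanged. The sketch below restates that arithmetic for reference only; `composite_quality` is a hypothetical helper, not part of the service's API, and the weighting semantics are assumed from the `0.7 * 0.8 + 0.3 * ~0.5` comment in `test_calculate_quality_score_with_boost`.

```python
# Hypothetical helper; illustrates the blend the tests assert, not the service's API.
def composite_quality(ai_score: float, implicit_score: float,
                      boost_enabled: bool = True,
                      boost_weight: float = 0.3) -> float:
    """Blend AI quality with implicit signals (assumed semantics)."""
    if not boost_enabled:
        return ai_score  # AI-only path, as in test_calculate_quality_score_no_boost
    return (1 - boost_weight) * ai_score + boost_weight * implicit_score

assert composite_quality(0.75, 0.5, boost_enabled=False) == 0.75  # no boost: AI score passes through
assert abs(composite_quality(0.8, 0.5) - 0.71) < 1e-9             # boosted: 0.7*0.8 + 0.3*0.5
```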
--------------------------------------------------------------------------------
/claude-hooks/utilities/memory-scorer.js:
--------------------------------------------------------------------------------
```javascript
1 | /**
2 | * Memory Relevance Scoring Utility
3 | * Implements intelligent algorithms to score memories by relevance to current project context
4 | * Phase 2: Enhanced with conversation context awareness for dynamic memory loading
5 | */
6 |
7 | /**
8 | * Calculate time decay factor for memory relevance
9 | * More recent memories get higher scores
10 | */
11 | function calculateTimeDecay(memoryDate, decayRate = 0.1) {
12 | try {
13 | const now = new Date();
14 |
15 | // Handle both Unix timestamps (seconds) and ISO strings
16 | let memoryTime;
17 | if (typeof memoryDate === 'string') {
18 | // ISO string format
19 | memoryTime = new Date(memoryDate);
20 | } else if (typeof memoryDate === 'number') {
21 | // Unix timestamp in seconds, convert to milliseconds
22 | memoryTime = new Date(memoryDate * 1000);
23 | } else {
24 | return 0.5; // Invalid format
25 | }
26 |
27 | if (isNaN(memoryTime.getTime())) {
28 | return 0.5; // Default score for invalid dates
29 | }
30 |
31 | // Calculate days since memory creation
32 | const daysDiff = (now - memoryTime) / (1000 * 60 * 60 * 24);
33 |
34 |         // Exponential decay: score = e^(-decayRate * days)
35 |         // With the default decayRate of 0.1 the score falls to ~0.5
36 |         // after a week and ~0.05 after a month; anything older is
37 |         // effectively floored at 0.01 by the clamp below.
38 |         const decayScore = Math.exp(-decayRate * daysDiff);
39 | 
40 |         // Clamp the score to [0.01, 1.0]
41 |         return Math.max(0.01, Math.min(1.0, decayScore));
42 |
43 | } catch (error) {
44 | // Silently fail with default score to avoid noise
45 | return 0.5;
46 | }
47 | }
48 |
49 | /**
50 | * Calculate tag relevance score
51 | * Memories with tags matching project context get higher scores
52 | */
53 | function calculateTagRelevance(memoryTags = [], projectContext) {
54 | try {
55 | if (!Array.isArray(memoryTags) || memoryTags.length === 0) {
56 | return 0.3; // Default score for memories without tags
57 | }
58 |
59 | const contextTags = [
60 | projectContext.name?.toLowerCase(),
61 | projectContext.language?.toLowerCase(),
62 | ...(projectContext.frameworks || []).map(f => f.toLowerCase()),
63 | ...(projectContext.tools || []).map(t => t.toLowerCase())
64 | ].filter(Boolean);
65 |
66 | if (contextTags.length === 0) {
67 | return 0.5; // No context to match against
68 | }
69 |
70 | // Calculate tag overlap (exact match only to prevent cross-project pollution)
71 | const memoryTagsLower = memoryTags.map(tag => tag.toLowerCase());
72 | const matchingTags = contextTags.filter(contextTag =>
73 | memoryTagsLower.includes(contextTag)
74 | );
75 |
76 | // Score based on percentage of matching tags
77 | const overlapScore = matchingTags.length / contextTags.length;
78 |
79 | // Bonus for exact project name matches
80 | const exactProjectMatch = memoryTagsLower.includes(projectContext.name?.toLowerCase());
81 | const projectBonus = exactProjectMatch ? 0.3 : 0;
82 |
83 | // Bonus for exact language matches
84 | const exactLanguageMatch = memoryTagsLower.includes(projectContext.language?.toLowerCase());
85 | const languageBonus = exactLanguageMatch ? 0.2 : 0;
86 |
87 | // Bonus for framework matches
88 | const frameworkMatches = (projectContext.frameworks || []).filter(framework =>
89 | memoryTagsLower.some(tag => tag.includes(framework.toLowerCase()))
90 | );
91 | const frameworkBonus = frameworkMatches.length * 0.1;
92 |
93 | const totalScore = Math.min(1.0, overlapScore + projectBonus + languageBonus + frameworkBonus);
94 |
95 | return Math.max(0.1, totalScore);
96 |
97 | } catch (error) {
98 | // Silently fail with default score to avoid noise
99 | return 0.3;
100 | }
101 | }
102 |
103 | /**
104 | * Calculate content quality score to penalize generic/empty content
105 | */
106 | function calculateContentQuality(memoryContent = '') {
107 | try {
108 | if (!memoryContent || typeof memoryContent !== 'string') {
109 | return 0.1;
110 | }
111 |
112 | const content = memoryContent.trim();
113 |
114 | // Check for generic session summary patterns
115 | const genericPatterns = [
116 | /## 🎯 Topics Discussed\s*-\s*implementation\s*-\s*\.\.\.?$/m,
117 | /Topics Discussed.*implementation.*\.\.\..*$/s,
118 | /Session Summary.*implementation.*\.\.\..*$/s,
119 | /^# Session Summary.*Date.*Project.*Topics Discussed.*implementation.*\.\.\..*$/s
120 | ];
121 |
122 | const isGeneric = genericPatterns.some(pattern => pattern.test(content));
123 | if (isGeneric) {
124 | return 0.05; // Heavily penalize generic content
125 | }
126 |
127 | // Check content length and substance
128 | if (content.length < 50) {
129 | return 0.2; // Short content gets low score
130 | }
131 |
132 | // Check for meaningful content indicators
133 | const meaningfulIndicators = [
134 | 'decided', 'implemented', 'changed', 'fixed', 'created', 'updated',
135 | 'because', 'reason', 'approach', 'solution', 'result', 'impact',
136 | 'learned', 'discovered', 'found', 'issue', 'problem', 'challenge'
137 | ];
138 |
139 | const meaningfulMatches = meaningfulIndicators.filter(indicator =>
140 | content.toLowerCase().includes(indicator)
141 | ).length;
142 |
143 | // Calculate information density
144 | const words = content.split(/\s+/).filter(w => w.length > 2);
145 | const uniqueWords = new Set(words.map(w => w.toLowerCase()));
146 | const diversityRatio = uniqueWords.size / Math.max(words.length, 1);
147 |
148 | // Combine factors
149 | const meaningfulnessScore = Math.min(0.4, meaningfulMatches * 0.08);
150 | const diversityScore = Math.min(0.3, diversityRatio * 0.5);
151 | const lengthScore = Math.min(0.3, content.length / 1000); // Longer content gets bonus
152 |
153 | const qualityScore = meaningfulnessScore + diversityScore + lengthScore;
154 | return Math.max(0.05, Math.min(1.0, qualityScore));
155 |
156 | } catch (error) {
157 | // Silently fail with default score to avoid noise
158 | return 0.3;
159 | }
160 | }
161 |
162 | /**
163 | * Calculate content relevance using simple text analysis
164 | * Memories with content matching project keywords get higher scores
165 | */
166 | function calculateContentRelevance(memoryContent = '', projectContext) {
167 | try {
168 | if (!memoryContent || typeof memoryContent !== 'string') {
169 | return 0.3;
170 | }
171 |
172 | const content = memoryContent.toLowerCase();
173 | const keywords = [
174 | projectContext.name?.toLowerCase(),
175 | projectContext.language?.toLowerCase(),
176 | ...(projectContext.frameworks || []).map(f => f.toLowerCase()),
177 | ...(projectContext.tools || []).map(t => t.toLowerCase()),
178 | // Add common technical keywords
179 | 'architecture', 'decision', 'implementation', 'bug', 'fix',
180 | 'feature', 'config', 'setup', 'deployment', 'performance'
181 | ].filter(Boolean);
182 |
183 | if (keywords.length === 0) {
184 | return 0.5;
185 | }
186 |
187 | // Count keyword occurrences
188 | let totalMatches = 0;
189 | let keywordScore = 0;
190 |
191 | keywords.forEach(keyword => {
192 |             const occurrences = (content.match(new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')) || []).length; // Escape regex metacharacters (e.g. "c++") before matching
193 | if (occurrences > 0) {
194 | totalMatches++;
195 | keywordScore += Math.log(1 + occurrences) * 0.1; // Logarithmic scoring
196 | }
197 | });
198 |
199 | // Normalize score
200 | const matchRatio = totalMatches / keywords.length;
201 | const contentScore = Math.min(1.0, matchRatio + keywordScore);
202 |
203 | return Math.max(0.1, contentScore);
204 |
205 | } catch (error) {
206 | // Silently fail with default score to avoid noise
207 | return 0.3;
208 | }
209 | }
210 |
211 | /**
212 | * Calculate memory type bonus
213 | * Certain memory types are more valuable for context injection
214 | */
215 | function calculateTypeBonus(memoryType) {
216 | const typeScores = {
217 | 'decision': 0.3, // Architectural decisions are highly valuable
218 | 'architecture': 0.3, // Architecture documentation is important
219 | 'reference': 0.2, // Reference materials are useful
220 | 'session': 0.15, // Session summaries provide good context
221 | 'insight': 0.2, // Insights are valuable for learning
222 | 'bug-fix': 0.15, // Bug fixes provide historical context
223 | 'feature': 0.1, // Feature descriptions are moderately useful
224 | 'note': 0.05, // General notes are less critical
225 | 'todo': 0.05, // TODOs are task-specific
226 | 'temporary': -0.1 // Temporary notes should be deprioritized
227 | };
228 |
229 | return typeScores[memoryType?.toLowerCase()] || 0;
230 | }
231 |
232 | /**
233 | * Calculate recency bonus to prioritize very recent memories
234 | * Provides explicit boost for memories created within specific time windows
235 | */
236 | function calculateRecencyBonus(memoryDate) {
237 | // Recency bonus tiers (days and corresponding bonus values)
238 | const RECENCY_TIERS = [
239 | { days: 7, bonus: 0.15 }, // Strong boost for last week
240 | { days: 14, bonus: 0.10 }, // Moderate boost for last 2 weeks
241 | { days: 30, bonus: 0.05 } // Small boost for last month
242 | ];
243 |
244 | try {
245 | const now = new Date();
246 |
247 | // Handle both Unix timestamps (seconds) and ISO strings
248 | let memoryTime;
249 | if (typeof memoryDate === 'string') {
250 | // ISO string format
251 | memoryTime = new Date(memoryDate);
252 | } else if (typeof memoryDate === 'number') {
253 | // Unix timestamp in seconds, convert to milliseconds
254 | memoryTime = new Date(memoryDate * 1000);
255 | } else {
256 | return 0; // Invalid format
257 | }
258 |
259 | if (isNaN(memoryTime.getTime()) || memoryTime > now) {
260 | return 0; // No bonus for invalid or future dates
261 | }
262 |
263 | const daysDiff = (now - memoryTime) / (1000 * 60 * 60 * 24);
264 |
265 | // Find the appropriate tier for this memory's age
266 | for (const tier of RECENCY_TIERS) {
267 | if (daysDiff <= tier.days) {
268 | return tier.bonus;
269 | }
270 | }
271 |
272 | return 0; // No bonus for older memories
273 |
274 | } catch (error) {
275 | return 0;
276 | }
277 | }
278 |
279 | /**
280 | * Extract backend quality score from memory metadata
281 | * This leverages the AI-based quality scoring from the MCP Memory Service backend
282 | * (ONNX local SLM, Groq, or implicit signals)
283 | */
284 | function calculateBackendQuality(memory) {
285 | try {
286 | // Check for quality_score in metadata (set by backend quality system)
287 | if (memory.metadata && typeof memory.metadata.quality_score === 'number') {
288 | return memory.metadata.quality_score;
289 | }
290 |
291 | // Also check direct property (some API responses flatten metadata)
292 | if (typeof memory.quality_score === 'number') {
293 | return memory.quality_score;
294 | }
295 |
296 | // Default to neutral score if not available
297 | // This ensures graceful fallback when backend hasn't scored the memory
298 | return 0.5;
299 |
300 | } catch (error) {
301 | return 0.5; // Neutral fallback
302 | }
303 | }
304 |
305 | /**
306 | * Calculate conversation context relevance score (Phase 2)
307 | * Matches memory content with current conversation topics and intent
308 | */
309 | function calculateConversationRelevance(memory, conversationAnalysis) {
310 | try {
311 | if (!conversationAnalysis || !memory.content) {
312 | return 0.3; // Default score when no conversation context
313 | }
314 |
315 | const memoryContent = memory.content.toLowerCase();
316 | let relevanceScore = 0;
317 | let factorCount = 0;
318 |
319 | // Score based on topic matching
320 | if (conversationAnalysis.topics && conversationAnalysis.topics.length > 0) {
321 | conversationAnalysis.topics.forEach(topic => {
322 |                 const topicMatches = (memoryContent.match(new RegExp(topic.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi')) || []).length; // Escape regex metacharacters in topic names
323 | if (topicMatches > 0) {
324 | relevanceScore += topic.confidence * Math.min(topicMatches * 0.2, 0.8);
325 | factorCount++;
326 | }
327 | });
328 | }
329 |
330 | // Score based on entity matching
331 | if (conversationAnalysis.entities && conversationAnalysis.entities.length > 0) {
332 | conversationAnalysis.entities.forEach(entity => {
333 |                 const entityMatches = (memoryContent.match(new RegExp(entity.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi')) || []).length; // Escape regex metacharacters in entity names
334 | if (entityMatches > 0) {
335 | relevanceScore += entity.confidence * 0.3;
336 | factorCount++;
337 | }
338 | });
339 | }
340 |
341 | // Score based on intent alignment
342 | if (conversationAnalysis.intent) {
343 | const intentKeywords = {
344 | 'learning': ['learn', 'understand', 'explain', 'how', 'tutorial', 'guide'],
345 | 'problem-solving': ['fix', 'error', 'debug', 'issue', 'problem', 'solve'],
346 | 'development': ['build', 'create', 'implement', 'develop', 'code', 'feature'],
347 | 'optimization': ['optimize', 'improve', 'performance', 'faster', 'better'],
348 | 'review': ['review', 'check', 'analyze', 'audit', 'validate'],
349 | 'planning': ['plan', 'design', 'architecture', 'approach', 'strategy']
350 | };
351 |
352 | const intentWords = intentKeywords[conversationAnalysis.intent.name] || [];
353 | let intentMatches = 0;
354 | intentWords.forEach(word => {
355 | if (memoryContent.includes(word)) {
356 | intentMatches++;
357 | }
358 | });
359 |
360 | if (intentMatches > 0) {
361 | relevanceScore += conversationAnalysis.intent.confidence * (intentMatches / intentWords.length);
362 | factorCount++;
363 | }
364 | }
365 |
366 | // Score based on code context if present
367 | if (conversationAnalysis.codeContext && conversationAnalysis.codeContext.isCodeRelated) {
368 | const codeIndicators = ['code', 'function', 'class', 'method', 'variable', 'api', 'library'];
369 | let codeMatches = 0;
370 | codeIndicators.forEach(indicator => {
371 | if (memoryContent.includes(indicator)) {
372 | codeMatches++;
373 | }
374 | });
375 |
376 | if (codeMatches > 0) {
377 | relevanceScore += 0.4 * (codeMatches / codeIndicators.length);
378 | factorCount++;
379 | }
380 | }
381 |
382 | // Normalize score
383 | const normalizedScore = factorCount > 0 ? relevanceScore / factorCount : 0.3;
384 | return Math.max(0.1, Math.min(1.0, normalizedScore));
385 |
386 | } catch (error) {
387 | // Silently fail with default score to avoid noise
388 | return 0.3;
389 | }
390 | }
391 |
392 | /**
393 | * Calculate final relevance score for a memory (Enhanced with quality scoring)
394 | */
395 | function calculateRelevanceScore(memory, projectContext, options = {}) {
396 | try {
397 | const {
398 | weights = {},
399 | timeDecayRate = 0.1, // Default decay rate
400 | includeConversationContext = false,
401 | conversationAnalysis = null
402 | } = options;
403 |
404 | // Default weights including content quality and backend quality factors
405 | // Backend quality leverages AI-based semantic scoring from MCP Memory Service
406 | const defaultWeights = includeConversationContext ? {
407 | timeDecay: 0.15, // Reduced weight for time
408 | tagRelevance: 0.25, // Tag matching remains important
409 | contentRelevance: 0.10, // Content matching reduced
410 | contentQuality: 0.15, // Heuristic quality factor
411 | backendQuality: 0.15, // AI-based backend quality (ONNX/Groq)
412 | conversationRelevance: 0.20, // Conversation context factor
413 | typeBonus: 0.05 // Memory type provides minor adjustment
414 | } : {
415 | timeDecay: 0.20, // Reduced time weight
416 | tagRelevance: 0.30, // Tag matching important
417 | contentRelevance: 0.10, // Content matching reduced
418 | contentQuality: 0.20, // Heuristic quality factor
419 | backendQuality: 0.20, // AI-based backend quality (ONNX/Groq)
420 | typeBonus: 0.05 // Type bonus reduced
421 | };
422 |
423 | const w = { ...defaultWeights, ...weights };
424 |
425 | // Calculate individual scores
426 | const timeScore = calculateTimeDecay(memory.created_at || memory.created_at_iso, timeDecayRate);
427 | const tagScore = calculateTagRelevance(memory.tags, projectContext);
428 | const contentScore = calculateContentRelevance(memory.content, projectContext);
429 | const qualityScore = calculateContentQuality(memory.content);
430 | const backendQualityScore = calculateBackendQuality(memory); // AI-based quality from backend
431 | const typeBonus = calculateTypeBonus(memory.memory_type);
432 | const recencyBonus = calculateRecencyBonus(memory.created_at || memory.created_at_iso);
433 |
434 | let finalScore = (
435 | (timeScore * w.timeDecay) +
436 | (tagScore * w.tagRelevance) +
437 | (contentScore * w.contentRelevance) +
438 | (qualityScore * w.contentQuality) +
439 | (backendQualityScore * (w.backendQuality || 0)) + // Backend AI quality score
440 | typeBonus + // Type bonus is not weighted, acts as adjustment
441 | recencyBonus // Recency bonus provides explicit boost for very recent memories
442 | );
443 |
444 | const breakdown = {
445 | timeDecay: timeScore,
446 | tagRelevance: tagScore,
447 | contentRelevance: contentScore,
448 | contentQuality: qualityScore,
449 | backendQuality: backendQualityScore, // AI-based quality from ONNX/Groq
450 | typeBonus: typeBonus,
451 | recencyBonus: recencyBonus
452 | };
453 |
454 | // Add conversation context scoring if enabled (Phase 2)
455 | if (includeConversationContext && conversationAnalysis) {
456 | const conversationScore = calculateConversationRelevance(memory, conversationAnalysis);
457 | finalScore += (conversationScore * (w.conversationRelevance || 0));
458 | breakdown.conversationRelevance = conversationScore;
459 | }
460 |
461 | // Apply quality penalty for very low quality content (multiplicative)
462 | if (qualityScore < 0.2) {
463 | finalScore *= 0.5; // Heavily penalize low quality content
464 | }
465 |
466 | // Apply project affinity penalty - memories without project tag match get penalized
467 | // This prevents cross-project memory pollution (e.g., Azure memories in Python project)
468 | const memoryTags = (memory.tags || []).map(t => t.toLowerCase());
469 | const memoryContent = (memory.content || '').toLowerCase();
470 | const projectName = projectContext.name?.toLowerCase();
471 |
472 | // Check for project name in tags OR content
473 | const hasProjectTag = projectName && (
474 | memoryTags.some(tag => tag === projectName || tag.includes(projectName)) ||
475 | memoryContent.includes(projectName)
476 | );
477 |
478 | if (!hasProjectTag && tagScore < 0.3) {
479 | // No project reference at all - definitely unrelated memory
480 | // Hard filter: set score to 0 to exclude from results entirely
481 | finalScore = 0;
482 | breakdown.projectAffinity = 'none (filtered)';
483 | } else if (!hasProjectTag) {
484 | // Some tag relevance but no project tag - might be related
485 | finalScore *= 0.5; // Moderate penalty
486 | breakdown.projectAffinity = 'low';
487 | } else {
488 | breakdown.projectAffinity = 'high';
489 | }
490 |
491 | // Ensure score is between 0 and 1
492 | const normalizedScore = Math.max(0, Math.min(1, finalScore));
493 |
494 | return {
495 | finalScore: normalizedScore,
496 | breakdown: breakdown,
497 | weights: w,
498 | hasConversationContext: includeConversationContext
499 | };
500 |
501 | } catch (error) {
502 | // Silently fail with default score to avoid noise
503 | return {
504 | finalScore: 0.1,
505 | breakdown: { error: error.message },
506 | weights: {},
507 | hasConversationContext: false
508 | };
509 | }
510 | }
511 |
512 | /**
513 | * Score and sort memories by relevance
514 | */
515 | function scoreMemoryRelevance(memories, projectContext, options = {}) {
516 | try {
517 | const { verbose = true } = options;
518 |
519 | if (!Array.isArray(memories)) {
520 | if (verbose) console.warn('[Memory Scorer] Invalid memories array');
521 | return [];
522 | }
523 |
524 | if (verbose) {
525 | console.log(`[Memory Scorer] Scoring ${memories.length} memories for project: ${projectContext.name}`);
526 | }
527 |
528 | // Score each memory
529 | const scoredMemories = memories.map(memory => {
530 | const scoreResult = calculateRelevanceScore(memory, projectContext, options);
531 |
532 | return {
533 | ...memory,
534 | relevanceScore: scoreResult.finalScore,
535 | scoreBreakdown: scoreResult.breakdown,
536 | hasConversationContext: scoreResult.hasConversationContext
537 | };
538 | });
539 |
540 | // Sort by relevance score (highest first)
541 | const sortedMemories = scoredMemories.sort((a, b) => b.relevanceScore - a.relevanceScore);
542 |
543 | // Log scoring results for debugging
544 | if (verbose) {
545 | console.log('[Memory Scorer] Top scored memories:');
546 | sortedMemories.slice(0, 3).forEach((memory, index) => {
547 | console.log(` ${index + 1}. Score: ${memory.relevanceScore.toFixed(3)} - ${memory.content.substring(0, 60)}...`);
548 | });
549 | }
550 |
551 | return sortedMemories;
552 |
553 | } catch (error) {
554 | if (verbose) console.error('[Memory Scorer] Error scoring memories:', error.message);
555 | return memories || [];
556 | }
557 | }
558 |
559 | /**
560 | * Filter memories by minimum relevance threshold
561 | */
562 | function filterByRelevance(memories, minScore = 0.3, options = {}) {
563 | try {
564 | const { verbose = true } = options;
565 | const filtered = memories.filter(memory => memory.relevanceScore >= minScore);
566 | if (verbose) {
567 | console.log(`[Memory Scorer] Filtered ${filtered.length}/${memories.length} memories above threshold ${minScore}`);
568 | }
569 | return filtered;
570 |
571 | } catch (error) {
572 | if (verbose) console.warn('[Memory Scorer] Error filtering memories:', error.message);
573 | return memories;
574 | }
575 | }
576 |
577 | /**
578 | * Analyze memory age distribution to detect staleness
579 | * Returns statistics and recommended weight adjustments
580 | */
581 | function analyzeMemoryAgeDistribution(memories, options = {}) {
582 | try {
583 | const { verbose = false } = options;
584 |
585 | if (!Array.isArray(memories) || memories.length === 0) {
586 | return {
587 | avgAge: 0,
588 | medianAge: 0,
589 | p75Age: 0,
590 | p90Age: 0,
591 | recentCount: 0,
592 | staleCount: 0,
593 | isStale: false,
594 | recommendedAdjustments: {}
595 | };
596 | }
597 |
598 | const now = new Date();
599 |
600 | // Calculate ages in days
601 | const ages = memories.map(memory => {
602 | // Handle both Unix timestamps (seconds) and ISO strings
603 | let memoryTime;
604 | if (memory.created_at_iso) {
605 | memoryTime = new Date(memory.created_at_iso);
606 | } else if (memory.created_at) {
607 | // created_at is in seconds, convert to milliseconds
608 | memoryTime = new Date(memory.created_at * 1000);
609 | } else {
610 | return 365; // Default to very old if no timestamp
611 | }
612 |
613 | if (isNaN(memoryTime.getTime())) return 365; // Default to very old
614 | return (now - memoryTime) / (1000 * 60 * 60 * 24);
615 | }).sort((a, b) => a - b);
616 |
617 | // Calculate percentiles
618 | const avgAge = ages.reduce((sum, age) => sum + age, 0) / ages.length;
619 | const medianAge = ages[Math.floor(ages.length / 2)];
620 | const p75Age = ages[Math.floor(ages.length * 0.75)];
621 | const p90Age = ages[Math.floor(ages.length * 0.90)];
622 |
623 | // Count recent vs stale
624 | const recentCount = ages.filter(age => age <= 14).length; // Last 2 weeks
625 | const staleCount = ages.filter(age => age > 30).length; // Older than 1 month
626 |
627 | // Determine if memory set is stale
628 | const isStale = medianAge > 30 || (recentCount / ages.length) < 0.2;
629 |
630 | // Recommended adjustments based on staleness
631 | const recommendedAdjustments = {};
632 |
633 | if (isStale) {
634 | // Memories are old - boost time decay weight, reduce tag relevance
635 |         recommendedAdjustments.timeDecay = 0.50;  // Raise well above the 0.20 default in calculateRelevanceScore
636 |         recommendedAdjustments.tagRelevance = 0.20;  // Lower from the 0.30 default in calculateRelevanceScore
637 | recommendedAdjustments.recencyBonus = 0.25; // Increase bonus for any recent memories
638 | recommendedAdjustments.reason = `Stale memory set detected (median: ${Math.round(medianAge)}d old, ${Math.round(recentCount/ages.length*100)}% recent)`;
639 | } else if (avgAge < 14) {
640 | // Memories are very recent - balanced approach
641 | recommendedAdjustments.timeDecay = 0.30;
642 | recommendedAdjustments.tagRelevance = 0.30;
643 | recommendedAdjustments.reason = `Recent memory set (avg: ${Math.round(avgAge)}d old)`;
644 | }
645 |
646 | if (verbose) {
647 | console.log('[Memory Age Analyzer]', {
648 | avgAge: Math.round(avgAge),
649 | medianAge: Math.round(medianAge),
650 | p75Age: Math.round(p75Age),
651 | recentPercent: Math.round(recentCount / ages.length * 100),
652 | isStale,
653 | adjustments: recommendedAdjustments.reason || 'No adjustments needed'
654 | });
655 | }
656 |
657 | return {
658 | avgAge,
659 | medianAge,
660 | p75Age,
661 | p90Age,
662 | recentCount,
663 | staleCount,
664 | totalCount: ages.length,
665 | isStale,
666 | recommendedAdjustments
667 | };
668 |
669 | } catch (error) {
670 | if (verbose) console.error('[Memory Age Analyzer] Error:', error.message);
671 | return {
672 | avgAge: 0,
673 | medianAge: 0,
674 | p75Age: 0,
675 | p90Age: 0,
676 | recentCount: 0,
677 | staleCount: 0,
678 | isStale: false,
679 | recommendedAdjustments: {}
680 | };
681 | }
682 | }
683 |
684 | /**
685 | * Calculate adaptive git context weight based on memory age and git activity
686 | * Prevents old git-related memories from dominating when recent development exists
687 | */
688 | function calculateAdaptiveGitWeight(gitContext, memoryAgeAnalysis, configuredWeight = 1.2, options = {}) {
689 | try {
690 | const { verbose = false } = options;
691 |
692 | // No git context or no recent commits - use configured weight
693 | if (!gitContext || !gitContext.recentCommits || gitContext.recentCommits.length === 0) {
694 | return { weight: configuredWeight, reason: 'No recent git activity' };
695 | }
696 |
697 | // Calculate days since most recent commit
698 | const now = new Date();
699 | const mostRecentCommit = new Date(gitContext.recentCommits[0].date);
700 | const daysSinceLastCommit = (now - mostRecentCommit) / (1000 * 60 * 60 * 24);
701 |
702 | // Scenario 1: Recent commits (< 7d) BUT stale memories (median > 30d)
703 | // Problem: Git boost would amplify old git memories over potential recent work
704 | if (daysSinceLastCommit <= 7 && memoryAgeAnalysis.medianAge > 30) {
705 | const reducedWeight = Math.max(1.0, configuredWeight * 0.7); // Reduce by 30%
706 | const reason = `Recent commits (${Math.round(daysSinceLastCommit)}d ago) but stale memories (median: ${Math.round(memoryAgeAnalysis.medianAge)}d) - reducing git boost`;
707 |
708 | if (verbose) {
709 | console.log(`[Adaptive Git Weight] ${reason}: ${configuredWeight.toFixed(1)} → ${reducedWeight.toFixed(1)}`);
710 | }
711 |
712 | return { weight: reducedWeight, reason, adjusted: true };
713 | }
714 |
715 | // Scenario 2: Both commits and memories are recent (< 14d)
716 | // Safe to use configured weight, git context is relevant
717 | if (daysSinceLastCommit <= 14 && memoryAgeAnalysis.avgAge <= 14) {
718 | return {
719 | weight: configuredWeight,
720 | reason: `Recent commits and memories aligned (${Math.round(daysSinceLastCommit)}d commits, ${Math.round(memoryAgeAnalysis.avgAge)}d avg memory age)`,
721 | adjusted: false
722 | };
723 | }
724 |
725 | // Scenario 3: Old commits (> 14d) but recent memories exist
726 | // Slightly reduce git weight to let recent non-git memories surface
727 | if (daysSinceLastCommit > 14 && memoryAgeAnalysis.recentCount > 0) {
728 | const reducedWeight = Math.max(1.0, configuredWeight * 0.85); // Reduce by 15%
729 | const reason = `Older commits (${Math.round(daysSinceLastCommit)}d ago) with some recent memories - slightly reducing git boost`;
730 |
731 | if (verbose) {
732 | console.log(`[Adaptive Git Weight] ${reason}: ${configuredWeight.toFixed(1)} → ${reducedWeight.toFixed(1)}`);
733 | }
734 |
735 | return { weight: reducedWeight, reason, adjusted: true };
736 | }
737 |
738 | // Default: use configured weight
739 | return { weight: configuredWeight, reason: 'Using configured weight', adjusted: false };
740 |
741 | } catch (error) {
742 | if (verbose) console.error('[Adaptive Git Weight] Error:', error.message);
743 | return { weight: configuredWeight, reason: 'Error - using fallback', adjusted: false };
744 | }
745 | }
746 |
747 | module.exports = {
748 | scoreMemoryRelevance,
749 | calculateRelevanceScore,
750 | calculateTimeDecay,
751 | calculateTagRelevance,
752 | calculateContentRelevance,
753 | calculateContentQuality,
754 | calculateBackendQuality, // AI-based quality scoring integration
755 | calculateConversationRelevance,
756 | calculateTypeBonus,
757 | calculateRecencyBonus,
758 | filterByRelevance,
759 | analyzeMemoryAgeDistribution,
760 | calculateAdaptiveGitWeight
761 | };
762 |
763 | // Direct execution support for testing
764 | if (require.main === module) {
765 | // Test with mock data
766 | const mockProjectContext = {
767 | name: 'mcp-memory-service',
768 | language: 'JavaScript',
769 | frameworks: ['Node.js'],
770 | tools: ['npm']
771 | };
772 |
773 | const mockMemories = [
774 | {
775 | content: 'Decided to use SQLite-vec for better performance in MCP Memory Service',
776 | tags: ['mcp-memory-service', 'decision', 'sqlite-vec'],
777 | memory_type: 'decision',
778 | created_at: '2025-08-19T10:00:00Z'
779 | },
780 | {
781 | content: 'Fixed bug in JavaScript hook implementation for Claude Code integration',
782 | tags: ['javascript', 'bug-fix', 'claude-code'],
783 | memory_type: 'bug-fix',
784 | created_at: '2025-08-18T15:30:00Z'
785 | },
786 | {
787 | content: 'Random note about completely unrelated project',
788 | tags: ['other-project', 'note'],
789 | memory_type: 'note',
790 | created_at: '2025-08-01T08:00:00Z'
791 | }
792 | ];
793 |
794 | console.log('\n=== MEMORY SCORING TEST ===');
795 | const scored = scoreMemoryRelevance(mockMemories, mockProjectContext);
796 | console.log('\n=== SCORED RESULTS ===');
797 | scored.forEach((memory, index) => {
798 | console.log(`${index + 1}. Score: ${memory.relevanceScore.toFixed(3)}`);
799 | console.log(` Content: ${memory.content.substring(0, 80)}...`);
800 | console.log(` Breakdown:`, memory.scoreBreakdown);
801 | console.log('');
802 | });
803 | }
```
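
Taken together, the exports support a feedback loop: `analyzeMemoryAgeDistribution` detects a stale memory set and suggests weight overrides, which can be passed straight into `scoreMemoryRelevance`. The wiring below is a minimal sketch, not code from the repository: the require path and sample memories are assumed, and `recommendedAdjustments.reason` is stripped because the scorer only reads weight keys.

```javascript
// Minimal sketch (assumed wiring): adapt scoring weights to memory staleness.
const {
    analyzeMemoryAgeDistribution,
    scoreMemoryRelevance,
    filterByRelevance
} = require('./memory-scorer'); // path assumed relative to claude-hooks/utilities

const projectContext = { name: 'my-app', language: 'TypeScript', frameworks: ['React'], tools: ['npm'] };
const nowSec = Date.now() / 1000;
const memories = [ // created_at is in seconds, matching the scorer's expectations
    { content: 'Decided to adopt React Query in my-app', tags: ['my-app', 'decision'], memory_type: 'decision', created_at: nowSec - 3 * 86400 },
    { content: 'Old note from an unrelated project', tags: ['other'], memory_type: 'note', created_at: nowSec - 90 * 86400 }
];

// Let the age analyzer recommend weight overrides, drop its "reason" note,
// then score and filter with those weights. The unrelated memory is hard-filtered
// to score 0 by the project-affinity check and removed by filterByRelevance.
const ageStats = analyzeMemoryAgeDistribution(memories, { verbose: false });
const { reason, ...weightOverrides } = ageStats.recommendedAdjustments;
const scored = scoreMemoryRelevance(memories, projectContext, { weights: weightOverrides, verbose: false });
const relevant = filterByRelevance(scored, 0.3, { verbose: false });
console.log(relevant.map(m => ({ score: m.relevanceScore.toFixed(3), content: m.content.slice(0, 40) })));
```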