This is page 34 of 47. Use http://codebase.md/doobidoo/mcp-memory-service?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── amp-bridge.md
│ │ ├── amp-pr-automator.md
│ │ ├── code-quality-guard.md
│ │ ├── gemini-pr-automator.md
│ │ └── github-release-manager.md
│ ├── settings.local.json.backup
│ └── settings.local.json.local
├── .commit-message
├── .dockerignore
├── .env.example
├── .env.sqlite.backup
├── .envnn#
├── .gitattributes
├── .github
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── performance_issue.yml
│ ├── pull_request_template.md
│ └── workflows
│ ├── bridge-tests.yml
│ ├── CACHE_FIX.md
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── cleanup-images.yml.disabled
│ ├── dev-setup-validation.yml
│ ├── docker-publish.yml
│ ├── LATEST_FIXES.md
│ ├── main-optimized.yml.disabled
│ ├── main.yml
│ ├── publish-and-test.yml
│ ├── README_OPTIMIZATION.md
│ ├── release-tag.yml.disabled
│ ├── release.yml
│ ├── roadmap-review-reminder.yml
│ ├── SECRET_CONDITIONAL_FIX.md
│ └── WORKFLOW_FIXES.md
├── .gitignore
├── .mcp.json.backup
├── .mcp.json.template
├── .pyscn
│ ├── .gitignore
│ └── reports
│ └── analyze_20251123_214224.html
├── AGENTS.md
├── archive
│ ├── deployment
│ │ ├── deploy_fastmcp_fixed.sh
│ │ ├── deploy_http_with_mcp.sh
│ │ └── deploy_mcp_v4.sh
│ ├── deployment-configs
│ │ ├── empty_config.yml
│ │ └── smithery.yaml
│ ├── development
│ │ └── test_fastmcp.py
│ ├── docs-removed-2025-08-23
│ │ ├── authentication.md
│ │ ├── claude_integration.md
│ │ ├── claude-code-compatibility.md
│ │ ├── claude-code-integration.md
│ │ ├── claude-code-quickstart.md
│ │ ├── claude-desktop-setup.md
│ │ ├── complete-setup-guide.md
│ │ ├── database-synchronization.md
│ │ ├── development
│ │ │ ├── autonomous-memory-consolidation.md
│ │ │ ├── CLEANUP_PLAN.md
│ │ │ ├── CLEANUP_README.md
│ │ │ ├── CLEANUP_SUMMARY.md
│ │ │ ├── dream-inspired-memory-consolidation.md
│ │ │ ├── hybrid-slm-memory-consolidation.md
│ │ │ ├── mcp-milestone.md
│ │ │ ├── multi-client-architecture.md
│ │ │ ├── test-results.md
│ │ │ └── TIMESTAMP_FIX_SUMMARY.md
│ │ ├── distributed-sync.md
│ │ ├── invocation_guide.md
│ │ ├── macos-intel.md
│ │ ├── master-guide.md
│ │ ├── mcp-client-configuration.md
│ │ ├── multi-client-server.md
│ │ ├── service-installation.md
│ │ ├── sessions
│ │ │ └── MCP_ENHANCEMENT_SESSION_MEMORY_v4.1.0.md
│ │ ├── UBUNTU_SETUP.md
│ │ ├── ubuntu.md
│ │ ├── windows-setup.md
│ │ └── windows.md
│ ├── docs-root-cleanup-2025-08-23
│ │ ├── AWESOME_LIST_SUBMISSION.md
│ │ ├── CLOUDFLARE_IMPLEMENTATION.md
│ │ ├── DOCUMENTATION_ANALYSIS.md
│ │ ├── DOCUMENTATION_CLEANUP_PLAN.md
│ │ ├── DOCUMENTATION_CONSOLIDATION_COMPLETE.md
│ │ ├── LITESTREAM_SETUP_GUIDE.md
│ │ ├── lm_studio_system_prompt.md
│ │ ├── PYTORCH_DOWNLOAD_FIX.md
│ │ └── README-ORIGINAL-BACKUP.md
│ ├── investigations
│ │ └── MACOS_HOOKS_INVESTIGATION.md
│ ├── litestream-configs-v6.3.0
│ │ ├── install_service.sh
│ │ ├── litestream_master_config_fixed.yml
│ │ ├── litestream_master_config.yml
│ │ ├── litestream_replica_config_fixed.yml
│ │ ├── litestream_replica_config.yml
│ │ ├── litestream_replica_simple.yml
│ │ ├── litestream-http.service
│ │ ├── litestream.service
│ │ └── requirements-cloudflare.txt
│ ├── release-notes
│ │ └── release-notes-v7.1.4.md
│ └── setup-development
│ ├── README.md
│ ├── setup_consolidation_mdns.sh
│ ├── STARTUP_SETUP_GUIDE.md
│ └── test_service.sh
├── CHANGELOG-HISTORIC.md
├── CHANGELOG.md
├── claude_commands
│ ├── memory-context.md
│ ├── memory-health.md
│ ├── memory-ingest-dir.md
│ ├── memory-ingest.md
│ ├── memory-recall.md
│ ├── memory-search.md
│ ├── memory-store.md
│ ├── README.md
│ └── session-start.md
├── claude-hooks
│ ├── config.json
│ ├── config.template.json
│ ├── CONFIGURATION.md
│ ├── core
│ │ ├── memory-retrieval.js
│ │ ├── mid-conversation.js
│ │ ├── session-end.js
│ │ ├── session-start.js
│ │ └── topic-change.js
│ ├── debug-pattern-test.js
│ ├── install_claude_hooks_windows.ps1
│ ├── install_hooks.py
│ ├── memory-mode-controller.js
│ ├── MIGRATION.md
│ ├── README-NATURAL-TRIGGERS.md
│ ├── README-phase2.md
│ ├── README.md
│ ├── simple-test.js
│ ├── statusline.sh
│ ├── test-adaptive-weights.js
│ ├── test-dual-protocol-hook.js
│ ├── test-mcp-hook.js
│ ├── test-natural-triggers.js
│ ├── test-recency-scoring.js
│ ├── tests
│ │ ├── integration-test.js
│ │ ├── phase2-integration-test.js
│ │ ├── test-code-execution.js
│ │ ├── test-cross-session.json
│ │ ├── test-session-tracking.json
│ │ └── test-threading.json
│ ├── utilities
│ │ ├── adaptive-pattern-detector.js
│ │ ├── context-formatter.js
│ │ ├── context-shift-detector.js
│ │ ├── conversation-analyzer.js
│ │ ├── dynamic-context-updater.js
│ │ ├── git-analyzer.js
│ │ ├── mcp-client.js
│ │ ├── memory-client.js
│ │ ├── memory-scorer.js
│ │ ├── performance-manager.js
│ │ ├── project-detector.js
│ │ ├── session-tracker.js
│ │ ├── tiered-conversation-monitor.js
│ │ └── version-checker.js
│ └── WINDOWS-SESSIONSTART-BUG.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development-Sprint-November-2025.md
├── docs
│ ├── amp-cli-bridge.md
│ ├── api
│ │ ├── code-execution-interface.md
│ │ ├── memory-metadata-api.md
│ │ ├── PHASE1_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_IMPLEMENTATION_SUMMARY.md
│ │ ├── PHASE2_REPORT.md
│ │ └── tag-standardization.md
│ ├── architecture
│ │ ├── search-enhancement-spec.md
│ │ └── search-examples.md
│ ├── architecture.md
│ ├── archive
│ │ └── obsolete-workflows
│ │ ├── load_memory_context.md
│ │ └── README.md
│ ├── assets
│ │ └── images
│ │ ├── dashboard-v3.3.0-preview.png
│ │ ├── memory-awareness-hooks-example.png
│ │ ├── project-infographic.svg
│ │ └── README.md
│ ├── CLAUDE_CODE_QUICK_REFERENCE.md
│ ├── cloudflare-setup.md
│ ├── deployment
│ │ ├── docker.md
│ │ ├── dual-service.md
│ │ ├── production-guide.md
│ │ └── systemd-service.md
│ ├── development
│ │ ├── ai-agent-instructions.md
│ │ ├── code-quality
│ │ │ ├── phase-2a-completion.md
│ │ │ ├── phase-2a-handle-get-prompt.md
│ │ │ ├── phase-2a-index.md
│ │ │ ├── phase-2a-install-package.md
│ │ │ └── phase-2b-session-summary.md
│ │ ├── code-quality-workflow.md
│ │ ├── dashboard-workflow.md
│ │ ├── issue-management.md
│ │ ├── pr-review-guide.md
│ │ ├── refactoring-notes.md
│ │ ├── release-checklist.md
│ │ └── todo-tracker.md
│ ├── docker-optimized-build.md
│ ├── document-ingestion.md
│ ├── DOCUMENTATION_AUDIT.md
│ ├── enhancement-roadmap-issue-14.md
│ ├── examples
│ │ ├── analysis-scripts.js
│ │ ├── maintenance-session-example.md
│ │ ├── memory-distribution-chart.jsx
│ │ └── tag-schema.json
│ ├── first-time-setup.md
│ ├── glama-deployment.md
│ ├── guides
│ │ ├── advanced-command-examples.md
│ │ ├── chromadb-migration.md
│ │ ├── commands-vs-mcp-server.md
│ │ ├── mcp-enhancements.md
│ │ ├── mdns-service-discovery.md
│ │ ├── memory-consolidation-guide.md
│ │ ├── migration.md
│ │ ├── scripts.md
│ │ └── STORAGE_BACKENDS.md
│ ├── HOOK_IMPROVEMENTS.md
│ ├── hooks
│ │ └── phase2-code-execution-migration.md
│ ├── http-server-management.md
│ ├── ide-compatability.md
│ ├── IMAGE_RETENTION_POLICY.md
│ ├── images
│ │ └── dashboard-placeholder.md
│ ├── implementation
│ │ ├── health_checks.md
│ │ └── performance.md
│ ├── IMPLEMENTATION_PLAN_HTTP_SSE.md
│ ├── integration
│ │ ├── homebrew.md
│ │ └── multi-client.md
│ ├── integrations
│ │ ├── gemini.md
│ │ ├── groq-bridge.md
│ │ ├── groq-integration-summary.md
│ │ └── groq-model-comparison.md
│ ├── integrations.md
│ ├── legacy
│ │ └── dual-protocol-hooks.md
│ ├── LM_STUDIO_COMPATIBILITY.md
│ ├── maintenance
│ │ └── memory-maintenance.md
│ ├── mastery
│ │ ├── api-reference.md
│ │ ├── architecture-overview.md
│ │ ├── configuration-guide.md
│ │ ├── local-setup-and-run.md
│ │ ├── testing-guide.md
│ │ └── troubleshooting.md
│ ├── migration
│ │ └── code-execution-api-quick-start.md
│ ├── natural-memory-triggers
│ │ ├── cli-reference.md
│ │ ├── installation-guide.md
│ │ └── performance-optimization.md
│ ├── oauth-setup.md
│ ├── pr-graphql-integration.md
│ ├── quick-setup-cloudflare-dual-environment.md
│ ├── README.md
│ ├── remote-configuration-wiki-section.md
│ ├── research
│ │ ├── code-execution-interface-implementation.md
│ │ └── code-execution-interface-summary.md
│ ├── ROADMAP.md
│ ├── sqlite-vec-backend.md
│ ├── statistics
│ │ ├── charts
│ │ │ ├── activity_patterns.png
│ │ │ ├── contributors.png
│ │ │ ├── growth_trajectory.png
│ │ │ ├── monthly_activity.png
│ │ │ └── october_sprint.png
│ │ ├── data
│ │ │ ├── activity_by_day.csv
│ │ │ ├── activity_by_hour.csv
│ │ │ ├── contributors.csv
│ │ │ └── monthly_activity.csv
│ │ ├── generate_charts.py
│ │ └── REPOSITORY_STATISTICS.md
│ ├── technical
│ │ ├── development.md
│ │ ├── memory-migration.md
│ │ ├── migration-log.md
│ │ ├── sqlite-vec-embedding-fixes.md
│ │ └── tag-storage.md
│ ├── testing
│ │ └── regression-tests.md
│ ├── testing-cloudflare-backend.md
│ ├── troubleshooting
│ │ ├── cloudflare-api-token-setup.md
│ │ ├── cloudflare-authentication.md
│ │ ├── general.md
│ │ ├── hooks-quick-reference.md
│ │ ├── pr162-schema-caching-issue.md
│ │ ├── session-end-hooks.md
│ │ └── sync-issues.md
│ └── tutorials
│ ├── advanced-techniques.md
│ ├── data-analysis.md
│ └── demo-session-walkthrough.md
├── examples
│ ├── claude_desktop_config_template.json
│ ├── claude_desktop_config_windows.json
│ ├── claude-desktop-http-config.json
│ ├── config
│ │ └── claude_desktop_config.json
│ ├── http-mcp-bridge.js
│ ├── memory_export_template.json
│ ├── README.md
│ ├── setup
│ │ └── setup_multi_client_complete.py
│ └── start_https_example.sh
├── install_service.py
├── install.py
├── LICENSE
├── NOTICE
├── pyproject.toml
├── pytest.ini
├── README.md
├── run_server.py
├── scripts
│ ├── .claude
│ │ └── settings.local.json
│ ├── archive
│ │ └── check_missing_timestamps.py
│ ├── backup
│ │ ├── backup_memories.py
│ │ ├── backup_sqlite_vec.sh
│ │ ├── export_distributable_memories.sh
│ │ └── restore_memories.py
│ ├── benchmarks
│ │ ├── benchmark_code_execution_api.py
│ │ ├── benchmark_hybrid_sync.py
│ │ └── benchmark_server_caching.py
│ ├── database
│ │ ├── analyze_sqlite_vec_db.py
│ │ ├── check_sqlite_vec_status.py
│ │ ├── db_health_check.py
│ │ └── simple_timestamp_check.py
│ ├── development
│ │ ├── debug_server_initialization.py
│ │ ├── find_orphaned_files.py
│ │ ├── fix_mdns.sh
│ │ ├── fix_sitecustomize.py
│ │ ├── remote_ingest.sh
│ │ ├── setup-git-merge-drivers.sh
│ │ ├── uv-lock-merge.sh
│ │ └── verify_hybrid_sync.py
│ ├── hooks
│ │ └── pre-commit
│ ├── installation
│ │ ├── install_linux_service.py
│ │ ├── install_macos_service.py
│ │ ├── install_uv.py
│ │ ├── install_windows_service.py
│ │ ├── install.py
│ │ ├── setup_backup_cron.sh
│ │ ├── setup_claude_mcp.sh
│ │ └── setup_cloudflare_resources.py
│ ├── linux
│ │ ├── service_status.sh
│ │ ├── start_service.sh
│ │ ├── stop_service.sh
│ │ ├── uninstall_service.sh
│ │ └── view_logs.sh
│ ├── maintenance
│ │ ├── assign_memory_types.py
│ │ ├── check_memory_types.py
│ │ ├── cleanup_corrupted_encoding.py
│ │ ├── cleanup_memories.py
│ │ ├── cleanup_organize.py
│ │ ├── consolidate_memory_types.py
│ │ ├── consolidation_mappings.json
│ │ ├── delete_orphaned_vectors_fixed.py
│ │ ├── fast_cleanup_duplicates_with_tracking.sh
│ │ ├── find_all_duplicates.py
│ │ ├── find_cloudflare_duplicates.py
│ │ ├── find_duplicates.py
│ │ ├── memory-types.md
│ │ ├── README.md
│ │ ├── recover_timestamps_from_cloudflare.py
│ │ ├── regenerate_embeddings.py
│ │ ├── repair_malformed_tags.py
│ │ ├── repair_memories.py
│ │ ├── repair_sqlite_vec_embeddings.py
│ │ ├── repair_zero_embeddings.py
│ │ ├── restore_from_json_export.py
│ │ └── scan_todos.sh
│ ├── migration
│ │ ├── cleanup_mcp_timestamps.py
│ │ ├── legacy
│ │ │ └── migrate_chroma_to_sqlite.py
│ │ ├── mcp-migration.py
│ │ ├── migrate_sqlite_vec_embeddings.py
│ │ ├── migrate_storage.py
│ │ ├── migrate_tags.py
│ │ ├── migrate_timestamps.py
│ │ ├── migrate_to_cloudflare.py
│ │ ├── migrate_to_sqlite_vec.py
│ │ ├── migrate_v5_enhanced.py
│ │ ├── TIMESTAMP_CLEANUP_README.md
│ │ └── verify_mcp_timestamps.py
│ ├── pr
│ │ ├── amp_collect_results.sh
│ │ ├── amp_detect_breaking_changes.sh
│ │ ├── amp_generate_tests.sh
│ │ ├── amp_pr_review.sh
│ │ ├── amp_quality_gate.sh
│ │ ├── amp_suggest_fixes.sh
│ │ ├── auto_review.sh
│ │ ├── detect_breaking_changes.sh
│ │ ├── generate_tests.sh
│ │ ├── lib
│ │ │ └── graphql_helpers.sh
│ │ ├── quality_gate.sh
│ │ ├── resolve_threads.sh
│ │ ├── run_pyscn_analysis.sh
│ │ ├── run_quality_checks.sh
│ │ ├── thread_status.sh
│ │ └── watch_reviews.sh
│ ├── quality
│ │ ├── fix_dead_code_install.sh
│ │ ├── phase1_dead_code_analysis.md
│ │ ├── phase2_complexity_analysis.md
│ │ ├── README_PHASE1.md
│ │ ├── README_PHASE2.md
│ │ ├── track_pyscn_metrics.sh
│ │ └── weekly_quality_review.sh
│ ├── README.md
│ ├── run
│ │ ├── run_mcp_memory.sh
│ │ ├── run-with-uv.sh
│ │ └── start_sqlite_vec.sh
│ ├── run_memory_server.py
│ ├── server
│ │ ├── check_http_server.py
│ │ ├── check_server_health.py
│ │ ├── memory_offline.py
│ │ ├── preload_models.py
│ │ ├── run_http_server.py
│ │ ├── run_memory_server.py
│ │ ├── start_http_server.bat
│ │ └── start_http_server.sh
│ ├── service
│ │ ├── deploy_dual_services.sh
│ │ ├── install_http_service.sh
│ │ ├── mcp-memory-http.service
│ │ ├── mcp-memory.service
│ │ ├── memory_service_manager.sh
│ │ ├── service_control.sh
│ │ ├── service_utils.py
│ │ └── update_service.sh
│ ├── sync
│ │ ├── check_drift.py
│ │ ├── claude_sync_commands.py
│ │ ├── export_memories.py
│ │ ├── import_memories.py
│ │ ├── litestream
│ │ │ ├── apply_local_changes.sh
│ │ │ ├── enhanced_memory_store.sh
│ │ │ ├── init_staging_db.sh
│ │ │ ├── io.litestream.replication.plist
│ │ │ ├── manual_sync.sh
│ │ │ ├── memory_sync.sh
│ │ │ ├── pull_remote_changes.sh
│ │ │ ├── push_to_remote.sh
│ │ │ ├── README.md
│ │ │ ├── resolve_conflicts.sh
│ │ │ ├── setup_local_litestream.sh
│ │ │ ├── setup_remote_litestream.sh
│ │ │ ├── staging_db_init.sql
│ │ │ ├── stash_local_changes.sh
│ │ │ ├── sync_from_remote_noconfig.sh
│ │ │ └── sync_from_remote.sh
│ │ ├── README.md
│ │ ├── safe_cloudflare_update.sh
│ │ ├── sync_memory_backends.py
│ │ └── sync_now.py
│ ├── testing
│ │ ├── run_complete_test.py
│ │ ├── run_memory_test.sh
│ │ ├── simple_test.py
│ │ ├── test_cleanup_logic.py
│ │ ├── test_cloudflare_backend.py
│ │ ├── test_docker_functionality.py
│ │ ├── test_installation.py
│ │ ├── test_mdns.py
│ │ ├── test_memory_api.py
│ │ ├── test_memory_simple.py
│ │ ├── test_migration.py
│ │ ├── test_search_api.py
│ │ ├── test_sqlite_vec_embeddings.py
│ │ ├── test_sse_events.py
│ │ ├── test-connection.py
│ │ └── test-hook.js
│ ├── utils
│ │ ├── claude_commands_utils.py
│ │ ├── generate_personalized_claude_md.sh
│ │ ├── groq
│ │ ├── groq_agent_bridge.py
│ │ ├── list-collections.py
│ │ ├── memory_wrapper_uv.py
│ │ ├── query_memories.py
│ │ ├── smithery_wrapper.py
│ │ ├── test_groq_bridge.sh
│ │ └── uv_wrapper.py
│ └── validation
│ ├── check_dev_setup.py
│ ├── check_documentation_links.py
│ ├── diagnose_backend_config.py
│ ├── validate_configuration_complete.py
│ ├── validate_memories.py
│ ├── validate_migration.py
│ ├── validate_timestamp_integrity.py
│ ├── verify_environment.py
│ ├── verify_pytorch_windows.py
│ └── verify_torch.py
├── SECURITY.md
├── selective_timestamp_recovery.py
├── SPONSORS.md
├── src
│ └── mcp_memory_service
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── operations.py
│ │ ├── sync_wrapper.py
│ │ └── types.py
│ ├── backup
│ │ ├── __init__.py
│ │ └── scheduler.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── ingestion.py
│ │ ├── main.py
│ │ └── utils.py
│ ├── config.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── associations.py
│ │ ├── base.py
│ │ ├── clustering.py
│ │ ├── compression.py
│ │ ├── consolidator.py
│ │ ├── decay.py
│ │ ├── forgetting.py
│ │ ├── health.py
│ │ └── scheduler.py
│ ├── dependency_check.py
│ ├── discovery
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── mdns_service.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ └── onnx_embeddings.py
│ ├── ingestion
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chunker.py
│ │ ├── csv_loader.py
│ │ ├── json_loader.py
│ │ ├── pdf_loader.py
│ │ ├── registry.py
│ │ ├── semtools_loader.py
│ │ └── text_loader.py
│ ├── lm_studio_compat.py
│ ├── mcp_server.py
│ ├── models
│ │ ├── __init__.py
│ │ └── memory.py
│ ├── server.py
│ ├── services
│ │ ├── __init__.py
│ │ └── memory_service.py
│ ├── storage
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloudflare.py
│ │ ├── factory.py
│ │ ├── http_client.py
│ │ ├── hybrid.py
│ │ └── sqlite_vec.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── importer.py
│ │ └── litestream_config.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── content_splitter.py
│ │ ├── db_utils.py
│ │ ├── debug.py
│ │ ├── document_processing.py
│ │ ├── gpu_detection.py
│ │ ├── hashing.py
│ │ ├── http_server_manager.py
│ │ ├── port_detection.py
│ │ ├── system_detection.py
│ │ └── time_parser.py
│ └── web
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── analytics.py
│ │ ├── backup.py
│ │ ├── consolidation.py
│ │ ├── documents.py
│ │ ├── events.py
│ │ ├── health.py
│ │ ├── manage.py
│ │ ├── mcp.py
│ │ ├── memories.py
│ │ ├── search.py
│ │ └── sync.py
│ ├── app.py
│ ├── dependencies.py
│ ├── oauth
│ │ ├── __init__.py
│ │ ├── authorization.py
│ │ ├── discovery.py
│ │ ├── middleware.py
│ │ ├── models.py
│ │ ├── registration.py
│ │ └── storage.py
│ ├── sse.py
│ └── static
│ ├── app.js
│ ├── index.html
│ ├── README.md
│ ├── sse_test.html
│ └── style.css
├── start_http_debug.bat
├── start_http_server.sh
├── test_document.txt
├── test_version_checker.js
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── test_compact_types.py
│ │ └── test_operations.py
│ ├── bridge
│ │ ├── mock_responses.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── test_http_mcp_bridge.js
│ ├── conftest.py
│ ├── consolidation
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_associations.py
│ │ ├── test_clustering.py
│ │ ├── test_compression.py
│ │ ├── test_consolidator.py
│ │ ├── test_decay.py
│ │ └── test_forgetting.py
│ ├── contracts
│ │ └── api-specification.yml
│ ├── integration
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ ├── test_api_key_fallback.py
│ │ ├── test_api_memories_chronological.py
│ │ ├── test_api_tag_time_search.py
│ │ ├── test_api_with_memory_service.py
│ │ ├── test_bridge_integration.js
│ │ ├── test_cli_interfaces.py
│ │ ├── test_cloudflare_connection.py
│ │ ├── test_concurrent_clients.py
│ │ ├── test_data_serialization_consistency.py
│ │ ├── test_http_server_startup.py
│ │ ├── test_mcp_memory.py
│ │ ├── test_mdns_integration.py
│ │ ├── test_oauth_basic_auth.py
│ │ ├── test_oauth_flow.py
│ │ ├── test_server_handlers.py
│ │ └── test_store_memory.py
│ ├── performance
│ │ ├── test_background_sync.py
│ │ └── test_hybrid_live.py
│ ├── README.md
│ ├── smithery
│ │ └── test_smithery.py
│ ├── sqlite
│ │ └── simple_sqlite_vec_test.py
│ ├── test_client.py
│ ├── test_content_splitting.py
│ ├── test_database.py
│ ├── test_hybrid_cloudflare_limits.py
│ ├── test_hybrid_storage.py
│ ├── test_memory_ops.py
│ ├── test_semantic_search.py
│ ├── test_sqlite_vec_storage.py
│ ├── test_time_parser.py
│ ├── test_timestamp_preservation.py
│ ├── timestamp
│ │ ├── test_hook_vs_manual_storage.py
│ │ ├── test_issue99_final_validation.py
│ │ ├── test_search_retrieval_inconsistency.py
│ │ ├── test_timestamp_issue.py
│ │ └── test_timestamp_simple.py
│ └── unit
│ ├── conftest.py
│ ├── test_cloudflare_storage.py
│ ├── test_csv_loader.py
│ ├── test_fastapi_dependencies.py
│ ├── test_import.py
│ ├── test_json_loader.py
│ ├── test_mdns_simple.py
│ ├── test_mdns.py
│ ├── test_memory_service.py
│ ├── test_memory.py
│ ├── test_semtools_loader.py
│ ├── test_storage_interface_compatibility.py
│ └── test_tag_time_filtering.py
├── tools
│ ├── docker
│ │ ├── DEPRECATED.md
│ │ ├── docker-compose.http.yml
│ │ ├── docker-compose.pythonpath.yml
│ │ ├── docker-compose.standalone.yml
│ │ ├── docker-compose.uv.yml
│ │ ├── docker-compose.yml
│ │ ├── docker-entrypoint-persistent.sh
│ │ ├── docker-entrypoint-unified.sh
│ │ ├── docker-entrypoint.sh
│ │ ├── Dockerfile
│ │ ├── Dockerfile.glama
│ │ ├── Dockerfile.slim
│ │ ├── README.md
│ │ └── test-docker-modes.sh
│ └── README.md
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/claude-hooks/utilities/memory-scorer.js:
--------------------------------------------------------------------------------
```javascript
1 | /**
2 | * Memory Relevance Scoring Utility
3 | * Implements intelligent algorithms to score memories by relevance to current project context
4 | * Phase 2: Enhanced with conversation context awareness for dynamic memory loading
5 | */
6 |
7 | /**
8 | * Calculate time decay factor for memory relevance
9 | * More recent memories get higher scores
10 | */
11 | function calculateTimeDecay(memoryDate, decayRate = 0.1) {
12 | try {
13 | const now = new Date();
14 |
15 | // Handle both Unix timestamps (seconds) and ISO strings
16 | let memoryTime;
17 | if (typeof memoryDate === 'string') {
18 | // ISO string format
19 | memoryTime = new Date(memoryDate);
20 | } else if (typeof memoryDate === 'number') {
21 | // Unix timestamp in seconds, convert to milliseconds
22 | memoryTime = new Date(memoryDate * 1000);
23 | } else {
24 | return 0.5; // Invalid format
25 | }
26 |
27 | if (isNaN(memoryTime.getTime())) {
28 | return 0.5; // Default score for invalid dates
29 | }
30 |
31 | // Calculate days since memory creation
32 | const daysDiff = (now - memoryTime) / (1000 * 60 * 60 * 24);
33 |
34 | // Exponential decay: score = e^(-decayRate * days)
35 | // With the default rate of 0.1: day 0 → 1.00, day 7 → ~0.50, day 30 → ~0.05,
36 | // so month-old memories contribute very little and older ones
37 | // bottom out at the 0.01 floor enforced below
38 | const decayScore = Math.exp(-decayRate * daysDiff);
39 |
40 | // Ensure score is between 0 and 1
41 | return Math.max(0.01, Math.min(1.0, decayScore));
42 |
43 | } catch (error) {
44 | // Silently fail with default score to avoid noise
45 | return 0.5;
46 | }
47 | }
48 |
49 | /**
50 | * Calculate tag relevance score
51 | * Memories with tags matching project context get higher scores
52 | */
53 | function calculateTagRelevance(memoryTags = [], projectContext) {
54 | try {
55 | if (!Array.isArray(memoryTags) || memoryTags.length === 0) {
56 | return 0.3; // Default score for memories without tags
57 | }
58 |
59 | const contextTags = [
60 | projectContext.name?.toLowerCase(),
61 | projectContext.language?.toLowerCase(),
62 | ...(projectContext.frameworks || []).map(f => f.toLowerCase()),
63 | ...(projectContext.tools || []).map(t => t.toLowerCase())
64 | ].filter(Boolean);
65 |
66 | if (contextTags.length === 0) {
67 | return 0.5; // No context to match against
68 | }
69 |
70 | // Calculate tag overlap (exact match only to prevent cross-project pollution)
71 | const memoryTagsLower = memoryTags.map(tag => tag.toLowerCase());
72 | const matchingTags = contextTags.filter(contextTag =>
73 | memoryTagsLower.includes(contextTag)
74 | );
75 |
76 | // Score based on percentage of matching tags
77 | const overlapScore = matchingTags.length / contextTags.length;
78 |
79 | // Bonus for exact project name matches
80 | const exactProjectMatch = memoryTagsLower.includes(projectContext.name?.toLowerCase());
81 | const projectBonus = exactProjectMatch ? 0.3 : 0;
82 |
83 | // Bonus for exact language matches
84 | const exactLanguageMatch = memoryTagsLower.includes(projectContext.language?.toLowerCase());
85 | const languageBonus = exactLanguageMatch ? 0.2 : 0;
86 |
87 | // Bonus for framework matches
88 | const frameworkMatches = (projectContext.frameworks || []).filter(framework =>
89 | memoryTagsLower.some(tag => tag.includes(framework.toLowerCase()))
90 | );
91 | const frameworkBonus = frameworkMatches.length * 0.1;
92 |
93 | const totalScore = Math.min(1.0, overlapScore + projectBonus + languageBonus + frameworkBonus);
94 |
95 | return Math.max(0.1, totalScore);
96 |
97 | } catch (error) {
98 | // Silently fail with default score to avoid noise
99 | return 0.3;
100 | }
101 | }
102 |
103 | /**
104 | * Calculate content quality score to penalize generic/empty content
105 | */
106 | function calculateContentQuality(memoryContent = '') {
107 | try {
108 | if (!memoryContent || typeof memoryContent !== 'string') {
109 | return 0.1;
110 | }
111 |
112 | const content = memoryContent.trim();
113 |
114 | // Check for generic session summary patterns
115 | const genericPatterns = [
116 | /## 🎯 Topics Discussed\s*-\s*implementation\s*-\s*\.\.\.?$/m,
117 | /Topics Discussed.*implementation.*\.\.\..*$/s,
118 | /Session Summary.*implementation.*\.\.\..*$/s,
119 | /^# Session Summary.*Date.*Project.*Topics Discussed.*implementation.*\.\.\..*$/s
120 | ];
121 |
122 | const isGeneric = genericPatterns.some(pattern => pattern.test(content));
123 | if (isGeneric) {
124 | return 0.05; // Heavily penalize generic content
125 | }
126 |
127 | // Check content length and substance
128 | if (content.length < 50) {
129 | return 0.2; // Short content gets low score
130 | }
131 |
132 | // Check for meaningful content indicators
133 | const meaningfulIndicators = [
134 | 'decided', 'implemented', 'changed', 'fixed', 'created', 'updated',
135 | 'because', 'reason', 'approach', 'solution', 'result', 'impact',
136 | 'learned', 'discovered', 'found', 'issue', 'problem', 'challenge'
137 | ];
138 |
139 | const meaningfulMatches = meaningfulIndicators.filter(indicator =>
140 | content.toLowerCase().includes(indicator)
141 | ).length;
142 |
143 | // Calculate information density
144 | const words = content.split(/\s+/).filter(w => w.length > 2);
145 | const uniqueWords = new Set(words.map(w => w.toLowerCase()));
146 | const diversityRatio = uniqueWords.size / Math.max(words.length, 1);
147 |
148 | // Combine factors
149 | const meaningfulnessScore = Math.min(0.4, meaningfulMatches * 0.08);
150 | const diversityScore = Math.min(0.3, diversityRatio * 0.5);
151 | const lengthScore = Math.min(0.3, content.length / 1000); // Longer content gets bonus
152 |
153 | const qualityScore = meaningfulnessScore + diversityScore + lengthScore;
154 | return Math.max(0.05, Math.min(1.0, qualityScore));
155 |
156 | } catch (error) {
157 | // Silently fail with default score to avoid noise
158 | return 0.3;
159 | }
160 | }
161 |
162 | /**
163 | * Calculate content relevance using simple text analysis
164 | * Memories with content matching project keywords get higher scores
165 | */
166 | function calculateContentRelevance(memoryContent = '', projectContext) {
167 | try {
168 | if (!memoryContent || typeof memoryContent !== 'string') {
169 | return 0.3;
170 | }
171 |
172 | const content = memoryContent.toLowerCase();
173 | const keywords = [
174 | projectContext.name?.toLowerCase(),
175 | projectContext.language?.toLowerCase(),
176 | ...(projectContext.frameworks || []).map(f => f.toLowerCase()),
177 | ...(projectContext.tools || []).map(t => t.toLowerCase()),
178 | // Add common technical keywords
179 | 'architecture', 'decision', 'implementation', 'bug', 'fix',
180 | 'feature', 'config', 'setup', 'deployment', 'performance'
181 | ].filter(Boolean);
182 |
183 | if (keywords.length === 0) {
184 | return 0.5;
185 | }
186 |
187 | // Count keyword occurrences
188 | let totalMatches = 0;
189 | let keywordScore = 0;
190 |
191 | keywords.forEach(keyword => {
192 | const occurrences = (content.match(new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')) || []).length; // escape regex metacharacters so keywords like "c++" don't throw
193 | if (occurrences > 0) {
194 | totalMatches++;
195 | keywordScore += Math.log(1 + occurrences) * 0.1; // Logarithmic scoring
196 | }
197 | });
198 |
199 | // Normalize score
200 | const matchRatio = totalMatches / keywords.length;
201 | const contentScore = Math.min(1.0, matchRatio + keywordScore);
202 |
203 | return Math.max(0.1, contentScore);
204 |
205 | } catch (error) {
206 | // Silently fail with default score to avoid noise
207 | return 0.3;
208 | }
209 | }
210 |
211 | /**
212 | * Calculate memory type bonus
213 | * Certain memory types are more valuable for context injection
214 | */
215 | function calculateTypeBonus(memoryType) {
216 | const typeScores = {
217 | 'decision': 0.3, // Architectural decisions are highly valuable
218 | 'architecture': 0.3, // Architecture documentation is important
219 | 'reference': 0.2, // Reference materials are useful
220 | 'session': 0.15, // Session summaries provide good context
221 | 'insight': 0.2, // Insights are valuable for learning
222 | 'bug-fix': 0.15, // Bug fixes provide historical context
223 | 'feature': 0.1, // Feature descriptions are moderately useful
224 | 'note': 0.05, // General notes are less critical
225 | 'todo': 0.05, // TODOs are task-specific
226 | 'temporary': -0.1 // Temporary notes should be deprioritized
227 | };
228 |
229 | return typeScores[memoryType?.toLowerCase()] || 0;
230 | }
231 |
232 | /**
233 | * Calculate recency bonus to prioritize very recent memories
234 | * Provides explicit boost for memories created within specific time windows
235 | */
236 | function calculateRecencyBonus(memoryDate) {
237 | // Recency bonus tiers (days and corresponding bonus values)
238 | const RECENCY_TIERS = [
239 | { days: 7, bonus: 0.15 }, // Strong boost for last week
240 | { days: 14, bonus: 0.10 }, // Moderate boost for last 2 weeks
241 | { days: 30, bonus: 0.05 } // Small boost for last month
242 | ];
243 |
244 | try {
245 | const now = new Date();
246 |
247 | // Handle both Unix timestamps (seconds) and ISO strings
248 | let memoryTime;
249 | if (typeof memoryDate === 'string') {
250 | // ISO string format
251 | memoryTime = new Date(memoryDate);
252 | } else if (typeof memoryDate === 'number') {
253 | // Unix timestamp in seconds, convert to milliseconds
254 | memoryTime = new Date(memoryDate * 1000);
255 | } else {
256 | return 0; // Invalid format
257 | }
258 |
259 | if (isNaN(memoryTime.getTime()) || memoryTime > now) {
260 | return 0; // No bonus for invalid or future dates
261 | }
262 |
263 | const daysDiff = (now - memoryTime) / (1000 * 60 * 60 * 24);
264 |
265 | // Find the appropriate tier for this memory's age
266 | for (const tier of RECENCY_TIERS) {
267 | if (daysDiff <= tier.days) {
268 | return tier.bonus;
269 | }
270 | }
271 |
272 | return 0; // No bonus for older memories
273 |
274 | } catch (error) {
275 | return 0;
276 | }
277 | }
278 |
279 | /**
280 | * Calculate conversation context relevance score (Phase 2)
281 | * Matches memory content with current conversation topics and intent
282 | */
283 | function calculateConversationRelevance(memory, conversationAnalysis) {
284 | try {
285 | if (!conversationAnalysis || !memory.content) {
286 | return 0.3; // Default score when no conversation context
287 | }
288 |
289 | const memoryContent = memory.content.toLowerCase();
290 | let relevanceScore = 0;
291 | let factorCount = 0;
292 |
293 | // Score based on topic matching
294 | if (conversationAnalysis.topics && conversationAnalysis.topics.length > 0) {
295 | conversationAnalysis.topics.forEach(topic => {
296 | const topicMatches = (memoryContent.match(new RegExp(topic.name, 'gi')) || []).length;
297 | if (topicMatches > 0) {
298 | relevanceScore += topic.confidence * Math.min(topicMatches * 0.2, 0.8);
299 | factorCount++;
300 | }
301 | });
302 | }
303 |
304 | // Score based on entity matching
305 | if (conversationAnalysis.entities && conversationAnalysis.entities.length > 0) {
306 | conversationAnalysis.entities.forEach(entity => {
307 | const entityMatches = (memoryContent.match(new RegExp(entity.name, 'gi')) || []).length;
308 | if (entityMatches > 0) {
309 | relevanceScore += entity.confidence * 0.3;
310 | factorCount++;
311 | }
312 | });
313 | }
314 |
315 | // Score based on intent alignment
316 | if (conversationAnalysis.intent) {
317 | const intentKeywords = {
318 | 'learning': ['learn', 'understand', 'explain', 'how', 'tutorial', 'guide'],
319 | 'problem-solving': ['fix', 'error', 'debug', 'issue', 'problem', 'solve'],
320 | 'development': ['build', 'create', 'implement', 'develop', 'code', 'feature'],
321 | 'optimization': ['optimize', 'improve', 'performance', 'faster', 'better'],
322 | 'review': ['review', 'check', 'analyze', 'audit', 'validate'],
323 | 'planning': ['plan', 'design', 'architecture', 'approach', 'strategy']
324 | };
325 |
326 | const intentWords = intentKeywords[conversationAnalysis.intent.name] || [];
327 | let intentMatches = 0;
328 | intentWords.forEach(word => {
329 | if (memoryContent.includes(word)) {
330 | intentMatches++;
331 | }
332 | });
333 |
334 | if (intentMatches > 0) {
335 | relevanceScore += conversationAnalysis.intent.confidence * (intentMatches / intentWords.length);
336 | factorCount++;
337 | }
338 | }
339 |
340 | // Score based on code context if present
341 | if (conversationAnalysis.codeContext && conversationAnalysis.codeContext.isCodeRelated) {
342 | const codeIndicators = ['code', 'function', 'class', 'method', 'variable', 'api', 'library'];
343 | let codeMatches = 0;
344 | codeIndicators.forEach(indicator => {
345 | if (memoryContent.includes(indicator)) {
346 | codeMatches++;
347 | }
348 | });
349 |
350 | if (codeMatches > 0) {
351 | relevanceScore += 0.4 * (codeMatches / codeIndicators.length);
352 | factorCount++;
353 | }
354 | }
355 |
356 | // Normalize score
357 | const normalizedScore = factorCount > 0 ? relevanceScore / factorCount : 0.3;
358 | return Math.max(0.1, Math.min(1.0, normalizedScore));
359 |
360 | } catch (error) {
361 | // Silently fail with default score to avoid noise
362 | return 0.3;
363 | }
364 | }
365 |
366 | /**
367 | * Calculate final relevance score for a memory (Enhanced with quality scoring)
368 | */
369 | function calculateRelevanceScore(memory, projectContext, options = {}) {
370 | try {
371 | const {
372 | weights = {},
373 | timeDecayRate = 0.1, // Default decay rate
374 | includeConversationContext = false,
375 | conversationAnalysis = null
376 | } = options;
377 |
378 | // Default weights including content quality factor
379 | const defaultWeights = includeConversationContext ? {
380 | timeDecay: 0.20, // Reduced weight for time
381 | tagRelevance: 0.30, // Tag matching remains important
382 | contentRelevance: 0.15, // Content matching reduced
383 | contentQuality: 0.25, // New quality factor
384 | conversationRelevance: 0.25, // Conversation context factor
385 | typeBonus: 0.05 // Memory type provides minor adjustment
386 | } : {
387 | timeDecay: 0.25, // Reduced time weight
388 | tagRelevance: 0.35, // Tag matching important
389 | contentRelevance: 0.15, // Content matching
390 | contentQuality: 0.25, // Quality factor prioritized
391 | typeBonus: 0.05 // Type bonus reduced
392 | };
393 |
394 | const w = { ...defaultWeights, ...weights };
395 |
396 | // Calculate individual scores
397 | const timeScore = calculateTimeDecay(memory.created_at || memory.created_at_iso, timeDecayRate);
398 | const tagScore = calculateTagRelevance(memory.tags, projectContext);
399 | const contentScore = calculateContentRelevance(memory.content, projectContext);
400 | const qualityScore = calculateContentQuality(memory.content);
401 | const typeBonus = calculateTypeBonus(memory.memory_type);
402 | const recencyBonus = calculateRecencyBonus(memory.created_at || memory.created_at_iso);
403 |
404 | let finalScore = (
405 | (timeScore * w.timeDecay) +
406 | (tagScore * w.tagRelevance) +
407 | (contentScore * w.contentRelevance) +
408 | (qualityScore * w.contentQuality) +
409 | typeBonus + // Added unweighted as a flat adjustment (w.typeBonus is defined but not applied)
410 | recencyBonus // Flat boost for memories inside the recency tiers
411 | );
412 |
413 | const breakdown = {
414 | timeDecay: timeScore,
415 | tagRelevance: tagScore,
416 | contentRelevance: contentScore,
417 | contentQuality: qualityScore,
418 | typeBonus: typeBonus,
419 | recencyBonus: recencyBonus
420 | };
421 |
422 | // Add conversation context scoring if enabled (Phase 2)
423 | if (includeConversationContext && conversationAnalysis) {
424 | const conversationScore = calculateConversationRelevance(memory, conversationAnalysis);
425 | finalScore += (conversationScore * (w.conversationRelevance || 0));
426 | breakdown.conversationRelevance = conversationScore;
427 | }
428 |
429 | // Apply quality penalty for very low quality content (multiplicative)
430 | if (qualityScore < 0.2) {
431 | finalScore *= 0.5; // Heavily penalize low quality content
432 | }
433 |
434 | // Apply project affinity penalty - memories without project tag match get penalized
435 | // This prevents cross-project memory pollution (e.g., Azure memories in Python project)
436 | const memoryTags = (memory.tags || []).map(t => t.toLowerCase());
437 | const memoryContent = (memory.content || '').toLowerCase();
438 | const projectName = projectContext.name?.toLowerCase();
439 |
440 | // Check for project name in tags OR content
441 | const hasProjectTag = projectName && (
442 | memoryTags.some(tag => tag === projectName || tag.includes(projectName)) ||
443 | memoryContent.includes(projectName)
444 | );
445 |
446 | if (!hasProjectTag && tagScore < 0.3) {
447 | // No project reference at all - definitely unrelated memory
448 | // Hard filter: set score to 0 to exclude from results entirely
449 | finalScore = 0;
450 | breakdown.projectAffinity = 'none (filtered)';
451 | } else if (!hasProjectTag) {
452 | // Some tag relevance but no project tag - might be related
453 | finalScore *= 0.5; // Moderate penalty
454 | breakdown.projectAffinity = 'low';
455 | } else {
456 | breakdown.projectAffinity = 'high';
457 | }
458 |
459 | // Ensure score is between 0 and 1
460 | const normalizedScore = Math.max(0, Math.min(1, finalScore));
461 |
462 | return {
463 | finalScore: normalizedScore,
464 | breakdown: breakdown,
465 | weights: w,
466 | hasConversationContext: includeConversationContext
467 | };
468 |
469 | } catch (error) {
470 | // Silently fail with default score to avoid noise
471 | return {
472 | finalScore: 0.1,
473 | breakdown: { error: error.message },
474 | weights: {},
475 | hasConversationContext: false
476 | };
477 | }
478 | }
479 |
480 | /**
481 | * Score and sort memories by relevance
482 | */
483 | function scoreMemoryRelevance(memories, projectContext, options = {}) {
484 | const { verbose = true } = options; // hoisted above try so the catch block can reference it
485 | try {
486 |
487 | if (!Array.isArray(memories)) {
488 | if (verbose) console.warn('[Memory Scorer] Invalid memories array');
489 | return [];
490 | }
491 |
492 | if (verbose) {
493 | console.log(`[Memory Scorer] Scoring ${memories.length} memories for project: ${projectContext.name}`);
494 | }
495 |
496 | // Score each memory
497 | const scoredMemories = memories.map(memory => {
498 | const scoreResult = calculateRelevanceScore(memory, projectContext, options);
499 |
500 | return {
501 | ...memory,
502 | relevanceScore: scoreResult.finalScore,
503 | scoreBreakdown: scoreResult.breakdown,
504 | hasConversationContext: scoreResult.hasConversationContext
505 | };
506 | });
507 |
508 | // Sort by relevance score (highest first)
509 | const sortedMemories = scoredMemories.sort((a, b) => b.relevanceScore - a.relevanceScore);
510 |
511 | // Log scoring results for debugging
512 | if (verbose) {
513 | console.log('[Memory Scorer] Top scored memories:');
514 | sortedMemories.slice(0, 3).forEach((memory, index) => {
515 | console.log(` ${index + 1}. Score: ${memory.relevanceScore.toFixed(3)} - ${memory.content.substring(0, 60)}...`);
516 | });
517 | }
518 |
519 | return sortedMemories;
520 |
521 | } catch (error) {
522 | if (verbose) console.error('[Memory Scorer] Error scoring memories:', error.message);
523 | return memories || [];
524 | }
525 | }
526 |
527 | /**
528 | * Filter memories by minimum relevance threshold
529 | */
530 | function filterByRelevance(memories, minScore = 0.3, options = {}) {
531 | const { verbose = true } = options; // hoisted above try for use in catch
532 | try {
533 | const filtered = memories.filter(memory => memory.relevanceScore >= minScore);
534 | if (verbose) {
535 | console.log(`[Memory Scorer] Filtered ${filtered.length}/${memories.length} memories above threshold ${minScore}`);
536 | }
537 | return filtered;
538 |
539 | } catch (error) {
540 | if (verbose) console.warn('[Memory Scorer] Error filtering memories:', error.message);
541 | return memories;
542 | }
543 | }
544 |
545 | /**
546 | * Analyze memory age distribution to detect staleness
547 | * Returns statistics and recommended weight adjustments
548 | */
549 | function analyzeMemoryAgeDistribution(memories, options = {}) {
550 | const { verbose = false } = options; // hoisted above try for use in catch
551 | try {
552 |
553 | if (!Array.isArray(memories) || memories.length === 0) {
554 | return {
555 | avgAge: 0,
556 | medianAge: 0,
557 | p75Age: 0,
558 | p90Age: 0,
559 | recentCount: 0,
560 | staleCount: 0,
561 | isStale: false,
562 | recommendedAdjustments: {}
563 | };
564 | }
565 |
566 | const now = new Date();
567 |
568 | // Calculate ages in days
569 | const ages = memories.map(memory => {
570 | // Handle both Unix timestamps (seconds) and ISO strings
571 | let memoryTime;
572 | if (memory.created_at_iso) {
573 | memoryTime = new Date(memory.created_at_iso);
574 | } else if (memory.created_at) {
575 | // created_at is in seconds, convert to milliseconds
576 | memoryTime = new Date(memory.created_at * 1000);
577 | } else {
578 | return 365; // Default to very old if no timestamp
579 | }
580 |
581 | if (isNaN(memoryTime.getTime())) return 365; // Default to very old
582 | return (now - memoryTime) / (1000 * 60 * 60 * 24);
583 | }).sort((a, b) => a - b);
584 |
585 | // Calculate percentiles
586 | const avgAge = ages.reduce((sum, age) => sum + age, 0) / ages.length;
587 | const medianAge = ages[Math.floor(ages.length / 2)];
588 | const p75Age = ages[Math.floor(ages.length * 0.75)];
589 | const p90Age = ages[Math.floor(ages.length * 0.90)];
590 |
591 | // Count recent vs stale
592 | const recentCount = ages.filter(age => age <= 14).length; // Last 2 weeks
593 | const staleCount = ages.filter(age => age > 30).length; // Older than 1 month
594 |
595 | // Determine if memory set is stale
596 | const isStale = medianAge > 30 || (recentCount / ages.length) < 0.2;
597 |
598 | // Recommended adjustments based on staleness
599 | const recommendedAdjustments = {};
600 |
601 | if (isStale) {
602 | // Memories are old - boost time decay weight, reduce tag relevance
603 | recommendedAdjustments.timeDecay = 0.50; // Increase from default 0.25
604 | recommendedAdjustments.tagRelevance = 0.20; // Decrease from default 0.35
605 | recommendedAdjustments.recencyBonus = 0.25; // Increase bonus for any recent memories
606 | recommendedAdjustments.reason = `Stale memory set detected (median: ${Math.round(medianAge)}d old, ${Math.round(recentCount/ages.length*100)}% recent)`;
607 | } else if (avgAge < 14) {
608 | // Memories are very recent - balanced approach
609 | recommendedAdjustments.timeDecay = 0.30;
610 | recommendedAdjustments.tagRelevance = 0.30;
611 | recommendedAdjustments.reason = `Recent memory set (avg: ${Math.round(avgAge)}d old)`;
612 | }
613 |
614 | if (verbose) {
615 | console.log('[Memory Age Analyzer]', {
616 | avgAge: Math.round(avgAge),
617 | medianAge: Math.round(medianAge),
618 | p75Age: Math.round(p75Age),
619 | recentPercent: Math.round(recentCount / ages.length * 100),
620 | isStale,
621 | adjustments: recommendedAdjustments.reason || 'No adjustments needed'
622 | });
623 | }
624 |
625 | return {
626 | avgAge,
627 | medianAge,
628 | p75Age,
629 | p90Age,
630 | recentCount,
631 | staleCount,
632 | totalCount: ages.length,
633 | isStale,
634 | recommendedAdjustments
635 | };
636 |
637 | } catch (error) {
638 | if (verbose) console.error('[Memory Age Analyzer] Error:', error.message);
639 | return {
640 | avgAge: 0,
641 | medianAge: 0,
642 | p75Age: 0,
643 | p90Age: 0,
644 | recentCount: 0,
645 | staleCount: 0,
646 | isStale: false,
647 | recommendedAdjustments: {}
648 | };
649 | }
650 | }
651 |
652 | /**
653 | * Calculate adaptive git context weight based on memory age and git activity
654 | * Prevents old git-related memories from dominating when recent development exists
655 | */
656 | function calculateAdaptiveGitWeight(gitContext, memoryAgeAnalysis, configuredWeight = 1.2, options = {}) {
657 | const { verbose = false } = options; // hoisted above try for use in catch
658 | try {
659 |
660 | // No git context or no recent commits - use configured weight
661 | if (!gitContext || !gitContext.recentCommits || gitContext.recentCommits.length === 0) {
662 | return { weight: configuredWeight, reason: 'No recent git activity' };
663 | }
664 |
665 | // Calculate days since most recent commit
666 | const now = new Date();
667 | const mostRecentCommit = new Date(gitContext.recentCommits[0].date);
668 | const daysSinceLastCommit = (now - mostRecentCommit) / (1000 * 60 * 60 * 24);
669 |
670 | // Scenario 1: Recent commits (< 7d) BUT stale memories (median > 30d)
671 | // Problem: Git boost would amplify old git memories over potential recent work
672 | if (daysSinceLastCommit <= 7 && memoryAgeAnalysis.medianAge > 30) {
673 | const reducedWeight = Math.max(1.0, configuredWeight * 0.7); // Reduce by 30%
674 | const reason = `Recent commits (${Math.round(daysSinceLastCommit)}d ago) but stale memories (median: ${Math.round(memoryAgeAnalysis.medianAge)}d) - reducing git boost`;
675 |
676 | if (verbose) {
677 | console.log(`[Adaptive Git Weight] ${reason}: ${configuredWeight.toFixed(1)} → ${reducedWeight.toFixed(1)}`);
678 | }
679 |
680 | return { weight: reducedWeight, reason, adjusted: true };
681 | }
682 |
683 | // Scenario 2: Both commits and memories are recent (< 14d)
684 | // Safe to use configured weight, git context is relevant
685 | if (daysSinceLastCommit <= 14 && memoryAgeAnalysis.avgAge <= 14) {
686 | return {
687 | weight: configuredWeight,
688 | reason: `Recent commits and memories aligned (${Math.round(daysSinceLastCommit)}d commits, ${Math.round(memoryAgeAnalysis.avgAge)}d avg memory age)`,
689 | adjusted: false
690 | };
691 | }
692 |
693 | // Scenario 3: Old commits (> 14d) but recent memories exist
694 | // Slightly reduce git weight to let recent non-git memories surface
695 | if (daysSinceLastCommit > 14 && memoryAgeAnalysis.recentCount > 0) {
696 | const reducedWeight = Math.max(1.0, configuredWeight * 0.85); // Reduce by 15%
697 | const reason = `Older commits (${Math.round(daysSinceLastCommit)}d ago) with some recent memories - slightly reducing git boost`;
698 |
699 | if (verbose) {
700 | console.log(`[Adaptive Git Weight] ${reason}: ${configuredWeight.toFixed(1)} → ${reducedWeight.toFixed(1)}`);
701 | }
702 |
703 | return { weight: reducedWeight, reason, adjusted: true };
704 | }
705 |
706 | // Default: use configured weight
707 | return { weight: configuredWeight, reason: 'Using configured weight', adjusted: false };
708 |
709 | } catch (error) {
710 | if (verbose) console.error('[Adaptive Git Weight] Error:', error.message);
711 | return { weight: configuredWeight, reason: 'Error - using fallback', adjusted: false };
712 | }
713 | }
714 |
715 | module.exports = {
716 | scoreMemoryRelevance,
717 | calculateRelevanceScore,
718 | calculateTimeDecay,
719 | calculateTagRelevance,
720 | calculateContentRelevance,
721 | calculateTypeBonus,
722 | calculateRecencyBonus,
723 | filterByRelevance,
724 | analyzeMemoryAgeDistribution,
725 | calculateAdaptiveGitWeight
726 | };
727 |
728 | // Direct execution support for testing
729 | if (require.main === module) {
730 | // Test with mock data
731 | const mockProjectContext = {
732 | name: 'mcp-memory-service',
733 | language: 'JavaScript',
734 | frameworks: ['Node.js'],
735 | tools: ['npm']
736 | };
737 |
738 | const mockMemories = [
739 | {
740 | content: 'Decided to use SQLite-vec for better performance in MCP Memory Service',
741 | tags: ['mcp-memory-service', 'decision', 'sqlite-vec'],
742 | memory_type: 'decision',
743 | created_at: '2025-08-19T10:00:00Z'
744 | },
745 | {
746 | content: 'Fixed bug in JavaScript hook implementation for Claude Code integration',
747 | tags: ['javascript', 'bug-fix', 'claude-code'],
748 | memory_type: 'bug-fix',
749 | created_at: '2025-08-18T15:30:00Z'
750 | },
751 | {
752 | content: 'Random note about completely unrelated project',
753 | tags: ['other-project', 'note'],
754 | memory_type: 'note',
755 | created_at: '2025-08-01T08:00:00Z'
756 | }
757 | ];
758 |
759 | console.log('\n=== MEMORY SCORING TEST ===');
760 | const scored = scoreMemoryRelevance(mockMemories, mockProjectContext);
761 | console.log('\n=== SCORED RESULTS ===');
762 | scored.forEach((memory, index) => {
763 | console.log(`${index + 1}. Score: ${memory.relevanceScore.toFixed(3)}`);
764 | console.log(` Content: ${memory.content.substring(0, 80)}...`);
765 | console.log(` Breakdown:`, memory.scoreBreakdown);
766 | console.log('');
767 | });
768 | }
```
--------------------------------------------------------------------------------
/examples/http-mcp-bridge.js:
--------------------------------------------------------------------------------
```javascript
1 | #!/usr/bin/env node
2 | /**
3 | * HTTP-to-MCP Bridge for MCP Memory Service
4 | *
5 | * This bridge allows MCP clients (like Claude Desktop) to connect to a remote
6 | * MCP Memory Service HTTP server instead of running a local instance.
7 | *
8 | * Features:
9 | * - Automatic service discovery via mDNS (Bonjour/Zeroconf)
10 | * - Manual endpoint configuration fallback
11 | * - HTTPS support with self-signed certificate handling
12 | * - API key authentication
13 | *
14 | * Usage in Claude Desktop config:
15 | *
16 | * Option 1: Auto-discovery (recommended for local networks)
17 | * {
18 | * "mcpServers": {
19 | * "memory": {
20 | * "command": "node",
21 | * "args": ["/path/to/http-mcp-bridge.js"],
22 | * "env": {
23 | * "MCP_MEMORY_AUTO_DISCOVER": "true",
24 | * "MCP_MEMORY_PREFER_HTTPS": "true",
25 | * "MCP_MEMORY_API_KEY": "your-api-key"
26 | * }
27 | * }
28 | * }
29 | * }
30 | *
31 | * Option 2: Manual configuration
32 | * {
33 | * "mcpServers": {
34 | * "memory": {
35 | * "command": "node",
36 | * "args": ["/path/to/http-mcp-bridge.js"],
37 | * "env": {
38 | * "MCP_MEMORY_HTTP_ENDPOINT": "https://your-server:8000/api",
39 | * "MCP_MEMORY_API_KEY": "your-api-key"
40 | * }
41 | * }
42 | * }
43 | * }
44 | */
45 |
46 | const http = require('http');
47 | const https = require('https');
48 | const { URL } = require('url');
49 | const dgram = require('dgram');
50 | const dns = require('dns');
51 | const tls = require('tls');
52 |
53 | /**
54 | * Simple mDNS service discovery implementation
55 | */
56 | class MDNSDiscovery {
57 | constructor() {
58 | this.services = new Map();
59 | }
60 |
61 | /**
62 | * Discover MCP Memory Services using mDNS
63 | */
64 | async discoverServices(timeout = 5000) {
65 | return new Promise((resolve) => {
66 | const socket = dgram.createSocket('udp4');
67 | const services = [];
68 |
69 | // mDNS query for _mcp-memory._tcp.local
70 | const query = this.createMDNSQuery('_mcp-memory._tcp.local');
71 |
72 | socket.on('message', (msg, rinfo) => {
73 | try {
74 | const service = this.parseMDNSResponse(msg, rinfo);
75 | if (service) {
76 | services.push(service);
77 | }
78 | } catch (error) {
79 | // Ignore parsing errors
80 | }
81 | });
82 |
83 | socket.bind(() => {
84 | socket.addMembership('224.0.0.251');
85 | socket.send(query, 5353, '224.0.0.251');
86 | });
87 |
88 | setTimeout(() => {
89 | socket.close();
90 | resolve(services);
91 | }, timeout);
92 | });
93 | }
94 |
95 | createMDNSQuery(serviceName) {
96 | // Simplified mDNS query creation
97 | // This is a basic implementation - in production, use a proper mDNS library
98 | const header = Buffer.alloc(12);
99 | header.writeUInt16BE(0, 0); // Transaction ID
100 | header.writeUInt16BE(0, 2); // Flags
101 | header.writeUInt16BE(1, 4); // Questions
102 | header.writeUInt16BE(0, 6); // Answer RRs
103 | header.writeUInt16BE(0, 8); // Authority RRs
104 | header.writeUInt16BE(0, 10); // Additional RRs
105 |
106 | // Question section (simplified)
107 | const nameLabels = serviceName.split('.');
108 | let nameBuffer = Buffer.alloc(0);
109 |
110 | for (const label of nameLabels) {
111 | if (label) {
112 | const labelBuffer = Buffer.alloc(1 + label.length);
113 | labelBuffer.writeUInt8(label.length, 0);
114 | labelBuffer.write(label, 1);
115 | nameBuffer = Buffer.concat([nameBuffer, labelBuffer]);
116 | }
117 | }
118 |
119 | const endBuffer = Buffer.alloc(5);
120 | endBuffer.writeUInt8(0, 0); // End of name
121 | endBuffer.writeUInt16BE(12, 1); // Type PTR
122 | endBuffer.writeUInt16BE(1, 3); // Class IN
123 |
124 | return Buffer.concat([header, nameBuffer, endBuffer]);
125 | }
126 |
127 | parseMDNSResponse(msg, rinfo) {
128 | // Simplified mDNS response parsing
129 | // This is a basic implementation - in production, use a proper mDNS library
130 | try {
131 | // Look for MCP Memory Service indicators in the response
132 | const msgStr = msg.toString('ascii', 0, Math.min(msg.length, 512));
133 | if (msgStr.includes('mcp-memory') || msgStr.includes('MCP Memory')) {
134 | // Simplified: assume the service's default port. A proper mDNS
135 | // library would parse the SRV record for the advertised port
136 | const host = rinfo.address;
137 |
138 | const port = 8000;
139 | return {
140 | name: 'MCP Memory Service',
141 | host: host,
142 | port: port,
143 | https: port === 443,
144 | discovered: true
145 | };
146 |
147 | }
148 | } catch (error) {
149 | // Ignore parsing errors
150 | }
151 | return null;
152 | }
153 | }
154 |
155 | class HTTPMCPBridge {
156 | constructor() {
157 | this.endpoint = process.env.MCP_MEMORY_HTTP_ENDPOINT;
158 | this.apiKey = process.env.MCP_MEMORY_API_KEY;
159 | this.autoDiscover = process.env.MCP_MEMORY_AUTO_DISCOVER === 'true';
160 | this.preferHttps = process.env.MCP_MEMORY_PREFER_HTTPS !== 'false';
161 | this.requestId = 0;
162 | this.discovery = new MDNSDiscovery();
163 | this.discoveredEndpoint = null;
164 | }
165 |
166 | /**
167 | * Initialize the bridge by discovering or configuring the endpoint
168 | */
169 | async initialize() {
170 | if (this.endpoint) {
171 | // Manual configuration takes precedence
172 | console.error(`Using manual endpoint: ${this.endpoint}`);
173 | return true;
174 | }
175 |
176 | if (this.autoDiscover) {
177 | console.error('Attempting to discover MCP Memory Service via mDNS...');
178 | try {
179 | const services = await this.discovery.discoverServices();
180 |
181 | if (services.length > 0) {
182 | // Sort services by preference (HTTPS first if preferred)
183 | services.sort((a, b) => {
184 | if (this.preferHttps) {
185 | if (a.https !== b.https) return b.https - a.https;
186 | }
187 | return a.port - b.port; // Prefer standard ports
188 | });
189 |
190 | const service = services[0];
191 | const protocol = service.https ? 'https' : 'http';
192 | this.discoveredEndpoint = `${protocol}://${service.host}:${service.port}/api`;
193 | this.endpoint = this.discoveredEndpoint;
194 |
195 | console.error(`Discovered service: ${this.endpoint}`);
196 |
197 | // Test the discovered endpoint
198 | const healthy = await this.testEndpoint(this.endpoint);
199 | if (!healthy) {
200 | console.error('Discovered endpoint failed health check, trying alternatives...');
201 |
202 | // Try other discovered services
203 | for (let i = 1; i < services.length; i++) {
204 | const altService = services[i];
205 | const altProtocol = altService.https ? 'https' : 'http';
206 | const altEndpoint = `${altProtocol}://${altService.host}:${altService.port}/api`;
207 |
208 | if (await this.testEndpoint(altEndpoint)) {
209 | this.endpoint = altEndpoint;
210 | console.error(`Using alternative endpoint: ${this.endpoint}`);
211 | return true;
212 | }
213 | }
214 |
215 | console.error('No healthy services found');
216 | return false;
217 | }
218 |
219 | return true;
220 | } else {
221 | console.error('No MCP Memory Services discovered');
222 | return false;
223 | }
224 | } catch (error) {
225 | console.error(`Discovery failed: ${error.message}`);
226 | return false;
227 | }
228 | }
229 |
230 | // Default fallback
231 | this.endpoint = 'http://localhost:8000/api';
232 | console.error(`Using default endpoint: ${this.endpoint}`);
233 | return true;
234 | }
235 |
236 | /**
237 | * Test if an endpoint is healthy
238 | */
239 | async testEndpoint(endpoint) {
240 | try {
241 | const healthUrl = `${endpoint}/api/health`;
242 | const response = await this.makeRequestInternal(healthUrl, 'GET', null, 3000); // 3 second timeout
243 | return response.statusCode === 200;
244 | } catch (error) {
245 | return false;
246 | }
247 | }
248 |
249 | /**
250 | * Make HTTP request to the MCP Memory Service with retry logic
251 | */
252 | async makeRequest(path, method = 'GET', data = null, maxRetries = 3) {
253 | let lastError;
254 |
255 | for (let attempt = 1; attempt <= maxRetries; attempt++) {
256 | try {
257 | console.error(`Attempt ${attempt}/${maxRetries} for ${method} ${path}`);
258 | const result = await this.makeRequestInternal(path, method, data);
259 |
260 | if (attempt > 1) {
261 | console.error(`Request succeeded on attempt ${attempt}`);
262 | }
263 |
264 | return result;
265 | } catch (error) {
266 | lastError = error;
267 | console.error(`Attempt ${attempt} failed: ${error.message}`);
268 |
269 | if (attempt < maxRetries) {
270 | const delay = Math.min(1000 * Math.pow(2, attempt - 1), 5000); // Exponential backoff, max 5s
271 | console.error(`Retrying in ${delay}ms...`);
272 | await new Promise(resolve => setTimeout(resolve, delay));
273 | } else {
274 | console.error(`All ${maxRetries} attempts failed. Last error: ${error.message}`);
275 | }
276 | }
277 | }
278 |
279 | throw lastError;
280 | }
281 |
282 | /**
283 | * Internal HTTP request method with timeout support and comprehensive logging
284 | */
285 | async makeRequestInternal(path, method = 'GET', data = null, timeout = 10000) {
286 | const startTime = Date.now();
287 |     const requestId = Math.random().toString(36).slice(2, 11); // substr() is deprecated
288 |
289 | console.error(`[${requestId}] Starting ${method} request to ${path}`);
290 |
291 | return new Promise((resolve, reject) => {
292 | // Use URL constructor's built-in path resolution to avoid duplicate base paths
293 | // Ensure endpoint has trailing slash for proper relative path resolution
294 | const baseUrl = this.endpoint.endsWith('/') ? this.endpoint : this.endpoint + '/';
295 | const url = new URL(path, baseUrl);
296 | const protocol = url.protocol === 'https:' ? https : http;
297 |
298 | console.error(`[${requestId}] Full URL: ${url.toString()}`);
299 | console.error(`[${requestId}] Using protocol: ${url.protocol}`);
300 |
301 | const options = {
302 | hostname: url.hostname,
303 | port: url.port || (url.protocol === 'https:' ? 443 : 80),
304 | path: url.pathname + url.search,
305 | method: method,
306 | headers: {
307 | 'Content-Type': 'application/json',
308 | 'User-Agent': 'MCP-HTTP-Bridge/2.0',
309 | 'Connection': 'close'
310 | },
311 | timeout: timeout,
312 | keepAlive: false
313 | };
314 |
315 |       // For HTTPS, create a custom agent that accepts self-signed certificates
316 | if (url.protocol === 'https:') {
317 | const agent = new https.Agent({
318 | rejectUnauthorized: false,
319 | requestCert: false,
320 | checkServerIdentity: () => undefined,
321 | keepAlive: false
322 | });
323 | options.agent = agent;
324 | console.error(`[${requestId}] Using custom HTTPS agent with default TLS settings`);
325 | }
326 |
327 | if (this.apiKey) {
328 | options.headers['Authorization'] = `Bearer ${this.apiKey}`;
329 | console.error(`[${requestId}] API key added to headers`);
330 | }
331 |
332 | if (data) {
333 | const postData = JSON.stringify(data);
334 | options.headers['Content-Length'] = Buffer.byteLength(postData);
335 | console.error(`[${requestId}] Request body size: ${Buffer.byteLength(postData)} bytes`);
336 | }
337 |
338 |     console.error(`[${requestId}] Request options:`, JSON.stringify({ ...options, headers: { ...options.headers, Authorization: options.headers.Authorization && 'Bearer [REDACTED]' } }, null, 2)); // never log the raw API key
339 |
340 | const req = protocol.request(options, (res) => {
341 | const responseStartTime = Date.now();
342 | console.error(`[${requestId}] Response received after ${responseStartTime - startTime}ms`);
343 | console.error(`[${requestId}] Status code: ${res.statusCode}`);
344 | console.error(`[${requestId}] Response headers:`, JSON.stringify(res.headers, null, 2));
345 |
346 | let responseData = '';
347 |
348 | res.on('data', (chunk) => {
349 | responseData += chunk;
350 | console.error(`[${requestId}] Received ${chunk.length} bytes`);
351 | });
352 |
353 | res.on('end', () => {
354 | const endTime = Date.now();
355 | console.error(`[${requestId}] Response completed after ${endTime - startTime}ms total`);
356 | console.error(`[${requestId}] Response body: ${responseData}`);
357 |
358 | try {
359 | const result = JSON.parse(responseData);
360 | resolve({ statusCode: res.statusCode, data: result });
361 | } catch (error) {
362 | console.error(`[${requestId}] JSON parse error: ${error.message}`);
363 | reject(new Error(`Invalid JSON response: ${responseData}`));
364 | }
365 | });
366 | });
367 |
368 | req.on('error', (error) => {
369 | const errorTime = Date.now();
370 | console.error(`[${requestId}] Request error after ${errorTime - startTime}ms: ${error.message}`);
371 | console.error(`[${requestId}] Error details:`, error);
372 | reject(error);
373 | });
374 |
375 | req.on('timeout', () => {
376 | const timeoutTime = Date.now();
377 | console.error(`[${requestId}] Request timeout after ${timeoutTime - startTime}ms (limit: ${timeout}ms)`);
378 | req.destroy();
379 | reject(new Error(`Request timeout after ${timeout}ms`));
380 | });
381 |
382 | console.error(`[${requestId}] Sending request...`);
383 |
384 | if (data) {
385 | const postData = JSON.stringify(data);
386 | console.error(`[${requestId}] Writing request body: ${postData}`);
387 | req.write(postData);
388 | }
389 |
390 | req.end();
391 | console.error(`[${requestId}] Request sent, waiting for response...`);
392 | });
393 | }
394 |
395 | /**
396 | * Handle MCP store_memory operation
397 | */
398 | async storeMemory(params) {
399 | try {
400 | const response = await this.makeRequest('memories', 'POST', {
401 | content: params.content,
402 | tags: params.metadata?.tags || [],
403 | memory_type: params.metadata?.type || 'note',
404 | metadata: params.metadata || {}
405 | });
406 |
407 | if (response.statusCode === 200 || response.statusCode === 201) {
408 | // Server returns 200 with success field indicating actual result
409 | if (response.data.success) {
410 | return { success: true, message: response.data.message || 'Memory stored successfully' };
411 | } else {
412 | return { success: false, message: response.data.message || response.data.detail || 'Failed to store memory' };
413 | }
414 | } else {
415 | return { success: false, message: response.data.detail || 'Failed to store memory' };
416 | }
417 | } catch (error) {
418 | return { success: false, message: error.message };
419 | }
420 | }
421 |
422 | /**
423 | * Handle MCP retrieve_memory operation
424 | */
425 | async retrieveMemory(params) {
426 | try {
427 | const queryParams = new URLSearchParams({
428 | q: params.query,
429 | n_results: params.n_results || 5
430 | });
431 |
432 | const response = await this.makeRequest(`search?${queryParams}`, 'GET');
433 |
434 | if (response.statusCode === 200) {
435 | return {
436 | memories: response.data.results.map(result => ({
437 | content: result.memory.content,
438 | metadata: {
439 | tags: result.memory.tags,
440 | type: result.memory.memory_type,
441 | created_at: result.memory.created_at_iso,
442 | relevance_score: result.relevance_score
443 | }
444 | }))
445 | };
446 | } else {
447 | return { memories: [] };
448 | }
449 | } catch (error) {
450 | return { memories: [] };
451 | }
452 | }
453 |
454 | /**
455 | * Handle MCP search_by_tag operation
456 | */
457 | async searchByTag(params) {
458 | try {
459 | const queryParams = new URLSearchParams();
460 | if (Array.isArray(params.tags)) {
461 | params.tags.forEach(tag => queryParams.append('tags', tag));
462 | } else if (typeof params.tags === 'string') {
463 | queryParams.append('tags', params.tags);
464 | }
465 |
466 | const response = await this.makeRequest(`memories/search/tags?${queryParams}`, 'GET');
467 |
468 | if (response.statusCode === 200) {
469 | return {
470 | memories: response.data.memories.map(memory => ({
471 | content: memory.content,
472 | metadata: {
473 | tags: memory.tags,
474 | type: memory.memory_type,
475 | created_at: memory.created_at_iso
476 | }
477 | }))
478 | };
479 | } else {
480 | return { memories: [] };
481 | }
482 | } catch (error) {
483 | return { memories: [] };
484 | }
485 | }
486 |
487 | /**
488 | * Handle MCP delete_memory operation
489 | */
490 | async deleteMemory(params) {
491 | try {
492 | const response = await this.makeRequest(`memories/${params.content_hash}`, 'DELETE');
493 |
494 | if (response.statusCode === 200) {
495 | return { success: true, message: 'Memory deleted successfully' };
496 | } else {
497 | return { success: false, message: response.data.detail || 'Failed to delete memory' };
498 | }
499 | } catch (error) {
500 | return { success: false, message: error.message };
501 | }
502 | }
503 |
504 | /**
505 | * Handle MCP check_database_health operation
506 | */
507 | async checkHealth(params = {}) {
508 | try {
509 | const response = await this.makeRequest('health', 'GET');
510 |
511 | if (response.statusCode === 200) {
512 | return {
513 | status: response.data.status,
514 | backend: response.data.storage_type,
515 | statistics: response.data.statistics || {}
516 | };
517 | } else {
518 | return { status: 'unhealthy', backend: 'unknown', statistics: {} };
519 | }
520 | } catch (error) {
521 | // Handle errors that may not have a message property (like ECONNREFUSED)
522 | const errorMessage = error.message || error.code || error.toString() || 'Unknown error';
523 | return { status: 'error', backend: 'unknown', statistics: {}, error: errorMessage };
524 | }
525 | }
526 |
527 | /**
528 | * Process MCP JSON-RPC request
529 | */
530 | async processRequest(request) {
531 | const { method, params, id } = request;
532 |
533 | let result;
534 | try {
535 | switch (method) {
536 | case 'initialize':
537 | result = {
538 | protocolVersion: "2024-11-05",
539 | capabilities: {
540 | tools: {
541 | listChanged: false
542 | }
543 | },
544 | serverInfo: {
545 | name: "mcp-memory-service",
546 | version: "2.0.0"
547 | }
548 | };
549 | break;
550 | case 'notifications/initialized':
551 | // No response needed for notifications
552 | return null;
553 | case 'tools/list':
554 | result = {
555 | tools: [
556 | {
557 | name: "store_memory",
558 | description: "Store a memory with content and optional metadata",
559 | inputSchema: {
560 | type: "object",
561 | properties: {
562 | content: { type: "string", description: "The content to store" },
563 | metadata: {
564 | type: "object",
565 | properties: {
566 | tags: { type: "array", items: { type: "string" } },
567 | type: { type: "string" }
568 | }
569 | }
570 | },
571 | required: ["content"]
572 | }
573 | },
574 | {
575 | name: "retrieve_memory",
576 | description: "Retrieve memories based on a query",
577 | inputSchema: {
578 | type: "object",
579 | properties: {
580 | query: { type: "string", description: "Search query" },
581 | n_results: { type: "integer", description: "Number of results to return" }
582 | },
583 | required: ["query"]
584 | }
585 | },
586 | {
587 | name: "search_by_tag",
588 | description: "Search memories by tags",
589 | inputSchema: {
590 | type: "object",
591 | properties: {
592 | tags: {
593 | oneOf: [
594 | { type: "string" },
595 | { type: "array", items: { type: "string" } }
596 | ]
597 | }
598 | },
599 | required: ["tags"]
600 | }
601 | },
602 | {
603 | name: "delete_memory",
604 | description: "Delete a memory by content hash",
605 | inputSchema: {
606 | type: "object",
607 | properties: {
608 | content_hash: { type: "string", description: "Hash of the content to delete" }
609 | },
610 | required: ["content_hash"]
611 | }
612 | },
613 | {
614 | name: "check_database_health",
615 | description: "Check the health of the memory database",
616 | inputSchema: {
617 | type: "object",
618 | properties: {}
619 | }
620 | }
621 | ]
622 | };
623 | break;
624 | case 'tools/call':
625 | const toolName = params.name;
626 | const toolParams = params.arguments || {};
627 |
628 | console.error(`Processing tool call: ${toolName} with params:`, JSON.stringify(toolParams));
629 |
630 | let toolResult;
631 | switch (toolName) {
632 | case 'store_memory':
633 | toolResult = await this.storeMemory(toolParams);
634 | break;
635 | case 'retrieve_memory':
636 | toolResult = await this.retrieveMemory(toolParams);
637 | break;
638 | case 'search_by_tag':
639 | toolResult = await this.searchByTag(toolParams);
640 | break;
641 | case 'delete_memory':
642 | toolResult = await this.deleteMemory(toolParams);
643 | break;
644 | case 'check_database_health':
645 | toolResult = await this.checkHealth(toolParams);
646 | break;
647 | default:
648 | throw new Error(`Unknown tool: ${toolName}`);
649 | }
650 |
651 | console.error(`Tool result:`, JSON.stringify(toolResult));
652 |
653 | return {
654 | jsonrpc: "2.0",
655 | id: id,
656 | result: {
657 | content: [
658 | {
659 | type: "text",
660 | text: JSON.stringify(toolResult, null, 2)
661 | }
662 | ]
663 | }
664 | };
665 | case 'store_memory':
666 | result = await this.storeMemory(params);
667 | break;
668 | case 'retrieve_memory':
669 | result = await this.retrieveMemory(params);
670 | break;
671 | case 'search_by_tag':
672 | result = await this.searchByTag(params);
673 | break;
674 | case 'delete_memory':
675 | result = await this.deleteMemory(params);
676 | break;
677 | case 'check_database_health':
678 | result = await this.checkHealth(params);
679 | break;
680 | default:
681 | throw new Error(`Unknown method: ${method}`);
682 | }
683 |
684 | return {
685 | jsonrpc: "2.0",
686 | id: id,
687 | result: result
688 | };
689 | } catch (error) {
690 | return {
691 | jsonrpc: "2.0",
692 | id: id,
693 | error: {
694 | code: -32000,
695 | message: error.message
696 | }
697 | };
698 | }
699 | }
700 |
701 | /**
702 | * Start the bridge server
703 | */
704 | async start() {
705 | console.error(`MCP HTTP Bridge starting...`);
706 |
707 | // Initialize the bridge (discovery or manual config)
708 | const initialized = await this.initialize();
709 | if (!initialized) {
710 | console.error('Failed to initialize bridge - no endpoint available');
711 | process.exit(1);
712 | }
713 |
714 | console.error(`Endpoint: ${this.endpoint}`);
715 | console.error(`API Key: ${this.apiKey ? '[SET]' : '[NOT SET]'}`);
716 | console.error(`Auto-discovery: ${this.autoDiscover ? 'ENABLED' : 'DISABLED'}`);
717 | console.error(`Prefer HTTPS: ${this.preferHttps ? 'YES' : 'NO'}`);
718 |
719 | if (this.discoveredEndpoint) {
720 | console.error(`Service discovered automatically via mDNS`);
721 | }
722 |
723 | let buffer = '';
724 |
725 | process.stdin.on('data', async (chunk) => {
726 | buffer += chunk.toString();
727 |
728 | // Process complete JSON-RPC messages
729 | let newlineIndex;
730 | while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
731 | const line = buffer.slice(0, newlineIndex).trim();
732 | buffer = buffer.slice(newlineIndex + 1);
733 |
734 | if (line) {
735 | try {
736 | const request = JSON.parse(line);
737 | const response = await this.processRequest(request);
738 |               if (response !== null) console.log(JSON.stringify(response)); // notifications return null and get no reply
739 | } catch (error) {
740 | console.error(`Error processing request: ${error.message}`);
741 | console.log(JSON.stringify({
742 | jsonrpc: "2.0",
743 | id: null,
744 | error: {
745 | code: -32700,
746 | message: "Parse error"
747 | }
748 | }));
749 | }
750 | }
751 | }
752 | });
753 |
754 | process.stdin.on('end', () => {
755 | process.exit(0);
756 | });
757 |
758 | // Handle graceful shutdown
759 | process.on('SIGINT', () => {
760 | console.error('Shutting down HTTP Bridge...');
761 | process.exit(0);
762 | });
763 |
764 | process.on('SIGTERM', () => {
765 | console.error('Shutting down HTTP Bridge...');
766 | process.exit(0);
767 | });
768 | }
769 | }
770 |
771 | // Start the bridge if this file is run directly
772 | if (require.main === module) {
773 | const bridge = new HTTPMCPBridge();
774 | bridge.start().catch(error => {
775 | console.error(`Failed to start bridge: ${error.message}`);
776 | process.exit(1);
777 | });
778 | }
779 |
780 | module.exports = HTTPMCPBridge;
```
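
A quick way to exercise the bridge's newline-delimited JSON-RPC framing is to spawn it as a child process and write one request per line. This is a minimal sketch, not part of the repository: it assumes the file above is saved locally as `http-mcp-bridge.js` (a hypothetical path) and that a memory service answers at the default endpoint.

```javascript
// Sketch: drive the bridge over stdio the way an MCP client would.
const { spawn } = require('child_process');

const bridge = spawn('node', ['http-mcp-bridge.js'], {  // hypothetical local path
  env: { ...process.env, MCP_MEMORY_HTTP_ENDPOINT: 'http://localhost:8000/api' }
});

// Each stdout line is one JSON-RPC response; diagnostics go to stderr only,
// so they never corrupt the protocol stream.
bridge.stdout.on('data', chunk => console.log('response:', chunk.toString().trim()));
bridge.stderr.on('data', chunk => process.stderr.write(chunk));

// One request per line, terminated by '\n', matching the framing in start().
bridge.stdin.write(JSON.stringify({
  jsonrpc: '2.0', id: 1, method: 'tools/list', params: {}
}) + '\n');
```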
--------------------------------------------------------------------------------
/docs/examples/analysis-scripts.js:
--------------------------------------------------------------------------------
```javascript
1 | /**
2 | * Memory Analysis Scripts
3 | *
4 | * A collection of JavaScript functions for analyzing and extracting insights
5 | * from MCP Memory Service data. These scripts demonstrate practical approaches
6 | * to memory data analysis, pattern recognition, and visualization preparation.
7 | *
8 | * Usage: Import individual functions or use as reference for building
9 | * custom analysis pipelines.
10 | */
11 |
12 | // =============================================================================
13 | // TEMPORAL ANALYSIS FUNCTIONS
14 | // =============================================================================
15 |
16 | /**
17 | * Analyze memory distribution over time periods
18 | * @param {Array} memories - Array of memory objects with timestamps
19 | * @returns {Object} Distribution data organized by time periods
20 | */
21 | function analyzeTemporalDistribution(memories) {
22 | const distribution = {
23 | monthly: {},
24 | weekly: {},
25 | daily: {},
26 | hourly: {}
27 | };
28 |
29 | memories.forEach(memory => {
30 | const date = new Date(memory.timestamp);
31 |
32 | // Monthly distribution
33 | const monthKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
34 | if (!distribution.monthly[monthKey]) {
35 | distribution.monthly[monthKey] = [];
36 | }
37 | distribution.monthly[monthKey].push(memory);
38 |
39 | // Weekly distribution (week of year)
40 | const weekKey = `${date.getFullYear()}-W${getWeekNumber(date)}`;
41 | if (!distribution.weekly[weekKey]) {
42 | distribution.weekly[weekKey] = [];
43 | }
44 | distribution.weekly[weekKey].push(memory);
45 |
46 | // Daily distribution (day of week)
47 | const dayKey = date.toLocaleDateString('en-US', { weekday: 'long' });
48 | if (!distribution.daily[dayKey]) {
49 | distribution.daily[dayKey] = [];
50 | }
51 | distribution.daily[dayKey].push(memory);
52 |
53 | // Hourly distribution
54 | const hourKey = date.getHours();
55 | if (!distribution.hourly[hourKey]) {
56 | distribution.hourly[hourKey] = [];
57 | }
58 | distribution.hourly[hourKey].push(memory);
59 | });
60 |
61 | return distribution;
62 | }
63 |
64 | /**
65 | * Calculate week number for a given date
66 | * @param {Date} date - Date object
67 | * @returns {number} Week number
68 | */
69 | function getWeekNumber(date) {
70 | const firstDayOfYear = new Date(date.getFullYear(), 0, 1);
71 | const pastDaysOfYear = (date - firstDayOfYear) / 86400000;
72 | return Math.ceil((pastDaysOfYear + firstDayOfYear.getDay() + 1) / 7);
73 | }
74 |
75 | /**
76 | * Prepare temporal data for chart visualization
77 | * @param {Object} distribution - Distribution object from analyzeTemporalDistribution
78 | * @param {string} period - Time period ('monthly', 'weekly', 'daily', 'hourly')
79 | * @returns {Array} Chart-ready data array
80 | */
81 | function prepareTemporalChartData(distribution, period = 'monthly') {
82 | const data = distribution[period];
83 |
84 | const chartData = Object.entries(data)
85 | .map(([key, memories]) => ({
86 | period: formatPeriodLabel(key, period),
87 | count: memories.length,
88 | memories: memories,
89 | key: key
90 | }))
91 | .sort((a, b) => a.key.localeCompare(b.key));
92 |
93 | return chartData;
94 | }
95 |
96 | /**
97 | * Format period labels for display
98 | * @param {string} key - Period key
99 | * @param {string} period - Period type
100 | * @returns {string} Formatted label
101 | */
102 | function formatPeriodLabel(key, period) {
103 | switch (period) {
104 | case 'monthly':
105 | const [year, month] = key.split('-');
106 | const monthNames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
107 | 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
108 | return `${monthNames[parseInt(month) - 1]} ${year}`;
109 |
110 | case 'weekly':
111 | return key; // Already formatted as YYYY-WXX
112 |
113 | case 'daily':
114 | return key; // Day names are already formatted
115 |
116 | case 'hourly':
117 | const hour = parseInt(key);
118 | return `${hour}:00`;
119 |
120 | default:
121 | return key;
122 | }
123 | }
124 |
125 | // =============================================================================
126 | // TAG ANALYSIS FUNCTIONS
127 | // =============================================================================
128 |
129 | /**
130 | * Analyze tag usage frequency and patterns
131 | * @param {Array} memories - Array of memory objects
132 | * @returns {Object} Tag analysis results
133 | */
134 | function analyzeTagUsage(memories) {
135 | const tagFrequency = {};
136 | const tagCombinations = {};
137 | const categoryDistribution = {};
138 |
139 | memories.forEach(memory => {
140 | const tags = memory.tags || [];
141 |
142 | // Tag frequency analysis
143 | tags.forEach(tag => {
144 | tagFrequency[tag] = (tagFrequency[tag] || 0) + 1;
145 |
146 | // Categorize tags
147 | const category = categorizeTag(tag);
148 | if (!categoryDistribution[category]) {
149 | categoryDistribution[category] = {};
150 | }
151 | categoryDistribution[category][tag] = (categoryDistribution[category][tag] || 0) + 1;
152 | });
153 |
154 | // Tag combination analysis
155 | if (tags.length > 1) {
156 | for (let i = 0; i < tags.length; i++) {
157 | for (let j = i + 1; j < tags.length; j++) {
158 | const combo = [tags[i], tags[j]].sort().join(' + ');
159 | tagCombinations[combo] = (tagCombinations[combo] || 0) + 1;
160 | }
161 | }
162 | }
163 | });
164 |
165 | return {
166 | frequency: Object.entries(tagFrequency)
167 | .sort(([,a], [,b]) => b - a),
168 | combinations: Object.entries(tagCombinations)
169 | .sort(([,a], [,b]) => b - a)
170 | .slice(0, 20), // Top 20 combinations
171 | categories: categoryDistribution,
172 | totalTags: Object.keys(tagFrequency).length,
173 | averageTagsPerMemory: memories.reduce((sum, m) => sum + (m.tags?.length || 0), 0) / memories.length
174 | };
175 | }
176 |
177 | /**
178 | * Categorize a tag based on common patterns
179 | * @param {string} tag - Tag to categorize
180 | * @returns {string} Category name
181 | */
182 | function categorizeTag(tag) {
183 | const patterns = {
184 | 'projects': /^(mcp-memory-service|memory-dashboard|github-integration|mcp-protocol)/,
185 | 'technologies': /^(python|react|typescript|chromadb|git|docker|aws|npm)/,
186 | 'activities': /^(testing|debugging|development|documentation|deployment|maintenance)/,
187 | 'status': /^(resolved|in-progress|blocked|verified|completed|experimental)/,
188 | 'content-types': /^(concept|architecture|tutorial|reference|example|guide)/,
189 | 'temporal': /^(january|february|march|april|may|june|q1|q2|2025)/,
190 | 'priorities': /^(urgent|high-priority|low-priority|critical)/
191 | };
192 |
193 | for (const [category, pattern] of Object.entries(patterns)) {
194 | if (pattern.test(tag)) {
195 | return category;
196 | }
197 | }
198 |
199 | return 'other';
200 | }
201 |
202 | /**
203 | * Find tag inconsistencies and suggest improvements
204 | * @param {Array} memories - Array of memory objects
205 | * @returns {Object} Consistency analysis results
206 | */
207 | function analyzeTagConsistency(memories) {
208 | const inconsistencies = [];
209 | const suggestions = [];
210 | const patterns = {};
211 |
212 | memories.forEach((memory, index) => {
213 | const content = memory.content || '';
214 | const tags = memory.tags || [];
215 |
216 | // Common content patterns that should have corresponding tags
217 | const contentPatterns = {
218 | 'test': /\b(test|testing|TEST)\b/i,
219 | 'bug': /\b(bug|issue|error|problem)\b/i,
220 | 'debug': /\b(debug|debugging|fix|fixed)\b/i,
221 | 'documentation': /\b(document|guide|tutorial|readme)\b/i,
222 | 'concept': /\b(concept|idea|design|architecture)\b/i,
223 | 'implementation': /\b(implement|implementation|develop|development)\b/i
224 | };
225 |
226 | Object.entries(contentPatterns).forEach(([expectedTag, pattern]) => {
227 | if (pattern.test(content)) {
228 | const hasRelatedTag = tags.some(tag =>
229 | tag.includes(expectedTag) ||
230 | expectedTag.includes(tag.split('-')[0])
231 | );
232 |
233 | if (!hasRelatedTag) {
234 | inconsistencies.push({
235 | memoryIndex: index,
236 | type: 'missing-tag',
237 | expectedTag: expectedTag,
238 | content: content.substring(0, 100) + '...',
239 | currentTags: tags
240 | });
241 | }
242 | }
243 | });
244 |
245 | // Check for overly generic tags
246 | const genericTags = ['test', 'memory', 'note', 'temp', 'example'];
247 | const hasGenericOnly = tags.length > 0 &&
248 | tags.every(tag => genericTags.includes(tag));
249 |
250 | if (hasGenericOnly) {
251 | suggestions.push({
252 | memoryIndex: index,
253 | type: 'improve-specificity',
254 | suggestion: 'Replace generic tags with specific categories',
255 | currentTags: tags,
256 | content: content.substring(0, 100) + '...'
257 | });
258 | }
259 | });
260 |
261 | return {
262 | inconsistencies,
263 | suggestions,
264 | consistencyScore: ((memories.length - inconsistencies.length) / memories.length) * 100,
265 | totalIssues: inconsistencies.length + suggestions.length
266 | };
267 | }
268 |
269 | // =============================================================================
270 | // CONTENT ANALYSIS FUNCTIONS
271 | // =============================================================================
272 |
273 | /**
274 | * Analyze content patterns and themes
275 | * @param {Array} memories - Array of memory objects
276 | * @returns {Object} Content analysis results
277 | */
278 | function analyzeContentPatterns(memories) {
279 | const themes = {};
280 | const contentTypes = {};
281 | const wordFrequency = {};
282 | const lengthDistribution = {};
283 |
284 | memories.forEach(memory => {
285 | const content = memory.content || '';
286 | const words = extractKeywords(content);
287 | const contentType = detectContentType(content);
288 |
289 | // Theme analysis based on keywords
290 | words.forEach(word => {
291 | wordFrequency[word] = (wordFrequency[word] || 0) + 1;
292 | });
293 |
294 | // Content type distribution
295 | contentTypes[contentType] = (contentTypes[contentType] || 0) + 1;
296 |
297 | // Length distribution
298 | const lengthCategory = categorizeContentLength(content.length);
299 | lengthDistribution[lengthCategory] = (lengthDistribution[lengthCategory] || 0) + 1;
300 | });
301 |
302 | // Extract top themes from word frequency
303 | const topWords = Object.entries(wordFrequency)
304 | .sort(([,a], [,b]) => b - a)
305 | .slice(0, 50);
306 |
307 | return {
308 | themes: extractThemes(topWords),
309 | contentTypes,
310 | lengthDistribution,
311 | wordFrequency: topWords,
312 | averageLength: memories.reduce((sum, m) => sum + (m.content?.length || 0), 0) / memories.length
313 | };
314 | }
315 |
316 | /**
317 | * Extract keywords from content
318 | * @param {string} content - Memory content
319 | * @returns {Array} Array of keywords
320 | */
321 | function extractKeywords(content) {
322 | const stopWords = new Set([
323 | 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
324 | 'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after',
325 | 'above', 'below', 'between', 'among', 'is', 'are', 'was', 'were', 'be', 'been',
326 | 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
327 | 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those'
328 | ]);
329 |
330 | return content
331 | .toLowerCase()
332 | .replace(/[^\w\s-]/g, ' ') // Remove punctuation except hyphens
333 | .split(/\s+/)
334 | .filter(word =>
335 | word.length > 2 &&
336 | !stopWords.has(word) &&
337 | !word.match(/^\d+$/) // Exclude pure numbers
338 | );
339 | }
340 |
341 | /**
342 | * Detect content type based on patterns
343 | * @param {string} content - Memory content
344 | * @returns {string} Content type
345 | */
346 | function detectContentType(content) {
347 | const patterns = {
348 | 'code': /```|function\s*\(|class\s+\w+|import\s+\w+/,
349 | 'documentation': /^#+\s|README|GUIDE|TUTORIAL/i,
350 | 'issue': /issue|bug|error|problem|fix|resolved/i,
351 | 'concept': /concept|idea|design|architecture|approach/i,
352 | 'test': /test|testing|verify|validation|TEST/i,
353 | 'configuration': /config|setup|installation|environment/i,
354 | 'analysis': /analysis|report|summary|statistics|metrics/i
355 | };
356 |
357 | for (const [type, pattern] of Object.entries(patterns)) {
358 | if (pattern.test(content)) {
359 | return type;
360 | }
361 | }
362 |
363 | return 'general';
364 | }
365 |
366 | /**
367 | * Categorize content by length
368 | * @param {number} length - Content length in characters
369 | * @returns {string} Length category
370 | */
371 | function categorizeContentLength(length) {
372 | if (length < 100) return 'very-short';
373 | if (length < 500) return 'short';
374 | if (length < 1500) return 'medium';
375 | if (length < 3000) return 'long';
376 | return 'very-long';
377 | }
378 |
379 | /**
380 | * Extract themes from word frequency data
381 | * @param {Array} topWords - Array of [word, frequency] pairs
382 | * @returns {Object} Organized themes
383 | */
384 | function extractThemes(topWords) {
385 | const themeCategories = {
386 | technology: ['python', 'react', 'typescript', 'chromadb', 'git', 'docker', 'api', 'database'],
387 | development: ['development', 'implementation', 'code', 'programming', 'build', 'deploy'],
388 | testing: ['test', 'testing', 'debug', 'debugging', 'verification', 'quality'],
389 | project: ['project', 'service', 'system', 'application', 'platform', 'tool'],
390 | process: ['process', 'workflow', 'methodology', 'procedure', 'approach', 'strategy']
391 | };
392 |
393 | const themes = {};
394 | const wordMap = new Map(topWords);
395 |
396 | Object.entries(themeCategories).forEach(([theme, keywords]) => {
397 | themes[theme] = keywords
398 | .filter(keyword => wordMap.has(keyword))
399 | .map(keyword => ({ word: keyword, frequency: wordMap.get(keyword) }))
400 | .sort((a, b) => b.frequency - a.frequency);
401 | });
402 |
403 | return themes;
404 | }
405 |
406 | // =============================================================================
407 | // QUALITY ANALYSIS FUNCTIONS
408 | // =============================================================================
409 |
410 | /**
411 | * Assess overall memory quality and organization
412 | * @param {Array} memories - Array of memory objects
413 | * @returns {Object} Quality assessment results
414 | */
415 | function assessMemoryQuality(memories) {
416 | const metrics = {
417 | tagging: assessTaggingQuality(memories),
418 | content: assessContentQuality(memories),
419 | organization: assessOrganizationQuality(memories),
420 | searchability: assessSearchabilityQuality(memories)
421 | };
422 |
423 | // Calculate overall quality score
424 | const overallScore = Object.values(metrics)
425 | .reduce((sum, metric) => sum + metric.score, 0) / Object.keys(metrics).length;
426 |
427 | return {
428 | overallScore: Math.round(overallScore),
429 | metrics,
430 | recommendations: generateQualityRecommendations(metrics),
431 | totalMemories: memories.length
432 | };
433 | }
434 |
435 | /**
436 | * Assess tagging quality
437 | * @param {Array} memories - Array of memory objects
438 | * @returns {Object} Tagging quality assessment
439 | */
440 | function assessTaggingQuality(memories) {
441 | let taggedCount = 0;
442 | let wellTaggedCount = 0;
443 | let totalTags = 0;
444 |
445 | memories.forEach(memory => {
446 | const tags = memory.tags || [];
447 | totalTags += tags.length;
448 |
449 | if (tags.length > 0) {
450 | taggedCount++;
451 |
452 |       // Well-tagged: 3+ tags spanning at least two categories
453 | if (tags.length >= 3) {
454 | const categories = new Set(tags.map(tag => categorizeTag(tag)));
455 | if (categories.size >= 2) {
456 | wellTaggedCount++;
457 | }
458 | }
459 | }
460 | });
461 |
462 | const taggedPercentage = (taggedCount / memories.length) * 100;
463 | const wellTaggedPercentage = (wellTaggedCount / memories.length) * 100;
464 | const averageTagsPerMemory = totalTags / memories.length;
465 |
466 | let score = 0;
467 | if (taggedPercentage >= 90) score += 40;
468 | else if (taggedPercentage >= 70) score += 30;
469 | else if (taggedPercentage >= 50) score += 20;
470 |
471 | if (wellTaggedPercentage >= 70) score += 30;
472 | else if (wellTaggedPercentage >= 50) score += 20;
473 | else if (wellTaggedPercentage >= 30) score += 10;
474 |
475 | if (averageTagsPerMemory >= 4) score += 30;
476 | else if (averageTagsPerMemory >= 3) score += 20;
477 | else if (averageTagsPerMemory >= 2) score += 10;
478 |
479 | return {
480 | score,
481 | taggedPercentage: Math.round(taggedPercentage),
482 | wellTaggedPercentage: Math.round(wellTaggedPercentage),
483 | averageTagsPerMemory: Math.round(averageTagsPerMemory * 10) / 10,
484 | issues: {
485 | untagged: memories.length - taggedCount,
486 | poorlyTagged: taggedCount - wellTaggedCount
487 | }
488 | };
489 | }
490 |
491 | /**
492 | * Assess content quality
493 | * @param {Array} memories - Array of memory objects
494 | * @returns {Object} Content quality assessment
495 | */
496 | function assessContentQuality(memories) {
497 | let substantialContent = 0;
498 | let hasDescription = 0;
499 | let totalLength = 0;
500 |
501 | memories.forEach(memory => {
502 | const content = memory.content || '';
503 | totalLength += content.length;
504 |
505 | if (content.length >= 50) {
506 | substantialContent++;
507 | }
508 |
509 | if (content.length >= 200) {
510 | hasDescription++;
511 | }
512 | });
513 |
514 | const substantialPercentage = (substantialContent / memories.length) * 100;
515 | const descriptivePercentage = (hasDescription / memories.length) * 100;
516 | const averageLength = totalLength / memories.length;
517 |
518 | let score = 0;
519 | if (substantialPercentage >= 90) score += 50;
520 | else if (substantialPercentage >= 70) score += 35;
521 | else if (substantialPercentage >= 50) score += 20;
522 |
523 | if (descriptivePercentage >= 60) score += 30;
524 | else if (descriptivePercentage >= 40) score += 20;
525 | else if (descriptivePercentage >= 20) score += 10;
526 |
527 | if (averageLength >= 300) score += 20;
528 | else if (averageLength >= 150) score += 10;
529 |
530 | return {
531 | score,
532 | substantialPercentage: Math.round(substantialPercentage),
533 | descriptivePercentage: Math.round(descriptivePercentage),
534 | averageLength: Math.round(averageLength),
535 | issues: {
536 | tooShort: memories.length - substantialContent,
537 | lackingDescription: memories.length - hasDescription
538 | }
539 | };
540 | }
541 |
542 | /**
543 | * Assess organization quality
544 | * @param {Array} memories - Array of memory objects
545 | * @returns {Object} Organization quality assessment
546 | */
547 | function assessOrganizationQuality(memories) {
548 | const tagAnalysis = analyzeTagUsage(memories);
549 | const categories = Object.keys(tagAnalysis.categories);
550 | const topTags = tagAnalysis.frequency.slice(0, 10);
551 |
552 | // Check for balanced tag distribution
553 | const tagDistribution = tagAnalysis.frequency.map(([, count]) => count);
554 | const maxUsage = Math.max(...tagDistribution);
555 | const minUsage = Math.min(...tagDistribution);
556 | const distributionBalance = minUsage / maxUsage;
557 |
558 | let score = 0;
559 |
560 | // Category diversity
561 | if (categories.length >= 5) score += 30;
562 | else if (categories.length >= 3) score += 20;
563 | else if (categories.length >= 2) score += 10;
564 |
565 | // Tag usage balance
566 | if (distributionBalance >= 0.3) score += 25;
567 | else if (distributionBalance >= 0.2) score += 15;
568 | else if (distributionBalance >= 0.1) score += 5;
569 |
570 | // Consistent tag combinations
571 | if (tagAnalysis.combinations.length >= 10) score += 25;
572 | else if (tagAnalysis.combinations.length >= 5) score += 15;
573 |
574 | // Avoid over-concentration
575 | const topTagUsagePercentage = (topTags[0]?.[1] || 0) / memories.length * 100;
576 | if (topTagUsagePercentage <= 30) score += 20;
577 | else if (topTagUsagePercentage <= 40) score += 10;
578 |
579 | return {
580 | score,
581 | categoryCount: categories.length,
582 | tagDistributionBalance: Math.round(distributionBalance * 100),
583 | topTagUsagePercentage: Math.round(topTagUsagePercentage),
584 | consistentCombinations: tagAnalysis.combinations.length,
585 | issues: {
586 | fewCategories: categories.length < 3,
587 | imbalancedDistribution: distributionBalance < 0.2,
588 | overConcentration: topTagUsagePercentage > 40
589 | }
590 | };
591 | }
592 |
593 | /**
594 | * Assess searchability quality
595 | * @param {Array} memories - Array of memory objects
596 | * @returns {Object} Searchability quality assessment
597 | */
598 | function assessSearchabilityQuality(memories) {
599 | const contentAnalysis = analyzeContentPatterns(memories);
600 | const tagAnalysis = analyzeTagUsage(memories);
601 |
602 | // Calculate searchability metrics
603 | const keywordDiversity = Object.keys(contentAnalysis.wordFrequency).length;
604 | const tagDiversity = tagAnalysis.totalTags;
605 | const averageTagsPerMemory = tagAnalysis.averageTagsPerMemory;
606 |
607 | let score = 0;
608 |
609 | // Keyword diversity
610 | if (keywordDiversity >= 100) score += 25;
611 | else if (keywordDiversity >= 50) score += 15;
612 | else if (keywordDiversity >= 25) score += 5;
613 |
614 | // Tag diversity
615 | if (tagDiversity >= 50) score += 25;
616 | else if (tagDiversity >= 30) score += 15;
617 | else if (tagDiversity >= 15) score += 5;
618 |
619 | // Tag coverage
620 | if (averageTagsPerMemory >= 4) score += 25;
621 | else if (averageTagsPerMemory >= 3) score += 15;
622 | else if (averageTagsPerMemory >= 2) score += 5;
623 |
624 | // Content type diversity
625 | const contentTypes = Object.keys(contentAnalysis.contentTypes).length;
626 | if (contentTypes >= 5) score += 25;
627 | else if (contentTypes >= 3) score += 15;
628 | else if (contentTypes >= 2) score += 5;
629 |
630 | return {
631 | score,
632 | keywordDiversity,
633 | tagDiversity,
634 | averageTagsPerMemory: Math.round(averageTagsPerMemory * 10) / 10,
635 | contentTypeDiversity: contentTypes,
636 | issues: {
637 | lowKeywordDiversity: keywordDiversity < 25,
638 | lowTagDiversity: tagDiversity < 15,
639 | poorTagCoverage: averageTagsPerMemory < 2
640 | }
641 | };
642 | }
643 |
644 | /**
645 | * Generate quality improvement recommendations
646 | * @param {Object} metrics - Quality metrics object
647 | * @returns {Array} Array of recommendations
648 | */
649 | function generateQualityRecommendations(metrics) {
650 | const recommendations = [];
651 |
652 | // Tagging recommendations
653 | if (metrics.tagging.taggedPercentage < 90) {
654 | recommendations.push({
655 | category: 'tagging',
656 | priority: 'high',
657 | issue: `${metrics.tagging.issues.untagged} memories are untagged`,
658 | action: 'Run memory maintenance session to tag untagged memories',
659 | expectedImprovement: 'Improve searchability and organization'
660 | });
661 | }
662 |
663 | if (metrics.tagging.averageTagsPerMemory < 3) {
664 | recommendations.push({
665 | category: 'tagging',
666 | priority: 'medium',
667 | issue: 'Low average tags per memory',
668 | action: 'Add more specific and categorical tags to existing memories',
669 | expectedImprovement: 'Better categorization and discoverability'
670 | });
671 | }
672 |
673 | // Content recommendations
674 | if (metrics.content.substantialPercentage < 80) {
675 | recommendations.push({
676 | category: 'content',
677 | priority: 'medium',
678 | issue: `${metrics.content.issues.tooShort} memories have minimal content`,
679 | action: 'Expand brief memories with more context and details',
680 | expectedImprovement: 'Increased information value and searchability'
681 | });
682 | }
683 |
684 | // Organization recommendations
685 | if (metrics.organization.categoryCount < 3) {
686 | recommendations.push({
687 | category: 'organization',
688 | priority: 'high',
689 | issue: 'Limited tag category diversity',
690 | action: 'Implement standardized tag schema with multiple categories',
691 | expectedImprovement: 'Better knowledge organization structure'
692 | });
693 | }
694 |
695 | if (metrics.organization.tagDistributionBalance < 20) {
696 | recommendations.push({
697 | category: 'organization',
698 | priority: 'medium',
699 | issue: 'Imbalanced tag usage distribution',
700 | action: 'Review and balance tag usage across content types',
701 | expectedImprovement: 'More consistent knowledge organization'
702 | });
703 | }
704 |
705 | // Searchability recommendations
706 | if (metrics.searchability.tagDiversity < 30) {
707 | recommendations.push({
708 | category: 'searchability',
709 | priority: 'medium',
710 | issue: 'Limited tag vocabulary',
711 | action: 'Expand tag vocabulary with more specific and varied tags',
712 | expectedImprovement: 'Enhanced search precision and recall'
713 | });
714 | }
715 |
716 | return recommendations.sort((a, b) => {
717 | const priorityOrder = { 'high': 3, 'medium': 2, 'low': 1 };
718 | return priorityOrder[b.priority] - priorityOrder[a.priority];
719 | });
720 | }
721 |
722 | // =============================================================================
723 | // VISUALIZATION DATA PREPARATION
724 | // =============================================================================
725 |
726 | /**
727 | * Prepare comprehensive data package for visualizations
728 | * @param {Array} memories - Array of memory objects
729 | * @returns {Object} Complete visualization data package
730 | */
731 | function prepareVisualizationData(memories) {
732 | const temporal = analyzeTemporalDistribution(memories);
733 | const tags = analyzeTagUsage(memories);
734 | const content = analyzeContentPatterns(memories);
735 | const quality = assessMemoryQuality(memories);
736 |
737 | return {
738 | metadata: {
739 | totalMemories: memories.length,
740 | analysisDate: new Date().toISOString(),
741 | dataVersion: '1.0'
742 | },
743 |
744 | // Chart data for different visualizations
745 | charts: {
746 | temporalDistribution: prepareTemporalChartData(temporal, 'monthly'),
747 | weeklyPattern: prepareTemporalChartData(temporal, 'weekly'),
748 | dailyPattern: prepareTemporalChartData(temporal, 'daily'),
749 | hourlyPattern: prepareTemporalChartData(temporal, 'hourly'),
750 |
751 | tagFrequency: tags.frequency.slice(0, 20).map(([tag, count]) => ({
752 | tag,
753 | count,
754 | category: categorizeTag(tag)
755 | })),
756 |
757 | tagCombinations: tags.combinations.slice(0, 10).map(([combo, count]) => ({
758 | combination: combo,
759 | count,
760 | tags: combo.split(' + ')
761 | })),
762 |
763 | contentTypes: Object.entries(content.contentTypes).map(([type, count]) => ({
764 | type,
765 | count,
766 | percentage: Math.round((count / memories.length) * 100)
767 | })),
768 |
769 | contentLengths: Object.entries(content.lengthDistribution).map(([category, count]) => ({
770 | category,
771 | count,
772 | percentage: Math.round((count / memories.length) * 100)
773 | }))
774 | },
775 |
776 | // Summary statistics
777 | statistics: {
778 | temporal: {
779 | peakMonth: findPeakPeriod(temporal.monthly),
780 | mostActiveDay: findPeakPeriod(temporal.daily),
781 | mostActiveHour: findPeakPeriod(temporal.hourly)
782 | },
783 |
784 | tags: {
785 | totalUniqueTags: tags.totalTags,
786 | averageTagsPerMemory: Math.round(tags.averageTagsPerMemory * 10) / 10,
787 | mostUsedTag: tags.frequency[0],
788 | categoryDistribution: Object.keys(tags.categories).length
789 | },
790 |
791 | content: {
792 | averageLength: Math.round(content.averageLength),
793 | mostCommonType: Object.entries(content.contentTypes)
794 | .sort(([,a], [,b]) => b - a)[0],
795 | keywordCount: Object.keys(content.wordFrequency).length
796 | },
797 |
798 | quality: {
799 | overallScore: quality.overallScore,
800 | taggedPercentage: quality.metrics.tagging.taggedPercentage,
801 | organizationScore: quality.metrics.organization.score,
802 | recommendationCount: quality.recommendations.length
803 | }
804 | },
805 |
806 | // Raw analysis data for advanced processing
807 | rawData: {
808 | temporal,
809 | tags,
810 | content,
811 | quality
812 | }
813 | };
814 | }
815 |
816 | /**
817 | * Find peak period from distribution data
818 | * @param {Object} distribution - Distribution object
819 | * @returns {Object} Peak period information
820 | */
821 | function findPeakPeriod(distribution) {
822 | const entries = Object.entries(distribution);
823 | if (entries.length === 0) return null;
824 |
825 | const peak = entries.reduce((max, [period, memories]) =>
826 | memories.length > max.count ? { period, count: memories.length } : max,
827 | { period: null, count: 0 }
828 | );
829 |
830 | return peak;
831 | }
832 |
833 | // =============================================================================
834 | // EXPORT FUNCTIONS
835 | // =============================================================================
836 |
837 | /**
838 | * Export analysis results to various formats
839 | * @param {Object} analysisData - Complete analysis data
840 | * @param {string} format - Export format ('json', 'csv', 'summary')
841 | * @returns {string} Formatted export data
842 | */
843 | function exportAnalysisData(analysisData, format = 'json') {
844 | switch (format) {
845 | case 'json':
846 | return JSON.stringify(analysisData, null, 2);
847 |
848 | case 'csv':
849 | return exportToCSV(analysisData);
850 |
851 | case 'summary':
852 | return generateSummaryReport(analysisData);
853 |
854 | default:
855 | throw new Error(`Unsupported export format: ${format}`);
856 | }
857 | }
858 |
859 | /**
860 | * Export key metrics to CSV format
861 | * @param {Object} analysisData - Analysis data
862 | * @returns {string} CSV formatted data
863 | */
864 | function exportToCSV(analysisData) {
865 | const csvSections = [];
866 |
867 | // Temporal data
868 | csvSections.push('TEMPORAL DISTRIBUTION');
869 | csvSections.push('Month,Count');
870 | analysisData.charts.temporalDistribution.forEach(item => {
871 | csvSections.push(`${item.period},${item.count}`);
872 | });
873 | csvSections.push('');
874 |
875 | // Tag frequency
876 | csvSections.push('TAG FREQUENCY');
877 | csvSections.push('Tag,Count,Category');
878 | analysisData.charts.tagFrequency.forEach(item => {
879 | csvSections.push(`${item.tag},${item.count},${item.category}`);
880 | });
881 | csvSections.push('');
882 |
883 | // Content types
884 | csvSections.push('CONTENT TYPES');
885 | csvSections.push('Type,Count,Percentage');
886 | analysisData.charts.contentTypes.forEach(item => {
887 | csvSections.push(`${item.type},${item.count},${item.percentage}%`);
888 | });
889 |
890 | return csvSections.join('\n');
891 | }
892 |
893 | /**
894 | * Generate a human-readable summary report
895 | * @param {Object} analysisData - Analysis data
896 | * @returns {string} Summary report
897 | */
898 | function generateSummaryReport(analysisData) {
899 | const stats = analysisData.statistics;
900 | const quality = analysisData.rawData.quality;
901 |
902 | return `
903 | MEMORY ANALYSIS SUMMARY REPORT
904 | Generated: ${new Date().toLocaleDateString()}
905 |
906 | DATABASE OVERVIEW:
907 | - Total Memories: ${analysisData.metadata.totalMemories}
908 | - Overall Quality Score: ${stats.quality.overallScore}/100
909 | - Tagged Memories: ${stats.quality.taggedPercentage}%
910 |
911 | TEMPORAL PATTERNS:
912 | - Peak Activity: ${stats.temporal.peakMonth?.period} (${stats.temporal.peakMonth?.count} memories)
913 | - Most Active Day: ${stats.temporal.mostActiveDay?.period}
914 | - Most Active Hour: ${stats.temporal.mostActiveHour?.period}:00
915 |
916 | TAG ANALYSIS:
917 | - Unique Tags: ${stats.tags.totalUniqueTags}
918 | - Average Tags per Memory: ${stats.tags.averageTagsPerMemory}
919 | - Most Used Tag: ${stats.tags.mostUsedTag?.[0]} (${stats.tags.mostUsedTag?.[1]} uses)
920 | - Tag Categories: ${stats.tags.categoryDistribution}
921 |
922 | CONTENT INSIGHTS:
923 | - Average Length: ${stats.content.averageLength} characters
924 | - Most Common Type: ${stats.content.mostCommonType?.[0]}
925 | - Unique Keywords: ${stats.content.keywordCount}
926 |
927 | QUALITY RECOMMENDATIONS:
928 | ${quality.recommendations.slice(0, 3).map(rec =>
929 | `- ${rec.priority.toUpperCase()}: ${rec.action}`
930 | ).join('\n')}
931 |
932 | For detailed analysis, use the full JSON export or visualization tools.
933 | `.trim();
934 | }
935 |
936 | // =============================================================================
937 | // MAIN ANALYSIS PIPELINE
938 | // =============================================================================
939 |
940 | /**
941 | * Run complete analysis pipeline on memory data
942 | * @param {Array} memories - Array of memory objects
943 | * @returns {Object} Complete analysis results
944 | */
945 | async function runCompleteAnalysis(memories) {
946 | console.log('Starting comprehensive memory analysis...');
947 |
948 | const startTime = Date.now();
949 |
950 | try {
951 | // Run all analysis functions
952 | const results = prepareVisualizationData(memories);
953 |
954 | const endTime = Date.now();
955 | const duration = endTime - startTime;
956 |
957 | console.log(`Analysis complete in ${duration}ms`);
958 | console.log(`Analyzed ${memories.length} memories`);
959 | console.log(`Overall quality score: ${results.statistics.quality.overallScore}/100`);
960 |
961 | return {
962 | ...results,
963 | meta: {
964 | analysisDuration: duration,
965 | analysisTimestamp: new Date().toISOString(),
966 | version: '1.0'
967 | }
968 | };
969 |
970 | } catch (error) {
971 | console.error('Analysis failed:', error);
972 | throw error;
973 | }
974 | }
975 |
976 | // Export all functions for use in other modules
977 | if (typeof module !== 'undefined' && module.exports) {
978 | module.exports = {
979 | // Temporal analysis
980 | analyzeTemporalDistribution,
981 | prepareTemporalChartData,
982 |
983 | // Tag analysis
984 | analyzeTagUsage,
985 | analyzeTagConsistency,
986 | categorizeTag,
987 |
988 | // Content analysis
989 | analyzeContentPatterns,
990 | detectContentType,
991 | extractKeywords,
992 |
993 | // Quality analysis
994 | assessMemoryQuality,
995 | generateQualityRecommendations,
996 |
997 | // Visualization
998 | prepareVisualizationData,
999 |
1000 | // Export utilities
1001 | exportAnalysisData,
1002 | generateSummaryReport,
1003 |
1004 | // Main pipeline
1005 | runCompleteAnalysis
1006 | };
1007 | }
1008 |
1009 | /**
1010 | * Usage Examples:
1011 | *
1012 | * // Basic usage with MCP Memory Service data
1013 |  * const { memories } = await retrieve_memory({ query: "all memories", n_results: 500 });
1014 | * const analysis = await runCompleteAnalysis(memories);
1015 | *
1016 | * // Specific analyses
1017 | * const temporalData = analyzeTemporalDistribution(memories);
1018 | * const tagAnalysis = analyzeTagUsage(memories);
1019 | * const qualityReport = assessMemoryQuality(memories);
1020 | *
1021 | * // Export results
1022 | * const jsonExport = exportAnalysisData(analysis, 'json');
1023 | * const csvExport = exportAnalysisData(analysis, 'csv');
1024 | * const summary = exportAnalysisData(analysis, 'summary');
1025 | *
1026 | * // Prepare data for React charts
1027 | * const chartData = prepareVisualizationData(memories);
1028 | * // Use chartData.charts.temporalDistribution with the React component
1029 | */
```
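
The pipeline above only expects plain objects with `content`, `tags`, and `timestamp` fields, so it can be tried without a live service. A minimal sketch, assuming the file above is saved as `analysis-scripts.js` (the path is illustrative):

```javascript
// Sketch: run the analysis pipeline on hand-written memory objects.
const { runCompleteAnalysis, exportAnalysisData } = require('./analysis-scripts');

const memories = [
  {
    content: 'Fixed a bug in the tag search endpoint after debugging the query parser',
    tags: ['debugging', 'python', 'resolved'],
    timestamp: '2025-01-15T10:30:00Z'
  },
  {
    content: 'Design concept for the consolidation architecture and storage layout',
    tags: ['concept', 'architecture'],
    timestamp: '2025-02-03T14:00:00Z'
  }
];

runCompleteAnalysis(memories).then(analysis => {
  // Human-readable report; 'json' and 'csv' are also supported.
  console.log(exportAnalysisData(analysis, 'summary'));
});
```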
--------------------------------------------------------------------------------
/src/mcp_memory_service/web/api/analytics.py:
--------------------------------------------------------------------------------
```python
1 | # Copyright 2024 Heinrich Krupp
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | Analytics endpoints for the HTTP interface.
17 | 
18 | Provides usage statistics, trends, and performance metrics for the memory system.
19 | """
20 | 
21 | from __future__ import annotations
22 |
23 | import logging
24 | from typing import List, Optional, Dict, Any, TYPE_CHECKING, Tuple
25 | from datetime import datetime, timedelta, timezone
26 | from collections import defaultdict
27 | from dataclasses import dataclass
28 | from enum import Enum
29 |
30 | from fastapi import APIRouter, HTTPException, Depends, Query
31 | from pydantic import BaseModel, Field
32 |
33 | from ...storage.base import MemoryStorage
34 | from ...config import OAUTH_ENABLED
35 | from ..dependencies import get_storage
36 |
37 | # OAuth authentication imports (conditional)
38 | if OAUTH_ENABLED or TYPE_CHECKING:
39 | from ..oauth.middleware import require_read_access, AuthenticationResult
40 | else:
41 | # Provide type stubs when OAuth is disabled
42 | AuthenticationResult = None
43 | require_read_access = None
44 |
45 | router = APIRouter()
46 | logger = logging.getLogger(__name__)
47 |
48 |
49 | # Helper functions for analytics endpoints
50 | async def fetch_storage_stats(storage: MemoryStorage) -> Dict[str, Any]:
51 | """Fetch storage statistics from the storage backend.
52 |
53 | Args:
54 | storage: MemoryStorage backend instance
55 |
56 | Returns:
57 | Dict containing storage stats, or empty dict if unavailable
58 | """
59 | if hasattr(storage, 'get_stats'):
60 | try:
61 | return await storage.get_stats()
62 | except Exception as e:
63 | logger.warning(f"Failed to retrieve storage stats: {e}")
64 | return {}
65 | return {}
66 |
67 |
68 | def calculate_tag_statistics(tag_data: List[Dict[str, Any]], total_memories: int) -> List[TagUsageStats]:
69 | """Calculate tag usage statistics with percentages.
70 |
71 | Args:
72 | tag_data: List of dicts with 'tag' and 'count' keys
73 | total_memories: Total number of memories for percentage calculation
74 |
75 | Returns:
76 | List of TagUsageStats objects
77 | """
78 | tags = []
79 | for tag_item in tag_data:
80 | percentage = (tag_item["count"] / total_memories * 100) if total_memories > 0 else 0
81 | tags.append(TagUsageStats(
82 | tag=tag_item["tag"],
83 | count=tag_item["count"],
84 | percentage=round(percentage, 1),
85 | growth_rate=None # Would need historical data to calculate
86 | ))
87 | return tags
88 |
89 |
90 | def calculate_activity_time_ranges(timestamps: List[float], granularity: str) -> Tuple[List[ActivityBreakdown], set, List]:
91 | """Calculate activity breakdown by time range (hourly, daily, weekly).
92 |
93 | Args:
94 | timestamps: List of Unix timestamps
95 | granularity: One of 'hourly', 'daily', 'weekly'
96 |
97 | Returns:
98 | Tuple of (breakdown_list, active_days_set, activity_dates_list)
99 | """
100 | breakdown = []
101 | active_days = set()
102 | activity_dates = []
103 |
104 | # Convert all timestamps to datetime objects and populate active_days/activity_dates once
105 | dts = [datetime.fromtimestamp(ts, tz=timezone.utc) for ts in timestamps]
106 | for dt in dts:
107 | active_days.add(dt.date())
108 | activity_dates.append(dt.date())
109 |
110 | if granularity == "hourly":
111 | hour_counts = defaultdict(int)
112 | for dt in dts:
113 | hour_counts[dt.hour] += 1
114 |
115 | for hour in range(24):
116 | count = hour_counts.get(hour, 0)
117 | label = f"{hour:02d}:00"
118 | breakdown.append(ActivityBreakdown(
119 | period="hourly",
120 | count=count,
121 | label=label
122 | ))
123 |
124 | elif granularity == "daily":
125 | day_counts = defaultdict(int)
126 | day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
127 | for dt in dts:
128 | day_counts[dt.weekday()] += 1
129 |
130 | for i, day_name in enumerate(day_names):
131 | count = day_counts.get(i, 0)
132 | breakdown.append(ActivityBreakdown(
133 | period="daily",
134 | count=count,
135 | label=day_name
136 | ))
137 |
138 | else: # weekly
139 | week_counts = defaultdict(int)
140 | for dt in dts:
141 | # Get ISO week number with year
142 | year, week_num, _ = dt.isocalendar()
143 | week_key = f"{year}-W{week_num:02d}"
144 | week_counts[week_key] += 1
145 |
146 | # Last 12 weeks
147 | now = datetime.now(timezone.utc)
148 | for i in range(12):
149 | # Calculate target date
150 | target_date = now - timedelta(weeks=(11 - i))
151 | year, week_num, _ = target_date.isocalendar()
152 | week_key = f"{year}-W{week_num:02d}"
153 | count = week_counts.get(week_key, 0)
154 | breakdown.append(ActivityBreakdown(
155 | period="weekly",
156 | count=count,
157 | label=f"Week {week_num} ({year})"
158 | ))
159 |
160 | return breakdown, active_days, activity_dates
161 |
162 |
163 | def aggregate_type_statistics(type_counts: Dict[str, int], total_memories: int) -> List[MemoryTypeDistribution]:
164 | """Aggregate memory type statistics with percentages.
165 |
166 | Args:
167 | type_counts: Dict mapping memory types to counts
168 | total_memories: Total number of memories
169 |
170 | Returns:
171 | List of MemoryTypeDistribution objects, sorted by count descending
172 | """
173 | types = []
174 | for mem_type, count in type_counts.items():
175 | percentage = (count / total_memories * 100) if total_memories > 0 else 0
176 | types.append(MemoryTypeDistribution(
177 | memory_type=mem_type,
178 | count=count,
179 | percentage=round(percentage, 1)
180 | ))
181 |
182 | # Sort by count
183 | types.sort(key=lambda x: x.count, reverse=True)
184 | return types
185 |
186 |
187 | # Period Configuration for Analytics
188 | class PeriodType(str, Enum):
189 | """Valid time period types for analytics."""
190 | WEEK = "week"
191 | MONTH = "month"
192 | QUARTER = "quarter"
193 | YEAR = "year"
194 |
195 |
196 | @dataclass
197 | class PeriodConfig:
198 | """Configuration for time period analysis."""
199 | days: int
200 | interval_days: int
201 |
202 |
203 | PERIOD_CONFIGS = {
204 | PeriodType.WEEK: PeriodConfig(days=7, interval_days=1),
205 | PeriodType.MONTH: PeriodConfig(days=30, interval_days=7), # Weekly aggregation for monthly view
206 | PeriodType.QUARTER: PeriodConfig(days=90, interval_days=7),
207 | PeriodType.YEAR: PeriodConfig(days=365, interval_days=30),
208 | }
209 |
210 |
211 | def get_period_config(period: PeriodType) -> PeriodConfig:
212 | """Get configuration for the specified time period.
213 |
214 | Args:
215 | period: Time period identifier (week, month, quarter, year)
216 |
217 | Returns:
218 | PeriodConfig for the specified period
219 |
220 | Raises:
221 | HTTPException: If period is invalid
222 | """
223 | config = PERIOD_CONFIGS.get(period)
224 | if not config:
225 | valid_periods = ', '.join(p.value for p in PeriodType)
226 | raise HTTPException(
227 | status_code=400,
228 | detail=f"Invalid period. Use: {valid_periods}"
229 | )
230 | return config
231 |
232 |
233 | # Response Models
234 | class AnalyticsOverview(BaseModel):
235 | """Overview statistics for the memory system."""
236 | total_memories: int
237 | memories_this_week: int
238 | memories_this_month: int
239 | unique_tags: int
240 | database_size_mb: Optional[float]
241 | uptime_seconds: Optional[float]
242 | backend_type: str
243 |
244 |
245 | class MemoryGrowthPoint(BaseModel):
246 | """Data point for memory growth over time."""
247 | date: str # YYYY-MM-DD format
248 | count: int
249 | cumulative: int
250 | label: Optional[str] = None # Human-readable label (e.g., "Week of Nov 1", "November 2024")
251 |
252 |
253 | class MemoryGrowthData(BaseModel):
254 | """Memory growth data over time."""
255 | data_points: List[MemoryGrowthPoint]
256 | period: str # "week", "month", "quarter", "year"
257 |
258 |
259 | class TagUsageStats(BaseModel):
260 | """Usage statistics for a specific tag."""
261 | tag: str
262 | count: int
263 | percentage: float
264 | growth_rate: Optional[float] # Growth rate compared to previous period
265 |
266 |
267 | class TagUsageData(BaseModel):
268 | """Tag usage analytics."""
269 | tags: List[TagUsageStats]
270 | total_memories: int
271 | period: str
272 |
273 |
274 | class MemoryTypeDistribution(BaseModel):
275 | """Distribution of memories by type."""
276 | memory_type: str
277 | count: int
278 | percentage: float
279 |
280 |
281 | class MemoryTypeData(BaseModel):
282 | """Memory type distribution data."""
283 | types: List[MemoryTypeDistribution]
284 | total_memories: int
285 |
286 |
287 | class SearchAnalytics(BaseModel):
288 | """Search usage analytics."""
289 | total_searches: int = 0
290 | avg_response_time: Optional[float] = None
291 | popular_tags: List[Dict[str, Any]] = []
292 | search_types: Dict[str, int] = {}
293 |
294 |
295 | class PerformanceMetrics(BaseModel):
296 | """System performance metrics."""
297 | avg_response_time: Optional[float] = None
298 | memory_usage_mb: Optional[float] = None
299 | storage_latency: Optional[float] = None
300 | error_rate: Optional[float] = None
301 |
302 |
303 | class ActivityHeatmapData(BaseModel):
304 | """Activity heatmap data for calendar view."""
305 | date: str # YYYY-MM-DD format
306 | count: int
307 | level: int # 0-4 activity level for color coding
308 |
309 |
310 | class ActivityHeatmapResponse(BaseModel):
311 | """Response containing activity heatmap data."""
312 | data: List[ActivityHeatmapData]
313 | total_days: int
314 | max_count: int
315 |
316 |
317 | class TopTagsReport(BaseModel):
318 | """Enhanced top tags report with trends and co-occurrence."""
319 | tag: str
320 | count: int
321 | percentage: float
322 | growth_rate: Optional[float]
323 | trending: bool # Is usage increasing
324 | co_occurring_tags: List[Dict[str, Any]] # Tags that appear with this tag
325 |
326 |
327 | class TopTagsResponse(BaseModel):
328 | """Response for top tags report."""
329 | tags: List[TopTagsReport]
330 | period: str
331 |
332 |
333 | class ActivityBreakdown(BaseModel):
334 | """Activity breakdown by time period."""
335 |     period: str  # Granularity: "hourly", "daily", or "weekly"
336 |     count: int
337 |     label: str  # e.g., "Monday", "14:00", "Week 45 (2024)"
338 |
339 |
340 | class ActivityReport(BaseModel):
341 | """Comprehensive activity report."""
342 | breakdown: List[ActivityBreakdown]
343 | peak_times: List[str]
344 | active_days: int
345 | total_days: int
346 | current_streak: int
347 | longest_streak: int
348 |
349 |
350 | class LargestMemory(BaseModel):
351 | """A single large memory entry."""
352 | content_hash: str
353 | size_bytes: int
354 | size_kb: float
355 | created_at: Optional[str] = None
356 | tags: List[str] = []
357 | preview: str # First 100 chars
358 |
359 |
360 | class GrowthTrendPoint(BaseModel):
361 | """Storage growth at a point in time."""
362 | date: str # ISO format YYYY-MM-DD
363 | total_size_mb: float
364 | memory_count: int
365 |
366 |
367 | class StorageStats(BaseModel):
368 | """Storage statistics and largest memories."""
369 | total_size_mb: float
370 | average_memory_size: float
371 | largest_memories: List[LargestMemory]
372 | growth_trend: List[GrowthTrendPoint]
373 | storage_efficiency: float # Percentage of efficient storage
374 |
375 |
376 | @router.get("/overview", response_model=AnalyticsOverview, tags=["analytics"])
377 | async def get_analytics_overview(
378 | storage: MemoryStorage = Depends(get_storage),
379 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
380 | ):
381 | """
382 | Get overview analytics for the memory system.
383 |
384 | Returns key metrics including total memories, recent activity, and system stats.
385 | """
386 | try:
387 | # Get detailed health data which contains most stats
388 | if hasattr(storage, 'get_stats'):
389 | try:
390 | stats = await storage.get_stats()
391 | logger.info(f"Storage stats: {stats}") # Debug logging
392 | except Exception as e:
393 | logger.warning(f"Failed to retrieve storage stats: {e}")
394 | stats = {}
395 | else:
396 | stats = {}
397 |
398 | # Get memories_this_week from storage stats (accurate for all memories)
399 | memories_this_week = stats.get("memories_this_week", 0)
400 |
401 | # Calculate memories this month
402 | # TODO: Add memories_this_month to storage.get_stats() for consistency
403 | month_ago = datetime.now(timezone.utc) - timedelta(days=30)
404 | month_ago_ts = month_ago.timestamp()
405 | memories_this_month = 0
406 | try:
407 | # Use larger sample for monthly calculation
408 | # Note: This may be inaccurate if there are >5000 memories
409 | recent_memories = await storage.get_recent_memories(n=5000)
410 | memories_this_month = sum(1 for m in recent_memories if m.created_at and m.created_at > month_ago_ts)
411 | except Exception as e:
412 | logger.warning(f"Failed to calculate monthly memories: {e}")
413 | memories_this_month = 0
414 |
415 | return AnalyticsOverview(
416 | total_memories=stats.get("total_memories", 0),
417 | memories_this_week=memories_this_week,
418 | memories_this_month=memories_this_month,
419 | unique_tags=stats.get("unique_tags", 0),
420 | database_size_mb=stats.get("primary_stats", {}).get("database_size_mb") or stats.get("database_size_mb"),
421 | uptime_seconds=None, # Would need to be calculated from health endpoint
422 | backend_type=stats.get("storage_backend", "unknown")
423 | )
424 |
425 | except Exception as e:
426 | logger.error(f"Failed to get analytics overview: {str(e)}")
427 | raise HTTPException(status_code=500, detail=f"Failed to get analytics overview: {str(e)}")
428 |
429 |
430 | # Label formatters for each period type
431 | PERIOD_LABEL_FORMATTERS = {
432 | PeriodType.WEEK: lambda date: date.strftime("%b %d"), # "Nov 15"
433 | PeriodType.MONTH: lambda date: f"Week of {date.strftime('%b %d')}", # "Week of Nov 15"
434 | PeriodType.QUARTER: lambda date: f"Week of {date.strftime('%b %d')}", # "Week of Nov 15"
435 | PeriodType.YEAR: lambda date: date.strftime("%B %Y"), # "November 2024"
436 | }
437 |
438 |
439 | def _generate_interval_label(date: datetime, period: PeriodType) -> str:
440 | """
441 | Generate a human-readable label for a date interval based on the period type.
442 |
443 | Args:
444 | date: The date for the interval
445 | period: The period type (week, month, quarter, year)
446 |
447 | Returns:
448 | A formatted label string
449 | """
450 | formatter = PERIOD_LABEL_FORMATTERS.get(period)
451 | if formatter:
452 | return formatter(date)
453 | # Fallback to ISO format
454 | return date.strftime("%Y-%m-%d")
455 |
456 |
457 | @router.get("/memory-growth", response_model=MemoryGrowthData, tags=["analytics"])
458 | async def get_memory_growth(
459 | period: PeriodType = Query(PeriodType.MONTH, description="Time period: week, month, quarter, year"),
460 | storage: MemoryStorage = Depends(get_storage),
461 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
462 | ):
463 | """
464 | Get memory growth data over time.
465 |
466 | Returns data points showing how the memory count has grown over the specified period.
467 | """
468 | try:
469 | # Get period configuration
470 | config = get_period_config(period)
471 | days = config.days
472 | interval_days = config.interval_days
473 |
474 | # Calculate date ranges
475 | end_date = datetime.now(timezone.utc)
476 | start_date = end_date - timedelta(days=days)
477 |
478 |         # Date-range filtering is delegated to the storage layer below;
479 |         # only interval bucketing and label generation happen in Python
480 | data_points = []
481 | cumulative = 0
482 |
483 | try:
484 | # Performance optimization: Use database-layer filtering instead of
485 | # fetching all memories and filtering in Python (10x improvement)
486 | # This pushes the date range filter to the storage backend (SQLite WHERE clause
487 | # or Cloudflare D1 query), reducing memory consumption and network transfer
488 | date_counts = defaultdict(int)
489 | start_timestamp = start_date.timestamp()
490 | end_timestamp = end_date.timestamp()
491 |
492 | # Get memories in date range (database-filtered)
493 | memories_in_range = await storage.get_memories_by_time_range(start_timestamp, end_timestamp)
494 |
495 | # Group by date
496 | for memory in memories_in_range:
497 | if memory.created_at:
498 | mem_date = datetime.fromtimestamp(memory.created_at, tz=timezone.utc).date()
499 | date_counts[mem_date] += 1
500 |
501 | # Create data points
502 | current_date = start_date.date()
503 | while current_date <= end_date.date():
504 | # For intervals > 1 day, sum counts across the entire interval
505 | interval_end = current_date + timedelta(days=interval_days)
506 | count = 0
507 |
508 | # Sum all memories within this interval
509 | check_date = current_date
510 | while check_date < interval_end and check_date <= end_date.date():
511 | count += date_counts.get(check_date, 0)
512 | check_date += timedelta(days=1)
513 |
514 | cumulative += count
515 |
516 | # Convert date to datetime for label generation
517 | current_datetime = datetime.combine(current_date, datetime.min.time())
518 | label = _generate_interval_label(current_datetime, period)
519 |
520 | data_points.append(MemoryGrowthPoint(
521 | date=current_date.isoformat(),
522 | count=count,
523 | cumulative=cumulative,
524 | label=label
525 | ))
526 |
527 | current_date += timedelta(days=interval_days)
528 |
529 | except Exception as e:
530 | logger.warning(f"Failed to calculate memory growth: {str(e)}")
531 | # Return empty data if calculation fails
532 | data_points = []
533 |
534 | return MemoryGrowthData(
535 | data_points=data_points,
536 | period=period.value
537 | )
538 |
539 | except HTTPException:
540 | raise
541 | except Exception as e:
542 | logger.error(f"Failed to get memory growth data: {str(e)}")
543 | raise HTTPException(status_code=500, detail=f"Failed to get memory growth data: {str(e)}")
544 |
545 |
546 | @router.get("/tag-usage", response_model=TagUsageData, tags=["analytics"])
547 | async def get_tag_usage_analytics(
548 | period: str = Query("all", description="Time period: week, month, all"),
549 | limit: int = Query(20, description="Maximum number of tags to return"),
550 | storage: MemoryStorage = Depends(get_storage),
551 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
552 | ):
553 | """
554 | Get tag usage analytics.
555 |
556 |     Returns tag usage statistics. Note: the period parameter is currently echoed in the response without filtering results; use /top-tags for period-filtered tag data.
557 | """
558 | try:
559 | # Get all tags with counts
560 | if hasattr(storage, 'get_all_tags_with_counts'):
561 | tag_data = await storage.get_all_tags_with_counts()
562 | else:
563 | raise HTTPException(status_code=501, detail="Tag analytics not supported by storage backend")
564 |
565 | # Get total memories for accurate percentage calculation
566 | stats = await fetch_storage_stats(storage)
567 | total_memories = stats.get("total_memories", 0)
568 |
569 | if total_memories == 0:
570 | # Fallback: count all memories directly for an accurate total.
571 | total_memories = await storage.count_all_memories()
572 |
573 | # Sort by count and limit
574 | tag_data.sort(key=lambda x: x["count"], reverse=True)
575 | tag_data = tag_data[:limit]
576 |
577 | # Convert to response format using helper
578 | tags = calculate_tag_statistics(tag_data, total_memories)
579 |
580 | return TagUsageData(
581 | tags=tags,
582 | total_memories=total_memories,
583 | period=period
584 | )
585 |
586 | except HTTPException:
587 | raise
588 | except Exception as e:
589 | logger.error(f"Failed to get tag usage analytics: {str(e)}")
590 | raise HTTPException(status_code=500, detail=f"Failed to get tag usage analytics: {str(e)}")
591 |
592 |
593 | @router.get("/memory-types", response_model=MemoryTypeData, tags=["analytics"])
594 | async def get_memory_type_distribution(
595 | storage: MemoryStorage = Depends(get_storage),
596 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
597 | ):
598 | """
599 | Get distribution of memories by type.
600 |
601 | Returns statistics about how memories are categorized by type.
602 | """
603 | try:
604 | # Try to get accurate counts from storage layer if available
605 | if hasattr(storage, 'get_type_counts'):
606 | type_counts_data = await storage.get_type_counts()
607 | type_counts = dict(type_counts_data)
608 | total_memories = sum(type_counts.values())
609 | # For Hybrid storage, access underlying SQLite primary storage
610 | elif hasattr(storage, 'primary') and hasattr(storage.primary, 'conn') and storage.primary.conn:
611 |             # Hybrid storage: query the primary SQLite connection directly
612 |             # so counts cover the whole database rather than a sample
613 | cursor = storage.primary.conn.cursor()
614 | cursor.execute("""
615 | SELECT
616 | CASE
617 | WHEN memory_type IS NULL OR memory_type = '' THEN 'untyped'
618 | ELSE memory_type
619 | END as mem_type,
620 | COUNT(*) as count
621 | FROM memories
622 | GROUP BY mem_type
623 | """)
624 | type_counts = {row[0]: row[1] for row in cursor.fetchall()}
625 |
626 | cursor.execute("SELECT COUNT(*) FROM memories")
627 | total_memories = cursor.fetchone()[0]
628 | elif hasattr(storage, 'conn') and storage.conn:
629 |             # Direct SQLite storage: query the connection for exact counts
630 |             # over the full memories table
631 | cursor = storage.conn.cursor()
632 | cursor.execute("""
633 | SELECT
634 | CASE
635 | WHEN memory_type IS NULL OR memory_type = '' THEN 'untyped'
636 | ELSE memory_type
637 | END as mem_type,
638 | COUNT(*) as count
639 | FROM memories
640 | GROUP BY mem_type
641 | """)
642 | type_counts = {row[0]: row[1] for row in cursor.fetchall()}
643 |
644 | cursor.execute("SELECT COUNT(*) FROM memories")
645 | total_memories = cursor.fetchone()[0]
646 | else:
647 | # Fallback to sampling approach (less accurate for large databases)
648 | logger.warning("Using sampling approach for memory type distribution - results may not reflect entire database")
649 | memories = await storage.get_recent_memories(n=1000)
650 |
651 | type_counts = defaultdict(int)
652 | for memory in memories:
653 | mem_type = memory.memory_type or "untyped"
654 | type_counts[mem_type] += 1
655 |
656 | total_memories = len(memories)
657 |
658 | # Convert to response format using helper
659 | types = aggregate_type_statistics(type_counts, total_memories)
660 |
661 | return MemoryTypeData(
662 | types=types,
663 | total_memories=total_memories
664 | )
665 |
666 | except Exception as e:
667 | logger.error(f"Failed to get memory type distribution: {str(e)}")
668 | raise HTTPException(status_code=500, detail=f"Failed to get memory type distribution: {str(e)}")
669 |
670 |
671 | @router.get("/search-analytics", response_model=SearchAnalytics, tags=["analytics"])
672 | async def get_search_analytics(
673 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
674 | ):
675 | """
676 | Get search usage analytics.
677 |
678 | Returns statistics about search patterns and performance.
679 | This is a placeholder - real implementation would need search logging.
680 | """
681 | # Placeholder implementation
682 | # In a real system, this would analyze search logs
683 | return SearchAnalytics(
684 | total_searches=0,
685 | avg_response_time=None,
686 | popular_tags=[],
687 | search_types={}
688 | )
689 |
690 |
691 | @router.get("/performance", response_model=PerformanceMetrics, tags=["analytics"])
692 | async def get_performance_metrics(
693 | storage: MemoryStorage = Depends(get_storage),
694 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
695 | ):
696 | """
697 | Get system performance metrics.
698 |
699 | Returns performance statistics for the memory system.
700 | """
701 | # Placeholder implementation
702 | # In a real system, this would collect actual performance metrics
703 | return PerformanceMetrics(
704 | avg_response_time=None,
705 | memory_usage_mb=None,
706 | storage_latency=None,
707 | error_rate=None
708 | )
709 |
710 |
711 | @router.get("/activity-heatmap", response_model=ActivityHeatmapResponse, tags=["analytics"])
712 | async def get_activity_heatmap(
713 | days: int = Query(365, description="Number of days to include in heatmap"),
714 | storage: MemoryStorage = Depends(get_storage),
715 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
716 | ):
717 | """
718 | Get activity heatmap data for calendar view.
719 |
720 | Returns daily activity counts for the specified period, with activity levels for color coding.
721 | """
722 | try:
723 | # Use optimized timestamp-only fetching (v8.18.0+)
724 | timestamps = await storage.get_memory_timestamps(days=days)
725 |
726 | # Group by date
727 | date_counts = defaultdict(int)
728 |
729 | end_date = datetime.now(timezone.utc).date()
730 | start_date = end_date - timedelta(days=days)
731 |
732 | for timestamp in timestamps:
733 | mem_date = datetime.fromtimestamp(timestamp, tz=timezone.utc).date()
734 | if start_date <= mem_date <= end_date:
735 | date_counts[mem_date] += 1
736 |
737 | # Create heatmap data
738 | heatmap_data = []
739 | total_days = 0
740 |         # Compute max_count up front so activity levels are scaled against
741 |         # the true maximum, not a running maximum that grows mid-iteration
742 |         max_count = max(date_counts.values(), default=0)
743 |         current_date = start_date
744 |         while current_date <= end_date:
745 |             count = date_counts.get(current_date, 0)
746 |             if count > 0:
747 |                 total_days += 1
748 |
749 | # Calculate activity level (0-4)
750 | if count == 0:
751 | level = 0
752 | elif count <= max_count * 0.25:
753 | level = 1
754 | elif count <= max_count * 0.5:
755 | level = 2
756 | elif count <= max_count * 0.75:
757 | level = 3
758 | else:
759 | level = 4
760 |
761 | heatmap_data.append(ActivityHeatmapData(
762 | date=current_date.isoformat(),
763 | count=count,
764 | level=level
765 | ))
766 |
767 | current_date += timedelta(days=1)
768 |
769 | return ActivityHeatmapResponse(
770 | data=heatmap_data,
771 | total_days=total_days,
772 | max_count=max_count
773 | )
774 |
775 | except Exception as e:
776 | logger.error(f"Failed to get activity heatmap: {str(e)}")
777 | raise HTTPException(status_code=500, detail=f"Failed to get activity heatmap: {str(e)}")
778 |
779 |
780 | @router.get("/top-tags", response_model=TopTagsResponse, tags=["analytics"])
781 | async def get_top_tags_report(
782 | period: str = Query("30d", description="Time period: 7d, 30d, 90d, all"),
783 | limit: int = Query(20, description="Maximum number of tags to return"),
784 | storage: MemoryStorage = Depends(get_storage),
785 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
786 | ):
787 | """
788 | Get enhanced top tags report with trends and co-occurrence patterns.
789 |
790 | Returns detailed tag analytics including usage trends and related tags.
791 | """
792 | try:
793 | # Parse period
794 | if period == "7d":
795 | days = 7
796 | elif period == "30d":
797 | days = 30
798 | elif period == "90d":
799 | days = 90
800 | else: # "all"
801 | days = None
802 |
803 | # Get tag usage data
804 | if hasattr(storage, 'get_all_tags_with_counts'):
805 | tag_data = await storage.get_all_tags_with_counts()
806 | else:
807 | raise HTTPException(status_code=501, detail="Tag analytics not supported by storage backend")
808 |
809 | # Get total memories
810 | if hasattr(storage, 'get_stats'):
811 | stats = await storage.get_stats()
812 | total_memories = stats.get("total_memories", 0)
813 | else:
814 |             total_memories = sum(tag["count"] for tag in tag_data)  # approximation: overcounts memories that carry multiple tags
815 |
816 | if total_memories == 0:
817 | return TopTagsResponse(tags=[], period=period)
818 |
819 | # Filter by time period if needed
820 | if days is not None:
821 | cutoff_ts = (datetime.now(timezone.utc) - timedelta(days=days)).timestamp()
822 |
823 | # Get memories within the time range and count their tags
824 | if hasattr(storage, 'get_memories_by_time_range'):
825 | # Get memories from cutoff_ts to now
826 | now_ts = datetime.now(timezone.utc).timestamp()
827 | memories_in_period = await storage.get_memories_by_time_range(cutoff_ts, now_ts)
828 |
829 | # Count tags from memories in this period
830 | from collections import Counter
831 | tag_counter = Counter()
832 | period_memory_count = 0
833 |
834 | for memory in memories_in_period:
835 | period_memory_count += 1
836 | if memory.tags:
837 | for tag in memory.tags:
838 | tag_counter[tag] += 1
839 |
840 | # Convert to the expected format
841 | tag_data = [{"tag": tag, "count": count} for tag, count in tag_counter.items()]
842 | total_memories = period_memory_count
843 | # If the storage backend doesn't support time range queries, fall back to all tags
844 | # (This maintains backward compatibility with storage backends that don't implement the method)
845 |
846 | # Sort and limit
847 | tag_data.sort(key=lambda x: x["count"], reverse=True)
848 | tag_data = tag_data[:limit]
849 |
850 | # Calculate co-occurrence (simplified)
851 | # In a real implementation, this would analyze memory-tag relationships
852 | enhanced_tags = []
853 | for tag_item in tag_data:
854 | percentage = (tag_item["count"] / total_memories * 100) if total_memories > 0 else 0
855 |
856 | # Placeholder co-occurrence data
857 | # Real implementation would query the storage for tag co-occurrence
858 | co_occurring = [
859 | {"tag": "related-tag-1", "count": 5, "strength": 0.8},
860 | {"tag": "related-tag-2", "count": 3, "strength": 0.6}
861 | ]
862 |
863 | enhanced_tags.append(TopTagsReport(
864 | tag=tag_item["tag"],
865 | count=tag_item["count"],
866 | percentage=round(percentage, 1),
867 | growth_rate=None, # Would need historical data
868 | trending=False, # Would need trend analysis
869 | co_occurring_tags=co_occurring
870 | ))
871 |
872 | return TopTagsResponse(
873 | tags=enhanced_tags,
874 | period=period
875 | )
876 |
877 | except HTTPException:
878 | raise
879 | except Exception as e:
880 | logger.error(f"Failed to get top tags report: {str(e)}")
881 | raise HTTPException(status_code=500, detail=f"Failed to get top tags report: {str(e)}")
882 |
883 |
884 | @router.get("/activity-breakdown", response_model=ActivityReport, tags=["analytics"])
885 | async def get_activity_breakdown(
886 | granularity: str = Query("daily", description="Time granularity: hourly, daily, weekly"),
887 | storage: MemoryStorage = Depends(get_storage),
888 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
889 | ):
890 | """
891 | Get activity breakdown and patterns.
892 |
893 | Returns activity statistics by time period, peak times, and streak information.
894 | """
895 | try:
896 | # Use optimized timestamp-only fetching (v8.18.0+)
897 | # Get last 90 days of timestamps (adequate for all granularity levels)
898 | timestamps = await storage.get_memory_timestamps(days=90)
899 |
900 | # Group by granularity using helper function
901 | breakdown, active_days, activity_dates = calculate_activity_time_ranges(timestamps, granularity)
902 |
903 | # Calculate streaks
904 | activity_dates = sorted(set(activity_dates))
905 | current_streak = 0
906 | longest_streak = 0
907 |
908 | if activity_dates:
909 | # Current streak - check backwards from today
910 | today = datetime.now(timezone.utc).date()
911 | activity_dates_set = set(activity_dates)
912 |
913 | # A streak is only "current" if it includes today
914 | if today in activity_dates_set:
915 | day_to_check = today
916 | while day_to_check in activity_dates_set:
917 | current_streak += 1
918 | day_to_check -= timedelta(days=1)
919 |
920 | # Longest streak - iterate through sorted dates
921 | temp_streak = 1 # Start at 1, not 0
922 | longest_streak = 1 # At least 1 if there's any activity
923 |
924 | for i in range(1, len(activity_dates)):
925 | if activity_dates[i] == activity_dates[i-1] + timedelta(days=1):
926 | temp_streak += 1
927 | longest_streak = max(longest_streak, temp_streak)
928 | else:
929 | temp_streak = 1 # Reset to 1, not 0
930 |
931 | # Find peak times (top 3)
932 | sorted_breakdown = sorted(breakdown, key=lambda x: x.count, reverse=True)
933 | peak_times = [item.label for item in sorted_breakdown[:3]]
934 |
935 | # Calculate total_days as the span from oldest to newest memory
936 | total_days = (activity_dates[-1] - activity_dates[0]).days + 1 if len(activity_dates) >= 2 else len(activity_dates)
937 |
938 | return ActivityReport(
939 | breakdown=breakdown,
940 | peak_times=peak_times,
941 | active_days=len(active_days),
942 | total_days=total_days,
943 | current_streak=current_streak,
944 | longest_streak=max(longest_streak, current_streak)
945 | )
946 |
947 | except Exception as e:
948 | logger.error(f"Failed to get activity breakdown: {str(e)}")
949 | raise HTTPException(status_code=500, detail=f"Failed to get activity breakdown: {str(e)}")
950 |
951 |
952 | @router.get("/storage-stats", response_model=StorageStats, tags=["analytics"])
953 | async def get_storage_stats(
954 | storage: MemoryStorage = Depends(get_storage),
955 | user: AuthenticationResult = Depends(require_read_access) if OAUTH_ENABLED else None
956 | ):
957 | """
958 | Get storage statistics and largest memories.
959 |
960 | Returns comprehensive storage analytics including size trends and largest memories.
961 | """
962 | try:
963 | # Get basic stats
964 | if hasattr(storage, 'get_stats'):
965 | stats = await storage.get_stats()
966 | else:
967 | stats = {}
968 |
969 | total_size_mb = stats.get("primary_stats", {}).get("database_size_mb") or stats.get("database_size_mb") or 0
970 | total_memories = stats.get("primary_stats", {}).get("total_memories") or stats.get("total_memories") or 0
971 |
972 | # Get recent memories for average size calculation (smaller sample)
973 | recent_memories = await storage.get_recent_memories(n=100)
974 |
975 | if recent_memories:
976 | # Calculate average memory size from recent sample
977 |             total_content_length = sum(len((memory.content or "").encode("utf-8")) for memory in recent_memories)
978 | average_memory_size = total_content_length / len(recent_memories)
979 | else:
980 | average_memory_size = 0
981 |
982 | # Get largest memories using efficient database query
983 | largest_memories_objs = await storage.get_largest_memories(n=10)
984 | largest_memories = []
985 | for memory in largest_memories_objs:
986 |             content = memory.content or ""
987 |             size_bytes = len(content.encode("utf-8"))  # byte length, not character count
988 | largest_memories.append(LargestMemory(
989 | content_hash=memory.content_hash,
990 | size_bytes=size_bytes,
991 | size_kb=round(size_bytes / 1024, 2),
992 | created_at=datetime.fromtimestamp(memory.created_at, tz=timezone.utc).isoformat() if memory.created_at else None,
993 | tags=memory.tags or [],
994 | preview=content[:100] + "..." if len(content) > 100 else content
995 | ))
996 |
997 | # Placeholder growth trend (would need historical data)
998 | now = datetime.now(timezone.utc)
999 | growth_trend = [
1000 | GrowthTrendPoint(
1001 | date=(now - timedelta(days=i)).date().isoformat(),
1002 |                 total_size_mb=round(total_size_mb * (1 - i * 0.01), 2),
1003 |                 memory_count=int(total_memories * (1 - i * 0.01))
1004 | )
1005 | for i in range(30, 0, -1)
1006 | ]
1007 |
1008 | # Storage efficiency (placeholder)
1009 | storage_efficiency = 85.0 # Would calculate based on deduplication, etc.
1010 |
1011 | return StorageStats(
1012 | total_size_mb=round(total_size_mb, 2),
1013 | average_memory_size=round(average_memory_size, 2),
1014 | largest_memories=largest_memories,
1015 | growth_trend=growth_trend,
1016 | storage_efficiency=storage_efficiency
1017 | )
1018 |
1019 | except Exception as e:
1020 | logger.error(f"Failed to get storage stats: {str(e)}")
1021 | raise HTTPException(status_code=500, detail=f"Failed to get storage stats: {str(e)}")
1022 |
```
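
The percentage math in `calculate_tag_statistics` and `aggregate_type_statistics` is pure and easy to sanity-check in isolation. Below is a minimal, self-contained sketch of the same calculation on hypothetical data; `TagStat` is a simplified stand-in for the module's `TagUsageStats` Pydantic model, not the real class.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class TagStat:
    """Simplified stand-in for the module's TagUsageStats model."""
    tag: str
    count: int
    percentage: float
    growth_rate: Optional[float] = None

def tag_stats(tag_data, total_memories):
    """Mirror the helper's math: guard against zero totals, round to 1 decimal."""
    return [
        TagStat(
            tag=item["tag"],
            count=item["count"],
            percentage=round(item["count"] / total_memories * 100, 1) if total_memories else 0.0,
        )
        for item in tag_data
    ]

if __name__ == "__main__":
    sample = [{"tag": "python", "count": 42}, {"tag": "fastapi", "count": 7}]
    for stat in tag_stats(sample, total_memories=120):
        print(stat)
    # TagStat(tag='python', count=42, percentage=35.0, growth_rate=None)
    # TagStat(tag='fastapi', count=7, percentage=5.8, growth_rate=None)
```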
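
The `/activity-heatmap` endpoint buckets each day into activity levels 0-4 by comparing its count against the maximum over the whole window, which is why `max_count` must be computed before any level is assigned. A standalone sketch of that bucketing, using plain dicts in place of the storage layer:

```python
from datetime import date, timedelta

def activity_levels(date_counts: dict) -> dict:
    """Assign a 0-4 level per day, scaled against the true window maximum."""
    max_count = max(date_counts.values(), default=0)
    levels = {}
    for day, count in date_counts.items():
        if count == 0 or max_count == 0:
            level = 0
        elif count <= max_count * 0.25:
            level = 1
        elif count <= max_count * 0.5:
            level = 2
        elif count <= max_count * 0.75:
            level = 3
        else:
            level = 4
        levels[day] = level
    return levels

if __name__ == "__main__":
    today = date.today()
    counts = {today - timedelta(days=i): c for i, c in enumerate([8, 2, 0, 5])}
    print(activity_levels(counts))  # 8 -> level 4, 2 -> level 1, 0 -> 0, 5 -> 3
```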
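
The streak logic in `/activity-breakdown` counts a streak as "current" only if it includes today, and finds the longest streak by scanning sorted dates for consecutive runs. A self-contained sketch under those same rules (a hypothetical helper, not the module's code; the endpoint derives "today" in UTC, while `date.today()` here is the local-time simplification):

```python
from datetime import date, timedelta

def streaks(activity_dates: list) -> tuple:
    """Return (current_streak, longest_streak) from a list of activity dates."""
    days = sorted(set(activity_dates))
    if not days:
        return 0, 0
    day_set = set(days)
    # Current streak: walk backwards from today while days stay active
    current = 0
    check = date.today()
    while check in day_set:
        current += 1
        check -= timedelta(days=1)
    # Longest streak: count consecutive-day runs across the sorted dates
    longest = temp = 1
    for prev, nxt in zip(days, days[1:]):
        temp = temp + 1 if nxt == prev + timedelta(days=1) else 1
        longest = max(longest, temp)
    return current, max(longest, current)

if __name__ == "__main__":
    today = date.today()
    dates = [today, today - timedelta(days=1), today - timedelta(days=5)]
    print(streaks(dates))  # (2, 2): two consecutive active days ending today
```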
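
For reference, here is a hypothetical client session against these routes. The base URL and mount prefix (`/api/analytics`) are assumptions about the deployment rather than something this file defines; adjust both to match your server and authentication setup.

```python
import httpx

BASE = "http://localhost:8000/api/analytics"  # assumed mount prefix; adjust to your deployment

def main() -> None:
    with httpx.Client(timeout=10.0) as client:
        overview = client.get(f"{BASE}/overview").json()
        print("total memories:", overview.get("total_memories"))

        # period must be one of the PeriodType values: week, month, quarter, year
        growth = client.get(f"{BASE}/memory-growth", params={"period": "month"}).json()
        print("growth points:", len(growth.get("data_points", [])))

        heatmap = client.get(f"{BASE}/activity-heatmap", params={"days": 90}).json()
        print("max daily count:", heatmap.get("max_count"))

if __name__ == "__main__":
    main()
```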