This is page 18 of 27. Use http://codebase.md/basicmachines-co/basic-memory?lines=true&page={x} to view the full context.
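
For example, a minimal sketch for pulling all 27 pages programmatically (assuming the endpoint serves the rendered markdown as plain text, which is an assumption about codebase.md rather than documented behavior):

```python
from urllib.request import urlopen

# Hypothetical helper: the URL template comes from the note above;
# the plain-text response format is assumed, not verified.
BASE = "http://codebase.md/basicmachines-co/basic-memory?lines=true&page={x}"

pages = []
for x in range(1, 28):  # pages 1..27
    with urlopen(BASE.format(x=x)) as resp:
        pages.append(resp.read().decode("utf-8"))

full_context = "\n".join(pages)
print(f"{len(pages)} pages, {len(full_context)} characters total")
```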

# Directory Structure

```
├── .claude
│   ├── commands
│   │   ├── release
│   │   │   ├── beta.md
│   │   │   ├── changelog.md
│   │   │   ├── release-check.md
│   │   │   └── release.md
│   │   ├── spec.md
│   │   └── test-live.md
│   └── settings.json
├── .dockerignore
├── .env.example
├── .github
│   ├── dependabot.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   ├── documentation.md
│   │   └── feature_request.md
│   └── workflows
│       ├── claude-code-review.yml
│       ├── claude-issue-triage.yml
│       ├── claude.yml
│       ├── dev-release.yml
│       ├── docker.yml
│       ├── pr-title.yml
│       ├── release.yml
│       └── test.yml
├── .gitignore
├── .python-version
├── CHANGELOG.md
├── CITATION.cff
├── CLA.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── docker-compose-postgres.yml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── ai-assistant-guide-extended.md
│   ├── ARCHITECTURE.md
│   ├── character-handling.md
│   ├── cloud-cli.md
│   ├── Docker.md
│   └── testing-coverage.md
├── justfile
├── LICENSE
├── llms-install.md
├── pyproject.toml
├── README.md
├── SECURITY.md
├── smithery.yaml
├── specs
│   ├── SPEC-1 Specification-Driven Development Process.md
│   ├── SPEC-10 Unified Deployment Workflow and Event Tracking.md
│   ├── SPEC-11 Basic Memory API Performance Optimization.md
│   ├── SPEC-12 OpenTelemetry Observability.md
│   ├── SPEC-13 CLI Authentication with Subscription Validation.md
│   ├── SPEC-14 Cloud Git Versioning & GitHub Backup.md
│   ├── SPEC-14- Cloud Git Versioning & GitHub Backup.md
│   ├── SPEC-15 Configuration Persistence via Tigris for Cloud Tenants.md
│   ├── SPEC-16 MCP Cloud Service Consolidation.md
│   ├── SPEC-17 Semantic Search with ChromaDB.md
│   ├── SPEC-18 AI Memory Management Tool.md
│   ├── SPEC-19 Sync Performance and Memory Optimization.md
│   ├── SPEC-2 Slash Commands Reference.md
│   ├── SPEC-20 Simplified Project-Scoped Rclone Sync.md
│   ├── SPEC-3 Agent Definitions.md
│   ├── SPEC-4 Notes Web UI Component Architecture.md
│   ├── SPEC-5 CLI Cloud Upload via WebDAV.md
│   ├── SPEC-6 Explicit Project Parameter Architecture.md
│   ├── SPEC-7 POC to spike Tigris Turso for local access to cloud data.md
│   ├── SPEC-8 TigrisFS Integration.md
│   ├── SPEC-9 Multi-Project Bidirectional Sync Architecture.md
│   ├── SPEC-9 Signed Header Tenant Information.md
│   └── SPEC-9-1 Follow-Ups- Conflict, Sync, and Observability.md
├── src
│   └── basic_memory
│       ├── __init__.py
│       ├── alembic
│       │   ├── alembic.ini
│       │   ├── env.py
│       │   ├── migrations.py
│       │   ├── script.py.mako
│       │   └── versions
│       │       ├── 314f1ea54dc4_add_postgres_full_text_search_support_.py
│       │       ├── 3dae7c7b1564_initial_schema.py
│       │       ├── 502b60eaa905_remove_required_from_entity_permalink.py
│       │       ├── 5fe1ab1ccebe_add_projects_table.py
│       │       ├── 647e7a75e2cd_project_constraint_fix.py
│       │       ├── 6830751f5fb6_merge_multiple_heads.py
│       │       ├── 9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py
│       │       ├── a1b2c3d4e5f6_fix_project_foreign_keys.py
│       │       ├── a2b3c4d5e6f7_add_search_index_entity_cascade.py
│       │       ├── b3c3938bacdb_relation_to_name_unique_index.py
│       │       ├── cc7172b46608_update_search_index_schema.py
│       │       ├── e7e1f4367280_add_scan_watermark_tracking_to_project.py
│       │       ├── f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py
│       │       └── g9a0b3c4d5e6_add_external_id_to_project_and_entity.py
│       ├── api
│       │   ├── __init__.py
│       │   ├── app.py
│       │   ├── container.py
│       │   ├── routers
│       │   │   ├── __init__.py
│       │   │   ├── directory_router.py
│       │   │   ├── importer_router.py
│       │   │   ├── knowledge_router.py
│       │   │   ├── management_router.py
│       │   │   ├── memory_router.py
│       │   │   ├── project_router.py
│       │   │   ├── prompt_router.py
│       │   │   ├── resource_router.py
│       │   │   ├── search_router.py
│       │   │   └── utils.py
│       │   ├── template_loader.py
│       │   └── v2
│       │       ├── __init__.py
│       │       └── routers
│       │           ├── __init__.py
│       │           ├── directory_router.py
│       │           ├── importer_router.py
│       │           ├── knowledge_router.py
│       │           ├── memory_router.py
│       │           ├── project_router.py
│       │           ├── prompt_router.py
│       │           ├── resource_router.py
│       │           └── search_router.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── app.py
│       │   ├── auth.py
│       │   ├── commands
│       │   │   ├── __init__.py
│       │   │   ├── cloud
│       │   │   │   ├── __init__.py
│       │   │   │   ├── api_client.py
│       │   │   │   ├── bisync_commands.py
│       │   │   │   ├── cloud_utils.py
│       │   │   │   ├── core_commands.py
│       │   │   │   ├── rclone_commands.py
│       │   │   │   ├── rclone_config.py
│       │   │   │   ├── rclone_installer.py
│       │   │   │   ├── upload_command.py
│       │   │   │   └── upload.py
│       │   │   ├── command_utils.py
│       │   │   ├── db.py
│       │   │   ├── format.py
│       │   │   ├── import_chatgpt.py
│       │   │   ├── import_claude_conversations.py
│       │   │   ├── import_claude_projects.py
│       │   │   ├── import_memory_json.py
│       │   │   ├── mcp.py
│       │   │   ├── project.py
│       │   │   ├── status.py
│       │   │   ├── telemetry.py
│       │   │   └── tool.py
│       │   ├── container.py
│       │   └── main.py
│       ├── config.py
│       ├── db.py
│       ├── deps
│       │   ├── __init__.py
│       │   ├── config.py
│       │   ├── db.py
│       │   ├── importers.py
│       │   ├── projects.py
│       │   ├── repositories.py
│       │   └── services.py
│       ├── deps.py
│       ├── file_utils.py
│       ├── ignore_utils.py
│       ├── importers
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── chatgpt_importer.py
│       │   ├── claude_conversations_importer.py
│       │   ├── claude_projects_importer.py
│       │   ├── memory_json_importer.py
│       │   └── utils.py
│       ├── markdown
│       │   ├── __init__.py
│       │   ├── entity_parser.py
│       │   ├── markdown_processor.py
│       │   ├── plugins.py
│       │   ├── schemas.py
│       │   └── utils.py
│       ├── mcp
│       │   ├── __init__.py
│       │   ├── async_client.py
│       │   ├── clients
│       │   │   ├── __init__.py
│       │   │   ├── directory.py
│       │   │   ├── knowledge.py
│       │   │   ├── memory.py
│       │   │   ├── project.py
│       │   │   ├── resource.py
│       │   │   └── search.py
│       │   ├── container.py
│       │   ├── project_context.py
│       │   ├── prompts
│       │   │   ├── __init__.py
│       │   │   ├── ai_assistant_guide.py
│       │   │   ├── continue_conversation.py
│       │   │   ├── recent_activity.py
│       │   │   ├── search.py
│       │   │   └── utils.py
│       │   ├── resources
│       │   │   ├── ai_assistant_guide.md
│       │   │   └── project_info.py
│       │   ├── server.py
│       │   └── tools
│       │       ├── __init__.py
│       │       ├── build_context.py
│       │       ├── canvas.py
│       │       ├── chatgpt_tools.py
│       │       ├── delete_note.py
│       │       ├── edit_note.py
│       │       ├── list_directory.py
│       │       ├── move_note.py
│       │       ├── project_management.py
│       │       ├── read_content.py
│       │       ├── read_note.py
│       │       ├── recent_activity.py
│       │       ├── search.py
│       │       ├── utils.py
│       │       ├── view_note.py
│       │       └── write_note.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── knowledge.py
│       │   ├── project.py
│       │   └── search.py
│       ├── project_resolver.py
│       ├── repository
│       │   ├── __init__.py
│       │   ├── entity_repository.py
│       │   ├── observation_repository.py
│       │   ├── postgres_search_repository.py
│       │   ├── project_info_repository.py
│       │   ├── project_repository.py
│       │   ├── relation_repository.py
│       │   ├── repository.py
│       │   ├── search_index_row.py
│       │   ├── search_repository_base.py
│       │   ├── search_repository.py
│       │   └── sqlite_search_repository.py
│       ├── runtime.py
│       ├── schemas
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── cloud.py
│       │   ├── delete.py
│       │   ├── directory.py
│       │   ├── importer.py
│       │   ├── memory.py
│       │   ├── project_info.py
│       │   ├── prompt.py
│       │   ├── request.py
│       │   ├── response.py
│       │   ├── search.py
│       │   ├── sync_report.py
│       │   └── v2
│       │       ├── __init__.py
│       │       ├── entity.py
│       │       └── resource.py
│       ├── services
│       │   ├── __init__.py
│       │   ├── context_service.py
│       │   ├── directory_service.py
│       │   ├── entity_service.py
│       │   ├── exceptions.py
│       │   ├── file_service.py
│       │   ├── initialization.py
│       │   ├── link_resolver.py
│       │   ├── project_service.py
│       │   ├── search_service.py
│       │   └── service.py
│       ├── sync
│       │   ├── __init__.py
│       │   ├── background_sync.py
│       │   ├── coordinator.py
│       │   ├── sync_service.py
│       │   └── watch_service.py
│       ├── telemetry.py
│       ├── templates
│       │   └── prompts
│       │       ├── continue_conversation.hbs
│       │       └── search.hbs
│       └── utils.py
├── test-int
│   ├── BENCHMARKS.md
│   ├── cli
│   │   ├── test_project_commands_integration.py
│   │   └── test_version_integration.py
│   ├── conftest.py
│   ├── mcp
│   │   ├── test_build_context_underscore.py
│   │   ├── test_build_context_validation.py
│   │   ├── test_chatgpt_tools_integration.py
│   │   ├── test_default_project_mode_integration.py
│   │   ├── test_delete_note_integration.py
│   │   ├── test_edit_note_integration.py
│   │   ├── test_lifespan_shutdown_sync_task_cancellation_integration.py
│   │   ├── test_list_directory_integration.py
│   │   ├── test_move_note_integration.py
│   │   ├── test_project_management_integration.py
│   │   ├── test_project_state_sync_integration.py
│   │   ├── test_read_content_integration.py
│   │   ├── test_read_note_integration.py
│   │   ├── test_search_integration.py
│   │   ├── test_single_project_mcp_integration.py
│   │   └── test_write_note_integration.py
│   ├── test_db_wal_mode.py
│   └── test_disable_permalinks_integration.py
├── tests
│   ├── __init__.py
│   ├── api
│   │   ├── conftest.py
│   │   ├── test_api_container.py
│   │   ├── test_async_client.py
│   │   ├── test_continue_conversation_template.py
│   │   ├── test_directory_router.py
│   │   ├── test_importer_router.py
│   │   ├── test_knowledge_router.py
│   │   ├── test_management_router.py
│   │   ├── test_memory_router.py
│   │   ├── test_project_router_operations.py
│   │   ├── test_project_router.py
│   │   ├── test_prompt_router.py
│   │   ├── test_relation_background_resolution.py
│   │   ├── test_resource_router.py
│   │   ├── test_search_router.py
│   │   ├── test_search_template.py
│   │   ├── test_template_loader_helpers.py
│   │   ├── test_template_loader.py
│   │   └── v2
│   │       ├── __init__.py
│   │       ├── conftest.py
│   │       ├── test_directory_router.py
│   │       ├── test_importer_router.py
│   │       ├── test_knowledge_router.py
│   │       ├── test_memory_router.py
│   │       ├── test_project_router.py
│   │       ├── test_prompt_router.py
│   │       ├── test_resource_router.py
│   │       └── test_search_router.py
│   ├── cli
│   │   ├── cloud
│   │   │   ├── test_cloud_api_client_and_utils.py
│   │   │   ├── test_rclone_config_and_bmignore_filters.py
│   │   │   └── test_upload_path.py
│   │   ├── conftest.py
│   │   ├── test_auth_cli_auth.py
│   │   ├── test_cli_container.py
│   │   ├── test_cli_exit.py
│   │   ├── test_cli_tool_exit.py
│   │   ├── test_cli_tools.py
│   │   ├── test_cloud_authentication.py
│   │   ├── test_ignore_utils.py
│   │   ├── test_import_chatgpt.py
│   │   ├── test_import_claude_conversations.py
│   │   ├── test_import_claude_projects.py
│   │   ├── test_import_memory_json.py
│   │   ├── test_project_add_with_local_path.py
│   │   └── test_upload.py
│   ├── conftest.py
│   ├── db
│   │   └── test_issue_254_foreign_key_constraints.py
│   ├── importers
│   │   ├── test_conversation_indexing.py
│   │   ├── test_importer_base.py
│   │   └── test_importer_utils.py
│   ├── markdown
│   │   ├── __init__.py
│   │   ├── test_date_frontmatter_parsing.py
│   │   ├── test_entity_parser_error_handling.py
│   │   ├── test_entity_parser.py
│   │   ├── test_markdown_plugins.py
│   │   ├── test_markdown_processor.py
│   │   ├── test_observation_edge_cases.py
│   │   ├── test_parser_edge_cases.py
│   │   ├── test_relation_edge_cases.py
│   │   └── test_task_detection.py
│   ├── mcp
│   │   ├── clients
│   │   │   ├── __init__.py
│   │   │   └── test_clients.py
│   │   ├── conftest.py
│   │   ├── test_async_client_modes.py
│   │   ├── test_mcp_container.py
│   │   ├── test_obsidian_yaml_formatting.py
│   │   ├── test_permalink_collision_file_overwrite.py
│   │   ├── test_project_context.py
│   │   ├── test_prompts.py
│   │   ├── test_recent_activity_prompt_modes.py
│   │   ├── test_resources.py
│   │   ├── test_server_lifespan_branches.py
│   │   ├── test_tool_build_context.py
│   │   ├── test_tool_canvas.py
│   │   ├── test_tool_delete_note.py
│   │   ├── test_tool_edit_note.py
│   │   ├── test_tool_list_directory.py
│   │   ├── test_tool_move_note.py
│   │   ├── test_tool_project_management.py
│   │   ├── test_tool_read_content.py
│   │   ├── test_tool_read_note.py
│   │   ├── test_tool_recent_activity.py
│   │   ├── test_tool_resource.py
│   │   ├── test_tool_search.py
│   │   ├── test_tool_utils.py
│   │   ├── test_tool_view_note.py
│   │   ├── test_tool_write_note_kebab_filenames.py
│   │   ├── test_tool_write_note.py
│   │   └── tools
│   │       └── test_chatgpt_tools.py
│   ├── Non-MarkdownFileSupport.pdf
│   ├── README.md
│   ├── repository
│   │   ├── test_entity_repository_upsert.py
│   │   ├── test_entity_repository.py
│   │   ├── test_entity_upsert_issue_187.py
│   │   ├── test_observation_repository.py
│   │   ├── test_postgres_search_repository.py
│   │   ├── test_project_info_repository.py
│   │   ├── test_project_repository.py
│   │   ├── test_relation_repository.py
│   │   ├── test_repository.py
│   │   ├── test_search_repository_edit_bug_fix.py
│   │   └── test_search_repository.py
│   ├── schemas
│   │   ├── test_base_timeframe_minimum.py
│   │   ├── test_memory_serialization.py
│   │   ├── test_memory_url_validation.py
│   │   ├── test_memory_url.py
│   │   ├── test_relation_response_reference_resolution.py
│   │   ├── test_schemas.py
│   │   └── test_search.py
│   ├── Screenshot.png
│   ├── services
│   │   ├── test_context_service.py
│   │   ├── test_directory_service.py
│   │   ├── test_entity_service_disable_permalinks.py
│   │   ├── test_entity_service.py
│   │   ├── test_file_service.py
│   │   ├── test_initialization_cloud_mode_branches.py
│   │   ├── test_initialization.py
│   │   ├── test_link_resolver.py
│   │   ├── test_project_removal_bug.py
│   │   ├── test_project_service_operations.py
│   │   ├── test_project_service.py
│   │   └── test_search_service.py
│   ├── sync
│   │   ├── test_character_conflicts.py
│   │   ├── test_coordinator.py
│   │   ├── test_sync_service_incremental.py
│   │   ├── test_sync_service.py
│   │   ├── test_sync_wikilink_issue.py
│   │   ├── test_tmp_files.py
│   │   ├── test_watch_service_atomic_adds.py
│   │   ├── test_watch_service_edge_cases.py
│   │   ├── test_watch_service_reload.py
│   │   └── test_watch_service.py
│   ├── test_config.py
│   ├── test_deps.py
│   ├── test_production_cascade_delete.py
│   ├── test_project_resolver.py
│   ├── test_rclone_commands.py
│   ├── test_runtime.py
│   ├── test_telemetry.py
│   └── utils
│       ├── test_file_utils.py
│       ├── test_frontmatter_obsidian_compatible.py
│       ├── test_parse_tags.py
│       ├── test_permalink_formatting.py
│       ├── test_timezone_utils.py
│       ├── test_utf8_handling.py
│       └── test_validate_project_path.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/src/basic_memory/services/context_service.py:
--------------------------------------------------------------------------------

```python
  1 | """Service for building rich context from the knowledge graph."""
  2 | 
  3 | from dataclasses import dataclass, field
  4 | from datetime import datetime, timezone
  5 | from typing import List, Optional, Tuple
  6 | 
  7 | 
  8 | from loguru import logger
  9 | from sqlalchemy import text
 10 | 
 11 | from basic_memory.repository.entity_repository import EntityRepository
 12 | from basic_memory.repository.observation_repository import ObservationRepository
 13 | from basic_memory.repository.postgres_search_repository import PostgresSearchRepository
 14 | from basic_memory.repository.search_repository import SearchRepository, SearchIndexRow
 15 | from basic_memory.schemas.memory import MemoryUrl, memory_url_path
 16 | from basic_memory.schemas.search import SearchItemType
 17 | from basic_memory.utils import generate_permalink
 18 | 
 19 | 
 20 | @dataclass
 21 | class ContextResultRow:
 22 |     type: str
 23 |     id: int
 24 |     title: str
 25 |     permalink: str
 26 |     file_path: str
 27 |     depth: int
 28 |     root_id: int
 29 |     created_at: datetime
 30 |     from_id: Optional[int] = None
 31 |     to_id: Optional[int] = None
 32 |     relation_type: Optional[str] = None
 33 |     content: Optional[str] = None
 34 |     category: Optional[str] = None
 35 |     entity_id: Optional[int] = None
 36 | 
 37 | 
 38 | @dataclass
 39 | class ContextResultItem:
 40 |     """A hierarchical result containing a primary item with its observations and related items."""
 41 | 
 42 |     primary_result: ContextResultRow | SearchIndexRow
 43 |     observations: List[ContextResultRow] = field(default_factory=list)
 44 |     related_results: List[ContextResultRow] = field(default_factory=list)
 45 | 
 46 | 
 47 | @dataclass
 48 | class ContextMetadata:
 49 |     """Metadata about a context result."""
 50 | 
 51 |     uri: Optional[str] = None
 52 |     types: Optional[List[SearchItemType]] = None
 53 |     depth: int = 1
 54 |     timeframe: Optional[str] = None
 55 |     generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
 56 |     primary_count: int = 0
 57 |     related_count: int = 0
 58 |     total_observations: int = 0
 59 |     total_relations: int = 0
 60 | 
 61 | 
 62 | @dataclass
 63 | class ContextResult:
 64 |     """Complete context result with metadata."""
 65 | 
 66 |     results: List[ContextResultItem] = field(default_factory=list)
 67 |     metadata: ContextMetadata = field(default_factory=ContextMetadata)
 68 | 
 69 | 
 70 | class ContextService:
 71 |     """Service for building rich context from memory:// URIs.
 72 | 
 73 |     Handles three types of context building:
 74 |     1. Direct permalink lookup - exact match on path
 75 |     2. Pattern matching - using * wildcards
 76 |     3. Special modes via params (e.g., 'related')
 77 |     """
 78 | 
 79 |     def __init__(
 80 |         self,
 81 |         search_repository: SearchRepository,
 82 |         entity_repository: EntityRepository,
 83 |         observation_repository: ObservationRepository,
 84 |     ):
 85 |         self.search_repository = search_repository
 86 |         self.entity_repository = entity_repository
 87 |         self.observation_repository = observation_repository
 88 | 
 89 |     async def build_context(
 90 |         self,
 91 |         memory_url: Optional[MemoryUrl] = None,
 92 |         types: Optional[List[SearchItemType]] = None,
 93 |         depth: int = 1,
 94 |         since: Optional[datetime] = None,
 95 |         limit=10,
 96 |         offset=0,
 97 |         max_related: int = 10,
 98 |         include_observations: bool = True,
 99 |     ) -> ContextResult:
100 |         """Build rich context from a memory:// URI."""
101 |         logger.debug(
102 |             f"Building context for URI: '{memory_url}' depth: '{depth}' since: '{since}' limit: '{limit}' offset: '{offset}' max_related: '{max_related}'"
103 |         )
104 | 
105 |         normalized_path: Optional[str] = None
106 |         if memory_url:
107 |             path = memory_url_path(memory_url)
108 |             # Check for wildcards before normalization
109 |             has_wildcard = "*" in path
110 | 
111 |             if has_wildcard:
112 |                 # For wildcard patterns, normalize each segment separately to preserve the *
113 |                 parts = path.split("*")
114 |                 normalized_parts = [
115 |                     generate_permalink(part, split_extension=False) if part else ""
116 |                     for part in parts
117 |                 ]
118 |                 normalized_path = "*".join(normalized_parts)
119 |                 logger.debug(f"Pattern search for '{normalized_path}'")
120 |                 primary = await self.search_repository.search(
121 |                     permalink_match=normalized_path, limit=limit, offset=offset
122 |                 )
123 |             else:
124 |                 # For exact paths, normalize the whole thing
125 |                 normalized_path = generate_permalink(path, split_extension=False)
126 |                 logger.debug(f"Direct lookup for '{normalized_path}'")
127 |                 primary = await self.search_repository.search(
128 |                     permalink=normalized_path, limit=limit, offset=offset
129 |                 )
130 |         else:
131 |             logger.debug(f"Build context for '{types}'")
132 |             primary = await self.search_repository.search(
133 |                 search_item_types=types, after_date=since, limit=limit, offset=offset
134 |             )
135 | 
136 |         # Get type_id pairs for traversal
137 | 
138 |         type_id_pairs = [(r.type, r.id) for r in primary] if primary else []
139 |         logger.debug(f"found primary type_id_pairs: {len(type_id_pairs)}")
140 | 
141 |         # Find related content
142 |         related = await self.find_related(
143 |             type_id_pairs, max_depth=depth, since=since, max_results=max_related
144 |         )
145 |         logger.debug(f"Found {len(related)} related results")
146 | 
147 |         # Collect entity IDs from primary and related results
148 |         entity_ids = []
149 |         for result in primary:
150 |             if result.type == SearchItemType.ENTITY.value:
151 |                 entity_ids.append(result.id)
152 | 
153 |         for result in related:
154 |             if result.type == SearchItemType.ENTITY.value:
155 |                 entity_ids.append(result.id)
156 | 
157 |         # Fetch observations for all entities if requested
158 |         observations_by_entity = {}
159 |         if include_observations and entity_ids:
160 |             # Use our observation repository to get observations for all entities at once
161 |             observations_by_entity = await self.observation_repository.find_by_entities(entity_ids)
162 |             logger.debug(f"Found observations for {len(observations_by_entity)} entities")
163 | 
164 |         # Create metadata dataclass
165 |         metadata = ContextMetadata(
166 |             uri=normalized_path if memory_url else None,
167 |             types=types,
168 |             depth=depth,
169 |             timeframe=since.isoformat() if since else None,
170 |             primary_count=len(primary),
171 |             related_count=len(related),
172 |             total_observations=sum(len(obs) for obs in observations_by_entity.values()),
173 |             total_relations=sum(1 for r in related if r.type == SearchItemType.RELATION),
174 |         )
175 | 
176 |         # Build context results list directly with ContextResultItem objects
177 |         context_results = []
178 | 
179 |         # For each primary result
180 |         for primary_item in primary:
181 |             # Find all related items with this primary item as root
182 |             related_to_primary = [r for r in related if r.root_id == primary_item.id]
183 | 
184 |             # Get observations for this item if it's an entity
185 |             item_observations = []
186 |             if primary_item.type == SearchItemType.ENTITY.value and include_observations:
187 |                 # Convert Observation models to ContextResultRows
188 |                 for obs in observations_by_entity.get(primary_item.id, []):
189 |                     item_observations.append(
190 |                         ContextResultRow(
191 |                             type="observation",
192 |                             id=obs.id,
193 |                             title=f"{obs.category}: {obs.content[:50]}...",
194 |                             permalink=generate_permalink(
195 |                                 f"{primary_item.permalink}/observations/{obs.category}/{obs.content}"
196 |                             ),
197 |                             file_path=primary_item.file_path,
198 |                             content=obs.content,
199 |                             category=obs.category,
200 |                             entity_id=primary_item.id,
201 |                             depth=0,
202 |                             root_id=primary_item.id,
203 |                             created_at=primary_item.created_at,  # created_at time from entity
204 |                         )
205 |                     )
206 | 
207 |             # Create ContextResultItem directly
208 |             context_item = ContextResultItem(
209 |                 primary_result=primary_item,
210 |                 observations=item_observations,
211 |                 related_results=related_to_primary,
212 |             )
213 | 
214 |             context_results.append(context_item)
215 | 
216 |         # Return the structured ContextResult
217 |         return ContextResult(results=context_results, metadata=metadata)
218 | 
219 |     async def find_related(
220 |         self,
221 |         type_id_pairs: List[Tuple[str, int]],
222 |         max_depth: int = 1,
223 |         since: Optional[datetime] = None,
224 |         max_results: int = 10,
225 |     ) -> List[ContextResultRow]:
226 |         """Find items connected through relations.
227 | 
228 |         Uses recursive CTE to find:
229 |         - Connected entities
230 |         - Relations that connect them
231 | 
232 |         Note on depth:
233 |         Each traversal step requires two depth levels - one to find the relation,
234 |         and another to follow that relation to an entity. So a max_depth of 4 allows
235 |         traversal through two entities (relation->entity->relation->entity), while reaching
236 |         an entity three steps away requires max_depth=6 (relation->entity->relation->entity->relation->entity).
237 |         """
238 |         max_depth = max_depth * 2
239 | 
240 |         if not type_id_pairs:
241 |             return []
242 | 
243 |         # Extract entity IDs from type_id_pairs for the optimized query
244 |         entity_ids = [i for t, i in type_id_pairs if t == "entity"]
245 | 
246 |         if not entity_ids:
247 |             logger.debug("No entity IDs found in type_id_pairs")
248 |             return []
249 | 
250 |         logger.debug(
251 |             f"Finding connected items for {len(entity_ids)} entities with depth {max_depth}"
252 |         )
253 | 
254 |         # Build the VALUES clause for entity IDs
255 |         entity_id_values = ", ".join([str(i) for i in entity_ids])
256 | 
257 |         # Parameters for bindings - include project_id for security filtering
258 |         params = {
259 |             "max_depth": max_depth,
260 |             "max_results": max_results,
261 |             "project_id": self.search_repository.project_id,
262 |         }
263 | 
264 |         # Build date and timeframe filters conditionally based on since parameter
265 |         if since:
266 |             # SQLite accepts ISO strings, but Postgres/asyncpg requires datetime objects
267 |             if isinstance(self.search_repository, PostgresSearchRepository):  # pragma: no cover
268 |                 # asyncpg expects timezone-NAIVE datetime in UTC for DateTime(timezone=True) columns
269 |                 # even though the column stores timezone-aware values
270 |                 since_utc = (
271 |                     since.astimezone(timezone.utc) if since.tzinfo else since
272 |                 )  # pragma: no cover
273 |                 params["since_date"] = since_utc.replace(tzinfo=None)  # pyright: ignore  # pragma: no cover
274 |             else:
275 |                 params["since_date"] = since.isoformat()  # pyright: ignore
276 |             date_filter = "AND e.created_at >= :since_date"
277 |             relation_date_filter = "AND e_from.created_at >= :since_date"
278 |             timeframe_condition = "AND eg.relation_date >= :since_date"
279 |         else:
280 |             date_filter = ""
281 |             relation_date_filter = ""
282 |             timeframe_condition = ""
283 | 
284 |         # Add project filtering for security - ensure all entities and relations belong to the same project
285 |         project_filter = "AND e.project_id = :project_id"
286 |         relation_project_filter = "AND e_from.project_id = :project_id"
287 | 
288 |         # Use a CTE that operates directly on entity and relation tables
289 |         # This avoids the overhead of the search_index virtual table
290 |         # Note: Postgres and SQLite have different CTE limitations:
291 |         # - Postgres: doesn't allow multiple UNION ALL branches referencing the CTE
292 |         # - SQLite: doesn't support LATERAL joins
293 |         # So we need different queries for each database backend
294 | 
295 |         # Detect database backend
296 |         is_postgres = isinstance(self.search_repository, PostgresSearchRepository)
297 | 
298 |         if is_postgres:  # pragma: no cover
299 |             query = self._build_postgres_query(
300 |                 entity_id_values,
301 |                 date_filter,
302 |                 project_filter,
303 |                 relation_date_filter,
304 |                 relation_project_filter,
305 |                 timeframe_condition,
306 |             )
307 |         else:
308 |             # SQLite needs VALUES clause for exclusion (not needed for Postgres)
309 |             values = ", ".join([f"('{t}', {i})" for t, i in type_id_pairs])
310 |             query = self._build_sqlite_query(
311 |                 entity_id_values,
312 |                 date_filter,
313 |                 project_filter,
314 |                 relation_date_filter,
315 |                 relation_project_filter,
316 |                 timeframe_condition,
317 |                 values,
318 |             )
319 | 
320 |         result = await self.search_repository.execute_query(query, params=params)
321 |         rows = result.all()
322 | 
323 |         context_rows = [
324 |             ContextResultRow(
325 |                 type=row.type,
326 |                 id=row.id,
327 |                 title=row.title,
328 |                 permalink=row.permalink,
329 |                 file_path=row.file_path,
330 |                 from_id=row.from_id,
331 |                 to_id=row.to_id,
332 |                 relation_type=row.relation_type,
333 |                 content=row.content,
334 |                 category=row.category,
335 |                 entity_id=row.entity_id,
336 |                 depth=row.depth,
337 |                 root_id=row.root_id,
338 |                 created_at=row.created_at,
339 |             )
340 |             for row in rows
341 |         ]
342 |         return context_rows
343 | 
344 |     def _build_postgres_query(  # pragma: no cover
345 |         self,
346 |         entity_id_values: str,
347 |         date_filter: str,
348 |         project_filter: str,
349 |         relation_date_filter: str,
350 |         relation_project_filter: str,
351 |         timeframe_condition: str,
352 |     ):
353 |         """Build Postgres-specific CTE query using LATERAL joins."""
354 |         return text(f"""
355 |         WITH RECURSIVE entity_graph AS (
356 |             -- Base case: seed entities
357 |             SELECT
358 |                 e.id,
359 |                 'entity' as type,
360 |                 e.title,
361 |                 e.permalink,
362 |                 e.file_path,
363 |                 CAST(NULL AS INTEGER) as from_id,
364 |                 CAST(NULL AS INTEGER) as to_id,
365 |                 CAST(NULL AS TEXT) as relation_type,
366 |                 CAST(NULL AS TEXT) as content,
367 |                 CAST(NULL AS TEXT) as category,
368 |                 CAST(NULL AS INTEGER) as entity_id,
369 |                 0 as depth,
370 |                 e.id as root_id,
371 |                 e.created_at,
372 |                 e.created_at as relation_date
373 |             FROM entity e
374 |             WHERE e.id IN ({entity_id_values})
375 |             {date_filter}
376 |             {project_filter}
377 | 
378 |             UNION ALL
379 | 
380 |             -- Fetch BOTH relations AND connected entities in a single recursive step
381 |             -- Postgres only allows ONE reference to the recursive CTE in the recursive term
382 |             -- We use CROSS JOIN LATERAL to generate two rows (relation + entity) from each traversal
383 |             SELECT
384 |                 CASE
385 |                     WHEN step_type = 1 THEN r.id
386 |                     ELSE e.id
387 |                 END as id,
388 |                 CASE
389 |                     WHEN step_type = 1 THEN 'relation'
390 |                     ELSE 'entity'
391 |                 END as type,
392 |                 CASE
393 |                     WHEN step_type = 1 THEN r.relation_type || ': ' || r.to_name
394 |                     ELSE e.title
395 |                 END as title,
396 |                 CASE
397 |                     WHEN step_type = 1 THEN ''
398 |                     ELSE COALESCE(e.permalink, '')
399 |                 END as permalink,
400 |                 CASE
401 |                     WHEN step_type = 1 THEN e_from.file_path
402 |                     ELSE e.file_path
403 |                 END as file_path,
404 |                 CASE
405 |                     WHEN step_type = 1 THEN r.from_id
406 |                     ELSE NULL
407 |                 END as from_id,
408 |                 CASE
409 |                     WHEN step_type = 1 THEN r.to_id
410 |                     ELSE NULL
411 |                 END as to_id,
412 |                 CASE
413 |                     WHEN step_type = 1 THEN r.relation_type
414 |                     ELSE NULL
415 |                 END as relation_type,
416 |                 CAST(NULL AS TEXT) as content,
417 |                 CAST(NULL AS TEXT) as category,
418 |                 CAST(NULL AS INTEGER) as entity_id,
419 |                 eg.depth + step_type as depth,
420 |                 eg.root_id,
421 |                 CASE
422 |                     WHEN step_type = 1 THEN e_from.created_at
423 |                     ELSE e.created_at
424 |                 END as created_at,
425 |                 CASE
426 |                     WHEN step_type = 1 THEN e_from.created_at
427 |                     ELSE eg.relation_date
428 |                 END as relation_date
429 |             FROM entity_graph eg
430 |             CROSS JOIN LATERAL (VALUES (1), (2)) AS steps(step_type)
431 |             JOIN relation r ON (
432 |                 eg.type = 'entity' AND
433 |                 (r.from_id = eg.id OR r.to_id = eg.id)
434 |             )
435 |             JOIN entity e_from ON (
436 |                 r.from_id = e_from.id
437 |                 {relation_project_filter}
438 |             )
439 |             LEFT JOIN entity e ON (
440 |                 step_type = 2 AND
441 |                 e.id = CASE
442 |                     WHEN r.from_id = eg.id THEN r.to_id
443 |                     ELSE r.from_id
444 |                 END
445 |                 {date_filter}
446 |                 {project_filter}
447 |             )
448 |             WHERE eg.depth < :max_depth
449 |             AND (step_type = 1 OR (step_type = 2 AND e.id IS NOT NULL AND e.id != eg.id))
450 |             {timeframe_condition}
451 |         )
452 |         -- Materialize and filter
453 |         SELECT DISTINCT
454 |             type,
455 |             id,
456 |             title,
457 |             permalink,
458 |             file_path,
459 |             from_id,
460 |             to_id,
461 |             relation_type,
462 |             content,
463 |             category,
464 |             entity_id,
465 |             MIN(depth) as depth,
466 |             root_id,
467 |             created_at
468 |         FROM entity_graph
469 |         WHERE depth > 0
470 |         GROUP BY type, id, title, permalink, file_path, from_id, to_id,
471 |                  relation_type, content, category, entity_id, root_id, created_at
472 |         ORDER BY depth, type, id
473 |         LIMIT :max_results
474 |        """)
475 | 
476 |     def _build_sqlite_query(
477 |         self,
478 |         entity_id_values: str,
479 |         date_filter: str,
480 |         project_filter: str,
481 |         relation_date_filter: str,
482 |         relation_project_filter: str,
483 |         timeframe_condition: str,
484 |         values: str,
485 |     ):
486 |         """Build SQLite-specific CTE query using multiple UNION ALL branches."""
487 |         return text(f"""
488 |         WITH RECURSIVE entity_graph AS (
489 |             -- Base case: seed entities
490 |             SELECT
491 |                 e.id,
492 |                 'entity' as type,
493 |                 e.title,
494 |                 e.permalink,
495 |                 e.file_path,
496 |                 NULL as from_id,
497 |                 NULL as to_id,
498 |                 NULL as relation_type,
499 |                 NULL as content,
500 |                 NULL as category,
501 |                 NULL as entity_id,
502 |                 0 as depth,
503 |                 e.id as root_id,
504 |                 e.created_at,
505 |                 e.created_at as relation_date,
506 |                 0 as is_incoming
507 |             FROM entity e
508 |             WHERE e.id IN ({entity_id_values})
509 |             {date_filter}
510 |             {project_filter}
511 | 
512 |             UNION ALL
513 | 
514 |             -- Get relations from current entities
515 |             SELECT
516 |                 r.id,
517 |                 'relation' as type,
518 |                 r.relation_type || ': ' || r.to_name as title,
519 |                 '' as permalink,
520 |                 e_from.file_path,
521 |                 r.from_id,
522 |                 r.to_id,
523 |                 r.relation_type,
524 |                 NULL as content,
525 |                 NULL as category,
526 |                 NULL as entity_id,
527 |                 eg.depth + 1,
528 |                 eg.root_id,
529 |                 e_from.created_at,
530 |                 e_from.created_at as relation_date,
531 |                 CASE WHEN r.from_id = eg.id THEN 0 ELSE 1 END as is_incoming
532 |             FROM entity_graph eg
533 |             JOIN relation r ON (
534 |                 eg.type = 'entity' AND
535 |                 (r.from_id = eg.id OR r.to_id = eg.id)
536 |             )
537 |             JOIN entity e_from ON (
538 |                 r.from_id = e_from.id
539 |                 {relation_date_filter}
540 |                 {relation_project_filter}
541 |             )
542 |             LEFT JOIN entity e_to ON (r.to_id = e_to.id)
543 |             WHERE eg.depth < :max_depth
544 |             AND (r.to_id IS NULL OR e_to.project_id = :project_id)
545 | 
546 |             UNION ALL
547 | 
548 |             -- Get entities connected by relations
549 |             SELECT
550 |                 e.id,
551 |                 'entity' as type,
552 |                 e.title,
553 |                 CASE
554 |                     WHEN e.permalink IS NULL THEN ''
555 |                     ELSE e.permalink
556 |                 END as permalink,
557 |                 e.file_path,
558 |                 NULL as from_id,
559 |                 NULL as to_id,
560 |                 NULL as relation_type,
561 |                 NULL as content,
562 |                 NULL as category,
563 |                 NULL as entity_id,
564 |                 eg.depth + 1,
565 |                 eg.root_id,
566 |                 e.created_at,
567 |                 eg.relation_date,
568 |                 eg.is_incoming
569 |             FROM entity_graph eg
570 |             JOIN entity e ON (
571 |                 eg.type = 'relation' AND
572 |                 e.id = CASE
573 |                     WHEN eg.is_incoming = 0 THEN eg.to_id
574 |                     ELSE eg.from_id
575 |                 END
576 |                 {date_filter}
577 |                 {project_filter}
578 |             )
579 |             WHERE eg.depth < :max_depth
580 |             {timeframe_condition}
581 |         )
582 |         SELECT DISTINCT
583 |             type,
584 |             id,
585 |             title,
586 |             permalink,
587 |             file_path,
588 |             from_id,
589 |             to_id,
590 |             relation_type,
591 |             content,
592 |             category,
593 |             entity_id,
594 |             MIN(depth) as depth,
595 |             root_id,
596 |             created_at
597 |         FROM entity_graph
598 |         WHERE depth > 0
599 |         GROUP BY type, id, title, permalink, file_path, from_id, to_id,
600 |                  relation_type, content, category, entity_id, root_id, created_at
601 |         ORDER BY depth, type, id
602 |         LIMIT :max_results
603 |        """)
604 | 
```
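
To make the flow above concrete, here is a minimal, hypothetical sketch of driving `ContextService.build_context` with a wildcard `memory://` URL. The repository wiring is elided (it depends on the database backend), and direct `MemoryUrl(...)` construction is an assumption about how the type validates; the point is the shape of the call and result, including how `find_related` internally doubles the requested depth (one level for each relation row plus one for the entity it reaches):

```python
from basic_memory.schemas.memory import MemoryUrl
from basic_memory.services.context_service import ContextService


async def show_context(search_repo, entity_repo, observation_repo):
    # Assumes the three repositories were built elsewhere for the
    # active project; their construction is omitted in this sketch.
    service = ContextService(
        search_repository=search_repo,
        entity_repository=entity_repo,
        observation_repository=observation_repo,
    )

    result = await service.build_context(
        # Wildcard pattern: each segment around '*' is normalized
        # separately, so this matches every permalink under specs/.
        memory_url=MemoryUrl("memory://specs/*"),
        depth=2,  # doubled internally to 4 CTE levels:
                  # relation -> entity -> relation -> entity
        limit=10,
        max_related=10,
        include_observations=True,
    )

    for item in result.results:
        print(item.primary_result.title)
        for obs in item.observations:
            print(f"  [{obs.category}] {obs.content[:40]}")
        for rel in item.related_results:
            print(f"  ({rel.type}) {rel.title}")

    meta = result.metadata
    print(f"{meta.primary_count} primary, {meta.related_count} related")
```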

--------------------------------------------------------------------------------
/src/basic_memory/config.py:
--------------------------------------------------------------------------------

```python
  1 | """Configuration management for basic-memory."""
  2 | 
  3 | import json
  4 | import os
  5 | from dataclasses import dataclass
  6 | from datetime import datetime
  7 | from pathlib import Path
  8 | from typing import Any, Dict, Literal, Optional, List, Tuple
  9 | from enum import Enum
 10 | 
 11 | from loguru import logger
 12 | from pydantic import BaseModel, Field, model_validator
 13 | from pydantic_settings import BaseSettings, SettingsConfigDict
 14 | 
 15 | from basic_memory.utils import setup_logging, generate_permalink
 16 | 
 17 | 
 18 | DATABASE_NAME = "memory.db"
 19 | APP_DATABASE_NAME = "memory.db"  # Using the same name but in the app directory
 20 | DATA_DIR_NAME = ".basic-memory"
 21 | CONFIG_FILE_NAME = "config.json"
 22 | WATCH_STATUS_JSON = "watch-status.json"
 23 | 
 24 | Environment = Literal["test", "dev", "user"]
 25 | 
 26 | 
 27 | class DatabaseBackend(str, Enum):
 28 |     """Supported database backends."""
 29 | 
 30 |     SQLITE = "sqlite"
 31 |     POSTGRES = "postgres"
 32 | 
 33 | 
 34 | @dataclass
 35 | class ProjectConfig:
 36 |     """Configuration for a specific basic-memory project."""
 37 | 
 38 |     name: str
 39 |     home: Path
 40 | 
 41 |     @property
 42 |     def project(self):
 43 |         return self.name  # pragma: no cover
 44 | 
 45 |     @property
 46 |     def project_url(self) -> str:  # pragma: no cover
 47 |         return f"/{generate_permalink(self.name)}"
 48 | 
 49 | 
 50 | class CloudProjectConfig(BaseModel):
 51 |     """Sync configuration for a cloud project.
 52 | 
 53 |     This tracks the local working directory and sync state for a project
 54 |     that is synced with Basic Memory Cloud.
 55 |     """
 56 | 
 57 |     local_path: str = Field(description="Local working directory path for this cloud project")
 58 |     last_sync: Optional[datetime] = Field(
 59 |         default=None, description="Timestamp of last successful sync operation"
 60 |     )
 61 |     bisync_initialized: bool = Field(
 62 |         default=False, description="Whether rclone bisync baseline has been established"
 63 |     )
 64 | 
 65 | 
 66 | class BasicMemoryConfig(BaseSettings):
 67 |     """Pydantic model for Basic Memory global configuration."""
 68 | 
 69 |     env: Environment = Field(default="dev", description="Environment name")
 70 | 
 71 |     projects: Dict[str, str] = Field(
 72 |         default_factory=lambda: {
 73 |             "main": str(Path(os.getenv("BASIC_MEMORY_HOME", Path.home() / "basic-memory")))
 74 |         }
 75 |         if os.getenv("BASIC_MEMORY_HOME")
 76 |         else {},
 77 |         description="Mapping of project names to their filesystem paths",
 78 |     )
 79 |     default_project: str = Field(
 80 |         default="main",
 81 |         description="Name of the default project to use",
 82 |     )
 83 |     default_project_mode: bool = Field(
 84 |         default=False,
 85 |         description="When True, MCP tools automatically use default_project when no project parameter is specified. Enables simplified UX for single-project workflows.",
 86 |     )
 87 | 
 88 |     # overridden by ~/.basic-memory/config.json
 89 |     log_level: str = "INFO"
 90 | 
 91 |     # Database configuration
 92 |     database_backend: DatabaseBackend = Field(
 93 |         default=DatabaseBackend.SQLITE,
 94 |         description="Database backend to use (sqlite or postgres)",
 95 |     )
 96 | 
 97 |     database_url: Optional[str] = Field(
 98 |         default=None,
 99 |         description="Database connection URL. For Postgres, use postgresql+asyncpg://user:pass@host:port/db. If not set, SQLite will use default path.",
100 |     )
101 | 
102 |     # Database connection pool configuration (Postgres only)
103 |     db_pool_size: int = Field(
104 |         default=20,
105 |         description="Number of connections to keep in the pool (Postgres only)",
106 |         gt=0,
107 |     )
108 |     db_pool_overflow: int = Field(
109 |         default=40,
110 |         description="Max additional connections beyond pool_size under load (Postgres only)",
111 |         gt=0,
112 |     )
113 |     db_pool_recycle: int = Field(
114 |         default=180,
115 |         description="Recycle connections after N seconds to prevent stale connections. Default 180s works well with Neon's ~5 minute scale-to-zero (Postgres only)",
116 |         gt=0,
117 |     )
118 | 
119 |     # Watch service configuration
120 |     sync_delay: int = Field(
121 |         default=1000, description="Milliseconds to wait after changes before syncing", gt=0
122 |     )
123 | 
124 |     watch_project_reload_interval: int = Field(
125 |         default=300,
126 |         description="Seconds between reloading project list in watch service. Higher values reduce CPU usage by minimizing watcher restarts. Default 300s (5 min) balances efficiency with responsiveness to new projects.",
127 |         gt=0,
128 |     )
129 | 
130 |     # update permalinks on move
131 |     update_permalinks_on_move: bool = Field(
132 |         default=False,
133 |         description="Whether to update permalinks when files are moved or renamed. default (False)",
134 |     )
135 | 
136 |     sync_changes: bool = Field(
137 |         default=True,
138 |         description="Whether to sync changes in real time. default (True)",
139 |     )
140 | 
141 |     sync_thread_pool_size: int = Field(
142 |         default=4,
143 |         description="Size of thread pool for file I/O operations in sync service. Default of 4 is optimized for cloud deployments with 1-2GB RAM.",
144 |         gt=0,
145 |     )
146 | 
147 |     sync_max_concurrent_files: int = Field(
148 |         default=10,
149 |         description="Maximum number of files to process concurrently during sync. Limits memory usage on large projects (2000+ files). Lower values reduce memory consumption.",
150 |         gt=0,
151 |     )
152 | 
153 |     kebab_filenames: bool = Field(
154 |         default=False,
155 |         description="Format for generated filenames. False preserves spaces and special chars, True converts them to hyphens for consistency with permalinks",
156 |     )
157 | 
158 |     disable_permalinks: bool = Field(
159 |         default=False,
160 |         description="Disable automatic permalink generation in frontmatter. When enabled, new notes won't have permalinks added and sync won't update permalinks. Existing permalinks will still work for reading.",
161 |     )
162 | 
163 |     skip_initialization_sync: bool = Field(
164 |         default=False,
165 |         description="Skip expensive initialization synchronization. Useful for cloud/stateless deployments where project reconciliation is not needed.",
166 |     )
167 | 
168 |     # File formatting configuration
169 |     format_on_save: bool = Field(
170 |         default=False,
171 |         description="Automatically format files after saving using configured formatter. Disabled by default.",
172 |     )
173 | 
174 |     formatter_command: Optional[str] = Field(
175 |         default=None,
176 |         description="External formatter command. Use {file} as placeholder for file path. If not set, uses built-in mdformat (Python, no Node.js required). Set to 'npx prettier --write {file}' for Prettier.",
177 |     )
178 | 
179 |     formatters: Dict[str, str] = Field(
180 |         default_factory=dict,
181 |         description="Per-extension formatters. Keys are extensions (without dot), values are commands. Example: {'md': 'prettier --write {file}', 'json': 'prettier --write {file}'}",
182 |     )
183 | 
184 |     formatter_timeout: float = Field(
185 |         default=5.0,
186 |         description="Maximum seconds to wait for formatter to complete",
187 |         gt=0,
188 |     )
189 | 
190 |     # Project path constraints
191 |     project_root: Optional[str] = Field(
192 |         default=None,
193 |         description="If set, all projects must be created underneath this directory. Paths will be sanitized and constrained to this root. If not set, projects can be created anywhere (default behavior).",
194 |     )
195 | 
196 |     # Cloud configuration
197 |     cloud_client_id: str = Field(
198 |         default="client_01K6KWQPW6J1M8VV7R3TZP5A6M",
199 |         description="OAuth client ID for Basic Memory Cloud",
200 |     )
201 | 
202 |     cloud_domain: str = Field(
203 |         default="https://eloquent-lotus-05.authkit.app",
204 |         description="AuthKit domain for Basic Memory Cloud",
205 |     )
206 | 
207 |     cloud_host: str = Field(
208 |         default_factory=lambda: os.getenv(
209 |             "BASIC_MEMORY_CLOUD_HOST", "https://cloud.basicmemory.com"
210 |         ),
211 |         description="Basic Memory Cloud host URL",
212 |     )
213 | 
214 |     cloud_mode: bool = Field(
215 |         default=False,
216 |         description="Enable cloud mode - all requests go to cloud instead of local (config file value)",
217 |     )
218 | 
219 |     cloud_projects: Dict[str, CloudProjectConfig] = Field(
220 |         default_factory=dict,
221 |         description="Cloud project sync configuration mapping project names to their local paths and sync state",
222 |     )
223 | 
224 |     # Telemetry configuration (Homebrew-style opt-out)
225 |     telemetry_enabled: bool = Field(
226 |         default=True,
227 |         description="Send anonymous usage statistics to help improve Basic Memory. Disable with: bm telemetry disable",
228 |     )
229 | 
230 |     telemetry_notice_shown: bool = Field(
231 |         default=False,
232 |         description="Whether the one-time telemetry notice has been shown to the user",
233 |     )
234 | 
235 |     @property
236 |     def is_test_env(self) -> bool:
237 |         """Check if running in a test environment.
238 | 
239 |         Returns True if any of:
240 |         - env field is set to "test"
241 |         - BASIC_MEMORY_ENV environment variable is "test"
242 |         - PYTEST_CURRENT_TEST environment variable is set (pytest is running)
243 | 
244 |         Used to disable features like telemetry and file watchers during tests.
245 |         """
246 |         return (
247 |             self.env == "test"
248 |             or os.getenv("BASIC_MEMORY_ENV", "").lower() == "test"
249 |             or os.getenv("PYTEST_CURRENT_TEST") is not None
250 |         )
251 | 
252 |     @property
253 |     def cloud_mode_enabled(self) -> bool:
254 |         """Check if cloud mode is enabled.
255 | 
256 |         Priority:
257 |         1. BASIC_MEMORY_CLOUD_MODE environment variable
258 |         2. Config file value (cloud_mode)
259 |         """
260 |         env_value = os.environ.get("BASIC_MEMORY_CLOUD_MODE", "").lower()
261 |         if env_value in ("true", "1", "yes"):
262 |             return True
263 |         elif env_value in ("false", "0", "no"):
264 |             return False
265 |         # Fall back to config file value
266 |         return self.cloud_mode
267 | 
268 |     @classmethod
269 |     def for_cloud_tenant(
270 |         cls,
271 |         database_url: str,
272 |         projects: Optional[Dict[str, str]] = None,
273 |     ) -> "BasicMemoryConfig":
274 |         """Create config for cloud tenant - no config.json, database is source of truth.
275 | 
276 |         This factory method creates a BasicMemoryConfig suitable for cloud deployments
277 |         where:
278 |         - Database is Postgres (Neon), not SQLite
279 |         - Projects are discovered from the database, not config file
280 |         - Path validation is skipped (no local filesystem in cloud)
281 |         - Initialization sync is skipped (stateless deployment)
282 | 
283 |         Args:
284 |             database_url: Postgres connection URL for tenant database
285 |             projects: Optional project mapping (usually empty, discovered from DB)
286 | 
287 |         Returns:
288 |             BasicMemoryConfig configured for cloud mode
289 |         """
290 |         return cls(  # pragma: no cover
291 |             database_backend=DatabaseBackend.POSTGRES,
292 |             database_url=database_url,
293 |             projects=projects or {},
294 |             cloud_mode=True,
295 |             skip_initialization_sync=True,
296 |         )
297 | 
298 |     model_config = SettingsConfigDict(
299 |         env_prefix="BASIC_MEMORY_",
300 |         extra="ignore",
301 |     )
302 | 
303 |     def get_project_path(self, project_name: Optional[str] = None) -> Path:  # pragma: no cover
304 |         """Get the path for a specific project or the default project."""
305 |         name = project_name or self.default_project
306 | 
307 |         if name not in self.projects:
308 |             raise ValueError(f"Project '{name}' not found in configuration")
309 | 
310 |         return Path(self.projects[name])
311 | 
312 |     def model_post_init(self, __context: Any) -> None:
313 |         """Ensure configuration is valid after initialization."""
314 |         # Skip project initialization in cloud mode - projects are discovered from DB
315 |         if self.database_backend == DatabaseBackend.POSTGRES:  # pragma: no cover
316 |             return  # pragma: no cover
317 | 
318 |         # Ensure at least one project exists; if none exist then create main
319 |         if not self.projects:  # pragma: no cover
320 |             self.projects["main"] = str(
321 |                 Path(os.getenv("BASIC_MEMORY_HOME", Path.home() / "basic-memory"))
322 |             )
323 | 
324 |         # Ensure default project is valid (i.e. points to an existing project)
325 |         if self.default_project not in self.projects:  # pragma: no cover
326 |             # Set default to first available project
327 |             self.default_project = next(iter(self.projects.keys()))
328 | 
329 |     @property
330 |     def app_database_path(self) -> Path:
331 |         """Get the path to the app-level database.
332 | 
333 |         This is the single database that will store all knowledge data
334 |         across all projects.
335 |         """
336 |         database_path = Path.home() / DATA_DIR_NAME / APP_DATABASE_NAME
337 |         if not database_path.exists():  # pragma: no cover
338 |             database_path.parent.mkdir(parents=True, exist_ok=True)
339 |             database_path.touch()
340 |         return database_path
341 | 
342 |     @property
343 |     def database_path(self) -> Path:
344 |         """Get SQLite database path.
345 | 
346 |         Returns the app-level database path
347 |         for backward compatibility in the codebase.
348 |         """
349 | 
350 |         # Load the app-level database path from the global config
351 |         config_manager = ConfigManager()
352 |         config = config_manager.load_config()  # pragma: no cover
353 |         return config.app_database_path  # pragma: no cover
354 | 
355 |     @property
356 |     def project_list(self) -> List[ProjectConfig]:  # pragma: no cover
357 |         """Get all configured projects as ProjectConfig objects."""
358 |         return [ProjectConfig(name=name, home=Path(path)) for name, path in self.projects.items()]
359 | 
360 |     @model_validator(mode="after")
361 |     def ensure_project_paths_exists(self) -> "BasicMemoryConfig":  # pragma: no cover
362 |         """Ensure project paths exist.
363 | 
364 |         Skips path creation when using Postgres backend (cloud mode) since
365 |         cloud tenants don't use local filesystem paths.
366 |         """
367 |         # Skip path creation for cloud mode - no local filesystem
368 |         if self.database_backend == DatabaseBackend.POSTGRES:
369 |             return self
370 | 
371 |         for name, path_value in self.projects.items():
372 |             path = Path(path_value)
373 |             if not path.exists():
374 |                 try:
375 |                     path.mkdir(parents=True)
376 |                 except Exception as e:
377 |                     logger.error(f"Failed to create project path: {e}")
378 |                     raise e
379 |         return self
380 | 
381 |     @property
382 |     def data_dir_path(self) -> Path:
383 |         return Path.home() / DATA_DIR_NAME
384 | 
385 | 
386 | # Module-level cache for configuration
387 | _CONFIG_CACHE: Optional[BasicMemoryConfig] = None
388 | 
389 | 
390 | class ConfigManager:
391 |     """Manages Basic Memory configuration."""
392 | 
393 |     def __init__(self) -> None:
394 |         """Initialize the configuration manager."""
395 |         home = os.getenv("HOME", Path.home())
396 |         if isinstance(home, str):
397 |             home = Path(home)
398 | 
399 |         # Allow override via environment variable
400 |         if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"):
401 |             self.config_dir = Path(config_dir)
402 |         else:
403 |             self.config_dir = home / DATA_DIR_NAME
404 | 
405 |         self.config_file = self.config_dir / CONFIG_FILE_NAME
406 | 
407 |         # Ensure config directory exists
408 |         self.config_dir.mkdir(parents=True, exist_ok=True)
409 | 
410 |     @property
411 |     def config(self) -> BasicMemoryConfig:
412 |         """Get configuration, loading it lazily if needed."""
413 |         return self.load_config()
414 | 
415 |     def load_config(self) -> BasicMemoryConfig:
416 |         """Load configuration from file or create default.
417 | 
418 |         Environment variables take precedence over file config values,
419 |         following Pydantic Settings best practices.
420 | 
421 |         Uses module-level cache for performance across ConfigManager instances.
422 |         """
423 |         global _CONFIG_CACHE
424 | 
425 |         # Return cached config if available
426 |         if _CONFIG_CACHE is not None:
427 |             return _CONFIG_CACHE
428 | 
429 |         if self.config_file.exists():
430 |             try:
431 |                 file_data = json.loads(self.config_file.read_text(encoding="utf-8"))
432 | 
433 |                 # First, create config from environment variables (Pydantic will read them)
434 |                 # Then overlay with file data for fields that aren't set via env vars
435 |                 # This ensures env vars take precedence
436 | 
437 |                 # Get env-based config fields that are actually set
438 |                 env_config = BasicMemoryConfig()
439 |                 env_dict = env_config.model_dump()
440 | 
441 |                 # Merge: file data as base, but only use it for fields not set by env
442 |                 # We detect env-set fields by checking os.environ directly
443 |                 merged_data = file_data.copy()
444 | 
445 |                 # For fields that have env var overrides, use those instead of file values
446 |                 # The env_prefix is "BASIC_MEMORY_" so we check those
447 |                 for field_name in BasicMemoryConfig.model_fields.keys():
448 |                     env_var_name = f"BASIC_MEMORY_{field_name.upper()}"
449 |                     if env_var_name in os.environ:
450 |                         # Environment variable is set, use it
451 |                         merged_data[field_name] = env_dict[field_name]
452 | 
453 |                 _CONFIG_CACHE = BasicMemoryConfig(**merged_data)
454 |                 return _CONFIG_CACHE
455 |             except Exception as e:  # pragma: no cover
456 |                 logger.exception(f"Failed to load config: {e}")
457 |                 raise e
458 |         else:
459 |             config = BasicMemoryConfig()
460 |             self.save_config(config)
461 |             return config
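        # Editor's note (illustrative): with BASIC_MEMORY_CLOUD_MODE=true
        # exported, the merge above replaces the file's cloud_mode with the
        # env-derived value before _CONFIG_CACHE is populated.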
462 | 
463 |     def save_config(self, config: BasicMemoryConfig) -> None:
464 |         """Save configuration to file and invalidate cache."""
465 |         global _CONFIG_CACHE
466 |         save_basic_memory_config(self.config_file, config)
467 |         # Invalidate cache so next load_config() reads fresh data
468 |         _CONFIG_CACHE = None
469 | 
470 |     @property
471 |     def projects(self) -> Dict[str, str]:
472 |         """Get all configured projects."""
473 |         return self.config.projects.copy()
474 | 
475 |     @property
476 |     def default_project(self) -> str:
477 |         """Get the default project name."""
478 |         return self.config.default_project
479 | 
480 |     def add_project(self, name: str, path: str) -> ProjectConfig:
481 |         """Add a new project to the configuration."""
482 |         project_name, _ = self.get_project(name)
483 |         if project_name:  # pragma: no cover
484 |             raise ValueError(f"Project '{name}' already exists")
485 | 
486 |         # Ensure the path exists
487 |         project_path = Path(path)
488 |         project_path.mkdir(parents=True, exist_ok=True)  # pragma: no cover
489 | 
490 |         # Load config, modify it, and save it
491 |         config = self.load_config()
492 |         config.projects[name] = str(project_path)
493 |         self.save_config(config)
494 |         return ProjectConfig(name=name, home=project_path)
495 | 
496 |     def remove_project(self, name: str) -> None:
497 |         """Remove a project from the configuration."""
498 | 
499 |         project_name, path = self.get_project(name)
500 |         if not project_name:  # pragma: no cover
501 |             raise ValueError(f"Project '{name}' not found")
502 | 
503 |         # Load config, check, modify, and save
504 |         config = self.load_config()
505 |         if project_name == config.default_project:  # pragma: no cover
506 |             raise ValueError(f"Cannot remove the default project '{name}'")
507 | 
508 |         # Use the found project_name (which may differ from input name due to permalink matching)
509 |         del config.projects[project_name]
510 |         self.save_config(config)
511 | 
512 |     def set_default_project(self, name: str) -> None:
513 |         """Set the default project."""
514 |         project_name, path = self.get_project(name)
515 |         if not project_name:  # pragma: no cover
516 |             raise ValueError(f"Project '{name}' not found")
517 | 
518 |         # Load config, modify, and save
519 |         config = self.load_config()
520 |         config.default_project = project_name
521 |         self.save_config(config)
522 | 
523 |     def get_project(self, name: str) -> Tuple[str, str] | Tuple[None, None]:
524 |         """Look up a project from the configuration by name or permalink"""
525 |         project_permalink = generate_permalink(name)
526 |         app_config = self.config
527 |         for project_name, path in app_config.projects.items():
528 |             if project_permalink == generate_permalink(project_name):
529 |                 return project_name, path
530 |         return None, None
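    # Editor's note (illustrative, hypothetical names): both sides go through
    # generate_permalink(), so get_project("My Notes") matches a config entry
    # keyed "my-notes" and returns the canonical ("my-notes", path) tuple.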
531 | 
532 | 
533 | def get_project_config(project_name: Optional[str] = None) -> ProjectConfig:
534 |     """
535 |     Get the project configuration for the current session.
536 |     If project_name is provided, it will be used instead of the default project.
537 |     """
538 | 
539 |     actual_project_name = None
540 | 
541 |     # load the config from file
542 |     config_manager = ConfigManager()
543 |     app_config = config_manager.load_config()
544 | 
545 |     # Get project name from environment variable (deprecated override)
546 |     os_project_name = os.environ.get("BASIC_MEMORY_PROJECT", None)
547 |     if os_project_name:  # pragma: no cover
548 |         logger.warning(
549 |             f"BASIC_MEMORY_PROJECT is no longer supported. Set the default project in the config instead. Setting project to {os_project_name}"
550 |         )
551 |         actual_project_name = os_project_name
552 |     # otherwise fall back to the configured default when no name is passed in
553 |     elif not project_name:
554 |         actual_project_name = app_config.default_project
555 |     # if a project_name is passed in, use it
556 |     else:  # pragma: no cover
557 |         actual_project_name = project_name
558 | 
559 |     # the config contains a dict[str,str] of project names and absolute paths
560 |     assert actual_project_name is not None, "actual_project_name cannot be None"
561 | 
562 |     project_permalink = generate_permalink(actual_project_name)
563 | 
564 |     for name, path in app_config.projects.items():
565 |         if project_permalink == generate_permalink(name):
566 |             return ProjectConfig(name=name, home=Path(path))
567 | 
568 |     # otherwise raise error
569 |     raise ValueError(f"Project '{actual_project_name}' not found")  # pragma: no cover
570 | 
571 | 
572 | def save_basic_memory_config(file_path: Path, config: BasicMemoryConfig) -> None:
573 |     """Save configuration to file."""
574 |     try:
575 |         # Use model_dump with mode='json' to serialize datetime objects properly
576 |         config_dict = config.model_dump(mode="json")
577 |         file_path.write_text(json.dumps(config_dict, indent=2))
578 |     except Exception as e:  # pragma: no cover
579 |         logger.error(f"Failed to save config: {e}")
580 | 
581 | 
582 | # Logging initialization functions for different entry points
583 | 
584 | 
585 | def init_cli_logging() -> None:  # pragma: no cover
586 |     """Initialize logging for CLI commands - file only.
587 | 
588 |     CLI commands should not log to stdout to avoid interfering with
589 |     command output and shell integration.
590 |     """
591 |     log_level = os.getenv("BASIC_MEMORY_LOG_LEVEL", "INFO")
592 |     setup_logging(log_level=log_level, log_to_file=True)
593 | 
594 | 
595 | def init_mcp_logging() -> None:  # pragma: no cover
596 |     """Initialize logging for MCP server - file only.
597 | 
598 |     MCP server must not log to stdout as it would corrupt the
599 |     JSON-RPC protocol communication.
600 |     """
601 |     log_level = os.getenv("BASIC_MEMORY_LOG_LEVEL", "INFO")
602 |     setup_logging(log_level=log_level, log_to_file=True)
603 | 
604 | 
605 | def init_api_logging() -> None:  # pragma: no cover
606 |     """Initialize logging for API server.
607 | 
608 |     Cloud mode (BASIC_MEMORY_CLOUD_MODE=1): stdout with structured context
609 |     Local mode: file only
610 |     """
611 |     log_level = os.getenv("BASIC_MEMORY_LOG_LEVEL", "INFO")
612 |     cloud_mode = os.getenv("BASIC_MEMORY_CLOUD_MODE", "").lower() in ("1", "true")
613 |     if cloud_mode:
614 |         setup_logging(log_level=log_level, log_to_stdout=True, structured_context=True)
615 |     else:
616 |         setup_logging(log_level=log_level, log_to_file=True)
617 | 
```
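
Editor's note: a minimal sketch (an illustration, not part of the repository) of the env-over-file precedence that `ConfigManager.load_config` implements above, assuming an isolated scratch directory via the documented `BASIC_MEMORY_CONFIG_DIR` and `BASIC_MEMORY_HOME` variables:

```python
import os
import tempfile

import basic_memory.config
from basic_memory.config import ConfigManager

with tempfile.TemporaryDirectory() as tmp:
    # Isolate the config dir and project home, and reset the module-level
    # cache (the same reset pattern the test suite uses).
    os.environ["BASIC_MEMORY_CONFIG_DIR"] = tmp
    os.environ["BASIC_MEMORY_HOME"] = os.path.join(tmp, "notes")
    basic_memory.config._CONFIG_CACHE = None

    config = ConfigManager().load_config()  # first run writes config.json
    print(config.cloud_mode)                # False (file default, assumed)

    # An env var matching BASIC_MEMORY_<FIELD> wins over the file on reload.
    os.environ["BASIC_MEMORY_CLOUD_MODE"] = "true"
    basic_memory.config._CONFIG_CACHE = None  # cache must be cleared to reload
    print(ConfigManager().load_config().cloud_mode)  # True
```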

--------------------------------------------------------------------------------
/test-int/mcp/test_project_management_integration.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Integration tests for project_management MCP tools.
  3 | 
  4 | Tests the complete project management workflow: MCP client -> MCP server -> FastAPI -> project service
  5 | """
  6 | 
  7 | import pytest
  8 | from fastmcp import Client
  9 | 
 10 | 
 11 | @pytest.mark.asyncio
 12 | async def test_list_projects_basic_operation(mcp_server, app, test_project):
 13 |     """Test basic list_projects operation showing available projects."""
 14 | 
 15 |     async with Client(mcp_server) as client:
 16 |         # List all available projects
 17 |         list_result = await client.call_tool(
 18 |             "list_memory_projects",
 19 |             {},
 20 |         )
 21 | 
 22 |         # Should return formatted project list
 23 |         assert len(list_result.content) == 1
 24 |         list_text = list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 25 | 
 26 |         # Should show available projects with new session guidance format
 27 |         assert "Available projects:" in list_text
 28 |         assert "test-project" in list_text  # Our test project
 29 |         # Check for new session guidance instead of CLI default
 30 |         assert "Next: Ask which project to use for this session." in list_text
 31 |         assert "Session reminder: Track the selected project" in list_text
 32 | 
 33 | 
 34 | @pytest.mark.asyncio
 35 | async def test_project_management_workflow(mcp_server, app, test_project):
 36 |     """Test basic project management workflow."""
 37 | 
 38 |     async with Client(mcp_server) as client:
 39 |         # List all projects
 40 |         list_result = await client.call_tool("list_memory_projects", {})
 41 |         assert "Available projects:" in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 42 |         assert "test-project" in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 43 | 
 44 | 
 45 | @pytest.mark.asyncio
 46 | async def test_project_metadata_consistency(mcp_server, app, test_project):
 47 |     """Test that project management tools work correctly."""
 48 | 
 49 |     async with Client(mcp_server) as client:
 50 |         # Test basic project management tools
 51 | 
 52 |         # list_projects
 53 |         list_result = await client.call_tool("list_memory_projects", {})
 54 |         assert "Available projects:" in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 55 |         assert "test-project" in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 56 | 
 57 | 
 58 | @pytest.mark.asyncio
 59 | async def test_create_project_basic_operation(mcp_server, app, test_project, tmp_path):
 60 |     """Test creating a new project with basic parameters."""
 61 | 
 62 |     async with Client(mcp_server) as client:
 63 |         # Create a new project
 64 |         create_result = await client.call_tool(
 65 |             "create_memory_project",
 66 |             {
 67 |                 "project_name": "test-new-project",
 68 |                 "project_path": str(
 69 |                     tmp_path.parent / (tmp_path.name + "-projects") / "project-test-new-project"
 70 |                 ),
 71 |             },
 72 |         )
 73 | 
 74 |         assert len(create_result.content) == 1
 75 |         create_text = create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 76 | 
 77 |         # Should show success message and project details
 78 |         assert "✓" in create_text  # Success indicator
 79 |         assert "test-new-project" in create_text
 80 |         assert "Project Details:" in create_text
 81 |         assert "Name: test-new-project" in create_text
 82 |         # Check path contains project name (platform-independent)
 83 |         assert "Path:" in create_text and "test-new-project" in create_text
 84 |         assert "Project is now available for use" in create_text
 85 | 
 86 |         # Verify project appears in project list
 87 |         list_result = await client.call_tool("list_memory_projects", {})
 88 |         list_text = list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
 89 |         assert "test-new-project" in list_text
 90 | 
 91 | 
 92 | @pytest.mark.asyncio
 93 | async def test_create_project_with_default_flag(mcp_server, app, test_project, tmp_path):
 94 |     """Test creating a project and setting it as default."""
 95 | 
 96 |     async with Client(mcp_server) as client:
 97 |         # Create a new project and set as default
 98 |         create_result = await client.call_tool(
 99 |             "create_memory_project",
100 |             {
101 |                 "project_name": "test-default-project",
102 |                 "project_path": str(
103 |                     tmp_path.parent / (tmp_path.name + "-projects") / "project-test-default-project"
104 |                 ),
105 |                 "set_default": True,
106 |             },
107 |         )
108 | 
109 |         assert len(create_result.content) == 1
110 |         create_text = create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
111 | 
112 |         # Should show success and default flag
113 |         assert "✓" in create_text
114 |         assert "test-default-project" in create_text
115 |         assert "Set as default project" in create_text
116 | 
117 |         # Verify the new project is listed
118 |         list_after_create = await client.call_tool("list_memory_projects", {})
119 |         assert "test-default-project" in list_after_create.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
120 | 
121 | 
122 | @pytest.mark.asyncio
123 | async def test_create_project_duplicate_name(mcp_server, app, test_project, tmp_path):
124 |     """Test creating a project with duplicate name shows error."""
125 | 
126 |     async with Client(mcp_server) as client:
127 |         # First create a project
128 |         await client.call_tool(
129 |             "create_memory_project",
130 |             {
131 |                 "project_name": "duplicate-test",
132 |                 "project_path": str(
133 |                     tmp_path.parent / (tmp_path.name + "-projects") / "project-duplicate-test-1"
134 |                 ),
135 |             },
136 |         )
137 | 
138 |         # Try to create another project with same name
139 |         with pytest.raises(Exception) as exc_info:
140 |             await client.call_tool(
141 |                 "create_memory_project",
142 |                 {
143 |                     "project_name": "duplicate-test",
144 |                     "project_path": str(
145 |                         tmp_path.parent / (tmp_path.name + "-projects") / "project-duplicate-test-2"
146 |                     ),
147 |                 },
148 |             )
149 | 
150 |         # Should show error about duplicate name
151 |         error_message = str(exc_info.value)
152 |         assert "create_memory_project" in error_message
153 |         assert (
154 |             "duplicate-test" in error_message
155 |             or "already exists" in error_message
156 |             or "Invalid request" in error_message
157 |         )
158 | 
159 | 
160 | @pytest.mark.asyncio
161 | async def test_delete_project_basic_operation(mcp_server, app, test_project, tmp_path):
162 |     """Test deleting a project that exists."""
163 | 
164 |     async with Client(mcp_server) as client:
165 |         # First create a project to delete
166 |         await client.call_tool(
167 |             "create_memory_project",
168 |             {
169 |                 "project_name": "to-be-deleted",
170 |                 "project_path": str(
171 |                     tmp_path.parent / (tmp_path.name + "-projects") / "project-to-be-deleted"
172 |                 ),
173 |             },
174 |         )
175 | 
176 |         # Verify it exists
177 |         list_result = await client.call_tool("list_memory_projects", {})
178 |         assert "to-be-deleted" in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
179 | 
180 |         # Delete the project
181 |         delete_result = await client.call_tool(
182 |             "delete_project",
183 |             {
184 |                 "project_name": "to-be-deleted",
185 |             },
186 |         )
187 | 
188 |         assert len(delete_result.content) == 1
189 |         delete_text = delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
190 | 
191 |         # Should show success message
192 |         assert "✓" in delete_text
193 |         assert "to-be-deleted" in delete_text
194 |         assert "removed successfully" in delete_text
195 |         assert "Removed project details:" in delete_text
196 |         assert "Name: to-be-deleted" in delete_text
197 |         assert "Files remain on disk but project is no longer tracked" in delete_text
198 | 
199 |         # Verify project no longer appears in list
200 |         list_result_after = await client.call_tool("list_memory_projects", {})
201 |         assert "to-be-deleted" not in list_result_after.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
202 | 
203 | 
204 | @pytest.mark.asyncio
205 | async def test_delete_project_not_found(mcp_server, app, test_project):
206 |     """Test deleting a non-existent project shows error."""
207 | 
208 |     async with Client(mcp_server) as client:
209 |         # Try to delete non-existent project
210 |         with pytest.raises(Exception) as exc_info:
211 |             await client.call_tool(
212 |                 "delete_project",
213 |                 {
214 |                     "project_name": "non-existent-project",
215 |                 },
216 |             )
217 | 
218 |         # Should show error about non-existent project
219 |         error_message = str(exc_info.value)
220 |         assert "delete_project" in error_message
221 |         assert (
222 |             "non-existent-project" in error_message
223 |             or "not found" in error_message
224 |             or "Invalid request" in error_message
225 |         )
226 | 
227 | 
228 | @pytest.mark.asyncio
229 | async def test_delete_current_project_protection(mcp_server, app, test_project):
230 |     """Test that deleting the current project is prevented."""
231 | 
232 |     async with Client(mcp_server) as client:
233 |         # Try to delete the current project (test-project)
234 |         with pytest.raises(Exception) as exc_info:
235 |             await client.call_tool(
236 |                 "delete_project",
237 |                 {
238 |                     "project_name": "test-project",
239 |                 },
240 |             )
241 | 
242 |         # Should show error about deleting current project
243 |         error_message = str(exc_info.value)
244 |         assert "delete_project" in error_message
245 |         assert (
246 |             "currently active" in error_message
247 |             or "test-project" in error_message
248 |             or "Switch to a different project" in error_message
249 |         )
250 | 
251 | 
252 | @pytest.mark.asyncio
253 | async def test_project_lifecycle_workflow(mcp_server, app, test_project, tmp_path):
254 |     """Test complete project lifecycle: create, switch, use, delete."""
255 | 
256 |     async with Client(mcp_server) as client:
257 |         project_name = "lifecycle-test"
258 |         project_path = str(
259 |             tmp_path.parent / (tmp_path.name + "-projects") / "project-lifecycle-test"
260 |         )
261 | 
262 |         # 1. Create new project
263 |         create_result = await client.call_tool(
264 |             "create_memory_project",
265 |             {
266 |                 "project_name": project_name,
267 |                 "project_path": project_path,
268 |             },
269 |         )
270 |         assert "✓" in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
271 |         assert project_name in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
272 | 
273 |         # 2. Create content in the new project
274 |         await client.call_tool(
275 |             "write_note",
276 |             {
277 |                 "project": project_name,
278 |                 "title": "Lifecycle Test Note",
279 |                 "folder": "test",
280 |                 "content": "# Lifecycle Test\\n\\nThis note tests the project lifecycle.\\n\\n- [test] Lifecycle testing",
281 |                 "tags": "lifecycle,test",
282 |             },
283 |         )
284 | 
285 |         # 3. Verify the project exists in the list
286 |         list_with_content = await client.call_tool("list_memory_projects", {})
287 |         assert project_name in list_with_content.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
288 | 
289 |         # 4. Verify we can still access the original test project
290 |         test_list = await client.call_tool("list_memory_projects", {})
291 |         assert "test-project" in test_list.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
292 | 
293 |         # 5. Delete the lifecycle test project
294 |         delete_result = await client.call_tool(
295 |             "delete_project",
296 |             {
297 |                 "project_name": project_name,
298 |             },
299 |         )
300 |         assert "✓" in delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
301 |         assert f"{project_name}" in delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
302 |         assert "removed successfully" in delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
303 | 
304 |         # 6. Verify project is gone from list
305 |         list_result = await client.call_tool("list_memory_projects", {})
306 |         assert project_name not in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
307 | 
308 | 
309 | @pytest.mark.asyncio
310 | async def test_create_delete_project_edge_cases(mcp_server, app, test_project, tmp_path):
311 |     """Test edge cases for create and delete project operations."""
312 | 
313 |     async with Client(mcp_server) as client:
314 |         # Test with special characters and spaces in project name (should be handled gracefully)
315 |         special_name = "test project with spaces & symbols!"
316 | 
317 |         # Create project with special characters
318 |         create_result = await client.call_tool(
319 |             "create_memory_project",
320 |             {
321 |                 "project_name": special_name,
322 |                 "project_path": str(
323 |                     tmp_path.parent
324 |                     / (tmp_path.name + "-projects")
325 |                     / "project-test-project-with-special-chars"
326 |                 ),
327 |             },
328 |         )
329 |         assert "✓" in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
330 |         assert special_name in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
331 | 
332 |         # Verify it appears in list
333 |         list_result = await client.call_tool("list_memory_projects", {})
334 |         assert special_name in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
335 | 
336 |         # Delete it
337 |         delete_result = await client.call_tool(
338 |             "delete_project",
339 |             {
340 |                 "project_name": special_name,
341 |             },
342 |         )
343 |         assert "✓" in delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
344 |         assert special_name in delete_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
345 | 
346 |         # Verify it's gone
347 |         list_result_after = await client.call_tool("list_memory_projects", {})
348 |         assert special_name not in list_result_after.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
349 | 
350 | 
351 | @pytest.mark.asyncio
352 | async def test_case_insensitive_project_switching(mcp_server, app, test_project, tmp_path):
353 |     """Test case-insensitive project switching with proper database lookup."""
354 | 
355 |     async with Client(mcp_server) as client:
356 |         # Create a project with mixed case name
357 |         project_name = "Personal-Project"
358 |         create_result = await client.call_tool(
359 |             "create_memory_project",
360 |             {
361 |                 "project_name": project_name,
362 |                 "project_path": str(
363 |                     tmp_path.parent / (tmp_path.name + "-projects") / f"project-{project_name}"
364 |                 ),
365 |             },
366 |         )
367 |         assert "✓" in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
368 |         assert project_name in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
369 | 
370 |         # Verify project was created with canonical name
371 |         list_result = await client.call_tool("list_memory_projects", {})
372 |         assert project_name in list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
373 | 
374 |         # Test with different case variations
375 |         test_cases = [
376 |             "personal-project",  # all lowercase
377 |             "PERSONAL-PROJECT",  # all uppercase
378 |             "Personal-project",  # mixed case 1
379 |             "personal-Project",  # mixed case 2
380 |         ]
381 | 
382 |         # Test that project operations work with case-insensitive input
383 |         # (Project creation is case-preserving but operations can use different cases)
384 | 
385 |         # Test that we can reference the project with different cases in operations
386 |         for test_input in test_cases:
387 |             # Test write_note with case-insensitive project reference
388 |             write_result = await client.call_tool(
389 |                 "write_note",
390 |                 {
391 |                     "project": test_input,  # Use different case
392 |                     "title": f"Case Test {test_input}",
393 |                     "folder": "case-test",
394 |                     "content": f"# Case Test\n\nTesting with {test_input}",
395 |                 },
396 |             )
397 |             assert len(write_result.content) == 1
398 |             assert f"Case Test {test_input}".lower() in write_result.content[0].text.lower()  # pyright: ignore [reportAttributeAccessIssue]
399 | 
400 |         # Clean up
401 |         await client.call_tool("delete_project", {"project_name": project_name})
402 | 
403 | 
404 | @pytest.mark.asyncio
405 | async def test_case_insensitive_project_operations(mcp_server, app, test_project, tmp_path):
406 |     """Test that all project operations work correctly after case-insensitive switching."""
407 | 
408 |     async with Client(mcp_server) as client:
409 |         # Create a project with capital letters
410 |         project_name = "CamelCase-Project"
411 |         create_result = await client.call_tool(
412 |             "create_memory_project",
413 |             {
414 |                 "project_name": project_name,
415 |                 "project_path": str(
416 |                     tmp_path.parent / (tmp_path.name + "-projects") / f"project-{project_name}"
417 |                 ),
418 |             },
419 |         )
420 |         assert "✓" in create_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
421 | 
422 |         # Test that MCP operations work correctly with the project
423 | 
424 |         # 1. Create a note in the project
425 |         write_result = await client.call_tool(
426 |             "write_note",
427 |             {
428 |                 "project": project_name,
429 |                 "title": "Case Test Note",
430 |                 "folder": "case-test",
431 |                 "content": "# Case Test Note\n\nTesting case-insensitive operations.\n\n- [test] Case insensitive switch\n- relates_to [[Another Note]]",
432 |                 "tags": "case,test",
433 |             },
434 |         )
435 |         assert len(write_result.content) == 1
436 |         assert "Case Test Note" in write_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
437 | 
438 |         # 2. Test search works in the project
439 |         search_result = await client.call_tool(
440 |             "search_notes",
441 |             {"project": project_name, "query": "case insensitive"},
442 |         )
443 |         assert len(search_result.content) == 1
444 |         assert "Case Test Note" in search_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
445 | 
446 |         # 3. Test read_note works
447 |         read_result = await client.call_tool(
448 |             "read_note",
449 |             {"project": project_name, "identifier": "Case Test Note"},
450 |         )
451 |         assert len(read_result.content) == 1
452 |         assert "Case Test Note" in read_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
453 |         assert "case insensitive" in read_result.content[0].text.lower()  # pyright: ignore [reportAttributeAccessIssue]
454 | 
455 |         # Clean up
456 |         await client.call_tool("delete_project", {"project_name": project_name})
457 | 
458 | 
459 | @pytest.mark.asyncio
460 | async def test_case_insensitive_error_handling(mcp_server, app, test_project):
461 |     """Test error handling for case-insensitive project operations."""
462 | 
463 |     async with Client(mcp_server) as client:
464 |         # Test non-existent project with various cases
465 |         non_existent_cases = [
466 |             "NonExistent",
467 |             "non-existent",
468 |             "NON-EXISTENT",
469 |             "Non-Existent-Project",
470 |         ]
471 | 
472 |         # Test that operations fail gracefully with non-existent projects
473 |         for test_case in non_existent_cases:
474 |             # Test that write_note fails with non-existent project
475 |             with pytest.raises(Exception):
476 |                 await client.call_tool(
477 |                     "write_note",
478 |                     {
479 |                         "project": test_case,
480 |                         "title": "Test Note",
481 |                         "folder": "test",
482 |                         "content": "# Test\n\nTest content.",
483 |                     },
484 |                 )
485 | 
486 | 
487 | @pytest.mark.asyncio
488 | async def test_case_preservation_in_project_list(mcp_server, app, test_project, tmp_path):
489 |     """Test that project names preserve their original case in listings."""
490 | 
491 |     async with Client(mcp_server) as client:
492 |         # Create projects with different casing patterns
493 |         test_projects = [
494 |             "lowercase-project",
495 |             "UPPERCASE-PROJECT",
496 |             "CamelCase-Project",
497 |             "Mixed-CASE-project",
498 |         ]
499 | 
500 |         # Create all test projects
501 |         for project_name in test_projects:
502 |             await client.call_tool(
503 |                 "create_memory_project",
504 |                 {
505 |                     "project_name": project_name,
506 |                     "project_path": str(
507 |                         tmp_path.parent / (tmp_path.name + "-projects") / f"project-{project_name}"
508 |                     ),
509 |                 },
510 |             )
511 | 
512 |         # List projects and verify each appears with its original case
513 |         list_result = await client.call_tool("list_memory_projects", {})
514 |         list_text = list_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
515 | 
516 |         for project_name in test_projects:
517 |             assert project_name in list_text, f"Project {project_name} not found in list"
518 | 
519 |         # Test each project with its exact name (listings preserve the original case)
520 |         for project_name in test_projects:
521 |             # Test write_note with exact project name
522 |             write_result = await client.call_tool(
523 |                 "write_note",
524 |                 {
525 |                     "project": project_name,  # Use exact project name
526 |                     "title": f"Test Note {project_name}",
527 |                     "folder": "test",
528 |                     "content": f"# Test\n\nTesting {project_name}",
529 |                 },
530 |             )
531 |             assert len(write_result.content) == 1
532 |             result_text = write_result.content[0].text  # pyright: ignore [reportAttributeAccessIssue]
533 |             assert "successfully" in result_text.lower() or "created" in result_text.lower()
534 | 
535 |         # Clean up - delete test projects
536 |         for project_name in test_projects:
537 |             await client.call_tool("delete_project", {"project_name": project_name})
538 | 
539 | 
540 | @pytest.mark.asyncio
541 | async def test_nested_project_paths_rejected(mcp_server, app, test_project, tmp_path):
542 |     """Test that creating nested project paths is rejected with clear error message."""
543 | 
544 |     async with Client(mcp_server) as client:
545 |         # Create a parent project
546 |         parent_name = "parent-project"
547 |         parent_path = str(
548 |             tmp_path.parent / (tmp_path.name + "-projects") / "project-nested-test/parent"
549 |         )
550 | 
551 |         await client.call_tool(
552 |             "create_memory_project",
553 |             {
554 |                 "project_name": parent_name,
555 |                 "project_path": parent_path,
556 |             },
557 |         )
558 | 
559 |         # Try to create a child project nested under the parent
560 |         child_name = "child-project"
561 |         child_path = str(
562 |             tmp_path.parent / (tmp_path.name + "-projects") / "project-nested-test/parent/child"
563 |         )
564 | 
565 |         with pytest.raises(Exception) as exc_info:
566 |             await client.call_tool(
567 |                 "create_memory_project",
568 |                 {
569 |                     "project_name": child_name,
570 |                     "project_path": child_path,
571 |                 },
572 |             )
573 | 
574 |         # Verify error message mentions nested paths
575 |         error_message = str(exc_info.value)
576 |         assert "nested" in error_message.lower()
577 |         assert parent_name in error_message or parent_path in error_message
578 | 
579 |         # Clean up parent project
580 |         await client.call_tool("delete_project", {"project_name": parent_name})
581 | 
```
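
Editor's note: the tests above share one in-process pattern from fastmcp: open a `Client` against the `mcp_server` fixture (provided by the suite's conftest, not shown here) and read the single text content block each tool returns. A condensed sketch under those assumptions:

```python
from fastmcp import Client


async def list_projects_text(mcp_server) -> str:
    # mcp_server is assumed to be the suite's conftest fixture.
    async with Client(mcp_server) as client:
        result = await client.call_tool("list_memory_projects", {})
        # These tools return exactly one text content block.
        return result.content[0].text
```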

--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------

```python
  1 | """Test configuration management."""
  2 | 
  3 | import tempfile
  4 | import pytest
  5 | from datetime import datetime
  6 | 
  7 | from basic_memory.config import BasicMemoryConfig, CloudProjectConfig, ConfigManager
  8 | from pathlib import Path
  9 | 
 10 | 
 11 | class TestBasicMemoryConfig:
 12 |     """Test BasicMemoryConfig behavior with BASIC_MEMORY_HOME environment variable."""
 13 | 
 14 |     def test_default_behavior_without_basic_memory_home(self, config_home, monkeypatch):
 15 |         """Test that config uses default path when BASIC_MEMORY_HOME is not set."""
 16 |         # Ensure BASIC_MEMORY_HOME is not set
 17 |         monkeypatch.delenv("BASIC_MEMORY_HOME", raising=False)
 18 | 
 19 |         config = BasicMemoryConfig()
 20 | 
 21 |         # Should use the default path (home/basic-memory)
 22 |         expected_path = config_home / "basic-memory"
 23 |         assert Path(config.projects["main"]) == expected_path
 24 | 
 25 |     def test_respects_basic_memory_home_environment_variable(self, config_home, monkeypatch):
 26 |         """Test that config respects BASIC_MEMORY_HOME environment variable."""
 27 |         custom_path = config_home / "app" / "data"
 28 |         monkeypatch.setenv("BASIC_MEMORY_HOME", str(custom_path))
 29 | 
 30 |         config = BasicMemoryConfig()
 31 | 
 32 |         # Should use the custom path from environment variable
 33 |         assert Path(config.projects["main"]) == custom_path
 34 | 
 35 |     def test_model_post_init_respects_basic_memory_home_creates_main(
 36 |         self, config_home, monkeypatch
 37 |     ):
 38 |         """Test that model_post_init creates main project with BASIC_MEMORY_HOME when missing and no other projects."""
 39 |         custom_path = config_home / "custom" / "memory" / "path"
 40 |         monkeypatch.setenv("BASIC_MEMORY_HOME", str(custom_path))
 41 | 
 42 |         # Create config without main project
 43 |         config = BasicMemoryConfig()
 44 | 
 45 |         # model_post_init should have added main project with BASIC_MEMORY_HOME
 46 |         assert "main" in config.projects
 47 |         assert Path(config.projects["main"]) == custom_path
 48 | 
 49 |     def test_model_post_init_respects_basic_memory_home_sets_non_main_default(
 50 |         self, config_home, monkeypatch
 51 |     ):
 52 |         """Test that model_post_init does not create main project with BASIC_MEMORY_HOME when another project exists."""
 53 |         custom_path = config_home / "custom" / "memory" / "path"
 54 |         monkeypatch.setenv("BASIC_MEMORY_HOME", str(custom_path))
 55 | 
 56 |         # Create config without main project
 57 |         other_path = config_home / "some" / "path"
 58 |         config = BasicMemoryConfig(projects={"other": str(other_path)})
 59 | 
 60 |         # model_post_init should not add main project with BASIC_MEMORY_HOME
 61 |         assert "main" not in config.projects
 62 |         assert Path(config.projects["other"]) == other_path
 63 | 
 64 |     def test_model_post_init_fallback_without_basic_memory_home(self, config_home, monkeypatch):
 65 |         """Test that model_post_init can set a non-main default when BASIC_MEMORY_HOME is not set."""
 66 |         # Ensure BASIC_MEMORY_HOME is not set
 67 |         monkeypatch.delenv("BASIC_MEMORY_HOME", raising=False)
 68 | 
 69 |         # Create config without main project
 70 |         other_path = config_home / "some" / "path"
 71 |         config = BasicMemoryConfig(projects={"other": str(other_path)})
 72 | 
 73 |         # model_post_init should not add main project, but "other" should now be the default
 74 |         assert "main" not in config.projects
 75 |         assert Path(config.projects["other"]) == other_path
 76 | 
 77 |     def test_basic_memory_home_with_relative_path(self, config_home, monkeypatch):
 78 |         """Test that BASIC_MEMORY_HOME works with relative paths."""
 79 |         relative_path = "relative/memory/path"
 80 |         monkeypatch.setenv("BASIC_MEMORY_HOME", relative_path)
 81 | 
 82 |         config = BasicMemoryConfig()
 83 | 
 84 |         # Should normalize to platform-native path format
 85 |         assert Path(config.projects["main"]) == Path(relative_path)
 86 | 
 87 |     def test_basic_memory_home_overrides_existing_main_project(self, config_home, monkeypatch):
 88 |         """Test that BASIC_MEMORY_HOME is not used when a map is passed in the constructor."""
 89 |         custom_path = str(config_home / "override" / "memory" / "path")
 90 |         monkeypatch.setenv("BASIC_MEMORY_HOME", custom_path)
 91 | 
 92 |         # Try to create config with a different main project path
 93 |         original_path = str(config_home / "original" / "path")
 94 |         config = BasicMemoryConfig(projects={"main": original_path})
 95 | 
 96 |         # The explicitly passed projects map takes precedence over BASIC_MEMORY_HOME
 97 |         # Note: the default_factory only applies when no projects dict is supplied
 98 |         assert config.projects["main"] == original_path
 99 | 
100 | 
101 | class TestConfigManager:
102 |     """Test ConfigManager functionality."""
103 | 
104 |     @pytest.fixture
105 |     def temp_config_manager(self):
106 |         """Create a ConfigManager with temporary config file."""
107 |         with tempfile.TemporaryDirectory() as temp_dir:
108 |             temp_path = Path(temp_dir)
109 | 
110 |             # Create a test ConfigManager instance
111 |             config_manager = ConfigManager()
112 |             # Override config paths to use temp directory
113 |             config_manager.config_dir = temp_path / "basic-memory"
114 |             config_manager.config_file = config_manager.config_dir / "config.yaml"
115 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
116 | 
117 |             # Create initial config with test projects
118 |             test_config = BasicMemoryConfig(
119 |                 default_project="main",
120 |                 projects={
121 |                     "main": str(temp_path / "main"),
122 |                     "test-project": str(temp_path / "test"),
123 |                     "special-chars": str(
124 |                         temp_path / "special"
125 |                     ),  # This will be the config key for "Special/Chars"
126 |                 },
127 |             )
128 |             config_manager.save_config(test_config)
129 | 
130 |             yield config_manager
131 | 
132 |     def test_set_default_project_with_exact_name_match(self, temp_config_manager):
133 |         """Test set_default_project when project name matches config key exactly."""
134 |         config_manager = temp_config_manager
135 | 
136 |         # Set default to a project that exists with exact name match
137 |         config_manager.set_default_project("test-project")
138 | 
139 |         # Verify the config was updated
140 |         config = config_manager.load_config()
141 |         assert config.default_project == "test-project"
142 | 
143 |     def test_set_default_project_with_permalink_lookup(self, temp_config_manager):
144 |         """Test set_default_project when input needs permalink normalization."""
145 |         config_manager = temp_config_manager
146 | 
147 |         # Simulate a project that was created with special characters
148 |         # The config key would be the permalink, but user might type the original name
149 | 
150 |         # First add a project with original name that gets normalized
151 |         config = config_manager.load_config()
152 |         config.projects["special-chars-project"] = str(Path("/tmp/special"))
153 |         config_manager.save_config(config)
154 | 
155 |         # Now test setting default using a name that will normalize to the config key
156 |         config_manager.set_default_project(
157 |             "Special Chars Project"
158 |         )  # This should normalize to "special-chars-project"
159 | 
160 |         # Verify the config was updated with the correct config key
161 |         updated_config = config_manager.load_config()
162 |         assert updated_config.default_project == "special-chars-project"
163 | 
164 |     def test_set_default_project_uses_canonical_name(self, temp_config_manager):
165 |         """Test that set_default_project uses the canonical config key, not user input."""
166 |         config_manager = temp_config_manager
167 | 
168 |         # Add a project with a config key that differs from user input
169 |         config = config_manager.load_config()
170 |         config.projects["my-test-project"] = str(Path("/tmp/mytest"))
171 |         config_manager.save_config(config)
172 | 
173 |         # Set default using input that will match but is different from config key
174 |         config_manager.set_default_project("My Test Project")  # Should find "my-test-project"
175 | 
176 |         # Verify that the canonical config key is used, not the user input
177 |         updated_config = config_manager.load_config()
178 |         assert updated_config.default_project == "my-test-project"
179 |         # Should NOT be the user input
180 |         assert updated_config.default_project != "My Test Project"
181 | 
182 |     def test_set_default_project_nonexistent_project(self, temp_config_manager):
183 |         """Test set_default_project raises ValueError for nonexistent project."""
184 |         config_manager = temp_config_manager
185 | 
186 |         with pytest.raises(ValueError, match="Project 'nonexistent' not found"):
187 |             config_manager.set_default_project("nonexistent")
188 | 
189 |     def test_disable_permalinks_flag_default(self):
190 |         """Test that disable_permalinks flag defaults to False."""
191 |         config = BasicMemoryConfig()
192 |         assert config.disable_permalinks is False
193 | 
194 |     def test_disable_permalinks_flag_can_be_enabled(self):
195 |         """Test that disable_permalinks flag can be set to True."""
196 |         config = BasicMemoryConfig(disable_permalinks=True)
197 |         assert config.disable_permalinks is True
198 | 
199 |     def test_config_manager_respects_custom_config_dir(self, monkeypatch):
200 |         """Test that ConfigManager respects BASIC_MEMORY_CONFIG_DIR environment variable."""
201 |         with tempfile.TemporaryDirectory() as temp_dir:
202 |             custom_config_dir = Path(temp_dir) / "custom" / "config"
203 |             monkeypatch.setenv("BASIC_MEMORY_CONFIG_DIR", str(custom_config_dir))
204 | 
205 |             config_manager = ConfigManager()
206 | 
207 |             # Verify config_dir is set to the custom path
208 |             assert config_manager.config_dir == custom_config_dir
209 |             # Verify config_file is in the custom directory
210 |             assert config_manager.config_file == custom_config_dir / "config.json"
211 |             # Verify the directory was created
212 |             assert config_manager.config_dir.exists()
213 | 
214 |     def test_config_manager_default_without_custom_config_dir(self, config_home, monkeypatch):
215 |         """Test that ConfigManager uses default location when BASIC_MEMORY_CONFIG_DIR is not set."""
216 |         monkeypatch.delenv("BASIC_MEMORY_CONFIG_DIR", raising=False)
217 | 
218 |         config_manager = ConfigManager()
219 | 
220 |         # Should use default location
221 |         assert config_manager.config_dir == config_home / ".basic-memory"
222 |         assert config_manager.config_file == config_home / ".basic-memory" / "config.json"
223 | 
224 |     def test_remove_project_with_exact_name_match(self, temp_config_manager):
225 |         """Test remove_project when project name matches config key exactly."""
226 |         config_manager = temp_config_manager
227 | 
228 |         # Verify project exists
229 |         config = config_manager.load_config()
230 |         assert "test-project" in config.projects
231 | 
232 |         # Remove the project with exact name match
233 |         config_manager.remove_project("test-project")
234 | 
235 |         # Verify the project was removed
236 |         config = config_manager.load_config()
237 |         assert "test-project" not in config.projects
238 | 
239 |     def test_remove_project_with_permalink_lookup(self, temp_config_manager):
240 |         """Test remove_project when input needs permalink normalization."""
241 |         config_manager = temp_config_manager
242 | 
243 |         # Add a project with normalized key
244 |         config = config_manager.load_config()
245 |         config.projects["special-chars-project"] = str(Path("/tmp/special"))
246 |         config_manager.save_config(config)
247 | 
248 |         # Remove using a name that will normalize to the config key
249 |         config_manager.remove_project(
250 |             "Special Chars Project"
251 |         )  # This should normalize to "special-chars-project"
252 | 
253 |         # Verify the project was removed using the correct config key
254 |         updated_config = config_manager.load_config()
255 |         assert "special-chars-project" not in updated_config.projects
256 | 
257 |     def test_remove_project_uses_canonical_name(self, temp_config_manager):
258 |         """Test that remove_project uses the canonical config key, not user input."""
259 |         config_manager = temp_config_manager
260 | 
261 |         # Add a project with a config key that differs from user input
262 |         config = config_manager.load_config()
263 |         config.projects["my-test-project"] = str(Path("/tmp/mytest"))
264 |         config_manager.save_config(config)
265 | 
266 |         # Remove using input that will match but is different from config key
267 |         config_manager.remove_project("My Test Project")  # Should find "my-test-project"
268 | 
269 |         # Verify that the canonical config key was removed
270 |         updated_config = config_manager.load_config()
271 |         assert "my-test-project" not in updated_config.projects
272 | 
273 |     def test_remove_project_nonexistent_project(self, temp_config_manager):
274 |         """Test remove_project raises ValueError for nonexistent project."""
275 |         config_manager = temp_config_manager
276 | 
277 |         with pytest.raises(ValueError, match="Project 'nonexistent' not found"):
278 |             config_manager.remove_project("nonexistent")
279 | 
280 |     def test_remove_project_cannot_remove_default(self, temp_config_manager):
281 |         """Test remove_project raises ValueError when trying to remove default project."""
282 |         config_manager = temp_config_manager
283 | 
284 |         # Try to remove the default project
285 |         with pytest.raises(ValueError, match="Cannot remove the default project"):
286 |             config_manager.remove_project("main")
287 | 
288 |     def test_config_with_cloud_projects_empty_by_default(self, temp_config_manager):
289 |         """Test that cloud_projects field exists and defaults to empty dict."""
290 |         config_manager = temp_config_manager
291 |         config = config_manager.load_config()
292 | 
293 |         assert hasattr(config, "cloud_projects")
294 |         assert config.cloud_projects == {}
295 | 
296 |     def test_save_and_load_config_with_cloud_projects(self):
297 |         """Test that config with cloud_projects can be saved and loaded."""
298 |         with tempfile.TemporaryDirectory() as temp_dir:
299 |             temp_path = Path(temp_dir)
300 | 
301 |             config_manager = ConfigManager()
302 |             config_manager.config_dir = temp_path / "basic-memory"
303 |             config_manager.config_file = config_manager.config_dir / "config.json"
304 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
305 | 
306 |             # Create config with cloud_projects
307 |             now = datetime.now()
308 |             test_config = BasicMemoryConfig(
309 |                 projects={"main": str(temp_path / "main")},
310 |                 cloud_projects={
311 |                     "research": CloudProjectConfig(
312 |                         local_path=str(temp_path / "research-local"),
313 |                         last_sync=now,
314 |                         bisync_initialized=True,
315 |                     )
316 |                 },
317 |             )
318 |             config_manager.save_config(test_config)
319 | 
320 |             # Load and verify
321 |             loaded_config = config_manager.load_config()
322 |             assert "research" in loaded_config.cloud_projects
323 |             assert loaded_config.cloud_projects["research"].local_path == str(
324 |                 temp_path / "research-local"
325 |             )
326 |             assert loaded_config.cloud_projects["research"].bisync_initialized is True
327 |             assert loaded_config.cloud_projects["research"].last_sync == now
328 | 
329 |     def test_add_cloud_project_to_existing_config(self):
330 |         """Test adding cloud projects to an existing config file."""
331 |         with tempfile.TemporaryDirectory() as temp_dir:
332 |             temp_path = Path(temp_dir)
333 | 
334 |             config_manager = ConfigManager()
335 |             config_manager.config_dir = temp_path / "basic-memory"
336 |             config_manager.config_file = config_manager.config_dir / "config.json"
337 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
338 | 
339 |             # Create initial config without cloud projects
340 |             initial_config = BasicMemoryConfig(projects={"main": str(temp_path / "main")})
341 |             config_manager.save_config(initial_config)
342 | 
343 |             # Load, modify, and save
344 |             config = config_manager.load_config()
345 |             assert config.cloud_projects == {}
346 | 
347 |             config.cloud_projects["work"] = CloudProjectConfig(
348 |                 local_path=str(temp_path / "work-local")
349 |             )
350 |             config_manager.save_config(config)
351 | 
352 |             # Reload and verify persistence
353 |             reloaded_config = config_manager.load_config()
354 |             assert "work" in reloaded_config.cloud_projects
355 |             assert reloaded_config.cloud_projects["work"].local_path == str(
356 |                 temp_path / "work-local"
357 |             )
358 |             assert reloaded_config.cloud_projects["work"].bisync_initialized is False
359 | 
360 |     def test_backward_compatibility_loading_config_without_cloud_projects(self):
361 |         """Test that old config files without cloud_projects field can be loaded."""
362 |         with tempfile.TemporaryDirectory() as temp_dir:
363 |             temp_path = Path(temp_dir)
364 | 
365 |             config_manager = ConfigManager()
366 |             config_manager.config_dir = temp_path / "basic-memory"
367 |             config_manager.config_file = config_manager.config_dir / "config.json"
368 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
369 | 
370 |             # Manually write old-style config without cloud_projects
371 |             import json
372 | 
373 |             old_config_data = {
374 |                 "env": "dev",
375 |                 "projects": {"main": str(temp_path / "main")},
376 |                 "default_project": "main",
377 |                 "log_level": "INFO",
378 |             }
379 |             config_manager.config_file.write_text(json.dumps(old_config_data, indent=2))
380 | 
381 |             # Clear the config cache to ensure we load from the temp file
382 |             import basic_memory.config
383 | 
384 |             basic_memory.config._CONFIG_CACHE = None
385 | 
386 |             # Should load successfully with cloud_projects defaulting to empty dict
387 |             config = config_manager.load_config()
388 |             assert config.cloud_projects == {}
389 |             assert config.projects == {"main": str(temp_path / "main")}
390 | 
391 | 
392 | class TestPlatformNativePathSeparators:
393 |     """Test that config uses platform-native path separators."""
394 | 
395 |     def test_project_paths_use_platform_native_separators_in_config(self, monkeypatch):
396 |         """Test that project paths use platform-native separators when created."""
397 |         import platform
398 | 
399 |         with tempfile.TemporaryDirectory() as temp_dir:
400 |             temp_path = Path(temp_dir)
401 | 
402 |             # Set up ConfigManager with temp directory
403 |             config_manager = ConfigManager()
404 |             config_manager.config_dir = temp_path / "basic-memory"
405 |             config_manager.config_file = config_manager.config_dir / "config.json"
406 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
407 | 
408 |             # Create a project path
409 |             project_path = temp_path / "my" / "project"
410 |             project_path.mkdir(parents=True, exist_ok=True)
411 | 
412 |             # Add project via ConfigManager
413 |             config = BasicMemoryConfig(projects={})
414 |             config.projects["test-project"] = str(project_path)
415 |             config_manager.save_config(config)
416 | 
417 |             # Read the raw JSON file
418 |             import json
419 | 
420 |             config_data = json.loads(config_manager.config_file.read_text())
421 | 
422 |             # Verify path uses platform-native separators
423 |             saved_path = config_data["projects"]["test-project"]
424 | 
425 |             # On Windows, should have backslashes; on Unix, forward slashes
426 |             if platform.system() == "Windows":
427 |                 # Windows paths should contain backslashes
428 |                 assert "\\" in saved_path or ":" in saved_path  # C:\\ or \\UNC
429 |                 assert "/" not in saved_path.replace(":/", "")  # Exclude drive letter
430 |             else:
431 |                 # Unix paths should use forward slashes
432 |                 assert "/" in saved_path
433 |                 # Should not force POSIX on non-Windows
434 |                 assert saved_path == str(project_path)
435 | 
436 |     def test_add_project_uses_platform_native_separators(self, monkeypatch):
437 |         """Test that ConfigManager.add_project() uses platform-native separators."""
438 |         import platform
439 | 
440 |         with tempfile.TemporaryDirectory() as temp_dir:
441 |             temp_path = Path(temp_dir)
442 | 
443 |             # Set up ConfigManager
444 |             config_manager = ConfigManager()
445 |             config_manager.config_dir = temp_path / "basic-memory"
446 |             config_manager.config_file = config_manager.config_dir / "config.json"
447 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
448 | 
449 |             # Initialize with empty projects
450 |             initial_config = BasicMemoryConfig(projects={})
451 |             config_manager.save_config(initial_config)
452 | 
453 |             # Add project
454 |             project_path = temp_path / "new" / "project"
455 |             config_manager.add_project("new-project", str(project_path))
456 | 
457 |             # Load and verify
458 |             config = config_manager.load_config()
459 |             saved_path = config.projects["new-project"]
460 | 
461 |             # Verify platform-native separators
462 |             if platform.system() == "Windows":
463 |                 assert "\\" in saved_path or ":" in saved_path
464 |             else:
465 |                 assert "/" in saved_path
466 |                 assert saved_path == str(project_path)
467 | 
468 |     def test_model_post_init_uses_platform_native_separators(self, config_home, monkeypatch):
469 |         """Test that model_post_init uses platform-native separators."""
470 |         import platform
471 | 
472 |         monkeypatch.delenv("BASIC_MEMORY_HOME", raising=False)
473 | 
474 |         # Create config without projects (triggers model_post_init to add main)
475 |         config = BasicMemoryConfig(projects={})
476 | 
477 |         # Verify main project path uses platform-native separators
478 |         main_path = config.projects["main"]
479 | 
480 |         if platform.system() == "Windows":
481 |             # Windows: should have backslashes or drive letter
482 |             assert "\\" in main_path or ":" in main_path
483 |         else:
484 |             # Unix: should have forward slashes
485 |             assert "/" in main_path
486 | 
487 | 
488 | class TestFormattingConfig:
489 |     """Test file formatting configuration options."""
490 | 
491 |     def test_format_on_save_defaults_to_false(self):
492 |         """Test that format_on_save is disabled by default."""
493 |         config = BasicMemoryConfig()
494 |         assert config.format_on_save is False
495 | 
496 |     def test_format_on_save_can_be_enabled(self):
497 |         """Test that format_on_save can be set to True."""
498 |         config = BasicMemoryConfig(format_on_save=True)
499 |         assert config.format_on_save is True
500 | 
501 |     def test_formatter_command_defaults_to_none(self):
502 |         """Test that formatter_command defaults to None (uses built-in mdformat)."""
503 |         config = BasicMemoryConfig()
504 |         assert config.formatter_command is None
505 | 
506 |     def test_formatter_command_can_be_set(self):
507 |         """Test that formatter_command can be configured."""
508 |         config = BasicMemoryConfig(formatter_command="prettier --write {file}")
509 |         assert config.formatter_command == "prettier --write {file}"
510 | 
511 |     def test_formatters_defaults_to_empty_dict(self):
512 |         """Test that formatters defaults to empty dict."""
513 |         config = BasicMemoryConfig()
514 |         assert config.formatters == {}
515 | 
516 |     def test_formatters_can_be_configured(self):
517 |         """Test that per-extension formatters can be configured."""
518 |         config = BasicMemoryConfig(
519 |             formatters={
520 |                 "md": "prettier --write {file}",
521 |                 "json": "jq . {file} > {file}.tmp && mv {file}.tmp {file}",
522 |             }
523 |         )
524 |         assert config.formatters["md"] == "prettier --write {file}"
525 |         assert "json" in config.formatters
526 | 
527 |     def test_formatter_timeout_defaults_to_5_seconds(self):
528 |         """Test that formatter_timeout defaults to 5.0 seconds."""
529 |         config = BasicMemoryConfig()
530 |         assert config.formatter_timeout == 5.0
531 | 
532 |     def test_formatter_timeout_can_be_customized(self):
533 |         """Test that formatter_timeout can be set to a different value."""
534 |         config = BasicMemoryConfig(formatter_timeout=10.0)
535 |         assert config.formatter_timeout == 10.0
536 | 
537 |     def test_formatter_timeout_must_be_positive(self):
538 |         """Test that formatter_timeout validation rejects non-positive values."""
539 |         import pydantic
540 | 
541 |         with pytest.raises(pydantic.ValidationError):
542 |             BasicMemoryConfig(formatter_timeout=0)
543 | 
544 |         with pytest.raises(pydantic.ValidationError):
545 |             BasicMemoryConfig(formatter_timeout=-1)
546 | 
547 |     def test_formatting_env_vars(self, monkeypatch):
548 |         """Test that formatting config can be set via environment variables."""
549 |         monkeypatch.setenv("BASIC_MEMORY_FORMAT_ON_SAVE", "true")
550 |         monkeypatch.setenv("BASIC_MEMORY_FORMATTER_COMMAND", "prettier --write {file}")
551 |         monkeypatch.setenv("BASIC_MEMORY_FORMATTER_TIMEOUT", "10.0")
552 | 
553 |         config = BasicMemoryConfig()
554 | 
555 |         assert config.format_on_save is True
556 |         assert config.formatter_command == "prettier --write {file}"
557 |         assert config.formatter_timeout == 10.0
558 | 
559 |     def test_formatters_env_var_json(self, monkeypatch):
560 |         """Test that formatters dict can be set via JSON environment variable."""
561 |         import json
562 | 
563 |         formatters_json = json.dumps({"md": "prettier --write {file}", "json": "jq . {file}"})
564 |         monkeypatch.setenv("BASIC_MEMORY_FORMATTERS", formatters_json)
565 | 
566 |         config = BasicMemoryConfig()
567 | 
568 |         assert config.formatters == {"md": "prettier --write {file}", "json": "jq . {file}"}
569 | 
570 |     def test_save_and_load_formatting_config(self):
571 |         """Test that formatting config survives save/load cycle."""
572 |         with tempfile.TemporaryDirectory() as temp_dir:
573 |             temp_path = Path(temp_dir)
574 | 
575 |             config_manager = ConfigManager()
576 |             config_manager.config_dir = temp_path / "basic-memory"
577 |             config_manager.config_file = config_manager.config_dir / "config.json"
578 |             config_manager.config_dir.mkdir(parents=True, exist_ok=True)
579 | 
580 |             # Create config with formatting settings
581 |             test_config = BasicMemoryConfig(
582 |                 projects={"main": str(temp_path / "main")},
583 |                 format_on_save=True,
584 |                 formatter_command="prettier --write {file}",
585 |                 formatters={"md": "prettier --write {file}", "json": "prettier --write {file}"},
586 |                 formatter_timeout=10.0,
587 |             )
588 |             config_manager.save_config(test_config)
589 | 
590 |             # Load and verify
591 |             loaded_config = config_manager.load_config()
592 |             assert loaded_config.format_on_save is True
593 |             assert loaded_config.formatter_command == "prettier --write {file}"
594 |             assert loaded_config.formatters == {
595 |                 "md": "prettier --write {file}",
596 |                 "json": "prettier --write {file}",
597 |             }
598 |             assert loaded_config.formatter_timeout == 10.0
599 | 
```
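
The `TestFormattingConfig` cases above assume a `{file}` placeholder convention for external formatter commands. A minimal sketch of how such a command could be invoked under that convention; `run_formatter` is a hypothetical helper for illustration, not the repository's implementation:

```python
import shlex
import subprocess


def run_formatter(command_template: str, file_path: str, timeout: float = 5.0) -> None:
    """Run an external formatter command, substituting {file} with the target path."""
    # Tokenize the template first, then substitute per token, so a file path
    # containing spaces remains a single argument.
    cmd = [token.replace("{file}", file_path) for token in shlex.split(command_template)]
    subprocess.run(cmd, check=True, timeout=timeout)
```

Usage mirrors the tested config, e.g. `run_formatter("prettier --write {file}", "notes/todo.md", timeout=config.formatter_timeout)`.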

--------------------------------------------------------------------------------
/tests/sync/test_sync_service_incremental.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for incremental scan watermark optimization (Phase 1.5).
  2 | 
  3 | These tests verify the scan watermark feature that dramatically improves sync
  4 | performance on large projects by:
  5 | - Using find -newermt for incremental scans (only changed files)
  6 | - Tracking last_scan_timestamp and last_file_count
  7 | - Falling back to full scan when deletions detected
  8 | 
  9 | Expected performance improvements:
 10 | - No changes: 225x faster (2s vs 450s for 1,460 files)
 11 | - Few changes: 84x faster (5s vs 420s)
 12 | """
 13 | 
 14 | import time
 15 | from pathlib import Path
 16 | from textwrap import dedent
 17 | 
 18 | import pytest
 19 | 
 20 | from basic_memory.config import ProjectConfig
 21 | from basic_memory.sync.sync_service import SyncService
 22 | 
 23 | 
 24 | async def create_test_file(path: Path, content: str = "test content") -> None:
 25 |     """Create a test file with given content."""
 26 |     path.parent.mkdir(parents=True, exist_ok=True)
 27 |     path.write_text(content)
 28 | 
 29 | 
 30 | async def sleep_past_watermark(duration: float = 1.1) -> None:
 31 |     """Sleep long enough to ensure mtime is newer than watermark.
 32 | 
 33 |     Args:
 34 |         duration: Sleep duration in seconds (default 1.1s for filesystem precision)
 35 |     """
 36 |     time.sleep(duration)  # blocking sleep is fine here: nothing else runs on the loop
 37 | 
 38 | 
 39 | # ==============================================================================
 40 | # Scan Strategy Selection Tests
 41 | # ==============================================================================
 42 | 
 43 | 
 44 | @pytest.mark.asyncio
 45 | async def test_first_sync_uses_full_scan(sync_service: SyncService, project_config: ProjectConfig):
 46 |     """Test that first sync (no watermark) triggers full scan."""
 47 |     project_dir = project_config.home
 48 | 
 49 |     # Create test files
 50 |     await create_test_file(project_dir / "file1.md", "# File 1\nContent 1")
 51 |     await create_test_file(project_dir / "file2.md", "# File 2\nContent 2")
 52 | 
 53 |     # First sync - should use full scan (no watermark exists)
 54 |     report = await sync_service.sync(project_dir)
 55 | 
 56 |     assert len(report.new) == 2
 57 |     assert "file1.md" in report.new
 58 |     assert "file2.md" in report.new
 59 | 
 60 |     # Verify watermark was set
 61 |     project = await sync_service.project_repository.find_by_id(
 62 |         sync_service.entity_repository.project_id
 63 |     )
 64 |     assert project.last_scan_timestamp is not None
 65 |     assert project.last_file_count >= 2  # May include config files
 66 | 
 67 | 
 68 | @pytest.mark.asyncio
 69 | async def test_file_count_decreased_triggers_full_scan(
 70 |     sync_service: SyncService, project_config: ProjectConfig
 71 | ):
 72 |     """Test that file deletion (count decreased) triggers full scan."""
 73 |     project_dir = project_config.home
 74 | 
 75 |     # Create initial files
 76 |     await create_test_file(project_dir / "file1.md", "# File 1")
 77 |     await create_test_file(project_dir / "file2.md", "# File 2")
 78 |     await create_test_file(project_dir / "file3.md", "# File 3")
 79 | 
 80 |     # First sync
 81 |     await sync_service.sync(project_dir)
 82 | 
 83 |     # Delete a file
 84 |     (project_dir / "file2.md").unlink()
 85 | 
 86 |     # Sleep to ensure file operations complete
 87 |     await sleep_past_watermark()
 88 | 
 89 |     # Second sync - should detect deletion via full scan (file count decreased)
 90 |     report = await sync_service.sync(project_dir)
 91 | 
 92 |     assert len(report.deleted) == 1
 93 |     assert "file2.md" in report.deleted
 94 | 
 95 | 
 96 | @pytest.mark.asyncio
 97 | async def test_file_count_same_uses_incremental_scan(
 98 |     sync_service: SyncService, project_config: ProjectConfig
 99 | ):
100 |     """Test that same file count uses incremental scan."""
101 |     project_dir = project_config.home
102 | 
103 |     # Create initial files
104 |     await create_test_file(project_dir / "file1.md", "# File 1\nOriginal")
105 |     await create_test_file(project_dir / "file2.md", "# File 2\nOriginal")
106 | 
107 |     # First sync
108 |     await sync_service.sync(project_dir)
109 | 
110 |     # Sleep to ensure mtime will be newer than watermark
111 |     await sleep_past_watermark()
112 | 
113 |     # Modify one file (file count stays the same)
114 |     await create_test_file(project_dir / "file1.md", "# File 1\nModified")
115 | 
116 |     # Second sync - should use incremental scan (same file count)
117 |     report = await sync_service.sync(project_dir)
118 | 
119 |     assert len(report.modified) == 1
120 |     assert "file1.md" in report.modified
121 | 
122 | 
123 | @pytest.mark.asyncio
124 | async def test_file_count_increased_uses_incremental_scan(
125 |     sync_service: SyncService, project_config: ProjectConfig
126 | ):
127 |     """Test that increased file count still uses incremental scan."""
128 |     project_dir = project_config.home
129 | 
130 |     # Create initial files
131 |     await create_test_file(project_dir / "file1.md", "# File 1")
132 |     await create_test_file(project_dir / "file2.md", "# File 2")
133 | 
134 |     # First sync
135 |     await sync_service.sync(project_dir)
136 | 
137 |     # Sleep to ensure mtime will be newer than watermark
138 |     await sleep_past_watermark()
139 | 
140 |     # Add a new file (file count increased)
141 |     await create_test_file(project_dir / "file3.md", "# File 3")
142 | 
143 |     # Second sync - should use incremental scan and detect new file
144 |     report = await sync_service.sync(project_dir)
145 | 
146 |     assert len(report.new) == 1
147 |     assert "file3.md" in report.new
148 | 
149 | 
150 | @pytest.mark.asyncio
151 | async def test_force_full_bypasses_watermark_optimization(
152 |     sync_service: SyncService, project_config: ProjectConfig
153 | ):
154 |     """Test that force_full=True bypasses watermark optimization and scans all files.
155 | 
156 |     This is critical for detecting changes made by external tools like rclone bisync
157 |     that can leave file mtimes older than the watermark. See issue #407.
158 |     """
159 |     project_dir = project_config.home
160 | 
161 |     # Create initial files
162 |     await create_test_file(project_dir / "file1.md", "# File 1\nOriginal")
163 |     await create_test_file(project_dir / "file2.md", "# File 2\nOriginal")
164 | 
165 |     # First sync - establishes watermark
166 |     report = await sync_service.sync(project_dir)
167 |     assert len(report.new) == 2
168 | 
169 |     # Verify watermark was set
170 |     project = await sync_service.project_repository.find_by_id(
171 |         sync_service.entity_repository.project_id
172 |     )
173 |     assert project.last_scan_timestamp is not None
174 |     initial_timestamp = project.last_scan_timestamp
175 | 
176 |     # Sleep to ensure time passes
177 |     await sleep_past_watermark()
178 | 
179 |     # Modify the file's content, simulating an external tool like rclone
180 |     # bisync writing data while preserving timestamps; its mtime is reset
181 |     # below so the change predates the watermark (see issue #407)
182 |     file_path = project_dir / "file1.md"
183 |     await create_test_file(file_path, "# File 1\nModified by external tool")
184 | 
185 |     # Set mtime to be before the watermark (use time from before first sync)
186 |     # This simulates rclone bisync which may preserve original timestamps
187 |     import os
188 | 
189 |     old_time = initial_timestamp - 10  # 10 seconds before watermark
190 |     os.utime(file_path, (old_time, old_time))
191 | 
192 |     # Normal incremental sync should NOT detect the change (mtime before watermark)
193 |     report = await sync_service.sync(project_dir)
194 |     assert len(report.modified) == 0, (
195 |         "Incremental scan should not detect changes with mtime older than watermark"
196 |     )
197 | 
198 |     # Force full scan should detect the change via checksum comparison
199 |     report = await sync_service.sync(project_dir, force_full=True)
200 |     assert len(report.modified) == 1, "Force full scan should detect changes via checksum"
201 |     assert "file1.md" in report.modified
202 | 
203 |     # Verify watermark was still updated after force_full
204 |     project = await sync_service.project_repository.find_by_id(
205 |         sync_service.entity_repository.project_id
206 |     )
207 |     assert project.last_scan_timestamp is not None
208 |     assert project.last_scan_timestamp > initial_timestamp
209 | 
210 | 
211 | # ==============================================================================
212 | # Incremental Scan Base Cases
213 | # ==============================================================================
214 | 
215 | 
216 | @pytest.mark.asyncio
217 | async def test_incremental_scan_no_changes(
218 |     sync_service: SyncService, project_config: ProjectConfig
219 | ):
220 |     """Test that incremental scan with no changes returns empty report."""
221 |     project_dir = project_config.home
222 | 
223 |     # Create initial files
224 |     await create_test_file(project_dir / "file1.md", "# File 1")
225 |     await create_test_file(project_dir / "file2.md", "# File 2")
226 | 
227 |     # First sync
228 |     await sync_service.sync(project_dir)
229 | 
230 |     # Sleep to ensure time passes
231 |     await sleep_past_watermark()
232 | 
233 |     # Second sync - no changes
234 |     report = await sync_service.sync(project_dir)
235 | 
236 |     assert len(report.new) == 0
237 |     assert len(report.modified) == 0
238 |     assert len(report.deleted) == 0
239 |     assert len(report.moves) == 0
240 | 
241 | 
242 | @pytest.mark.asyncio
243 | async def test_incremental_scan_detects_new_file(
244 |     sync_service: SyncService, project_config: ProjectConfig
245 | ):
246 |     """Test that incremental scan detects newly created files."""
247 |     project_dir = project_config.home
248 | 
249 |     # Create initial file
250 |     await create_test_file(project_dir / "file1.md", "# File 1")
251 | 
252 |     # First sync
253 |     await sync_service.sync(project_dir)
254 | 
255 |     # Sleep to ensure mtime will be newer than watermark
256 |     await sleep_past_watermark()
257 | 
258 |     # Create new file
259 |     await create_test_file(project_dir / "file2.md", "# File 2")
260 | 
261 |     # Second sync - should detect new file via incremental scan
262 |     report = await sync_service.sync(project_dir)
263 | 
264 |     assert len(report.new) == 1
265 |     assert "file2.md" in report.new
266 |     assert len(report.modified) == 0
267 | 
268 | 
269 | @pytest.mark.asyncio
270 | async def test_incremental_scan_detects_modified_file(
271 |     sync_service: SyncService, project_config: ProjectConfig
272 | ):
273 |     """Test that incremental scan detects modified files."""
274 |     project_dir = project_config.home
275 | 
276 |     # Create initial files
277 |     file_path = project_dir / "file1.md"
278 |     await create_test_file(file_path, "# File 1\nOriginal content")
279 | 
280 |     # First sync
281 |     await sync_service.sync(project_dir)
282 | 
283 |     # Sleep to ensure mtime will be newer than watermark
284 |     await sleep_past_watermark()
285 | 
286 |     # Modify the file
287 |     await create_test_file(file_path, "# File 1\nModified content")
288 | 
289 |     # Second sync - should detect modification via incremental scan
290 |     report = await sync_service.sync(project_dir)
291 | 
292 |     assert len(report.modified) == 1
293 |     assert "file1.md" in report.modified
294 |     assert len(report.new) == 0
295 | 
296 | 
297 | @pytest.mark.asyncio
298 | async def test_incremental_scan_detects_multiple_changes(
299 |     sync_service: SyncService, project_config: ProjectConfig
300 | ):
301 |     """Test that incremental scan detects multiple file changes."""
302 |     project_dir = project_config.home
303 | 
304 |     # Create initial files
305 |     await create_test_file(project_dir / "file1.md", "# File 1\nOriginal")
306 |     await create_test_file(project_dir / "file2.md", "# File 2\nOriginal")
307 |     await create_test_file(project_dir / "file3.md", "# File 3\nOriginal")
308 | 
309 |     # First sync
310 |     await sync_service.sync(project_dir)
311 | 
312 |     # Sleep to ensure mtime will be newer than watermark
313 |     await sleep_past_watermark()
314 | 
315 |     # Modify multiple files
316 |     await create_test_file(project_dir / "file1.md", "# File 1\nModified")
317 |     await create_test_file(project_dir / "file3.md", "# File 3\nModified")
318 |     await create_test_file(project_dir / "file4.md", "# File 4\nNew")
319 | 
320 |     # Second sync - should detect all changes via incremental scan
321 |     report = await sync_service.sync(project_dir)
322 | 
323 |     assert len(report.modified) == 2
324 |     assert "file1.md" in report.modified
325 |     assert "file3.md" in report.modified
326 |     assert len(report.new) == 1
327 |     assert "file4.md" in report.new
328 | 
329 | 
330 | # ==============================================================================
331 | # Deletion Detection Tests
332 | # ==============================================================================
333 | 
334 | 
335 | @pytest.mark.asyncio
336 | async def test_deletion_triggers_full_scan_single_file(
337 |     sync_service: SyncService, project_config: ProjectConfig
338 | ):
339 |     """Test that deleting a single file triggers full scan."""
340 |     project_dir = project_config.home
341 | 
342 |     # Create initial files
343 |     await create_test_file(project_dir / "file1.md", "# File 1")
344 |     await create_test_file(project_dir / "file2.md", "# File 2")
345 |     await create_test_file(project_dir / "file3.md", "# File 3")
346 | 
347 |     # First sync
348 |     report1 = await sync_service.sync(project_dir)
349 |     assert len(report1.new) == 3
350 | 
351 |     # Delete one file
352 |     (project_dir / "file2.md").unlink()
353 | 
354 |     # Sleep to ensure file operations complete
355 |     await sleep_past_watermark()
356 | 
357 |     # Second sync - should trigger full scan due to decreased file count
358 |     report2 = await sync_service.sync(project_dir)
359 | 
360 |     assert len(report2.deleted) == 1
361 |     assert "file2.md" in report2.deleted
362 | 
363 | 
364 | @pytest.mark.asyncio
365 | async def test_deletion_triggers_full_scan_multiple_files(
366 |     sync_service: SyncService, project_config: ProjectConfig
367 | ):
368 |     """Test that deleting multiple files triggers full scan."""
369 |     project_dir = project_config.home
370 | 
371 |     # Create initial files
372 |     await create_test_file(project_dir / "file1.md", "# File 1")
373 |     await create_test_file(project_dir / "file2.md", "# File 2")
374 |     await create_test_file(project_dir / "file3.md", "# File 3")
375 |     await create_test_file(project_dir / "file4.md", "# File 4")
376 | 
377 |     # First sync
378 |     await sync_service.sync(project_dir)
379 | 
380 |     # Delete multiple files
381 |     (project_dir / "file2.md").unlink()
382 |     (project_dir / "file4.md").unlink()
383 | 
384 |     # Sleep to ensure file operations complete
385 |     await sleep_past_watermark()
386 | 
387 |     # Second sync - should trigger full scan and detect both deletions
388 |     report = await sync_service.sync(project_dir)
389 | 
390 |     assert len(report.deleted) == 2
391 |     assert "file2.md" in report.deleted
392 |     assert "file4.md" in report.deleted
393 | 
394 | 
395 | # ==============================================================================
396 | # Move Detection Tests
397 | # ==============================================================================
398 | 
399 | 
400 | @pytest.mark.asyncio
401 | async def test_move_detection_requires_full_scan(
402 |     sync_service: SyncService, project_config: ProjectConfig
403 | ):
404 |     """Test that file moves require a full scan to be detected (incremental scans miss them).
405 | 
406 |     Moves (renames) don't update mtime, so incremental scans can't detect them.
407 |     To trigger a full scan for move detection, we need file count to decrease.
408 |     This test verifies moves are detected when combined with a deletion.
409 |     """
410 |     project_dir = project_config.home
411 | 
412 |     # Create initial files - include extra file to delete later
413 |     old_path = project_dir / "old" / "file.md"
414 |     content = dedent(
415 |         """
416 |         ---
417 |         title: Test File
418 |         type: note
419 |         ---
420 |         # Test File
421 |         Distinctive content for move detection
422 |         """
423 |     ).strip()
424 |     await create_test_file(old_path, content)
425 |     await create_test_file(project_dir / "other.md", "# Other\nContent")
426 | 
427 |     # First sync
428 |     await sync_service.sync(project_dir)
429 | 
430 |     # Sleep to ensure operations complete and watermark is in the past
431 |     await sleep_past_watermark()
432 | 
433 |     # Move file AND delete another to trigger full scan
434 |     # Move alone won't work because file count stays same (no full scan)
435 |     new_path = project_dir / "new" / "moved.md"
436 |     new_path.parent.mkdir(parents=True, exist_ok=True)
437 |     old_path.rename(new_path)
438 |     (project_dir / "other.md").unlink()  # Delete to trigger full scan
439 | 
440 |     # Second sync - full scan due to deletion, move detected via checksum
441 |     report = await sync_service.sync(project_dir)
442 | 
443 |     assert len(report.moves) == 1
444 |     assert "old/file.md" in report.moves
445 |     assert report.moves["old/file.md"] == "new/moved.md"
446 |     assert len(report.deleted) == 1
447 |     assert "other.md" in report.deleted
448 | 
449 | 
450 | @pytest.mark.asyncio
451 | async def test_move_detection_in_full_scan(
452 |     sync_service: SyncService, project_config: ProjectConfig
453 | ):
454 |     """Test that file moves are detected via checksum in full scan."""
455 |     project_dir = project_config.home
456 | 
457 |     # Create initial files
458 |     old_path = project_dir / "old" / "file.md"
459 |     content = dedent(
460 |         """
461 |         ---
462 |         title: Test File
463 |         type: note
464 |         ---
465 |         # Test File
466 |         Distinctive content for move detection
467 |         """
468 |     ).strip()
469 |     await create_test_file(old_path, content)
470 |     await create_test_file(project_dir / "other.md", "# Other\nContent")
471 | 
472 |     # First sync
473 |     await sync_service.sync(project_dir)
474 | 
475 |     # Sleep to ensure operations complete
476 |     await sleep_past_watermark()
477 | 
478 |     # Move file AND delete another to trigger full scan
479 |     new_path = project_dir / "new" / "moved.md"
480 |     new_path.parent.mkdir(parents=True, exist_ok=True)
481 |     old_path.rename(new_path)
482 |     (project_dir / "other.md").unlink()
483 | 
484 |     # Second sync - full scan due to deletion, should still detect move
485 |     report = await sync_service.sync(project_dir)
486 | 
487 |     assert len(report.moves) == 1
488 |     assert "old/file.md" in report.moves
489 |     assert report.moves["old/file.md"] == "new/moved.md"
490 |     assert len(report.deleted) == 1
491 |     assert "other.md" in report.deleted
492 | 
493 | 
494 | # ==============================================================================
495 | # Watermark Update Tests
496 | # ==============================================================================
497 | 
498 | 
499 | @pytest.mark.asyncio
500 | async def test_watermark_updated_after_successful_sync(
501 |     sync_service: SyncService, project_config: ProjectConfig
502 | ):
503 |     """Test that watermark is updated after each successful sync."""
504 |     project_dir = project_config.home
505 | 
506 |     # Create initial file
507 |     await create_test_file(project_dir / "file1.md", "# File 1")
508 | 
509 |     # Get project before sync
510 |     project_before = await sync_service.project_repository.find_by_id(
511 |         sync_service.entity_repository.project_id
512 |     )
513 |     assert project_before.last_scan_timestamp is None
514 |     assert project_before.last_file_count is None
515 | 
516 |     # First sync
517 |     sync_start = time.time()
518 |     await sync_service.sync(project_dir)
519 |     sync_end = time.time()
520 | 
521 |     # Verify watermark was set
522 |     project_after = await sync_service.project_repository.find_by_id(
523 |         sync_service.entity_repository.project_id
524 |     )
525 |     assert project_after.last_scan_timestamp is not None
526 |     assert project_after.last_file_count >= 1  # May include config files
527 | 
528 |     # Watermark should be between sync start and end
529 |     assert sync_start <= project_after.last_scan_timestamp <= sync_end
530 | 
531 | 
532 | @pytest.mark.asyncio
533 | async def test_watermark_uses_sync_start_time(
534 |     sync_service: SyncService, project_config: ProjectConfig
535 | ):
536 |     """Test that watermark uses sync start time, not end time."""
537 |     project_dir = project_config.home
538 | 
539 |     # Create initial file
540 |     await create_test_file(project_dir / "file1.md", "# File 1")
541 | 
542 |     # First sync - capture timestamps
543 |     sync_start = time.time()
544 |     await sync_service.sync(project_dir)
545 |     sync_end = time.time()
546 | 
547 |     # Get watermark
548 |     project = await sync_service.project_repository.find_by_id(
549 |         sync_service.entity_repository.project_id
550 |     )
551 | 
552 |     # Watermark should be closer to start than end
553 |     # (In practice, watermark == sync_start_timestamp captured in sync())
554 |     time_from_start = abs(project.last_scan_timestamp - sync_start)
555 |     time_from_end = abs(project.last_scan_timestamp - sync_end)
556 | 
557 |     assert time_from_start < time_from_end
558 | 
559 | 
560 | @pytest.mark.asyncio
561 | async def test_watermark_file_count_accurate(
562 |     sync_service: SyncService, project_config: ProjectConfig
563 | ):
564 |     """Test that watermark file count accurately reflects synced files."""
565 |     project_dir = project_config.home
566 | 
567 |     # Create initial files
568 |     await create_test_file(project_dir / "file1.md", "# File 1")
569 |     await create_test_file(project_dir / "file2.md", "# File 2")
570 |     await create_test_file(project_dir / "file3.md", "# File 3")
571 | 
572 |     # First sync
573 |     await sync_service.sync(project_dir)
574 | 
575 |     # Verify file count
576 |     project1 = await sync_service.project_repository.find_by_id(
577 |         sync_service.entity_repository.project_id
578 |     )
579 |     initial_count = project1.last_file_count
580 |     assert initial_count >= 3  # May include config files
581 | 
582 |     # Add more files
583 |     await sleep_past_watermark()
584 |     await create_test_file(project_dir / "file4.md", "# File 4")
585 |     await create_test_file(project_dir / "file5.md", "# File 5")
586 | 
587 |     # Second sync
588 |     await sync_service.sync(project_dir)
589 | 
590 |     # Verify updated count increased by 2
591 |     project2 = await sync_service.project_repository.find_by_id(
592 |         sync_service.entity_repository.project_id
593 |     )
594 |     assert project2.last_file_count == initial_count + 2
595 | 
596 | 
597 | # ==============================================================================
598 | # Edge Cases and Error Handling
599 | # ==============================================================================
600 | 
601 | 
602 | @pytest.mark.asyncio
603 | async def test_concurrent_file_changes_handled_gracefully(
604 |     sync_service: SyncService, project_config: ProjectConfig
605 | ):
606 |     """Test that files created/modified during sync are handled correctly.
607 | 
608 |     Files created during sync (between start and file processing) should be
609 |     caught in the next sync, not cause errors in the current sync.
610 |     """
611 |     project_dir = project_config.home
612 | 
613 |     # Create initial file
614 |     await create_test_file(project_dir / "file1.md", "# File 1")
615 | 
616 |     # First sync
617 |     await sync_service.sync(project_dir)
618 | 
619 |     # Sleep to ensure mtime will be newer
620 |     await sleep_past_watermark()
621 | 
622 |     # Create file that will have mtime very close to watermark
623 |     # In real scenarios, this could be created during sync
624 |     await create_test_file(project_dir / "concurrent.md", "# Concurrent")
625 | 
626 |     # Should be caught in next sync without errors
627 |     report = await sync_service.sync(project_dir)
628 |     assert "concurrent.md" in report.new
629 | 
630 | 
631 | @pytest.mark.asyncio
632 | async def test_empty_directory_handles_incremental_scan(
633 |     sync_service: SyncService, project_config: ProjectConfig
634 | ):
635 |     """Test that incremental scan handles empty directories correctly."""
636 |     project_dir = project_config.home
637 | 
638 |     # First sync with empty directory (no user files)
639 |     report1 = await sync_service.sync(project_dir)
640 |     assert len(report1.new) == 0
641 | 
642 |     # Verify watermark was set even for empty directory
643 |     project = await sync_service.project_repository.find_by_id(
644 |         sync_service.entity_repository.project_id
645 |     )
646 |     assert project.last_scan_timestamp is not None
647 |     # May have config files, so just check it's set
648 |     assert project.last_file_count is not None
649 | 
650 |     # Second sync - still empty (no new user files)
651 |     report2 = await sync_service.sync(project_dir)
652 |     assert len(report2.new) == 0
653 | 
654 | 
655 | @pytest.mark.asyncio
656 | async def test_incremental_scan_respects_gitignore(
657 |     sync_service: SyncService, project_config: ProjectConfig
658 | ):
659 |     """Test that incremental scan respects .gitignore patterns."""
660 |     project_dir = project_config.home
661 | 
662 |     # Create .gitignore
663 |     (project_dir / ".gitignore").write_text("*.ignored\n.hidden/\n")
664 | 
665 |     # Reload ignore patterns
666 |     from basic_memory.ignore_utils import load_gitignore_patterns
667 | 
668 |     sync_service._ignore_patterns = load_gitignore_patterns(project_dir)
669 | 
670 |     # Create files - some should be ignored
671 |     await create_test_file(project_dir / "included.md", "# Included")
672 |     await create_test_file(project_dir / "excluded.ignored", "# Excluded")
673 | 
674 |     # First sync
675 |     report1 = await sync_service.sync(project_dir)
676 |     assert "included.md" in report1.new
677 |     assert "excluded.ignored" not in report1.new
678 | 
679 |     # Sleep and add more files
680 |     await sleep_past_watermark()
681 |     await create_test_file(project_dir / "included2.md", "# Included 2")
682 |     await create_test_file(project_dir / "excluded2.ignored", "# Excluded 2")
683 | 
684 |     # Second sync - incremental scan should also respect ignore patterns
685 |     report2 = await sync_service.sync(project_dir)
686 |     assert "included2.md" in report2.new
687 |     assert "excluded2.ignored" not in report2.new
688 | 
689 | 
690 | # ==============================================================================
691 | # Relation Resolution Optimization Tests
692 | # ==============================================================================
693 | 
694 | 
695 | @pytest.mark.asyncio
696 | async def test_relation_resolution_skipped_when_no_changes(
697 |     sync_service: SyncService, project_config: ProjectConfig
698 | ):
699 |     """Test that relation resolution is skipped when no file changes detected.
700 | 
701 |     This optimization prevents wasting time resolving relations when there are
702 |     no changes, dramatically improving sync performance for large projects.
703 |     """
704 |     project_dir = project_config.home
705 | 
706 |     # Create initial file with wikilink
707 |     content = dedent(
708 |         """
709 |         ---
710 |         title: File with Link
711 |         type: note
712 |         ---
713 |         # File with Link
714 |         This links to [[Target File]]
715 |         """
716 |     ).strip()
717 |     await create_test_file(project_dir / "file1.md", content)
718 | 
719 |     # First sync - will resolve relations (or leave unresolved)
720 |     report1 = await sync_service.sync(project_dir)
721 |     assert len(report1.new) == 1
722 | 
723 |     # Check that there are unresolved relations (target doesn't exist)
724 |     unresolved = await sync_service.relation_repository.find_unresolved_relations()
725 |     unresolved_count_before = len(unresolved)
726 |     assert unresolved_count_before > 0  # Should have unresolved relation to [[Target File]]
727 | 
728 |     # Sleep to ensure time passes
729 |     await sleep_past_watermark()
730 | 
731 |     # Second sync - no changes, should skip relation resolution
732 |     report2 = await sync_service.sync(project_dir)
733 |     assert report2.total == 0  # No changes detected
734 | 
735 |     # Verify unresolved relations count unchanged (resolution was skipped)
736 |     unresolved_after = await sync_service.relation_repository.find_unresolved_relations()
737 |     assert len(unresolved_after) == unresolved_count_before
738 | 
739 | 
740 | @pytest.mark.asyncio
741 | async def test_relation_resolution_runs_when_files_modified(
742 |     sync_service: SyncService, project_config: ProjectConfig
743 | ):
744 |     """Test that relation resolution runs when files are actually modified."""
745 |     project_dir = project_config.home
746 | 
747 |     # Create file with unresolved wikilink
748 |     content1 = dedent(
749 |         """
750 |         ---
751 |         title: File with Link
752 |         type: note
753 |         ---
754 |         # File with Link
755 |         This links to [[Target File]]
756 |         """
757 |     ).strip()
758 |     await create_test_file(project_dir / "file1.md", content1)
759 | 
760 |     # First sync
761 |     await sync_service.sync(project_dir)
762 | 
763 |     # Verify unresolved relation exists
764 |     unresolved_before = await sync_service.relation_repository.find_unresolved_relations()
765 |     assert len(unresolved_before) > 0
766 | 
767 |     # Sleep to ensure mtime will be newer
768 |     await sleep_past_watermark()
769 | 
770 |     # Create the target file (should resolve the relation)
771 |     content2 = dedent(
772 |         """
773 |         ---
774 |         title: Target File
775 |         type: note
776 |         ---
777 |         # Target File
778 |         This is the target.
779 |         """
780 |     ).strip()
781 |     await create_test_file(project_dir / "target.md", content2)
782 | 
783 |     # Second sync - should detect new file and resolve relations
784 |     report = await sync_service.sync(project_dir)
785 |     assert len(report.new) == 1
786 |     assert "target.md" in report.new
787 | 
788 |     # Verify relation was resolved (unresolved count decreased)
789 |     unresolved_after = await sync_service.relation_repository.find_unresolved_relations()
790 |     assert len(unresolved_after) < len(unresolved_before)
791 | 
```
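
The scan-strategy behavior these tests pin down can be summarized in a small decision function. A minimal sketch under the assumptions stated in the tests (a watermark stored as `last_scan_timestamp`/`last_file_count` on the project record); the real decision logic lives in `SyncService.sync` and may differ in detail:

```python
def choose_scan_strategy(
    last_scan_timestamp: float | None,
    last_file_count: int | None,
    current_file_count: int,
    force_full: bool = False,
) -> str:
    """Return 'full' or 'incremental', matching the behavior asserted above."""
    if force_full:
        return "full"  # external tools may backdate mtimes (issue #407)
    if last_scan_timestamp is None:
        return "full"  # first sync: no watermark exists yet
    if last_file_count is not None and current_file_count < last_file_count:
        return "full"  # deletions are invisible to an mtime-only scan
    return "incremental"  # scan only files with mtime newer than the watermark
```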

--------------------------------------------------------------------------------
/specs/SPEC-10 Unified Deployment Workflow and Event Tracking.md:
--------------------------------------------------------------------------------

```markdown
  1 | ---
  2 | title: 'SPEC-10: Unified Deployment Workflow and Event Tracking'
  3 | type: spec
  4 | permalink: specs/spec-10-unified-deployment-workflow-event-tracking
  5 | tags:
  6 | - workflow
  7 | - deployment
  8 | - event-sourcing
  9 | - architecture
 10 | - simplification
 11 | ---
 12 | 
 13 | # SPEC-10: Unified Deployment Workflow and Event Tracking
 14 | 
 15 | ## Why
 16 | 
 17 | We replaced a complex, DBOS-orchestrated multi-workflow system that was proving to be more trouble than it was worth. The previous architecture had four separate workflows (`tenant_provisioning`, `tenant_update`, `tenant_deployment`, `tenant_undeploy`) with overlapping logic, complex state management, and fragmented event tracking. DBOS added unnecessary complexity without providing sufficient value, leading to harder debugging and maintenance.
 18 | 
 19 | **Problems Solved:**
 20 | - **Framework Complexity**: DBOS configuration overhead and fighting framework limitations
 21 | - **Code Duplication**: Multiple workflows implementing similar operations with duplicate logic
 22 | - **Poor Observability**: Fragmented event tracking across workflow boundaries
 23 | - **Maintenance Overhead**: Complex orchestration for fundamentally simple operations
 24 | - **Debugging Difficulty**: Framework abstractions hiding simple Python stack traces
 25 | 
 26 | ## What
 27 | 
 28 | This spec documents the architectural simplification that consolidates tenant lifecycle management into a unified system with comprehensive event tracking.
 29 | 
 30 | **Affected Areas:**
 31 | - Tenant deployment workflows (provisioning, updates, undeploying)
 32 | - Event sourcing and workflow tracking infrastructure
 33 | - API endpoints for tenant operations
 34 | - Database schema for workflow and event correlation
 35 | - Integration testing for tenant lifecycle operations
 36 | 
 37 | **Key Changes:**
 38 | - **Removed DBOS entirely** - eliminated framework dependency and complexity
 39 | - **Consolidated 4 workflows → 2** - a single unified deployment workflow plus a separate undeploy workflow
 40 | - **Added workflow tracking system** with complete event correlation
 41 | - **Simplified API surface** - single `/deploy` endpoint handles all scenarios
 42 | - **Enhanced observability** through event sourcing with workflow grouping
 43 | 
 44 | ## How (High Level)
 45 | 
 46 | ### Architectural Philosophy
 47 | **Embrace simplicity over framework complexity** - use well-structured Python with proper database design instead of complex orchestration frameworks.
 48 | 
 49 | ### Core Components
 50 | 
 51 | #### 1. Unified Deployment Workflow
 52 | ```python
 53 | class TenantDeploymentWorkflow:
 54 |     async def deploy_tenant_workflow(self, tenant_id: str, workflow_id: UUID, image_tag: str | None = None):
 55 |         # Single workflow handles both initial provisioning AND updates
 56 |         # Each step is idempotent and handles its own error recovery
 57 |         # Database transactions provide the durability we need
 58 |         await self.start_deployment_step(workflow_id, tenant_uuid, image_tag)
 59 |         await self.create_fly_app_step(workflow_id, tenant_uuid)
 60 |         await self.create_bucket_step(workflow_id, tenant_uuid)
 61 |         await self.deploy_machine_step(workflow_id, tenant_uuid, image_tag)
 62 |         await self.complete_deployment_step(workflow_id, tenant_uuid, image_tag, deployment_time)
 63 | ```
 64 | 
 65 | **Key Benefits:**
 66 | - **Handles both provisioning and updates** in single workflow
 67 | - **Idempotent operations** - safe to retry any step (see the sketch below)
 68 | - **Clean error handling** via simple Python exceptions
 69 | - **Resumable** - can restart from any failed step
 70 | 
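Idempotency typically follows a check-before-create pattern in each step. A minimal sketch of that pattern; the `fly_client` and `record_event` callables and the app-naming scheme are hypothetical stand-ins, not the actual step implementation:

```python
from uuid import UUID

async def create_fly_app_step(fly_client, record_event, workflow_id: UUID, tenant_id: UUID) -> None:
    # Check before create: re-running after a partial failure is a no-op.
    app_name = f"bm-tenant-{tenant_id}"  # illustrative naming scheme
    if await fly_client.app_exists(app_name):
        await record_event(workflow_id, "fly_app.exists", app_name)
        return
    await fly_client.create_app(app_name)
    await record_event(workflow_id, "fly_app.created", app_name)
```
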
 71 | #### 2. Workflow Tracking System
 72 | 
 73 | **Database Schema:**
 74 | ```sql
 75 | CREATE TABLE workflow (
 76 |     id UUID PRIMARY KEY,
 77 |     workflow_type VARCHAR(50) NOT NULL,  -- 'tenant_deployment', 'tenant_undeploy'
 78 |     tenant_id UUID REFERENCES tenant(id),
 79 |     status VARCHAR(20) DEFAULT 'running', -- 'running', 'completed', 'failed'
 80 |     workflow_metadata JSONB DEFAULT '{}'  -- image_tag, etc.
 81 | );
 82 | 
 83 | ALTER TABLE event ADD COLUMN workflow_id UUID REFERENCES workflow(id);
 84 | ```
 85 | 
 86 | **Event Correlation:**
 87 | - Every workflow operation generates events tagged with `workflow_id`
 88 | - Complete audit trail from workflow start to completion
 89 | - Events grouped by workflow for easy reconstruction of operations (see the query sketch below)
 90 | 
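For example, a workflow's complete audit trail can be reconstructed with one query over the schema above. A sketch using SQLAlchemy's `text()`; the `event_type` and `created_at` column names on `event` are assumptions:

```python
from uuid import UUID

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

async def audit_trail(session: AsyncSession, workflow_id: UUID) -> list:
    # All events tagged with the same workflow_id, ordered by time,
    # reconstruct the full operation from start to completion.
    result = await session.execute(
        text(
            "SELECT event_type, created_at FROM event "
            "WHERE workflow_id = :wid ORDER BY created_at"
        ),
        {"wid": str(workflow_id)},
    )
    return result.all()
```
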
 91 | #### 3. Parameter Standardization
 92 | All workflow methods follow consistent signature pattern:
 93 | ```python
 94 | async def method_name(self, session: AsyncSession, workflow_id: UUID | None, tenant_id: UUID, ...)
 95 | ```
 96 | 
 97 | **Benefits:**
 98 | - **Consistent event tagging** - all events properly correlated
 99 | - **Clear method contracts** - workflow_id always first parameter
100 | - **Type safety** - proper UUID handling throughout
101 | 
102 | ### Implementation Strategy
103 | 
104 | #### Phase 1: Workflow Consolidation ✅ COMPLETED
105 | - [x] **Remove DBOS dependency** - eliminated dbos_config.py and all DBOS imports
106 | - [x] **Create unified TenantDeploymentWorkflow** - handles both provisioning and updates
107 | - [x] **Remove legacy workflows** - deleted tenant_provisioning.py, tenant_update.py
108 | - [x] **Simplify API endpoints** - consolidated to single `/deploy` endpoint
109 | - [x] **Update integration tests** - comprehensive edge case testing
110 | 
111 | #### Phase 2: Workflow Tracking System ✅ COMPLETED
112 | - [x] **Database migration** - added workflow table and event.workflow_id foreign key
113 | - [x] **Workflow repository** - CRUD operations for workflow records
114 | - [x] **Event correlation** - all workflow events tagged with workflow_id
115 | - [x] **Comprehensive testing** - workflow lifecycle and event grouping tests
116 | 
117 | #### Phase 3: Parameter Standardization ✅ COMPLETED
118 | - [x] **Standardize method signatures** - workflow_id as first parameter pattern
119 | - [x] **Fix event tagging** - ensure all workflow events properly correlated
120 | - [x] **Update service methods** - consistent parameter order across tenant_service
121 | - [x] **Integration test validation** - verify complete event sequences
122 | 
123 | ### Architectural Benefits
124 | 
125 | #### Code Simplification
126 | - **39 files changed**: 2,247 additions, 3,256 deletions (net -1,009 lines)
127 | - **Eliminated framework complexity** - no more DBOS configuration or abstractions
128 | - **Consolidated logic** - single deployment workflow vs 4 separate workflows
129 | - **Cleaner API surface** - unified endpoint vs multiple workflow-specific endpoints
130 | 
131 | #### Enhanced Observability
132 | - **Complete event correlation** - every workflow event tagged with workflow_id
133 | - **Audit trail reconstruction** - can trace entire tenant lifecycle through events
134 | - **Workflow status tracking** - running/completed/failed states in database
135 | - **Comprehensive testing** - edge cases covered with real infrastructure
136 | 
137 | #### Operational Benefits
138 | - **Simpler debugging** - plain Python stack traces vs framework abstractions
139 | - **Reduced dependencies** - one less complex framework to maintain
140 | - **Better error handling** - explicit exception handling vs framework magic
141 | - **Easier maintenance** - straightforward Python code vs orchestration complexity
142 | 
143 | ## How to Evaluate
144 | 
145 | ### Success Criteria
146 | 
147 | #### Functional Completeness ✅ VERIFIED
148 | - [x] **Unified deployment workflow** handles both initial provisioning and updates
149 | - [x] **Undeploy workflow** properly integrated with event tracking
150 | - [x] **All operations idempotent** - safe to retry any step without duplication
151 | - [x] **Complete tenant lifecycle** - provision → active → update → undeploy
152 | 
153 | #### Event Tracking and Correlation ✅ VERIFIED
154 | - [x] **All workflow events tagged** with proper workflow_id
155 | - [x] **Event sequence verification** - tests assert exact event order and content
156 | - [x] **Workflow grouping** - events can be queried by workflow_id for complete audit trail
157 | - [x] **Cross-workflow isolation** - deployment vs undeploy events properly separated
158 | 
159 | #### Database Schema and Performance ✅ VERIFIED
160 | - [x] **Migration applied** - workflow table and event.workflow_id column created
161 | - [x] **Proper indexing** - performance optimized queries on workflow_type, tenant_id, status
162 | - [x] **Foreign key constraints** - referential integrity between workflows and events
163 | - [x] **Database triggers** - updated_at timestamp automation
164 | 
165 | #### Test Coverage ✅ COMPREHENSIVE
166 | - [x] **Unit tests**: 4 workflow tracking tests covering lifecycle and event grouping
167 | - [x] **Integration tests**: Real infrastructure testing with Fly.io resources
168 | - [x] **Edge case coverage**: Failed deployments, partial state recovery, resource conflicts
169 | - [x] **Event sequence verification**: Exact event order and content validation
170 | 
171 | ### Testing Procedure
172 | 
173 | #### Unit Test Validation ✅ PASSING
174 | ```bash
175 | cd apps/cloud && pytest tests/test_workflow_tracking.py -v
176 | # 4/4 tests passing - workflow lifecycle and event grouping
177 | ```
178 | 
179 | #### Integration Test Validation ✅ PASSING
180 | ```bash
181 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_deployment_integration.py -v
182 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_undeploy_integration.py -v
183 | # Comprehensive real infrastructure testing with actual Fly.io resources
184 | # Tests provision → deploy → update → undeploy → cleanup cycles
185 | ```
186 | 
187 | ### Performance Metrics
188 | 
189 | #### Code Metrics ✅ ACHIEVED
190 | - **Net code reduction**: -1,009 lines (3,256 deletions, 2,247 additions)
191 | - **Workflow consolidation**: 4 legacy workflows → unified deploy + undeploy workflows
192 | - **Dependency reduction**: Removed DBOS framework dependency entirely
193 | - **API simplification**: Multiple endpoints → single `/deploy` endpoint
194 | 
195 | #### Operational Metrics ✅ VERIFIED
196 | - **Event correlation**: 100% of workflow events properly tagged with workflow_id
197 | - **Audit trail completeness**: Full tenant lifecycle traceable through event sequences
198 | - **Error handling**: Clean Python exceptions vs framework abstractions
199 | - **Debugging simplicity**: Direct stack traces vs orchestration complexity
200 | 
201 | ### Implementation Status: ✅ COMPLETE
202 | 
203 | All phases completed successfully with comprehensive testing and verification:
204 | 
**Phase 1 - Workflow Consolidation**: ✅ COMPLETE
- Removed DBOS dependency and consolidated workflows
- Unified deployment workflow handles all scenarios
- Comprehensive integration testing with real infrastructure

**Phase 2 - Workflow Tracking**: ✅ COMPLETE
- Database schema implemented with proper indexing
- Event correlation system fully functional
- Complete audit trail capability verified

**Phase 3 - Parameter Standardization**: ✅ COMPLETE
- Consistent method signatures across all workflow methods
- All events properly tagged with workflow_id
- Type safety verified across entire codebase

**Phase 4 - Asynchronous Job Queuing**:

**Goal**: Transform synchronous deployment workflows into background jobs for better user experience and system reliability.

**Current Problem**:
- Deployment API calls are synchronous - users wait for entire tenant provisioning (30-60 seconds)
- No retry mechanism for failed operations
- HTTP timeouts on long-running deployments
- Poor user experience during infrastructure provisioning

**Solution**: Redis-backed job queue with arq for reliable background processing

#### Architecture Overview
```python
# API Layer: Return immediately with job tracking.
# (router, workflow_repo, arq_pool, and workflow_manager come from the app context)
from uuid import UUID

@router.post("/{tenant_id}/deploy")
async def deploy_tenant(tenant_id: UUID):
    # Create workflow record in Postgres
    workflow = await workflow_repo.create_workflow("tenant_deployment", tenant_id)

    # Enqueue job in Redis
    job = await arq_pool.enqueue_job('deploy_tenant_task', tenant_id, workflow.id)

    # Return job ID immediately
    return {"job_id": job.job_id, "workflow_id": workflow.id, "status": "queued"}

# Background Worker: Process via existing unified workflow
async def deploy_tenant_task(ctx, tenant_id: str, workflow_id: str):
    # Existing workflow logic - zero changes needed!
    await workflow_manager.deploy_tenant(UUID(tenant_id), workflow_id=UUID(workflow_id))
```

#### Implementation Tasks

**Phase 4.1: Core Job Queue Setup** ✅ COMPLETED
- [x] **Add arq dependency** - integrated Redis job queue with existing infrastructure
- [x] **Create job definitions** - wrapped existing deployment/undeploy workflows as arq tasks
- [x] **Update API endpoints** - updated provisioning endpoints to return job IDs instead of waiting for completion
- [x] **JobQueueService implementation** - service layer for job enqueueing and status tracking
- [x] **Job status tracking** - integrated with existing workflow table for status updates
- [x] **Comprehensive testing** - 18 tests covering positive, negative, and edge cases

**Phase 4.2: Background Worker Implementation** ✅ COMPLETED
- [x] **Job status API** - GET /jobs/{job_id}/status endpoint integrated with JobQueueService
- [x] **Background worker process** - arq worker to process queued jobs with proper settings and Redis configuration
- [x] **Worker settings and configuration** - WorkerSettings class with proper timeouts, max jobs, and error handling
- [x] **Fix API endpoints** - updated job status API to use JobQueueService instead of direct Redis access
- [x] **Integration testing** - comprehensive end-to-end testing with real ARQ workers and Fly.io infrastructure
- [x] **Worker entry points** - dual-purpose entrypoint.sh script and __main__.py module support for both API and worker processes
- [x] **Test fixture updates** - fixed all API and service test fixtures to work with job queue dependencies
- [x] **AsyncIO event loop fixes** - resolved event loop issues in integration tests for subprocess worker compatibility
- [x] **Complete test coverage** - all 46 tests passing across unit, integration, and API test suites
- [x] **Type safety verification** - 0 type checking errors across entire ARQ job queue implementation

#### Phase 4.2 Implementation Summary ✅ COMPLETE

**Core ARQ Job Queue System:**
- **JobQueueService** - Centralized service for job enqueueing, status tracking, and Redis pool management
- **deployment_jobs.py** - ARQ job functions that wrap existing deployment/undeploy workflows
- **Worker Settings** - Production-ready ARQ configuration with proper timeouts and error handling (see the sketch after this list)
- **Dual-Process Architecture** - Single Docker image with entrypoint.sh supporting both API and worker modes
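
As context for the worker-settings bullet, here is a minimal sketch of what an arq `WorkerSettings` class can look like; the job-function and config symbol names are assumptions based on the modules listed below, not the verified implementation:

```python
# Hedged sketch of arq worker settings; imported symbol names are assumed.
from arq.connections import RedisSettings

from basic_memory_cloud.config import settings  # assumed config object
from basic_memory_cloud.jobs.deployment_jobs import (  # assumed module layout
    deploy_tenant_job,
    undeploy_tenant_job,
)


class WorkerSettings:
    """arq reads these class attributes when started via `arq ...WorkerSettings`."""

    functions = [deploy_tenant_job, undeploy_tenant_job]
    redis_settings = RedisSettings.from_dsn(settings.redis_url)
    max_jobs = 10       # cap on concurrent jobs per worker (ARQ_MAX_JOBS)
    job_timeout = 600   # generous timeout for Fly.io provisioning, in seconds
    keep_result = 3600  # keep job results in Redis for one hour (ARQ_KEEP_RESULT)
```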

**Key Files Added:**
- `apps/cloud/src/basic_memory_cloud/jobs/` - Complete job queue implementation (7 files)
- `apps/cloud/entrypoint.sh` - Dual-purpose Docker container entry point
- `apps/cloud/tests/integration/test_worker_integration.py` - Real infrastructure integration tests
- `apps/cloud/src/basic_memory_cloud/schemas/job_responses.py` - API response schemas

**API Integration:**
- Provisioning endpoints return job IDs immediately instead of blocking for 60+ seconds
- Job status API endpoints for real-time monitoring of deployment progress (see the sketch after this list)
- Proper error handling and job failure scenarios with detailed error messages
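
A hedged sketch of the status endpoint's shape (the route comes from the checklist above; the service method, dependency provider, and response schema names are assumptions, with schemas nominally living in `job_responses.py`):

```python
# Sketch of GET /jobs/{job_id}/status. JobQueueService.get_job_status,
# get_job_queue_service, and JobStatusResponse are assumed names.
from fastapi import APIRouter, Depends, HTTPException

from basic_memory_cloud.schemas.job_responses import JobStatusResponse  # assumed symbol

router = APIRouter(prefix="/jobs", tags=["jobs"])


@router.get("/{job_id}/status")
async def job_status(
    job_id: str,
    job_queue: JobQueueService = Depends(get_job_queue_service),  # assumed dependency
) -> JobStatusResponse:
    status = await job_queue.get_job_status(job_id)
    if status is None:
        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
    return status
```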

**Testing Achievement:**
- **46 total tests passing** across all test suites (unit, integration, API, services)
- **Real infrastructure testing** - ARQ workers process actual Fly.io deployments
- **Event loop safety** - Fixed asyncio issues for subprocess worker compatibility
- **Test fixture updates** - All fixtures properly support job queue dependencies
- **Type checking** - 0 errors across entire codebase

**Technical Metrics:**
- **38 files changed** - +1,736 insertions, -334 deletions
- **Integration test runtime** - ~18 seconds with real ARQ workers and Fly.io verification
- **Event loop isolation** - Proper async session management for subprocess compatibility
- **Redis integration** - Production-ready Redis configuration with connection pooling

**Phase 4.3: Production Hardening** ✅ CORE COMPLETE
- [x] **Configure Upstash Redis** - production Redis setup on Fly.io
- [x] **Retry logic for external APIs** - exponential backoff for flaky Tigris IAM operations (see the sketch after this list)
- [x] **Monitoring and observability** - comprehensive Redis queue monitoring with CLI tools
- [x] **Error handling improvements** - graceful handling of expected API errors with appropriate log levels
- [x] **CLI tooling enhancements** - bulk update commands for CI/CD automation
- [x] **Documentation improvements** - comprehensive monitoring guide with Redis patterns
- [x] **Job uniqueness** - ARQ-based duplicate prevention for tenant operations
- [ ] **Worker scaling** - multiple arq workers for parallel job processing
- [ ] **Job persistence** - ensure jobs survive Redis/worker restarts
- [ ] **Error alerting** - notifications for failed deployment jobs
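
The retry item above follows the pattern sketched here (3 attempts with 1s/2s backoff, matching the reliability notes below); the helper name and the Tigris call in the usage comment are illustrative, not the actual implementation:

```python
# Hedged sketch of exponential-backoff retry for flaky external APIs.
import asyncio
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")


async def retry_with_backoff(
    op: Callable[[], Awaitable[T]],
    attempts: int = 3,
    base_delay: float = 1.0,
) -> T:
    """Retry an async operation, sleeping 1s then 2s between attempts."""
    for attempt in range(1, attempts + 1):
        try:
            return await op()
        except Exception:
            if attempt == attempts:
                raise  # out of retries - surface the original error
            await asyncio.sleep(base_delay * 2 ** (attempt - 1))
    raise AssertionError("unreachable")


# Usage (illustrative): wrap a flaky Tigris IAM call
# key = await retry_with_backoff(lambda: tigris.create_access_key(tenant_id))
```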

**Phase 4.4: Advanced Features** (Future)
- [ ] **Job scheduling** - deploy tenants at specific times
- [ ] **Priority queues** - urgent deployments processed first
- [ ] **Batch operations** - bulk tenant deployments
- [ ] **Job dependencies** - deployment → configuration → activation chains

#### Benefits Achieved ✅ REALIZED

**User Experience Improvements:**
- **Immediate API responses** - users get a job ID instantly vs waiting 60+ seconds for deployment completion
- **Real-time job tracking** - status API provides live updates on deployment progress
- **Better error visibility** - detailed error messages and job failure tracking
- **CI/CD automation ready** - bulk update commands for automated tenant deployments

**System Reliability:**
- **Redis persistence** - jobs survive Redis/worker restarts with proper queue durability
- **Idempotent job processing** - jobs can be safely retried without side effects
- **Event loop isolation** - worker processes operate independently from the API server
- **Retry resilience** - exponential backoff for flaky external API calls (3 attempts, 1s/2s delays)
- **Graceful error handling** - expected API errors logged at INFO level, unexpected at ERROR level
- **Job uniqueness** - prevents duplicate tenant operations with ARQ's built-in uniqueness feature

**Operational Benefits:**
- **Horizontal scaling ready** - architecture supports adding more workers for parallel processing
- **Comprehensive testing** - real infrastructure integration tests ensure production reliability
- **Type safety** - full type checking prevents runtime errors in job processing
- **Clean separation** - API and worker processes use the same codebase with different entry points
- **Queue monitoring** - Redis CLI integration for real-time queue activity monitoring
- **Comprehensive documentation** - detailed monitoring guide with Redis pattern explanations

**Development Benefits:**
- **Zero workflow changes** - existing deployment/undeploy workflows work unchanged as background jobs
- **Async/await native** - modern Python asyncio patterns throughout the implementation
- **Event correlation preserved** - all existing workflow tracking and event sourcing continues to work
- **Enhanced CLI tooling** - unified tenant commands with proper endpoint routing
- **Database integrity** - proper foreign key constraint handling in tenant deletion

#### Infrastructure Requirements
- **Local**: Redis via docker-compose (already exists) ✅
- **Production**: Upstash Redis on Fly.io (already configured) ✅
- **Workers**: arq worker processes (new deployment target)
- **Monitoring**: Job status dashboard (simple web interface)

#### API Evolution
```
# Before: Synchronous (blocks for 60+ seconds)
POST /tenant/{id}/deploy → {status: "active", machine_id: "..."}

# After: Asynchronous (returns immediately)
POST /tenant/{id}/deploy → {job_id: "uuid", workflow_id: "uuid", status: "queued"}
GET  /jobs/{job_id}/status → {status: "running", progress: "deploying_machine", workflow_id: "uuid"}
GET  /workflows/{workflow_id}/events → [...] # Existing event tracking works unchanged
```
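
For API consumers, this contract implies a simple poll loop. A hedged sketch using httpx (the endpoint paths and response fields come from the contract above; treating anything other than "queued"/"running" as terminal is an assumption):

```python
# Illustrative client-side polling for the async deploy API.
import asyncio

import httpx


async def deploy_and_wait(base_url: str, tenant_id: str) -> dict:
    async with httpx.AsyncClient(base_url=base_url) as client:
        # Kick off the deployment; returns immediately with a job ID
        resp = await client.post(f"/tenant/{tenant_id}/deploy")
        resp.raise_for_status()
        job_id = resp.json()["job_id"]

        # Poll the status endpoint until the job leaves queued/running
        while True:
            status = (await client.get(f"/jobs/{job_id}/status")).json()
            if status["status"] not in {"queued", "running"}:
                return status
            await asyncio.sleep(2)
```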

**Technology Choice**: **arq (Redis)** over pgqueuer
- **Existing Redis infrastructure** - Upstash + docker-compose already configured
- **Better ecosystem** - monitoring tools, documentation, community
- **Made by the pydantic team** - aligns with existing Python stack
- **Hybrid approach** - Redis for queue operations + Postgres for workflow state

#### Job Uniqueness Implementation

**Problem**: Multiple concurrent deployment requests for the same tenant could create duplicate jobs, wasting resources and potentially causing conflicts.

**Solution**: Leverage ARQ's built-in job uniqueness feature using predictable job IDs:

```python
# JobQueueService implementation
async def enqueue_deploy_job(self, tenant_id: UUID, image_tag: str | None = None) -> str:
    unique_job_id = f"deploy-{tenant_id}"

    job = await self.redis_pool.enqueue_job(
        "deploy_tenant_job",
        str(tenant_id),
        image_tag,
        _job_id=unique_job_id,  # ARQ prevents duplicates
    )

    if job is None:
        # Job already exists - return existing job ID
        return unique_job_id
    else:
        # New job created - return ARQ job ID
        return job.job_id
```
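
Usage follows naturally: under the assumptions above, concurrent requests for the same tenant collapse to a single job (illustrative snippet, not a test from the codebase):

```python
# Two concurrent deploy requests resolve to the same predictable job ID.
job_id_1 = await job_queue.enqueue_deploy_job(tenant_id)
job_id_2 = await job_queue.enqueue_deploy_job(tenant_id)  # duplicate request
assert job_id_1 == job_id_2 == f"deploy-{tenant_id}"
```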

**Key Features:**
- **Predictable Job IDs**: `deploy-{tenant_id}`, `undeploy-{tenant_id}`
- **Duplicate Prevention**: ARQ returns `None` for duplicate job IDs
- **Graceful Handling**: Return existing job ID instead of raising errors
- **Idempotent Operations**: Safe to retry deployment requests
- **Clear Logging**: Distinguish "Enqueued new" vs "Found existing" jobs

**Benefits:**
- Prevents resource waste from duplicate deployments
- Eliminates race conditions from concurrent requests
- Makes job monitoring more predictable with consistent IDs
- Provides natural deduplication without complex locking mechanisms

## Notes

### Design Philosophy Lessons
- **Simplicity beats framework magic** - removing DBOS made the system more reliable and debuggable
- **Event sourcing > complex orchestration** - database-backed event tracking provides better observability than framework abstractions
- **Idempotent operations > resumable workflows** - each step handling its own retry logic is simpler than framework-managed resumability
- **Explicit error handling > framework exception handling** - Python exceptions are clearer than orchestration framework error states

### Future Considerations
- **Monitoring integration** - workflow tracking events could feed into observability systems
- **Performance optimization** - event querying patterns may benefit from additional indexing
- **Audit compliance** - complete event trail supports regulatory requirements
- **Operational dashboards** - workflow status could drive tenant health monitoring

### Related Specifications
- **SPEC-8**: TigrisFS Integration - bucket provisioning integrated with deployment workflow
- **SPEC-1**: Specification-Driven Development Process - this spec follows the established format

## Observations

- [architecture] Removing framework complexity led to more maintainable system #simplification
- [workflow] Single unified deployment workflow handles both provisioning and updates #consolidation
- [observability] Event sourcing with workflow correlation provides complete audit trail #event-tracking
- [database] Foreign key relationships between workflows and events enable powerful queries #schema-design
- [testing] Integration tests with real infrastructure catch edge cases that unit tests miss #testing-strategy
- [parameters] Consistent method signatures (workflow_id first) reduce cognitive overhead #api-design
- [maintenance] Fewer workflows and dependencies reduce long-term maintenance burden #operational-excellence
- [debugging] Plain Python exceptions are clearer than framework abstraction layers #developer-experience
- [resilience] Exponential backoff retry patterns handle flaky external API calls gracefully #error-handling
- [monitoring] Redis queue monitoring provides real-time operational visibility #observability
- [ci-cd] Bulk update commands enable automated tenant deployments in continuous delivery pipelines #automation
- [documentation] Comprehensive monitoring guides reduce operational learning curve #knowledge-management
- [error-logging] Context-aware log levels (INFO for expected errors, ERROR for unexpected) improve signal-to-noise ratio #logging-strategy
- [job-uniqueness] ARQ job uniqueness with predictable tenant-based IDs prevents duplicate operations and resource waste #deduplication

## Implementation Notes

### Configuration Integration
- **Redis Configuration**: Add Redis settings to existing `apps/cloud/src/basic_memory_cloud/config.py` (sketched below)
- **Local Development**: Leverage existing Redis setup from `docker-compose.yml`
- **Production**: Use Upstash Redis configuration for production environments
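
A minimal sketch of that settings addition, assuming the config module uses pydantic-settings (the field names mirror the env vars in the fly.toml below, but are otherwise illustrative):

```python
# Hedged sketch: Redis/ARQ fields added to the existing Settings class.
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # ... existing cloud settings elided ...
    redis_url: str = "redis://localhost:6379"  # docker-compose Redis for local dev
    arq_max_jobs: int = 10                     # maps to ARQ_MAX_JOBS
    arq_keep_result: int = 3600                # maps to ARQ_KEEP_RESULT (seconds)
```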

### Docker Entrypoint Strategy
Create an `entrypoint.sh` script to toggle between the API server and worker processes using a single Docker image:

```bash
#!/bin/bash

# Entrypoint script for the Basic Memory Cloud service
# Supports multiple process types: api, worker

set -e

case "$1" in
  "api")
    echo "Starting Basic Memory Cloud API server..."
    exec uvicorn basic_memory_cloud.main:app \
      --host 0.0.0.0 \
      --port 8000 \
      --log-level info
    ;;
  "worker")
    echo "Starting Basic Memory Cloud ARQ worker..."
    # Run the ARQ worker using the WorkerSettings class
    exec python -m arq basic_memory_cloud.jobs.settings.WorkerSettings
    ;;
  *)
    echo "Usage: $0 {api|worker}"
    echo "  api    - Start the FastAPI server"
    echo "  worker - Start the ARQ worker"
    exit 1
    ;;
esac
```

### Fly.io Process Groups Configuration
Use separate machine groups for API and worker processes with independent scaling:

```toml
# fly.toml app configuration for basic-memory-cloud
app = 'basic-memory-cloud-dev-basic-machines'
primary_region = 'dfw'
org = 'basic-machines'
kill_signal = 'SIGINT'
kill_timeout = '5s'

[build]

# Process groups for API server and worker
[processes]
  api = "api"
  worker = "worker"

# Machine scaling configuration
[[machine]]
  size = 'shared-cpu-1x'
  processes = ['api']
  min_machines_running = 1
  auto_stop_machines = false
  auto_start_machines = true

[[machine]]
  size = 'shared-cpu-1x'
  processes = ['worker']
  min_machines_running = 1
  auto_stop_machines = false
  auto_start_machines = true

[env]
  # Python configuration
  PYTHONUNBUFFERED = '1'
  PYTHONPATH = '/app'

  # Logging configuration
  LOG_LEVEL = 'DEBUG'

  # Redis configuration for ARQ
  REDIS_URL = 'redis://basic-memory-cloud-redis.upstash.io'

  # Database configuration
  DATABASE_HOST = 'basic-memory-cloud-db-dev-basic-machines.internal'
  DATABASE_PORT = '5432'
  DATABASE_NAME = 'basic_memory_cloud'
  DATABASE_USER = 'postgres'
  DATABASE_SSL = 'true'

  # Worker configuration
  ARQ_MAX_JOBS = '10'
  ARQ_KEEP_RESULT = '3600'

  # Fly.io configuration
  FLY_ORG = 'basic-machines'
  FLY_REGION = 'dfw'

# Internal service - no external HTTP exposure for worker
# API accessible via basic-memory-cloud-dev-basic-machines.flycast:8000

[[vm]]
  size = 'shared-cpu-1x'
```

### Benefits of This Architecture
- **Single Docker Image**: Both API and worker use the same container with different entrypoints
- **Independent Scaling**: Scale API and worker processes separately based on demand
- **Clean Separation**: Web traffic handling separate from background job processing
- **Existing Infrastructure**: Leverages current PostgreSQL + Redis setup without complexity
- **Hybrid State Management**: Redis for queue operations, PostgreSQL for persistent workflow tracking

## Relations

- implements [[SPEC-8 TigrisFS Integration]]
- follows [[SPEC-1 Specification-Driven Development Process]]
- supersedes previous multi-workflow architecture