This is page 16 of 23. Use http://codebase.md/basicmachines-co/basic-memory?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── python-developer.md
│ │ └── system-architect.md
│ └── commands
│ ├── release
│ │ ├── beta.md
│ │ ├── changelog.md
│ │ ├── release-check.md
│ │ └── release.md
│ ├── spec.md
│ └── test-live.md
├── .dockerignore
├── .github
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── documentation.md
│ │ └── feature_request.md
│ └── workflows
│ ├── claude-code-review.yml
│ ├── claude-issue-triage.yml
│ ├── claude.yml
│ ├── dev-release.yml
│ ├── docker.yml
│ ├── pr-title.yml
│ ├── release.yml
│ └── test.yml
├── .gitignore
├── .python-version
├── CHANGELOG.md
├── CITATION.cff
├── CLA.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── ai-assistant-guide-extended.md
│ ├── character-handling.md
│ ├── cloud-cli.md
│ └── Docker.md
├── justfile
├── LICENSE
├── llms-install.md
├── pyproject.toml
├── README.md
├── SECURITY.md
├── smithery.yaml
├── specs
│ ├── SPEC-1 Specification-Driven Development Process.md
│ ├── SPEC-10 Unified Deployment Workflow and Event Tracking.md
│ ├── SPEC-11 Basic Memory API Performance Optimization.md
│ ├── SPEC-12 OpenTelemetry Observability.md
│ ├── SPEC-13 CLI Authentication with Subscription Validation.md
│ ├── SPEC-14 Cloud Git Versioning & GitHub Backup.md
│ ├── SPEC-14- Cloud Git Versioning & GitHub Backup.md
│ ├── SPEC-15 Configuration Persistence via Tigris for Cloud Tenants.md
│ ├── SPEC-16 MCP Cloud Service Consolidation.md
│ ├── SPEC-17 Semantic Search with ChromaDB.md
│ ├── SPEC-18 AI Memory Management Tool.md
│ ├── SPEC-19 Sync Performance and Memory Optimization.md
│ ├── SPEC-2 Slash Commands Reference.md
│ ├── SPEC-3 Agent Definitions.md
│ ├── SPEC-4 Notes Web UI Component Architecture.md
│ ├── SPEC-5 CLI Cloud Upload via WebDAV.md
│ ├── SPEC-6 Explicit Project Parameter Architecture.md
│ ├── SPEC-7 POC to spike Tigris Turso for local access to cloud data.md
│ ├── SPEC-8 TigrisFS Integration.md
│ ├── SPEC-9 Multi-Project Bidirectional Sync Architecture.md
│ ├── SPEC-9 Signed Header Tenant Information.md
│ └── SPEC-9-1 Follow-Ups- Conflict, Sync, and Observability.md
├── src
│ └── basic_memory
│ ├── __init__.py
│ ├── alembic
│ │ ├── alembic.ini
│ │ ├── env.py
│ │ ├── migrations.py
│ │ ├── script.py.mako
│ │ └── versions
│ │ ├── 3dae7c7b1564_initial_schema.py
│ │ ├── 502b60eaa905_remove_required_from_entity_permalink.py
│ │ ├── 5fe1ab1ccebe_add_projects_table.py
│ │ ├── 647e7a75e2cd_project_constraint_fix.py
│ │ ├── 9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py
│ │ ├── a1b2c3d4e5f6_fix_project_foreign_keys.py
│ │ ├── b3c3938bacdb_relation_to_name_unique_index.py
│ │ ├── cc7172b46608_update_search_index_schema.py
│ │ └── e7e1f4367280_add_scan_watermark_tracking_to_project.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── routers
│ │ │ ├── __init__.py
│ │ │ ├── directory_router.py
│ │ │ ├── importer_router.py
│ │ │ ├── knowledge_router.py
│ │ │ ├── management_router.py
│ │ │ ├── memory_router.py
│ │ │ ├── project_router.py
│ │ │ ├── prompt_router.py
│ │ │ ├── resource_router.py
│ │ │ ├── search_router.py
│ │ │ └── utils.py
│ │ └── template_loader.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── auth.py
│ │ ├── commands
│ │ │ ├── __init__.py
│ │ │ ├── cloud
│ │ │ │ ├── __init__.py
│ │ │ │ ├── api_client.py
│ │ │ │ ├── bisync_commands.py
│ │ │ │ ├── cloud_utils.py
│ │ │ │ ├── core_commands.py
│ │ │ │ ├── mount_commands.py
│ │ │ │ ├── rclone_config.py
│ │ │ │ ├── rclone_installer.py
│ │ │ │ ├── upload_command.py
│ │ │ │ └── upload.py
│ │ │ ├── command_utils.py
│ │ │ ├── db.py
│ │ │ ├── import_chatgpt.py
│ │ │ ├── import_claude_conversations.py
│ │ │ ├── import_claude_projects.py
│ │ │ ├── import_memory_json.py
│ │ │ ├── mcp.py
│ │ │ ├── project.py
│ │ │ ├── status.py
│ │ │ ├── sync.py
│ │ │ └── tool.py
│ │ └── main.py
│ ├── config.py
│ ├── db.py
│ ├── deps.py
│ ├── file_utils.py
│ ├── ignore_utils.py
│ ├── importers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chatgpt_importer.py
│ │ ├── claude_conversations_importer.py
│ │ ├── claude_projects_importer.py
│ │ ├── memory_json_importer.py
│ │ └── utils.py
│ ├── markdown
│ │ ├── __init__.py
│ │ ├── entity_parser.py
│ │ ├── markdown_processor.py
│ │ ├── plugins.py
│ │ ├── schemas.py
│ │ └── utils.py
│ ├── mcp
│ │ ├── __init__.py
│ │ ├── async_client.py
│ │ ├── project_context.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── ai_assistant_guide.py
│ │ │ ├── continue_conversation.py
│ │ │ ├── recent_activity.py
│ │ │ ├── search.py
│ │ │ └── utils.py
│ │ ├── resources
│ │ │ ├── ai_assistant_guide.md
│ │ │ └── project_info.py
│ │ ├── server.py
│ │ └── tools
│ │ ├── __init__.py
│ │ ├── build_context.py
│ │ ├── canvas.py
│ │ ├── chatgpt_tools.py
│ │ ├── delete_note.py
│ │ ├── edit_note.py
│ │ ├── list_directory.py
│ │ ├── move_note.py
│ │ ├── project_management.py
│ │ ├── read_content.py
│ │ ├── read_note.py
│ │ ├── recent_activity.py
│ │ ├── search.py
│ │ ├── utils.py
│ │ ├── view_note.py
│ │ └── write_note.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── knowledge.py
│ │ ├── project.py
│ │ └── search.py
│ ├── repository
│ │ ├── __init__.py
│ │ ├── entity_repository.py
│ │ ├── observation_repository.py
│ │ ├── project_info_repository.py
│ │ ├── project_repository.py
│ │ ├── relation_repository.py
│ │ ├── repository.py
│ │ └── search_repository.py
│ ├── schemas
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloud.py
│ │ ├── delete.py
│ │ ├── directory.py
│ │ ├── importer.py
│ │ ├── memory.py
│ │ ├── project_info.py
│ │ ├── prompt.py
│ │ ├── request.py
│ │ ├── response.py
│ │ ├── search.py
│ │ └── sync_report.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── context_service.py
│ │ ├── directory_service.py
│ │ ├── entity_service.py
│ │ ├── exceptions.py
│ │ ├── file_service.py
│ │ ├── initialization.py
│ │ ├── link_resolver.py
│ │ ├── project_service.py
│ │ ├── search_service.py
│ │ └── service.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── background_sync.py
│ │ ├── sync_service.py
│ │ └── watch_service.py
│ ├── templates
│ │ └── prompts
│ │ ├── continue_conversation.hbs
│ │ └── search.hbs
│ └── utils.py
├── test-int
│ ├── BENCHMARKS.md
│ ├── cli
│ │ ├── test_project_commands_integration.py
│ │ ├── test_sync_commands_integration.py
│ │ └── test_version_integration.py
│ ├── conftest.py
│ ├── mcp
│ │ ├── test_build_context_underscore.py
│ │ ├── test_build_context_validation.py
│ │ ├── test_chatgpt_tools_integration.py
│ │ ├── test_default_project_mode_integration.py
│ │ ├── test_delete_note_integration.py
│ │ ├── test_edit_note_integration.py
│ │ ├── test_list_directory_integration.py
│ │ ├── test_move_note_integration.py
│ │ ├── test_project_management_integration.py
│ │ ├── test_project_state_sync_integration.py
│ │ ├── test_read_content_integration.py
│ │ ├── test_read_note_integration.py
│ │ ├── test_search_integration.py
│ │ ├── test_single_project_mcp_integration.py
│ │ └── test_write_note_integration.py
│ ├── test_db_wal_mode.py
│ ├── test_disable_permalinks_integration.py
│ └── test_sync_performance_benchmark.py
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── conftest.py
│ │ ├── test_async_client.py
│ │ ├── test_continue_conversation_template.py
│ │ ├── test_directory_router.py
│ │ ├── test_importer_router.py
│ │ ├── test_knowledge_router.py
│ │ ├── test_management_router.py
│ │ ├── test_memory_router.py
│ │ ├── test_project_router_operations.py
│ │ ├── test_project_router.py
│ │ ├── test_prompt_router.py
│ │ ├── test_relation_background_resolution.py
│ │ ├── test_resource_router.py
│ │ ├── test_search_router.py
│ │ ├── test_search_template.py
│ │ ├── test_template_loader_helpers.py
│ │ └── test_template_loader.py
│ ├── cli
│ │ ├── conftest.py
│ │ ├── test_bisync_commands.py
│ │ ├── test_cli_tools.py
│ │ ├── test_cloud_authentication.py
│ │ ├── test_cloud_utils.py
│ │ ├── test_ignore_utils.py
│ │ ├── test_import_chatgpt.py
│ │ ├── test_import_claude_conversations.py
│ │ ├── test_import_claude_projects.py
│ │ ├── test_import_memory_json.py
│ │ └── test_upload.py
│ ├── conftest.py
│ ├── db
│ │ └── test_issue_254_foreign_key_constraints.py
│ ├── importers
│ │ ├── test_importer_base.py
│ │ └── test_importer_utils.py
│ ├── markdown
│ │ ├── __init__.py
│ │ ├── test_date_frontmatter_parsing.py
│ │ ├── test_entity_parser_error_handling.py
│ │ ├── test_entity_parser.py
│ │ ├── test_markdown_plugins.py
│ │ ├── test_markdown_processor.py
│ │ ├── test_observation_edge_cases.py
│ │ ├── test_parser_edge_cases.py
│ │ ├── test_relation_edge_cases.py
│ │ └── test_task_detection.py
│ ├── mcp
│ │ ├── conftest.py
│ │ ├── test_obsidian_yaml_formatting.py
│ │ ├── test_permalink_collision_file_overwrite.py
│ │ ├── test_prompts.py
│ │ ├── test_resources.py
│ │ ├── test_tool_build_context.py
│ │ ├── test_tool_canvas.py
│ │ ├── test_tool_delete_note.py
│ │ ├── test_tool_edit_note.py
│ │ ├── test_tool_list_directory.py
│ │ ├── test_tool_move_note.py
│ │ ├── test_tool_read_content.py
│ │ ├── test_tool_read_note.py
│ │ ├── test_tool_recent_activity.py
│ │ ├── test_tool_resource.py
│ │ ├── test_tool_search.py
│ │ ├── test_tool_utils.py
│ │ ├── test_tool_view_note.py
│ │ ├── test_tool_write_note.py
│ │ └── tools
│ │ └── test_chatgpt_tools.py
│ ├── Non-MarkdownFileSupport.pdf
│ ├── repository
│ │ ├── test_entity_repository_upsert.py
│ │ ├── test_entity_repository.py
│ │ ├── test_entity_upsert_issue_187.py
│ │ ├── test_observation_repository.py
│ │ ├── test_project_info_repository.py
│ │ ├── test_project_repository.py
│ │ ├── test_relation_repository.py
│ │ ├── test_repository.py
│ │ ├── test_search_repository_edit_bug_fix.py
│ │ └── test_search_repository.py
│ ├── schemas
│ │ ├── test_base_timeframe_minimum.py
│ │ ├── test_memory_serialization.py
│ │ ├── test_memory_url_validation.py
│ │ ├── test_memory_url.py
│ │ ├── test_schemas.py
│ │ └── test_search.py
│ ├── Screenshot.png
│ ├── services
│ │ ├── test_context_service.py
│ │ ├── test_directory_service.py
│ │ ├── test_entity_service_disable_permalinks.py
│ │ ├── test_entity_service.py
│ │ ├── test_file_service.py
│ │ ├── test_initialization.py
│ │ ├── test_link_resolver.py
│ │ ├── test_project_removal_bug.py
│ │ ├── test_project_service_operations.py
│ │ ├── test_project_service.py
│ │ └── test_search_service.py
│ ├── sync
│ │ ├── test_character_conflicts.py
│ │ ├── test_sync_service_incremental.py
│ │ ├── test_sync_service.py
│ │ ├── test_sync_wikilink_issue.py
│ │ ├── test_tmp_files.py
│ │ ├── test_watch_service_edge_cases.py
│ │ ├── test_watch_service_reload.py
│ │ └── test_watch_service.py
│ ├── test_config.py
│ ├── test_db_migration_deduplication.py
│ ├── test_deps.py
│ ├── test_production_cascade_delete.py
│ └── utils
│ ├── test_file_utils.py
│ ├── test_frontmatter_obsidian_compatible.py
│ ├── test_parse_tags.py
│ ├── test_permalink_formatting.py
│ ├── test_utf8_handling.py
│ └── test_validate_project_path.py
├── uv.lock
├── v0.15.0-RELEASE-DOCS.md
└── v15-docs
├── api-performance.md
├── background-relations.md
├── basic-memory-home.md
├── bug-fixes.md
├── chatgpt-integration.md
├── cloud-authentication.md
├── cloud-bisync.md
├── cloud-mode-usage.md
├── cloud-mount.md
├── default-project-mode.md
├── env-file-removal.md
├── env-var-overrides.md
├── explicit-project-parameter.md
├── gitignore-integration.md
├── project-root-env-var.md
├── README.md
└── sqlite-performance.md
```
# Files
--------------------------------------------------------------------------------
/tests/services/test_search_service.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for search service."""
2 |
3 | from datetime import datetime
4 |
5 | import pytest
6 | from sqlalchemy import text
7 |
8 | from basic_memory import db
9 | from basic_memory.schemas.search import SearchQuery, SearchItemType
10 |
11 |
12 | @pytest.mark.asyncio
13 | async def test_search_permalink(search_service, test_graph):
14 | """Exact permalink"""
15 | results = await search_service.search(SearchQuery(permalink="test/root"))
16 | assert len(results) == 1
17 |
18 | for r in results:
19 | assert "test/root" in r.permalink
20 |
21 |
22 | @pytest.mark.asyncio
23 | async def test_search_limit_offset(search_service, test_graph):
24 |     """Test limit and offset parameters."""
25 | results = await search_service.search(SearchQuery(permalink_match="test/*"))
26 | assert len(results) > 1
27 |
28 | results = await search_service.search(SearchQuery(permalink_match="test/*"), limit=1)
29 | assert len(results) == 1
30 |
31 | results = await search_service.search(SearchQuery(permalink_match="test/*"), limit=100)
32 | num_results = len(results)
33 |
34 | # assert offset
35 | offset_results = await search_service.search(
36 | SearchQuery(permalink_match="test/*"), limit=100, offset=1
37 | )
38 | assert len(offset_results) == num_results - 1
39 |
40 |
41 | @pytest.mark.asyncio
42 | async def test_search_permalink_observations_wildcard(search_service, test_graph):
43 | """Pattern matching"""
44 | results = await search_service.search(SearchQuery(permalink_match="test/root/observations/*"))
45 | assert len(results) == 2
46 | permalinks = {r.permalink for r in results}
47 | assert "test/root/observations/note/root-note-1" in permalinks
48 | assert "test/root/observations/tech/root-tech-note" in permalinks
49 |
50 |
51 | @pytest.mark.asyncio
52 | async def test_search_permalink_relation_wildcard(search_service, test_graph):
53 | """Pattern matching"""
54 | results = await search_service.search(SearchQuery(permalink_match="test/root/connects-to/*"))
55 | assert len(results) == 1
56 | permalinks = {r.permalink for r in results}
57 | assert "test/root/connects-to/test/connected-entity-1" in permalinks
58 |
59 |
60 | @pytest.mark.asyncio
61 | async def test_search_permalink_wildcard2(search_service, test_graph):
62 | """Pattern matching"""
63 | results = await search_service.search(
64 | SearchQuery(
65 | permalink_match="test/connected*",
66 | )
67 | )
68 | assert len(results) >= 2
69 | permalinks = {r.permalink for r in results}
70 | assert "test/connected-entity-1" in permalinks
71 | assert "test/connected-entity-2" in permalinks
72 |
73 |
74 | @pytest.mark.asyncio
75 | async def test_search_text(search_service, test_graph):
76 | """Full-text search"""
77 | results = await search_service.search(
78 | SearchQuery(text="Root Entity", entity_types=[SearchItemType.ENTITY])
79 | )
80 | assert len(results) >= 1
81 | assert results[0].permalink == "test/root"
82 |
83 |
84 | @pytest.mark.asyncio
85 | async def test_search_title(search_service, test_graph):
86 | """Title only search"""
87 | results = await search_service.search(
88 | SearchQuery(title="Root", entity_types=[SearchItemType.ENTITY])
89 | )
90 | assert len(results) >= 1
91 | assert results[0].permalink == "test/root"
92 |
93 |
94 | @pytest.mark.asyncio
95 | async def test_text_search_case_insensitive(search_service, test_graph):
96 | """Test text search functionality."""
97 | # Case insensitive
98 | results = await search_service.search(SearchQuery(text="ENTITY"))
99 | assert any("test/root" in r.permalink for r in results)
100 |
101 |
102 | @pytest.mark.asyncio
103 | async def test_text_search_content_word_match(search_service, test_graph):
104 | """Test text search functionality."""
105 |
106 | # content word match
107 | results = await search_service.search(SearchQuery(text="Connected"))
108 | assert len(results) > 0
109 | assert any(r.file_path == "test/Connected Entity 2.md" for r in results)
110 |
111 |
112 | @pytest.mark.asyncio
113 | async def test_text_search_multiple_terms(search_service, test_graph):
114 | """Test text search functionality."""
115 |
116 | # Multiple terms
117 | results = await search_service.search(SearchQuery(text="root note"))
118 | assert any("test/root" in r.permalink for r in results)
119 |
120 |
121 | @pytest.mark.asyncio
122 | async def test_pattern_matching(search_service, test_graph):
123 | """Test pattern matching with various wildcards."""
124 | # Test wildcards
125 | results = await search_service.search(SearchQuery(permalink_match="test/*"))
126 | for r in results:
127 | assert "test/" in r.permalink
128 |
129 | # Test start wildcards
130 | results = await search_service.search(SearchQuery(permalink_match="*/observations"))
131 | for r in results:
132 | assert "/observations" in r.permalink
133 |
134 | # Test permalink partial match
135 | results = await search_service.search(SearchQuery(permalink_match="test"))
136 | for r in results:
137 | assert "test/" in r.permalink
138 |
139 |
140 | @pytest.mark.asyncio
141 | async def test_filters(search_service, test_graph):
142 | """Test search filters."""
143 | # Combined filters
144 | results = await search_service.search(
145 | SearchQuery(text="Deep", entity_types=[SearchItemType.ENTITY], types=["deep"])
146 | )
147 | assert len(results) == 1
148 | for r in results:
149 | assert r.type == SearchItemType.ENTITY
150 | assert r.metadata.get("entity_type") == "deep"
151 |
152 |
153 | @pytest.mark.asyncio
154 | async def test_after_date(search_service, test_graph):
155 |     """Test the after_date filter."""
156 |
157 | # Should find with past date
158 | past_date = datetime(2020, 1, 1).astimezone()
159 | results = await search_service.search(
160 | SearchQuery(
161 | text="entity",
162 | after_date=past_date.isoformat(),
163 | )
164 | )
165 | for r in results:
166 | assert datetime.fromisoformat(r.created_at) > past_date
167 |
168 | # Should not find with future date
169 | future_date = datetime(2030, 1, 1).astimezone()
170 | results = await search_service.search(
171 | SearchQuery(
172 | text="entity",
173 | after_date=future_date.isoformat(),
174 | )
175 | )
176 | assert len(results) == 0
177 |
178 |
179 | @pytest.mark.asyncio
180 | async def test_search_type(search_service, test_graph):
181 |     """Test filtering by type."""
182 |
183 | # Should find only type
184 | results = await search_service.search(SearchQuery(types=["test"]))
185 | assert len(results) > 0
186 | for r in results:
187 | assert r.type == SearchItemType.ENTITY
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_search_entity_type(search_service, test_graph):
192 |     """Test filtering by entity type."""
193 |
194 | # Should find only type
195 | results = await search_service.search(SearchQuery(entity_types=[SearchItemType.ENTITY]))
196 | assert len(results) > 0
197 | for r in results:
198 | assert r.type == SearchItemType.ENTITY
199 |
200 |
201 | @pytest.mark.asyncio
202 | async def test_extract_entity_tags_exception_handling(search_service):
203 | """Test the _extract_entity_tags method exception handling (lines 147-151)."""
204 | from basic_memory.models.knowledge import Entity
205 |
206 | # Create entity with string tags that will cause parsing to fail and fall back to single tag
207 | entity_with_invalid_tags = Entity(
208 | title="Test Entity",
209 | entity_type="test",
210 | entity_metadata={"tags": "just a string"}, # This will fail ast.literal_eval
211 | content_type="text/markdown",
212 | file_path="test/test-entity.md",
213 | project_id=1,
214 | )
215 |
216 | # This should trigger the except block on lines 147-149
217 | result = search_service._extract_entity_tags(entity_with_invalid_tags)
218 | assert result == ["just a string"]
219 |
220 | # Test with empty string (should return empty list) - covers line 149
221 | entity_with_empty_tags = Entity(
222 | title="Test Entity Empty",
223 | entity_type="test",
224 | entity_metadata={"tags": ""},
225 | content_type="text/markdown",
226 | file_path="test/test-entity-empty.md",
227 | project_id=1,
228 | )
229 |
230 | result = search_service._extract_entity_tags(entity_with_empty_tags)
231 | assert result == []
232 |
233 |
234 | @pytest.mark.asyncio
235 | async def test_delete_entity_without_permalink(search_service, sample_entity):
236 | """Test deleting an entity that has no permalink (edge case)."""
237 |
238 | # Set the entity permalink to None to trigger the else branch on line 355
239 | sample_entity.permalink = None
240 |
241 | # This should trigger the delete_by_entity_id path (line 355) in handle_delete
242 | await search_service.handle_delete(sample_entity)
243 |
244 |
245 | @pytest.mark.asyncio
246 | async def test_no_criteria(search_service, test_graph):
247 | """Test search with no criteria returns empty list."""
248 | results = await search_service.search(SearchQuery())
249 | assert len(results) == 0
250 |
251 |
252 | @pytest.mark.asyncio
253 | async def test_init_search_index(search_service, session_maker):
254 | """Test search index initialization."""
255 | async with db.scoped_session(session_maker) as session:
256 | result = await session.execute(
257 | text("SELECT name FROM sqlite_master WHERE type='table' AND name='search_index';")
258 | )
259 | assert result.scalar() == "search_index"
260 |
261 |
262 | @pytest.mark.asyncio
263 | async def test_update_index(search_service, full_entity):
264 | """Test updating indexed content."""
265 | await search_service.index_entity(full_entity)
266 |
267 | # Update entity
268 | full_entity.title = "OMG I AM UPDATED"
269 | await search_service.index_entity(full_entity)
270 |
271 | # Search for new title
272 | results = await search_service.search(SearchQuery(text="OMG I AM UPDATED"))
273 | assert len(results) > 1
274 |
275 |
276 | @pytest.mark.asyncio
277 | async def test_boolean_and_search(search_service, test_graph):
278 | """Test boolean AND search."""
279 | # Create an entity with specific terms for testing
280 | # This assumes the test_graph fixture already has entities with relevant terms
281 |
282 | # Test AND operator - both terms must be present
283 | results = await search_service.search(SearchQuery(text="Root AND Entity"))
284 | assert len(results) >= 1
285 |
286 | # Verify the result contains both terms
287 | found = False
288 | for result in results:
289 | if (result.title and "Root" in result.title and "Entity" in result.title) or (
290 | result.content_snippet
291 | and "Root" in result.content_snippet
292 | and "Entity" in result.content_snippet
293 | ):
294 | found = True
295 | break
296 | assert found, "Boolean AND search failed to find items containing both terms"
297 |
298 | # Verify that items with only one term are not returned
299 | results = await search_service.search(SearchQuery(text="NonexistentTerm AND Root"))
300 | assert len(results) == 0, "Boolean AND search returned results when it shouldn't have"
301 |
302 |
303 | @pytest.mark.asyncio
304 | async def test_boolean_or_search(search_service, test_graph):
305 | """Test boolean OR search."""
306 | # Test OR operator - either term can be present
307 | results = await search_service.search(SearchQuery(text="Root OR Connected"))
308 |
309 | # Should find both "Root Entity" and "Connected Entity"
310 | assert len(results) >= 2
311 |
312 | # Verify we find items with either term
313 | root_found = False
314 | connected_found = False
315 |
316 | for result in results:
317 | if result.permalink == "test/root":
318 | root_found = True
319 | elif "connected" in result.permalink.lower():
320 | connected_found = True
321 |
322 | assert root_found, "Boolean OR search failed to find 'Root' term"
323 | assert connected_found, "Boolean OR search failed to find 'Connected' term"
324 |
325 |
326 | @pytest.mark.asyncio
327 | async def test_boolean_not_search(search_service, test_graph):
328 | """Test boolean NOT search."""
329 | # Test NOT operator - exclude certain terms
330 | results = await search_service.search(SearchQuery(text="Entity NOT Connected"))
331 |
332 | # Should find "Root Entity" but not "Connected Entity"
333 | for result in results:
334 | assert "connected" not in result.permalink.lower(), (
335 | "Boolean NOT search returned excluded term"
336 | )
337 |
338 |
339 | @pytest.mark.asyncio
340 | async def test_boolean_group_search(search_service, test_graph):
341 | """Test boolean grouping with parentheses."""
342 | # Test grouping - (A OR B) AND C
343 | results = await search_service.search(SearchQuery(title="(Root OR Connected) AND Entity"))
344 |
345 | # Should find both entities that contain "Entity" and either "Root" or "Connected"
346 | assert len(results) >= 2
347 |
348 | for result in results:
349 | # Each result should contain "Entity" and either "Root" or "Connected"
350 | contains_entity = "entity" in result.title.lower()
351 | contains_root_or_connected = (
352 | "root" in result.title.lower() or "connected" in result.title.lower()
353 | )
354 |
355 | assert contains_entity and contains_root_or_connected, (
356 | "Boolean grouped search returned incorrect results"
357 | )
358 |
359 |
360 | @pytest.mark.asyncio
361 | async def test_boolean_operators_detection(search_service):
362 | """Test detection of boolean operators in query."""
363 | # Test various queries that should be detected as boolean
364 | boolean_queries = [
365 | "term1 AND term2",
366 | "term1 OR term2",
367 | "term1 NOT term2",
368 | "(term1 OR term2) AND term3",
369 | "complex (nested OR grouping) AND term",
370 | ]
371 |
372 | for query_text in boolean_queries:
373 | query = SearchQuery(text=query_text)
374 | assert query.has_boolean_operators(), f"Failed to detect boolean operators in: {query_text}"
375 |
376 | # Test queries that should not be detected as boolean
377 | non_boolean_queries = [
378 | "normal search query",
379 | "brand name", # Should not detect "AND" within "brand"
380 | "understand this concept", # Should not detect "AND" within "understand"
381 | "command line",
382 | "sandbox testing",
383 | ]
384 |
385 | for query_text in non_boolean_queries:
386 | query = SearchQuery(text=query_text)
387 | assert not query.has_boolean_operators(), (
388 | f"Incorrectly detected boolean operators in: {query_text}"
389 | )
390 |
391 |
392 | # Tests for frontmatter tag search functionality
393 |
394 |
395 | @pytest.mark.asyncio
396 | async def test_extract_entity_tags_list_format(search_service, session_maker):
397 | """Test tag extraction from list format in entity metadata."""
398 | from basic_memory.models import Entity
399 |
400 | entity = Entity(
401 | title="Test Entity",
402 | entity_type="note",
403 | entity_metadata={"tags": ["business", "strategy", "planning"]},
404 | content_type="text/markdown",
405 | file_path="test/business-strategy.md",
406 | project_id=1,
407 | )
408 |
409 | tags = search_service._extract_entity_tags(entity)
410 | assert tags == ["business", "strategy", "planning"]
411 |
412 |
413 | @pytest.mark.asyncio
414 | async def test_extract_entity_tags_string_format(search_service, session_maker):
415 | """Test tag extraction from string format in entity metadata."""
416 | from basic_memory.models import Entity
417 |
418 | entity = Entity(
419 | title="Test Entity",
420 | entity_type="note",
421 | entity_metadata={"tags": "['documentation', 'tools', 'best-practices']"},
422 | content_type="text/markdown",
423 | file_path="test/docs.md",
424 | project_id=1,
425 | )
426 |
427 | tags = search_service._extract_entity_tags(entity)
428 | assert tags == ["documentation", "tools", "best-practices"]
429 |
430 |
431 | @pytest.mark.asyncio
432 | async def test_extract_entity_tags_empty_list(search_service, session_maker):
433 | """Test tag extraction from empty list in entity metadata."""
434 | from basic_memory.models import Entity
435 |
436 | entity = Entity(
437 | title="Test Entity",
438 | entity_type="note",
439 | entity_metadata={"tags": []},
440 | content_type="text/markdown",
441 | file_path="test/empty-tags.md",
442 | project_id=1,
443 | )
444 |
445 | tags = search_service._extract_entity_tags(entity)
446 | assert tags == []
447 |
448 |
449 | @pytest.mark.asyncio
450 | async def test_extract_entity_tags_empty_string(search_service, session_maker):
451 | """Test tag extraction from empty string in entity metadata."""
452 | from basic_memory.models import Entity
453 |
454 | entity = Entity(
455 | title="Test Entity",
456 | entity_type="note",
457 | entity_metadata={"tags": "[]"},
458 | content_type="text/markdown",
459 | file_path="test/empty-string-tags.md",
460 | project_id=1,
461 | )
462 |
463 | tags = search_service._extract_entity_tags(entity)
464 | assert tags == []
465 |
466 |
467 | @pytest.mark.asyncio
468 | async def test_extract_entity_tags_no_metadata(search_service, session_maker):
469 | """Test tag extraction when entity has no metadata."""
470 | from basic_memory.models import Entity
471 |
472 | entity = Entity(
473 | title="Test Entity",
474 | entity_type="note",
475 | entity_metadata=None,
476 | content_type="text/markdown",
477 | file_path="test/no-metadata.md",
478 | project_id=1,
479 | )
480 |
481 | tags = search_service._extract_entity_tags(entity)
482 | assert tags == []
483 |
484 |
485 | @pytest.mark.asyncio
486 | async def test_extract_entity_tags_no_tags_key(search_service, session_maker):
487 | """Test tag extraction when metadata exists but has no tags key."""
488 | from basic_memory.models import Entity
489 |
490 | entity = Entity(
491 | title="Test Entity",
492 | entity_type="note",
493 | entity_metadata={"title": "Some Title", "type": "note"},
494 | content_type="text/markdown",
495 | file_path="test/no-tags-key.md",
496 | project_id=1,
497 | )
498 |
499 | tags = search_service._extract_entity_tags(entity)
500 | assert tags == []
501 |
502 |
503 | @pytest.mark.asyncio
504 | async def test_search_by_frontmatter_tags(search_service, session_maker, test_project):
505 | """Test that entities can be found by searching for their frontmatter tags."""
506 | from basic_memory.repository import EntityRepository
507 | from unittest.mock import AsyncMock
508 |
509 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
510 |
511 | # Create entity with tags
512 | from datetime import datetime
513 |
514 | entity_data = {
515 | "title": "Business Strategy Guide",
516 | "entity_type": "note",
517 | "entity_metadata": {"tags": ["business", "strategy", "planning", "organization"]},
518 | "content_type": "text/markdown",
519 | "file_path": "guides/business-strategy.md",
520 | "permalink": "guides/business-strategy",
521 | "project_id": test_project.id,
522 | "created_at": datetime.now(),
523 | "updated_at": datetime.now(),
524 | }
525 |
526 | entity = await entity_repo.create(entity_data)
527 |
528 | # Mock file service to avoid file I/O
529 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
530 |
531 | await search_service.index_entity(entity)
532 |
533 | # Search for entities by tag
534 | results = await search_service.search(SearchQuery(text="business"))
535 | assert len(results) >= 1
536 |
537 | # Check that our entity is in the results
538 | entity_found = False
539 | for result in results:
540 | if result.title == "Business Strategy Guide":
541 | entity_found = True
542 | break
543 | assert entity_found, "Entity with 'business' tag should be found in search results"
544 |
545 | # Test searching by another tag
546 | results = await search_service.search(SearchQuery(text="planning"))
547 | assert len(results) >= 1
548 |
549 | entity_found = False
550 | for result in results:
551 | if result.title == "Business Strategy Guide":
552 | entity_found = True
553 | break
554 | assert entity_found, "Entity with 'planning' tag should be found in search results"
555 |
556 |
557 | @pytest.mark.asyncio
558 | async def test_search_by_frontmatter_tags_string_format(
559 | search_service, session_maker, test_project
560 | ):
561 | """Test that entities with string format tags can be found in search."""
562 | from basic_memory.repository import EntityRepository
563 | from unittest.mock import AsyncMock
564 |
565 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
566 |
567 | # Create entity with tags in string format
568 | from datetime import datetime
569 |
570 | entity_data = {
571 | "title": "Documentation Guidelines",
572 | "entity_type": "note",
573 | "entity_metadata": {"tags": "['documentation', 'tools', 'best-practices']"},
574 | "content_type": "text/markdown",
575 | "file_path": "guides/documentation.md",
576 | "permalink": "guides/documentation",
577 | "project_id": test_project.id,
578 | "created_at": datetime.now(),
579 | "updated_at": datetime.now(),
580 | }
581 |
582 | entity = await entity_repo.create(entity_data)
583 |
584 | # Mock file service to avoid file I/O
585 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
586 |
587 | await search_service.index_entity(entity)
588 |
589 | # Search for entities by tag
590 | results = await search_service.search(SearchQuery(text="documentation"))
591 | assert len(results) >= 1
592 |
593 | # Check that our entity is in the results
594 | entity_found = False
595 | for result in results:
596 | if result.title == "Documentation Guidelines":
597 | entity_found = True
598 | break
599 | assert entity_found, "Entity with 'documentation' tag should be found in search results"
600 |
601 |
602 | @pytest.mark.asyncio
603 | async def test_search_special_characters_in_title(search_service, session_maker, test_project):
604 | """Test that entities with special characters in titles can be searched without FTS5 syntax errors."""
605 | from basic_memory.repository import EntityRepository
606 | from unittest.mock import AsyncMock
607 |
608 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
609 |
610 | # Create entities with special characters that could cause FTS5 syntax errors
611 | special_titles = [
612 | "Note with spaces",
613 | "Note-with-dashes",
614 | "Note_with_underscores",
615 | "Note (with parentheses)", # This is the problematic one
616 | "Note & Symbols!",
617 | "Note [with brackets]",
618 | "Note {with braces}",
619 | 'Note "with quotes"',
620 | "Note 'with apostrophes'",
621 | ]
622 |
623 | entities = []
624 | for i, title in enumerate(special_titles):
625 | from datetime import datetime
626 |
627 | entity_data = {
628 | "title": title,
629 | "entity_type": "note",
630 | "entity_metadata": {"tags": ["special", "characters"]},
631 | "content_type": "text/markdown",
632 | "file_path": f"special/{title}.md",
633 | "permalink": f"special/note-{i}",
634 | "project_id": test_project.id,
635 | "created_at": datetime.now(),
636 | "updated_at": datetime.now(),
637 | }
638 |
639 | entity = await entity_repo.create(entity_data)
640 | entities.append(entity)
641 |
642 | # Mock file service to avoid file I/O
643 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
644 |
645 | # Index all entities
646 | for entity in entities:
647 | await search_service.index_entity(entity)
648 |
649 | # Test searching for each title - this should not cause FTS5 syntax errors
650 | for title in special_titles:
651 | results = await search_service.search(SearchQuery(title=title))
652 |
653 | # Should find the entity without throwing FTS5 syntax errors
654 | entity_found = False
655 | for result in results:
656 | if result.title == title:
657 | entity_found = True
658 | break
659 |
660 | assert entity_found, f"Entity with title '{title}' should be found in search results"
661 |
662 |
663 | @pytest.mark.asyncio
664 | async def test_search_title_with_parentheses_specific(search_service, session_maker, test_project):
665 | """Test searching specifically for title with parentheses to reproduce FTS5 error."""
666 | from basic_memory.repository import EntityRepository
667 | from unittest.mock import AsyncMock
668 |
669 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
670 |
671 | # Create the problematic entity
672 | from datetime import datetime
673 |
674 | entity_data = {
675 | "title": "Note (with parentheses)",
676 | "entity_type": "note",
677 | "entity_metadata": {"tags": ["test"]},
678 | "content_type": "text/markdown",
679 | "file_path": "special/Note (with parentheses).md",
680 | "permalink": "special/note-with-parentheses",
681 | "project_id": test_project.id,
682 | "created_at": datetime.now(),
683 | "updated_at": datetime.now(),
684 | }
685 |
686 | entity = await entity_repo.create(entity_data)
687 |
688 | # Mock file service to avoid file I/O
689 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
690 |
691 | # Index the entity
692 | await search_service.index_entity(entity)
693 |
694 | # Test searching for the title - this should not cause FTS5 syntax errors
695 | search_query = SearchQuery(title="Note (with parentheses)")
696 | results = await search_service.search(search_query)
697 |
698 | # Should find the entity without throwing FTS5 syntax errors
699 | assert len(results) >= 1
700 | assert any(result.title == "Note (with parentheses)" for result in results)
701 |
702 |
703 | @pytest.mark.asyncio
704 | async def test_search_title_via_repository_direct(search_service, session_maker, test_project):
705 | """Test searching via search repository directly to isolate the FTS5 error."""
706 | from basic_memory.repository import EntityRepository
707 | from unittest.mock import AsyncMock
708 |
709 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
710 |
711 | # Create the problematic entity
712 | from datetime import datetime
713 |
714 | entity_data = {
715 | "title": "Note (with parentheses)",
716 | "entity_type": "note",
717 | "entity_metadata": {"tags": ["test"]},
718 | "content_type": "text/markdown",
719 | "file_path": "special/Note (with parentheses).md",
720 | "permalink": "special/note-with-parentheses",
721 | "project_id": test_project.id,
722 | "created_at": datetime.now(),
723 | "updated_at": datetime.now(),
724 | }
725 |
726 | entity = await entity_repo.create(entity_data)
727 |
728 | # Mock file service to avoid file I/O
729 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
730 |
731 | # Index the entity
732 | await search_service.index_entity(entity)
733 |
734 | # Test searching via repository directly - this reproduces the error path
735 | results = await search_service.repository.search(
736 | title="Note (with parentheses)",
737 | limit=10,
738 | offset=0,
739 | )
740 |
741 | # Should find the entity without throwing FTS5 syntax errors
742 | assert len(results) >= 1
743 | assert any(result.title == "Note (with parentheses)" for result in results)
744 |
```
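
The tag-extraction tests above pin down the behavior expected of `SearchService._extract_entity_tags`: list-valued tags pass through unchanged, stringified lists such as `"['a', 'b']"` are parsed, unparseable strings fall back to a single tag, and empty values yield an empty list. The standalone sketch below is illustrative only and simply mirrors those assertions; the real method lives in `src/basic_memory/services/search_service.py` and may differ in detail.

```python
import ast
from typing import Any


def extract_entity_tags(entity: Any) -> list[str]:
    """Illustrative sketch of tag extraction consistent with the tests above."""
    metadata = entity.entity_metadata or {}
    tags = metadata.get("tags")
    if tags is None:
        return []
    # List-valued tags pass through unchanged.
    if isinstance(tags, list):
        return [str(tag) for tag in tags]
    if isinstance(tags, str):
        if not tags.strip():
            # An empty string means no tags.
            return []
        try:
            parsed = ast.literal_eval(tags)
            if isinstance(parsed, list):
                # Stringified lists like "['a', 'b']" (or "[]") are parsed.
                return [str(tag) for tag in parsed]
        except (ValueError, SyntaxError):
            pass
        # Anything that cannot be parsed falls back to a single tag.
        return [tags]
    return []
```

Under these assumptions, `extract_entity_tags(entity_with_invalid_tags)` from the exception-handling test would return `["just a string"]`, and the empty-string and `"[]"` cases would return `[]`, matching the assertions above.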
--------------------------------------------------------------------------------
/tests/api/test_project_router.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the project router API endpoints."""
2 |
3 | import tempfile
4 | from pathlib import Path
5 |
6 | import pytest
7 |
8 | from basic_memory.schemas.project_info import ProjectItem
9 |
10 |
11 | @pytest.mark.asyncio
12 | async def test_get_project_item(test_graph, client, project_config, test_project, project_url):
13 | """Test the project item endpoint returns correctly structured data."""
14 | # Set up some test data in the database
15 |
16 | # Call the endpoint
17 | response = await client.get(f"{project_url}/project/item")
18 |
19 | # Verify response
20 | assert response.status_code == 200
21 | project_info = ProjectItem.model_validate(response.json())
22 | assert project_info.name == test_project.name
23 | assert project_info.path == test_project.path
24 | assert project_info.is_default == test_project.is_default
25 |
26 |
27 | @pytest.mark.asyncio
28 | async def test_get_project_item_not_found(
29 | test_graph, client, project_config, test_project, project_url
30 | ):
31 |     """Test the project item endpoint returns 404 for an unknown project."""
32 | # Set up some test data in the database
33 |
34 | # Call the endpoint
35 | response = await client.get("/not-found/project/item")
36 |
37 | # Verify response
38 | assert response.status_code == 404
39 |
40 |
41 | @pytest.mark.asyncio
42 | async def test_get_default_project(test_graph, client, project_config, test_project, project_url):
43 | """Test the default project item endpoint returns the default project."""
44 | # Set up some test data in the database
45 |
46 | # Call the endpoint
47 | response = await client.get("/projects/default")
48 |
49 | # Verify response
50 | assert response.status_code == 200
51 | project_info = ProjectItem.model_validate(response.json())
52 | assert project_info.name == test_project.name
53 | assert project_info.path == test_project.path
54 | assert project_info.is_default == test_project.is_default
55 |
56 |
57 | @pytest.mark.asyncio
58 | async def test_get_project_info_endpoint(test_graph, client, project_config, project_url):
59 | """Test the project-info endpoint returns correctly structured data."""
60 | # Set up some test data in the database
61 |
62 | # Call the endpoint
63 | response = await client.get(f"{project_url}/project/info")
64 |
65 | # Verify response
66 | assert response.status_code == 200
67 | data = response.json()
68 |
69 | # Check top-level keys
70 | assert "project_name" in data
71 | assert "project_path" in data
72 | assert "available_projects" in data
73 | assert "default_project" in data
74 | assert "statistics" in data
75 | assert "activity" in data
76 | assert "system" in data
77 |
78 | # Check statistics
79 | stats = data["statistics"]
80 | assert "total_entities" in stats
81 | assert stats["total_entities"] >= 0
82 | assert "total_observations" in stats
83 | assert stats["total_observations"] >= 0
84 | assert "total_relations" in stats
85 | assert stats["total_relations"] >= 0
86 |
87 | # Check activity
88 | activity = data["activity"]
89 | assert "recently_created" in activity
90 | assert "recently_updated" in activity
91 | assert "monthly_growth" in activity
92 |
93 | # Check system
94 | system = data["system"]
95 | assert "version" in system
96 | assert "database_path" in system
97 | assert "database_size" in system
98 | assert "timestamp" in system
99 |
100 |
101 | @pytest.mark.asyncio
102 | async def test_get_project_info_content(test_graph, client, project_config, project_url):
103 | """Test that project-info contains actual data from the test database."""
104 | # Call the endpoint
105 | response = await client.get(f"{project_url}/project/info")
106 |
107 | # Verify response
108 | assert response.status_code == 200
109 | data = response.json()
110 |
111 | # Check that test_graph content is reflected in statistics
112 | stats = data["statistics"]
113 |
114 | # Our test graph should have at least a few entities
115 | assert stats["total_entities"] > 0
116 |
117 | # It should also have some observations
118 | assert stats["total_observations"] > 0
119 |
120 | # And relations
121 | assert stats["total_relations"] > 0
122 |
123 | # Check that entity types include 'test'
124 | assert "test" in stats["entity_types"] or "entity" in stats["entity_types"]
125 |
126 |
127 | @pytest.mark.asyncio
128 | async def test_list_projects_endpoint(test_config, test_graph, client, project_config, project_url):
129 | """Test the list projects endpoint returns correctly structured data."""
130 | # Call the endpoint
131 | response = await client.get("/projects/projects")
132 |
133 | # Verify response
134 | assert response.status_code == 200
135 | data = response.json()
136 |
137 | # Check that the response contains expected fields
138 | assert "projects" in data
139 | assert "default_project" in data
140 |
141 | # Check that projects is a list
142 | assert isinstance(data["projects"], list)
143 |
144 | # There should be at least one project (the test project)
145 | assert len(data["projects"]) > 0
146 |
147 | # Verify project item structure
148 | if data["projects"]:
149 | project = data["projects"][0]
150 | assert "name" in project
151 | assert "path" in project
152 | assert "is_default" in project
153 |
154 | # Default project should be marked
155 | default_project = next((p for p in data["projects"] if p["is_default"]), None)
156 | assert default_project is not None
157 | assert default_project["name"] == data["default_project"]
158 |
159 |
160 | @pytest.mark.asyncio
161 | async def test_remove_project_endpoint(test_config, client, project_service):
162 | """Test the remove project endpoint."""
163 | # First create a test project to remove
164 | test_project_name = "test-remove-project"
165 | await project_service.add_project(test_project_name, "/tmp/test-remove-project")
166 |
167 | # Verify it exists
168 | project = await project_service.get_project(test_project_name)
169 | assert project is not None
170 |
171 | # Remove the project
172 | response = await client.delete(f"/projects/{test_project_name}")
173 |
174 | # Verify response
175 | assert response.status_code == 200
176 | data = response.json()
177 |
178 | # Check response structure
179 | assert "message" in data
180 | assert "status" in data
181 | assert data["status"] == "success"
182 | assert "old_project" in data
183 | assert data["old_project"]["name"] == test_project_name
184 |
185 | # Verify project is actually removed
186 | removed_project = await project_service.get_project(test_project_name)
187 | assert removed_project is None
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_set_default_project_endpoint(test_config, client, project_service):
192 | """Test the set default project endpoint."""
193 | # Create a test project to set as default
194 | test_project_name = "test-default-project"
195 | await project_service.add_project(test_project_name, "/tmp/test-default-project")
196 |
197 | # Set it as default
198 | response = await client.put(f"/projects/{test_project_name}/default")
199 |
200 | # Verify response
201 | assert response.status_code == 200
202 | data = response.json()
203 |
204 | # Check response structure
205 | assert "message" in data
206 | assert "status" in data
207 | assert data["status"] == "success"
208 | assert "new_project" in data
209 | assert data["new_project"]["name"] == test_project_name
210 |
211 | # Verify it's actually set as default
212 | assert project_service.default_project == test_project_name
213 |
214 |
215 | @pytest.mark.asyncio
216 | async def test_update_project_path_endpoint(test_config, client, project_service, project_url):
217 | """Test the update project endpoint for changing project path."""
218 | # Create a test project to update
219 | test_project_name = "test-update-project"
220 | with tempfile.TemporaryDirectory() as temp_dir:
221 | test_root = Path(temp_dir)
222 | old_path = (test_root / "old-location").as_posix()
223 | new_path = (test_root / "new-location").as_posix()
224 |
225 | await project_service.add_project(test_project_name, old_path)
226 |
227 | try:
228 | # Verify initial state
229 | project = await project_service.get_project(test_project_name)
230 | assert project is not None
231 | assert project.path == old_path
232 |
233 | # Update the project path
234 | response = await client.patch(
235 | f"{project_url}/project/{test_project_name}", json={"path": new_path}
236 | )
237 |
238 | # Verify response
239 | assert response.status_code == 200
240 | data = response.json()
241 |
242 | # Check response structure
243 | assert "message" in data
244 | assert "status" in data
245 | assert data["status"] == "success"
246 | assert "old_project" in data
247 | assert "new_project" in data
248 |
249 | # Check old project data
250 | assert data["old_project"]["name"] == test_project_name
251 | assert data["old_project"]["path"] == old_path
252 |
253 | # Check new project data
254 | assert data["new_project"]["name"] == test_project_name
255 | assert data["new_project"]["path"] == new_path
256 |
257 | # Verify project was actually updated in database
258 | updated_project = await project_service.get_project(test_project_name)
259 | assert updated_project is not None
260 | assert updated_project.path == new_path
261 |
262 | finally:
263 | # Clean up
264 | try:
265 | await project_service.remove_project(test_project_name)
266 | except Exception:
267 | pass
268 |
269 |
270 | @pytest.mark.asyncio
271 | async def test_update_project_is_active_endpoint(test_config, client, project_service, project_url):
272 | """Test the update project endpoint for changing is_active status."""
273 | # Create a test project to update
274 | test_project_name = "test-update-active-project"
275 | test_path = "/tmp/test-update-active"
276 |
277 | await project_service.add_project(test_project_name, test_path)
278 |
279 | try:
280 | # Update the project is_active status
281 | response = await client.patch(
282 | f"{project_url}/project/{test_project_name}", json={"is_active": False}
283 | )
284 |
285 | # Verify response
286 | assert response.status_code == 200
287 | data = response.json()
288 |
289 | # Check response structure
290 | assert "message" in data
291 | assert "status" in data
292 | assert data["status"] == "success"
293 | assert f"Project '{test_project_name}' updated successfully" == data["message"]
294 |
295 | finally:
296 | # Clean up
297 | try:
298 | await project_service.remove_project(test_project_name)
299 | except Exception:
300 | pass
301 |
302 |
303 | @pytest.mark.asyncio
304 | async def test_update_project_both_params_endpoint(
305 | test_config, client, project_service, project_url
306 | ):
307 | """Test the update project endpoint with both path and is_active parameters."""
308 | # Create a test project to update
309 | test_project_name = "test-update-both-project"
310 | with tempfile.TemporaryDirectory() as temp_dir:
311 | test_root = Path(temp_dir)
312 | old_path = (test_root / "old-location").as_posix()
313 | new_path = (test_root / "new-location").as_posix()
314 |
315 | await project_service.add_project(test_project_name, old_path)
316 |
317 | try:
318 | # Update both path and is_active (path should take precedence)
319 | response = await client.patch(
320 | f"{project_url}/project/{test_project_name}",
321 | json={"path": new_path, "is_active": False},
322 | )
323 |
324 | # Verify response
325 | assert response.status_code == 200
326 | data = response.json()
327 |
328 | # Check that path update was performed (takes precedence)
329 | assert data["new_project"]["path"] == new_path
330 |
331 | # Verify project was actually updated in database
332 | updated_project = await project_service.get_project(test_project_name)
333 | assert updated_project is not None
334 | assert updated_project.path == new_path
335 |
336 | finally:
337 | # Clean up
338 | try:
339 | await project_service.remove_project(test_project_name)
340 | except Exception:
341 | pass
342 |
343 |
344 | @pytest.mark.asyncio
345 | async def test_update_project_nonexistent_endpoint(client, project_url):
346 | """Test the update project endpoint with a nonexistent project."""
347 | # Try to update a project that doesn't exist
348 | response = await client.patch(
349 | f"{project_url}/project/nonexistent-project", json={"path": "/tmp/new-path"}
350 | )
351 |
352 | # Should return 400 error
353 | assert response.status_code == 400
354 | data = response.json()
355 | assert "detail" in data
356 | assert "not found in configuration" in data["detail"]
357 |
358 |
359 | @pytest.mark.asyncio
360 | async def test_update_project_relative_path_error_endpoint(
361 | test_config, client, project_service, project_url
362 | ):
363 | """Test the update project endpoint with relative path (should fail)."""
364 | # Create a test project to update
365 | test_project_name = "test-update-relative-project"
366 | test_path = "/tmp/test-update-relative"
367 |
368 | await project_service.add_project(test_project_name, test_path)
369 |
370 | try:
371 | # Try to update with relative path
372 | response = await client.patch(
373 | f"{project_url}/project/{test_project_name}", json={"path": "./relative-path"}
374 | )
375 |
376 | # Should return 400 error
377 | assert response.status_code == 400
378 | data = response.json()
379 | assert "detail" in data
380 | assert "Path must be absolute" in data["detail"]
381 |
382 | finally:
383 | # Clean up
384 | try:
385 | await project_service.remove_project(test_project_name)
386 | except Exception:
387 | pass
388 |
389 |
390 | @pytest.mark.asyncio
391 | async def test_update_project_no_params_endpoint(test_config, client, project_service, project_url):
392 | """Test the update project endpoint with no parameters (should fail)."""
393 | # Create a test project to update
394 | test_project_name = "test-update-no-params-project"
395 | test_path = "/tmp/test-update-no-params"
396 |
397 | await project_service.add_project(test_project_name, test_path)
398 | proj_info = await project_service.get_project(test_project_name)
399 | assert proj_info.name == test_project_name
400 | # On Windows the path is prepended with a drive letter
401 | assert test_path in proj_info.path
402 |
403 | try:
404 | # Try to update with no parameters
405 | response = await client.patch(f"{project_url}/project/{test_project_name}", json={})
406 |
407 | # Should return 200 (no-op)
408 | assert response.status_code == 200
409 | proj_info = await project_service.get_project(test_project_name)
410 | assert proj_info.name == test_project_name
411 | # On Windows the path is prepended with a drive letter
412 | assert test_path in proj_info.path
413 |
414 | finally:
415 | # Clean up
416 | try:
417 | await project_service.remove_project(test_project_name)
418 | except Exception:
419 | pass
420 |
421 |
422 | @pytest.mark.asyncio
423 | async def test_update_project_empty_path_endpoint(
424 | test_config, client, project_service, project_url
425 | ):
426 | """Test the update project endpoint with empty path parameter."""
427 | # Create a test project to update
428 | test_project_name = "test-update-empty-path-project"
429 | test_path = "/tmp/test-update-empty-path"
430 |
431 | await project_service.add_project(test_project_name, test_path)
432 |
433 | try:
434 | # Try to update with empty/null path - should be treated as no path update
435 | response = await client.patch(
436 | f"{project_url}/project/{test_project_name}", json={"path": None, "is_active": True}
437 | )
438 |
439 | # Should succeed and perform is_active update
440 | assert response.status_code == 200
441 | data = response.json()
442 | assert data["status"] == "success"
443 |
444 | finally:
445 | # Clean up
446 | try:
447 | await project_service.remove_project(test_project_name)
448 | except Exception:
449 | pass
450 |
451 |
452 | @pytest.mark.asyncio
453 | async def test_sync_project_endpoint(test_graph, client, project_url):
454 | """Test the project sync endpoint initiates background sync."""
455 | # Call the sync endpoint
456 | response = await client.post(f"{project_url}/project/sync")
457 |
458 | # Verify response
459 | assert response.status_code == 200
460 | data = response.json()
461 |
462 | # Check response structure
463 | assert "status" in data
464 | assert "message" in data
465 | assert data["status"] == "sync_started"
466 | assert "Filesystem sync initiated" in data["message"]
467 |
468 |
469 | @pytest.mark.asyncio
470 | async def test_sync_project_endpoint_not_found(client):
471 | """Test the project sync endpoint with nonexistent project."""
472 | # Call the sync endpoint for a project that doesn't exist
473 | response = await client.post("/nonexistent-project/project/sync")
474 |
475 | # Should return 404
476 | assert response.status_code == 404
477 |
478 |
479 | @pytest.mark.asyncio
480 | async def test_remove_default_project_fails(test_config, client, project_service):
481 | """Test that removing the default project returns an error."""
482 | # Get the current default project
483 | default_project_name = project_service.default_project
484 |
485 | # Try to remove the default project
486 | response = await client.delete(f"/projects/{default_project_name}")
487 |
488 | # Should return 400 with helpful error message
489 | assert response.status_code == 400
490 | data = response.json()
491 | assert "detail" in data
492 | assert "Cannot delete default project" in data["detail"]
493 | assert default_project_name in data["detail"]
494 |
495 |
496 | @pytest.mark.asyncio
497 | async def test_remove_default_project_with_alternatives(test_config, client, project_service):
498 | """Test that error message includes alternative projects when trying to delete default."""
499 | # Get the current default project
500 | default_project_name = project_service.default_project
501 |
502 | # Create another project so there are alternatives
503 | test_project_name = "test-alternative-project"
504 | await project_service.add_project(test_project_name, "/tmp/test-alternative")
505 |
506 | try:
507 | # Try to remove the default project
508 | response = await client.delete(f"/projects/{default_project_name}")
509 |
510 | # Should return 400 with helpful error message including alternatives
511 | assert response.status_code == 400
512 | data = response.json()
513 | assert "detail" in data
514 | assert "Cannot delete default project" in data["detail"]
515 | assert "Set another project as default first" in data["detail"]
516 | assert test_project_name in data["detail"]
517 |
518 | finally:
519 | # Clean up
520 | try:
521 | await project_service.remove_project(test_project_name)
522 | except Exception:
523 | pass
524 |
525 |
526 | @pytest.mark.asyncio
527 | async def test_remove_non_default_project_succeeds(test_config, client, project_service):
528 | """Test that removing a non-default project succeeds."""
529 | # Create a test project to remove
530 | test_project_name = "test-remove-non-default"
531 | await project_service.add_project(test_project_name, "/tmp/test-remove-non-default")
532 |
533 | # Verify it's not the default
534 | assert project_service.default_project != test_project_name
535 |
536 | # Remove the project
537 | response = await client.delete(f"/projects/{test_project_name}")
538 |
539 | # Should succeed
540 | assert response.status_code == 200
541 | data = response.json()
542 | assert data["status"] == "success"
543 |
544 | # Verify project is removed
545 | removed_project = await project_service.get_project(test_project_name)
546 | assert removed_project is None
547 |
548 |
549 | @pytest.mark.asyncio
550 | async def test_set_nonexistent_project_as_default_fails(test_config, client, project_service):
551 | """Test that setting a non-existent project as default returns 404."""
552 | # Try to set a project that doesn't exist as default
553 | response = await client.put("/projects/nonexistent-project/default")
554 |
555 | # Should return 404
556 | assert response.status_code == 404
557 | data = response.json()
558 | assert "detail" in data
559 | assert "does not exist" in data["detail"]
560 |
561 |
562 | @pytest.mark.asyncio
563 | async def test_create_project_idempotent_same_path(test_config, client, project_service):
564 | """Test that creating a project with same name and same path is idempotent."""
565 | # Create a project with platform-independent path
566 | test_project_name = "test-idempotent"
567 | with tempfile.TemporaryDirectory() as temp_dir:
568 | test_project_path = (Path(temp_dir) / "test-idempotent").as_posix()
569 |
570 | response1 = await client.post(
571 | "/projects/projects",
572 | json={"name": test_project_name, "path": test_project_path, "set_default": False},
573 | )
574 |
575 | # Should succeed with 201 Created
576 | assert response1.status_code == 201
577 | data1 = response1.json()
578 | assert data1["status"] == "success"
579 | assert data1["new_project"]["name"] == test_project_name
580 |
581 | # Try to create the same project again with same name and path
582 | response2 = await client.post(
583 | "/projects/projects",
584 | json={"name": test_project_name, "path": test_project_path, "set_default": False},
585 | )
586 |
587 | # Should also succeed (idempotent)
588 | assert response2.status_code == 200
589 | data2 = response2.json()
590 | assert data2["status"] == "success"
591 | assert "already exists" in data2["message"]
592 | assert data2["new_project"]["name"] == test_project_name
593 | # Normalize paths for cross-platform comparison
594 | assert Path(data2["new_project"]["path"]).resolve() == Path(test_project_path).resolve()
595 |
596 | # Clean up
597 | try:
598 | await project_service.remove_project(test_project_name)
599 | except Exception:
600 | pass
601 |
602 |
603 | @pytest.mark.asyncio
604 | async def test_create_project_fails_different_path(test_config, client, project_service):
605 | """Test that creating a project with same name but different path fails."""
606 | # Create a project
607 | test_project_name = "test-path-conflict"
608 | test_project_path1 = "/tmp/test-path-conflict-1"
609 |
610 | response1 = await client.post(
611 | "/projects/projects",
612 | json={"name": test_project_name, "path": test_project_path1, "set_default": False},
613 | )
614 |
615 | # Should succeed with 201 Created
616 | assert response1.status_code == 201
617 |
618 | # Try to create the same project with different path
619 | test_project_path2 = "/tmp/test-path-conflict-2"
620 | response2 = await client.post(
621 | "/projects/projects",
622 | json={"name": test_project_name, "path": test_project_path2, "set_default": False},
623 | )
624 |
625 | # Should fail with 400
626 | assert response2.status_code == 400
627 | data2 = response2.json()
628 | assert "detail" in data2
629 | assert "already exists with different path" in data2["detail"]
630 | assert test_project_path1 in data2["detail"]
631 | assert test_project_path2 in data2["detail"]
632 |
633 | # Clean up
634 | try:
635 | await project_service.remove_project(test_project_name)
636 | except Exception:
637 | pass
638 |
639 |
640 | @pytest.mark.asyncio
641 | async def test_remove_project_with_delete_notes_false(test_config, client, project_service):
642 | """Test that removing a project with delete_notes=False leaves directory intact."""
643 | # Create a test project with actual directory
644 | test_project_name = "test-remove-keep-files"
645 | with tempfile.TemporaryDirectory() as temp_dir:
646 | test_path = Path(temp_dir) / "test-project"
647 | test_path.mkdir()
648 | test_file = test_path / "test.md"
649 | test_file.write_text("# Test Note")
650 |
651 | await project_service.add_project(test_project_name, str(test_path))
652 |
653 | # Remove the project without deleting files (default)
654 | response = await client.delete(f"/projects/{test_project_name}")
655 |
656 | # Verify response
657 | assert response.status_code == 200
658 | data = response.json()
659 | assert data["status"] == "success"
660 |
661 | # Verify project is removed from config/db
662 | removed_project = await project_service.get_project(test_project_name)
663 | assert removed_project is None
664 |
665 | # Verify directory still exists
666 | assert test_path.exists()
667 | assert test_file.exists()
668 |
669 |
670 | @pytest.mark.asyncio
671 | async def test_remove_project_with_delete_notes_true(test_config, client, project_service):
672 | """Test that removing a project with delete_notes=True deletes the directory."""
673 | # Create a test project with actual directory
674 | test_project_name = "test-remove-delete-files"
675 | with tempfile.TemporaryDirectory() as temp_dir:
676 | test_path = Path(temp_dir) / "test-project"
677 | test_path.mkdir()
678 | test_file = test_path / "test.md"
679 | test_file.write_text("# Test Note")
680 |
681 | await project_service.add_project(test_project_name, str(test_path))
682 |
683 | # Remove the project with delete_notes=True
684 | response = await client.delete(f"/projects/{test_project_name}?delete_notes=true")
685 |
686 | # Verify response
687 | assert response.status_code == 200
688 | data = response.json()
689 | assert data["status"] == "success"
690 |
691 | # Verify project is removed from config/db
692 | removed_project = await project_service.get_project(test_project_name)
693 | assert removed_project is None
694 |
695 | # Verify directory is deleted
696 | assert not test_path.exists()
697 |
698 |
699 | @pytest.mark.asyncio
700 | async def test_remove_project_delete_notes_nonexistent_directory(
701 | test_config, client, project_service
702 | ):
703 | """Test that removing a project with delete_notes=True handles missing directory gracefully."""
704 | # Create a project pointing to a non-existent path
705 | test_project_name = "test-remove-missing-dir"
706 | test_path = "/tmp/this-directory-does-not-exist-12345"
707 |
708 | await project_service.add_project(test_project_name, test_path)
709 |
710 | # Remove the project with delete_notes=True (should not fail even if dir doesn't exist)
711 | response = await client.delete(f"/projects/{test_project_name}?delete_notes=true")
712 |
713 | # Should succeed
714 | assert response.status_code == 200
715 | data = response.json()
716 | assert data["status"] == "success"
717 |
718 | # Verify project is removed
719 | removed_project = await project_service.get_project(test_project_name)
720 | assert removed_project is None
721 |
```
--------------------------------------------------------------------------------
/tests/repository/test_search_repository.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the SearchRepository."""
2 |
3 | from datetime import datetime, timezone
4 |
5 | import pytest
6 | import pytest_asyncio
7 | from sqlalchemy import text
8 |
9 | from basic_memory import db
10 | from basic_memory.models import Entity
11 | from basic_memory.models.project import Project
12 | from basic_memory.repository.search_repository import SearchRepository, SearchIndexRow
13 | from basic_memory.schemas.search import SearchItemType
14 |
15 |
16 | @pytest_asyncio.fixture
17 | async def search_entity(session_maker, test_project: Project):
18 | """Create a test entity for search testing."""
19 | async with db.scoped_session(session_maker) as session:
20 | entity = Entity(
21 | project_id=test_project.id,
22 | title="Search Test Entity",
23 | entity_type="test",
24 | permalink="test/search-test-entity",
25 | file_path="test/search_test_entity.md",
26 | content_type="text/markdown",
27 | created_at=datetime.now(timezone.utc),
28 | updated_at=datetime.now(timezone.utc),
29 | )
30 | session.add(entity)
31 | await session.flush()
32 | return entity
33 |
34 |
35 | @pytest_asyncio.fixture
36 | async def second_project(project_repository):
37 | """Create a second project for testing project isolation."""
38 | project_data = {
39 | "name": "Second Test Project",
40 | "description": "Another project for testing",
41 | "path": "/second/project/path",
42 | "is_active": True,
43 | "is_default": None,
44 | }
45 | return await project_repository.create(project_data)
46 |
47 |
48 | @pytest_asyncio.fixture
49 | async def second_project_repository(session_maker, second_project):
50 | """Create a repository for the second project."""
51 | return SearchRepository(session_maker, project_id=second_project.id)
52 |
53 |
54 | @pytest_asyncio.fixture
55 | async def second_entity(session_maker, second_project: Project):
56 | """Create a test entity in the second project."""
57 | async with db.scoped_session(session_maker) as session:
58 | entity = Entity(
59 | project_id=second_project.id,
60 | title="Second Project Entity",
61 | entity_type="test",
62 | permalink="test/second-project-entity",
63 | file_path="test/second_project_entity.md",
64 | content_type="text/markdown",
65 | created_at=datetime.now(timezone.utc),
66 | updated_at=datetime.now(timezone.utc),
67 | )
68 | session.add(entity)
69 | await session.flush()
70 | return entity
71 |
72 |
73 | @pytest.mark.asyncio
74 | async def test_init_search_index(search_repository):
75 | """Test that search index can be initialized."""
76 | await search_repository.init_search_index()
77 |
78 | # Verify search_index table exists
79 | async with db.scoped_session(search_repository.session_maker) as session:
80 | result = await session.execute(
81 | text("SELECT name FROM sqlite_master WHERE type='table' AND name='search_index';")
82 | )
83 | assert result.scalar() == "search_index"
84 |
85 |
86 | @pytest.mark.asyncio
87 | async def test_index_item(search_repository, search_entity):
88 | """Test indexing an item with project_id."""
89 | # Create search index row for the entity
90 | search_row = SearchIndexRow(
91 | id=search_entity.id,
92 | type=SearchItemType.ENTITY.value,
93 | title=search_entity.title,
94 | content_stems="search test entity content",
95 | content_snippet="This is a test entity for search",
96 | permalink=search_entity.permalink,
97 | file_path=search_entity.file_path,
98 | entity_id=search_entity.id,
99 | metadata={"entity_type": search_entity.entity_type},
100 | created_at=search_entity.created_at,
101 | updated_at=search_entity.updated_at,
102 | project_id=search_repository.project_id,
103 | )
104 |
105 | # Index the item
106 | await search_repository.index_item(search_row)
107 |
108 | # Search for the item
109 | results = await search_repository.search(search_text="search test")
110 |
111 | # Verify we found the item
112 | assert len(results) == 1
113 | assert results[0].title == search_entity.title
114 | assert results[0].project_id == search_repository.project_id
115 |
116 |
117 | @pytest.mark.asyncio
118 | async def test_project_isolation(
119 | search_repository, second_project_repository, search_entity, second_entity
120 | ):
121 | """Test that search is isolated by project."""
122 | # Index entities in both projects
123 | search_row1 = SearchIndexRow(
124 | id=search_entity.id,
125 | type=SearchItemType.ENTITY.value,
126 | title=search_entity.title,
127 | content_stems="unique first project content",
128 | content_snippet="This is a test entity in the first project",
129 | permalink=search_entity.permalink,
130 | file_path=search_entity.file_path,
131 | entity_id=search_entity.id,
132 | metadata={"entity_type": search_entity.entity_type},
133 | created_at=search_entity.created_at,
134 | updated_at=search_entity.updated_at,
135 | project_id=search_repository.project_id,
136 | )
137 |
138 | search_row2 = SearchIndexRow(
139 | id=second_entity.id,
140 | type=SearchItemType.ENTITY.value,
141 | title=second_entity.title,
142 | content_stems="unique second project content",
143 | content_snippet="This is a test entity in the second project",
144 | permalink=second_entity.permalink,
145 | file_path=second_entity.file_path,
146 | entity_id=second_entity.id,
147 | metadata={"entity_type": second_entity.entity_type},
148 | created_at=second_entity.created_at,
149 | updated_at=second_entity.updated_at,
150 | project_id=second_project_repository.project_id,
151 | )
152 |
153 | # Index items in their respective repositories
154 | await search_repository.index_item(search_row1)
155 | await second_project_repository.index_item(search_row2)
156 |
157 | # Search in first project
158 | results1 = await search_repository.search(search_text="unique first")
159 | assert len(results1) == 1
160 | assert results1[0].title == search_entity.title
161 | assert results1[0].project_id == search_repository.project_id
162 |
163 | # Search in second project
164 | results2 = await second_project_repository.search(search_text="unique second")
165 | assert len(results2) == 1
166 | assert results2[0].title == second_entity.title
167 | assert results2[0].project_id == second_project_repository.project_id
168 |
169 | # Make sure first project can't see second project's content
170 | results_cross1 = await search_repository.search(search_text="unique second")
171 | assert len(results_cross1) == 0
172 |
173 | # Make sure second project can't see first project's content
174 | results_cross2 = await second_project_repository.search(search_text="unique first")
175 | assert len(results_cross2) == 0
176 |
177 |
178 | @pytest.mark.asyncio
179 | async def test_delete_by_permalink(search_repository, search_entity):
180 | """Test deleting an item by permalink respects project isolation."""
181 | # Index the item
182 | search_row = SearchIndexRow(
183 | id=search_entity.id,
184 | type=SearchItemType.ENTITY.value,
185 | title=search_entity.title,
186 | content_stems="content to delete",
187 | content_snippet="This content should be deleted",
188 | permalink=search_entity.permalink,
189 | file_path=search_entity.file_path,
190 | entity_id=search_entity.id,
191 | metadata={"entity_type": search_entity.entity_type},
192 | created_at=search_entity.created_at,
193 | updated_at=search_entity.updated_at,
194 | project_id=search_repository.project_id,
195 | )
196 |
197 | await search_repository.index_item(search_row)
198 |
199 | # Verify it exists
200 | results = await search_repository.search(search_text="content to delete")
201 | assert len(results) == 1
202 |
203 | # Delete by permalink
204 | await search_repository.delete_by_permalink(search_entity.permalink)
205 |
206 | # Verify it's gone
207 | results_after = await search_repository.search(search_text="content to delete")
208 | assert len(results_after) == 0
209 |
210 |
211 | @pytest.mark.asyncio
212 | async def test_delete_by_entity_id(search_repository, search_entity):
213 | """Test deleting an item by entity_id respects project isolation."""
214 | # Index the item
215 | search_row = SearchIndexRow(
216 | id=search_entity.id,
217 | type=SearchItemType.ENTITY.value,
218 | title=search_entity.title,
219 | content_stems="entity to delete",
220 | content_snippet="This entity should be deleted",
221 | permalink=search_entity.permalink,
222 | file_path=search_entity.file_path,
223 | entity_id=search_entity.id,
224 | metadata={"entity_type": search_entity.entity_type},
225 | created_at=search_entity.created_at,
226 | updated_at=search_entity.updated_at,
227 | project_id=search_repository.project_id,
228 | )
229 |
230 | await search_repository.index_item(search_row)
231 |
232 | # Verify it exists
233 | results = await search_repository.search(search_text="entity to delete")
234 | assert len(results) == 1
235 |
236 | # Delete by entity_id
237 | await search_repository.delete_by_entity_id(search_entity.id)
238 |
239 | # Verify it's gone
240 | results_after = await search_repository.search(search_text="entity to delete")
241 | assert len(results_after) == 0
242 |
243 |
244 | @pytest.mark.asyncio
245 | async def test_to_insert_includes_project_id(search_repository):
246 | """Test that the to_insert method includes project_id."""
247 | # Create a search index row with project_id
248 | row = SearchIndexRow(
249 | id=1234,
250 | type=SearchItemType.ENTITY.value,
251 | title="Test Title",
252 | content_stems="test content",
253 | content_snippet="test snippet",
254 | permalink="test/permalink",
255 | file_path="test/file.md",
256 | metadata={"test": "metadata"},
257 | created_at=datetime.now(timezone.utc),
258 | updated_at=datetime.now(timezone.utc),
259 | project_id=search_repository.project_id,
260 | )
261 |
262 | # Get insert data
263 | insert_data = row.to_insert()
264 |
265 | # Verify project_id is included
266 | assert "project_id" in insert_data
267 | assert insert_data["project_id"] == search_repository.project_id
268 |
269 |
270 | def test_directory_property():
271 | """Test the directory property of SearchIndexRow."""
272 | # Test a file in a nested directory
273 | row1 = SearchIndexRow(
274 | id=1,
275 | type=SearchItemType.ENTITY.value,
276 | file_path="projects/notes/ideas.md",
277 | created_at=datetime.now(timezone.utc),
278 | updated_at=datetime.now(timezone.utc),
279 | project_id=1,
280 | )
281 | assert row1.directory == "/projects/notes"
282 |
283 | # Test a file at the root level
284 | row2 = SearchIndexRow(
285 | id=2,
286 | type=SearchItemType.ENTITY.value,
287 | file_path="README.md",
288 | created_at=datetime.now(timezone.utc),
289 | updated_at=datetime.now(timezone.utc),
290 | project_id=1,
291 | )
292 | assert row2.directory == "/"
293 |
294 | # Test a non-entity type with empty file_path
295 | row3 = SearchIndexRow(
296 | id=3,
297 | type=SearchItemType.OBSERVATION.value,
298 | file_path="",
299 | created_at=datetime.now(timezone.utc),
300 | updated_at=datetime.now(timezone.utc),
301 | project_id=1,
302 | )
303 | assert row3.directory == ""
304 |
305 |
306 | class TestSearchTermPreparation:
307 | """Test cases for FTS5 search term preparation."""
308 |
309 | def test_simple_terms_get_prefix_wildcard(self, search_repository):
310 | """Simple alphanumeric terms should get prefix matching."""
311 | assert search_repository._prepare_search_term("hello") == "hello*"
312 | assert search_repository._prepare_search_term("project") == "project*"
313 | assert search_repository._prepare_search_term("test123") == "test123*"
314 |
315 | def test_terms_with_existing_wildcard_unchanged(self, search_repository):
316 | """Terms that already contain * should remain unchanged."""
317 | assert search_repository._prepare_search_term("hello*") == "hello*"
318 | assert search_repository._prepare_search_term("test*world") == "test*world"
319 |
320 | def test_boolean_operators_preserved(self, search_repository):
321 | """Boolean operators should be preserved without modification."""
322 | assert search_repository._prepare_search_term("hello AND world") == "hello AND world"
323 | assert search_repository._prepare_search_term("cat OR dog") == "cat OR dog"
324 | assert (
325 | search_repository._prepare_search_term("project NOT meeting") == "project NOT meeting"
326 | )
327 | assert (
328 | search_repository._prepare_search_term("(hello AND world) OR test")
329 | == "(hello AND world) OR test"
330 | )
331 |
332 | def test_hyphenated_terms_with_boolean_operators(self, search_repository):
333 | """Hyphenated terms with Boolean operators should be properly quoted."""
334 | # Test the specific case from the GitHub issue
335 | result = search_repository._prepare_search_term("tier1-test AND unicode")
336 | assert result == '"tier1-test" AND unicode'
337 |
338 | # Test other hyphenated Boolean combinations
339 | assert (
340 | search_repository._prepare_search_term("multi-word OR single")
341 | == '"multi-word" OR single'
342 | )
343 | assert (
344 | search_repository._prepare_search_term("well-formed NOT badly-formed")
345 | == '"well-formed" NOT "badly-formed"'
346 | )
347 | assert (
348 | search_repository._prepare_search_term("test-case AND (hello OR world)")
349 | == '"test-case" AND (hello OR world)'
350 | )
351 |
352 | # Test mixed special characters with Boolean operators
353 | assert (
354 | search_repository._prepare_search_term("config.json AND test-file")
355 | == '"config.json" AND "test-file"'
356 | )
357 | assert (
358 | search_repository._prepare_search_term("C++ OR python-script")
359 | == '"C++" OR "python-script"'
360 | )
361 |
362 | def test_programming_terms_should_work(self, search_repository):
363 | """Programming-related terms with special chars should be searchable."""
364 | # These should be quoted to handle special characters safely
365 | assert search_repository._prepare_search_term("C++") == '"C++"*'
366 | assert search_repository._prepare_search_term("function()") == '"function()"*'
367 | assert search_repository._prepare_search_term("[email protected]") == '"[email protected]"*'
368 | assert search_repository._prepare_search_term("array[index]") == '"array[index]"*'
369 | assert search_repository._prepare_search_term("config.json") == '"config.json"*'
370 |
371 | def test_malformed_fts5_syntax_quoted(self, search_repository):
372 | """Malformed FTS5 syntax should be quoted to prevent errors."""
373 | # Multiple operators without proper syntax
374 | assert search_repository._prepare_search_term("+++invalid+++") == '"+++invalid+++"*'
375 | assert search_repository._prepare_search_term("!!!error!!!") == '"!!!error!!!"*'
376 | assert search_repository._prepare_search_term("@#$%^&*()") == '"@#$%^&*()"*'
377 |
378 | def test_quoted_strings_handled_properly(self, search_repository):
379 | """Strings with quotes should have quotes escaped."""
380 | assert search_repository._prepare_search_term('say "hello"') == '"say ""hello"""*'
381 | assert search_repository._prepare_search_term("it's working") == '"it\'s working"*'
382 |
383 | def test_file_paths_no_prefix_wildcard(self, search_repository):
384 | """File paths should not get prefix wildcards."""
385 | assert (
386 | search_repository._prepare_search_term("config.json", is_prefix=False)
387 | == '"config.json"'
388 | )
389 | assert (
390 | search_repository._prepare_search_term("docs/readme.md", is_prefix=False)
391 | == '"docs/readme.md"'
392 | )
393 |
394 | def test_spaces_handled_correctly(self, search_repository):
395 | """Terms with spaces should use boolean AND for word order independence."""
396 | assert search_repository._prepare_search_term("hello world") == "hello* AND world*"
397 | assert (
398 | search_repository._prepare_search_term("project planning") == "project* AND planning*"
399 | )
400 |
401 | def test_version_strings_with_dots_handled_correctly(self, search_repository):
402 | """Version strings with dots should be quoted to prevent FTS5 syntax errors."""
403 | # This reproduces the bug where "Basic Memory v0.13.0b2" becomes "Basic* AND Memory* AND v0.13.0b2*"
404 | # which causes FTS5 syntax errors because v0.13.0b2* is not valid FTS5 syntax
405 | result = search_repository._prepare_search_term("Basic Memory v0.13.0b2")
406 | # Should be quoted because of dots in v0.13.0b2
407 | assert result == '"Basic Memory v0.13.0b2"*'
408 |
409 | def test_mixed_special_characters_in_multi_word_queries(self, search_repository):
410 | """Multi-word queries with special characters in any word should be fully quoted."""
411 | # Any word containing special characters should cause the entire phrase to be quoted
412 | assert search_repository._prepare_search_term("config.json file") == '"config.json file"*'
413 | assert (
414 | search_repository._prepare_search_term("[email protected] account")
415 | == '"[email protected] account"*'
416 | )
417 | assert search_repository._prepare_search_term("node.js and react") == '"node.js and react"*'
418 |
419 | @pytest.mark.asyncio
420 | async def test_search_with_special_characters_returns_results(self, search_repository):
421 | """Integration test: search with special characters should work gracefully."""
422 | # This test ensures the search doesn't crash with FTS5 syntax errors
423 |
424 | # These should all return empty results gracefully, not crash
425 | results1 = await search_repository.search(search_text="C++")
426 | assert isinstance(results1, list) # Should not crash
427 |
428 | results2 = await search_repository.search(search_text="function()")
429 | assert isinstance(results2, list) # Should not crash
430 |
431 | results3 = await search_repository.search(search_text="+++malformed+++")
432 | assert isinstance(results3, list) # Should not crash, return empty results
433 |
434 | results4 = await search_repository.search(search_text="[email protected]")
435 | assert isinstance(results4, list) # Should not crash
436 |
437 | @pytest.mark.asyncio
438 | async def test_boolean_search_still_works(self, search_repository):
439 | """Boolean search operations should continue to work."""
440 | # These should not crash and should respect boolean logic
441 | results1 = await search_repository.search(search_text="hello AND world")
442 | assert isinstance(results1, list)
443 |
444 | results2 = await search_repository.search(search_text="cat OR dog")
445 | assert isinstance(results2, list)
446 |
447 | results3 = await search_repository.search(search_text="project NOT meeting")
448 | assert isinstance(results3, list)
449 |
450 | @pytest.mark.asyncio
451 | async def test_permalink_match_exact_with_slash(self, search_repository):
452 | """Test exact permalink matching with slash (line 249 coverage)."""
453 | # This tests the exact match path: if "/" in permalink_text:
454 | results = await search_repository.search(permalink_match="test/path")
455 | assert isinstance(results, list)
456 | # Should use exact equality matching for paths with slashes
457 |
458 | @pytest.mark.asyncio
459 | async def test_permalink_match_simple_term(self, search_repository):
460 | """Test permalink matching with simple term (no slash)."""
461 | # This tests the simple term path that goes through _prepare_search_term
462 | results = await search_repository.search(permalink_match="simpleterm")
463 | assert isinstance(results, list)
464 | # Should use FTS5 MATCH for simple terms
465 |
466 | @pytest.mark.asyncio
467 | async def test_fts5_error_handling_database_error(self, search_repository):
468 | """Test that non-FTS5 database errors are properly re-raised."""
469 | import unittest.mock
470 |
471 | # Mock the scoped_session to raise a non-FTS5 error
472 | with unittest.mock.patch("basic_memory.db.scoped_session") as mock_scoped_session:
473 | mock_session = unittest.mock.AsyncMock()
474 | mock_scoped_session.return_value.__aenter__.return_value = mock_session
475 |
476 | # Simulate a database error that's NOT an FTS5 syntax error
477 | mock_session.execute.side_effect = Exception("Database connection failed")
478 |
479 | # This should re-raise the exception (not return empty list)
480 | with pytest.raises(Exception, match="Database connection failed"):
481 | await search_repository.search(search_text="test")
482 |
483 | @pytest.mark.asyncio
484 | async def test_version_string_search_integration(self, search_repository, search_entity):
485 | """Integration test: searching for version strings should work without FTS5 errors."""
486 | # Index an entity with version information
487 | search_row = SearchIndexRow(
488 | id=search_entity.id,
489 | type=SearchItemType.ENTITY.value,
490 | title="Basic Memory v0.13.0b2 Release",
491 | content_stems="basic memory version 0.13.0b2 beta release notes features",
492 | content_snippet="Basic Memory v0.13.0b2 is a beta release with new features",
493 | permalink=search_entity.permalink,
494 | file_path=search_entity.file_path,
495 | entity_id=search_entity.id,
496 | metadata={"entity_type": search_entity.entity_type},
497 | created_at=search_entity.created_at,
498 | updated_at=search_entity.updated_at,
499 | project_id=search_repository.project_id,
500 | )
501 |
502 | await search_repository.index_item(search_row)
503 |
504 | # This should not cause FTS5 syntax errors and should find the entity
505 | results = await search_repository.search(search_text="Basic Memory v0.13.0b2")
506 | assert len(results) == 1
507 | assert results[0].title == "Basic Memory v0.13.0b2 Release"
508 |
509 | # Test other version-like patterns
510 | results2 = await search_repository.search(search_text="v0.13.0b2")
511 | assert len(results2) == 1 # Should still find it due to content_stems
512 |
513 | # Test with other problematic patterns
514 | results3 = await search_repository.search(search_text="node.js version")
515 | assert isinstance(results3, list) # Should not crash
516 |
517 | @pytest.mark.asyncio
518 | async def test_wildcard_only_search(self, search_repository, search_entity):
519 | """Test that wildcard-only search '*' doesn't cause FTS5 errors (line 243 coverage)."""
520 | # Index an entity for testing
521 | search_row = SearchIndexRow(
522 | id=search_entity.id,
523 | type=SearchItemType.ENTITY.value,
524 | title="Test Entity",
525 | content_stems="test entity content",
526 | content_snippet="This is a test entity",
527 | permalink=search_entity.permalink,
528 | file_path=search_entity.file_path,
529 | entity_id=search_entity.id,
530 | metadata={"entity_type": search_entity.entity_type},
531 | created_at=search_entity.created_at,
532 | updated_at=search_entity.updated_at,
533 | project_id=search_repository.project_id,
534 | )
535 |
536 | await search_repository.index_item(search_row)
537 |
538 | # Test wildcard-only search - should not crash and should return results
539 | results = await search_repository.search(search_text="*")
540 | assert isinstance(results, list) # Should not crash
541 | assert len(results) >= 1 # Should return all results, including our test entity
542 |
543 | # Test empty string search - should also not crash
544 | results_empty = await search_repository.search(search_text="")
545 | assert isinstance(results_empty, list) # Should not crash
546 |
547 | # Test whitespace-only search
548 | results_whitespace = await search_repository.search(search_text=" ")
549 | assert isinstance(results_whitespace, list) # Should not crash
550 |
551 | def test_boolean_query_empty_parts_coverage(self, search_repository):
552 | """Test Boolean query parsing with empty parts (line 143 coverage)."""
553 | # Create queries that will result in empty parts after splitting
554 | result1 = search_repository._prepare_boolean_query(
555 | "hello AND AND world"
556 | ) # Double operator
557 | assert "hello" in result1 and "world" in result1
558 |
559 | result2 = search_repository._prepare_boolean_query(" OR test") # Leading operator
560 | assert "test" in result2
561 |
562 | result3 = search_repository._prepare_boolean_query("test OR ") # Trailing operator
563 | assert "test" in result3
564 |
565 | def test_parenthetical_term_quote_escaping(self, search_repository):
566 | """Test quote escaping in parenthetical terms (lines 190-191 coverage)."""
567 | # Test term with quotes that needs escaping
568 | result = search_repository._prepare_parenthetical_term('(say "hello" world)')
569 | # Should escape quotes by doubling them
570 | assert '""hello""' in result
571 |
572 | # Test term with single quotes
573 | result2 = search_repository._prepare_parenthetical_term("(it's working)")
574 | assert "it's working" in result2
575 |
576 | def test_needs_quoting_empty_input(self, search_repository):
577 | """Test _needs_quoting with empty inputs (line 207 coverage)."""
578 | # Test empty string
579 | assert not search_repository._needs_quoting("")
580 |
581 | # Test whitespace-only string
582 | assert not search_repository._needs_quoting(" ")
583 |
584 | # Test None-like cases
585 | assert not search_repository._needs_quoting("\t")
586 |
587 | def test_prepare_single_term_empty_input(self, search_repository):
588 | """Test _prepare_single_term with empty inputs (line 227 coverage)."""
589 | # Test empty string
590 | result1 = search_repository._prepare_single_term("")
591 | assert result1 == ""
592 |
593 | # Test whitespace-only string
594 | result2 = search_repository._prepare_single_term(" ")
595 | assert result2 == " " # Should return as-is
596 |
597 | # Test string that becomes empty after strip
598 | result3 = search_repository._prepare_single_term("\t\n")
599 | assert result3 == "\t\n" # Should return original
600 |
```
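
The quoting rules exercised above follow directly from SQLite FTS5 query syntax: a bare term only matches whole tokens unless a trailing `*` requests prefix matching, and punctuation such as dots or hyphens is treated as query syntax unless the term is double-quoted. A minimal, self-contained sketch of both behaviors, using only the standard-library `sqlite3` module (independent of `SearchRepository`, and assuming a SQLite build with FTS5 enabled, which is true for most Python distributions):

```python
# Standalone illustration of the FTS5 behavior the tests above rely on.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE VIRTUAL TABLE docs USING fts5(body)")
conn.execute("INSERT INTO docs VALUES ('Basic Memory v0.13.0b2 release notes')")

# Prefix matching: 'rel' alone misses 'release'; 'rel*' matches it.
print(conn.execute("SELECT body FROM docs WHERE docs MATCH 'rel*'").fetchall())

# Terms with dots must be quoted: MATCH 'v0.13.0b2' raises an FTS5 syntax error,
# while the quoted phrase matches the same token sequence.
print(conn.execute("SELECT body FROM docs WHERE docs MATCH '\"v0.13.0b2\"'").fetchall())
```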
--------------------------------------------------------------------------------
/src/basic_memory/cli/commands/cloud/bisync_commands.py:
--------------------------------------------------------------------------------
```python
1 | """Cloud bisync commands for Basic Memory CLI."""
2 |
3 | import asyncio
4 | import subprocess
5 | import time
6 | from datetime import datetime
7 | from pathlib import Path
8 | from typing import Optional
9 |
10 | import typer
11 | from rich.console import Console
12 | from rich.table import Table
13 |
14 | from basic_memory.cli.commands.cloud.api_client import CloudAPIError, make_api_request
15 | from basic_memory.cli.commands.cloud.cloud_utils import (
16 | create_cloud_project,
17 | fetch_cloud_projects,
18 | )
19 | from basic_memory.cli.commands.cloud.rclone_config import (
20 | add_tenant_to_rclone_config,
21 | )
22 | from basic_memory.cli.commands.cloud.rclone_installer import RcloneInstallError, install_rclone
23 | from basic_memory.config import ConfigManager
24 | from basic_memory.ignore_utils import get_bmignore_path, create_default_bmignore
25 | from basic_memory.schemas.cloud import (
26 | TenantMountInfo,
27 | MountCredentials,
28 | )
29 |
30 | console = Console()
31 |
32 |
33 | class BisyncError(Exception):
34 | """Exception raised for bisync-related errors."""
35 |
36 | pass
37 |
38 |
39 | class RcloneBisyncProfile:
40 | """Bisync profile with safety settings."""
41 |
42 | def __init__(
43 | self,
44 | name: str,
45 | conflict_resolve: str,
46 | max_delete: int,
47 | check_access: bool,
48 | description: str,
49 | extra_args: Optional[list[str]] = None,
50 | ):
51 | self.name = name
52 | self.conflict_resolve = conflict_resolve
53 | self.max_delete = max_delete
54 | self.check_access = check_access
55 | self.description = description
56 | self.extra_args = extra_args or []
57 |
58 |
59 | # Bisync profiles based on SPEC-9 Phase 2.1
60 | BISYNC_PROFILES = {
61 | "safe": RcloneBisyncProfile(
62 | name="safe",
63 | conflict_resolve="none",
64 | max_delete=10,
65 | check_access=False,
66 | description="Safe mode with conflict preservation (keeps both versions)",
67 | ),
68 | "balanced": RcloneBisyncProfile(
69 | name="balanced",
70 | conflict_resolve="newer",
71 | max_delete=25,
72 | check_access=False,
73 | description="Balanced mode - auto-resolve to newer file (recommended)",
74 | ),
75 | "fast": RcloneBisyncProfile(
76 | name="fast",
77 | conflict_resolve="newer",
78 | max_delete=50,
79 | check_access=False,
80 | description="Fast mode for rapid iteration (skip verification)",
81 | ),
82 | }
83 |
84 |
85 | async def get_mount_info() -> TenantMountInfo:
86 | """Get current tenant information from cloud API."""
87 | try:
88 | config_manager = ConfigManager()
89 | config = config_manager.config
90 | host_url = config.cloud_host.rstrip("/")
91 |
92 | response = await make_api_request(method="GET", url=f"{host_url}/tenant/mount/info")
93 |
94 | return TenantMountInfo.model_validate(response.json())
95 | except Exception as e:
96 | raise BisyncError(f"Failed to get tenant info: {e}") from e
97 |
98 |
99 | async def generate_mount_credentials(tenant_id: str) -> MountCredentials:
100 | """Generate scoped credentials for syncing."""
101 | try:
102 | config_manager = ConfigManager()
103 | config = config_manager.config
104 | host_url = config.cloud_host.rstrip("/")
105 |
106 | response = await make_api_request(method="POST", url=f"{host_url}/tenant/mount/credentials")
107 |
108 | return MountCredentials.model_validate(response.json())
109 | except Exception as e:
110 | raise BisyncError(f"Failed to generate credentials: {e}") from e
111 |
112 |
113 | def scan_local_directories(sync_dir: Path) -> list[str]:
114 | """Scan local sync directory for project folders.
115 |
116 | Args:
117 | sync_dir: Path to bisync directory
118 |
119 | Returns:
120 | List of directory names (project names)
121 | """
122 | if not sync_dir.exists():
123 | return []
124 |
125 | directories = []
126 | for item in sync_dir.iterdir():
127 | if item.is_dir() and not item.name.startswith("."):
128 | directories.append(item.name)
129 |
130 | return directories
131 |
132 |
133 | def get_bisync_state_path(tenant_id: str) -> Path:
134 | """Get path to bisync state directory."""
135 | return Path.home() / ".basic-memory" / "bisync-state" / tenant_id
136 |
137 |
138 | def get_bisync_directory() -> Path:
139 | """Get bisync directory from config.
140 |
141 | Returns:
142 | Path to bisync directory (default: ~/basic-memory-cloud-sync)
143 | """
144 | config_manager = ConfigManager()
145 | config = config_manager.config
146 |
147 | sync_dir = config.bisync_config.get("sync_dir", str(Path.home() / "basic-memory-cloud-sync"))
148 | return Path(sync_dir).expanduser().resolve()
149 |
150 |
151 | def validate_bisync_directory(bisync_dir: Path) -> None:
152 | """Validate bisync directory doesn't conflict with mount.
153 |
154 | Raises:
155 | BisyncError: If bisync directory conflicts with mount directory
156 | """
157 | # Get fixed mount directory
158 | mount_dir = (Path.home() / "basic-memory-cloud").resolve()
159 |
160 | # Check if bisync dir is the same as mount dir
161 | if bisync_dir == mount_dir:
162 | raise BisyncError(
163 | f"Cannot use {bisync_dir} for bisync - it's the mount directory!\n"
164 | f"Mount and bisync must use different directories.\n\n"
165 | f"Options:\n"
166 | f" 1. Use default: ~/basic-memory-cloud-sync/\n"
167 | f" 2. Specify different directory: --dir ~/my-sync-folder"
168 | )
169 |
170 | # Check if mount is active at this location
171 | result = subprocess.run(["mount"], capture_output=True, text=True)
172 | if str(bisync_dir) in result.stdout and "rclone" in result.stdout:
173 | raise BisyncError(
174 | f"{bisync_dir} is currently mounted via 'bm cloud mount'\n"
175 | f"Cannot use mounted directory for bisync.\n\n"
176 | f"Either:\n"
177 | f" 1. Unmount first: bm cloud unmount\n"
178 | f" 2. Use different directory for bisync"
179 | )
180 |
181 |
182 | def convert_bmignore_to_rclone_filters() -> Path:
183 | """Convert .bmignore patterns to rclone filter format.
184 |
185 | Reads ~/.basic-memory/.bmignore (gitignore-style) and converts to
186 | ~/.basic-memory/.bmignore.rclone (rclone filter format).
187 |
188 | Only regenerates if .bmignore has been modified since last conversion.
189 |
190 | Returns:
191 | Path to converted rclone filter file
192 | """
193 | # Ensure .bmignore exists
194 | create_default_bmignore()
195 |
196 | bmignore_path = get_bmignore_path()
197 | # Create rclone filter path: ~/.basic-memory/.bmignore -> ~/.basic-memory/.bmignore.rclone
198 | rclone_filter_path = bmignore_path.parent / f"{bmignore_path.name}.rclone"
199 |
200 | # Skip regeneration if rclone file is newer than bmignore
201 | if rclone_filter_path.exists():
202 | bmignore_mtime = bmignore_path.stat().st_mtime
203 | rclone_mtime = rclone_filter_path.stat().st_mtime
204 | if rclone_mtime >= bmignore_mtime:
205 | return rclone_filter_path
206 |
207 | # Read .bmignore patterns
208 | patterns = []
209 | try:
210 | with bmignore_path.open("r", encoding="utf-8") as f:
211 | for line in f:
212 | line = line.strip()
213 | # Keep comments and empty lines
214 | if not line or line.startswith("#"):
215 | patterns.append(line)
216 | continue
217 |
218 | # Convert gitignore pattern to rclone filter syntax
219 | # gitignore: node_modules → rclone: - node_modules/**
220 | # gitignore: *.pyc → rclone: - *.pyc
221 | if "*" in line:
222 | # Pattern already has wildcard, just add exclude prefix
223 | patterns.append(f"- {line}")
224 | else:
225 | # Directory pattern - add /** for recursive exclude
226 | patterns.append(f"- {line}/**")
227 |
228 | except Exception:
229 | # If we can't read the file, create a minimal filter
230 | patterns = ["# Error reading .bmignore, using minimal filters", "- .git/**"]
231 |
232 | # Write rclone filter file
233 | rclone_filter_path.write_text("\n".join(patterns) + "\n")
234 |
235 | return rclone_filter_path
236 |
237 |
238 | def get_bisync_filter_path() -> Path:
239 | """Get path to bisync filter file.
240 |
241 | Uses ~/.basic-memory/.bmignore (converted to rclone format).
242 | The file is automatically created with default patterns on first use.
243 |
244 | Returns:
245 | Path to rclone filter file
246 | """
247 | return convert_bmignore_to_rclone_filters()
248 |
249 |
250 | def bisync_state_exists(tenant_id: str) -> bool:
251 | """Check if bisync state exists (has been initialized)."""
252 | state_path = get_bisync_state_path(tenant_id)
253 | return state_path.exists() and any(state_path.iterdir())
254 |
255 |
256 | def build_bisync_command(
257 | tenant_id: str,
258 | bucket_name: str,
259 | local_path: Path,
260 | profile: RcloneBisyncProfile,
261 | dry_run: bool = False,
262 | resync: bool = False,
263 | verbose: bool = False,
264 | ) -> list[str]:
265 | """Build rclone bisync command with profile settings."""
266 |
267 | # Sync with the entire bucket root (all projects)
268 | rclone_remote = f"basic-memory-{tenant_id}:{bucket_name}"
269 | filter_path = get_bisync_filter_path()
270 | state_path = get_bisync_state_path(tenant_id)
271 |
272 | # Ensure state directory exists
273 | state_path.mkdir(parents=True, exist_ok=True)
274 |
275 | cmd = [
276 | "rclone",
277 | "bisync",
278 | str(local_path),
279 | rclone_remote,
280 | "--create-empty-src-dirs",
281 | "--resilient",
282 | f"--conflict-resolve={profile.conflict_resolve}",
283 | f"--max-delete={profile.max_delete}",
284 | "--filters-file",
285 | str(filter_path),
286 | "--workdir",
287 | str(state_path),
288 | ]
289 |
290 | # Add verbosity flags
291 | if verbose:
292 | cmd.append("--verbose") # Full details with file-by-file output
293 | else:
294 | # Show progress bar during transfers
295 | cmd.append("--progress")
296 |
297 | if profile.check_access:
298 | cmd.append("--check-access")
299 |
300 | if dry_run:
301 | cmd.append("--dry-run")
302 |
303 | if resync:
304 | cmd.append("--resync")
305 |
306 | cmd.extend(profile.extra_args)
307 |
308 | return cmd
309 |
310 |
311 | def setup_cloud_bisync(sync_dir: Optional[str] = None) -> None:
312 | """Set up cloud bisync with rclone installation and configuration.
313 |
314 | Args:
315 | sync_dir: Optional custom sync directory path. If not provided, uses config default.
316 | """
317 | console.print("[bold blue]Basic Memory Cloud Bisync Setup[/bold blue]")
318 | console.print("Setting up bidirectional sync to your cloud tenant...\n")
319 |
320 | try:
321 | # Step 1: Install rclone
322 | console.print("[blue]Step 1: Installing rclone...[/blue]")
323 | install_rclone()
324 |
325 | # Step 2: Get mount info (for tenant_id, bucket)
326 | console.print("\n[blue]Step 2: Getting tenant information...[/blue]")
327 | tenant_info = asyncio.run(get_mount_info())
328 |
329 | tenant_id = tenant_info.tenant_id
330 | bucket_name = tenant_info.bucket_name
331 |
332 | console.print(f"[green]✓ Found tenant: {tenant_id}[/green]")
333 | console.print(f"[green]✓ Bucket: {bucket_name}[/green]")
334 |
335 | # Step 3: Generate credentials
336 | console.print("\n[blue]Step 3: Generating sync credentials...[/blue]")
337 | creds = asyncio.run(generate_mount_credentials(tenant_id))
338 |
339 | access_key = creds.access_key
340 | secret_key = creds.secret_key
341 |
342 | console.print("[green]✓ Generated secure credentials[/green]")
343 |
344 | # Step 4: Configure rclone
345 | console.print("\n[blue]Step 4: Configuring rclone...[/blue]")
346 | add_tenant_to_rclone_config(
347 | tenant_id=tenant_id,
348 | bucket_name=bucket_name,
349 | access_key=access_key,
350 | secret_key=secret_key,
351 | )
352 |
353 | # Step 5: Configure and create local directory
354 | console.print("\n[blue]Step 5: Configuring sync directory...[/blue]")
355 |
356 | # If custom sync_dir provided, save to config
357 | if sync_dir:
358 | config_manager = ConfigManager()
359 | config = config_manager.load_config()
360 | config.bisync_config["sync_dir"] = sync_dir
361 | config_manager.save_config(config)
362 | console.print("[green]✓ Saved custom sync directory to config[/green]")
363 |
364 | # Get bisync directory (from config or default)
365 | local_path = get_bisync_directory()
366 |
367 | # Validate bisync directory
368 | validate_bisync_directory(local_path)
369 |
370 | # Create directory
371 | local_path.mkdir(parents=True, exist_ok=True)
372 | console.print(f"[green]✓ Created sync directory: {local_path}[/green]")
373 |
374 | # Step 6: Perform initial resync
375 | console.print("\n[blue]Step 6: Performing initial sync...[/blue]")
376 | console.print("[yellow]This will establish the baseline for bidirectional sync.[/yellow]")
377 |
378 | run_bisync(
379 | tenant_id=tenant_id,
380 | bucket_name=bucket_name,
381 | local_path=local_path,
382 | profile_name="balanced",
383 | resync=True,
384 | )
385 |
386 | console.print("\n[bold green]✓ Bisync setup completed successfully![/bold green]")
387 | console.print("\nYour local files will now sync bidirectionally with the cloud!")
388 | console.print(f"\nLocal directory: {local_path}")
389 | console.print("\nUseful commands:")
390 | console.print(" bm sync # Run sync (recommended)")
391 | console.print(" bm sync --watch # Start watch mode")
392 | console.print(" bm cloud status # Check sync status")
393 | console.print(" bm cloud check # Verify file integrity")
394 | console.print(" bm cloud bisync --dry-run # Preview changes (advanced)")
395 |
396 | except (RcloneInstallError, BisyncError, CloudAPIError) as e:
397 | console.print(f"\n[red]Setup failed: {e}[/red]")
398 | raise typer.Exit(1)
399 | except Exception as e:
400 | console.print(f"\n[red]Unexpected error during setup: {e}[/red]")
401 | raise typer.Exit(1)
402 |
403 |
404 | def run_bisync(
405 | tenant_id: Optional[str] = None,
406 | bucket_name: Optional[str] = None,
407 | local_path: Optional[Path] = None,
408 | profile_name: str = "balanced",
409 | dry_run: bool = False,
410 | resync: bool = False,
411 | verbose: bool = False,
412 | ) -> bool:
413 | """Run rclone bisync with specified profile."""
414 |
415 | try:
416 | # Get tenant info if not provided
417 | if not tenant_id or not bucket_name:
418 | tenant_info = asyncio.run(get_mount_info())
419 | tenant_id = tenant_info.tenant_id
420 | bucket_name = tenant_info.bucket_name
421 |
422 | # Set default local path if not provided
423 | if not local_path:
424 | local_path = get_bisync_directory()
425 |
426 | # Validate bisync directory
427 | validate_bisync_directory(local_path)
428 |
429 | # Check if local path exists
430 | if not local_path.exists():
431 | raise BisyncError(
432 | f"Local directory {local_path} does not exist. Run 'basic-memory cloud bisync-setup' first."
433 | )
434 |
435 | # Get bisync profile
436 | if profile_name not in BISYNC_PROFILES:
437 | raise BisyncError(
438 | f"Unknown profile: {profile_name}. Available: {list(BISYNC_PROFILES.keys())}"
439 | )
440 |
441 | profile = BISYNC_PROFILES[profile_name]
442 |
443 | # Auto-register projects before sync (unless dry-run or resync)
444 | if not dry_run and not resync:
445 | try:
446 | console.print("[dim]Checking for new projects...[/dim]")
447 |
448 | # Fetch cloud projects and extract directory names from paths
449 | cloud_data = asyncio.run(fetch_cloud_projects())
450 | cloud_projects = cloud_data.projects
451 |
452 | # Extract directory names from cloud project paths
453 | # Compare directory names, not project names
454 | # Cloud path /app/data/basic-memory -> directory name "basic-memory"
455 | cloud_dir_names = set()
456 | for p in cloud_projects:
457 | path = p.path
458 | # Strip /app/data/ prefix if present (cloud mode)
459 | if path.startswith("/app/data/"):
460 | path = path[len("/app/data/") :]
461 | # Get the last segment (directory name)
462 | dir_name = Path(path).name
463 | cloud_dir_names.add(dir_name)
464 |
465 | # Scan local directories
466 | local_dirs = scan_local_directories(local_path)
467 |
468 | # Create missing cloud projects
469 | new_projects = []
470 | for dir_name in local_dirs:
471 | if dir_name not in cloud_dir_names:
472 | new_projects.append(dir_name)
473 |
474 | if new_projects:
475 | console.print(
476 | f"[blue]Found {len(new_projects)} new local project(s), creating on cloud...[/blue]"
477 | )
478 | for project_name in new_projects:
479 | try:
480 | asyncio.run(create_cloud_project(project_name))
481 | console.print(f"[green] ✓ Created project: {project_name}[/green]")
482 | except BisyncError as e:
483 | console.print(
484 | f"[yellow] ⚠ Could not create {project_name}: {e}[/yellow]"
485 | )
486 | else:
487 | console.print("[dim]All local projects already registered on cloud[/dim]")
488 |
489 | except Exception as e:
490 | console.print(f"[yellow]Warning: Project auto-registration failed: {e}[/yellow]")
491 | console.print("[yellow]Continuing with sync anyway...[/yellow]")
492 |
493 | # Check if first run and require resync
494 | if not resync and not bisync_state_exists(tenant_id) and not dry_run:
495 | raise BisyncError(
496 | "First bisync requires --resync to establish baseline. "
497 | "Run: basic-memory cloud bisync --resync"
498 | )
499 |
500 | # Build and execute bisync command
501 | bisync_cmd = build_bisync_command(
502 | tenant_id,
503 | bucket_name,
504 | local_path,
505 | profile,
506 | dry_run=dry_run,
507 | resync=resync,
508 | verbose=verbose,
509 | )
510 |
511 | if dry_run:
512 | console.print("[yellow]DRY RUN MODE - No changes will be made[/yellow]")
513 |
514 | console.print(
515 | f"[blue]Running bisync with profile '{profile_name}' ({profile.description})...[/blue]"
516 | )
517 | console.print(f"[dim]Command: {' '.join(bisync_cmd)}[/dim]")
518 | console.print() # Blank line before output
519 |
520 | # Stream output in real-time so user sees progress
521 | result = subprocess.run(bisync_cmd, text=True)
522 |
523 | if result.returncode != 0:
524 | raise BisyncError(f"Bisync command failed with code {result.returncode}")
525 |
526 | console.print() # Blank line after output
527 |
528 | if dry_run:
529 | console.print("[green]✓ Dry run completed successfully[/green]")
530 | elif resync:
531 | console.print("[green]✓ Initial sync baseline established[/green]")
532 | else:
533 | console.print("[green]✓ Sync completed successfully[/green]")
534 |
535 | # Notify container to refresh cache (if not dry run)
536 | if not dry_run:
537 | try:
538 | asyncio.run(notify_container_sync(tenant_id))
539 | except Exception as e:
540 | console.print(f"[yellow]Warning: Could not notify container: {e}[/yellow]")
541 |
542 | return True
543 |
544 | except BisyncError:
545 | raise
546 | except Exception as e:
547 | raise BisyncError(f"Unexpected error during bisync: {e}") from e
548 |
549 |
550 | async def notify_container_sync(tenant_id: str) -> None:
551 | """Sync all projects after bisync completes."""
552 | try:
553 | from basic_memory.cli.commands.command_utils import run_sync
554 |
555 | # Fetch all projects and sync each one
556 | cloud_data = await fetch_cloud_projects()
557 | projects = cloud_data.projects
558 |
559 | if not projects:
560 | console.print("[dim]No projects to sync[/dim]")
561 | return
562 |
563 | console.print(f"[blue]Notifying cloud to index {len(projects)} project(s)...[/blue]")
564 |
565 | for project in projects:
566 | project_name = project.name
567 | if project_name:
568 | try:
569 | await run_sync(project=project_name)
570 | except Exception as e:
571 | # Non-critical, log and continue
572 | console.print(f"[yellow] ⚠ Sync failed for {project_name}: {e}[/yellow]")
573 |
574 | console.print("[dim]Note: Cloud indexing has started and may take a few moments[/dim]")
575 |
576 | except Exception as e:
577 | # Non-critical, don't fail the bisync
578 | console.print(f"[yellow]Warning: Post-sync failed: {e}[/yellow]")
579 |
580 |
581 | def run_bisync_watch(
582 | tenant_id: Optional[str] = None,
583 | bucket_name: Optional[str] = None,
584 | local_path: Optional[Path] = None,
585 | profile_name: str = "balanced",
586 | interval_seconds: int = 60,
587 | ) -> None:
588 | """Run bisync in watch mode with periodic syncs."""
589 |
590 | console.print("[bold blue]Starting bisync watch mode[/bold blue]")
591 | console.print(f"Sync interval: {interval_seconds} seconds")
592 | console.print("Press Ctrl+C to stop\n")
593 |
594 | try:
595 | while True:
596 | try:
597 | start_time = time.time()
598 |
599 | run_bisync(
600 | tenant_id=tenant_id,
601 | bucket_name=bucket_name,
602 | local_path=local_path,
603 | profile_name=profile_name,
604 | )
605 |
606 | elapsed = time.time() - start_time
607 | console.print(f"[dim]Sync completed in {elapsed:.1f}s[/dim]")
608 |
609 | # Wait for next interval
610 | time.sleep(interval_seconds)
611 |
612 | except BisyncError as e:
613 | console.print(f"[red]Sync error: {e}[/red]")
614 | console.print(f"[yellow]Retrying in {interval_seconds} seconds...[/yellow]")
615 | time.sleep(interval_seconds)
616 |
617 | except KeyboardInterrupt:
618 | console.print("\n[yellow]Watch mode stopped[/yellow]")
619 |
620 |
621 | def show_bisync_status() -> None:
622 | """Show current bisync status and configuration."""
623 |
624 | try:
625 | # Get tenant info
626 | tenant_info = asyncio.run(get_mount_info())
627 | tenant_id = tenant_info.tenant_id
628 |
629 | local_path = get_bisync_directory()
630 | state_path = get_bisync_state_path(tenant_id)
631 |
632 | # Create status table
633 | table = Table(title="Cloud Bisync Status", show_header=True, header_style="bold blue")
634 | table.add_column("Property", style="green", min_width=20)
635 | table.add_column("Value", style="dim", min_width=30)
636 |
637 | # Check initialization status
638 | is_initialized = bisync_state_exists(tenant_id)
639 | init_status = (
640 | "[green]✓ Initialized[/green]" if is_initialized else "[red]✗ Not initialized[/red]"
641 | )
642 |
643 | table.add_row("Tenant ID", tenant_id)
644 | table.add_row("Local Directory", str(local_path))
645 | table.add_row("Status", init_status)
646 | table.add_row("State Directory", str(state_path))
647 |
648 | # Check for last sync info
649 | if is_initialized:
650 | # Look for most recent state file
651 | state_files = list(state_path.glob("*.lst"))
652 | if state_files:
653 | latest = max(state_files, key=lambda p: p.stat().st_mtime)
654 | last_sync = datetime.fromtimestamp(latest.stat().st_mtime)
655 | table.add_row("Last Sync", last_sync.strftime("%Y-%m-%d %H:%M:%S"))
656 |
657 | console.print(table)
658 |
659 | # Show bisync profiles
660 | console.print("\n[bold]Available bisync profiles:[/bold]")
661 | for name, profile in BISYNC_PROFILES.items():
662 | console.print(f" {name}: {profile.description}")
663 | console.print(f" - Conflict resolution: {profile.conflict_resolve}")
664 | console.print(f" - Max delete: {profile.max_delete} files")
665 |
666 | console.print("\n[dim]To use a profile: bm cloud bisync --profile <name>[/dim]")
667 |
668 | # Show setup instructions if not initialized
669 | if not is_initialized:
670 | console.print("\n[yellow]To initialize bisync, run:[/yellow]")
671 | console.print(" bm cloud setup")
672 | console.print(" or")
673 | console.print(" bm cloud bisync --resync")
674 |
675 | except Exception as e:
676 | console.print(f"[red]Error getting bisync status: {e}[/red]")
677 | raise typer.Exit(1)
678 |
679 |
680 | def run_check(
681 | tenant_id: Optional[str] = None,
682 | bucket_name: Optional[str] = None,
683 | local_path: Optional[Path] = None,
684 | one_way: bool = False,
685 | ) -> bool:
686 | """Check file integrity between local and cloud using rclone check.
687 |
688 | Args:
689 | tenant_id: Cloud tenant ID (auto-detected if not provided)
690 | bucket_name: S3 bucket name (auto-detected if not provided)
691 | local_path: Local bisync directory (uses config default if not provided)
692 | one_way: If True, only check for missing files on destination (faster)
693 |
694 | Returns:
695 | True if check passed (files match), False if differences found
696 | """
697 | try:
698 | # Check if rclone is installed
699 | from basic_memory.cli.commands.cloud.rclone_installer import is_rclone_installed
700 |
701 | if not is_rclone_installed():
702 | raise BisyncError(
703 | "rclone is not installed. Run 'bm cloud bisync-setup' first to set up cloud sync."
704 | )
705 |
706 | # Get tenant info if not provided
707 | if not tenant_id or not bucket_name:
708 | tenant_info = asyncio.run(get_mount_info())
709 | tenant_id = tenant_id or tenant_info.tenant_id
710 | bucket_name = bucket_name or tenant_info.bucket_name
711 |
712 | # Get local path from config
713 | if not local_path:
714 | local_path = get_bisync_directory()
715 |
716 | # Check if bisync is initialized
717 | if not bisync_state_exists(tenant_id):
718 | raise BisyncError(
719 | "Bisync not initialized. Run 'bm cloud bisync --resync' to establish baseline."
720 | )
721 |
722 | # Build rclone check command
723 | rclone_remote = f"basic-memory-{tenant_id}:{bucket_name}"
724 | filter_path = get_bisync_filter_path()
725 |
726 | cmd = [
727 | "rclone",
728 | "check",
729 | str(local_path),
730 | rclone_remote,
731 | "--filter-from",
732 | str(filter_path),
733 | ]
734 |
735 | if one_way:
736 | cmd.append("--one-way")
737 |
738 | console.print("[bold blue]Checking file integrity between local and cloud[/bold blue]")
739 | console.print(f"[dim]Local: {local_path}[/dim]")
740 | console.print(f"[dim]Remote: {rclone_remote}[/dim]")
741 | console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
742 | console.print()
743 |
744 | # Run check command
745 | result = subprocess.run(cmd, capture_output=True, text=True)
746 |
747 | # rclone check returns:
748 | # 0 = success (all files match)
749 | # non-zero = differences found or error
750 | if result.returncode == 0:
751 | console.print("[green]✓ All files match between local and cloud[/green]")
752 | return True
753 | else:
754 | console.print("[yellow]⚠ Differences found:[/yellow]")
755 | if result.stderr:
756 | console.print(result.stderr)
757 | if result.stdout:
758 | console.print(result.stdout)
759 | console.print("\n[dim]To sync differences, run: bm sync[/dim]")
760 | return False
761 |
762 | except BisyncError:
763 | raise
764 | except Exception as e:
765 | raise BisyncError(f"Check failed: {e}") from e
766 |
```
--------------------------------------------------------------------------------
/specs/SPEC-10 Unified Deployment Workflow and Event Tracking.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | title: 'SPEC-10: Unified Deployment Workflow and Event Tracking'
3 | type: spec
4 | permalink: specs/spec-10-unified-deployment-workflow-event-tracking
5 | tags:
6 | - workflow
7 | - deployment
8 | - event-sourcing
9 | - architecture
10 | - simplification
11 | ---
12 |
13 | # SPEC-10: Unified Deployment Workflow and Event Tracking
14 |
15 | ## Why
16 |
17 | We replaced a complex multi-workflow system with DBOS orchestration that was proving to be more trouble than it was worth. The previous architecture had four separate workflows (`tenant_provisioning`, `tenant_update`, `tenant_deployment`, `tenant_undeploy`) with overlapping logic, complex state management, and fragmented event tracking. DBOS added unnecessary complexity without providing sufficient value, leading to harder debugging and maintenance.
18 |
19 | **Problems Solved:**
20 | - **Framework Complexity**: DBOS configuration overhead and fighting framework limitations
21 | - **Code Duplication**: Multiple workflows implementing similar operations with duplicate logic
22 | - **Poor Observability**: Fragmented event tracking across workflow boundaries
23 | - **Maintenance Overhead**: Complex orchestration for fundamentally simple operations
24 | - **Debugging Difficulty**: Framework abstractions hiding simple Python stack traces
25 |
26 | ## What
27 |
28 | This spec documents the architectural simplification that consolidates tenant lifecycle management into a unified system with comprehensive event tracking.
29 |
30 | **Affected Areas:**
31 | - Tenant deployment workflows (provisioning, updates, undeploying)
32 | - Event sourcing and workflow tracking infrastructure
33 | - API endpoints for tenant operations
34 | - Database schema for workflow and event correlation
35 | - Integration testing for tenant lifecycle operations
36 |
37 | **Key Changes:**
38 | - **Removed DBOS entirely** - eliminated framework dependency and complexity
39 | - **Consolidated 4 workflows → 2 unified workflows (deploy/undeploy)**
40 | - **Added workflow tracking system** with complete event correlation
41 | - **Simplified API surface** - single `/deploy` endpoint handles all scenarios
42 | - **Enhanced observability** through event sourcing with workflow grouping
43 |
44 | ## How (High Level)
45 |
46 | ### Architectural Philosophy
47 | **Embrace simplicity over framework complexity** - use well-structured Python with proper database design instead of complex orchestration frameworks.
48 |
49 | ### Core Components
50 |
51 | #### 1. Unified Deployment Workflow
52 | ```python
53 | class TenantDeploymentWorkflow:
54 |     async def deploy_tenant_workflow(self, tenant_id: UUID, workflow_id: UUID, image_tag: str | None = None):
55 | # Single workflow handles both initial provisioning AND updates
56 | # Each step is idempotent and handles its own error recovery
57 | # Database transactions provide the durability we need
58 |         await self.start_deployment_step(workflow_id, tenant_id, image_tag)
59 |         await self.create_fly_app_step(workflow_id, tenant_id)
60 |         await self.create_bucket_step(workflow_id, tenant_id)
61 |         await self.deploy_machine_step(workflow_id, tenant_id, image_tag)
62 |         await self.complete_deployment_step(workflow_id, tenant_id, image_tag, deployment_time)
63 | ```
64 |
65 | **Key Benefits:**
66 | - **Handles both provisioning and updates** in single workflow
67 | - **Idempotent operations** - safe to retry any step (see the sketch below)
68 | - **Clean error handling** via simple Python exceptions
69 | - **Resumable** - can restart from any failed step
70 |
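As an illustration of the idempotency point above, here is a minimal sketch of what a single step can look like. The helper names (`fly_client.app_exists`, `fly_client.create_app`, `record_event`) and the app-naming convention are assumptions for the example, not the actual implementation:

```python
from uuid import UUID


class TenantDeploymentWorkflow:  # continuation sketch; fly_client and record_event are assumed helpers
    async def create_fly_app_step(self, workflow_id: UUID, tenant_id: UUID) -> None:
        """Idempotent step: safe to re-run after a partial failure."""
        app_name = f"basic-memory-{tenant_id}"  # deterministic name, so retries target the same app
        if await self.fly_client.app_exists(app_name):
            # Already provisioned on an earlier attempt - record the fact and move on.
            await self.record_event(workflow_id, tenant_id, "fly_app_exists", {"app": app_name})
            return
        await self.fly_client.create_app(app_name)
        await self.record_event(workflow_id, tenant_id, "fly_app_created", {"app": app_name})
```

Because each step first checks the state it is responsible for, the whole workflow can be restarted from the top after a failure without creating duplicate resources.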
71 | #### 2. Workflow Tracking System
72 |
73 | **Database Schema:**
74 | ```sql
75 | CREATE TABLE workflow (
76 | id UUID PRIMARY KEY,
77 | workflow_type VARCHAR(50) NOT NULL, -- 'tenant_deployment', 'tenant_undeploy'
78 | tenant_id UUID REFERENCES tenant(id),
79 | status VARCHAR(20) DEFAULT 'running', -- 'running', 'completed', 'failed'
80 | workflow_metadata JSONB DEFAULT '{}' -- image_tag, etc.
81 | );
82 |
83 | ALTER TABLE event ADD COLUMN workflow_id UUID REFERENCES workflow(id);
84 | ```
85 |
86 | **Event Correlation:**
87 | - Every workflow operation generates events tagged with `workflow_id`
88 | - Complete audit trail from workflow start to completion
89 | - Events grouped by workflow for easy reconstruction of operations (query sketch below)
90 |
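To show how the correlation is used in practice, here is a minimal query sketch for reconstructing one workflow's audit trail. It assumes an async SQLAlchemy session and a `created_at` timestamp column on `event`, neither of which is shown in the schema above:

```python
from uuid import UUID

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession


async def events_for_workflow(session: AsyncSession, workflow_id: UUID) -> list[dict]:
    """Return one workflow's events in order - its complete audit trail."""
    result = await session.execute(
        text(
            "SELECT * FROM event"
            " WHERE workflow_id = :workflow_id"
            " ORDER BY created_at"
        ),
        {"workflow_id": str(workflow_id)},
    )
    return [dict(row) for row in result.mappings()]
```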
91 | #### 3. Parameter Standardization
92 | All workflow methods follow consistent signature pattern:
93 | ```python
94 | async def method_name(self, session: AsyncSession, workflow_id: UUID | None, tenant_id: UUID, ...)
95 | ```
96 |
97 | **Benefits:**
98 | - **Consistent event tagging** - all events properly correlated
99 | - **Clear method contracts** - workflow_id always leads the domain parameters (immediately after the session)
100 | - **Type safety** - proper UUID handling throughout
101 |
102 | ### Implementation Strategy
103 |
104 | #### Phase 1: Workflow Consolidation ✅ COMPLETED
105 | - [x] **Remove DBOS dependency** - eliminated dbos_config.py and all DBOS imports
106 | - [x] **Create unified TenantDeploymentWorkflow** - handles both provisioning and updates
107 | - [x] **Remove legacy workflows** - deleted tenant_provisioning.py, tenant_update.py
108 | - [x] **Simplify API endpoints** - consolidated to single `/deploy` endpoint
109 | - [x] **Update integration tests** - comprehensive edge case testing
110 |
111 | #### Phase 2: Workflow Tracking System ✅ COMPLETED
112 | - [x] **Database migration** - added workflow table and event.workflow_id foreign key
113 | - [x] **Workflow repository** - CRUD operations for workflow records
114 | - [x] **Event correlation** - all workflow events tagged with workflow_id
115 | - [x] **Comprehensive testing** - workflow lifecycle and event grouping tests
116 |
117 | #### Phase 3: Parameter Standardization ✅ COMPLETED
118 | - [x] **Standardize method signatures** - workflow_id as first parameter pattern
119 | - [x] **Fix event tagging** - ensure all workflow events properly correlated
120 | - [x] **Update service methods** - consistent parameter order across tenant_service
121 | - [x] **Integration test validation** - verify complete event sequences
122 |
123 | ### Architectural Benefits
124 |
125 | #### Code Simplification
126 | - **39 files changed**: 2,247 additions, 3,256 deletions (net -1,009 lines)
127 | - **Eliminated framework complexity** - no more DBOS configuration or abstractions
128 | - **Consolidated logic** - single deployment workflow vs 4 separate workflows
129 | - **Cleaner API surface** - unified endpoint vs multiple workflow-specific endpoints
130 |
131 | #### Enhanced Observability
132 | - **Complete event correlation** - every workflow event tagged with workflow_id
133 | - **Audit trail reconstruction** - can trace entire tenant lifecycle through events
134 | - **Workflow status tracking** - running/completed/failed states in database
135 | - **Comprehensive testing** - edge cases covered with real infrastructure
136 |
137 | #### Operational Benefits
138 | - **Simpler debugging** - plain Python stack traces vs framework abstractions
139 | - **Reduced dependencies** - one less complex framework to maintain
140 | - **Better error handling** - explicit exception handling vs framework magic
141 | - **Easier maintenance** - straightforward Python code vs orchestration complexity
142 |
143 | ## How to Evaluate
144 |
145 | ### Success Criteria
146 |
147 | #### Functional Completeness ✅ VERIFIED
148 | - [x] **Unified deployment workflow** handles both initial provisioning and updates
149 | - [x] **Undeploy workflow** properly integrated with event tracking
150 | - [x] **All operations idempotent** - safe to retry any step without duplication
151 | - [x] **Complete tenant lifecycle** - provision → active → update → undeploy
152 |
153 | #### Event Tracking and Correlation ✅ VERIFIED
154 | - [x] **All workflow events tagged** with proper workflow_id
155 | - [x] **Event sequence verification** - tests assert exact event order and content
156 | - [x] **Workflow grouping** - events can be queried by workflow_id for complete audit trail
157 | - [x] **Cross-workflow isolation** - deployment vs undeploy events properly separated
158 |
159 | #### Database Schema and Performance ✅ VERIFIED
160 | - [x] **Migration applied** - workflow table and event.workflow_id column created
161 | - [x] **Proper indexing** - performance optimized queries on workflow_type, tenant_id, status
162 | - [x] **Foreign key constraints** - referential integrity between workflows and events
163 | - [x] **Database triggers** - updated_at timestamp automation
164 |
165 | #### Test Coverage ✅ COMPREHENSIVE
166 | - [x] **Unit tests**: 4 workflow tracking tests covering lifecycle and event grouping
167 | - [x] **Integration tests**: Real infrastructure testing with Fly.io resources
168 | - [x] **Edge case coverage**: Failed deployments, partial state recovery, resource conflicts
169 | - [x] **Event sequence verification**: Exact event order and content validation
170 |
171 | ### Testing Procedure
172 |
173 | #### Unit Test Validation ✅ PASSING
174 | ```bash
175 | cd apps/cloud && pytest tests/test_workflow_tracking.py -v
176 | # 4/4 tests passing - workflow lifecycle and event grouping
177 | ```
178 |
179 | #### Integration Test Validation ✅ PASSING
180 | ```bash
181 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_deployment_integration.py -v
182 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_undeploy_integration.py -v
183 | # Comprehensive real infrastructure testing with actual Fly.io resources
184 | # Tests provision → deploy → update → undeploy → cleanup cycles
185 | ```
186 |
187 | ### Performance Metrics
188 |
189 | #### Code Metrics ✅ ACHIEVED
190 | - **Net code reduction**: -1,009 lines (3,256 deletions, 2,247 additions)
191 | - **Workflow consolidation**: 4 workflows → 1 unified deployment workflow
192 | - **Dependency reduction**: Removed DBOS framework dependency entirely
193 | - **API simplification**: Multiple endpoints → single `/deploy` endpoint
194 |
195 | #### Operational Metrics ✅ VERIFIED
196 | - **Event correlation**: 100% of workflow events properly tagged with workflow_id
197 | - **Audit trail completeness**: Full tenant lifecycle traceable through event sequences
198 | - **Error handling**: Clean Python exceptions vs framework abstractions
199 | - **Debugging simplicity**: Direct stack traces vs orchestration complexity
200 |
201 | ### Implementation Status: ✅ COMPLETE
202 |
203 | All phases completed successfully with comprehensive testing and verification:
204 |
205 | **Phase 1 - Workflow Consolidation**: ✅ COMPLETE
206 | - Removed DBOS dependency and consolidated workflows
207 | - Unified deployment workflow handles all scenarios
208 | - Comprehensive integration testing with real infrastructure
209 |
210 | **Phase 2 - Workflow Tracking**: ✅ COMPLETE
211 | - Database schema implemented with proper indexing
212 | - Event correlation system fully functional
213 | - Complete audit trail capability verified
214 |
215 | **Phase 3 - Parameter Standardization**: ✅ COMPLETE
216 | - Consistent method signatures across all workflow methods
217 | - All events properly tagged with workflow_id
218 | - Type safety verified across entire codebase
219 |
220 | **Phase 4 - Asynchronous Job Queuing**:
221 | **Goal**: Transform synchronous deployment workflows into background jobs for better user experience and system reliability.
222 |
223 | **Current Problem**:
224 | - Deployment API calls are synchronous - users wait for entire tenant provisioning (30-60 seconds)
225 | - No retry mechanism for failed operations
226 | - HTTP timeouts on long-running deployments
227 | - Poor user experience during infrastructure provisioning
228 |
229 | **Solution**: Redis-backed job queue with arq for reliable background processing
230 |
231 | #### Architecture Overview
232 | ```python
233 | # API Layer: Return immediately with job tracking
234 | @router.post("/{tenant_id}/deploy")
235 | async def deploy_tenant(tenant_id: UUID):
236 | # Create workflow record in Postgres
237 | workflow = await workflow_repo.create_workflow("tenant_deployment", tenant_id)
238 |
239 | # Enqueue job in Redis
240 | job = await arq_pool.enqueue_job('deploy_tenant_task', tenant_id, workflow.id)
241 |
242 | # Return job ID immediately
243 | return {"job_id": job.job_id, "workflow_id": workflow.id, "status": "queued"}
244 |
245 | # Background Worker: Process via existing unified workflow
246 | async def deploy_tenant_task(ctx, tenant_id: str, workflow_id: str):
247 | # Existing workflow logic - zero changes needed!
248 | await workflow_manager.deploy_tenant(UUID(tenant_id), workflow_id=UUID(workflow_id))
249 | ```
250 |
251 | #### Implementation Tasks
252 |
253 | **Phase 4.1: Core Job Queue Setup** ✅ COMPLETED
254 | - [x] **Add arq dependency** - integrated Redis job queue with existing infrastructure
255 | - [x] **Create job definitions** - wrapped existing deployment/undeploy workflows as arq tasks
256 | - [x] **Update API endpoints** - updated provisioning endpoints to return job IDs instead of waiting for completion
257 | - [x] **JobQueueService implementation** - service layer for job enqueueing and status tracking
258 | - [x] **Job status tracking** - integrated with existing workflow table for status updates
259 | - [x] **Comprehensive testing** - 18 tests covering positive, negative, and edge cases
260 |
261 | **Phase 4.2: Background Worker Implementation** ✅ COMPLETED
262 | - [x] **Job status API** - GET /jobs/{job_id}/status endpoint integrated with JobQueueService
263 | - [x] **Background worker process** - arq worker to process queued jobs with proper settings and Redis configuration
264 | - [x] **Worker settings and configuration** - WorkerSettings class with proper timeouts, max jobs, and error handling
265 | - [x] **Fix API endpoints** - updated job status API to use JobQueueService instead of direct Redis access
266 | - [x] **Integration testing** - comprehensive end-to-end testing with real ARQ workers and Fly.io infrastructure
267 | - [x] **Worker entry points** - dual-purpose entrypoint.sh script and __main__.py module support for both API and worker processes
268 | - [x] **Test fixture updates** - fixed all API and service test fixtures to work with job queue dependencies
269 | - [x] **AsyncIO event loop fixes** - resolved event loop issues in integration tests for subprocess worker compatibility
270 | - [x] **Complete test coverage** - all 46 tests passing across unit, integration, and API test suites
271 | - [x] **Type safety verification** - 0 type checking errors across entire ARQ job queue implementation
272 |
273 | #### Phase 4.2 Implementation Summary ✅ COMPLETE
274 |
275 | **Core ARQ Job Queue System:**
276 | - **JobQueueService** - Centralized service for job enqueueing, status tracking, and Redis pool management
277 | - **deployment_jobs.py** - ARQ job functions that wrap existing deployment/undeploy workflows
278 | - **Worker Settings** - Production-ready ARQ configuration with proper timeouts and error handling
279 | - **Dual-Process Architecture** - Single Docker image with entrypoint.sh supporting both API and worker modes
280 |
281 | **Key Files Added:**
282 | - `apps/cloud/src/basic_memory_cloud/jobs/` - Complete job queue implementation (7 files)
283 | - `apps/cloud/entrypoint.sh` - Dual-purpose Docker container entry point
284 | - `apps/cloud/tests/integration/test_worker_integration.py` - Real infrastructure integration tests
285 | - `apps/cloud/src/basic_memory_cloud/schemas/job_responses.py` - API response schemas
286 |
287 | **API Integration:**
288 | - Provisioning endpoints return job IDs immediately instead of blocking for 60+ seconds
289 | - Job status API endpoints for real-time monitoring of deployment progress
290 | - Proper error handling and job failure scenarios with detailed error messages
291 |
292 | **Testing Achievement:**
293 | - **46 total tests passing** across all test suites (unit, integration, API, services)
294 | - **Real infrastructure testing** - ARQ workers process actual Fly.io deployments
295 | - **Event loop safety** - Fixed asyncio issues for subprocess worker compatibility
296 | - **Test fixture updates** - All fixtures properly support job queue dependencies
297 | - **Type checking** - 0 errors across entire codebase
298 |
299 | **Technical Metrics:**
300 | - **38 files changed** - +1,736 insertions, -334 deletions
301 | - **Integration test runtime** - ~18 seconds with real ARQ workers and Fly.io verification
302 | - **Event loop isolation** - Proper async session management for subprocess compatibility
303 | - **Redis integration** - Production-ready Redis configuration with connection pooling
304 |
305 | **Phase 4.3: Production Hardening** ✅ COMPLETED
306 | - [x] **Configure Upstash Redis** - production Redis setup on Fly.io
307 | - [x] **Retry logic for external APIs** - exponential backoff for flaky Tigris IAM operations
308 | - [x] **Monitoring and observability** - comprehensive Redis queue monitoring with CLI tools
309 | - [x] **Error handling improvements** - graceful handling of expected API errors with appropriate log levels
310 | - [x] **CLI tooling enhancements** - bulk update commands for CI/CD automation
311 | - [x] **Documentation improvements** - comprehensive monitoring guide with Redis patterns
312 | - [x] **Job uniqueness** - ARQ-based duplicate prevention for tenant operations
313 | - [ ] **Worker scaling** - multiple arq workers for parallel job processing
314 | - [ ] **Job persistence** - ensure jobs survive Redis/worker restarts
315 | - [ ] **Error alerting** - notifications for failed deployment jobs
316 |
317 | **Phase 4.4: Advanced Features** (Future)
318 | - [ ] **Job scheduling** - deploy tenants at specific times
319 | - [ ] **Priority queues** - urgent deployments processed first
320 | - [ ] **Batch operations** - bulk tenant deployments
321 | - [ ] **Job dependencies** - deployment → configuration → activation chains
322 |
323 | #### Benefits Achieved ✅ REALIZED
324 |
325 | **User Experience Improvements:**
326 | - **Immediate API responses** - users get job ID instantly vs waiting 60+ seconds for deployment completion
327 | - **Real-time job tracking** - status API provides live updates on deployment progress
328 | - **Better error visibility** - detailed error messages and job failure tracking
329 | - **CI/CD automation ready** - bulk update commands for automated tenant deployments
330 |
331 | **System Reliability:**
332 | - **Redis persistence** - jobs survive Redis/worker restarts with proper queue durability
333 | - **Idempotent job processing** - jobs can be safely retried without side effects
334 | - **Event loop isolation** - worker processes operate independently from API server
335 | - **Retry resilience** - exponential backoff for flaky external API calls (3 attempts, 1s/2s delays); a sketch follows after this list
336 | - **Graceful error handling** - expected API errors logged at INFO level, unexpected at ERROR level
337 | - **Job uniqueness** - prevent duplicate tenant operations with ARQ's built-in uniqueness feature
338 |
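For reference, a minimal sketch of the retry shape described above (3 attempts, 1s then 2s between them); the `tigris_client.create_iam_policy` call in the usage comment is a hypothetical stand-in for any flaky external API:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def with_backoff(operation, *, attempts: int = 3, base_delay: float = 1.0):
    """Retry an async operation with exponential backoff between attempts."""
    for attempt in range(1, attempts + 1):
        try:
            return await operation()
        except Exception as exc:  # in practice, narrow to the API's error types
            if attempt == attempts:
                raise
            delay = base_delay * 2 ** (attempt - 1)  # 1s, then 2s for attempts=3
            logger.info("Attempt %d/%d failed (%s); retrying in %.0fs", attempt, attempts, exc, delay)
            await asyncio.sleep(delay)


# Usage sketch:
# policy = await with_backoff(lambda: tigris_client.create_iam_policy(tenant_id))
```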
339 | **Operational Benefits:**
340 | - **Horizontal scaling ready** - architecture supports adding more workers for parallel processing
341 | - **Comprehensive testing** - real infrastructure integration tests ensure production reliability
342 | - **Type safety** - full type checking prevents runtime errors in job processing
343 | - **Clean separation** - API and worker processes use same codebase with different entry points
344 | - **Queue monitoring** - Redis CLI integration for real-time queue activity monitoring
345 | - **Comprehensive documentation** - detailed monitoring guide with Redis pattern explanations
346 |
347 | **Development Benefits:**
348 | - **Zero workflow changes** - existing deployment/undeploy workflows work unchanged as background jobs
349 | - **Async/await native** - modern Python asyncio patterns throughout the implementation
350 | - **Event correlation preserved** - all existing workflow tracking and event sourcing continues to work
351 | - **Enhanced CLI tooling** - unified tenant commands with proper endpoint routing
352 | - **Database integrity** - proper foreign key constraint handling in tenant deletion
353 |
354 | #### Infrastructure Requirements
355 | - **Local**: Redis via docker-compose (already exists) ✅
356 | - **Production**: Upstash Redis on Fly.io (already configured) ✅
357 | - **Workers**: arq worker processes (new deployment target)
358 | - **Monitoring**: Job status dashboard (simple web interface)
359 |
360 | #### API Evolution
361 | ```python
362 | # Before: Synchronous (blocks for 60+ seconds)
363 | POST /tenant/{id}/deploy → {status: "active", machine_id: "..."}
364 |
365 | # After: Asynchronous (returns immediately)
366 | POST /tenant/{id}/deploy → {job_id: "uuid", workflow_id: "uuid", status: "queued"}
367 | GET /jobs/{job_id}/status → {status: "running", progress: "deploying_machine", workflow_id: "uuid"}
368 | GET /workflows/{workflow_id}/events → [...] # Existing event tracking works unchanged
369 | ```
370 |
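To make the asynchronous flow concrete, a minimal client-side sketch that enqueues a deployment and polls the job status endpoint. httpx is assumed here, and the response fields and terminal status values follow the shapes sketched above rather than a documented contract:

```python
import asyncio

import httpx


async def deploy_and_wait(base_url: str, tenant_id: str, poll_seconds: float = 2.0) -> dict:
    """Kick off a deployment, then poll the job status API until it reaches a terminal state."""
    async with httpx.AsyncClient(base_url=base_url, timeout=30.0) as client:
        resp = await client.post(f"/tenant/{tenant_id}/deploy")
        resp.raise_for_status()
        job_id = resp.json()["job_id"]

        while True:
            status = (await client.get(f"/jobs/{job_id}/status")).json()
            if status["status"] in ("completed", "failed"):  # assumed terminal states
                return status
            await asyncio.sleep(poll_seconds)
```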
371 | **Technology Choice**: **arq (Redis)** over pgqueuer
372 | - **Existing Redis infrastructure** - Upstash + docker-compose already configured
373 | - **Better ecosystem** - monitoring tools, documentation, community
374 | - **Made by pydantic team** - aligns with existing Python stack
375 | - **Hybrid approach** - Redis for queue operations + Postgres for workflow state
376 |
377 | #### Job Uniqueness Implementation
378 |
379 | **Problem**: Multiple concurrent deployment requests for the same tenant could create duplicate jobs, wasting resources and potentially causing conflicts.
380 |
381 | **Solution**: Leverage ARQ's built-in job uniqueness feature using predictable job IDs:
382 |
383 | ```python
384 | # JobQueueService implementation
385 | async def enqueue_deploy_job(self, tenant_id: UUID, image_tag: str | None = None) -> str:
386 | unique_job_id = f"deploy-{tenant_id}"
387 |
388 | job = await self.redis_pool.enqueue_job(
389 | "deploy_tenant_job",
390 | str(tenant_id),
391 | image_tag,
392 | _job_id=unique_job_id, # ARQ prevents duplicates
393 | )
394 |
395 | if job is None:
396 | # Job already exists - return existing job ID
397 | return unique_job_id
398 | else:
399 | # New job created - return ARQ job ID
400 | return job.job_id
401 | ```
402 |
403 | **Key Features:**
404 | - **Predictable Job IDs**: `deploy-{tenant_id}`, `undeploy-{tenant_id}`
405 | - **Duplicate Prevention**: ARQ returns `None` for duplicate job IDs
406 | - **Graceful Handling**: Return existing job ID instead of raising errors
407 | - **Idempotent Operations**: Safe to retry deployment requests
408 | - **Clear Logging**: Distinguish "Enqueued new" vs "Found existing" jobs
409 |
410 | **Benefits:**
411 | - Prevents resource waste from duplicate deployments
412 | - Eliminates race conditions from concurrent requests
413 | - Makes job monitoring more predictable with consistent IDs
414 | - Provides natural deduplication without complex locking mechanisms
415 |
416 |
417 | ## Notes
418 |
419 | ### Design Philosophy Lessons
420 | - **Simplicity beats framework magic** - removing DBOS made the system more reliable and debuggable
421 | - **Event sourcing > complex orchestration** - database-backed event tracking provides better observability than framework abstractions
422 | - **Idempotent operations > resumable workflows** - each step handling its own retry logic is simpler than framework-managed resumability
423 | - **Explicit error handling > framework exception handling** - Python exceptions are clearer than orchestration framework error states
424 |
425 | ### Future Considerations
426 | - **Monitoring integration** - workflow tracking events could feed into observability systems
427 | - **Performance optimization** - event querying patterns may benefit from additional indexing
428 | - **Audit compliance** - complete event trail supports regulatory requirements
429 | - **Operational dashboards** - workflow status could drive tenant health monitoring
430 |
431 | ### Related Specifications
432 | - **SPEC-8**: TigrisFS Integration - bucket provisioning integrated with deployment workflow
433 | - **SPEC-1**: Specification-Driven Development Process - this spec follows the established format
434 |
435 | ## Observations
436 |
437 | - [architecture] Removing framework complexity led to more maintainable system #simplification
438 | - [workflow] Single unified deployment workflow handles both provisioning and updates #consolidation
439 | - [observability] Event sourcing with workflow correlation provides complete audit trail #event-tracking
440 | - [database] Foreign key relationships between workflows and events enable powerful queries #schema-design
441 | - [testing] Integration tests with real infrastructure catch edge cases that unit tests miss #testing-strategy
442 | - [parameters] Consistent method signatures (workflow_id first) reduce cognitive overhead #api-design
443 | - [maintenance] Fewer workflows and dependencies reduce long-term maintenance burden #operational-excellence
444 | - [debugging] Plain Python exceptions are clearer than framework abstraction layers #developer-experience
445 | - [resilience] Exponential backoff retry patterns handle flaky external API calls gracefully #error-handling
446 | - [monitoring] Redis queue monitoring provides real-time operational visibility #observability
447 | - [ci-cd] Bulk update commands enable automated tenant deployments in continuous delivery pipelines #automation
448 | - [documentation] Comprehensive monitoring guides reduce operational learning curve #knowledge-management
449 | - [error-logging] Context-aware log levels (INFO for expected errors, ERROR for unexpected) improve signal-to-noise ratio #logging-strategy
450 | - [job-uniqueness] ARQ job uniqueness with predictable tenant-based IDs prevents duplicate operations and resource waste #deduplication
451 |
452 | ## Implementation Notes
453 |
454 | ### Configuration Integration
455 | - **Redis Configuration**: Add Redis settings to existing `apps/cloud/src/basic_memory_cloud/config.py` (sketch below)
456 | - **Local Development**: Leverage existing Redis setup from `docker-compose.yml`
457 | - **Production**: Use Upstash Redis configuration for production environments
458 |
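A minimal sketch of what those settings could look like with pydantic-settings; the field names and defaults here are illustrative, not the actual contents of `config.py`:

```python
from pydantic_settings import BaseSettings


class RedisSettings(BaseSettings):
    """Illustrative Redis/ARQ settings; the real names live in config.py."""

    redis_url: str = "redis://localhost:6379"  # docker-compose default locally, Upstash URL in production
    arq_max_jobs: int = 10
    arq_keep_result_seconds: int = 3600


settings = RedisSettings()  # values override via env vars (REDIS_URL, ARQ_MAX_JOBS, ...)
```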
459 | ### Docker Entrypoint Strategy
460 | Create `entrypoint.sh` script to toggle between API server and worker processes using single Docker image:
461 |
462 | ```bash
463 | #!/bin/bash
464 |
465 | # Entrypoint script for Basic Memory Cloud service
466 | # Supports multiple process types: api, worker
467 |
468 | set -e
469 |
470 | case "$1" in
471 | "api")
472 | echo "Starting Basic Memory Cloud API server..."
473 | exec uvicorn basic_memory_cloud.main:app \
474 | --host 0.0.0.0 \
475 | --port 8000 \
476 | --log-level info
477 | ;;
478 | "worker")
479 | echo "Starting Basic Memory Cloud ARQ worker..."
480 | # For ARQ worker implementation
481 | exec python -m arq basic_memory_cloud.jobs.settings.WorkerSettings
482 | ;;
483 | *)
484 | echo "Usage: $0 {api|worker}"
485 | echo " api - Start the FastAPI server"
486 | echo " worker - Start the ARQ worker"
487 | exit 1
488 | ;;
489 | esac
490 | ```
491 |
492 | ### Fly.io Process Groups Configuration
493 | Use separate machine groups for API and worker processes with independent scaling:
494 |
495 | ```toml
496 | # fly.toml app configuration for basic-memory-cloud
497 | app = 'basic-memory-cloud-dev-basic-machines'
498 | primary_region = 'dfw'
499 | org = 'basic-machines'
500 | kill_signal = 'SIGINT'
501 | kill_timeout = '5s'
502 |
503 | [build]
504 |
505 | # Process groups for API server and worker
506 | [processes]
507 | api = "api"
508 | worker = "worker"
509 |
510 | # Machine scaling configuration
511 | [[machine]]
512 | size = 'shared-cpu-1x'
513 | processes = ['api']
514 | min_machines_running = 1
515 | auto_stop_machines = false
516 | auto_start_machines = true
517 |
518 | [[machine]]
519 | size = 'shared-cpu-1x'
520 | processes = ['worker']
521 | min_machines_running = 1
522 | auto_stop_machines = false
523 | auto_start_machines = true
524 |
525 | [env]
526 | # Python configuration
527 | PYTHONUNBUFFERED = '1'
528 | PYTHONPATH = '/app'
529 |
530 | # Logging configuration
531 | LOG_LEVEL = 'DEBUG'
532 |
533 | # Redis configuration for ARQ
534 | REDIS_URL = 'redis://basic-memory-cloud-redis.upstash.io'
535 |
536 | # Database configuration
537 | DATABASE_HOST = 'basic-memory-cloud-db-dev-basic-machines.internal'
538 | DATABASE_PORT = '5432'
539 | DATABASE_NAME = 'basic_memory_cloud'
540 | DATABASE_USER = 'postgres'
541 | DATABASE_SSL = 'true'
542 |
543 | # Worker configuration
544 | ARQ_MAX_JOBS = '10'
545 | ARQ_KEEP_RESULT = '3600'
546 |
547 | # Fly.io configuration
548 | FLY_ORG = 'basic-machines'
549 | FLY_REGION = 'dfw'
550 |
551 | # Internal service - no external HTTP exposure for worker
552 | # API accessible via basic-memory-cloud-dev-basic-machines.flycast:8000
553 |
554 | [[vm]]
555 | size = 'shared-cpu-1x'
556 | ```
557 |
558 | ### Benefits of This Architecture
559 | - **Single Docker Image**: Both API and worker use same container with different entrypoints
560 | - **Independent Scaling**: Scale API and worker processes separately based on demand
561 | - **Clean Separation**: Web traffic handling separate from background job processing
562 | - **Existing Infrastructure**: Leverages current PostgreSQL + Redis setup without complexity
563 | - **Hybrid State Management**: Redis for queue operations, PostgreSQL for persistent workflow tracking
564 |
565 | ## Relations
566 |
567 | - implements [[SPEC-8 TigrisFS Integration]]
568 | - follows [[SPEC-1 Specification-Driven Development Process]]
569 | - supersedes previous multi-workflow architecture
570 |
```
--------------------------------------------------------------------------------
/tests/repository/test_entity_repository.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the EntityRepository."""
2 |
3 | from datetime import datetime, timezone
4 |
5 | import pytest
6 | import pytest_asyncio
7 | from sqlalchemy import select
8 |
9 | from basic_memory import db
10 | from basic_memory.models import Entity, Observation, Relation, Project
11 | from basic_memory.repository.entity_repository import EntityRepository
12 | from basic_memory.utils import generate_permalink
13 |
14 |
15 | @pytest_asyncio.fixture
16 | async def entity_with_observations(session_maker, sample_entity):
17 | """Create an entity with observations."""
18 | async with db.scoped_session(session_maker) as session:
19 | observations = [
20 | Observation(
21 | entity_id=sample_entity.id,
22 | content="First observation",
23 | ),
24 | Observation(
25 | entity_id=sample_entity.id,
26 | content="Second observation",
27 | ),
28 | ]
29 | session.add_all(observations)
30 | return sample_entity
31 |
32 |
33 | @pytest_asyncio.fixture
34 | async def related_results(session_maker, test_project: Project):
35 | """Create entities with relations between them."""
36 | async with db.scoped_session(session_maker) as session:
37 | source = Entity(
38 | project_id=test_project.id,
39 | title="source",
40 | entity_type="test",
41 | permalink="source/source",
42 | file_path="source/source.md",
43 | content_type="text/markdown",
44 | created_at=datetime.now(timezone.utc),
45 | updated_at=datetime.now(timezone.utc),
46 | )
47 | target = Entity(
48 | project_id=test_project.id,
49 | title="target",
50 | entity_type="test",
51 | permalink="target/target",
52 | file_path="target/target.md",
53 | content_type="text/markdown",
54 | created_at=datetime.now(timezone.utc),
55 | updated_at=datetime.now(timezone.utc),
56 | )
57 | session.add(source)
58 | session.add(target)
59 | await session.flush()
60 |
61 | relation = Relation(
62 | from_id=source.id,
63 | to_id=target.id,
64 | to_name=target.title,
65 | relation_type="connects_to",
66 | )
67 | session.add(relation)
68 |
69 | return source, target, relation
70 |
71 |
72 | @pytest.mark.asyncio
73 | async def test_create_entity(entity_repository: EntityRepository):
74 | """Test creating a new entity"""
75 | entity_data = {
76 | "project_id": entity_repository.project_id,
77 | "title": "Test",
78 | "entity_type": "test",
79 | "permalink": "test/test",
80 | "file_path": "test/test.md",
81 | "content_type": "text/markdown",
82 | "created_at": datetime.now(timezone.utc),
83 | "updated_at": datetime.now(timezone.utc),
84 | }
85 | entity = await entity_repository.create(entity_data)
86 |
87 | # Verify returned object
88 | assert entity.id is not None
89 | assert entity.title == "Test"
90 | assert isinstance(entity.created_at, datetime)
91 | assert isinstance(entity.updated_at, datetime)
92 |
93 | # Verify in database
94 | found = await entity_repository.find_by_id(entity.id)
95 | assert found is not None
96 | assert found.id is not None
97 | assert found.id == entity.id
98 | assert found.title == entity.title
99 |
100 | # assert relations are eagerly loaded
101 | assert len(entity.observations) == 0
102 | assert len(entity.relations) == 0
103 |
104 |
105 | @pytest.mark.asyncio
106 | async def test_create_all(entity_repository: EntityRepository):
107 |     """Test creating multiple entities in one call"""
108 | entity_data = [
109 | {
110 | "project_id": entity_repository.project_id,
111 | "title": "Test_1",
112 | "entity_type": "test",
113 | "permalink": "test/test-1",
114 | "file_path": "test/test_1.md",
115 | "content_type": "text/markdown",
116 | "created_at": datetime.now(timezone.utc),
117 | "updated_at": datetime.now(timezone.utc),
118 | },
119 | {
120 | "project_id": entity_repository.project_id,
121 | "title": "Test-2",
122 | "entity_type": "test",
123 | "permalink": "test/test-2",
124 | "file_path": "test/test_2.md",
125 | "content_type": "text/markdown",
126 | "created_at": datetime.now(timezone.utc),
127 | "updated_at": datetime.now(timezone.utc),
128 | },
129 | ]
130 | entities = await entity_repository.create_all(entity_data)
131 |
132 | assert len(entities) == 2
133 | entity = entities[0]
134 |
135 | # Verify in database
136 | found = await entity_repository.find_by_id(entity.id)
137 | assert found is not None
138 | assert found.id is not None
139 | assert found.id == entity.id
140 | assert found.title == entity.title
141 |
142 | # assert relations are eagerly loaded
143 | assert len(entity.observations) == 0
144 | assert len(entity.relations) == 0
145 |
146 |
147 | @pytest.mark.asyncio
148 | async def test_find_by_id(entity_repository: EntityRepository, sample_entity: Entity):
149 | """Test finding an entity by ID"""
150 | found = await entity_repository.find_by_id(sample_entity.id)
151 | assert found is not None
152 | assert found.id == sample_entity.id
153 | assert found.title == sample_entity.title
154 |
155 | # Verify against direct database query
156 | async with db.scoped_session(entity_repository.session_maker) as session:
157 | stmt = select(Entity).where(Entity.id == sample_entity.id)
158 | result = await session.execute(stmt)
159 | db_entity = result.scalar_one()
160 | assert db_entity.id == found.id
161 | assert db_entity.title == found.title
162 |
163 |
164 | @pytest.mark.asyncio
165 | async def test_update_entity(entity_repository: EntityRepository, sample_entity: Entity):
166 | """Test updating an entity"""
167 | updated = await entity_repository.update(sample_entity.id, {"title": "Updated title"})
168 | assert updated is not None
169 | assert updated.title == "Updated title"
170 |
171 | # Verify in database
172 | async with db.scoped_session(entity_repository.session_maker) as session:
173 | stmt = select(Entity).where(Entity.id == sample_entity.id)
174 | result = await session.execute(stmt)
175 | db_entity = result.scalar_one()
176 | assert db_entity.title == "Updated title"
177 |
178 |
179 | @pytest.mark.asyncio
180 | async def test_delete_entity(entity_repository: EntityRepository, sample_entity):
181 | """Test deleting an entity."""
182 | result = await entity_repository.delete(sample_entity.id)
183 | assert result is True
184 |
185 | # Verify deletion
186 | deleted = await entity_repository.find_by_id(sample_entity.id)
187 | assert deleted is None
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_delete_entity_with_observations(
192 | entity_repository: EntityRepository, entity_with_observations
193 | ):
194 | """Test deleting an entity cascades to its observations."""
195 | entity = entity_with_observations
196 |
197 | result = await entity_repository.delete(entity.id)
198 | assert result is True
199 |
200 | # Verify entity deletion
201 | deleted = await entity_repository.find_by_id(entity.id)
202 | assert deleted is None
203 |
204 | # Verify observations were cascaded
205 | async with db.scoped_session(entity_repository.session_maker) as session:
206 | query = select(Observation).filter(Observation.entity_id == entity.id)
207 | result = await session.execute(query)
208 | remaining_observations = result.scalars().all()
209 | assert len(remaining_observations) == 0
210 |
211 |
212 | @pytest.mark.asyncio
213 | async def test_delete_entities_by_type(entity_repository: EntityRepository, sample_entity):
214 | """Test deleting entities by type."""
215 | result = await entity_repository.delete_by_fields(entity_type=sample_entity.entity_type)
216 | assert result is True
217 |
218 | # Verify deletion
219 | async with db.scoped_session(entity_repository.session_maker) as session:
220 | query = select(Entity).filter(Entity.entity_type == sample_entity.entity_type)
221 | result = await session.execute(query)
222 | remaining = result.scalars().all()
223 | assert len(remaining) == 0
224 |
225 |
226 | @pytest.mark.asyncio
227 | async def test_delete_entity_with_relations(entity_repository: EntityRepository, related_results):
228 | """Test deleting an entity cascades to its relations."""
229 | source, target, relation = related_results
230 |
231 | # Delete source entity
232 | result = await entity_repository.delete(source.id)
233 | assert result is True
234 |
235 | # Verify relation was cascaded
236 | async with db.scoped_session(entity_repository.session_maker) as session:
237 | query = select(Relation).filter(Relation.from_id == source.id)
238 | result = await session.execute(query)
239 | remaining_relations = result.scalars().all()
240 | assert len(remaining_relations) == 0
241 |
242 | # Verify target entity still exists
243 | target_exists = await entity_repository.find_by_id(target.id)
244 | assert target_exists is not None
245 |
246 |
247 | @pytest.mark.asyncio
248 | async def test_delete_nonexistent_entity(entity_repository: EntityRepository):
249 | """Test deleting an entity that doesn't exist."""
250 | result = await entity_repository.delete(0)
251 | assert result is False
252 |
253 |
254 | @pytest_asyncio.fixture
255 | async def test_entities(session_maker, test_project: Project):
256 | """Create multiple test entities."""
257 | async with db.scoped_session(session_maker) as session:
258 | entities = [
259 | Entity(
260 | project_id=test_project.id,
261 | title="entity1",
262 | entity_type="test",
263 | permalink="type1/entity1",
264 | file_path="type1/entity1.md",
265 | content_type="text/markdown",
266 | created_at=datetime.now(timezone.utc),
267 | updated_at=datetime.now(timezone.utc),
268 | ),
269 | Entity(
270 | project_id=test_project.id,
271 | title="entity2",
272 | entity_type="test",
273 | permalink="type1/entity2",
274 | file_path="type1/entity2.md",
275 | content_type="text/markdown",
276 | created_at=datetime.now(timezone.utc),
277 | updated_at=datetime.now(timezone.utc),
278 | ),
279 | Entity(
280 | project_id=test_project.id,
281 | title="entity3",
282 | entity_type="test",
283 | permalink="type2/entity3",
284 | file_path="type2/entity3.md",
285 | content_type="text/markdown",
286 | created_at=datetime.now(timezone.utc),
287 | updated_at=datetime.now(timezone.utc),
288 | ),
289 | ]
290 | session.add_all(entities)
291 | return entities
292 |
293 |
294 | @pytest.mark.asyncio
295 | async def test_find_by_permalinks(entity_repository: EntityRepository, test_entities):
296 |     """Test finding multiple entities by their permalinks."""
297 | # Test finding multiple entities
298 | permalinks = [e.permalink for e in test_entities]
299 | found = await entity_repository.find_by_permalinks(permalinks)
300 | assert len(found) == 3
301 | names = {e.title for e in found}
302 | assert names == {"entity1", "entity2", "entity3"}
303 |
304 | # Test finding subset of entities
305 | permalinks = [e.permalink for e in test_entities if e.title != "entity2"]
306 | found = await entity_repository.find_by_permalinks(permalinks)
307 | assert len(found) == 2
308 | names = {e.title for e in found}
309 | assert names == {"entity1", "entity3"}
310 |
311 | # Test with non-existent entities
312 | permalinks = ["type1/entity1", "type3/nonexistent"]
313 | found = await entity_repository.find_by_permalinks(permalinks)
314 | assert len(found) == 1
315 | assert found[0].title == "entity1"
316 |
317 | # Test empty input
318 | found = await entity_repository.find_by_permalinks([])
319 | assert len(found) == 0
320 |
321 |
322 | @pytest.mark.asyncio
323 | async def test_generate_permalink_from_file_path():
324 | """Test permalink generation from different file paths."""
325 | test_cases = [
326 | ("docs/My Feature.md", "docs/my-feature"),
327 | ("specs/API (v2).md", "specs/api-v2"),
328 | ("notes/2024/Q1 Planning!!!.md", "notes/2024/q1-planning"),
329 | ("test/Über File.md", "test/uber-file"),
330 | ("docs/my_feature_name.md", "docs/my-feature-name"),
331 | ("specs/multiple--dashes.md", "specs/multiple-dashes"),
332 | ("notes/trailing/space/ file.md", "notes/trailing/space/file"),
333 | ]
334 |
335 | for input_path, expected in test_cases:
336 | result = generate_permalink(input_path)
337 | assert result == expected, f"Failed for {input_path}"
338 | # Verify the result passes validation
339 | Entity(
340 | title="test",
341 | entity_type="test",
342 | permalink=result,
343 | file_path=input_path,
344 | content_type="text/markdown",
345 | ) # This will raise ValueError if invalid
346 |
347 |
348 | @pytest.mark.asyncio
349 | async def test_get_by_title(entity_repository: EntityRepository, session_maker):
350 | """Test getting an entity by title."""
351 | # Create test entities
352 | async with db.scoped_session(session_maker) as session:
353 | entities = [
354 | Entity(
355 | project_id=entity_repository.project_id,
356 | title="Unique Title",
357 | entity_type="test",
358 | permalink="test/unique-title",
359 | file_path="test/unique-title.md",
360 | content_type="text/markdown",
361 | created_at=datetime.now(timezone.utc),
362 | updated_at=datetime.now(timezone.utc),
363 | ),
364 | Entity(
365 | project_id=entity_repository.project_id,
366 | title="Another Title",
367 | entity_type="test",
368 | permalink="test/another-title",
369 | file_path="test/another-title.md",
370 | content_type="text/markdown",
371 | created_at=datetime.now(timezone.utc),
372 | updated_at=datetime.now(timezone.utc),
373 | ),
374 | Entity(
375 | project_id=entity_repository.project_id,
376 | title="Another Title",
377 | entity_type="test",
378 | permalink="test/another-title-1",
379 | file_path="test/another-title-1.md",
380 | content_type="text/markdown",
381 | created_at=datetime.now(timezone.utc),
382 | updated_at=datetime.now(timezone.utc),
383 | ),
384 | ]
385 | session.add_all(entities)
386 | await session.flush()
387 |
388 | # Test getting by exact title
389 | found = await entity_repository.get_by_title("Unique Title")
390 | assert found is not None
391 | assert len(found) == 1
392 | assert found[0].title == "Unique Title"
393 |
394 | # Test case sensitivity
395 | found = await entity_repository.get_by_title("unique title")
396 | assert not found # Should be case-sensitive
397 |
398 | # Test non-existent title
399 | found = await entity_repository.get_by_title("Non Existent")
400 | assert not found
401 |
402 | # Test multiple rows found
403 | found = await entity_repository.get_by_title("Another Title")
404 | assert len(found) == 2
405 |
406 |
407 | @pytest.mark.asyncio
408 | async def test_get_by_file_path(entity_repository: EntityRepository, session_maker):
409 |     """Test getting an entity by file path."""
410 | # Create test entities
411 | async with db.scoped_session(session_maker) as session:
412 | entities = [
413 | Entity(
414 | project_id=entity_repository.project_id,
415 | title="Unique Title",
416 | entity_type="test",
417 | permalink="test/unique-title",
418 | file_path="test/unique-title.md",
419 | content_type="text/markdown",
420 | created_at=datetime.now(timezone.utc),
421 | updated_at=datetime.now(timezone.utc),
422 | ),
423 | ]
424 | session.add_all(entities)
425 | await session.flush()
426 |
427 | # Test getting by file_path
428 | found = await entity_repository.get_by_file_path("test/unique-title.md")
429 | assert found is not None
430 | assert found.title == "Unique Title"
431 |
432 | # Test non-existent file_path
433 | found = await entity_repository.get_by_file_path("not/a/real/file.md")
434 | assert found is None
435 |
436 |
437 | @pytest.mark.asyncio
438 | async def test_get_distinct_directories(entity_repository: EntityRepository, session_maker):
439 | """Test getting distinct directory paths from entity file paths."""
440 | # Create test entities with various directory structures
441 | async with db.scoped_session(session_maker) as session:
442 | entities = [
443 | Entity(
444 | project_id=entity_repository.project_id,
445 | title="File 1",
446 | entity_type="test",
447 | permalink="docs/guides/file1",
448 | file_path="docs/guides/file1.md",
449 | content_type="text/markdown",
450 | created_at=datetime.now(timezone.utc),
451 | updated_at=datetime.now(timezone.utc),
452 | ),
453 | Entity(
454 | project_id=entity_repository.project_id,
455 | title="File 2",
456 | entity_type="test",
457 | permalink="docs/guides/file2",
458 | file_path="docs/guides/file2.md",
459 | content_type="text/markdown",
460 | created_at=datetime.now(timezone.utc),
461 | updated_at=datetime.now(timezone.utc),
462 | ),
463 | Entity(
464 | project_id=entity_repository.project_id,
465 | title="File 3",
466 | entity_type="test",
467 | permalink="docs/api/file3",
468 | file_path="docs/api/file3.md",
469 | content_type="text/markdown",
470 | created_at=datetime.now(timezone.utc),
471 | updated_at=datetime.now(timezone.utc),
472 | ),
473 | Entity(
474 | project_id=entity_repository.project_id,
475 | title="File 4",
476 | entity_type="test",
477 | permalink="specs/file4",
478 | file_path="specs/file4.md",
479 | content_type="text/markdown",
480 | created_at=datetime.now(timezone.utc),
481 | updated_at=datetime.now(timezone.utc),
482 | ),
483 | Entity(
484 | project_id=entity_repository.project_id,
485 | title="File 5",
486 | entity_type="test",
487 | permalink="notes/2024/q1/file5",
488 | file_path="notes/2024/q1/file5.md",
489 | content_type="text/markdown",
490 | created_at=datetime.now(timezone.utc),
491 | updated_at=datetime.now(timezone.utc),
492 | ),
493 | ]
494 | session.add_all(entities)
495 | await session.flush()
496 |
497 | # Get distinct directories
498 | directories = await entity_repository.get_distinct_directories()
499 |
500 | # Verify directories are extracted correctly
501 | assert isinstance(directories, list)
502 | assert len(directories) > 0
503 |
504 | # Should include all parent directories but not filenames
505 | expected_dirs = {
506 | "docs",
507 | "docs/guides",
508 | "docs/api",
509 | "notes",
510 | "notes/2024",
511 | "notes/2024/q1",
512 | "specs",
513 | }
514 | assert set(directories) == expected_dirs
515 |
516 | # Verify results are sorted
517 | assert directories == sorted(directories)
518 |
519 | # Verify no file paths are included
520 | for dir_path in directories:
521 | assert not dir_path.endswith(".md")
522 |
523 |
524 | @pytest.mark.asyncio
525 | async def test_get_distinct_directories_empty_db(entity_repository: EntityRepository):
526 | """Test getting distinct directories when database is empty."""
527 | directories = await entity_repository.get_distinct_directories()
528 | assert directories == []
529 |
530 |
531 | @pytest.mark.asyncio
532 | async def test_find_by_directory_prefix(entity_repository: EntityRepository, session_maker):
533 | """Test finding entities by directory prefix."""
534 | # Create test entities in various directories
535 | async with db.scoped_session(session_maker) as session:
536 | entities = [
537 | Entity(
538 | project_id=entity_repository.project_id,
539 | title="File 1",
540 | entity_type="test",
541 | permalink="docs/file1",
542 | file_path="docs/file1.md",
543 | content_type="text/markdown",
544 | created_at=datetime.now(timezone.utc),
545 | updated_at=datetime.now(timezone.utc),
546 | ),
547 | Entity(
548 | project_id=entity_repository.project_id,
549 | title="File 2",
550 | entity_type="test",
551 | permalink="docs/guides/file2",
552 | file_path="docs/guides/file2.md",
553 | content_type="text/markdown",
554 | created_at=datetime.now(timezone.utc),
555 | updated_at=datetime.now(timezone.utc),
556 | ),
557 | Entity(
558 | project_id=entity_repository.project_id,
559 | title="File 3",
560 | entity_type="test",
561 | permalink="docs/api/file3",
562 | file_path="docs/api/file3.md",
563 | content_type="text/markdown",
564 | created_at=datetime.now(timezone.utc),
565 | updated_at=datetime.now(timezone.utc),
566 | ),
567 | Entity(
568 | project_id=entity_repository.project_id,
569 | title="File 4",
570 | entity_type="test",
571 | permalink="specs/file4",
572 | file_path="specs/file4.md",
573 | content_type="text/markdown",
574 | created_at=datetime.now(timezone.utc),
575 | updated_at=datetime.now(timezone.utc),
576 | ),
577 | ]
578 | session.add_all(entities)
579 | await session.flush()
580 |
581 | # Test finding all entities in "docs" directory and subdirectories
582 | docs_entities = await entity_repository.find_by_directory_prefix("docs")
583 | assert len(docs_entities) == 3
584 | file_paths = {e.file_path for e in docs_entities}
585 | assert file_paths == {"docs/file1.md", "docs/guides/file2.md", "docs/api/file3.md"}
586 |
587 | # Test finding entities in "docs/guides" subdirectory
588 | guides_entities = await entity_repository.find_by_directory_prefix("docs/guides")
589 | assert len(guides_entities) == 1
590 | assert guides_entities[0].file_path == "docs/guides/file2.md"
591 |
592 | # Test finding entities in "specs" directory
593 | specs_entities = await entity_repository.find_by_directory_prefix("specs")
594 | assert len(specs_entities) == 1
595 | assert specs_entities[0].file_path == "specs/file4.md"
596 |
597 | # Test with root directory (empty string)
598 | all_entities = await entity_repository.find_by_directory_prefix("")
599 | assert len(all_entities) == 4
600 |
601 | # Test with root directory (slash)
602 | all_entities = await entity_repository.find_by_directory_prefix("/")
603 | assert len(all_entities) == 4
604 |
605 | # Test with non-existent directory
606 | nonexistent = await entity_repository.find_by_directory_prefix("nonexistent")
607 | assert len(nonexistent) == 0
608 |
609 |
610 | @pytest.mark.asyncio
611 | async def test_find_by_directory_prefix_basic_fields_only(
612 | entity_repository: EntityRepository, session_maker
613 | ):
614 | """Test that find_by_directory_prefix returns basic entity fields.
615 |
616 | Note: This method uses use_query_options=False for performance,
617 | so it doesn't eager load relationships. Directory trees only need
618 | basic entity fields.
619 | """
620 | # Create test entity
621 | async with db.scoped_session(session_maker) as session:
622 | entity = Entity(
623 | project_id=entity_repository.project_id,
624 | title="Test Entity",
625 | entity_type="test",
626 | permalink="docs/test",
627 | file_path="docs/test.md",
628 | content_type="text/markdown",
629 | created_at=datetime.now(timezone.utc),
630 | updated_at=datetime.now(timezone.utc),
631 | )
632 | session.add(entity)
633 | await session.flush()
634 |
635 | # Query entity by directory prefix
636 | entities = await entity_repository.find_by_directory_prefix("docs")
637 | assert len(entities) == 1
638 |
639 | # Verify basic fields are present (all we need for directory trees)
640 | entity = entities[0]
641 | assert entity.title == "Test Entity"
642 | assert entity.file_path == "docs/test.md"
643 | assert entity.permalink == "docs/test"
644 | assert entity.entity_type == "test"
645 | assert entity.content_type == "text/markdown"
646 | assert entity.updated_at is not None
647 |
648 |
649 | @pytest.mark.asyncio
650 | async def test_get_all_file_paths(entity_repository: EntityRepository, session_maker):
651 | """Test getting all file paths for deletion detection during sync."""
652 | # Create test entities with various file paths
653 | async with db.scoped_session(session_maker) as session:
654 | entities = [
655 | Entity(
656 | project_id=entity_repository.project_id,
657 | title="File 1",
658 | entity_type="test",
659 | permalink="docs/file1",
660 | file_path="docs/file1.md",
661 | content_type="text/markdown",
662 | created_at=datetime.now(timezone.utc),
663 | updated_at=datetime.now(timezone.utc),
664 | ),
665 | Entity(
666 | project_id=entity_repository.project_id,
667 | title="File 2",
668 | entity_type="test",
669 | permalink="specs/file2",
670 | file_path="specs/file2.md",
671 | content_type="text/markdown",
672 | created_at=datetime.now(timezone.utc),
673 | updated_at=datetime.now(timezone.utc),
674 | ),
675 | Entity(
676 | project_id=entity_repository.project_id,
677 | title="File 3",
678 | entity_type="test",
679 | permalink="notes/file3",
680 | file_path="notes/file3.md",
681 | content_type="text/markdown",
682 | created_at=datetime.now(timezone.utc),
683 | updated_at=datetime.now(timezone.utc),
684 | ),
685 | ]
686 | session.add_all(entities)
687 | await session.flush()
688 |
689 | # Get all file paths
690 | file_paths = await entity_repository.get_all_file_paths()
691 |
692 | # Verify results
693 | assert isinstance(file_paths, list)
694 | assert len(file_paths) == 3
695 | assert set(file_paths) == {"docs/file1.md", "specs/file2.md", "notes/file3.md"}
696 |
697 |
698 | @pytest.mark.asyncio
699 | async def test_get_all_file_paths_empty_db(entity_repository: EntityRepository):
700 | """Test getting all file paths when database is empty."""
701 | file_paths = await entity_repository.get_all_file_paths()
702 | assert file_paths == []
703 |
704 |
705 | @pytest.mark.asyncio
706 | async def test_get_all_file_paths_performance(entity_repository: EntityRepository, session_maker):
707 | """Test that get_all_file_paths doesn't load entities or relationships.
708 |
709 | This method is optimized for deletion detection during streaming sync.
710 | It should only query file_path strings, not full entity objects.
711 | """
712 | # Create test entity with observations and relations
713 | async with db.scoped_session(session_maker) as session:
714 | # Create entities
715 | entity1 = Entity(
716 | project_id=entity_repository.project_id,
717 | title="Entity 1",
718 | entity_type="test",
719 | permalink="test/entity1",
720 | file_path="test/entity1.md",
721 | content_type="text/markdown",
722 | created_at=datetime.now(timezone.utc),
723 | updated_at=datetime.now(timezone.utc),
724 | )
725 | entity2 = Entity(
726 | project_id=entity_repository.project_id,
727 | title="Entity 2",
728 | entity_type="test",
729 | permalink="test/entity2",
730 | file_path="test/entity2.md",
731 | content_type="text/markdown",
732 | created_at=datetime.now(timezone.utc),
733 | updated_at=datetime.now(timezone.utc),
734 | )
735 | session.add_all([entity1, entity2])
736 | await session.flush()
737 |
738 | # Add observations to entity1
739 | observation = Observation(
740 | entity_id=entity1.id,
741 | content="Test observation",
742 | category="note",
743 | )
744 | session.add(observation)
745 |
746 | # Add relation between entities
747 | relation = Relation(
748 | from_id=entity1.id,
749 | to_id=entity2.id,
750 | to_name=entity2.title,
751 | relation_type="relates_to",
752 | )
753 | session.add(relation)
754 | await session.flush()
755 |
756 | # Get all file paths - should be fast and not load relationships
757 | file_paths = await entity_repository.get_all_file_paths()
758 |
759 | # Verify results - just file paths, no entities or relationships loaded
760 | assert len(file_paths) == 2
761 | assert set(file_paths) == {"test/entity1.md", "test/entity2.md"}
762 |
763 | # Result should be list of strings, not entity objects
764 | for path in file_paths:
765 | assert isinstance(path, str)
766 |
767 |
768 | @pytest.mark.asyncio
769 | async def test_get_all_file_paths_project_isolation(
770 | entity_repository: EntityRepository, session_maker
771 | ):
772 | """Test that get_all_file_paths only returns paths from the current project."""
773 | # Create entities in the repository's project
774 | async with db.scoped_session(session_maker) as session:
775 | entity1 = Entity(
776 | project_id=entity_repository.project_id,
777 | title="Project 1 File",
778 | entity_type="test",
779 | permalink="test/file1",
780 | file_path="test/file1.md",
781 | content_type="text/markdown",
782 | created_at=datetime.now(timezone.utc),
783 | updated_at=datetime.now(timezone.utc),
784 | )
785 | session.add(entity1)
786 | await session.flush()
787 |
788 | # Create a second project
789 | project2 = Project(name="other-project", path="/tmp/other")
790 | session.add(project2)
791 | await session.flush()
792 |
793 | # Create entity in different project
794 | entity2 = Entity(
795 | project_id=project2.id,
796 | title="Project 2 File",
797 | entity_type="test",
798 | permalink="test/file2",
799 | file_path="test/file2.md",
800 | content_type="text/markdown",
801 | created_at=datetime.now(timezone.utc),
802 | updated_at=datetime.now(timezone.utc),
803 | )
804 | session.add(entity2)
805 | await session.flush()
806 |
807 | # Get all file paths for project 1
808 | file_paths = await entity_repository.get_all_file_paths()
809 |
810 | # Should only include files from project 1
811 | assert len(file_paths) == 1
812 | assert file_paths == ["test/file1.md"]
813 |
```
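
The directory-prefix tests above pin down the expected behavior: an empty string or `"/"` means the project root and returns every entity, while a named directory only matches paths strictly beneath it. As a rough illustration of a query that would satisfy those assertions, here is a minimal sketch of such a repository method; it assumes a SQLAlchemy 2.x async session and an `Entity` model with `file_path` and `project_id` columns (import path assumed), and is not the project's actual implementation, which is not shown on this page.

```python
# Hypothetical sketch -- behavior inferred from the tests above, not the real repository code.
from sqlalchemy import select

from basic_memory.models import Entity  # import path assumed for illustration


async def find_by_directory_prefix(session, project_id: int, directory: str) -> list[Entity]:
    """Return entities whose file_path sits under the given directory prefix."""
    stmt = select(Entity).where(Entity.project_id == project_id)

    # An empty string or "/" means the project root, so every entity in the project matches.
    normalized = directory.strip("/")
    if normalized:
        # Require the trailing slash so "docs" matches "docs/..." but not "docs-other/...".
        stmt = stmt.where(Entity.file_path.like(f"{normalized}/%"))

    result = await session.execute(stmt)
    return list(result.scalars().all())
```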
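
Similarly, the `get_all_file_paths` tests require a project-scoped result of bare strings with no entity or relationship loading. A column-only SELECT is the natural way to meet that contract; the following is a minimal sketch under the same assumptions as above (async SQLAlchemy session, `Entity.file_path` / `Entity.project_id` columns), not the project's actual code.

```python
# Hypothetical sketch -- a column-only query matching the tests' expectations.
from sqlalchemy import select

from basic_memory.models import Entity  # import path assumed for illustration


async def get_all_file_paths(session, project_id: int) -> list[str]:
    """Return file_path strings for one project without materializing Entity objects."""
    stmt = select(Entity.file_path).where(Entity.project_id == project_id)
    result = await session.execute(stmt)
    # A single selected column means scalars() yields plain strings, one per entity.
    return list(result.scalars().all())
```

Selecting only the `file_path` column keeps deletion detection during sync cheap even for large projects, which is what the performance-oriented test above is checking indirectly by attaching observations and relations that must never be loaded.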