This is page 16 of 23. Use http://codebase.md/basicmachines-co/basic-memory?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .claude
│ ├── agents
│ │ ├── python-developer.md
│ │ └── system-architect.md
│ └── commands
│ ├── release
│ │ ├── beta.md
│ │ ├── changelog.md
│ │ ├── release-check.md
│ │ └── release.md
│ ├── spec.md
│ └── test-live.md
├── .dockerignore
├── .github
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── documentation.md
│ │ └── feature_request.md
│ └── workflows
│ ├── claude-code-review.yml
│ ├── claude-issue-triage.yml
│ ├── claude.yml
│ ├── dev-release.yml
│ ├── docker.yml
│ ├── pr-title.yml
│ ├── release.yml
│ └── test.yml
├── .gitignore
├── .python-version
├── CHANGELOG.md
├── CITATION.cff
├── CLA.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── ai-assistant-guide-extended.md
│ ├── character-handling.md
│ ├── cloud-cli.md
│ └── Docker.md
├── justfile
├── LICENSE
├── llms-install.md
├── pyproject.toml
├── README.md
├── SECURITY.md
├── smithery.yaml
├── specs
│ ├── SPEC-1 Specification-Driven Development Process.md
│ ├── SPEC-10 Unified Deployment Workflow and Event Tracking.md
│ ├── SPEC-11 Basic Memory API Performance Optimization.md
│ ├── SPEC-12 OpenTelemetry Observability.md
│ ├── SPEC-13 CLI Authentication with Subscription Validation.md
│ ├── SPEC-14 Cloud Git Versioning & GitHub Backup.md
│ ├── SPEC-14- Cloud Git Versioning & GitHub Backup.md
│ ├── SPEC-15 Configuration Persistence via Tigris for Cloud Tenants.md
│ ├── SPEC-16 MCP Cloud Service Consolidation.md
│ ├── SPEC-17 Semantic Search with ChromaDB.md
│ ├── SPEC-18 AI Memory Management Tool.md
│ ├── SPEC-19 Sync Performance and Memory Optimization.md
│ ├── SPEC-2 Slash Commands Reference.md
│ ├── SPEC-3 Agent Definitions.md
│ ├── SPEC-4 Notes Web UI Component Architecture.md
│ ├── SPEC-5 CLI Cloud Upload via WebDAV.md
│ ├── SPEC-6 Explicit Project Parameter Architecture.md
│ ├── SPEC-7 POC to spike Tigris Turso for local access to cloud data.md
│ ├── SPEC-8 TigrisFS Integration.md
│ ├── SPEC-9 Multi-Project Bidirectional Sync Architecture.md
│ ├── SPEC-9 Signed Header Tenant Information.md
│ └── SPEC-9-1 Follow-Ups- Conflict, Sync, and Observability.md
├── src
│ └── basic_memory
│ ├── __init__.py
│ ├── alembic
│ │ ├── alembic.ini
│ │ ├── env.py
│ │ ├── migrations.py
│ │ ├── script.py.mako
│ │ └── versions
│ │ ├── 3dae7c7b1564_initial_schema.py
│ │ ├── 502b60eaa905_remove_required_from_entity_permalink.py
│ │ ├── 5fe1ab1ccebe_add_projects_table.py
│ │ ├── 647e7a75e2cd_project_constraint_fix.py
│ │ ├── 9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py
│ │ ├── a1b2c3d4e5f6_fix_project_foreign_keys.py
│ │ ├── b3c3938bacdb_relation_to_name_unique_index.py
│ │ ├── cc7172b46608_update_search_index_schema.py
│ │ └── e7e1f4367280_add_scan_watermark_tracking_to_project.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── routers
│ │ │ ├── __init__.py
│ │ │ ├── directory_router.py
│ │ │ ├── importer_router.py
│ │ │ ├── knowledge_router.py
│ │ │ ├── management_router.py
│ │ │ ├── memory_router.py
│ │ │ ├── project_router.py
│ │ │ ├── prompt_router.py
│ │ │ ├── resource_router.py
│ │ │ ├── search_router.py
│ │ │ └── utils.py
│ │ └── template_loader.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── auth.py
│ │ ├── commands
│ │ │ ├── __init__.py
│ │ │ ├── cloud
│ │ │ │ ├── __init__.py
│ │ │ │ ├── api_client.py
│ │ │ │ ├── bisync_commands.py
│ │ │ │ ├── cloud_utils.py
│ │ │ │ ├── core_commands.py
│ │ │ │ ├── mount_commands.py
│ │ │ │ ├── rclone_config.py
│ │ │ │ ├── rclone_installer.py
│ │ │ │ ├── upload_command.py
│ │ │ │ └── upload.py
│ │ │ ├── command_utils.py
│ │ │ ├── db.py
│ │ │ ├── import_chatgpt.py
│ │ │ ├── import_claude_conversations.py
│ │ │ ├── import_claude_projects.py
│ │ │ ├── import_memory_json.py
│ │ │ ├── mcp.py
│ │ │ ├── project.py
│ │ │ ├── status.py
│ │ │ ├── sync.py
│ │ │ └── tool.py
│ │ └── main.py
│ ├── config.py
│ ├── db.py
│ ├── deps.py
│ ├── file_utils.py
│ ├── ignore_utils.py
│ ├── importers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── chatgpt_importer.py
│ │ ├── claude_conversations_importer.py
│ │ ├── claude_projects_importer.py
│ │ ├── memory_json_importer.py
│ │ └── utils.py
│ ├── markdown
│ │ ├── __init__.py
│ │ ├── entity_parser.py
│ │ ├── markdown_processor.py
│ │ ├── plugins.py
│ │ ├── schemas.py
│ │ └── utils.py
│ ├── mcp
│ │ ├── __init__.py
│ │ ├── async_client.py
│ │ ├── project_context.py
│ │ ├── prompts
│ │ │ ├── __init__.py
│ │ │ ├── ai_assistant_guide.py
│ │ │ ├── continue_conversation.py
│ │ │ ├── recent_activity.py
│ │ │ ├── search.py
│ │ │ └── utils.py
│ │ ├── resources
│ │ │ ├── ai_assistant_guide.md
│ │ │ └── project_info.py
│ │ ├── server.py
│ │ └── tools
│ │ ├── __init__.py
│ │ ├── build_context.py
│ │ ├── canvas.py
│ │ ├── chatgpt_tools.py
│ │ ├── delete_note.py
│ │ ├── edit_note.py
│ │ ├── list_directory.py
│ │ ├── move_note.py
│ │ ├── project_management.py
│ │ ├── read_content.py
│ │ ├── read_note.py
│ │ ├── recent_activity.py
│ │ ├── search.py
│ │ ├── utils.py
│ │ ├── view_note.py
│ │ └── write_note.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── knowledge.py
│ │ ├── project.py
│ │ └── search.py
│ ├── repository
│ │ ├── __init__.py
│ │ ├── entity_repository.py
│ │ ├── observation_repository.py
│ │ ├── project_info_repository.py
│ │ ├── project_repository.py
│ │ ├── relation_repository.py
│ │ ├── repository.py
│ │ └── search_repository.py
│ ├── schemas
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── cloud.py
│ │ ├── delete.py
│ │ ├── directory.py
│ │ ├── importer.py
│ │ ├── memory.py
│ │ ├── project_info.py
│ │ ├── prompt.py
│ │ ├── request.py
│ │ ├── response.py
│ │ ├── search.py
│ │ └── sync_report.py
│ ├── services
│ │ ├── __init__.py
│ │ ├── context_service.py
│ │ ├── directory_service.py
│ │ ├── entity_service.py
│ │ ├── exceptions.py
│ │ ├── file_service.py
│ │ ├── initialization.py
│ │ ├── link_resolver.py
│ │ ├── project_service.py
│ │ ├── search_service.py
│ │ └── service.py
│ ├── sync
│ │ ├── __init__.py
│ │ ├── background_sync.py
│ │ ├── sync_service.py
│ │ └── watch_service.py
│ ├── templates
│ │ └── prompts
│ │ ├── continue_conversation.hbs
│ │ └── search.hbs
│ └── utils.py
├── test-int
│ ├── BENCHMARKS.md
│ ├── cli
│ │ ├── test_project_commands_integration.py
│ │ ├── test_sync_commands_integration.py
│ │ └── test_version_integration.py
│ ├── conftest.py
│ ├── mcp
│ │ ├── test_build_context_underscore.py
│ │ ├── test_build_context_validation.py
│ │ ├── test_chatgpt_tools_integration.py
│ │ ├── test_default_project_mode_integration.py
│ │ ├── test_delete_note_integration.py
│ │ ├── test_edit_note_integration.py
│ │ ├── test_list_directory_integration.py
│ │ ├── test_move_note_integration.py
│ │ ├── test_project_management_integration.py
│ │ ├── test_project_state_sync_integration.py
│ │ ├── test_read_content_integration.py
│ │ ├── test_read_note_integration.py
│ │ ├── test_search_integration.py
│ │ ├── test_single_project_mcp_integration.py
│ │ └── test_write_note_integration.py
│ ├── test_db_wal_mode.py
│ ├── test_disable_permalinks_integration.py
│ └── test_sync_performance_benchmark.py
├── tests
│ ├── __init__.py
│ ├── api
│ │ ├── conftest.py
│ │ ├── test_async_client.py
│ │ ├── test_continue_conversation_template.py
│ │ ├── test_directory_router.py
│ │ ├── test_importer_router.py
│ │ ├── test_knowledge_router.py
│ │ ├── test_management_router.py
│ │ ├── test_memory_router.py
│ │ ├── test_project_router_operations.py
│ │ ├── test_project_router.py
│ │ ├── test_prompt_router.py
│ │ ├── test_relation_background_resolution.py
│ │ ├── test_resource_router.py
│ │ ├── test_search_router.py
│ │ ├── test_search_template.py
│ │ ├── test_template_loader_helpers.py
│ │ └── test_template_loader.py
│ ├── cli
│ │ ├── conftest.py
│ │ ├── test_bisync_commands.py
│ │ ├── test_cli_tools.py
│ │ ├── test_cloud_authentication.py
│ │ ├── test_cloud_utils.py
│ │ ├── test_ignore_utils.py
│ │ ├── test_import_chatgpt.py
│ │ ├── test_import_claude_conversations.py
│ │ ├── test_import_claude_projects.py
│ │ ├── test_import_memory_json.py
│ │ └── test_upload.py
│ ├── conftest.py
│ ├── db
│ │ └── test_issue_254_foreign_key_constraints.py
│ ├── importers
│ │ ├── test_importer_base.py
│ │ └── test_importer_utils.py
│ ├── markdown
│ │ ├── __init__.py
│ │ ├── test_date_frontmatter_parsing.py
│ │ ├── test_entity_parser_error_handling.py
│ │ ├── test_entity_parser.py
│ │ ├── test_markdown_plugins.py
│ │ ├── test_markdown_processor.py
│ │ ├── test_observation_edge_cases.py
│ │ ├── test_parser_edge_cases.py
│ │ ├── test_relation_edge_cases.py
│ │ └── test_task_detection.py
│ ├── mcp
│ │ ├── conftest.py
│ │ ├── test_obsidian_yaml_formatting.py
│ │ ├── test_permalink_collision_file_overwrite.py
│ │ ├── test_prompts.py
│ │ ├── test_resources.py
│ │ ├── test_tool_build_context.py
│ │ ├── test_tool_canvas.py
│ │ ├── test_tool_delete_note.py
│ │ ├── test_tool_edit_note.py
│ │ ├── test_tool_list_directory.py
│ │ ├── test_tool_move_note.py
│ │ ├── test_tool_read_content.py
│ │ ├── test_tool_read_note.py
│ │ ├── test_tool_recent_activity.py
│ │ ├── test_tool_resource.py
│ │ ├── test_tool_search.py
│ │ ├── test_tool_utils.py
│ │ ├── test_tool_view_note.py
│ │ ├── test_tool_write_note.py
│ │ └── tools
│ │ └── test_chatgpt_tools.py
│ ├── Non-MarkdownFileSupport.pdf
│ ├── repository
│ │ ├── test_entity_repository_upsert.py
│ │ ├── test_entity_repository.py
│ │ ├── test_entity_upsert_issue_187.py
│ │ ├── test_observation_repository.py
│ │ ├── test_project_info_repository.py
│ │ ├── test_project_repository.py
│ │ ├── test_relation_repository.py
│ │ ├── test_repository.py
│ │ ├── test_search_repository_edit_bug_fix.py
│ │ └── test_search_repository.py
│ ├── schemas
│ │ ├── test_base_timeframe_minimum.py
│ │ ├── test_memory_serialization.py
│ │ ├── test_memory_url_validation.py
│ │ ├── test_memory_url.py
│ │ ├── test_schemas.py
│ │ └── test_search.py
│ ├── Screenshot.png
│ ├── services
│ │ ├── test_context_service.py
│ │ ├── test_directory_service.py
│ │ ├── test_entity_service_disable_permalinks.py
│ │ ├── test_entity_service.py
│ │ ├── test_file_service.py
│ │ ├── test_initialization.py
│ │ ├── test_link_resolver.py
│ │ ├── test_project_removal_bug.py
│ │ ├── test_project_service_operations.py
│ │ ├── test_project_service.py
│ │ └── test_search_service.py
│ ├── sync
│ │ ├── test_character_conflicts.py
│ │ ├── test_sync_service_incremental.py
│ │ ├── test_sync_service.py
│ │ ├── test_sync_wikilink_issue.py
│ │ ├── test_tmp_files.py
│ │ ├── test_watch_service_edge_cases.py
│ │ ├── test_watch_service_reload.py
│ │ └── test_watch_service.py
│ ├── test_config.py
│ ├── test_db_migration_deduplication.py
│ ├── test_deps.py
│ ├── test_production_cascade_delete.py
│ └── utils
│ ├── test_file_utils.py
│ ├── test_frontmatter_obsidian_compatible.py
│ ├── test_parse_tags.py
│ ├── test_permalink_formatting.py
│ ├── test_utf8_handling.py
│ └── test_validate_project_path.py
├── uv.lock
├── v0.15.0-RELEASE-DOCS.md
└── v15-docs
├── api-performance.md
├── background-relations.md
├── basic-memory-home.md
├── bug-fixes.md
├── chatgpt-integration.md
├── cloud-authentication.md
├── cloud-bisync.md
├── cloud-mode-usage.md
├── cloud-mount.md
├── default-project-mode.md
├── env-file-removal.md
├── env-var-overrides.md
├── explicit-project-parameter.md
├── gitignore-integration.md
├── project-root-env-var.md
├── README.md
└── sqlite-performance.md
```
# Files
--------------------------------------------------------------------------------
/tests/services/test_search_service.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for search service."""
2 |
3 | from datetime import datetime
4 |
5 | import pytest
6 | from sqlalchemy import text
7 |
8 | from basic_memory import db
9 | from basic_memory.schemas.search import SearchQuery, SearchItemType
10 |
11 |
12 | @pytest.mark.asyncio
13 | async def test_search_permalink(search_service, test_graph):
14 | """Exact permalink"""
15 | results = await search_service.search(SearchQuery(permalink="test/root"))
16 | assert len(results) == 1
17 |
18 | for r in results:
19 | assert "test/root" in r.permalink
20 |
21 |
22 | @pytest.mark.asyncio
23 | async def test_search_limit_offset(search_service, test_graph):
24 |     """Test limit and offset parameters."""
25 | results = await search_service.search(SearchQuery(permalink_match="test/*"))
26 | assert len(results) > 1
27 |
28 | results = await search_service.search(SearchQuery(permalink_match="test/*"), limit=1)
29 | assert len(results) == 1
30 |
31 | results = await search_service.search(SearchQuery(permalink_match="test/*"), limit=100)
32 | num_results = len(results)
33 |
34 | # assert offset
35 | offset_results = await search_service.search(
36 | SearchQuery(permalink_match="test/*"), limit=100, offset=1
37 | )
38 | assert len(offset_results) == num_results - 1
39 |
40 |
41 | @pytest.mark.asyncio
42 | async def test_search_permalink_observations_wildcard(search_service, test_graph):
43 | """Pattern matching"""
44 | results = await search_service.search(SearchQuery(permalink_match="test/root/observations/*"))
45 | assert len(results) == 2
46 | permalinks = {r.permalink for r in results}
47 | assert "test/root/observations/note/root-note-1" in permalinks
48 | assert "test/root/observations/tech/root-tech-note" in permalinks
49 |
50 |
51 | @pytest.mark.asyncio
52 | async def test_search_permalink_relation_wildcard(search_service, test_graph):
53 | """Pattern matching"""
54 | results = await search_service.search(SearchQuery(permalink_match="test/root/connects-to/*"))
55 | assert len(results) == 1
56 | permalinks = {r.permalink for r in results}
57 | assert "test/root/connects-to/test/connected-entity-1" in permalinks
58 |
59 |
60 | @pytest.mark.asyncio
61 | async def test_search_permalink_wildcard2(search_service, test_graph):
62 | """Pattern matching"""
63 | results = await search_service.search(
64 | SearchQuery(
65 | permalink_match="test/connected*",
66 | )
67 | )
68 | assert len(results) >= 2
69 | permalinks = {r.permalink for r in results}
70 | assert "test/connected-entity-1" in permalinks
71 | assert "test/connected-entity-2" in permalinks
72 |
73 |
74 | @pytest.mark.asyncio
75 | async def test_search_text(search_service, test_graph):
76 | """Full-text search"""
77 | results = await search_service.search(
78 | SearchQuery(text="Root Entity", entity_types=[SearchItemType.ENTITY])
79 | )
80 | assert len(results) >= 1
81 | assert results[0].permalink == "test/root"
82 |
83 |
84 | @pytest.mark.asyncio
85 | async def test_search_title(search_service, test_graph):
86 | """Title only search"""
87 | results = await search_service.search(
88 | SearchQuery(title="Root", entity_types=[SearchItemType.ENTITY])
89 | )
90 | assert len(results) >= 1
91 | assert results[0].permalink == "test/root"
92 |
93 |
94 | @pytest.mark.asyncio
95 | async def test_text_search_case_insensitive(search_service, test_graph):
96 | """Test text search functionality."""
97 | # Case insensitive
98 | results = await search_service.search(SearchQuery(text="ENTITY"))
99 | assert any("test/root" in r.permalink for r in results)
100 |
101 |
102 | @pytest.mark.asyncio
103 | async def test_text_search_content_word_match(search_service, test_graph):
104 | """Test text search functionality."""
105 |
106 | # content word match
107 | results = await search_service.search(SearchQuery(text="Connected"))
108 | assert len(results) > 0
109 | assert any(r.file_path == "test/Connected Entity 2.md" for r in results)
110 |
111 |
112 | @pytest.mark.asyncio
113 | async def test_text_search_multiple_terms(search_service, test_graph):
114 | """Test text search functionality."""
115 |
116 | # Multiple terms
117 | results = await search_service.search(SearchQuery(text="root note"))
118 | assert any("test/root" in r.permalink for r in results)
119 |
120 |
121 | @pytest.mark.asyncio
122 | async def test_pattern_matching(search_service, test_graph):
123 | """Test pattern matching with various wildcards."""
124 | # Test wildcards
125 | results = await search_service.search(SearchQuery(permalink_match="test/*"))
126 | for r in results:
127 | assert "test/" in r.permalink
128 |
129 | # Test start wildcards
130 | results = await search_service.search(SearchQuery(permalink_match="*/observations"))
131 | for r in results:
132 | assert "/observations" in r.permalink
133 |
134 | # Test permalink partial match
135 | results = await search_service.search(SearchQuery(permalink_match="test"))
136 | for r in results:
137 | assert "test/" in r.permalink
138 |
139 |
140 | @pytest.mark.asyncio
141 | async def test_filters(search_service, test_graph):
142 | """Test search filters."""
143 | # Combined filters
144 | results = await search_service.search(
145 | SearchQuery(text="Deep", entity_types=[SearchItemType.ENTITY], types=["deep"])
146 | )
147 | assert len(results) == 1
148 | for r in results:
149 | assert r.type == SearchItemType.ENTITY
150 | assert r.metadata.get("entity_type") == "deep"
151 |
152 |
153 | @pytest.mark.asyncio
154 | async def test_after_date(search_service, test_graph):
155 |     """Test the after_date filter."""
156 |
157 | # Should find with past date
158 | past_date = datetime(2020, 1, 1).astimezone()
159 | results = await search_service.search(
160 | SearchQuery(
161 | text="entity",
162 | after_date=past_date.isoformat(),
163 | )
164 | )
165 | for r in results:
166 | assert datetime.fromisoformat(r.created_at) > past_date
167 |
168 | # Should not find with future date
169 | future_date = datetime(2030, 1, 1).astimezone()
170 | results = await search_service.search(
171 | SearchQuery(
172 | text="entity",
173 | after_date=future_date.isoformat(),
174 | )
175 | )
176 | assert len(results) == 0
177 |
178 |
179 | @pytest.mark.asyncio
180 | async def test_search_type(search_service, test_graph):
181 |     """Test filtering by type."""
182 |
183 | # Should find only type
184 | results = await search_service.search(SearchQuery(types=["test"]))
185 | assert len(results) > 0
186 | for r in results:
187 | assert r.type == SearchItemType.ENTITY
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_search_entity_type(search_service, test_graph):
192 |     """Test filtering by entity type."""
193 |
194 | # Should find only type
195 | results = await search_service.search(SearchQuery(entity_types=[SearchItemType.ENTITY]))
196 | assert len(results) > 0
197 | for r in results:
198 | assert r.type == SearchItemType.ENTITY
199 |
200 |
201 | @pytest.mark.asyncio
202 | async def test_extract_entity_tags_exception_handling(search_service):
203 | """Test the _extract_entity_tags method exception handling (lines 147-151)."""
204 | from basic_memory.models.knowledge import Entity
205 |
206 | # Create entity with string tags that will cause parsing to fail and fall back to single tag
207 | entity_with_invalid_tags = Entity(
208 | title="Test Entity",
209 | entity_type="test",
210 | entity_metadata={"tags": "just a string"}, # This will fail ast.literal_eval
211 | content_type="text/markdown",
212 | file_path="test/test-entity.md",
213 | project_id=1,
214 | )
215 |
216 | # This should trigger the except block on lines 147-149
217 | result = search_service._extract_entity_tags(entity_with_invalid_tags)
218 | assert result == ["just a string"]
219 |
220 | # Test with empty string (should return empty list) - covers line 149
221 | entity_with_empty_tags = Entity(
222 | title="Test Entity Empty",
223 | entity_type="test",
224 | entity_metadata={"tags": ""},
225 | content_type="text/markdown",
226 | file_path="test/test-entity-empty.md",
227 | project_id=1,
228 | )
229 |
230 | result = search_service._extract_entity_tags(entity_with_empty_tags)
231 | assert result == []
232 |
233 |
234 | @pytest.mark.asyncio
235 | async def test_delete_entity_without_permalink(search_service, sample_entity):
236 | """Test deleting an entity that has no permalink (edge case)."""
237 |
238 | # Set the entity permalink to None to trigger the else branch on line 355
239 | sample_entity.permalink = None
240 |
241 | # This should trigger the delete_by_entity_id path (line 355) in handle_delete
242 | await search_service.handle_delete(sample_entity)
243 |
244 |
245 | @pytest.mark.asyncio
246 | async def test_no_criteria(search_service, test_graph):
247 | """Test search with no criteria returns empty list."""
248 | results = await search_service.search(SearchQuery())
249 | assert len(results) == 0
250 |
251 |
252 | @pytest.mark.asyncio
253 | async def test_init_search_index(search_service, session_maker):
254 | """Test search index initialization."""
255 | async with db.scoped_session(session_maker) as session:
256 | result = await session.execute(
257 | text("SELECT name FROM sqlite_master WHERE type='table' AND name='search_index';")
258 | )
259 | assert result.scalar() == "search_index"
260 |
261 |
262 | @pytest.mark.asyncio
263 | async def test_update_index(search_service, full_entity):
264 | """Test updating indexed content."""
265 | await search_service.index_entity(full_entity)
266 |
267 | # Update entity
268 | full_entity.title = "OMG I AM UPDATED"
269 | await search_service.index_entity(full_entity)
270 |
271 | # Search for new title
272 | results = await search_service.search(SearchQuery(text="OMG I AM UPDATED"))
273 | assert len(results) > 1
274 |
275 |
276 | @pytest.mark.asyncio
277 | async def test_boolean_and_search(search_service, test_graph):
278 | """Test boolean AND search."""
279 | # Create an entity with specific terms for testing
280 | # This assumes the test_graph fixture already has entities with relevant terms
281 |
282 | # Test AND operator - both terms must be present
283 | results = await search_service.search(SearchQuery(text="Root AND Entity"))
284 | assert len(results) >= 1
285 |
286 | # Verify the result contains both terms
287 | found = False
288 | for result in results:
289 | if (result.title and "Root" in result.title and "Entity" in result.title) or (
290 | result.content_snippet
291 | and "Root" in result.content_snippet
292 | and "Entity" in result.content_snippet
293 | ):
294 | found = True
295 | break
296 | assert found, "Boolean AND search failed to find items containing both terms"
297 |
298 | # Verify that items with only one term are not returned
299 | results = await search_service.search(SearchQuery(text="NonexistentTerm AND Root"))
300 | assert len(results) == 0, "Boolean AND search returned results when it shouldn't have"
301 |
302 |
303 | @pytest.mark.asyncio
304 | async def test_boolean_or_search(search_service, test_graph):
305 | """Test boolean OR search."""
306 | # Test OR operator - either term can be present
307 | results = await search_service.search(SearchQuery(text="Root OR Connected"))
308 |
309 | # Should find both "Root Entity" and "Connected Entity"
310 | assert len(results) >= 2
311 |
312 | # Verify we find items with either term
313 | root_found = False
314 | connected_found = False
315 |
316 | for result in results:
317 | if result.permalink == "test/root":
318 | root_found = True
319 | elif "connected" in result.permalink.lower():
320 | connected_found = True
321 |
322 | assert root_found, "Boolean OR search failed to find 'Root' term"
323 | assert connected_found, "Boolean OR search failed to find 'Connected' term"
324 |
325 |
326 | @pytest.mark.asyncio
327 | async def test_boolean_not_search(search_service, test_graph):
328 | """Test boolean NOT search."""
329 | # Test NOT operator - exclude certain terms
330 | results = await search_service.search(SearchQuery(text="Entity NOT Connected"))
331 |
332 | # Should find "Root Entity" but not "Connected Entity"
333 | for result in results:
334 | assert "connected" not in result.permalink.lower(), (
335 | "Boolean NOT search returned excluded term"
336 | )
337 |
338 |
339 | @pytest.mark.asyncio
340 | async def test_boolean_group_search(search_service, test_graph):
341 | """Test boolean grouping with parentheses."""
342 | # Test grouping - (A OR B) AND C
343 | results = await search_service.search(SearchQuery(title="(Root OR Connected) AND Entity"))
344 |
345 | # Should find both entities that contain "Entity" and either "Root" or "Connected"
346 | assert len(results) >= 2
347 |
348 | for result in results:
349 | # Each result should contain "Entity" and either "Root" or "Connected"
350 | contains_entity = "entity" in result.title.lower()
351 | contains_root_or_connected = (
352 | "root" in result.title.lower() or "connected" in result.title.lower()
353 | )
354 |
355 | assert contains_entity and contains_root_or_connected, (
356 | "Boolean grouped search returned incorrect results"
357 | )
358 |
359 |
360 | @pytest.mark.asyncio
361 | async def test_boolean_operators_detection(search_service):
362 | """Test detection of boolean operators in query."""
363 | # Test various queries that should be detected as boolean
364 | boolean_queries = [
365 | "term1 AND term2",
366 | "term1 OR term2",
367 | "term1 NOT term2",
368 | "(term1 OR term2) AND term3",
369 | "complex (nested OR grouping) AND term",
370 | ]
371 |
372 | for query_text in boolean_queries:
373 | query = SearchQuery(text=query_text)
374 | assert query.has_boolean_operators(), f"Failed to detect boolean operators in: {query_text}"
375 |
376 | # Test queries that should not be detected as boolean
377 | non_boolean_queries = [
378 | "normal search query",
379 | "brand name", # Should not detect "AND" within "brand"
380 | "understand this concept", # Should not detect "AND" within "understand"
381 | "command line",
382 | "sandbox testing",
383 | ]
384 |
385 | for query_text in non_boolean_queries:
386 | query = SearchQuery(text=query_text)
387 | assert not query.has_boolean_operators(), (
388 | f"Incorrectly detected boolean operators in: {query_text}"
389 | )
390 |
391 |
392 | # Tests for frontmatter tag search functionality
393 |
394 |
395 | @pytest.mark.asyncio
396 | async def test_extract_entity_tags_list_format(search_service, session_maker):
397 | """Test tag extraction from list format in entity metadata."""
398 | from basic_memory.models import Entity
399 |
400 | entity = Entity(
401 | title="Test Entity",
402 | entity_type="note",
403 | entity_metadata={"tags": ["business", "strategy", "planning"]},
404 | content_type="text/markdown",
405 | file_path="test/business-strategy.md",
406 | project_id=1,
407 | )
408 |
409 | tags = search_service._extract_entity_tags(entity)
410 | assert tags == ["business", "strategy", "planning"]
411 |
412 |
413 | @pytest.mark.asyncio
414 | async def test_extract_entity_tags_string_format(search_service, session_maker):
415 | """Test tag extraction from string format in entity metadata."""
416 | from basic_memory.models import Entity
417 |
418 | entity = Entity(
419 | title="Test Entity",
420 | entity_type="note",
421 | entity_metadata={"tags": "['documentation', 'tools', 'best-practices']"},
422 | content_type="text/markdown",
423 | file_path="test/docs.md",
424 | project_id=1,
425 | )
426 |
427 | tags = search_service._extract_entity_tags(entity)
428 | assert tags == ["documentation", "tools", "best-practices"]
429 |
430 |
431 | @pytest.mark.asyncio
432 | async def test_extract_entity_tags_empty_list(search_service, session_maker):
433 | """Test tag extraction from empty list in entity metadata."""
434 | from basic_memory.models import Entity
435 |
436 | entity = Entity(
437 | title="Test Entity",
438 | entity_type="note",
439 | entity_metadata={"tags": []},
440 | content_type="text/markdown",
441 | file_path="test/empty-tags.md",
442 | project_id=1,
443 | )
444 |
445 | tags = search_service._extract_entity_tags(entity)
446 | assert tags == []
447 |
448 |
449 | @pytest.mark.asyncio
450 | async def test_extract_entity_tags_empty_string(search_service, session_maker):
451 | """Test tag extraction from empty string in entity metadata."""
452 | from basic_memory.models import Entity
453 |
454 | entity = Entity(
455 | title="Test Entity",
456 | entity_type="note",
457 | entity_metadata={"tags": "[]"},
458 | content_type="text/markdown",
459 | file_path="test/empty-string-tags.md",
460 | project_id=1,
461 | )
462 |
463 | tags = search_service._extract_entity_tags(entity)
464 | assert tags == []
465 |
466 |
467 | @pytest.mark.asyncio
468 | async def test_extract_entity_tags_no_metadata(search_service, session_maker):
469 | """Test tag extraction when entity has no metadata."""
470 | from basic_memory.models import Entity
471 |
472 | entity = Entity(
473 | title="Test Entity",
474 | entity_type="note",
475 | entity_metadata=None,
476 | content_type="text/markdown",
477 | file_path="test/no-metadata.md",
478 | project_id=1,
479 | )
480 |
481 | tags = search_service._extract_entity_tags(entity)
482 | assert tags == []
483 |
484 |
485 | @pytest.mark.asyncio
486 | async def test_extract_entity_tags_no_tags_key(search_service, session_maker):
487 | """Test tag extraction when metadata exists but has no tags key."""
488 | from basic_memory.models import Entity
489 |
490 | entity = Entity(
491 | title="Test Entity",
492 | entity_type="note",
493 | entity_metadata={"title": "Some Title", "type": "note"},
494 | content_type="text/markdown",
495 | file_path="test/no-tags-key.md",
496 | project_id=1,
497 | )
498 |
499 | tags = search_service._extract_entity_tags(entity)
500 | assert tags == []
501 |
502 |
503 | @pytest.mark.asyncio
504 | async def test_search_by_frontmatter_tags(search_service, session_maker, test_project):
505 | """Test that entities can be found by searching for their frontmatter tags."""
506 | from basic_memory.repository import EntityRepository
507 | from unittest.mock import AsyncMock
508 |
509 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
510 |
511 | # Create entity with tags
512 | from datetime import datetime
513 |
514 | entity_data = {
515 | "title": "Business Strategy Guide",
516 | "entity_type": "note",
517 | "entity_metadata": {"tags": ["business", "strategy", "planning", "organization"]},
518 | "content_type": "text/markdown",
519 | "file_path": "guides/business-strategy.md",
520 | "permalink": "guides/business-strategy",
521 | "project_id": test_project.id,
522 | "created_at": datetime.now(),
523 | "updated_at": datetime.now(),
524 | }
525 |
526 | entity = await entity_repo.create(entity_data)
527 |
528 | # Mock file service to avoid file I/O
529 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
530 |
531 | await search_service.index_entity(entity)
532 |
533 | # Search for entities by tag
534 | results = await search_service.search(SearchQuery(text="business"))
535 | assert len(results) >= 1
536 |
537 | # Check that our entity is in the results
538 | entity_found = False
539 | for result in results:
540 | if result.title == "Business Strategy Guide":
541 | entity_found = True
542 | break
543 | assert entity_found, "Entity with 'business' tag should be found in search results"
544 |
545 | # Test searching by another tag
546 | results = await search_service.search(SearchQuery(text="planning"))
547 | assert len(results) >= 1
548 |
549 | entity_found = False
550 | for result in results:
551 | if result.title == "Business Strategy Guide":
552 | entity_found = True
553 | break
554 | assert entity_found, "Entity with 'planning' tag should be found in search results"
555 |
556 |
557 | @pytest.mark.asyncio
558 | async def test_search_by_frontmatter_tags_string_format(
559 | search_service, session_maker, test_project
560 | ):
561 | """Test that entities with string format tags can be found in search."""
562 | from basic_memory.repository import EntityRepository
563 | from unittest.mock import AsyncMock
564 |
565 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
566 |
567 | # Create entity with tags in string format
568 | from datetime import datetime
569 |
570 | entity_data = {
571 | "title": "Documentation Guidelines",
572 | "entity_type": "note",
573 | "entity_metadata": {"tags": "['documentation', 'tools', 'best-practices']"},
574 | "content_type": "text/markdown",
575 | "file_path": "guides/documentation.md",
576 | "permalink": "guides/documentation",
577 | "project_id": test_project.id,
578 | "created_at": datetime.now(),
579 | "updated_at": datetime.now(),
580 | }
581 |
582 | entity = await entity_repo.create(entity_data)
583 |
584 | # Mock file service to avoid file I/O
585 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
586 |
587 | await search_service.index_entity(entity)
588 |
589 | # Search for entities by tag
590 | results = await search_service.search(SearchQuery(text="documentation"))
591 | assert len(results) >= 1
592 |
593 | # Check that our entity is in the results
594 | entity_found = False
595 | for result in results:
596 | if result.title == "Documentation Guidelines":
597 | entity_found = True
598 | break
599 | assert entity_found, "Entity with 'documentation' tag should be found in search results"
600 |
601 |
602 | @pytest.mark.asyncio
603 | async def test_search_special_characters_in_title(search_service, session_maker, test_project):
604 | """Test that entities with special characters in titles can be searched without FTS5 syntax errors."""
605 | from basic_memory.repository import EntityRepository
606 | from unittest.mock import AsyncMock
607 |
608 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
609 |
610 | # Create entities with special characters that could cause FTS5 syntax errors
611 | special_titles = [
612 | "Note with spaces",
613 | "Note-with-dashes",
614 | "Note_with_underscores",
615 | "Note (with parentheses)", # This is the problematic one
616 | "Note & Symbols!",
617 | "Note [with brackets]",
618 | "Note {with braces}",
619 | 'Note "with quotes"',
620 | "Note 'with apostrophes'",
621 | ]
622 |
623 | entities = []
624 | for i, title in enumerate(special_titles):
625 | from datetime import datetime
626 |
627 | entity_data = {
628 | "title": title,
629 | "entity_type": "note",
630 | "entity_metadata": {"tags": ["special", "characters"]},
631 | "content_type": "text/markdown",
632 | "file_path": f"special/{title}.md",
633 | "permalink": f"special/note-{i}",
634 | "project_id": test_project.id,
635 | "created_at": datetime.now(),
636 | "updated_at": datetime.now(),
637 | }
638 |
639 | entity = await entity_repo.create(entity_data)
640 | entities.append(entity)
641 |
642 | # Mock file service to avoid file I/O
643 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
644 |
645 | # Index all entities
646 | for entity in entities:
647 | await search_service.index_entity(entity)
648 |
649 | # Test searching for each title - this should not cause FTS5 syntax errors
650 | for title in special_titles:
651 | results = await search_service.search(SearchQuery(title=title))
652 |
653 | # Should find the entity without throwing FTS5 syntax errors
654 | entity_found = False
655 | for result in results:
656 | if result.title == title:
657 | entity_found = True
658 | break
659 |
660 | assert entity_found, f"Entity with title '{title}' should be found in search results"
661 |
662 |
663 | @pytest.mark.asyncio
664 | async def test_search_title_with_parentheses_specific(search_service, session_maker, test_project):
665 | """Test searching specifically for title with parentheses to reproduce FTS5 error."""
666 | from basic_memory.repository import EntityRepository
667 | from unittest.mock import AsyncMock
668 |
669 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
670 |
671 | # Create the problematic entity
672 | from datetime import datetime
673 |
674 | entity_data = {
675 | "title": "Note (with parentheses)",
676 | "entity_type": "note",
677 | "entity_metadata": {"tags": ["test"]},
678 | "content_type": "text/markdown",
679 | "file_path": "special/Note (with parentheses).md",
680 | "permalink": "special/note-with-parentheses",
681 | "project_id": test_project.id,
682 | "created_at": datetime.now(),
683 | "updated_at": datetime.now(),
684 | }
685 |
686 | entity = await entity_repo.create(entity_data)
687 |
688 | # Mock file service to avoid file I/O
689 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
690 |
691 | # Index the entity
692 | await search_service.index_entity(entity)
693 |
694 | # Test searching for the title - this should not cause FTS5 syntax errors
695 | search_query = SearchQuery(title="Note (with parentheses)")
696 | results = await search_service.search(search_query)
697 |
698 | # Should find the entity without throwing FTS5 syntax errors
699 | assert len(results) >= 1
700 | assert any(result.title == "Note (with parentheses)" for result in results)
701 |
702 |
703 | @pytest.mark.asyncio
704 | async def test_search_title_via_repository_direct(search_service, session_maker, test_project):
705 | """Test searching via search repository directly to isolate the FTS5 error."""
706 | from basic_memory.repository import EntityRepository
707 | from unittest.mock import AsyncMock
708 |
709 | entity_repo = EntityRepository(session_maker, project_id=test_project.id)
710 |
711 | # Create the problematic entity
712 | from datetime import datetime
713 |
714 | entity_data = {
715 | "title": "Note (with parentheses)",
716 | "entity_type": "note",
717 | "entity_metadata": {"tags": ["test"]},
718 | "content_type": "text/markdown",
719 | "file_path": "special/Note (with parentheses).md",
720 | "permalink": "special/note-with-parentheses",
721 | "project_id": test_project.id,
722 | "created_at": datetime.now(),
723 | "updated_at": datetime.now(),
724 | }
725 |
726 | entity = await entity_repo.create(entity_data)
727 |
728 | # Mock file service to avoid file I/O
729 | search_service.file_service.read_entity_content = AsyncMock(return_value="")
730 |
731 | # Index the entity
732 | await search_service.index_entity(entity)
733 |
734 | # Test searching via repository directly - this reproduces the error path
735 | results = await search_service.repository.search(
736 | title="Note (with parentheses)",
737 | limit=10,
738 | offset=0,
739 | )
740 |
741 | # Should find the entity without throwing FTS5 syntax errors
742 | assert len(results) >= 1
743 | assert any(result.title == "Note (with parentheses)" for result in results)
744 |
```
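
The tag-extraction tests above pin down the behavior expected of `SearchService._extract_entity_tags`: list-valued tags pass through unchanged, stringified lists such as `"['a', 'b']"` are parsed, unparseable strings fall back to a single tag, and empty values yield an empty list. The standalone sketch below is illustrative only and simply mirrors those assertions; the real method lives in `src/basic_memory/services/search_service.py` and may differ in detail.

```python
import ast
from typing import Any


def extract_entity_tags(entity: Any) -> list[str]:
    """Illustrative sketch of tag extraction consistent with the tests above."""
    metadata = entity.entity_metadata or {}
    tags = metadata.get("tags")
    if tags is None:
        return []
    # List-valued tags pass through unchanged.
    if isinstance(tags, list):
        return [str(tag) for tag in tags]
    if isinstance(tags, str):
        if not tags.strip():
            # An empty string means no tags.
            return []
        try:
            parsed = ast.literal_eval(tags)
            if isinstance(parsed, list):
                # Stringified lists like "['a', 'b']" (or "[]") are parsed.
                return [str(tag) for tag in parsed]
        except (ValueError, SyntaxError):
            pass
        # Anything that cannot be parsed falls back to a single tag.
        return [tags]
    return []
```

Under these assumptions, `extract_entity_tags(entity_with_invalid_tags)` from the exception-handling test would return `["just a string"]`, and the empty-string and `"[]"` cases would return `[]`, matching the assertions above.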
--------------------------------------------------------------------------------
/tests/api/test_project_router.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the project router API endpoints."""
2 |
3 | import tempfile
4 | from pathlib import Path
5 |
6 | import pytest
7 |
8 | from basic_memory.schemas.project_info import ProjectItem
9 |
10 |
11 | @pytest.mark.asyncio
12 | async def test_get_project_item(test_graph, client, project_config, test_project, project_url):
13 | """Test the project item endpoint returns correctly structured data."""
14 | # Set up some test data in the database
15 |
16 | # Call the endpoint
17 | response = await client.get(f"{project_url}/project/item")
18 |
19 | # Verify response
20 | assert response.status_code == 200
21 | project_info = ProjectItem.model_validate(response.json())
22 | assert project_info.name == test_project.name
23 | assert project_info.path == test_project.path
24 | assert project_info.is_default == test_project.is_default
25 |
26 |
27 | @pytest.mark.asyncio
28 | async def test_get_project_item_not_found(
29 | test_graph, client, project_config, test_project, project_url
30 | ):
31 |     """Test the project item endpoint returns 404 for an unknown project."""
32 | # Set up some test data in the database
33 |
34 | # Call the endpoint
35 | response = await client.get("/not-found/project/item")
36 |
37 | # Verify response
38 | assert response.status_code == 404
39 |
40 |
41 | @pytest.mark.asyncio
42 | async def test_get_default_project(test_graph, client, project_config, test_project, project_url):
43 | """Test the default project item endpoint returns the default project."""
44 | # Set up some test data in the database
45 |
46 | # Call the endpoint
47 | response = await client.get("/projects/default")
48 |
49 | # Verify response
50 | assert response.status_code == 200
51 | project_info = ProjectItem.model_validate(response.json())
52 | assert project_info.name == test_project.name
53 | assert project_info.path == test_project.path
54 | assert project_info.is_default == test_project.is_default
55 |
56 |
57 | @pytest.mark.asyncio
58 | async def test_get_project_info_endpoint(test_graph, client, project_config, project_url):
59 | """Test the project-info endpoint returns correctly structured data."""
60 | # Set up some test data in the database
61 |
62 | # Call the endpoint
63 | response = await client.get(f"{project_url}/project/info")
64 |
65 | # Verify response
66 | assert response.status_code == 200
67 | data = response.json()
68 |
69 | # Check top-level keys
70 | assert "project_name" in data
71 | assert "project_path" in data
72 | assert "available_projects" in data
73 | assert "default_project" in data
74 | assert "statistics" in data
75 | assert "activity" in data
76 | assert "system" in data
77 |
78 | # Check statistics
79 | stats = data["statistics"]
80 | assert "total_entities" in stats
81 | assert stats["total_entities"] >= 0
82 | assert "total_observations" in stats
83 | assert stats["total_observations"] >= 0
84 | assert "total_relations" in stats
85 | assert stats["total_relations"] >= 0
86 |
87 | # Check activity
88 | activity = data["activity"]
89 | assert "recently_created" in activity
90 | assert "recently_updated" in activity
91 | assert "monthly_growth" in activity
92 |
93 | # Check system
94 | system = data["system"]
95 | assert "version" in system
96 | assert "database_path" in system
97 | assert "database_size" in system
98 | assert "timestamp" in system
99 |
100 |
101 | @pytest.mark.asyncio
102 | async def test_get_project_info_content(test_graph, client, project_config, project_url):
103 | """Test that project-info contains actual data from the test database."""
104 | # Call the endpoint
105 | response = await client.get(f"{project_url}/project/info")
106 |
107 | # Verify response
108 | assert response.status_code == 200
109 | data = response.json()
110 |
111 | # Check that test_graph content is reflected in statistics
112 | stats = data["statistics"]
113 |
114 | # Our test graph should have at least a few entities
115 | assert stats["total_entities"] > 0
116 |
117 | # It should also have some observations
118 | assert stats["total_observations"] > 0
119 |
120 | # And relations
121 | assert stats["total_relations"] > 0
122 |
123 | # Check that entity types include 'test'
124 | assert "test" in stats["entity_types"] or "entity" in stats["entity_types"]
125 |
126 |
127 | @pytest.mark.asyncio
128 | async def test_list_projects_endpoint(test_config, test_graph, client, project_config, project_url):
129 | """Test the list projects endpoint returns correctly structured data."""
130 | # Call the endpoint
131 | response = await client.get("/projects/projects")
132 |
133 | # Verify response
134 | assert response.status_code == 200
135 | data = response.json()
136 |
137 | # Check that the response contains expected fields
138 | assert "projects" in data
139 | assert "default_project" in data
140 |
141 | # Check that projects is a list
142 | assert isinstance(data["projects"], list)
143 |
144 | # There should be at least one project (the test project)
145 | assert len(data["projects"]) > 0
146 |
147 | # Verify project item structure
148 | if data["projects"]:
149 | project = data["projects"][0]
150 | assert "name" in project
151 | assert "path" in project
152 | assert "is_default" in project
153 |
154 | # Default project should be marked
155 | default_project = next((p for p in data["projects"] if p["is_default"]), None)
156 | assert default_project is not None
157 | assert default_project["name"] == data["default_project"]
158 |
159 |
160 | @pytest.mark.asyncio
161 | async def test_remove_project_endpoint(test_config, client, project_service):
162 | """Test the remove project endpoint."""
163 | # First create a test project to remove
164 | test_project_name = "test-remove-project"
165 | await project_service.add_project(test_project_name, "/tmp/test-remove-project")
166 |
167 | # Verify it exists
168 | project = await project_service.get_project(test_project_name)
169 | assert project is not None
170 |
171 | # Remove the project
172 | response = await client.delete(f"/projects/{test_project_name}")
173 |
174 | # Verify response
175 | assert response.status_code == 200
176 | data = response.json()
177 |
178 | # Check response structure
179 | assert "message" in data
180 | assert "status" in data
181 | assert data["status"] == "success"
182 | assert "old_project" in data
183 | assert data["old_project"]["name"] == test_project_name
184 |
185 | # Verify project is actually removed
186 | removed_project = await project_service.get_project(test_project_name)
187 | assert removed_project is None
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_set_default_project_endpoint(test_config, client, project_service):
192 | """Test the set default project endpoint."""
193 | # Create a test project to set as default
194 | test_project_name = "test-default-project"
195 | await project_service.add_project(test_project_name, "/tmp/test-default-project")
196 |
197 | # Set it as default
198 | response = await client.put(f"/projects/{test_project_name}/default")
199 |
200 | # Verify response
201 | assert response.status_code == 200
202 | data = response.json()
203 |
204 | # Check response structure
205 | assert "message" in data
206 | assert "status" in data
207 | assert data["status"] == "success"
208 | assert "new_project" in data
209 | assert data["new_project"]["name"] == test_project_name
210 |
211 | # Verify it's actually set as default
212 | assert project_service.default_project == test_project_name
213 |
214 |
215 | @pytest.mark.asyncio
216 | async def test_update_project_path_endpoint(test_config, client, project_service, project_url):
217 | """Test the update project endpoint for changing project path."""
218 | # Create a test project to update
219 | test_project_name = "test-update-project"
220 | with tempfile.TemporaryDirectory() as temp_dir:
221 | test_root = Path(temp_dir)
222 | old_path = (test_root / "old-location").as_posix()
223 | new_path = (test_root / "new-location").as_posix()
224 |
225 | await project_service.add_project(test_project_name, old_path)
226 |
227 | try:
228 | # Verify initial state
229 | project = await project_service.get_project(test_project_name)
230 | assert project is not None
231 | assert project.path == old_path
232 |
233 | # Update the project path
234 | response = await client.patch(
235 | f"{project_url}/project/{test_project_name}", json={"path": new_path}
236 | )
237 |
238 | # Verify response
239 | assert response.status_code == 200
240 | data = response.json()
241 |
242 | # Check response structure
243 | assert "message" in data
244 | assert "status" in data
245 | assert data["status"] == "success"
246 | assert "old_project" in data
247 | assert "new_project" in data
248 |
249 | # Check old project data
250 | assert data["old_project"]["name"] == test_project_name
251 | assert data["old_project"]["path"] == old_path
252 |
253 | # Check new project data
254 | assert data["new_project"]["name"] == test_project_name
255 | assert data["new_project"]["path"] == new_path
256 |
257 | # Verify project was actually updated in database
258 | updated_project = await project_service.get_project(test_project_name)
259 | assert updated_project is not None
260 | assert updated_project.path == new_path
261 |
262 | finally:
263 | # Clean up
264 | try:
265 | await project_service.remove_project(test_project_name)
266 | except Exception:
267 | pass
268 |
269 |
270 | @pytest.mark.asyncio
271 | async def test_update_project_is_active_endpoint(test_config, client, project_service, project_url):
272 | """Test the update project endpoint for changing is_active status."""
273 | # Create a test project to update
274 | test_project_name = "test-update-active-project"
275 | test_path = "/tmp/test-update-active"
276 |
277 | await project_service.add_project(test_project_name, test_path)
278 |
279 | try:
280 | # Update the project is_active status
281 | response = await client.patch(
282 | f"{project_url}/project/{test_project_name}", json={"is_active": False}
283 | )
284 |
285 | # Verify response
286 | assert response.status_code == 200
287 | data = response.json()
288 |
289 | # Check response structure
290 | assert "message" in data
291 | assert "status" in data
292 | assert data["status"] == "success"
293 | assert f"Project '{test_project_name}' updated successfully" == data["message"]
294 |
295 | finally:
296 | # Clean up
297 | try:
298 | await project_service.remove_project(test_project_name)
299 | except Exception:
300 | pass
301 |
302 |
303 | @pytest.mark.asyncio
304 | async def test_update_project_both_params_endpoint(
305 | test_config, client, project_service, project_url
306 | ):
307 | """Test the update project endpoint with both path and is_active parameters."""
308 | # Create a test project to update
309 | test_project_name = "test-update-both-project"
310 | with tempfile.TemporaryDirectory() as temp_dir:
311 | test_root = Path(temp_dir)
312 | old_path = (test_root / "old-location").as_posix()
313 | new_path = (test_root / "new-location").as_posix()
314 |
315 | await project_service.add_project(test_project_name, old_path)
316 |
317 | try:
318 | # Update both path and is_active (path should take precedence)
319 | response = await client.patch(
320 | f"{project_url}/project/{test_project_name}",
321 | json={"path": new_path, "is_active": False},
322 | )
323 |
324 | # Verify response
325 | assert response.status_code == 200
326 | data = response.json()
327 |
328 | # Check that path update was performed (takes precedence)
329 | assert data["new_project"]["path"] == new_path
330 |
331 | # Verify project was actually updated in database
332 | updated_project = await project_service.get_project(test_project_name)
333 | assert updated_project is not None
334 | assert updated_project.path == new_path
335 |
336 | finally:
337 | # Clean up
338 | try:
339 | await project_service.remove_project(test_project_name)
340 | except Exception:
341 | pass
342 |
343 |
344 | @pytest.mark.asyncio
345 | async def test_update_project_nonexistent_endpoint(client, project_url):
346 | """Test the update project endpoint with a nonexistent project."""
347 | # Try to update a project that doesn't exist
348 | response = await client.patch(
349 | f"{project_url}/project/nonexistent-project", json={"path": "/tmp/new-path"}
350 | )
351 |
352 | # Should return 400 error
353 | assert response.status_code == 400
354 | data = response.json()
355 | assert "detail" in data
356 | assert "not found in configuration" in data["detail"]
357 |
358 |
359 | @pytest.mark.asyncio
360 | async def test_update_project_relative_path_error_endpoint(
361 | test_config, client, project_service, project_url
362 | ):
363 | """Test the update project endpoint with relative path (should fail)."""
364 | # Create a test project to update
365 | test_project_name = "test-update-relative-project"
366 | test_path = "/tmp/test-update-relative"
367 |
368 | await project_service.add_project(test_project_name, test_path)
369 |
370 | try:
371 | # Try to update with relative path
372 | response = await client.patch(
373 | f"{project_url}/project/{test_project_name}", json={"path": "./relative-path"}
374 | )
375 |
376 | # Should return 400 error
377 | assert response.status_code == 400
378 | data = response.json()
379 | assert "detail" in data
380 | assert "Path must be absolute" in data["detail"]
381 |
382 | finally:
383 | # Clean up
384 | try:
385 | await project_service.remove_project(test_project_name)
386 | except Exception:
387 | pass
388 |
389 |
390 | @pytest.mark.asyncio
391 | async def test_update_project_no_params_endpoint(test_config, client, project_service, project_url):
392 | """Test the update project endpoint with no parameters (should fail)."""
393 | # Create a test project to update
394 | test_project_name = "test-update-no-params-project"
395 | test_path = "/tmp/test-update-no-params"
396 |
397 | await project_service.add_project(test_project_name, test_path)
398 | proj_info = await project_service.get_project(test_project_name)
399 | assert proj_info.name == test_project_name
400 | # On Windows the path is prepended with a drive letter
401 | assert test_path in proj_info.path
402 |
403 | try:
404 | # Try to update with no parameters
405 | response = await client.patch(f"{project_url}/project/{test_project_name}", json={})
406 |
407 | # Should return 200 (no-op)
408 | assert response.status_code == 200
409 | proj_info = await project_service.get_project(test_project_name)
410 | assert proj_info.name == test_project_name
411 | # On Windows the path is prepended with a drive letter
412 | assert test_path in proj_info.path
413 |
414 | finally:
415 | # Clean up
416 | try:
417 | await project_service.remove_project(test_project_name)
418 | except Exception:
419 | pass
420 |
421 |
422 | @pytest.mark.asyncio
423 | async def test_update_project_empty_path_endpoint(
424 | test_config, client, project_service, project_url
425 | ):
426 | """Test the update project endpoint with empty path parameter."""
427 | # Create a test project to update
428 | test_project_name = "test-update-empty-path-project"
429 | test_path = "/tmp/test-update-empty-path"
430 |
431 | await project_service.add_project(test_project_name, test_path)
432 |
433 | try:
434 | # Try to update with empty/null path - should be treated as no path update
435 | response = await client.patch(
436 | f"{project_url}/project/{test_project_name}", json={"path": None, "is_active": True}
437 | )
438 |
439 | # Should succeed and perform is_active update
440 | assert response.status_code == 200
441 | data = response.json()
442 | assert data["status"] == "success"
443 |
444 | finally:
445 | # Clean up
446 | try:
447 | await project_service.remove_project(test_project_name)
448 | except Exception:
449 | pass
450 |
451 |
452 | @pytest.mark.asyncio
453 | async def test_sync_project_endpoint(test_graph, client, project_url):
454 | """Test the project sync endpoint initiates background sync."""
455 | # Call the sync endpoint
456 | response = await client.post(f"{project_url}/project/sync")
457 |
458 | # Verify response
459 | assert response.status_code == 200
460 | data = response.json()
461 |
462 | # Check response structure
463 | assert "status" in data
464 | assert "message" in data
465 | assert data["status"] == "sync_started"
466 | assert "Filesystem sync initiated" in data["message"]
467 |
468 |
469 | @pytest.mark.asyncio
470 | async def test_sync_project_endpoint_not_found(client):
471 | """Test the project sync endpoint with nonexistent project."""
472 | # Call the sync endpoint for a project that doesn't exist
473 | response = await client.post("/nonexistent-project/project/sync")
474 |
475 | # Should return 404
476 | assert response.status_code == 404
477 |
478 |
479 | @pytest.mark.asyncio
480 | async def test_remove_default_project_fails(test_config, client, project_service):
481 | """Test that removing the default project returns an error."""
482 | # Get the current default project
483 | default_project_name = project_service.default_project
484 |
485 | # Try to remove the default project
486 | response = await client.delete(f"/projects/{default_project_name}")
487 |
488 | # Should return 400 with helpful error message
489 | assert response.status_code == 400
490 | data = response.json()
491 | assert "detail" in data
492 | assert "Cannot delete default project" in data["detail"]
493 | assert default_project_name in data["detail"]
494 |
495 |
496 | @pytest.mark.asyncio
497 | async def test_remove_default_project_with_alternatives(test_config, client, project_service):
498 | """Test that error message includes alternative projects when trying to delete default."""
499 | # Get the current default project
500 | default_project_name = project_service.default_project
501 |
502 | # Create another project so there are alternatives
503 | test_project_name = "test-alternative-project"
504 | await project_service.add_project(test_project_name, "/tmp/test-alternative")
505 |
506 | try:
507 | # Try to remove the default project
508 | response = await client.delete(f"/projects/{default_project_name}")
509 |
510 | # Should return 400 with helpful error message including alternatives
511 | assert response.status_code == 400
512 | data = response.json()
513 | assert "detail" in data
514 | assert "Cannot delete default project" in data["detail"]
515 | assert "Set another project as default first" in data["detail"]
516 | assert test_project_name in data["detail"]
517 |
518 | finally:
519 | # Clean up
520 | try:
521 | await project_service.remove_project(test_project_name)
522 | except Exception:
523 | pass
524 |
525 |
526 | @pytest.mark.asyncio
527 | async def test_remove_non_default_project_succeeds(test_config, client, project_service):
528 | """Test that removing a non-default project succeeds."""
529 | # Create a test project to remove
530 | test_project_name = "test-remove-non-default"
531 | await project_service.add_project(test_project_name, "/tmp/test-remove-non-default")
532 |
533 | # Verify it's not the default
534 | assert project_service.default_project != test_project_name
535 |
536 | # Remove the project
537 | response = await client.delete(f"/projects/{test_project_name}")
538 |
539 | # Should succeed
540 | assert response.status_code == 200
541 | data = response.json()
542 | assert data["status"] == "success"
543 |
544 | # Verify project is removed
545 | removed_project = await project_service.get_project(test_project_name)
546 | assert removed_project is None
547 |
548 |
549 | @pytest.mark.asyncio
550 | async def test_set_nonexistent_project_as_default_fails(test_config, client, project_service):
551 | """Test that setting a non-existent project as default returns 404."""
552 | # Try to set a project that doesn't exist as default
553 | response = await client.put("/projects/nonexistent-project/default")
554 |
555 | # Should return 404
556 | assert response.status_code == 404
557 | data = response.json()
558 | assert "detail" in data
559 | assert "does not exist" in data["detail"]
560 |
561 |
562 | @pytest.mark.asyncio
563 | async def test_create_project_idempotent_same_path(test_config, client, project_service):
564 | """Test that creating a project with same name and same path is idempotent."""
565 | # Create a project with platform-independent path
566 | test_project_name = "test-idempotent"
567 | with tempfile.TemporaryDirectory() as temp_dir:
568 | test_project_path = (Path(temp_dir) / "test-idempotent").as_posix()
569 |
570 | response1 = await client.post(
571 | "/projects/projects",
572 | json={"name": test_project_name, "path": test_project_path, "set_default": False},
573 | )
574 |
575 | # Should succeed with 201 Created
576 | assert response1.status_code == 201
577 | data1 = response1.json()
578 | assert data1["status"] == "success"
579 | assert data1["new_project"]["name"] == test_project_name
580 |
581 | # Try to create the same project again with same name and path
582 | response2 = await client.post(
583 | "/projects/projects",
584 | json={"name": test_project_name, "path": test_project_path, "set_default": False},
585 | )
586 |
587 | # Should also succeed (idempotent)
588 | assert response2.status_code == 200
589 | data2 = response2.json()
590 | assert data2["status"] == "success"
591 | assert "already exists" in data2["message"]
592 | assert data2["new_project"]["name"] == test_project_name
593 | # Normalize paths for cross-platform comparison
594 | assert Path(data2["new_project"]["path"]).resolve() == Path(test_project_path).resolve()
595 |
596 | # Clean up
597 | try:
598 | await project_service.remove_project(test_project_name)
599 | except Exception:
600 | pass
601 |
602 |
603 | @pytest.mark.asyncio
604 | async def test_create_project_fails_different_path(test_config, client, project_service):
605 | """Test that creating a project with same name but different path fails."""
606 | # Create a project
607 | test_project_name = "test-path-conflict"
608 | test_project_path1 = "/tmp/test-path-conflict-1"
609 |
610 | response1 = await client.post(
611 | "/projects/projects",
612 | json={"name": test_project_name, "path": test_project_path1, "set_default": False},
613 | )
614 |
615 | # Should succeed with 201 Created
616 | assert response1.status_code == 201
617 |
618 | # Try to create the same project with different path
619 | test_project_path2 = "/tmp/test-path-conflict-2"
620 | response2 = await client.post(
621 | "/projects/projects",
622 | json={"name": test_project_name, "path": test_project_path2, "set_default": False},
623 | )
624 |
625 | # Should fail with 400
626 | assert response2.status_code == 400
627 | data2 = response2.json()
628 | assert "detail" in data2
629 | assert "already exists with different path" in data2["detail"]
630 | assert test_project_path1 in data2["detail"]
631 | assert test_project_path2 in data2["detail"]
632 |
633 | # Clean up
634 | try:
635 | await project_service.remove_project(test_project_name)
636 | except Exception:
637 | pass
638 |
639 |
640 | @pytest.mark.asyncio
641 | async def test_remove_project_with_delete_notes_false(test_config, client, project_service):
642 | """Test that removing a project with delete_notes=False leaves directory intact."""
643 | # Create a test project with actual directory
644 | test_project_name = "test-remove-keep-files"
645 | with tempfile.TemporaryDirectory() as temp_dir:
646 | test_path = Path(temp_dir) / "test-project"
647 | test_path.mkdir()
648 | test_file = test_path / "test.md"
649 | test_file.write_text("# Test Note")
650 |
651 | await project_service.add_project(test_project_name, str(test_path))
652 |
653 | # Remove the project without deleting files (default)
654 | response = await client.delete(f"/projects/{test_project_name}")
655 |
656 | # Verify response
657 | assert response.status_code == 200
658 | data = response.json()
659 | assert data["status"] == "success"
660 |
661 | # Verify project is removed from config/db
662 | removed_project = await project_service.get_project(test_project_name)
663 | assert removed_project is None
664 |
665 | # Verify directory still exists
666 | assert test_path.exists()
667 | assert test_file.exists()
668 |
669 |
670 | @pytest.mark.asyncio
671 | async def test_remove_project_with_delete_notes_true(test_config, client, project_service):
672 | """Test that removing a project with delete_notes=True deletes the directory."""
673 | # Create a test project with actual directory
674 | test_project_name = "test-remove-delete-files"
675 | with tempfile.TemporaryDirectory() as temp_dir:
676 | test_path = Path(temp_dir) / "test-project"
677 | test_path.mkdir()
678 | test_file = test_path / "test.md"
679 | test_file.write_text("# Test Note")
680 |
681 | await project_service.add_project(test_project_name, str(test_path))
682 |
683 | # Remove the project with delete_notes=True
684 | response = await client.delete(f"/projects/{test_project_name}?delete_notes=true")
685 |
686 | # Verify response
687 | assert response.status_code == 200
688 | data = response.json()
689 | assert data["status"] == "success"
690 |
691 | # Verify project is removed from config/db
692 | removed_project = await project_service.get_project(test_project_name)
693 | assert removed_project is None
694 |
695 | # Verify directory is deleted
696 | assert not test_path.exists()
697 |
698 |
699 | @pytest.mark.asyncio
700 | async def test_remove_project_delete_notes_nonexistent_directory(
701 | test_config, client, project_service
702 | ):
703 | """Test that removing a project with delete_notes=True handles missing directory gracefully."""
704 | # Create a project pointing to a non-existent path
705 | test_project_name = "test-remove-missing-dir"
706 | test_path = "/tmp/this-directory-does-not-exist-12345"
707 |
708 | await project_service.add_project(test_project_name, test_path)
709 |
710 | # Remove the project with delete_notes=True (should not fail even if dir doesn't exist)
711 | response = await client.delete(f"/projects/{test_project_name}?delete_notes=true")
712 |
713 | # Should succeed
714 | assert response.status_code == 200
715 | data = response.json()
716 | assert data["status"] == "success"
717 |
718 | # Verify project is removed
719 | removed_project = await project_service.get_project(test_project_name)
720 | assert removed_project is None
721 |
```
--------------------------------------------------------------------------------
/tests/repository/test_search_repository.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the SearchRepository."""
2 |
3 | from datetime import datetime, timezone
4 |
5 | import pytest
6 | import pytest_asyncio
7 | from sqlalchemy import text
8 |
9 | from basic_memory import db
10 | from basic_memory.models import Entity
11 | from basic_memory.models.project import Project
12 | from basic_memory.repository.search_repository import SearchRepository, SearchIndexRow
13 | from basic_memory.schemas.search import SearchItemType
14 |
15 |
16 | @pytest_asyncio.fixture
17 | async def search_entity(session_maker, test_project: Project):
18 | """Create a test entity for search testing."""
19 | async with db.scoped_session(session_maker) as session:
20 | entity = Entity(
21 | project_id=test_project.id,
22 | title="Search Test Entity",
23 | entity_type="test",
24 | permalink="test/search-test-entity",
25 | file_path="test/search_test_entity.md",
26 | content_type="text/markdown",
27 | created_at=datetime.now(timezone.utc),
28 | updated_at=datetime.now(timezone.utc),
29 | )
30 | session.add(entity)
31 | await session.flush()
32 | return entity
33 |
34 |
35 | @pytest_asyncio.fixture
36 | async def second_project(project_repository):
37 | """Create a second project for testing project isolation."""
38 | project_data = {
39 | "name": "Second Test Project",
40 | "description": "Another project for testing",
41 | "path": "/second/project/path",
42 | "is_active": True,
43 | "is_default": None,
44 | }
45 | return await project_repository.create(project_data)
46 |
47 |
48 | @pytest_asyncio.fixture
49 | async def second_project_repository(session_maker, second_project):
50 | """Create a repository for the second project."""
51 | return SearchRepository(session_maker, project_id=second_project.id)
52 |
53 |
54 | @pytest_asyncio.fixture
55 | async def second_entity(session_maker, second_project: Project):
56 | """Create a test entity in the second project."""
57 | async with db.scoped_session(session_maker) as session:
58 | entity = Entity(
59 | project_id=second_project.id,
60 | title="Second Project Entity",
61 | entity_type="test",
62 | permalink="test/second-project-entity",
63 | file_path="test/second_project_entity.md",
64 | content_type="text/markdown",
65 | created_at=datetime.now(timezone.utc),
66 | updated_at=datetime.now(timezone.utc),
67 | )
68 | session.add(entity)
69 | await session.flush()
70 | return entity
71 |
72 |
73 | @pytest.mark.asyncio
74 | async def test_init_search_index(search_repository):
75 | """Test that search index can be initialized."""
76 | await search_repository.init_search_index()
77 |
78 | # Verify search_index table exists
79 | async with db.scoped_session(search_repository.session_maker) as session:
80 | result = await session.execute(
81 | text("SELECT name FROM sqlite_master WHERE type='table' AND name='search_index';")
82 | )
83 | assert result.scalar() == "search_index"
84 |
85 |
86 | @pytest.mark.asyncio
87 | async def test_index_item(search_repository, search_entity):
88 | """Test indexing an item with project_id."""
89 | # Create search index row for the entity
90 | search_row = SearchIndexRow(
91 | id=search_entity.id,
92 | type=SearchItemType.ENTITY.value,
93 | title=search_entity.title,
94 | content_stems="search test entity content",
95 | content_snippet="This is a test entity for search",
96 | permalink=search_entity.permalink,
97 | file_path=search_entity.file_path,
98 | entity_id=search_entity.id,
99 | metadata={"entity_type": search_entity.entity_type},
100 | created_at=search_entity.created_at,
101 | updated_at=search_entity.updated_at,
102 | project_id=search_repository.project_id,
103 | )
104 |
105 | # Index the item
106 | await search_repository.index_item(search_row)
107 |
108 | # Search for the item
109 | results = await search_repository.search(search_text="search test")
110 |
111 | # Verify we found the item
112 | assert len(results) == 1
113 | assert results[0].title == search_entity.title
114 | assert results[0].project_id == search_repository.project_id
115 |
116 |
117 | @pytest.mark.asyncio
118 | async def test_project_isolation(
119 | search_repository, second_project_repository, search_entity, second_entity
120 | ):
121 | """Test that search is isolated by project."""
122 | # Index entities in both projects
123 | search_row1 = SearchIndexRow(
124 | id=search_entity.id,
125 | type=SearchItemType.ENTITY.value,
126 | title=search_entity.title,
127 | content_stems="unique first project content",
128 | content_snippet="This is a test entity in the first project",
129 | permalink=search_entity.permalink,
130 | file_path=search_entity.file_path,
131 | entity_id=search_entity.id,
132 | metadata={"entity_type": search_entity.entity_type},
133 | created_at=search_entity.created_at,
134 | updated_at=search_entity.updated_at,
135 | project_id=search_repository.project_id,
136 | )
137 |
138 | search_row2 = SearchIndexRow(
139 | id=second_entity.id,
140 | type=SearchItemType.ENTITY.value,
141 | title=second_entity.title,
142 | content_stems="unique second project content",
143 | content_snippet="This is a test entity in the second project",
144 | permalink=second_entity.permalink,
145 | file_path=second_entity.file_path,
146 | entity_id=second_entity.id,
147 | metadata={"entity_type": second_entity.entity_type},
148 | created_at=second_entity.created_at,
149 | updated_at=second_entity.updated_at,
150 | project_id=second_project_repository.project_id,
151 | )
152 |
153 | # Index items in their respective repositories
154 | await search_repository.index_item(search_row1)
155 | await second_project_repository.index_item(search_row2)
156 |
157 | # Search in first project
158 | results1 = await search_repository.search(search_text="unique first")
159 | assert len(results1) == 1
160 | assert results1[0].title == search_entity.title
161 | assert results1[0].project_id == search_repository.project_id
162 |
163 | # Search in second project
164 | results2 = await second_project_repository.search(search_text="unique second")
165 | assert len(results2) == 1
166 | assert results2[0].title == second_entity.title
167 | assert results2[0].project_id == second_project_repository.project_id
168 |
169 | # Make sure first project can't see second project's content
170 | results_cross1 = await search_repository.search(search_text="unique second")
171 | assert len(results_cross1) == 0
172 |
173 | # Make sure second project can't see first project's content
174 | results_cross2 = await second_project_repository.search(search_text="unique first")
175 | assert len(results_cross2) == 0
176 |
177 |
178 | @pytest.mark.asyncio
179 | async def test_delete_by_permalink(search_repository, search_entity):
180 | """Test deleting an item by permalink respects project isolation."""
181 | # Index the item
182 | search_row = SearchIndexRow(
183 | id=search_entity.id,
184 | type=SearchItemType.ENTITY.value,
185 | title=search_entity.title,
186 | content_stems="content to delete",
187 | content_snippet="This content should be deleted",
188 | permalink=search_entity.permalink,
189 | file_path=search_entity.file_path,
190 | entity_id=search_entity.id,
191 | metadata={"entity_type": search_entity.entity_type},
192 | created_at=search_entity.created_at,
193 | updated_at=search_entity.updated_at,
194 | project_id=search_repository.project_id,
195 | )
196 |
197 | await search_repository.index_item(search_row)
198 |
199 | # Verify it exists
200 | results = await search_repository.search(search_text="content to delete")
201 | assert len(results) == 1
202 |
203 | # Delete by permalink
204 | await search_repository.delete_by_permalink(search_entity.permalink)
205 |
206 | # Verify it's gone
207 | results_after = await search_repository.search(search_text="content to delete")
208 | assert len(results_after) == 0
209 |
210 |
211 | @pytest.mark.asyncio
212 | async def test_delete_by_entity_id(search_repository, search_entity):
213 | """Test deleting an item by entity_id respects project isolation."""
214 | # Index the item
215 | search_row = SearchIndexRow(
216 | id=search_entity.id,
217 | type=SearchItemType.ENTITY.value,
218 | title=search_entity.title,
219 | content_stems="entity to delete",
220 | content_snippet="This entity should be deleted",
221 | permalink=search_entity.permalink,
222 | file_path=search_entity.file_path,
223 | entity_id=search_entity.id,
224 | metadata={"entity_type": search_entity.entity_type},
225 | created_at=search_entity.created_at,
226 | updated_at=search_entity.updated_at,
227 | project_id=search_repository.project_id,
228 | )
229 |
230 | await search_repository.index_item(search_row)
231 |
232 | # Verify it exists
233 | results = await search_repository.search(search_text="entity to delete")
234 | assert len(results) == 1
235 |
236 | # Delete by entity_id
237 | await search_repository.delete_by_entity_id(search_entity.id)
238 |
239 | # Verify it's gone
240 | results_after = await search_repository.search(search_text="entity to delete")
241 | assert len(results_after) == 0
242 |
243 |
244 | @pytest.mark.asyncio
245 | async def test_to_insert_includes_project_id(search_repository):
246 | """Test that the to_insert method includes project_id."""
247 | # Create a search index row with project_id
248 | row = SearchIndexRow(
249 | id=1234,
250 | type=SearchItemType.ENTITY.value,
251 | title="Test Title",
252 | content_stems="test content",
253 | content_snippet="test snippet",
254 | permalink="test/permalink",
255 | file_path="test/file.md",
256 | metadata={"test": "metadata"},
257 | created_at=datetime.now(timezone.utc),
258 | updated_at=datetime.now(timezone.utc),
259 | project_id=search_repository.project_id,
260 | )
261 |
262 | # Get insert data
263 | insert_data = row.to_insert()
264 |
265 | # Verify project_id is included
266 | assert "project_id" in insert_data
267 | assert insert_data["project_id"] == search_repository.project_id
268 |
269 |
270 | def test_directory_property():
271 | """Test the directory property of SearchIndexRow."""
272 | # Test a file in a nested directory
273 | row1 = SearchIndexRow(
274 | id=1,
275 | type=SearchItemType.ENTITY.value,
276 | file_path="projects/notes/ideas.md",
277 | created_at=datetime.now(timezone.utc),
278 | updated_at=datetime.now(timezone.utc),
279 | project_id=1,
280 | )
281 | assert row1.directory == "/projects/notes"
282 |
283 | # Test a file at the root level
284 | row2 = SearchIndexRow(
285 | id=2,
286 | type=SearchItemType.ENTITY.value,
287 | file_path="README.md",
288 | created_at=datetime.now(timezone.utc),
289 | updated_at=datetime.now(timezone.utc),
290 | project_id=1,
291 | )
292 | assert row2.directory == "/"
293 |
294 | # Test a non-entity type with empty file_path
295 | row3 = SearchIndexRow(
296 | id=3,
297 | type=SearchItemType.OBSERVATION.value,
298 | file_path="",
299 | created_at=datetime.now(timezone.utc),
300 | updated_at=datetime.now(timezone.utc),
301 | project_id=1,
302 | )
303 | assert row3.directory == ""
304 |
305 |
306 | class TestSearchTermPreparation:
307 | """Test cases for FTS5 search term preparation."""
308 |
309 | def test_simple_terms_get_prefix_wildcard(self, search_repository):
310 | """Simple alphanumeric terms should get prefix matching."""
311 | assert search_repository._prepare_search_term("hello") == "hello*"
312 | assert search_repository._prepare_search_term("project") == "project*"
313 | assert search_repository._prepare_search_term("test123") == "test123*"
314 |
315 | def test_terms_with_existing_wildcard_unchanged(self, search_repository):
316 | """Terms that already contain * should remain unchanged."""
317 | assert search_repository._prepare_search_term("hello*") == "hello*"
318 | assert search_repository._prepare_search_term("test*world") == "test*world"
319 |
320 | def test_boolean_operators_preserved(self, search_repository):
321 | """Boolean operators should be preserved without modification."""
322 | assert search_repository._prepare_search_term("hello AND world") == "hello AND world"
323 | assert search_repository._prepare_search_term("cat OR dog") == "cat OR dog"
324 | assert (
325 | search_repository._prepare_search_term("project NOT meeting") == "project NOT meeting"
326 | )
327 | assert (
328 | search_repository._prepare_search_term("(hello AND world) OR test")
329 | == "(hello AND world) OR test"
330 | )
331 |
332 | def test_hyphenated_terms_with_boolean_operators(self, search_repository):
333 | """Hyphenated terms with Boolean operators should be properly quoted."""
334 | # Test the specific case from the GitHub issue
335 | result = search_repository._prepare_search_term("tier1-test AND unicode")
336 | assert result == '"tier1-test" AND unicode'
337 |
338 | # Test other hyphenated Boolean combinations
339 | assert (
340 | search_repository._prepare_search_term("multi-word OR single")
341 | == '"multi-word" OR single'
342 | )
343 | assert (
344 | search_repository._prepare_search_term("well-formed NOT badly-formed")
345 | == '"well-formed" NOT "badly-formed"'
346 | )
347 | assert (
348 | search_repository._prepare_search_term("test-case AND (hello OR world)")
349 | == '"test-case" AND (hello OR world)'
350 | )
351 |
352 | # Test mixed special characters with Boolean operators
353 | assert (
354 | search_repository._prepare_search_term("config.json AND test-file")
355 | == '"config.json" AND "test-file"'
356 | )
357 | assert (
358 | search_repository._prepare_search_term("C++ OR python-script")
359 | == '"C++" OR "python-script"'
360 | )
361 |
362 | def test_programming_terms_should_work(self, search_repository):
363 | """Programming-related terms with special chars should be searchable."""
364 | # These should be quoted to handle special characters safely
365 | assert search_repository._prepare_search_term("C++") == '"C++"*'
366 | assert search_repository._prepare_search_term("function()") == '"function()"*'
367 | assert search_repository._prepare_search_term("[email protected]") == '"[email protected]"*'
368 | assert search_repository._prepare_search_term("array[index]") == '"array[index]"*'
369 | assert search_repository._prepare_search_term("config.json") == '"config.json"*'
370 |
371 | def test_malformed_fts5_syntax_quoted(self, search_repository):
372 | """Malformed FTS5 syntax should be quoted to prevent errors."""
373 | # Multiple operators without proper syntax
374 | assert search_repository._prepare_search_term("+++invalid+++") == '"+++invalid+++"*'
375 | assert search_repository._prepare_search_term("!!!error!!!") == '"!!!error!!!"*'
376 | assert search_repository._prepare_search_term("@#$%^&*()") == '"@#$%^&*()"*'
377 |
378 | def test_quoted_strings_handled_properly(self, search_repository):
379 | """Strings with quotes should have quotes escaped."""
380 | assert search_repository._prepare_search_term('say "hello"') == '"say ""hello"""*'
381 | assert search_repository._prepare_search_term("it's working") == '"it\'s working"*'
382 |
383 | def test_file_paths_no_prefix_wildcard(self, search_repository):
384 | """File paths should not get prefix wildcards."""
385 | assert (
386 | search_repository._prepare_search_term("config.json", is_prefix=False)
387 | == '"config.json"'
388 | )
389 | assert (
390 | search_repository._prepare_search_term("docs/readme.md", is_prefix=False)
391 | == '"docs/readme.md"'
392 | )
393 |
394 | def test_spaces_handled_correctly(self, search_repository):
395 | """Terms with spaces should use boolean AND for word order independence."""
396 | assert search_repository._prepare_search_term("hello world") == "hello* AND world*"
397 | assert (
398 | search_repository._prepare_search_term("project planning") == "project* AND planning*"
399 | )
400 |
401 | def test_version_strings_with_dots_handled_correctly(self, search_repository):
402 | """Version strings with dots should be quoted to prevent FTS5 syntax errors."""
403 | # This reproduces the bug where "Basic Memory v0.13.0b2" becomes "Basic* AND Memory* AND v0.13.0b2*"
404 | # which causes FTS5 syntax errors because v0.13.0b2* is not valid FTS5 syntax
405 | result = search_repository._prepare_search_term("Basic Memory v0.13.0b2")
406 | # Should be quoted because of dots in v0.13.0b2
407 | assert result == '"Basic Memory v0.13.0b2"*'
408 |
409 | def test_mixed_special_characters_in_multi_word_queries(self, search_repository):
410 | """Multi-word queries with special characters in any word should be fully quoted."""
411 | # Any word containing special characters should cause the entire phrase to be quoted
412 | assert search_repository._prepare_search_term("config.json file") == '"config.json file"*'
413 | assert (
414 | search_repository._prepare_search_term("[email protected] account")
415 | == '"[email protected] account"*'
416 | )
417 | assert search_repository._prepare_search_term("node.js and react") == '"node.js and react"*'
418 |
419 | @pytest.mark.asyncio
420 | async def test_search_with_special_characters_returns_results(self, search_repository):
421 | """Integration test: search with special characters should work gracefully."""
422 | # This test ensures the search doesn't crash with FTS5 syntax errors
423 |
424 | # These should all return empty results gracefully, not crash
425 | results1 = await search_repository.search(search_text="C++")
426 | assert isinstance(results1, list) # Should not crash
427 |
428 | results2 = await search_repository.search(search_text="function()")
429 | assert isinstance(results2, list) # Should not crash
430 |
431 | results3 = await search_repository.search(search_text="+++malformed+++")
432 | assert isinstance(results3, list) # Should not crash, return empty results
433 |
434 | results4 = await search_repository.search(search_text="[email protected]")
435 | assert isinstance(results4, list) # Should not crash
436 |
437 | @pytest.mark.asyncio
438 | async def test_boolean_search_still_works(self, search_repository):
439 | """Boolean search operations should continue to work."""
440 | # These should not crash and should respect boolean logic
441 | results1 = await search_repository.search(search_text="hello AND world")
442 | assert isinstance(results1, list)
443 |
444 | results2 = await search_repository.search(search_text="cat OR dog")
445 | assert isinstance(results2, list)
446 |
447 | results3 = await search_repository.search(search_text="project NOT meeting")
448 | assert isinstance(results3, list)
449 |
450 | @pytest.mark.asyncio
451 | async def test_permalink_match_exact_with_slash(self, search_repository):
452 | """Test exact permalink matching with slash (line 249 coverage)."""
453 | # This tests the exact match path: if "/" in permalink_text:
454 | results = await search_repository.search(permalink_match="test/path")
455 | assert isinstance(results, list)
456 | # Should use exact equality matching for paths with slashes
457 |
458 | @pytest.mark.asyncio
459 | async def test_permalink_match_simple_term(self, search_repository):
460 | """Test permalink matching with simple term (no slash)."""
461 | # This tests the simple term path that goes through _prepare_search_term
462 | results = await search_repository.search(permalink_match="simpleterm")
463 | assert isinstance(results, list)
464 | # Should use FTS5 MATCH for simple terms
465 |
466 | @pytest.mark.asyncio
467 | async def test_fts5_error_handling_database_error(self, search_repository):
468 | """Test that non-FTS5 database errors are properly re-raised."""
469 | import unittest.mock
470 |
471 | # Mock the scoped_session to raise a non-FTS5 error
472 | with unittest.mock.patch("basic_memory.db.scoped_session") as mock_scoped_session:
473 | mock_session = unittest.mock.AsyncMock()
474 | mock_scoped_session.return_value.__aenter__.return_value = mock_session
475 |
476 | # Simulate a database error that's NOT an FTS5 syntax error
477 | mock_session.execute.side_effect = Exception("Database connection failed")
478 |
479 | # This should re-raise the exception (not return empty list)
480 | with pytest.raises(Exception, match="Database connection failed"):
481 | await search_repository.search(search_text="test")
482 |
483 | @pytest.mark.asyncio
484 | async def test_version_string_search_integration(self, search_repository, search_entity):
485 | """Integration test: searching for version strings should work without FTS5 errors."""
486 | # Index an entity with version information
487 | search_row = SearchIndexRow(
488 | id=search_entity.id,
489 | type=SearchItemType.ENTITY.value,
490 | title="Basic Memory v0.13.0b2 Release",
491 | content_stems="basic memory version 0.13.0b2 beta release notes features",
492 | content_snippet="Basic Memory v0.13.0b2 is a beta release with new features",
493 | permalink=search_entity.permalink,
494 | file_path=search_entity.file_path,
495 | entity_id=search_entity.id,
496 | metadata={"entity_type": search_entity.entity_type},
497 | created_at=search_entity.created_at,
498 | updated_at=search_entity.updated_at,
499 | project_id=search_repository.project_id,
500 | )
501 |
502 | await search_repository.index_item(search_row)
503 |
504 | # This should not cause FTS5 syntax errors and should find the entity
505 | results = await search_repository.search(search_text="Basic Memory v0.13.0b2")
506 | assert len(results) == 1
507 | assert results[0].title == "Basic Memory v0.13.0b2 Release"
508 |
509 | # Test other version-like patterns
510 | results2 = await search_repository.search(search_text="v0.13.0b2")
511 | assert len(results2) == 1 # Should still find it due to content_stems
512 |
513 | # Test with other problematic patterns
514 | results3 = await search_repository.search(search_text="node.js version")
515 | assert isinstance(results3, list) # Should not crash
516 |
517 | @pytest.mark.asyncio
518 | async def test_wildcard_only_search(self, search_repository, search_entity):
519 | """Test that wildcard-only search '*' doesn't cause FTS5 errors (line 243 coverage)."""
520 | # Index an entity for testing
521 | search_row = SearchIndexRow(
522 | id=search_entity.id,
523 | type=SearchItemType.ENTITY.value,
524 | title="Test Entity",
525 | content_stems="test entity content",
526 | content_snippet="This is a test entity",
527 | permalink=search_entity.permalink,
528 | file_path=search_entity.file_path,
529 | entity_id=search_entity.id,
530 | metadata={"entity_type": search_entity.entity_type},
531 | created_at=search_entity.created_at,
532 | updated_at=search_entity.updated_at,
533 | project_id=search_repository.project_id,
534 | )
535 |
536 | await search_repository.index_item(search_row)
537 |
538 | # Test wildcard-only search - should not crash and should return results
539 | results = await search_repository.search(search_text="*")
540 | assert isinstance(results, list) # Should not crash
541 | assert len(results) >= 1 # Should return all results, including our test entity
542 |
543 | # Test empty string search - should also not crash
544 | results_empty = await search_repository.search(search_text="")
545 | assert isinstance(results_empty, list) # Should not crash
546 |
547 | # Test whitespace-only search
548 | results_whitespace = await search_repository.search(search_text=" ")
549 | assert isinstance(results_whitespace, list) # Should not crash
550 |
551 | def test_boolean_query_empty_parts_coverage(self, search_repository):
552 | """Test Boolean query parsing with empty parts (line 143 coverage)."""
553 | # Create queries that will result in empty parts after splitting
554 | result1 = search_repository._prepare_boolean_query(
555 | "hello AND AND world"
556 | ) # Double operator
557 | assert "hello" in result1 and "world" in result1
558 |
559 | result2 = search_repository._prepare_boolean_query(" OR test") # Leading operator
560 | assert "test" in result2
561 |
562 | result3 = search_repository._prepare_boolean_query("test OR ") # Trailing operator
563 | assert "test" in result3
564 |
565 | def test_parenthetical_term_quote_escaping(self, search_repository):
566 | """Test quote escaping in parenthetical terms (lines 190-191 coverage)."""
567 | # Test term with quotes that needs escaping
568 | result = search_repository._prepare_parenthetical_term('(say "hello" world)')
569 | # Should escape quotes by doubling them
570 | assert '""hello""' in result
571 |
572 | # Test term with single quotes
573 | result2 = search_repository._prepare_parenthetical_term("(it's working)")
574 | assert "it's working" in result2
575 |
576 | def test_needs_quoting_empty_input(self, search_repository):
577 | """Test _needs_quoting with empty inputs (line 207 coverage)."""
578 | # Test empty string
579 | assert not search_repository._needs_quoting("")
580 |
581 | # Test whitespace-only string
582 | assert not search_repository._needs_quoting(" ")
583 |
584 | # Test None-like cases
585 | assert not search_repository._needs_quoting("\t")
586 |
587 | def test_prepare_single_term_empty_input(self, search_repository):
588 | """Test _prepare_single_term with empty inputs (line 227 coverage)."""
589 | # Test empty string
590 | result1 = search_repository._prepare_single_term("")
591 | assert result1 == ""
592 |
593 | # Test whitespace-only string
594 | result2 = search_repository._prepare_single_term(" ")
595 | assert result2 == " " # Should return as-is
596 |
597 | # Test string that becomes empty after strip
598 | result3 = search_repository._prepare_single_term("\t\n")
599 | assert result3 == "\t\n" # Should return original
600 |
```
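
The quoting rules exercised above follow directly from SQLite FTS5 query syntax: a bare term only matches whole tokens unless a trailing `*` requests prefix matching, and punctuation such as dots or hyphens is treated as query syntax unless the term is double-quoted. A minimal, self-contained sketch of both behaviors, using only the standard-library `sqlite3` module (independent of `SearchRepository`, and assuming a SQLite build with FTS5 enabled, which is true for most Python distributions):

```python
# Standalone illustration of the FTS5 behavior the tests above rely on.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE VIRTUAL TABLE docs USING fts5(body)")
conn.execute("INSERT INTO docs VALUES ('Basic Memory v0.13.0b2 release notes')")

# Prefix matching: 'rel' alone misses 'release'; 'rel*' matches it.
print(conn.execute("SELECT body FROM docs WHERE docs MATCH 'rel*'").fetchall())

# Terms with dots must be quoted: MATCH 'v0.13.0b2' raises an FTS5 syntax error,
# while the quoted phrase matches the same token sequence.
print(conn.execute("SELECT body FROM docs WHERE docs MATCH '\"v0.13.0b2\"'").fetchall())
```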
--------------------------------------------------------------------------------
/src/basic_memory/cli/commands/cloud/bisync_commands.py:
--------------------------------------------------------------------------------
```python
1 | """Cloud bisync commands for Basic Memory CLI."""
2 |
3 | import asyncio
4 | import subprocess
5 | import time
6 | from datetime import datetime
7 | from pathlib import Path
8 | from typing import Optional
9 |
10 | import typer
11 | from rich.console import Console
12 | from rich.table import Table
13 |
14 | from basic_memory.cli.commands.cloud.api_client import CloudAPIError, make_api_request
15 | from basic_memory.cli.commands.cloud.cloud_utils import (
16 | create_cloud_project,
17 | fetch_cloud_projects,
18 | )
19 | from basic_memory.cli.commands.cloud.rclone_config import (
20 | add_tenant_to_rclone_config,
21 | )
22 | from basic_memory.cli.commands.cloud.rclone_installer import RcloneInstallError, install_rclone
23 | from basic_memory.config import ConfigManager
24 | from basic_memory.ignore_utils import get_bmignore_path, create_default_bmignore
25 | from basic_memory.schemas.cloud import (
26 | TenantMountInfo,
27 | MountCredentials,
28 | )
29 |
30 | console = Console()
31 |
32 |
33 | class BisyncError(Exception):
34 | """Exception raised for bisync-related errors."""
35 |
36 | pass
37 |
38 |
39 | class RcloneBisyncProfile:
40 | """Bisync profile with safety settings."""
41 |
42 | def __init__(
43 | self,
44 | name: str,
45 | conflict_resolve: str,
46 | max_delete: int,
47 | check_access: bool,
48 | description: str,
49 | extra_args: Optional[list[str]] = None,
50 | ):
51 | self.name = name
52 | self.conflict_resolve = conflict_resolve
53 | self.max_delete = max_delete
54 | self.check_access = check_access
55 | self.description = description
56 | self.extra_args = extra_args or []
57 |
58 |
59 | # Bisync profiles based on SPEC-9 Phase 2.1
60 | BISYNC_PROFILES = {
61 | "safe": RcloneBisyncProfile(
62 | name="safe",
63 | conflict_resolve="none",
64 | max_delete=10,
65 | check_access=False,
66 | description="Safe mode with conflict preservation (keeps both versions)",
67 | ),
68 | "balanced": RcloneBisyncProfile(
69 | name="balanced",
70 | conflict_resolve="newer",
71 | max_delete=25,
72 | check_access=False,
73 | description="Balanced mode - auto-resolve to newer file (recommended)",
74 | ),
75 | "fast": RcloneBisyncProfile(
76 | name="fast",
77 | conflict_resolve="newer",
78 | max_delete=50,
79 | check_access=False,
80 | description="Fast mode for rapid iteration (skip verification)",
81 | ),
82 | }
83 |
84 |
85 | async def get_mount_info() -> TenantMountInfo:
86 | """Get current tenant information from cloud API."""
87 | try:
88 | config_manager = ConfigManager()
89 | config = config_manager.config
90 | host_url = config.cloud_host.rstrip("/")
91 |
92 | response = await make_api_request(method="GET", url=f"{host_url}/tenant/mount/info")
93 |
94 | return TenantMountInfo.model_validate(response.json())
95 | except Exception as e:
96 | raise BisyncError(f"Failed to get tenant info: {e}") from e
97 |
98 |
99 | async def generate_mount_credentials(tenant_id: str) -> MountCredentials:
100 | """Generate scoped credentials for syncing."""
101 | try:
102 | config_manager = ConfigManager()
103 | config = config_manager.config
104 | host_url = config.cloud_host.rstrip("/")
105 |
106 | response = await make_api_request(method="POST", url=f"{host_url}/tenant/mount/credentials")
107 |
108 | return MountCredentials.model_validate(response.json())
109 | except Exception as e:
110 | raise BisyncError(f"Failed to generate credentials: {e}") from e
111 |
112 |
113 | def scan_local_directories(sync_dir: Path) -> list[str]:
114 | """Scan local sync directory for project folders.
115 |
116 | Args:
117 | sync_dir: Path to bisync directory
118 |
119 | Returns:
120 | List of directory names (project names)
121 | """
122 | if not sync_dir.exists():
123 | return []
124 |
125 | directories = []
126 | for item in sync_dir.iterdir():
127 | if item.is_dir() and not item.name.startswith("."):
128 | directories.append(item.name)
129 |
130 | return directories
131 |
132 |
133 | def get_bisync_state_path(tenant_id: str) -> Path:
134 | """Get path to bisync state directory."""
135 | return Path.home() / ".basic-memory" / "bisync-state" / tenant_id
136 |
137 |
138 | def get_bisync_directory() -> Path:
139 | """Get bisync directory from config.
140 |
141 | Returns:
142 | Path to bisync directory (default: ~/basic-memory-cloud-sync)
143 | """
144 | config_manager = ConfigManager()
145 | config = config_manager.config
146 |
147 | sync_dir = config.bisync_config.get("sync_dir", str(Path.home() / "basic-memory-cloud-sync"))
148 | return Path(sync_dir).expanduser().resolve()
149 |
150 |
151 | def validate_bisync_directory(bisync_dir: Path) -> None:
152 | """Validate bisync directory doesn't conflict with mount.
153 |
154 | Raises:
155 | BisyncError: If bisync directory conflicts with mount directory
156 | """
157 | # Get fixed mount directory
158 | mount_dir = (Path.home() / "basic-memory-cloud").resolve()
159 |
160 | # Check if bisync dir is the same as mount dir
161 | if bisync_dir == mount_dir:
162 | raise BisyncError(
163 | f"Cannot use {bisync_dir} for bisync - it's the mount directory!\n"
164 | f"Mount and bisync must use different directories.\n\n"
165 | f"Options:\n"
166 | f" 1. Use default: ~/basic-memory-cloud-sync/\n"
167 | f" 2. Specify different directory: --dir ~/my-sync-folder"
168 | )
169 |
170 | # Check if mount is active at this location
171 | result = subprocess.run(["mount"], capture_output=True, text=True)
172 | if str(bisync_dir) in result.stdout and "rclone" in result.stdout:
173 | raise BisyncError(
174 | f"{bisync_dir} is currently mounted via 'bm cloud mount'\n"
175 | f"Cannot use mounted directory for bisync.\n\n"
176 | f"Either:\n"
177 | f" 1. Unmount first: bm cloud unmount\n"
178 | f" 2. Use different directory for bisync"
179 | )
180 |
181 |
182 | def convert_bmignore_to_rclone_filters() -> Path:
183 | """Convert .bmignore patterns to rclone filter format.
184 |
185 | Reads ~/.basic-memory/.bmignore (gitignore-style) and converts to
186 | ~/.basic-memory/.bmignore.rclone (rclone filter format).
187 |
188 | Only regenerates if .bmignore has been modified since last conversion.
189 |
190 | Returns:
191 | Path to converted rclone filter file
192 | """
193 | # Ensure .bmignore exists
194 | create_default_bmignore()
195 |
196 | bmignore_path = get_bmignore_path()
197 | # Create rclone filter path: ~/.basic-memory/.bmignore -> ~/.basic-memory/.bmignore.rclone
198 | rclone_filter_path = bmignore_path.parent / f"{bmignore_path.name}.rclone"
199 |
200 | # Skip regeneration if rclone file is newer than bmignore
201 | if rclone_filter_path.exists():
202 | bmignore_mtime = bmignore_path.stat().st_mtime
203 | rclone_mtime = rclone_filter_path.stat().st_mtime
204 | if rclone_mtime >= bmignore_mtime:
205 | return rclone_filter_path
206 |
207 | # Read .bmignore patterns
208 | patterns = []
209 | try:
210 | with bmignore_path.open("r", encoding="utf-8") as f:
211 | for line in f:
212 | line = line.strip()
213 | # Keep comments and empty lines
214 | if not line or line.startswith("#"):
215 | patterns.append(line)
216 | continue
217 |
218 | # Convert gitignore pattern to rclone filter syntax
219 | # gitignore: node_modules → rclone: - node_modules/**
220 | # gitignore: *.pyc → rclone: - *.pyc
221 | if "*" in line:
222 | # Pattern already has wildcard, just add exclude prefix
223 | patterns.append(f"- {line}")
224 | else:
225 | # Directory pattern - add /** for recursive exclude
226 | patterns.append(f"- {line}/**")
227 |
228 | except Exception:
229 | # If we can't read the file, create a minimal filter
230 | patterns = ["# Error reading .bmignore, using minimal filters", "- .git/**"]
231 |
232 | # Write rclone filter file
233 | rclone_filter_path.write_text("\n".join(patterns) + "\n")
234 |
235 | return rclone_filter_path
236 |
237 |
238 | def get_bisync_filter_path() -> Path:
239 | """Get path to bisync filter file.
240 |
241 | Uses ~/.basic-memory/.bmignore (converted to rclone format).
242 | The file is automatically created with default patterns on first use.
243 |
244 | Returns:
245 | Path to rclone filter file
246 | """
247 | return convert_bmignore_to_rclone_filters()
248 |
249 |
250 | def bisync_state_exists(tenant_id: str) -> bool:
251 | """Check if bisync state exists (has been initialized)."""
252 | state_path = get_bisync_state_path(tenant_id)
253 | return state_path.exists() and any(state_path.iterdir())
254 |
255 |
256 | def build_bisync_command(
257 | tenant_id: str,
258 | bucket_name: str,
259 | local_path: Path,
260 | profile: RcloneBisyncProfile,
261 | dry_run: bool = False,
262 | resync: bool = False,
263 | verbose: bool = False,
264 | ) -> list[str]:
265 | """Build rclone bisync command with profile settings."""
266 |
267 | # Sync with the entire bucket root (all projects)
268 | rclone_remote = f"basic-memory-{tenant_id}:{bucket_name}"
269 | filter_path = get_bisync_filter_path()
270 | state_path = get_bisync_state_path(tenant_id)
271 |
272 | # Ensure state directory exists
273 | state_path.mkdir(parents=True, exist_ok=True)
274 |
275 | cmd = [
276 | "rclone",
277 | "bisync",
278 | str(local_path),
279 | rclone_remote,
280 | "--create-empty-src-dirs",
281 | "--resilient",
282 | f"--conflict-resolve={profile.conflict_resolve}",
283 | f"--max-delete={profile.max_delete}",
284 | "--filters-file",
285 | str(filter_path),
286 | "--workdir",
287 | str(state_path),
288 | ]
289 |
290 | # Add verbosity flags
291 | if verbose:
292 | cmd.append("--verbose") # Full details with file-by-file output
293 | else:
294 | # Show progress bar during transfers
295 | cmd.append("--progress")
296 |
297 | if profile.check_access:
298 | cmd.append("--check-access")
299 |
300 | if dry_run:
301 | cmd.append("--dry-run")
302 |
303 | if resync:
304 | cmd.append("--resync")
305 |
306 | cmd.extend(profile.extra_args)
307 |
308 | return cmd
309 |
310 |
311 | def setup_cloud_bisync(sync_dir: Optional[str] = None) -> None:
312 | """Set up cloud bisync with rclone installation and configuration.
313 |
314 | Args:
315 | sync_dir: Optional custom sync directory path. If not provided, uses config default.
316 | """
317 | console.print("[bold blue]Basic Memory Cloud Bisync Setup[/bold blue]")
318 | console.print("Setting up bidirectional sync to your cloud tenant...\n")
319 |
320 | try:
321 | # Step 1: Install rclone
322 | console.print("[blue]Step 1: Installing rclone...[/blue]")
323 | install_rclone()
324 |
325 | # Step 2: Get mount info (for tenant_id, bucket)
326 | console.print("\n[blue]Step 2: Getting tenant information...[/blue]")
327 | tenant_info = asyncio.run(get_mount_info())
328 |
329 | tenant_id = tenant_info.tenant_id
330 | bucket_name = tenant_info.bucket_name
331 |
332 | console.print(f"[green]✓ Found tenant: {tenant_id}[/green]")
333 | console.print(f"[green]✓ Bucket: {bucket_name}[/green]")
334 |
335 | # Step 3: Generate credentials
336 | console.print("\n[blue]Step 3: Generating sync credentials...[/blue]")
337 | creds = asyncio.run(generate_mount_credentials(tenant_id))
338 |
339 | access_key = creds.access_key
340 | secret_key = creds.secret_key
341 |
342 | console.print("[green]✓ Generated secure credentials[/green]")
343 |
344 | # Step 4: Configure rclone
345 | console.print("\n[blue]Step 4: Configuring rclone...[/blue]")
346 | add_tenant_to_rclone_config(
347 | tenant_id=tenant_id,
348 | bucket_name=bucket_name,
349 | access_key=access_key,
350 | secret_key=secret_key,
351 | )
352 |
353 | # Step 5: Configure and create local directory
354 | console.print("\n[blue]Step 5: Configuring sync directory...[/blue]")
355 |
356 | # If custom sync_dir provided, save to config
357 | if sync_dir:
358 | config_manager = ConfigManager()
359 | config = config_manager.load_config()
360 | config.bisync_config["sync_dir"] = sync_dir
361 | config_manager.save_config(config)
362 | console.print("[green]✓ Saved custom sync directory to config[/green]")
363 |
364 | # Get bisync directory (from config or default)
365 | local_path = get_bisync_directory()
366 |
367 | # Validate bisync directory
368 | validate_bisync_directory(local_path)
369 |
370 | # Create directory
371 | local_path.mkdir(parents=True, exist_ok=True)
372 | console.print(f"[green]✓ Created sync directory: {local_path}[/green]")
373 |
374 | # Step 6: Perform initial resync
375 | console.print("\n[blue]Step 6: Performing initial sync...[/blue]")
376 | console.print("[yellow]This will establish the baseline for bidirectional sync.[/yellow]")
377 |
378 | run_bisync(
379 | tenant_id=tenant_id,
380 | bucket_name=bucket_name,
381 | local_path=local_path,
382 | profile_name="balanced",
383 | resync=True,
384 | )
385 |
386 | console.print("\n[bold green]✓ Bisync setup completed successfully![/bold green]")
387 | console.print("\nYour local files will now sync bidirectionally with the cloud!")
388 | console.print(f"\nLocal directory: {local_path}")
389 | console.print("\nUseful commands:")
390 | console.print(" bm sync # Run sync (recommended)")
391 | console.print(" bm sync --watch # Start watch mode")
392 | console.print(" bm cloud status # Check sync status")
393 | console.print(" bm cloud check # Verify file integrity")
394 | console.print(" bm cloud bisync --dry-run # Preview changes (advanced)")
395 |
396 | except (RcloneInstallError, BisyncError, CloudAPIError) as e:
397 | console.print(f"\n[red]Setup failed: {e}[/red]")
398 | raise typer.Exit(1)
399 | except Exception as e:
400 | console.print(f"\n[red]Unexpected error during setup: {e}[/red]")
401 | raise typer.Exit(1)
402 |
403 |
404 | def run_bisync(
405 | tenant_id: Optional[str] = None,
406 | bucket_name: Optional[str] = None,
407 | local_path: Optional[Path] = None,
408 | profile_name: str = "balanced",
409 | dry_run: bool = False,
410 | resync: bool = False,
411 | verbose: bool = False,
412 | ) -> bool:
413 | """Run rclone bisync with specified profile."""
414 |
415 | try:
416 | # Get tenant info if not provided
417 | if not tenant_id or not bucket_name:
418 | tenant_info = asyncio.run(get_mount_info())
419 | tenant_id = tenant_info.tenant_id
420 | bucket_name = tenant_info.bucket_name
421 |
422 | # Set default local path if not provided
423 | if not local_path:
424 | local_path = get_bisync_directory()
425 |
426 | # Validate bisync directory
427 | validate_bisync_directory(local_path)
428 |
429 | # Check if local path exists
430 | if not local_path.exists():
431 | raise BisyncError(
432 | f"Local directory {local_path} does not exist. Run 'basic-memory cloud bisync-setup' first."
433 | )
434 |
435 | # Get bisync profile
436 | if profile_name not in BISYNC_PROFILES:
437 | raise BisyncError(
438 | f"Unknown profile: {profile_name}. Available: {list(BISYNC_PROFILES.keys())}"
439 | )
440 |
441 | profile = BISYNC_PROFILES[profile_name]
442 |
443 | # Auto-register projects before sync (unless dry-run or resync)
444 | if not dry_run and not resync:
445 | try:
446 | console.print("[dim]Checking for new projects...[/dim]")
447 |
448 | # Fetch cloud projects and extract directory names from paths
449 | cloud_data = asyncio.run(fetch_cloud_projects())
450 | cloud_projects = cloud_data.projects
451 |
452 | # Extract directory names from cloud project paths
453 | # Compare directory names, not project names
454 | # Cloud path /app/data/basic-memory -> directory name "basic-memory"
455 | cloud_dir_names = set()
456 | for p in cloud_projects:
457 | path = p.path
458 | # Strip /app/data/ prefix if present (cloud mode)
459 | if path.startswith("/app/data/"):
460 | path = path[len("/app/data/") :]
461 | # Get the last segment (directory name)
462 | dir_name = Path(path).name
463 | cloud_dir_names.add(dir_name)
464 |
465 | # Scan local directories
466 | local_dirs = scan_local_directories(local_path)
467 |
468 | # Create missing cloud projects
469 | new_projects = []
470 | for dir_name in local_dirs:
471 | if dir_name not in cloud_dir_names:
472 | new_projects.append(dir_name)
473 |
474 | if new_projects:
475 | console.print(
476 | f"[blue]Found {len(new_projects)} new local project(s), creating on cloud...[/blue]"
477 | )
478 | for project_name in new_projects:
479 | try:
480 | asyncio.run(create_cloud_project(project_name))
481 | console.print(f"[green] ✓ Created project: {project_name}[/green]")
482 | except BisyncError as e:
483 | console.print(
484 | f"[yellow] ⚠ Could not create {project_name}: {e}[/yellow]"
485 | )
486 | else:
487 | console.print("[dim]All local projects already registered on cloud[/dim]")
488 |
489 | except Exception as e:
490 | console.print(f"[yellow]Warning: Project auto-registration failed: {e}[/yellow]")
491 | console.print("[yellow]Continuing with sync anyway...[/yellow]")
492 |
493 | # Check if first run and require resync
494 | if not resync and not bisync_state_exists(tenant_id) and not dry_run:
495 | raise BisyncError(
496 | "First bisync requires --resync to establish baseline. "
497 | "Run: basic-memory cloud bisync --resync"
498 | )
499 |
500 | # Build and execute bisync command
501 | bisync_cmd = build_bisync_command(
502 | tenant_id,
503 | bucket_name,
504 | local_path,
505 | profile,
506 | dry_run=dry_run,
507 | resync=resync,
508 | verbose=verbose,
509 | )
510 |
511 | if dry_run:
512 | console.print("[yellow]DRY RUN MODE - No changes will be made[/yellow]")
513 |
514 | console.print(
515 | f"[blue]Running bisync with profile '{profile_name}' ({profile.description})...[/blue]"
516 | )
517 | console.print(f"[dim]Command: {' '.join(bisync_cmd)}[/dim]")
518 | console.print() # Blank line before output
519 |
520 | # Stream output in real-time so user sees progress
521 | result = subprocess.run(bisync_cmd, text=True)
522 |
523 | if result.returncode != 0:
524 | raise BisyncError(f"Bisync command failed with code {result.returncode}")
525 |
526 | console.print() # Blank line after output
527 |
528 | if dry_run:
529 | console.print("[green]✓ Dry run completed successfully[/green]")
530 | elif resync:
531 | console.print("[green]✓ Initial sync baseline established[/green]")
532 | else:
533 | console.print("[green]✓ Sync completed successfully[/green]")
534 |
535 | # Notify container to refresh cache (if not dry run)
536 | if not dry_run:
537 | try:
538 | asyncio.run(notify_container_sync(tenant_id))
539 | except Exception as e:
540 | console.print(f"[yellow]Warning: Could not notify container: {e}[/yellow]")
541 |
542 | return True
543 |
544 | except BisyncError:
545 | raise
546 | except Exception as e:
547 | raise BisyncError(f"Unexpected error during bisync: {e}") from e
548 |
549 |
550 | async def notify_container_sync(tenant_id: str) -> None:
551 | """Sync all projects after bisync completes."""
552 | try:
553 | from basic_memory.cli.commands.command_utils import run_sync
554 |
555 | # Fetch all projects and sync each one
556 | cloud_data = await fetch_cloud_projects()
557 | projects = cloud_data.projects
558 |
559 | if not projects:
560 | console.print("[dim]No projects to sync[/dim]")
561 | return
562 |
563 | console.print(f"[blue]Notifying cloud to index {len(projects)} project(s)...[/blue]")
564 |
565 | for project in projects:
566 | project_name = project.name
567 | if project_name:
568 | try:
569 | await run_sync(project=project_name)
570 | except Exception as e:
571 | # Non-critical, log and continue
572 | console.print(f"[yellow] ⚠ Sync failed for {project_name}: {e}[/yellow]")
573 |
574 | console.print("[dim]Note: Cloud indexing has started and may take a few moments[/dim]")
575 |
576 | except Exception as e:
577 | # Non-critical, don't fail the bisync
578 | console.print(f"[yellow]Warning: Post-sync failed: {e}[/yellow]")
579 |
580 |
581 | def run_bisync_watch(
582 | tenant_id: Optional[str] = None,
583 | bucket_name: Optional[str] = None,
584 | local_path: Optional[Path] = None,
585 | profile_name: str = "balanced",
586 | interval_seconds: int = 60,
587 | ) -> None:
588 | """Run bisync in watch mode with periodic syncs."""
589 |
590 | console.print("[bold blue]Starting bisync watch mode[/bold blue]")
591 | console.print(f"Sync interval: {interval_seconds} seconds")
592 | console.print("Press Ctrl+C to stop\n")
593 |
594 | try:
595 | while True:
596 | try:
597 | start_time = time.time()
598 |
599 | run_bisync(
600 | tenant_id=tenant_id,
601 | bucket_name=bucket_name,
602 | local_path=local_path,
603 | profile_name=profile_name,
604 | )
605 |
606 | elapsed = time.time() - start_time
607 | console.print(f"[dim]Sync completed in {elapsed:.1f}s[/dim]")
608 |
609 | # Wait for next interval
610 | time.sleep(interval_seconds)
611 |
612 | except BisyncError as e:
613 | console.print(f"[red]Sync error: {e}[/red]")
614 | console.print(f"[yellow]Retrying in {interval_seconds} seconds...[/yellow]")
615 | time.sleep(interval_seconds)
616 |
617 | except KeyboardInterrupt:
618 | console.print("\n[yellow]Watch mode stopped[/yellow]")
619 |
620 |
621 | def show_bisync_status() -> None:
622 | """Show current bisync status and configuration."""
623 |
624 | try:
625 | # Get tenant info
626 | tenant_info = asyncio.run(get_mount_info())
627 | tenant_id = tenant_info.tenant_id
628 |
629 | local_path = get_bisync_directory()
630 | state_path = get_bisync_state_path(tenant_id)
631 |
632 | # Create status table
633 | table = Table(title="Cloud Bisync Status", show_header=True, header_style="bold blue")
634 | table.add_column("Property", style="green", min_width=20)
635 | table.add_column("Value", style="dim", min_width=30)
636 |
637 | # Check initialization status
638 | is_initialized = bisync_state_exists(tenant_id)
639 | init_status = (
640 | "[green]✓ Initialized[/green]" if is_initialized else "[red]✗ Not initialized[/red]"
641 | )
642 |
643 | table.add_row("Tenant ID", tenant_id)
644 | table.add_row("Local Directory", str(local_path))
645 | table.add_row("Status", init_status)
646 | table.add_row("State Directory", str(state_path))
647 |
648 | # Check for last sync info
649 | if is_initialized:
650 | # Look for most recent state file
651 | state_files = list(state_path.glob("*.lst"))
652 | if state_files:
653 | latest = max(state_files, key=lambda p: p.stat().st_mtime)
654 | last_sync = datetime.fromtimestamp(latest.stat().st_mtime)
655 | table.add_row("Last Sync", last_sync.strftime("%Y-%m-%d %H:%M:%S"))
656 |
657 | console.print(table)
658 |
659 | # Show bisync profiles
660 | console.print("\n[bold]Available bisync profiles:[/bold]")
661 | for name, profile in BISYNC_PROFILES.items():
662 | console.print(f" {name}: {profile.description}")
663 | console.print(f" - Conflict resolution: {profile.conflict_resolve}")
664 | console.print(f" - Max delete: {profile.max_delete} files")
665 |
666 | console.print("\n[dim]To use a profile: bm cloud bisync --profile <name>[/dim]")
667 |
668 | # Show setup instructions if not initialized
669 | if not is_initialized:
670 | console.print("\n[yellow]To initialize bisync, run:[/yellow]")
671 | console.print(" bm cloud setup")
672 | console.print(" or")
673 | console.print(" bm cloud bisync --resync")
674 |
675 | except Exception as e:
676 | console.print(f"[red]Error getting bisync status: {e}[/red]")
677 | raise typer.Exit(1)
678 |
679 |
680 | def run_check(
681 | tenant_id: Optional[str] = None,
682 | bucket_name: Optional[str] = None,
683 | local_path: Optional[Path] = None,
684 | one_way: bool = False,
685 | ) -> bool:
686 | """Check file integrity between local and cloud using rclone check.
687 |
688 | Args:
689 | tenant_id: Cloud tenant ID (auto-detected if not provided)
690 | bucket_name: S3 bucket name (auto-detected if not provided)
691 | local_path: Local bisync directory (uses config default if not provided)
692 | one_way: If True, only check for missing files on destination (faster)
693 |
694 | Returns:
695 | True if check passed (files match), False if differences found
696 | """
697 | try:
698 | # Check if rclone is installed
699 | from basic_memory.cli.commands.cloud.rclone_installer import is_rclone_installed
700 |
701 | if not is_rclone_installed():
702 | raise BisyncError(
703 | "rclone is not installed. Run 'bm cloud bisync-setup' first to set up cloud sync."
704 | )
705 |
706 | # Get tenant info if not provided
707 | if not tenant_id or not bucket_name:
708 | tenant_info = asyncio.run(get_mount_info())
709 | tenant_id = tenant_id or tenant_info.tenant_id
710 | bucket_name = bucket_name or tenant_info.bucket_name
711 |
712 | # Get local path from config
713 | if not local_path:
714 | local_path = get_bisync_directory()
715 |
716 | # Check if bisync is initialized
717 | if not bisync_state_exists(tenant_id):
718 | raise BisyncError(
719 | "Bisync not initialized. Run 'bm cloud bisync --resync' to establish baseline."
720 | )
721 |
722 | # Build rclone check command
723 | rclone_remote = f"basic-memory-{tenant_id}:{bucket_name}"
724 | filter_path = get_bisync_filter_path()
725 |
726 | cmd = [
727 | "rclone",
728 | "check",
729 | str(local_path),
730 | rclone_remote,
731 | "--filter-from",
732 | str(filter_path),
733 | ]
734 |
735 | if one_way:
736 | cmd.append("--one-way")
737 |
738 | console.print("[bold blue]Checking file integrity between local and cloud[/bold blue]")
739 | console.print(f"[dim]Local: {local_path}[/dim]")
740 | console.print(f"[dim]Remote: {rclone_remote}[/dim]")
741 | console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
742 | console.print()
743 |
744 | # Run check command
745 | result = subprocess.run(cmd, capture_output=True, text=True)
746 |
747 | # rclone check returns:
748 | # 0 = success (all files match)
749 | # non-zero = differences found or error
750 | if result.returncode == 0:
751 | console.print("[green]✓ All files match between local and cloud[/green]")
752 | return True
753 | else:
754 | console.print("[yellow]⚠ Differences found:[/yellow]")
755 | if result.stderr:
756 | console.print(result.stderr)
757 | if result.stdout:
758 | console.print(result.stdout)
759 | console.print("\n[dim]To sync differences, run: bm sync[/dim]")
760 | return False
761 |
762 | except BisyncError:
763 | raise
764 | except Exception as e:
765 | raise BisyncError(f"Check failed: {e}") from e
766 |
```
--------------------------------------------------------------------------------
/specs/SPEC-10 Unified Deployment Workflow and Event Tracking.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | title: 'SPEC-10: Unified Deployment Workflow and Event Tracking'
3 | type: spec
4 | permalink: specs/spec-10-unified-deployment-workflow-event-tracking
5 | tags:
6 | - workflow
7 | - deployment
8 | - event-sourcing
9 | - architecture
10 | - simplification
11 | ---
12 |
13 | # SPEC-10: Unified Deployment Workflow and Event Tracking
14 |
15 | ## Why
16 |
17 | We replaced a complex multi-workflow system with DBOS orchestration that was proving to be more trouble than it was worth. The previous architecture had four separate workflows (`tenant_provisioning`, `tenant_update`, `tenant_deployment`, `tenant_undeploy`) with overlapping logic, complex state management, and fragmented event tracking. DBOS added unnecessary complexity without providing sufficient value, leading to harder debugging and maintenance.
18 |
19 | **Problems Solved:**
20 | - **Framework Complexity**: DBOS configuration overhead and fighting framework limitations
21 | - **Code Duplication**: Multiple workflows implementing similar operations with duplicate logic
22 | - **Poor Observability**: Fragmented event tracking across workflow boundaries
23 | - **Maintenance Overhead**: Complex orchestration for fundamentally simple operations
24 | - **Debugging Difficulty**: Framework abstractions hiding simple Python stack traces
25 |
26 | ## What
27 |
28 | This spec documents the architectural simplification that consolidates tenant lifecycle management into a unified system with comprehensive event tracking.
29 |
30 | **Affected Areas:**
31 | - Tenant deployment workflows (provisioning, updates, undeploying)
32 | - Event sourcing and workflow tracking infrastructure
33 | - API endpoints for tenant operations
34 | - Database schema for workflow and event correlation
35 | - Integration testing for tenant lifecycle operations
36 |
37 | **Key Changes:**
38 | - **Removed DBOS entirely** - eliminated framework dependency and complexity
39 | - **Consolidated 4 workflows → 2 unified workflows (deploy/undeploy)**
40 | - **Added workflow tracking system** with complete event correlation
41 | - **Simplified API surface** - single `/deploy` endpoint handles all scenarios
42 | - **Enhanced observability** through event sourcing with workflow grouping
43 |
44 | ## How (High Level)
45 |
46 | ### Architectural Philosophy
47 | **Embrace simplicity over framework complexity** - use well-structured Python with proper database design instead of complex orchestration frameworks.
48 |
49 | ### Core Components
50 |
51 | #### 1. Unified Deployment Workflow
52 | ```python
53 | class TenantDeploymentWorkflow:
54 |     async def deploy_tenant_workflow(self, tenant_id: UUID, workflow_id: UUID, image_tag: str | None = None):
55 | # Single workflow handles both initial provisioning AND updates
56 | # Each step is idempotent and handles its own error recovery
57 | # Database transactions provide the durability we need
58 |         await self.start_deployment_step(workflow_id, tenant_id, image_tag)
59 |         await self.create_fly_app_step(workflow_id, tenant_id)
60 |         await self.create_bucket_step(workflow_id, tenant_id)
61 |         await self.deploy_machine_step(workflow_id, tenant_id, image_tag)
62 |         await self.complete_deployment_step(workflow_id, tenant_id, image_tag, deployment_time)
63 | ```
64 |
65 | **Key Benefits:**
66 | - **Handles both provisioning and updates** in single workflow
67 | - **Idempotent operations** - safe to retry any step (see the sketch below)
68 | - **Clean error handling** via simple Python exceptions
69 | - **Resumable** - can restart from any failed step
70 |
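As an illustration of the idempotency point above, here is a minimal sketch of what a single step can look like. The helper names (`fly_client.app_exists`, `fly_client.create_app`, `record_event`) and the app-naming convention are assumptions for the example, not the actual implementation:

```python
from uuid import UUID


class TenantDeploymentWorkflow:  # continuation sketch; fly_client and record_event are assumed helpers
    async def create_fly_app_step(self, workflow_id: UUID, tenant_id: UUID) -> None:
        """Idempotent step: safe to re-run after a partial failure."""
        app_name = f"basic-memory-{tenant_id}"  # deterministic name, so retries target the same app
        if await self.fly_client.app_exists(app_name):
            # Already provisioned on an earlier attempt - record the fact and move on.
            await self.record_event(workflow_id, tenant_id, "fly_app_exists", {"app": app_name})
            return
        await self.fly_client.create_app(app_name)
        await self.record_event(workflow_id, tenant_id, "fly_app_created", {"app": app_name})
```

Because each step first checks the state it is responsible for, the whole workflow can be restarted from the top after a failure without creating duplicate resources.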
71 | #### 2. Workflow Tracking System
72 |
73 | **Database Schema:**
74 | ```sql
75 | CREATE TABLE workflow (
76 | id UUID PRIMARY KEY,
77 | workflow_type VARCHAR(50) NOT NULL, -- 'tenant_deployment', 'tenant_undeploy'
78 | tenant_id UUID REFERENCES tenant(id),
79 | status VARCHAR(20) DEFAULT 'running', -- 'running', 'completed', 'failed'
80 | workflow_metadata JSONB DEFAULT '{}' -- image_tag, etc.
81 | );
82 |
83 | ALTER TABLE event ADD COLUMN workflow_id UUID REFERENCES workflow(id);
84 | ```
85 |
86 | **Event Correlation:**
87 | - Every workflow operation generates events tagged with `workflow_id`
88 | - Complete audit trail from workflow start to completion
89 | - Events grouped by workflow for easy reconstruction of operations (query sketch below)
90 |
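To show how the correlation is used in practice, here is a minimal query sketch for reconstructing one workflow's audit trail. It assumes an async SQLAlchemy session and a `created_at` timestamp column on `event`, neither of which is shown in the schema above:

```python
from uuid import UUID

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession


async def events_for_workflow(session: AsyncSession, workflow_id: UUID) -> list[dict]:
    """Return one workflow's events in order - its complete audit trail."""
    result = await session.execute(
        text(
            "SELECT * FROM event"
            " WHERE workflow_id = :workflow_id"
            " ORDER BY created_at"
        ),
        {"workflow_id": str(workflow_id)},
    )
    return [dict(row) for row in result.mappings()]
```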
91 | #### 3. Parameter Standardization
92 | All workflow methods follow consistent signature pattern:
93 | ```python
94 | async def method_name(self, session: AsyncSession, workflow_id: UUID | None, tenant_id: UUID, ...)
95 | ```
96 |
97 | **Benefits:**
98 | - **Consistent event tagging** - all events properly correlated
99 | - **Clear method contracts** - workflow_id always leads the domain parameters (immediately after the session)
100 | - **Type safety** - proper UUID handling throughout
101 |
102 | ### Implementation Strategy
103 |
104 | #### Phase 1: Workflow Consolidation ✅ COMPLETED
105 | - [x] **Remove DBOS dependency** - eliminated dbos_config.py and all DBOS imports
106 | - [x] **Create unified TenantDeploymentWorkflow** - handles both provisioning and updates
107 | - [x] **Remove legacy workflows** - deleted tenant_provisioning.py, tenant_update.py
108 | - [x] **Simplify API endpoints** - consolidated to single `/deploy` endpoint
109 | - [x] **Update integration tests** - comprehensive edge case testing
110 |
111 | #### Phase 2: Workflow Tracking System ✅ COMPLETED
112 | - [x] **Database migration** - added workflow table and event.workflow_id foreign key
113 | - [x] **Workflow repository** - CRUD operations for workflow records
114 | - [x] **Event correlation** - all workflow events tagged with workflow_id
115 | - [x] **Comprehensive testing** - workflow lifecycle and event grouping tests
116 |
117 | #### Phase 3: Parameter Standardization ✅ COMPLETED
118 | - [x] **Standardize method signatures** - workflow_id as first parameter pattern
119 | - [x] **Fix event tagging** - ensure all workflow events properly correlated
120 | - [x] **Update service methods** - consistent parameter order across tenant_service
121 | - [x] **Integration test validation** - verify complete event sequences
122 |
123 | ### Architectural Benefits
124 |
125 | #### Code Simplification
126 | - **39 files changed**: 2,247 additions, 3,256 deletions (net -1,009 lines)
127 | - **Eliminated framework complexity** - no more DBOS configuration or abstractions
128 | - **Consolidated logic** - single deployment workflow vs 4 separate workflows
129 | - **Cleaner API surface** - unified endpoint vs multiple workflow-specific endpoints
130 |
131 | #### Enhanced Observability
132 | - **Complete event correlation** - every workflow event tagged with workflow_id
133 | - **Audit trail reconstruction** - can trace entire tenant lifecycle through events
134 | - **Workflow status tracking** - running/completed/failed states in database
135 | - **Comprehensive testing** - edge cases covered with real infrastructure
136 |
137 | #### Operational Benefits
138 | - **Simpler debugging** - plain Python stack traces vs framework abstractions
139 | - **Reduced dependencies** - one less complex framework to maintain
140 | - **Better error handling** - explicit exception handling vs framework magic
141 | - **Easier maintenance** - straightforward Python code vs orchestration complexity
142 |
143 | ## How to Evaluate
144 |
145 | ### Success Criteria
146 |
147 | #### Functional Completeness ✅ VERIFIED
148 | - [x] **Unified deployment workflow** handles both initial provisioning and updates
149 | - [x] **Undeploy workflow** properly integrated with event tracking
150 | - [x] **All operations idempotent** - safe to retry any step without duplication
151 | - [x] **Complete tenant lifecycle** - provision → active → update → undeploy
152 |
153 | #### Event Tracking and Correlation ✅ VERIFIED
154 | - [x] **All workflow events tagged** with proper workflow_id
155 | - [x] **Event sequence verification** - tests assert exact event order and content
156 | - [x] **Workflow grouping** - events can be queried by workflow_id for complete audit trail
157 | - [x] **Cross-workflow isolation** - deployment vs undeploy events properly separated
158 |
159 | #### Database Schema and Performance ✅ VERIFIED
160 | - [x] **Migration applied** - workflow table and event.workflow_id column created
161 | - [x] **Proper indexing** - performance optimized queries on workflow_type, tenant_id, status
162 | - [x] **Foreign key constraints** - referential integrity between workflows and events
163 | - [x] **Database triggers** - updated_at timestamp automation
164 |
165 | #### Test Coverage ✅ COMPREHENSIVE
166 | - [x] **Unit tests**: 4 workflow tracking tests covering lifecycle and event grouping
167 | - [x] **Integration tests**: Real infrastructure testing with Fly.io resources
168 | - [x] **Edge case coverage**: Failed deployments, partial state recovery, resource conflicts
169 | - [x] **Event sequence verification**: Exact event order and content validation
170 |
171 | ### Testing Procedure
172 |
173 | #### Unit Test Validation ✅ PASSING
174 | ```bash
175 | cd apps/cloud && pytest tests/test_workflow_tracking.py -v
176 | # 4/4 tests passing - workflow lifecycle and event grouping
177 | ```
178 |
179 | #### Integration Test Validation ✅ PASSING
180 | ```bash
181 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_deployment_integration.py -v
182 | cd apps/cloud && pytest tests/integration/test_tenant_workflow_undeploy_integration.py -v
183 | # Comprehensive real infrastructure testing with actual Fly.io resources
184 | # Tests provision → deploy → update → undeploy → cleanup cycles
185 | ```
186 |
187 | ### Performance Metrics
188 |
189 | #### Code Metrics ✅ ACHIEVED
190 | - **Net code reduction**: -1,009 lines (3,256 deletions, 2,247 additions)
191 | - **Workflow consolidation**: 4 workflows → 1 unified deployment workflow
192 | - **Dependency reduction**: Removed DBOS framework dependency entirely
193 | - **API simplification**: Multiple endpoints → single `/deploy` endpoint
194 |
195 | #### Operational Metrics ✅ VERIFIED
196 | - **Event correlation**: 100% of workflow events properly tagged with workflow_id
197 | - **Audit trail completeness**: Full tenant lifecycle traceable through event sequences
198 | - **Error handling**: Clean Python exceptions vs framework abstractions
199 | - **Debugging simplicity**: Direct stack traces vs orchestration complexity
200 |
201 | ### Implementation Status: ✅ COMPLETE
202 |
203 | All phases completed successfully with comprehensive testing and verification:
204 |
205 | **Phase 1 - Workflow Consolidation**: ✅ COMPLETE
206 | - Removed DBOS dependency and consolidated workflows
207 | - Unified deployment workflow handles all scenarios
208 | - Comprehensive integration testing with real infrastructure
209 |
210 | **Phase 2 - Workflow Tracking**: ✅ COMPLETE
211 | - Database schema implemented with proper indexing
212 | - Event correlation system fully functional
213 | - Complete audit trail capability verified
214 |
215 | **Phase 3 - Parameter Standardization**: ✅ COMPLETE
216 | - Consistent method signatures across all workflow methods
217 | - All events properly tagged with workflow_id
218 | - Type safety verified across entire codebase
219 |
220 | **Phase 4 - Asynchronous Job Queuing**:
221 | **Goal**: Transform synchronous deployment workflows into background jobs for better user experience and system reliability.
222 |
223 | **Current Problem**:
224 | - Deployment API calls are synchronous - users wait for entire tenant provisioning (30-60 seconds)
225 | - No retry mechanism for failed operations
226 | - HTTP timeouts on long-running deployments
227 | - Poor user experience during infrastructure provisioning
228 |
229 | **Solution**: Redis-backed job queue with arq for reliable background processing
230 |
231 | #### Architecture Overview
232 | ```python
233 | # API Layer: Return immediately with job tracking
234 | @router.post("/{tenant_id}/deploy")
235 | async def deploy_tenant(tenant_id: UUID):
236 | # Create workflow record in Postgres
237 | workflow = await workflow_repo.create_workflow("tenant_deployment", tenant_id)
238 |
239 | # Enqueue job in Redis
240 | job = await arq_pool.enqueue_job('deploy_tenant_task', tenant_id, workflow.id)
241 |
242 | # Return job ID immediately
243 | return {"job_id": job.job_id, "workflow_id": workflow.id, "status": "queued"}
244 |
245 | # Background Worker: Process via existing unified workflow
246 | async def deploy_tenant_task(ctx, tenant_id: str, workflow_id: str):
247 | # Existing workflow logic - zero changes needed!
248 | await workflow_manager.deploy_tenant(UUID(tenant_id), workflow_id=UUID(workflow_id))
249 | ```
250 |
251 | #### Implementation Tasks
252 |
253 | **Phase 4.1: Core Job Queue Setup** ✅ COMPLETED
254 | - [x] **Add arq dependency** - integrated Redis job queue with existing infrastructure
255 | - [x] **Create job definitions** - wrapped existing deployment/undeploy workflows as arq tasks
256 | - [x] **Update API endpoints** - updated provisioning endpoints to return job IDs instead of waiting for completion
257 | - [x] **JobQueueService implementation** - service layer for job enqueueing and status tracking
258 | - [x] **Job status tracking** - integrated with existing workflow table for status updates
259 | - [x] **Comprehensive testing** - 18 tests covering positive, negative, and edge cases
260 |
261 | **Phase 4.2: Background Worker Implementation** ✅ COMPLETED
262 | - [x] **Job status API** - GET /jobs/{job_id}/status endpoint integrated with JobQueueService
263 | - [x] **Background worker process** - arq worker to process queued jobs with proper settings and Redis configuration
264 | - [x] **Worker settings and configuration** - WorkerSettings class with proper timeouts, max jobs, and error handling
265 | - [x] **Fix API endpoints** - updated job status API to use JobQueueService instead of direct Redis access
266 | - [x] **Integration testing** - comprehensive end-to-end testing with real ARQ workers and Fly.io infrastructure
267 | - [x] **Worker entry points** - dual-purpose entrypoint.sh script and __main__.py module support for both API and worker processes
268 | - [x] **Test fixture updates** - fixed all API and service test fixtures to work with job queue dependencies
269 | - [x] **AsyncIO event loop fixes** - resolved event loop issues in integration tests for subprocess worker compatibility
270 | - [x] **Complete test coverage** - all 46 tests passing across unit, integration, and API test suites
271 | - [x] **Type safety verification** - 0 type checking errors across entire ARQ job queue implementation
272 |
273 | #### Phase 4.2 Implementation Summary ✅ COMPLETE
274 |
275 | **Core ARQ Job Queue System:**
276 | - **JobQueueService** - Centralized service for job enqueueing, status tracking, and Redis pool management
277 | - **deployment_jobs.py** - ARQ job functions that wrap existing deployment/undeploy workflows
278 | - **Worker Settings** - Production-ready ARQ configuration with proper timeouts and error handling
279 | - **Dual-Process Architecture** - Single Docker image with entrypoint.sh supporting both API and worker modes
280 |
281 | **Key Files Added:**
282 | - `apps/cloud/src/basic_memory_cloud/jobs/` - Complete job queue implementation (7 files)
283 | - `apps/cloud/entrypoint.sh` - Dual-purpose Docker container entry point
284 | - `apps/cloud/tests/integration/test_worker_integration.py` - Real infrastructure integration tests
285 | - `apps/cloud/src/basic_memory_cloud/schemas/job_responses.py` - API response schemas
286 |
287 | **API Integration:**
288 | - Provisioning endpoints return job IDs immediately instead of blocking for 60+ seconds
289 | - Job status API endpoints for real-time monitoring of deployment progress
290 | - Proper error handling and job failure scenarios with detailed error messages
291 |
292 | **Testing Achievement:**
293 | - **46 total tests passing** across all test suites (unit, integration, API, services)
294 | - **Real infrastructure testing** - ARQ workers process actual Fly.io deployments
295 | - **Event loop safety** - Fixed asyncio issues for subprocess worker compatibility
296 | - **Test fixture updates** - All fixtures properly support job queue dependencies
297 | - **Type checking** - 0 errors across entire codebase
298 |
299 | **Technical Metrics:**
300 | - **38 files changed** - +1,736 insertions, -334 deletions
301 | - **Integration test runtime** - ~18 seconds with real ARQ workers and Fly.io verification
302 | - **Event loop isolation** - Proper async session management for subprocess compatibility
303 | - **Redis integration** - Production-ready Redis configuration with connection pooling
304 |
305 | **Phase 4.3: Production Hardening** ✅ COMPLETED
306 | - [x] **Configure Upstash Redis** - production Redis setup on Fly.io
307 | - [x] **Retry logic for external APIs** - exponential backoff for flaky Tigris IAM operations
308 | - [x] **Monitoring and observability** - comprehensive Redis queue monitoring with CLI tools
309 | - [x] **Error handling improvements** - graceful handling of expected API errors with appropriate log levels
310 | - [x] **CLI tooling enhancements** - bulk update commands for CI/CD automation
311 | - [x] **Documentation improvements** - comprehensive monitoring guide with Redis patterns
312 | - [x] **Job uniqueness** - ARQ-based duplicate prevention for tenant operations
313 | - [ ] **Worker scaling** - multiple arq workers for parallel job processing
314 | - [ ] **Job persistence** - ensure jobs survive Redis/worker restarts
315 | - [ ] **Error alerting** - notifications for failed deployment jobs
316 |
317 | **Phase 4.4: Advanced Features** (Future)
318 | - [ ] **Job scheduling** - deploy tenants at specific times
319 | - [ ] **Priority queues** - urgent deployments processed first
320 | - [ ] **Batch operations** - bulk tenant deployments
321 | - [ ] **Job dependencies** - deployment → configuration → activation chains
322 |
323 | #### Benefits Achieved ✅ REALIZED
324 |
325 | **User Experience Improvements:**
326 | - **Immediate API responses** - users get job ID instantly vs waiting 60+ seconds for deployment completion
327 | - **Real-time job tracking** - status API provides live updates on deployment progress
328 | - **Better error visibility** - detailed error messages and job failure tracking
329 | - **CI/CD automation ready** - bulk update commands for automated tenant deployments
330 |
331 | **System Reliability:**
332 | - **Redis persistence** - jobs survive Redis/worker restarts with proper queue durability
333 | - **Idempotent job processing** - jobs can be safely retried without side effects
334 | - **Event loop isolation** - worker processes operate independently from API server
335 | - **Retry resilience** - exponential backoff for flaky external API calls (3 attempts, 1s/2s delays); a sketch follows after this list
336 | - **Graceful error handling** - expected API errors logged at INFO level, unexpected at ERROR level
337 | - **Job uniqueness** - prevent duplicate tenant operations with ARQ's built-in uniqueness feature
338 |
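For reference, a minimal sketch of the retry shape described above (3 attempts, 1s then 2s between them); the `tigris_client.create_iam_policy` call in the usage comment is a hypothetical stand-in for any flaky external API:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def with_backoff(operation, *, attempts: int = 3, base_delay: float = 1.0):
    """Retry an async operation with exponential backoff between attempts."""
    for attempt in range(1, attempts + 1):
        try:
            return await operation()
        except Exception as exc:  # in practice, narrow to the API's error types
            if attempt == attempts:
                raise
            delay = base_delay * 2 ** (attempt - 1)  # 1s, then 2s for attempts=3
            logger.info("Attempt %d/%d failed (%s); retrying in %.0fs", attempt, attempts, exc, delay)
            await asyncio.sleep(delay)


# Usage sketch:
# policy = await with_backoff(lambda: tigris_client.create_iam_policy(tenant_id))
```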
339 | **Operational Benefits:**
340 | - **Horizontal scaling ready** - architecture supports adding more workers for parallel processing
341 | - **Comprehensive testing** - real infrastructure integration tests ensure production reliability
342 | - **Type safety** - full type checking prevents runtime errors in job processing
343 | - **Clean separation** - API and worker processes use same codebase with different entry points
344 | - **Queue monitoring** - Redis CLI integration for real-time queue activity monitoring
345 | - **Comprehensive documentation** - detailed monitoring guide with Redis pattern explanations
346 |
347 | **Development Benefits:**
348 | - **Zero workflow changes** - existing deployment/undeploy workflows work unchanged as background jobs
349 | - **Async/await native** - modern Python asyncio patterns throughout the implementation
350 | - **Event correlation preserved** - all existing workflow tracking and event sourcing continues to work
351 | - **Enhanced CLI tooling** - unified tenant commands with proper endpoint routing
352 | - **Database integrity** - proper foreign key constraint handling in tenant deletion
353 |
354 | #### Infrastructure Requirements
355 | - **Local**: Redis via docker-compose (already exists) ✅
356 | - **Production**: Upstash Redis on Fly.io (already configured) ✅
357 | - **Workers**: arq worker processes (new deployment target)
358 | - **Monitoring**: Job status dashboard (simple web interface)
359 |
360 | #### API Evolution
361 | ```python
362 | # Before: Synchronous (blocks for 60+ seconds)
363 | POST /tenant/{id}/deploy → {status: "active", machine_id: "..."}
364 |
365 | # After: Asynchronous (returns immediately)
366 | POST /tenant/{id}/deploy → {job_id: "uuid", workflow_id: "uuid", status: "queued"}
367 | GET /jobs/{job_id}/status → {status: "running", progress: "deploying_machine", workflow_id: "uuid"}
368 | GET /workflows/{workflow_id}/events → [...] # Existing event tracking works unchanged
369 | ```
370 |
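To make the asynchronous flow concrete, a minimal client-side sketch that enqueues a deployment and polls the job status endpoint. httpx is assumed here, and the response fields and terminal status values follow the shapes sketched above rather than a documented contract:

```python
import asyncio

import httpx


async def deploy_and_wait(base_url: str, tenant_id: str, poll_seconds: float = 2.0) -> dict:
    """Kick off a deployment, then poll the job status API until it reaches a terminal state."""
    async with httpx.AsyncClient(base_url=base_url, timeout=30.0) as client:
        resp = await client.post(f"/tenant/{tenant_id}/deploy")
        resp.raise_for_status()
        job_id = resp.json()["job_id"]

        while True:
            status = (await client.get(f"/jobs/{job_id}/status")).json()
            if status["status"] in ("completed", "failed"):  # assumed terminal states
                return status
            await asyncio.sleep(poll_seconds)
```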
371 | **Technology Choice**: **arq (Redis)** over pgqueuer
372 | - **Existing Redis infrastructure** - Upstash + docker-compose already configured
373 | - **Better ecosystem** - monitoring tools, documentation, community
374 | - **Made by pydantic team** - aligns with existing Python stack
375 | - **Hybrid approach** - Redis for queue operations + Postgres for workflow state
376 |
377 | #### Job Uniqueness Implementation
378 |
379 | **Problem**: Multiple concurrent deployment requests for the same tenant could create duplicate jobs, wasting resources and potentially causing conflicts.
380 |
381 | **Solution**: Leverage ARQ's built-in job uniqueness feature using predictable job IDs:
382 |
383 | ```python
384 | # JobQueueService implementation
385 | async def enqueue_deploy_job(self, tenant_id: UUID, image_tag: str | None = None) -> str:
386 | unique_job_id = f"deploy-{tenant_id}"
387 |
388 | job = await self.redis_pool.enqueue_job(
389 | "deploy_tenant_job",
390 | str(tenant_id),
391 | image_tag,
392 | _job_id=unique_job_id, # ARQ prevents duplicates
393 | )
394 |
395 | if job is None:
396 | # Job already exists - return existing job ID
397 | return unique_job_id
398 | else:
399 | # New job created - return ARQ job ID
400 | return job.job_id
401 | ```
402 |
403 | **Key Features:**
404 | - **Predictable Job IDs**: `deploy-{tenant_id}`, `undeploy-{tenant_id}`
405 | - **Duplicate Prevention**: ARQ returns `None` for duplicate job IDs
406 | - **Graceful Handling**: Return existing job ID instead of raising errors
407 | - **Idempotent Operations**: Safe to retry deployment requests
408 | - **Clear Logging**: Distinguish "Enqueued new" vs "Found existing" jobs
409 |
410 | **Benefits:**
411 | - Prevents resource waste from duplicate deployments
412 | - Eliminates race conditions from concurrent requests
413 | - Makes job monitoring more predictable with consistent IDs
414 | - Provides natural deduplication without complex locking mechanisms
415 |
416 |
417 | ## Notes
418 |
419 | ### Design Philosophy Lessons
420 | - **Simplicity beats framework magic** - removing DBOS made the system more reliable and debuggable
421 | - **Event sourcing > complex orchestration** - database-backed event tracking provides better observability than framework abstractions
422 | - **Idempotent operations > resumable workflows** - each step handling its own retry logic is simpler than framework-managed resumability
423 | - **Explicit error handling > framework exception handling** - Python exceptions are clearer than orchestration framework error states
424 |
425 | ### Future Considerations
426 | - **Monitoring integration** - workflow tracking events could feed into observability systems
427 | - **Performance optimization** - event querying patterns may benefit from additional indexing
428 | - **Audit compliance** - complete event trail supports regulatory requirements
429 | - **Operational dashboards** - workflow status could drive tenant health monitoring
430 |
431 | ### Related Specifications
432 | - **SPEC-8**: TigrisFS Integration - bucket provisioning integrated with deployment workflow
433 | - **SPEC-1**: Specification-Driven Development Process - this spec follows the established format
434 |
435 | ## Observations
436 |
437 | - [architecture] Removing framework complexity led to more maintainable system #simplification
438 | - [workflow] Single unified deployment workflow handles both provisioning and updates #consolidation
439 | - [observability] Event sourcing with workflow correlation provides complete audit trail #event-tracking
440 | - [database] Foreign key relationships between workflows and events enable powerful queries #schema-design
441 | - [testing] Integration tests with real infrastructure catch edge cases that unit tests miss #testing-strategy
442 | - [parameters] Consistent method signatures (workflow_id first) reduce cognitive overhead #api-design
443 | - [maintenance] Fewer workflows and dependencies reduce long-term maintenance burden #operational-excellence
444 | - [debugging] Plain Python exceptions are clearer than framework abstraction layers #developer-experience
445 | - [resilience] Exponential backoff retry patterns handle flaky external API calls gracefully #error-handling
446 | - [monitoring] Redis queue monitoring provides real-time operational visibility #observability
447 | - [ci-cd] Bulk update commands enable automated tenant deployments in continuous delivery pipelines #automation
448 | - [documentation] Comprehensive monitoring guides reduce operational learning curve #knowledge-management
449 | - [error-logging] Context-aware log levels (INFO for expected errors, ERROR for unexpected) improve signal-to-noise ratio #logging-strategy
450 | - [job-uniqueness] ARQ job uniqueness with predictable tenant-based IDs prevents duplicate operations and resource waste #deduplication
451 |
452 | ## Implementation Notes
453 |
454 | ### Configuration Integration
455 | - **Redis Configuration**: Add Redis settings to existing `apps/cloud/src/basic_memory_cloud/config.py` (sketch below)
456 | - **Local Development**: Leverage existing Redis setup from `docker-compose.yml`
457 | - **Production**: Use Upstash Redis configuration for production environments
458 |
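A minimal sketch of what those settings could look like with pydantic-settings; the field names and defaults here are illustrative, not the actual contents of `config.py`:

```python
from pydantic_settings import BaseSettings


class RedisSettings(BaseSettings):
    """Illustrative Redis/ARQ settings; the real names live in config.py."""

    redis_url: str = "redis://localhost:6379"  # docker-compose default locally, Upstash URL in production
    arq_max_jobs: int = 10
    arq_keep_result_seconds: int = 3600


settings = RedisSettings()  # values override via env vars (REDIS_URL, ARQ_MAX_JOBS, ...)
```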
459 | ### Docker Entrypoint Strategy
460 | Create `entrypoint.sh` script to toggle between API server and worker processes using single Docker image:
461 |
462 | ```bash
463 | #!/bin/bash
464 |
465 | # Entrypoint script for Basic Memory Cloud service
466 | # Supports multiple process types: api, worker
467 |
468 | set -e
469 |
470 | case "$1" in
471 | "api")
472 | echo "Starting Basic Memory Cloud API server..."
473 | exec uvicorn basic_memory_cloud.main:app \
474 | --host 0.0.0.0 \
475 | --port 8000 \
476 | --log-level info
477 | ;;
478 | "worker")
479 | echo "Starting Basic Memory Cloud ARQ worker..."
480 | # For ARQ worker implementation
481 | exec python -m arq basic_memory_cloud.jobs.settings.WorkerSettings
482 | ;;
483 | *)
484 | echo "Usage: $0 {api|worker}"
485 | echo " api - Start the FastAPI server"
486 | echo " worker - Start the ARQ worker"
487 | exit 1
488 | ;;
489 | esac
490 | ```
491 |
492 | ### Fly.io Process Groups Configuration
493 | Use separate machine groups for API and worker processes with independent scaling:
494 |
495 | ```toml
496 | # fly.toml app configuration for basic-memory-cloud
497 | app = 'basic-memory-cloud-dev-basic-machines'
498 | primary_region = 'dfw'
499 | org = 'basic-machines'
500 | kill_signal = 'SIGINT'
501 | kill_timeout = '5s'
502 |
503 | [build]
504 |
505 | # Process groups for API server and worker
506 | [processes]
507 | api = "api"
508 | worker = "worker"
509 |
510 | # Machine scaling configuration
511 | [[machine]]
512 | size = 'shared-cpu-1x'
513 | processes = ['api']
514 | min_machines_running = 1
515 | auto_stop_machines = false
516 | auto_start_machines = true
517 |
518 | [[machine]]
519 | size = 'shared-cpu-1x'
520 | processes = ['worker']
521 | min_machines_running = 1
522 | auto_stop_machines = false
523 | auto_start_machines = true
524 |
525 | [env]
526 | # Python configuration
527 | PYTHONUNBUFFERED = '1'
528 | PYTHONPATH = '/app'
529 |
530 | # Logging configuration
531 | LOG_LEVEL = 'DEBUG'
532 |
533 | # Redis configuration for ARQ
534 | REDIS_URL = 'redis://basic-memory-cloud-redis.upstash.io'
535 |
536 | # Database configuration
537 | DATABASE_HOST = 'basic-memory-cloud-db-dev-basic-machines.internal'
538 | DATABASE_PORT = '5432'
539 | DATABASE_NAME = 'basic_memory_cloud'
540 | DATABASE_USER = 'postgres'
541 | DATABASE_SSL = 'true'
542 |
543 | # Worker configuration
544 | ARQ_MAX_JOBS = '10'
545 | ARQ_KEEP_RESULT = '3600'
546 |
547 | # Fly.io configuration
548 | FLY_ORG = 'basic-machines'
549 | FLY_REGION = 'dfw'
550 |
551 | # Internal service - no external HTTP exposure for worker
552 | # API accessible via basic-memory-cloud-dev-basic-machines.flycast:8000
553 |
554 | [[vm]]
555 | size = 'shared-cpu-1x'
556 | ```
557 |
558 | ### Benefits of This Architecture
559 | - **Single Docker Image**: Both API and worker use same container with different entrypoints
560 | - **Independent Scaling**: Scale API and worker processes separately based on demand
561 | - **Clean Separation**: Web traffic handling separate from background job processing
562 | - **Existing Infrastructure**: Leverages current PostgreSQL + Redis setup without complexity
563 | - **Hybrid State Management**: Redis for queue operations, PostgreSQL for persistent workflow tracking
564 |
565 | ## Relations
566 |
567 | - implements [[SPEC-8 TigrisFS Integration]]
568 | - follows [[SPEC-1 Specification-Driven Development Process]]
569 | - supersedes previous multi-workflow architecture
570 |
```
--------------------------------------------------------------------------------
/tests/repository/test_entity_repository.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for the EntityRepository."""
2 |
3 | from datetime import datetime, timezone
4 |
5 | import pytest
6 | import pytest_asyncio
7 | from sqlalchemy import select
8 |
9 | from basic_memory import db
10 | from basic_memory.models import Entity, Observation, Relation, Project
11 | from basic_memory.repository.entity_repository import EntityRepository
12 | from basic_memory.utils import generate_permalink
13 |
14 |
15 | @pytest_asyncio.fixture
16 | async def entity_with_observations(session_maker, sample_entity):
17 | """Create an entity with observations."""
18 | async with db.scoped_session(session_maker) as session:
19 | observations = [
20 | Observation(
21 | entity_id=sample_entity.id,
22 | content="First observation",
23 | ),
24 | Observation(
25 | entity_id=sample_entity.id,
26 | content="Second observation",
27 | ),
28 | ]
29 | session.add_all(observations)
30 | return sample_entity
31 |
32 |
33 | @pytest_asyncio.fixture
34 | async def related_results(session_maker, test_project: Project):
35 | """Create entities with relations between them."""
36 | async with db.scoped_session(session_maker) as session:
37 | source = Entity(
38 | project_id=test_project.id,
39 | title="source",
40 | entity_type="test",
41 | permalink="source/source",
42 | file_path="source/source.md",
43 | content_type="text/markdown",
44 | created_at=datetime.now(timezone.utc),
45 | updated_at=datetime.now(timezone.utc),
46 | )
47 | target = Entity(
48 | project_id=test_project.id,
49 | title="target",
50 | entity_type="test",
51 | permalink="target/target",
52 | file_path="target/target.md",
53 | content_type="text/markdown",
54 | created_at=datetime.now(timezone.utc),
55 | updated_at=datetime.now(timezone.utc),
56 | )
57 | session.add(source)
58 | session.add(target)
59 | await session.flush()
60 |
61 | relation = Relation(
62 | from_id=source.id,
63 | to_id=target.id,
64 | to_name=target.title,
65 | relation_type="connects_to",
66 | )
67 | session.add(relation)
68 |
69 | return source, target, relation
70 |
71 |
72 | @pytest.mark.asyncio
73 | async def test_create_entity(entity_repository: EntityRepository):
74 | """Test creating a new entity"""
75 | entity_data = {
76 | "project_id": entity_repository.project_id,
77 | "title": "Test",
78 | "entity_type": "test",
79 | "permalink": "test/test",
80 | "file_path": "test/test.md",
81 | "content_type": "text/markdown",
82 | "created_at": datetime.now(timezone.utc),
83 | "updated_at": datetime.now(timezone.utc),
84 | }
85 | entity = await entity_repository.create(entity_data)
86 |
87 | # Verify returned object
88 | assert entity.id is not None
89 | assert entity.title == "Test"
90 | assert isinstance(entity.created_at, datetime)
91 | assert isinstance(entity.updated_at, datetime)
92 |
93 | # Verify in database
94 | found = await entity_repository.find_by_id(entity.id)
95 | assert found is not None
96 | assert found.id is not None
97 | assert found.id == entity.id
98 | assert found.title == entity.title
99 |
100 | # assert relations are eagerly loaded
101 | assert len(entity.observations) == 0
102 | assert len(entity.relations) == 0
103 |
104 |
105 | @pytest.mark.asyncio
106 | async def test_create_all(entity_repository: EntityRepository):
107 |     """Test creating multiple entities in one call"""
108 | entity_data = [
109 | {
110 | "project_id": entity_repository.project_id,
111 | "title": "Test_1",
112 | "entity_type": "test",
113 | "permalink": "test/test-1",
114 | "file_path": "test/test_1.md",
115 | "content_type": "text/markdown",
116 | "created_at": datetime.now(timezone.utc),
117 | "updated_at": datetime.now(timezone.utc),
118 | },
119 | {
120 | "project_id": entity_repository.project_id,
121 | "title": "Test-2",
122 | "entity_type": "test",
123 | "permalink": "test/test-2",
124 | "file_path": "test/test_2.md",
125 | "content_type": "text/markdown",
126 | "created_at": datetime.now(timezone.utc),
127 | "updated_at": datetime.now(timezone.utc),
128 | },
129 | ]
130 | entities = await entity_repository.create_all(entity_data)
131 |
132 | assert len(entities) == 2
133 | entity = entities[0]
134 |
135 | # Verify in database
136 | found = await entity_repository.find_by_id(entity.id)
137 | assert found is not None
138 | assert found.id is not None
139 | assert found.id == entity.id
140 | assert found.title == entity.title
141 |
142 | # assert relations are eagerly loaded
143 | assert len(entity.observations) == 0
144 | assert len(entity.relations) == 0
145 |
146 |
147 | @pytest.mark.asyncio
148 | async def test_find_by_id(entity_repository: EntityRepository, sample_entity: Entity):
149 | """Test finding an entity by ID"""
150 | found = await entity_repository.find_by_id(sample_entity.id)
151 | assert found is not None
152 | assert found.id == sample_entity.id
153 | assert found.title == sample_entity.title
154 |
155 | # Verify against direct database query
156 | async with db.scoped_session(entity_repository.session_maker) as session:
157 | stmt = select(Entity).where(Entity.id == sample_entity.id)
158 | result = await session.execute(stmt)
159 | db_entity = result.scalar_one()
160 | assert db_entity.id == found.id
161 | assert db_entity.title == found.title
162 |
163 |
164 | @pytest.mark.asyncio
165 | async def test_update_entity(entity_repository: EntityRepository, sample_entity: Entity):
166 | """Test updating an entity"""
167 | updated = await entity_repository.update(sample_entity.id, {"title": "Updated title"})
168 | assert updated is not None
169 | assert updated.title == "Updated title"
170 |
171 | # Verify in database
172 | async with db.scoped_session(entity_repository.session_maker) as session:
173 | stmt = select(Entity).where(Entity.id == sample_entity.id)
174 | result = await session.execute(stmt)
175 | db_entity = result.scalar_one()
176 | assert db_entity.title == "Updated title"
177 |
178 |
179 | @pytest.mark.asyncio
180 | async def test_delete_entity(entity_repository: EntityRepository, sample_entity):
181 | """Test deleting an entity."""
182 | result = await entity_repository.delete(sample_entity.id)
183 | assert result is True
184 |
185 | # Verify deletion
186 | deleted = await entity_repository.find_by_id(sample_entity.id)
187 | assert deleted is None
188 |
189 |
190 | @pytest.mark.asyncio
191 | async def test_delete_entity_with_observations(
192 | entity_repository: EntityRepository, entity_with_observations
193 | ):
194 | """Test deleting an entity cascades to its observations."""
195 | entity = entity_with_observations
196 |
197 | result = await entity_repository.delete(entity.id)
198 | assert result is True
199 |
200 | # Verify entity deletion
201 | deleted = await entity_repository.find_by_id(entity.id)
202 | assert deleted is None
203 |
204 | # Verify observations were cascaded
205 | async with db.scoped_session(entity_repository.session_maker) as session:
206 | query = select(Observation).filter(Observation.entity_id == entity.id)
207 | result = await session.execute(query)
208 | remaining_observations = result.scalars().all()
209 | assert len(remaining_observations) == 0
210 |
211 |
212 | @pytest.mark.asyncio
213 | async def test_delete_entities_by_type(entity_repository: EntityRepository, sample_entity):
214 | """Test deleting entities by type."""
215 | result = await entity_repository.delete_by_fields(entity_type=sample_entity.entity_type)
216 | assert result is True
217 |
218 | # Verify deletion
219 | async with db.scoped_session(entity_repository.session_maker) as session:
220 | query = select(Entity).filter(Entity.entity_type == sample_entity.entity_type)
221 | result = await session.execute(query)
222 | remaining = result.scalars().all()
223 | assert len(remaining) == 0
224 |
225 |
226 | @pytest.mark.asyncio
227 | async def test_delete_entity_with_relations(entity_repository: EntityRepository, related_results):
228 | """Test deleting an entity cascades to its relations."""
229 | source, target, relation = related_results
230 |
231 | # Delete source entity
232 | result = await entity_repository.delete(source.id)
233 | assert result is True
234 |
235 | # Verify relation was cascaded
236 | async with db.scoped_session(entity_repository.session_maker) as session:
237 | query = select(Relation).filter(Relation.from_id == source.id)
238 | result = await session.execute(query)
239 | remaining_relations = result.scalars().all()
240 | assert len(remaining_relations) == 0
241 |
242 | # Verify target entity still exists
243 | target_exists = await entity_repository.find_by_id(target.id)
244 | assert target_exists is not None
245 |
246 |
247 | @pytest.mark.asyncio
248 | async def test_delete_nonexistent_entity(entity_repository: EntityRepository):
249 | """Test deleting an entity that doesn't exist."""
250 | result = await entity_repository.delete(0)
251 | assert result is False
252 |
253 |
254 | @pytest_asyncio.fixture
255 | async def test_entities(session_maker, test_project: Project):
256 | """Create multiple test entities."""
257 | async with db.scoped_session(session_maker) as session:
258 | entities = [
259 | Entity(
260 | project_id=test_project.id,
261 | title="entity1",
262 | entity_type="test",
263 | permalink="type1/entity1",
264 | file_path="type1/entity1.md",
265 | content_type="text/markdown",
266 | created_at=datetime.now(timezone.utc),
267 | updated_at=datetime.now(timezone.utc),
268 | ),
269 | Entity(
270 | project_id=test_project.id,
271 | title="entity2",
272 | entity_type="test",
273 | permalink="type1/entity2",
274 | file_path="type1/entity2.md",
275 | content_type="text/markdown",
276 | created_at=datetime.now(timezone.utc),
277 | updated_at=datetime.now(timezone.utc),
278 | ),
279 | Entity(
280 | project_id=test_project.id,
281 | title="entity3",
282 | entity_type="test",
283 | permalink="type2/entity3",
284 | file_path="type2/entity3.md",
285 | content_type="text/markdown",
286 | created_at=datetime.now(timezone.utc),
287 | updated_at=datetime.now(timezone.utc),
288 | ),
289 | ]
290 | session.add_all(entities)
291 | return entities
292 |
293 |
294 | @pytest.mark.asyncio
295 | async def test_find_by_permalinks(entity_repository: EntityRepository, test_entities):
296 |     """Test finding multiple entities by their permalinks."""
297 | # Test finding multiple entities
298 | permalinks = [e.permalink for e in test_entities]
299 | found = await entity_repository.find_by_permalinks(permalinks)
300 | assert len(found) == 3
301 | names = {e.title for e in found}
302 | assert names == {"entity1", "entity2", "entity3"}
303 |
304 | # Test finding subset of entities
305 | permalinks = [e.permalink for e in test_entities if e.title != "entity2"]
306 | found = await entity_repository.find_by_permalinks(permalinks)
307 | assert len(found) == 2
308 | names = {e.title for e in found}
309 | assert names == {"entity1", "entity3"}
310 |
311 | # Test with non-existent entities
312 | permalinks = ["type1/entity1", "type3/nonexistent"]
313 | found = await entity_repository.find_by_permalinks(permalinks)
314 | assert len(found) == 1
315 | assert found[0].title == "entity1"
316 |
317 | # Test empty input
318 | found = await entity_repository.find_by_permalinks([])
319 | assert len(found) == 0
320 |
321 |
322 | @pytest.mark.asyncio
323 | async def test_generate_permalink_from_file_path():
324 | """Test permalink generation from different file paths."""
325 | test_cases = [
326 | ("docs/My Feature.md", "docs/my-feature"),
327 | ("specs/API (v2).md", "specs/api-v2"),
328 | ("notes/2024/Q1 Planning!!!.md", "notes/2024/q1-planning"),
329 | ("test/Über File.md", "test/uber-file"),
330 | ("docs/my_feature_name.md", "docs/my-feature-name"),
331 | ("specs/multiple--dashes.md", "specs/multiple-dashes"),
332 | ("notes/trailing/space/ file.md", "notes/trailing/space/file"),
333 | ]
334 |
335 | for input_path, expected in test_cases:
336 | result = generate_permalink(input_path)
337 | assert result == expected, f"Failed for {input_path}"
338 | # Verify the result passes validation
339 | Entity(
340 | title="test",
341 | entity_type="test",
342 | permalink=result,
343 | file_path=input_path,
344 | content_type="text/markdown",
345 | ) # This will raise ValueError if invalid
346 |
347 |
348 | @pytest.mark.asyncio
349 | async def test_get_by_title(entity_repository: EntityRepository, session_maker):
350 | """Test getting an entity by title."""
351 | # Create test entities
352 | async with db.scoped_session(session_maker) as session:
353 | entities = [
354 | Entity(
355 | project_id=entity_repository.project_id,
356 | title="Unique Title",
357 | entity_type="test",
358 | permalink="test/unique-title",
359 | file_path="test/unique-title.md",
360 | content_type="text/markdown",
361 | created_at=datetime.now(timezone.utc),
362 | updated_at=datetime.now(timezone.utc),
363 | ),
364 | Entity(
365 | project_id=entity_repository.project_id,
366 | title="Another Title",
367 | entity_type="test",
368 | permalink="test/another-title",
369 | file_path="test/another-title.md",
370 | content_type="text/markdown",
371 | created_at=datetime.now(timezone.utc),
372 | updated_at=datetime.now(timezone.utc),
373 | ),
374 | Entity(
375 | project_id=entity_repository.project_id,
376 | title="Another Title",
377 | entity_type="test",
378 | permalink="test/another-title-1",
379 | file_path="test/another-title-1.md",
380 | content_type="text/markdown",
381 | created_at=datetime.now(timezone.utc),
382 | updated_at=datetime.now(timezone.utc),
383 | ),
384 | ]
385 | session.add_all(entities)
386 | await session.flush()
387 |
388 | # Test getting by exact title
389 | found = await entity_repository.get_by_title("Unique Title")
390 | assert found is not None
391 | assert len(found) == 1
392 | assert found[0].title == "Unique Title"
393 |
394 | # Test case sensitivity
395 | found = await entity_repository.get_by_title("unique title")
396 | assert not found # Should be case-sensitive
397 |
398 | # Test non-existent title
399 | found = await entity_repository.get_by_title("Non Existent")
400 | assert not found
401 |
402 | # Test multiple rows found
403 | found = await entity_repository.get_by_title("Another Title")
404 | assert len(found) == 2
405 |
406 |
407 | @pytest.mark.asyncio
408 | async def test_get_by_file_path(entity_repository: EntityRepository, session_maker):
409 |     """Test getting an entity by file path."""
410 | # Create test entities
411 | async with db.scoped_session(session_maker) as session:
412 | entities = [
413 | Entity(
414 | project_id=entity_repository.project_id,
415 | title="Unique Title",
416 | entity_type="test",
417 | permalink="test/unique-title",
418 | file_path="test/unique-title.md",
419 | content_type="text/markdown",
420 | created_at=datetime.now(timezone.utc),
421 | updated_at=datetime.now(timezone.utc),
422 | ),
423 | ]
424 | session.add_all(entities)
425 | await session.flush()
426 |
427 | # Test getting by file_path
428 | found = await entity_repository.get_by_file_path("test/unique-title.md")
429 | assert found is not None
430 | assert found.title == "Unique Title"
431 |
432 | # Test non-existent file_path
433 | found = await entity_repository.get_by_file_path("not/a/real/file.md")
434 | assert found is None
435 |
436 |
437 | @pytest.mark.asyncio
438 | async def test_get_distinct_directories(entity_repository: EntityRepository, session_maker):
439 | """Test getting distinct directory paths from entity file paths."""
440 | # Create test entities with various directory structures
441 | async with db.scoped_session(session_maker) as session:
442 | entities = [
443 | Entity(
444 | project_id=entity_repository.project_id,
445 | title="File 1",
446 | entity_type="test",
447 | permalink="docs/guides/file1",
448 | file_path="docs/guides/file1.md",
449 | content_type="text/markdown",
450 | created_at=datetime.now(timezone.utc),
451 | updated_at=datetime.now(timezone.utc),
452 | ),
453 | Entity(
454 | project_id=entity_repository.project_id,
455 | title="File 2",
456 | entity_type="test",
457 | permalink="docs/guides/file2",
458 | file_path="docs/guides/file2.md",
459 | content_type="text/markdown",
460 | created_at=datetime.now(timezone.utc),
461 | updated_at=datetime.now(timezone.utc),
462 | ),
463 | Entity(
464 | project_id=entity_repository.project_id,
465 | title="File 3",
466 | entity_type="test",
467 | permalink="docs/api/file3",
468 | file_path="docs/api/file3.md",
469 | content_type="text/markdown",
470 | created_at=datetime.now(timezone.utc),
471 | updated_at=datetime.now(timezone.utc),
472 | ),
473 | Entity(
474 | project_id=entity_repository.project_id,
475 | title="File 4",
476 | entity_type="test",
477 | permalink="specs/file4",
478 | file_path="specs/file4.md",
479 | content_type="text/markdown",
480 | created_at=datetime.now(timezone.utc),
481 | updated_at=datetime.now(timezone.utc),
482 | ),
483 | Entity(
484 | project_id=entity_repository.project_id,
485 | title="File 5",
486 | entity_type="test",
487 | permalink="notes/2024/q1/file5",
488 | file_path="notes/2024/q1/file5.md",
489 | content_type="text/markdown",
490 | created_at=datetime.now(timezone.utc),
491 | updated_at=datetime.now(timezone.utc),
492 | ),
493 | ]
494 | session.add_all(entities)
495 | await session.flush()
496 |
497 | # Get distinct directories
498 | directories = await entity_repository.get_distinct_directories()
499 |
500 | # Verify directories are extracted correctly
501 | assert isinstance(directories, list)
502 | assert len(directories) > 0
503 |
504 | # Should include all parent directories but not filenames
505 | expected_dirs = {
506 | "docs",
507 | "docs/guides",
508 | "docs/api",
509 | "notes",
510 | "notes/2024",
511 | "notes/2024/q1",
512 | "specs",
513 | }
514 | assert set(directories) == expected_dirs
515 |
516 | # Verify results are sorted
517 | assert directories == sorted(directories)
518 |
519 | # Verify no file paths are included
520 | for dir_path in directories:
521 | assert not dir_path.endswith(".md")
522 |
523 |
524 | @pytest.mark.asyncio
525 | async def test_get_distinct_directories_empty_db(entity_repository: EntityRepository):
526 | """Test getting distinct directories when database is empty."""
527 | directories = await entity_repository.get_distinct_directories()
528 | assert directories == []
529 |
530 |
531 | @pytest.mark.asyncio
532 | async def test_find_by_directory_prefix(entity_repository: EntityRepository, session_maker):
533 | """Test finding entities by directory prefix."""
534 | # Create test entities in various directories
535 | async with db.scoped_session(session_maker) as session:
536 | entities = [
537 | Entity(
538 | project_id=entity_repository.project_id,
539 | title="File 1",
540 | entity_type="test",
541 | permalink="docs/file1",
542 | file_path="docs/file1.md",
543 | content_type="text/markdown",
544 | created_at=datetime.now(timezone.utc),
545 | updated_at=datetime.now(timezone.utc),
546 | ),
547 | Entity(
548 | project_id=entity_repository.project_id,
549 | title="File 2",
550 | entity_type="test",
551 | permalink="docs/guides/file2",
552 | file_path="docs/guides/file2.md",
553 | content_type="text/markdown",
554 | created_at=datetime.now(timezone.utc),
555 | updated_at=datetime.now(timezone.utc),
556 | ),
557 | Entity(
558 | project_id=entity_repository.project_id,
559 | title="File 3",
560 | entity_type="test",
561 | permalink="docs/api/file3",
562 | file_path="docs/api/file3.md",
563 | content_type="text/markdown",
564 | created_at=datetime.now(timezone.utc),
565 | updated_at=datetime.now(timezone.utc),
566 | ),
567 | Entity(
568 | project_id=entity_repository.project_id,
569 | title="File 4",
570 | entity_type="test",
571 | permalink="specs/file4",
572 | file_path="specs/file4.md",
573 | content_type="text/markdown",
574 | created_at=datetime.now(timezone.utc),
575 | updated_at=datetime.now(timezone.utc),
576 | ),
577 | ]
578 | session.add_all(entities)
579 | await session.flush()
580 |
581 | # Test finding all entities in "docs" directory and subdirectories
582 | docs_entities = await entity_repository.find_by_directory_prefix("docs")
583 | assert len(docs_entities) == 3
584 | file_paths = {e.file_path for e in docs_entities}
585 | assert file_paths == {"docs/file1.md", "docs/guides/file2.md", "docs/api/file3.md"}
586 |
587 | # Test finding entities in "docs/guides" subdirectory
588 | guides_entities = await entity_repository.find_by_directory_prefix("docs/guides")
589 | assert len(guides_entities) == 1
590 | assert guides_entities[0].file_path == "docs/guides/file2.md"
591 |
592 | # Test finding entities in "specs" directory
593 | specs_entities = await entity_repository.find_by_directory_prefix("specs")
594 | assert len(specs_entities) == 1
595 | assert specs_entities[0].file_path == "specs/file4.md"
596 |
597 | # Test with root directory (empty string)
598 | all_entities = await entity_repository.find_by_directory_prefix("")
599 | assert len(all_entities) == 4
600 |
601 | # Test with root directory (slash)
602 | all_entities = await entity_repository.find_by_directory_prefix("/")
603 | assert len(all_entities) == 4
604 |
605 | # Test with non-existent directory
606 | nonexistent = await entity_repository.find_by_directory_prefix("nonexistent")
607 | assert len(nonexistent) == 0
608 |
609 |
610 | @pytest.mark.asyncio
611 | async def test_find_by_directory_prefix_basic_fields_only(
612 | entity_repository: EntityRepository, session_maker
613 | ):
614 | """Test that find_by_directory_prefix returns basic entity fields.
615 |
616 | Note: This method uses use_query_options=False for performance,
617 | so it doesn't eager load relationships. Directory trees only need
618 | basic entity fields.
619 | """
620 | # Create test entity
621 | async with db.scoped_session(session_maker) as session:
622 | entity = Entity(
623 | project_id=entity_repository.project_id,
624 | title="Test Entity",
625 | entity_type="test",
626 | permalink="docs/test",
627 | file_path="docs/test.md",
628 | content_type="text/markdown",
629 | created_at=datetime.now(timezone.utc),
630 | updated_at=datetime.now(timezone.utc),
631 | )
632 | session.add(entity)
633 | await session.flush()
634 |
635 | # Query entity by directory prefix
636 | entities = await entity_repository.find_by_directory_prefix("docs")
637 | assert len(entities) == 1
638 |
639 | # Verify basic fields are present (all we need for directory trees)
640 | entity = entities[0]
641 | assert entity.title == "Test Entity"
642 | assert entity.file_path == "docs/test.md"
643 | assert entity.permalink == "docs/test"
644 | assert entity.entity_type == "test"
645 | assert entity.content_type == "text/markdown"
646 | assert entity.updated_at is not None
647 |
648 |
649 | @pytest.mark.asyncio
650 | async def test_get_all_file_paths(entity_repository: EntityRepository, session_maker):
651 | """Test getting all file paths for deletion detection during sync."""
652 | # Create test entities with various file paths
653 | async with db.scoped_session(session_maker) as session:
654 | entities = [
655 | Entity(
656 | project_id=entity_repository.project_id,
657 | title="File 1",
658 | entity_type="test",
659 | permalink="docs/file1",
660 | file_path="docs/file1.md",
661 | content_type="text/markdown",
662 | created_at=datetime.now(timezone.utc),
663 | updated_at=datetime.now(timezone.utc),
664 | ),
665 | Entity(
666 | project_id=entity_repository.project_id,
667 | title="File 2",
668 | entity_type="test",
669 | permalink="specs/file2",
670 | file_path="specs/file2.md",
671 | content_type="text/markdown",
672 | created_at=datetime.now(timezone.utc),
673 | updated_at=datetime.now(timezone.utc),
674 | ),
675 | Entity(
676 | project_id=entity_repository.project_id,
677 | title="File 3",
678 | entity_type="test",
679 | permalink="notes/file3",
680 | file_path="notes/file3.md",
681 | content_type="text/markdown",
682 | created_at=datetime.now(timezone.utc),
683 | updated_at=datetime.now(timezone.utc),
684 | ),
685 | ]
686 | session.add_all(entities)
687 | await session.flush()
688 |
689 | # Get all file paths
690 | file_paths = await entity_repository.get_all_file_paths()
691 |
692 | # Verify results
693 | assert isinstance(file_paths, list)
694 | assert len(file_paths) == 3
695 | assert set(file_paths) == {"docs/file1.md", "specs/file2.md", "notes/file3.md"}
696 |
697 |
698 | @pytest.mark.asyncio
699 | async def test_get_all_file_paths_empty_db(entity_repository: EntityRepository):
700 | """Test getting all file paths when database is empty."""
701 | file_paths = await entity_repository.get_all_file_paths()
702 | assert file_paths == []
703 |
704 |
705 | @pytest.mark.asyncio
706 | async def test_get_all_file_paths_performance(entity_repository: EntityRepository, session_maker):
707 | """Test that get_all_file_paths doesn't load entities or relationships.
708 |
709 | This method is optimized for deletion detection during streaming sync.
710 | It should only query file_path strings, not full entity objects.
711 | """
712 | # Create test entity with observations and relations
713 | async with db.scoped_session(session_maker) as session:
714 | # Create entities
715 | entity1 = Entity(
716 | project_id=entity_repository.project_id,
717 | title="Entity 1",
718 | entity_type="test",
719 | permalink="test/entity1",
720 | file_path="test/entity1.md",
721 | content_type="text/markdown",
722 | created_at=datetime.now(timezone.utc),
723 | updated_at=datetime.now(timezone.utc),
724 | )
725 | entity2 = Entity(
726 | project_id=entity_repository.project_id,
727 | title="Entity 2",
728 | entity_type="test",
729 | permalink="test/entity2",
730 | file_path="test/entity2.md",
731 | content_type="text/markdown",
732 | created_at=datetime.now(timezone.utc),
733 | updated_at=datetime.now(timezone.utc),
734 | )
735 | session.add_all([entity1, entity2])
736 | await session.flush()
737 |
738 | # Add observations to entity1
739 | observation = Observation(
740 | entity_id=entity1.id,
741 | content="Test observation",
742 | category="note",
743 | )
744 | session.add(observation)
745 |
746 | # Add relation between entities
747 | relation = Relation(
748 | from_id=entity1.id,
749 | to_id=entity2.id,
750 | to_name=entity2.title,
751 | relation_type="relates_to",
752 | )
753 | session.add(relation)
754 | await session.flush()
755 |
756 | # Get all file paths - should be fast and not load relationships
757 | file_paths = await entity_repository.get_all_file_paths()
758 |
759 | # Verify results - just file paths, no entities or relationships loaded
760 | assert len(file_paths) == 2
761 | assert set(file_paths) == {"test/entity1.md", "test/entity2.md"}
762 |
763 | # Result should be list of strings, not entity objects
764 | for path in file_paths:
765 | assert isinstance(path, str)
766 |
767 |
768 | @pytest.mark.asyncio
769 | async def test_get_all_file_paths_project_isolation(
770 | entity_repository: EntityRepository, session_maker
771 | ):
772 | """Test that get_all_file_paths only returns paths from the current project."""
773 | # Create entities in the repository's project
774 | async with db.scoped_session(session_maker) as session:
775 | entity1 = Entity(
776 | project_id=entity_repository.project_id,
777 | title="Project 1 File",
778 | entity_type="test",
779 | permalink="test/file1",
780 | file_path="test/file1.md",
781 | content_type="text/markdown",
782 | created_at=datetime.now(timezone.utc),
783 | updated_at=datetime.now(timezone.utc),
784 | )
785 | session.add(entity1)
786 | await session.flush()
787 |
788 | # Create a second project
789 | project2 = Project(name="other-project", path="/tmp/other")
790 | session.add(project2)
791 | await session.flush()
792 |
793 | # Create entity in different project
794 | entity2 = Entity(
795 | project_id=project2.id,
796 | title="Project 2 File",
797 | entity_type="test",
798 | permalink="test/file2",
799 | file_path="test/file2.md",
800 | content_type="text/markdown",
801 | created_at=datetime.now(timezone.utc),
802 | updated_at=datetime.now(timezone.utc),
803 | )
804 | session.add(entity2)
805 | await session.flush()
806 |
807 | # Get all file paths for project 1
808 | file_paths = await entity_repository.get_all_file_paths()
809 |
810 | # Should only include files from project 1
811 | assert len(file_paths) == 1
812 | assert file_paths == ["test/file1.md"]
813 |
```
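
The directory-prefix tests above pin down the expected behavior: an empty string or `"/"` means the project root and returns every entity, while a named directory only matches paths strictly beneath it. As a rough illustration of a query that would satisfy those assertions, here is a minimal sketch of such a repository method; it assumes a SQLAlchemy 2.x async session and an `Entity` model with `file_path` and `project_id` columns (import path assumed), and is not the project's actual implementation, which is not shown on this page.

```python
# Hypothetical sketch -- behavior inferred from the tests above, not the real repository code.
from sqlalchemy import select

from basic_memory.models import Entity  # import path assumed for illustration


async def find_by_directory_prefix(session, project_id: int, directory: str) -> list[Entity]:
    """Return entities whose file_path sits under the given directory prefix."""
    stmt = select(Entity).where(Entity.project_id == project_id)

    # An empty string or "/" means the project root, so every entity in the project matches.
    normalized = directory.strip("/")
    if normalized:
        # Require the trailing slash so "docs" matches "docs/..." but not "docs-other/...".
        stmt = stmt.where(Entity.file_path.like(f"{normalized}/%"))

    result = await session.execute(stmt)
    return list(result.scalars().all())
```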
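
Similarly, the `get_all_file_paths` tests require a project-scoped result of bare strings with no entity or relationship loading. A column-only SELECT is the natural way to meet that contract; the following is a minimal sketch under the same assumptions as above (async SQLAlchemy session, `Entity.file_path` / `Entity.project_id` columns), not the project's actual code.

```python
# Hypothetical sketch -- a column-only query matching the tests' expectations.
from sqlalchemy import select

from basic_memory.models import Entity  # import path assumed for illustration


async def get_all_file_paths(session, project_id: int) -> list[str]:
    """Return file_path strings for one project without materializing Entity objects."""
    stmt = select(Entity.file_path).where(Entity.project_id == project_id)
    result = await session.execute(stmt)
    # A single selected column means scalars() yields plain strings, one per entity.
    return list(result.scalars().all())
```

Selecting only the `file_path` column keeps deletion detection during sync cheap even for large projects, which is what the performance-oriented test above is checking indirectly by attaching observations and relations that must never be loaded.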