This is page 2 of 2. Use http://codebase.md/root-signals/root-signals-mcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .coverage
├── .env.example
├── .github
│   └── workflows
│       ├── build-container.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── demonstrations
│   └── example_pydantic-ai.py
├── docker-compose.yml
├── Dockerfile
├── main.py
├── pyproject.toml
├── README.md
├── src
│   ├── __init__.py
│   └── root_signals_mcp
│       ├── __init__.py
│       ├── client.py
│       ├── core.py
│       ├── evaluator.py
│       ├── fastmcp_adapter.py
│       ├── judge.py
│       ├── py.typed
│       ├── root_api_client.py
│       ├── schema.py
│       ├── settings.py
│       ├── sse_server.py
│       ├── stdio_server.py
│       ├── test
│       │   ├── __init__.py
│       │   ├── conftest.py
│       │   ├── test_client.py
│       │   ├── test_evaluator.py
│       │   ├── test_judge.py
│       │   ├── test_root_client.py
│       │   ├── test_settings.py
│       │   ├── test_sse_integration.py
│       │   ├── test_sse_server.py
│       │   └── test_stdio_integration.py
│       └── tools.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/src/root_signals_mcp/test/test_root_client.py:
--------------------------------------------------------------------------------

```python
1 | """Tests for the RootSignals HTTP client."""
2 | 
3 | import logging
4 | from unittest.mock import patch
5 | 
6 | import httpx
7 | import pytest
8 | 
9 | from root_signals_mcp.root_api_client import (
10 |     ResponseValidationError,
11 |     RootSignalsAPIError,
12 |     RootSignalsEvaluatorRepository,
13 |     RootSignalsJudgeRepository,
14 | )
15 | from root_signals_mcp.schema import EvaluatorInfo, RunJudgeRequest
16 | from root_signals_mcp.settings import settings
17 | 
18 | pytestmark = [
19 |     pytest.mark.skipif(
20 |         settings.root_signals_api_key.get_secret_value() == "",
21 |         reason="ROOT_SIGNALS_API_KEY environment variable not set or empty",
22 |     ),
23 |     pytest.mark.integration,
24 |     pytest.mark.asyncio(loop_scope="session"),
25 | ]
26 | 
27 | logger = logging.getLogger("root_mcp_server_tests")
28 | 
29 | 
30 | async def test_user_agent_header() -> None:
31 |     """Test that the User-Agent header is properly set."""
32 |     client = RootSignalsEvaluatorRepository()
33 | 
34 |     assert "User-Agent" in client.headers, "User-Agent header is missing"
35 | 
36 |     user_agent = client.headers["User-Agent"]
37 |     assert user_agent.startswith("root-signals-mcp/"), f"Unexpected User-Agent format: {user_agent}"
38 | 
39 |     version = user_agent.split("/")[1]
40 |     assert version, "Version part is missing in User-Agent"
41 | 
42 |     assert version == settings.version, "Version in User-Agent does not match settings.version"
43 | 
44 |     logger.info(f"User-Agent header: {user_agent}")
45 |     logger.info(f"Package version from settings: {settings.version}")
46 | 
47 | 
48 | @pytest.mark.asyncio
49 | async def test_list_evaluators() -> None:
50 |     """Test listing evaluators from the API."""
51 |     client = RootSignalsEvaluatorRepository()
52 | 
53 |     evaluators = await client.list_evaluators()
54 | 
55 |     assert evaluators, "No evaluators returned"
56 |     assert len(evaluators) > 0, "Empty evaluators list"
57 | 
58 |     first_evaluator = evaluators[0]
59 |     assert first_evaluator.id, "Evaluator missing ID"
60 |     assert first_evaluator.name, "Evaluator missing name"
61 |     assert first_evaluator.created_at, "Evaluator missing created_at"
62 | 
63 |     assert first_evaluator.inputs, "Evaluator missing inputs"
64 |     assert first_evaluator.inputs != {}, "Evaluator inputs are empty"
65 | 
66 |     logger.info(f"Found {len(evaluators)} evaluators")
67 |     logger.info(f"First evaluator: {first_evaluator.name} (ID: {first_evaluator.id})")
68 | 
69 | 
70 | @pytest.mark.asyncio
71 | async def test_list_evaluators_with_count() -> None:
72 |     """Test listing evaluators with a specific count limit."""
73 |     client = RootSignalsEvaluatorRepository()
74 | 
75 |     max_count = 5
76 |     evaluators = await client.list_evaluators(max_count=max_count)
77 | 
78 |     assert len(evaluators) <= max_count, f"Got more than {max_count} evaluators"
79 |     logger.info(f"Retrieved {len(evaluators)} evaluators with max_count={max_count}")
80 | 
81 |     max_count_large = 30
82 |     evaluators_large = await client.list_evaluators(max_count=max_count_large)
83 | 
84 |     assert len(evaluators_large) <= max_count_large, f"Got more than {max_count_large} evaluators"
85 |     logger.info(f"Retrieved {len(evaluators_large)} evaluators with max_count={max_count_large}")
86 | 
87 |     if len(evaluators) == max_count:
88 |         assert len(evaluators_large) > len(evaluators), (
89 |             "Larger max_count didn't return more evaluators"
90 |         )
91 | 
92 | 
93 | @pytest.mark.asyncio
94 | async def test_pagination_handling() -> None:
95 |     """Test that pagination works correctly when more evaluators are available."""
96 |     client = RootSignalsEvaluatorRepository()
97 | 
98 |     small_limit = 2
99 |     evaluators = await client.list_evaluators(max_count=small_limit)
100 | 
101 |     assert len(evaluators) == small_limit, f"Expected exactly {small_limit} evaluators"
102 |     assert isinstance(evaluators[0], EvaluatorInfo), "Result items are not EvaluatorInfo objects"
103 | 
104 | 
105 | @pytest.mark.asyncio
106 | async def test_run_evaluator() -> None:
107 |     """Test running an evaluation with the API client."""
108 |     client = RootSignalsEvaluatorRepository()
109 | 
110 |     evaluators = await client.list_evaluators()
111 | 
112 |     standard_evaluator = next((e for e in evaluators if not e.requires_contexts), None)
113 | 
114 |     assert standard_evaluator, "No standard evaluator found"
115 |     logger.info(f"Using evaluator: {standard_evaluator.name} (ID: {standard_evaluator.id})")
116 | 
117 |     result = await client.run_evaluator(
118 |         evaluator_id=standard_evaluator.id,
119 |         request="What is the capital of France?",
120 |         response="The capital of France is Paris, which is known as the City of Light.",
121 |     )
122 | 
123 |     assert result.evaluator_name, "Missing evaluator name in result"
124 |     assert isinstance(result.score, float), "Score is not a float"
125 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
126 | 
127 |     logger.info(f"Evaluation score: {result.score}")
128 |     logger.info(f"Justification: {result.justification}")
129 | 
130 | 
131 | @pytest.mark.asyncio
132 | async def test_run_evaluator_with_contexts() -> None:
133 |     """Test running a RAG evaluation with contexts."""
134 |     client = RootSignalsEvaluatorRepository()
135 | 
136 |     evaluators = await client.list_evaluators()
137 | 
138 |     rag_evaluator = next((e for e in evaluators if e.requires_contexts), None)
139 | 
140 |     if not rag_evaluator:
141 |         pytest.skip("No RAG evaluator found")
142 | 
143 |     logger.info(f"Using RAG evaluator: {rag_evaluator.name} (ID: {rag_evaluator.id})")
144 | 
145 |     result = await client.run_evaluator(
146 |         evaluator_id=rag_evaluator.id,
147 |         request="What is the capital of France?",
148 |         response="The capital of France is Paris, which is known as the City of Light.",
149 |         contexts=[
150 |             "Paris is the capital and most populous city of France. It is located on the Seine River.",
151 |             "France is a country in Western Europe with several overseas territories and regions.",
152 |         ],
153 |     )
154 | 
155 |     assert result.evaluator_name, "Missing evaluator name in result"
156 |     assert isinstance(result.score, float), "Score is not a float"
157 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
158 | 
159 |     logger.info(f"RAG evaluation score: {result.score}")
160 |     logger.info(f"Justification: {result.justification}")
161 | 
162 | 
163 | @pytest.mark.asyncio
164 | async def test_evaluator_not_found() -> None:
165 |     """Test error handling when evaluator is not found."""
166 |     client = RootSignalsEvaluatorRepository()
167 | 
168 |     with pytest.raises(RootSignalsAPIError) as excinfo:
169 |         await client.run_evaluator(
170 |             evaluator_id="nonexistent-evaluator-id",
171 |             request="Test request",
172 |             response="Test response",
173 |         )
174 | 
175 |     assert excinfo.value.status_code == 404, "Expected 404 status code"
176 |     logger.info(f"Got expected error: {excinfo.value}")
177 | 
178 | 
179 | @pytest.mark.asyncio
180 | async def test_run_evaluator_with_expected_output() -> None:
181 |     """Test running an evaluation with expected output."""
182 |     client = RootSignalsEvaluatorRepository()
183 | 
184 |     evaluators = await client.list_evaluators()
185 |     eval_with_expected = next(
186 |         (e for e in evaluators if e.inputs.get("expected_output") is not None),
187 |         next((e for e in evaluators), None),
188 |     )
189 | 
190 |     if not eval_with_expected:
191 |         pytest.skip("No suitable evaluator found")
192 | 
193 |     try:
194 |         result = await client.run_evaluator(
195 |             evaluator_id=eval_with_expected.id,
196 |             request="What is the capital of France?",
197 |             response="The capital of France is Paris.",
198 |             contexts=["Paris is the capital of France."],
199 |             expected_output="Paris is the capital of France.",
200 |         )
201 | 
202 |         assert result.evaluator_name, "Missing evaluator name in result"
203 |         assert isinstance(result.score, float), "Score is not a float"
204 |         logger.info(f"Evaluation with expected output - score: {result.score}")
205 |     except RootSignalsAPIError as e:
206 |         logger.warning(f"Could not run evaluator with expected output: {e}")
207 |         assert e.status_code in (400, 422), f"Unexpected error code: {e.status_code}"
208 | 
209 | 
210 | @pytest.mark.asyncio
211 | async def test_run_evaluator_by_name() -> None:
212 |     """Test running an evaluation using the evaluator name instead of ID."""
213 |     client = RootSignalsEvaluatorRepository()
214 | 
215 |     evaluators = await client.list_evaluators()
216 |     assert evaluators, "No evaluators returned"
217 | 
218 |     standard_evaluator = next((e for e in evaluators if not e.requires_contexts), None)
219 |     if not standard_evaluator:
220 |         pytest.skip("No standard evaluator found")
221 | 
222 |     logger.info(f"Using evaluator by name: {standard_evaluator.name}")
223 | 
224 |     result = await client.run_evaluator_by_name(
225 |         evaluator_name=standard_evaluator.name,
226 |         request="What is the capital of France?",
227 |         response="The capital of France is Paris, which is known as the City of Light.",
228 |     )
229 | 
230 |     assert result.evaluator_name, "Missing evaluator name in result"
231 |     assert isinstance(result.score, float), "Score is not a float"
232 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
233 | 
234 |     logger.info(f"Evaluation by name score: {result.score}")
235 |     logger.info(f"Justification: {result.justification}")
236 | 
237 | 
238 | @pytest.mark.asyncio
239 | async def test_run_rag_evaluator_by_name() -> None:
240 |     """Test running a RAG evaluation using the evaluator name instead of ID."""
241 |     client = RootSignalsEvaluatorRepository()
242 | 
243 |     evaluators = await client.list_evaluators()
244 |     rag_evaluator = next((e for e in evaluators if e.requires_contexts), None)
245 | 
246 |     if not rag_evaluator:
247 |         pytest.skip("No RAG evaluator found")
248 | 
249 |     logger.info(f"Using RAG evaluator by name: {rag_evaluator.name}")
250 | 
251 |     result = await client.run_evaluator_by_name(
252 |         evaluator_name=rag_evaluator.name,
253 |         request="What is the capital of France?",
254 |         response="The capital of France is Paris, which is known as the City of Light.",
255 |         contexts=[
256 |             "Paris is the capital and most populous city of France. It is located on the Seine River.",
257 |             "France is a country in Western Europe with several overseas territories and regions.",
258 |         ],
259 |     )
260 | 
261 |     assert result.evaluator_name, "Missing evaluator name in result"
262 |     assert isinstance(result.score, float), "Score is not a float"
263 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
264 | 
265 |     logger.info(f"RAG evaluation by name score: {result.score}")
266 |     logger.info(f"Justification: {result.justification}")
267 | 
268 | 
269 | @pytest.mark.asyncio
270 | async def test_api_client_connection_error() -> None:
271 |     """Test error handling when connection fails."""
272 |     with patch("httpx.AsyncClient.request", side_effect=httpx.ConnectError("Connection failed")):
273 |         client = RootSignalsEvaluatorRepository()
274 |         with pytest.raises(RootSignalsAPIError) as excinfo:
275 |             await client.list_evaluators()
276 | 
277 |         assert excinfo.value.status_code == 0, "Expected status code 0 for connection error"
278 |         assert "Connection error" in str(excinfo.value), (
279 |             "Error message should indicate connection error"
280 |         )
281 | 
282 | 
283 | @pytest.mark.asyncio
284 | async def test_api_response_validation_error() -> None:
285 |     """Test validation error handling with invalid responses."""
286 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
287 |         client = RootSignalsEvaluatorRepository()
288 | 
289 |         # Case 1: Empty response when results field expected
290 |         mock_request.return_value = {}
291 |         with pytest.raises(ResponseValidationError) as excinfo:
292 |             await client.list_evaluators()
293 |         error_message = str(excinfo.value)
294 |         assert "Could not find 'results' field" in error_message, (
295 |             "Expected specific error about missing results field"
296 |         )
297 | 
298 |         # Case 2: Wrong response type (string instead of dict/list)
299 |         mock_request.return_value = "not a dict or list"
300 |         with pytest.raises(ResponseValidationError) as excinfo:
301 |             await client.list_evaluators()
302 |         error_message = str(excinfo.value)
303 |         assert "Expected response to be a dict or list" in error_message, (
304 |             "Error should specify invalid response type"
305 |         )
306 |         assert "got str" in error_message.lower(), "Error should mention the actual type received"
307 | 
308 |         mock_request.return_value = "not a valid format"
309 |         with pytest.raises(ResponseValidationError) as excinfo:
310 |             await client.run_evaluator(
311 |                 evaluator_id="test-id", request="Test request", response="Test response"
312 |             )
313 |         error_message = str(excinfo.value)
314 |         assert "Invalid evaluation response format" in error_message, (
315 |             "Should indicate format validation error"
316 |         )
317 | 
318 | 
319 | @pytest.mark.asyncio
320 | async def test_evaluator_missing_fields() -> None:
321 |     """Test handling of evaluators with missing required fields."""
322 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
323 |         client = RootSignalsEvaluatorRepository()
324 | 
325 |         mock_request.return_value = {
326 |             "results": [
327 |                 {
328 |                     "id": "valid-id",
329 |                     "name": "Valid Evaluator",
330 |                     "created_at": "2023-01-01T00:00:00Z",
331 |                     "inputs": {},
332 |                 },
333 |                 {
334 |                     "created_at": "2023-01-01T00:00:00Z",
335 |                     # Missing required fields: id, name
336 |                 },
337 |             ]
338 |         }
339 | 
340 |         with pytest.raises(ResponseValidationError) as excinfo:
341 |             await client.list_evaluators()
342 | 
343 |         error_message = str(excinfo.value)
344 |         assert "missing required field" in error_message.lower(), (
345 |             "Error should mention missing required field"
346 |         )
347 |         assert "id" in error_message or "name" in error_message, (
348 |             "Error should specify which field is missing"
349 |         )
350 | 
351 |         mock_request.return_value = {
352 |             "results": [
353 |                 {
354 |                     "id": "valid-id",
355 |                     "name": "Valid Evaluator",
356 |                     "created_at": "2023-01-01T00:00:00Z",
357 |                     "inputs": {},
358 |                 }
359 |             ]
360 |         }
361 | 
362 |         evaluators = await client.list_evaluators()
363 |         assert len(evaluators) == 1, "Should have one valid evaluator"
364 |         assert evaluators[0].id == "valid-id", "Valid evaluator should be included"
365 | 
366 | 
367 | @pytest.mark.asyncio
368 | async def test_root_client_schema_compatibility__detects_api_schema_changes() -> None:
369 |     """Test that our schema models detect changes in the API response format."""
370 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
371 |         # Case 1: Missing required field (evaluator_name)
372 |         mock_request.return_value = {
373 |             "result": {
374 |                 "score": 0.9,
375 |                 "justification": "Some justification",
376 |             }
377 |         }
378 | 
379 |         client = RootSignalsEvaluatorRepository()
380 |         with pytest.raises(ResponseValidationError) as excinfo:
381 |             await client.run_evaluator(
382 |                 evaluator_id="test-id", request="Test request", response="Test response"
383 |             )
384 | 
385 |         error_message = str(excinfo.value)
386 |         assert "Invalid evaluation response format" in error_message, (
387 |             "Should show validation error message"
388 |         )
389 |         # The exact error format will come from Pydantic now
390 |         assert "evaluator_name" in error_message.lower(), "Should mention the missing field"
391 | 
392 |         # Case 2: Missing another required field (score)
393 |         mock_request.return_value = {
394 |             "result": {
395 |                 "evaluator_name": "Test Evaluator",
396 |                 "justification": "Some justification",
397 |             }
398 |         }
399 | 
400 |         with pytest.raises(ResponseValidationError) as excinfo:
401 |             await client.run_evaluator(
402 |                 evaluator_id="test-id", request="Test request", response="Test response"
403 |             )
404 | 
405 |         error_message = str(excinfo.value)
406 |         assert "Invalid evaluation response format" in error_message, (
407 |             "Should show validation error message"
408 |         )
409 |         assert "score" in error_message.lower(), "Should mention the missing field"
410 | 
411 |         # Case 3: Empty response
412 |         mock_request.return_value = {}
413 | 
414 |         with pytest.raises(ResponseValidationError) as excinfo:
415 |             await client.run_evaluator(
416 |                 evaluator_id="test-id", request="Test request", response="Test response"
417 |             )
418 | 
419 |         assert "Invalid evaluation response format" in str(excinfo.value), (
420 |             "Should show validation error for empty response"
421 |         )
422 | 
423 | 
424 | @pytest.mark.asyncio
425 | async def test_root_client_run_evaluator__handles_unexpected_response_fields() -> None:
426 |     """Test handling of extra fields in API response."""
427 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
428 |         # Include extra fields that aren't in our schema
429 |         mock_request.return_value = {
430 |             "result": {
431 |                 "evaluator_name": "Test",
432 |                 "score": 0.9,
433 |                 "new_field_not_in_schema": "value",
434 |                 "another_new_field": {"nested": "data", "that": ["should", "be", "ignored"]},
435 |             }
436 |         }
437 | 
438 |         client = RootSignalsEvaluatorRepository()
439 |         result = await client.run_evaluator(evaluator_id="test-id", request="Test", response="Test")
440 | 
441 |         assert result.evaluator_name == "Test", "Required field should be correctly parsed"
442 |         assert result.score == 0.9, "Required field should be correctly parsed"
443 | 
444 |         # Extra fields should be ignored by Pydantic's model_validate
445 |         assert not hasattr(result, "new_field_not_in_schema"), "Extra fields should be ignored"
446 |         assert not hasattr(result, "another_new_field"), "Extra fields should be ignored"
447 | 
448 | 
449 | @pytest.mark.asyncio
450 | async def test_list_judges() -> None:
451 |     """Test listing judges from the API."""
452 |     client = RootSignalsJudgeRepository()
453 | 
454 |     judges = await client.list_judges()
455 | 
456 |     assert judges, "No judges returned"
457 |     assert len(judges) > 0, "Empty judges list"
458 | 
459 |     first_judge = judges[0]
460 |     assert first_judge.id, "Judge missing ID"
461 |     assert first_judge.name, "Judge missing name"
462 |     assert first_judge.created_at, "Judge missing created_at"
463 | 
464 |     logger.info(f"Found {len(judges)} judges")
465 |     logger.info(f"First judge: {first_judge.name} (ID: {first_judge.id})")
466 | 
467 | 
468 | @pytest.mark.asyncio
469 | async def test_list_judges_with_count() -> None:
470 |     """Test listing judges with a specific count limit."""
471 |     client = RootSignalsJudgeRepository()
472 | 
473 |     max_count = 5
474 |     judges = await client.list_judges(max_count=max_count)
475 | 
476 |     assert len(judges) <= max_count, f"Got more than {max_count} judges"
477 |     logger.info(f"Retrieved {len(judges)} judges with max_count={max_count}")
478 | 
479 |     max_count_large = 30
480 |     judges_large = await client.list_judges(max_count=max_count_large)
481 | 
482 |     assert len(judges_large) <= max_count_large, f"Got more than {max_count_large} judges"
483 |     logger.info(f"Retrieved {len(judges_large)} judges with max_count={max_count_large}")
484 | 
485 |     if len(judges) == max_count:
486 |         assert len(judges_large) > len(judges), "Larger max_count didn't return more judges"
487 | 
488 | 
489 | @pytest.mark.asyncio
490 | async def test_root_client_list_judges__handles_unexpected_response_fields() -> None:
491 |     """Test handling of extra fields in judge API response."""
492 |     with patch.object(RootSignalsJudgeRepository, "_make_request") as mock_request:
493 |         # Include extra fields that aren't in our schema
494 |         mock_request.return_value = {
495 |             "results": [
496 |                 {
497 |                     "id": "test-judge-id",
498 |                     "name": "Test Judge",
499 |                     "created_at": "2023-01-01T00:00:00Z",
500 |                     "new_field_not_in_schema": "value",
501 |                     "another_new_field": {"nested": "data", "that": ["should", "be", "ignored"]},
502 |                 }
503 |             ]
504 |         }
505 | 
506 |         client = RootSignalsJudgeRepository()
507 |         judges = await client.list_judges()
508 | 
509 |         assert len(judges) == 1, "Should have one judge in the result"
510 |         assert judges[0].id == "test-judge-id", "Judge ID should be correctly parsed"
511 |         assert judges[0].name == "Test Judge", "Judge name should be correctly parsed"
512 | 
513 |         # Extra fields should be ignored by Pydantic's model_validate
514 |         assert not hasattr(judges[0], "new_field_not_in_schema"), "Extra fields should be ignored"
515 |         assert not hasattr(judges[0], "another_new_field"), "Extra fields should be ignored"
516 | 
517 | 
518 | @pytest.mark.asyncio
519 | async def test_run_judge() -> None:
520 |     """Test running a judge with the API client."""
521 |     client = RootSignalsJudgeRepository()
522 | 
523 |     judges = await client.list_judges()
524 | 
525 |     judge = next(iter(judges), None)
526 |     assert judge is not None, "No judge found"
527 | 
528 |     logger.info(f"Using judge: {judge.name} (ID: {judge.id})")
529 | 
530 |     result = await client.run_judge(
531 |         RunJudgeRequest(
532 |             judge_id=judge.id,
533 |             judge_name=judge.name,
534 |             request="What is the capital of France?",
535 |             response="The capital of France is Paris, which is known as the City of Light.",
536 |         )
537 |     )
538 | 
539 |     assert result.evaluator_results, "Missing evaluator results in result"
540 |     assert isinstance(result.evaluator_results[0].score, float), "Score is not a float"
541 |     assert 0 <= result.evaluator_results[0].score <= 1, "Score outside expected range (0-1)"
542 | 
543 |     logger.info(f"Evaluation score: {result.evaluator_results[0].score}")
544 |     logger.info(f"Justification: {result.evaluator_results[0].justification}")
545 | 
```