This is page 2 of 2. Use http://codebase.md/root-signals/root-signals-mcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .coverage
├── .env.example
├── .github
│   └── workflows
│       ├── build-container.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── demonstrations
│   └── example_pydantic-ai.py
├── docker-compose.yml
├── Dockerfile
├── main.py
├── pyproject.toml
├── README.md
├── src
│   ├── __init__.py
│   └── root_signals_mcp
│       ├── __init__.py
│       ├── client.py
│       ├── core.py
│       ├── evaluator.py
│       ├── fastmcp_adapter.py
│       ├── judge.py
│       ├── py.typed
│       ├── root_api_client.py
│       ├── schema.py
│       ├── settings.py
│       ├── sse_server.py
│       ├── stdio_server.py
│       ├── test
│       │   ├── __init__.py
│       │   ├── conftest.py
│       │   ├── test_client.py
│       │   ├── test_evaluator.py
│       │   ├── test_judge.py
│       │   ├── test_root_client.py
│       │   ├── test_settings.py
│       │   ├── test_sse_integration.py
│       │   ├── test_sse_server.py
│       │   └── test_stdio_integration.py
│       └── tools.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/src/root_signals_mcp/test/test_root_client.py:
--------------------------------------------------------------------------------

```python
1 | """Tests for the RootSignals HTTP client."""
2 | 
3 | import logging
4 | from unittest.mock import patch
5 | 
6 | import httpx
7 | import pytest
8 | 
9 | from root_signals_mcp.root_api_client import (
10 |     ResponseValidationError,
11 |     RootSignalsAPIError,
12 |     RootSignalsEvaluatorRepository,
13 |     RootSignalsJudgeRepository,
14 | )
15 | from root_signals_mcp.schema import EvaluatorInfo, RunJudgeRequest
16 | from root_signals_mcp.settings import settings
17 | 
18 | pytestmark = [
19 |     pytest.mark.skipif(
20 |         settings.root_signals_api_key.get_secret_value() == "",
21 |         reason="ROOT_SIGNALS_API_KEY environment variable not set or empty",
22 |     ),
23 |     pytest.mark.integration,
24 |     pytest.mark.asyncio(loop_scope="session"),
25 | ]
26 | 
27 | logger = logging.getLogger("root_mcp_server_tests")
28 | 
29 | 
30 | async def test_user_agent_header() -> None:
31 |     """Test that the User-Agent header is properly set."""
32 |     client = RootSignalsEvaluatorRepository()
33 | 
34 |     assert "User-Agent" in client.headers, "User-Agent header is missing"
35 | 
36 |     user_agent = client.headers["User-Agent"]
37 |     assert user_agent.startswith("root-signals-mcp/"), f"Unexpected User-Agent format: {user_agent}"
38 | 
39 |     version = user_agent.split("/")[1]
40 |     assert version, "Version part is missing in User-Agent"
41 | 
42 |     assert version == settings.version, "Version in User-Agent does not match settings.version"
43 | 
44 |     logger.info(f"User-Agent header: {user_agent}")
45 |     logger.info(f"Package version from settings: {settings.version}")
46 | 
47 | 
48 | @pytest.mark.asyncio
49 | async def test_list_evaluators() -> None:
50 |     """Test listing evaluators from the API."""
51 |     client = RootSignalsEvaluatorRepository()
52 | 
53 |     evaluators = await client.list_evaluators()
54 | 
55 |     assert evaluators, "No evaluators returned"
56 |     assert len(evaluators) > 0, "Empty evaluators list"
57 | 
58 |     first_evaluator = evaluators[0]
59 |     assert first_evaluator.id, "Evaluator missing ID"
60 |     assert first_evaluator.name, "Evaluator missing name"
61 |     assert first_evaluator.created_at, "Evaluator missing created_at"
62 | 
63 |     assert first_evaluator.inputs, "Evaluator missing inputs"
64 |     assert first_evaluator.inputs != {}, "Evaluator inputs are empty"
65 | 
66 |     logger.info(f"Found {len(evaluators)} evaluators")
67 |     logger.info(f"First evaluator: {first_evaluator.name} (ID: {first_evaluator.id})")
68 | 
69 | 
70 | @pytest.mark.asyncio
71 | async def test_list_evaluators_with_count() -> None:
72 |     """Test listing evaluators with a specific count limit."""
73 |     client = RootSignalsEvaluatorRepository()
74 | 
75 |     max_count = 5
76 |     evaluators = await client.list_evaluators(max_count=max_count)
77 | 
78 |     assert len(evaluators) <= max_count, f"Got more than {max_count} evaluators"
79 |     logger.info(f"Retrieved {len(evaluators)} evaluators with max_count={max_count}")
80 | 
81 |     max_count_large = 30
82 |     evaluators_large = await client.list_evaluators(max_count=max_count_large)
83 | 
84 |     assert len(evaluators_large) <= max_count_large, f"Got more than {max_count_large} evaluators"
85 |     logger.info(f"Retrieved {len(evaluators_large)} evaluators with max_count={max_count_large}")
86 | 
87 |     if len(evaluators) == max_count:
88 |         assert len(evaluators_large) > len(evaluators), (
89 |             "Larger max_count didn't return more evaluators"
90 |         )
91 | 
92 | 
93 | @pytest.mark.asyncio
94 | async def test_pagination_handling() -> None:
95 |     """Test that pagination works correctly when more evaluators are available."""
96 |     client = RootSignalsEvaluatorRepository()
97 | 
98 |     small_limit = 2
99 |     evaluators = await client.list_evaluators(max_count=small_limit)
100 | 
101 |     assert len(evaluators) == small_limit, f"Expected exactly {small_limit} evaluators"
102 |     assert isinstance(evaluators[0], EvaluatorInfo), "Result items are not EvaluatorInfo objects"
103 | 
104 | 
105 | @pytest.mark.asyncio
106 | async def test_run_evaluator() -> None:
107 |     """Test running an evaluation with the API client."""
108 |     client = RootSignalsEvaluatorRepository()
109 | 
110 |     evaluators = await client.list_evaluators()
111 | 
112 |     standard_evaluator = next((e for e in evaluators if not e.requires_contexts), None)
113 | 
114 |     assert standard_evaluator, "No standard evaluator found"
115 |     logger.info(f"Using evaluator: {standard_evaluator.name} (ID: {standard_evaluator.id})")
116 | 
117 |     result = await client.run_evaluator(
118 |         evaluator_id=standard_evaluator.id,
119 |         request="What is the capital of France?",
120 |         response="The capital of France is Paris, which is known as the City of Light.",
121 |     )
122 | 
123 |     assert result.evaluator_name, "Missing evaluator name in result"
124 |     assert isinstance(result.score, float), "Score is not a float"
125 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
126 | 
127 |     logger.info(f"Evaluation score: {result.score}")
128 |     logger.info(f"Justification: {result.justification}")
129 | 
130 | 
131 | @pytest.mark.asyncio
132 | async def test_run_evaluator_with_contexts() -> None:
133 |     """Test running a RAG evaluation with contexts."""
134 |     client = RootSignalsEvaluatorRepository()
135 | 
136 |     evaluators = await client.list_evaluators()
137 | 
138 |     rag_evaluator = next((e for e in evaluators if e.requires_contexts), None)
139 | 
140 |     if not rag_evaluator:
141 |         pytest.skip("No RAG evaluator found")
142 | 
143 |     logger.info(f"Using RAG evaluator: {rag_evaluator.name} (ID: {rag_evaluator.id})")
144 | 
145 |     result = await client.run_evaluator(
146 |         evaluator_id=rag_evaluator.id,
147 |         request="What is the capital of France?",
148 |         response="The capital of France is Paris, which is known as the City of Light.",
149 |         contexts=[
150 |             "Paris is the capital and most populous city of France. It is located on the Seine River.",
151 |             "France is a country in Western Europe with several overseas territories and regions.",
152 |         ],
153 |     )
154 | 
155 |     assert result.evaluator_name, "Missing evaluator name in result"
156 |     assert isinstance(result.score, float), "Score is not a float"
157 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
158 | 
159 |     logger.info(f"RAG evaluation score: {result.score}")
160 |     logger.info(f"Justification: {result.justification}")
161 | 
162 | 
163 | @pytest.mark.asyncio
164 | async def test_evaluator_not_found() -> None:
165 |     """Test error handling when evaluator is not found."""
166 |     client = RootSignalsEvaluatorRepository()
167 | 
168 |     with pytest.raises(RootSignalsAPIError) as excinfo:
169 |         await client.run_evaluator(
170 |             evaluator_id="nonexistent-evaluator-id",
171 |             request="Test request",
172 |             response="Test response",
173 |         )
174 | 
175 |     assert excinfo.value.status_code == 404, "Expected 404 status code"
176 |     logger.info(f"Got expected error: {excinfo.value}")
177 | 
178 | 
179 | @pytest.mark.asyncio
180 | async def test_run_evaluator_with_expected_output() -> None:
181 |     """Test running an evaluation with expected output."""
182 |     client = RootSignalsEvaluatorRepository()
183 | 
184 |     evaluators = await client.list_evaluators()
185 |     eval_with_expected = next(
186 |         (e for e in evaluators if e.inputs.get("expected_output") is not None),
187 |         next((e for e in evaluators), None),
188 |     )
189 | 
190 |     if not eval_with_expected:
191 |         pytest.skip("No suitable evaluator found")
192 | 
193 |     try:
194 |         result = await client.run_evaluator(
195 |             evaluator_id=eval_with_expected.id,
196 |             request="What is the capital of France?",
197 |             response="The capital of France is Paris.",
198 |             contexts=["Paris is the capital of France."],
199 |             expected_output="Paris is the capital of France.",
200 |         )
201 | 
202 |         assert result.evaluator_name, "Missing evaluator name in result"
203 |         assert isinstance(result.score, float), "Score is not a float"
204 |         logger.info(f"Evaluation with expected output - score: {result.score}")
205 |     except RootSignalsAPIError as e:
206 |         logger.warning(f"Could not run evaluator with expected output: {e}")
207 |         assert e.status_code in (400, 422), f"Unexpected error code: {e.status_code}"
208 | 
209 | 
210 | @pytest.mark.asyncio
211 | async def test_run_evaluator_by_name() -> None:
212 |     """Test running an evaluation using the evaluator name instead of ID."""
213 |     client = RootSignalsEvaluatorRepository()
214 | 
215 |     evaluators = await client.list_evaluators()
216 |     assert evaluators, "No evaluators returned"
217 | 
218 |     standard_evaluator = next((e for e in evaluators if not e.requires_contexts), None)
219 |     if not standard_evaluator:
220 |         pytest.skip("No standard evaluator found")
221 | 
222 |     logger.info(f"Using evaluator by name: {standard_evaluator.name}")
223 | 
224 |     result = await client.run_evaluator_by_name(
225 |         evaluator_name=standard_evaluator.name,
226 |         request="What is the capital of France?",
227 |         response="The capital of France is Paris, which is known as the City of Light.",
228 |     )
229 | 
230 |     assert result.evaluator_name, "Missing evaluator name in result"
231 |     assert isinstance(result.score, float), "Score is not a float"
232 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
233 | 
234 |     logger.info(f"Evaluation by name score: {result.score}")
235 |     logger.info(f"Justification: {result.justification}")
236 | 
237 | 
238 | @pytest.mark.asyncio
239 | async def test_run_rag_evaluator_by_name() -> None:
240 |     """Test running a RAG evaluation using the evaluator name instead of ID."""
241 |     client = RootSignalsEvaluatorRepository()
242 | 
243 |     evaluators = await client.list_evaluators()
244 |     rag_evaluator = next((e for e in evaluators if e.requires_contexts), None)
245 | 
246 |     if not rag_evaluator:
247 |         pytest.skip("No RAG evaluator found")
248 | 
249 |     logger.info(f"Using RAG evaluator by name: {rag_evaluator.name}")
250 | 
251 |     result = await client.run_evaluator_by_name(
252 |         evaluator_name=rag_evaluator.name,
253 |         request="What is the capital of France?",
254 |         response="The capital of France is Paris, which is known as the City of Light.",
255 |         contexts=[
256 |             "Paris is the capital and most populous city of France. It is located on the Seine River.",
257 |             "France is a country in Western Europe with several overseas territories and regions.",
258 |         ],
259 |     )
260 | 
261 |     assert result.evaluator_name, "Missing evaluator name in result"
262 |     assert isinstance(result.score, float), "Score is not a float"
263 |     assert 0 <= result.score <= 1, "Score outside expected range (0-1)"
264 | 
265 |     logger.info(f"RAG evaluation by name score: {result.score}")
266 |     logger.info(f"Justification: {result.justification}")
267 | 
268 | 
269 | @pytest.mark.asyncio
270 | async def test_api_client_connection_error() -> None:
271 |     """Test error handling when connection fails."""
272 |     with patch("httpx.AsyncClient.request", side_effect=httpx.ConnectError("Connection failed")):
273 |         client = RootSignalsEvaluatorRepository()
274 |         with pytest.raises(RootSignalsAPIError) as excinfo:
275 |             await client.list_evaluators()
276 | 
277 |         assert excinfo.value.status_code == 0, "Expected status code 0 for connection error"
278 |         assert "Connection error" in str(excinfo.value), (
279 |             "Error message should indicate connection error"
280 |         )
281 | 
282 | 
283 | @pytest.mark.asyncio
284 | async def test_api_response_validation_error() -> None:
285 |     """Test validation error handling with invalid responses."""
286 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
287 |         client = RootSignalsEvaluatorRepository()
288 | 
289 |         # Case 1: Empty response when results field expected
290 |         mock_request.return_value = {}
291 |         with pytest.raises(ResponseValidationError) as excinfo:
292 |             await client.list_evaluators()
293 |         error_message = str(excinfo.value)
294 |         assert "Could not find 'results' field" in error_message, (
295 |             "Expected specific error about missing results field"
296 |         )
297 | 
298 |         # Case 2: Wrong response type (string instead of dict/list)
299 |         mock_request.return_value = "not a dict or list"
300 |         with pytest.raises(ResponseValidationError) as excinfo:
301 |             await client.list_evaluators()
302 |         error_message = str(excinfo.value)
303 |         assert "Expected response to be a dict or list" in error_message, (
304 |             "Error should specify invalid response type"
305 |         )
306 |         assert "got str" in error_message.lower(), "Error should mention the actual type received"
307 | 
308 |         mock_request.return_value = "not a valid format"
309 |         with pytest.raises(ResponseValidationError) as excinfo:
310 |             await client.run_evaluator(
311 |                 evaluator_id="test-id", request="Test request", response="Test response"
312 |             )
313 |         error_message = str(excinfo.value)
314 |         assert "Invalid evaluation response format" in error_message, (
315 |             "Should indicate format validation error"
316 |         )
317 | 
318 | 
319 | @pytest.mark.asyncio
320 | async def test_evaluator_missing_fields() -> None:
321 |     """Test handling of evaluators with missing required fields."""
322 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
323 |         client = RootSignalsEvaluatorRepository()
324 | 
325 |         mock_request.return_value = {
326 |             "results": [
327 |                 {
328 |                     "id": "valid-id",
329 |                     "name": "Valid Evaluator",
330 |                     "created_at": "2023-01-01T00:00:00Z",
331 |                     "inputs": {},
332 |                 },
333 |                 {
334 |                     "created_at": "2023-01-01T00:00:00Z",
335 |                     # Missing required fields: id, name
336 |                 },
337 |             ]
338 |         }
339 | 
340 |         with pytest.raises(ResponseValidationError) as excinfo:
341 |             await client.list_evaluators()
342 | 
343 |         error_message = str(excinfo.value)
344 |         assert "missing required field" in error_message.lower(), (
345 |             "Error should mention missing required field"
346 |         )
347 |         assert "id" in error_message or "name" in error_message, (
348 |             "Error should specify which field is missing"
349 |         )
350 | 
351 |         mock_request.return_value = {
352 |             "results": [
353 |                 {
354 |                     "id": "valid-id",
355 |                     "name": "Valid Evaluator",
356 |                     "created_at": "2023-01-01T00:00:00Z",
357 |                     "inputs": {},
358 |                 }
359 |             ]
360 |         }
361 | 
362 |         evaluators = await client.list_evaluators()
363 |         assert len(evaluators) == 1, "Should have one valid evaluator"
364 |         assert evaluators[0].id == "valid-id", "Valid evaluator should be included"
365 | 
366 | 
367 | @pytest.mark.asyncio
368 | async def test_root_client_schema_compatibility__detects_api_schema_changes() -> None:
369 |     """Test that our schema models detect changes in the API response format."""
370 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
371 |         # Case 1: Missing required field (evaluator_name)
372 |         mock_request.return_value = {
373 |             "result": {
374 |                 "score": 0.9,
375 |                 "justification": "Some justification",
376 |             }
377 |         }
378 | 
379 |         client = RootSignalsEvaluatorRepository()
380 |         with pytest.raises(ResponseValidationError) as excinfo:
381 |             await client.run_evaluator(
382 |                 evaluator_id="test-id", request="Test request", response="Test response"
383 |             )
384 | 
385 |         error_message = str(excinfo.value)
386 |         assert "Invalid evaluation response format" in error_message, (
387 |             "Should show validation error message"
388 |         )
389 |         # The exact error format will come from Pydantic now
390 |         assert "evaluator_name" in error_message.lower(), "Should mention the missing field"
391 | 
392 |         # Case 2: Missing another required field (score)
393 |         mock_request.return_value = {
394 |             "result": {
395 |                 "evaluator_name": "Test Evaluator",
396 |                 "justification": "Some justification",
397 |             }
398 |         }
399 | 
400 |         with pytest.raises(ResponseValidationError) as excinfo:
401 |             await client.run_evaluator(
402 |                 evaluator_id="test-id", request="Test request", response="Test response"
403 |             )
404 | 
405 |         error_message = str(excinfo.value)
406 |         assert "Invalid evaluation response format" in error_message, (
407 |             "Should show validation error message"
408 |         )
409 |         assert "score" in error_message.lower(), "Should mention the missing field"
410 | 
411 |         # Case 3: Empty response
412 |         mock_request.return_value = {}
413 | 
414 |         with pytest.raises(ResponseValidationError) as excinfo:
415 |             await client.run_evaluator(
416 |                 evaluator_id="test-id", request="Test request", response="Test response"
417 |             )
418 | 
419 |         assert "Invalid evaluation response format" in str(excinfo.value), (
420 |             "Should show validation error for empty response"
421 |         )
422 | 
423 | 
424 | @pytest.mark.asyncio
425 | async def test_root_client_run_evaluator__handles_unexpected_response_fields() -> None:
426 |     """Test handling of extra fields in API response."""
427 |     with patch.object(RootSignalsEvaluatorRepository, "_make_request") as mock_request:
428 |         # Include extra fields that aren't in our schema
429 |         mock_request.return_value = {
430 |             "result": {
431 |                 "evaluator_name": "Test",
432 |                 "score": 0.9,
433 |                 "new_field_not_in_schema": "value",
434 |                 "another_new_field": {"nested": "data", "that": ["should", "be", "ignored"]},
435 |             }
436 |         }
437 | 
438 |         client = RootSignalsEvaluatorRepository()
439 |         result = await client.run_evaluator(evaluator_id="test-id", request="Test", response="Test")
440 | 
441 |         assert result.evaluator_name == "Test", "Required field should be correctly parsed"
442 |         assert result.score == 0.9, "Required field should be correctly parsed"
443 | 
444 |         # Extra fields should be ignored by Pydantic's model_validate
445 |         assert not hasattr(result, "new_field_not_in_schema"), "Extra fields should be ignored"
446 |         assert not hasattr(result, "another_new_field"), "Extra fields should be ignored"
447 | 
448 | 
449 | @pytest.mark.asyncio
450 | async def test_list_judges() -> None:
451 |     """Test listing judges from the API."""
452 |     client = RootSignalsJudgeRepository()
453 | 
454 |     judges = await client.list_judges()
455 | 
456 |     assert judges, "No judges returned"
457 |     assert len(judges) > 0, "Empty judges list"
458 | 
459 |     first_judge = judges[0]
460 |     assert first_judge.id, "Judge missing ID"
461 |     assert first_judge.name, "Judge missing name"
462 |     assert first_judge.created_at, "Judge missing created_at"
463 | 
464 |     logger.info(f"Found {len(judges)} judges")
465 |     logger.info(f"First judge: {first_judge.name} (ID: {first_judge.id})")
466 | 
467 | 
468 | @pytest.mark.asyncio
469 | async def test_list_judges_with_count() -> None:
470 |     """Test listing judges with a specific count limit."""
471 |     client = RootSignalsJudgeRepository()
472 | 
473 |     max_count = 5
474 |     judges = await client.list_judges(max_count=max_count)
475 | 
476 |     assert len(judges) <= max_count, f"Got more than {max_count} judges"
477 |     logger.info(f"Retrieved {len(judges)} judges with max_count={max_count}")
478 | 
479 |     max_count_large = 30
480 |     judges_large = await client.list_judges(max_count=max_count_large)
481 | 
482 |     assert len(judges_large) <= max_count_large, f"Got more than {max_count_large} judges"
483 |     logger.info(f"Retrieved {len(judges_large)} judges with max_count={max_count_large}")
484 | 
485 |     if len(judges) == max_count:
486 |         assert len(judges_large) > len(judges), "Larger max_count didn't return more judges"
487 | 
488 | 
489 | @pytest.mark.asyncio
490 | async def test_root_client_list_judges__handles_unexpected_response_fields() -> None:
491 |     """Test handling of extra fields in judge API response."""
492 |     with patch.object(RootSignalsJudgeRepository, "_make_request") as mock_request:
493 |         # Include extra fields that aren't in our schema
494 |         mock_request.return_value = {
495 |             "results": [
496 |                 {
497 |                     "id": "test-judge-id",
498 |                     "name": "Test Judge",
499 |                     "created_at": "2023-01-01T00:00:00Z",
500 |                     "new_field_not_in_schema": "value",
501 |                     "another_new_field": {"nested": "data", "that": ["should", "be", "ignored"]},
502 |                 }
503 |             ]
504 |         }
505 | 
506 |         client = RootSignalsJudgeRepository()
507 |         judges = await client.list_judges()
508 | 
509 |         assert len(judges) == 1, "Should have one judge in the result"
510 |         assert judges[0].id == "test-judge-id", "Judge ID should be correctly parsed"
511 |         assert judges[0].name == "Test Judge", "Judge name should be correctly parsed"
512 | 
513 |         # Extra fields should be ignored by Pydantic's model_validate
514 |         assert not hasattr(judges[0], "new_field_not_in_schema"), "Extra fields should be ignored"
515 |         assert not hasattr(judges[0], "another_new_field"), "Extra fields should be ignored"
516 | 
517 | 
518 | @pytest.mark.asyncio
519 | async def test_run_judge() -> None:
520 |     """Test running a judge with the API client."""
521 |     client = RootSignalsJudgeRepository()
522 | 
523 |     judges = await client.list_judges()
524 | 
525 |     judge = next(iter(judges), None)
526 |     assert judge is not None, "No judge found"
527 | 
528 |     logger.info(f"Using judge: {judge.name} (ID: {judge.id})")
529 | 
530 |     result = await client.run_judge(
531 |         RunJudgeRequest(
532 |             judge_id=judge.id,
533 |             judge_name=judge.name,
534 |             request="What is the capital of France?",
535 |             response="The capital of France is Paris, which is known as the City of Light.",
536 |         )
537 |     )
538 | 
539 |     assert result.evaluator_results, "Missing evaluator results in result"
540 |     assert isinstance(result.evaluator_results[0].score, float), "Score is not a float"
541 |     assert 0 <= result.evaluator_results[0].score <= 1, "Score outside expected range (0-1)"
542 | 
543 |     logger.info(f"Evaluation score: {result.evaluator_results[0].score}")
544 |     logger.info(f"Justification: {result.evaluator_results[0].justification}")
545 | 
```