# Directory Structure ``` ├── .gitignore ├── .python-version ├── assets │ └── logo.svg ├── LICENSE ├── Makefile ├── pyproject.toml ├── README.md ├── server.py ├── test_server.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 1 | 3.13 2 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # Installer logs 30 | pip-log.txt 31 | pip-delete-this-directory.txt 32 | 33 | # Unit test / coverage reports 34 | htmlcov/ 35 | .tox/ 36 | .nox/ 37 | .coverage 38 | .coverage.* 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | *.cover 43 | *.py,cover 44 | .hypothesis/ 45 | .pytest_cache/ 46 | cover/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | db.sqlite3 56 | db.sqlite3-journal 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | .pybuilder/ 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # IPython 76 | profile_default/ 77 | ipython_config.py 78 | 79 | # pyenv 80 | # For a library or package, you might want to ignore these files since the code is 81 | # intended to run in multiple environments; otherwise, check them in: 82 | # .python-version 83 | 84 | # pipenv 85 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 86 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 87 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 88 | # install all needed dependencies. 89 | #Pipfile.lock 90 | 91 | # UV 92 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 93 | # This is especially recommended for binary packages to ensure reproducibility, and is more 94 | # commonly ignored for libraries. 95 | #uv.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | .dmypy.json 139 | dmypy.json 140 | 141 | # Pyre type checker 142 | .pyre/ 143 | 144 | # pytype static type analyzer 145 | .pytype/ 146 | 147 | # Cython debug symbols 148 | cython_debug/ 149 | 150 | # PyCharm 151 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 152 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 153 | # and can be added to the global gitignore or merged into this file. For a more nuclear 154 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 155 | #.idea/ 156 | 157 | # Custom 158 | .DS_Store 159 | .vscode/ 160 | node_modules/ 161 | theme/docs/* 162 | .ruff_cache/ 163 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | <img src="./assets/logo.svg" alt="Allseer Logo" width="400" height="400" /> 2 | 3 | # k8s-mcp 4 | [](https://smithery.ai/server/@vlttnv/k8s-mcp) 5 | 6 | A Python-based, read-only [Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) server for Kubernetes clusters that exposes a comprehensive API to retrieve cluster information and diagnose issues. 7 | 8 | [Example chat using Claude](https://claude.ai/share/90ae39d3-a0c1-4065-ab79-45950b6b4806) 9 | 10 | ## Installation 11 | 12 | ### Prerequisites 13 | 14 | - Python 3.8+ 15 | - Access to a Kubernetes cluster (via kubeconfig or in-cluster configuration) 16 | - Required Python packages (see `dependencies` in `pyproject.toml`) 17 | - uv - https://github.com/astral-sh/uv 18 | 19 | ```bash 20 | # To install uv 21 | curl -LsSf https://astral.sh/uv/install.sh | sh 22 | ``` 23 | 24 | ```bash 25 | # Clone the repository 26 | git clone [email protected]:vlttnv/k8s-mcp.git 27 | cd k8s-mcp 28 | 29 | # Install dependencies 30 | uv venv 31 | source .venv/bin/activate 32 | uv sync 33 | ``` 34 | 35 | If using Claude configure open your Claude for Desktop App configuration at ~/Library/Application Support/Claude/claude_desktop_config.json in a text editor. Make sure to create the file if it doesn’t exist. 36 | 37 | ```bash 38 | code ~/Library/Application\ Support/Claude/claude_desktop_config.json 39 | ``` 40 | 41 | ```json 42 | { 43 | "mcpServers": { 44 | "k8s-mcp": { 45 | "command": "uv", 46 | "args": [ 47 | "--directory", 48 | "/ABSOLUTE/PATH/TO/PARENT/FOLDER/k8s-mcp", 49 | "run", 50 | "server.py" 51 | ] 52 | } 53 | } 54 | } 55 | ``` 56 | 57 | > You may need to put the full path to the uv executable in the command field. You can get this by running which uv on MacOS/Linux or where uv on Windows. 58 | 59 | ## Configuration 60 | 61 | The application automatically tries two methods to connect to your Kubernetes cluster: 62 | 63 | 1. **Kubeconfig File**: Uses your local kubeconfig file (typically located at `~/.kube/config`) 64 | 2. **In-Cluster Configuration**: If running inside a Kubernetes pod, uses the service account token 65 | 66 | No additional configuration is required if your kubeconfig is properly set up or if you're running inside a cluster with appropriate RBAC permissions. 67 | 68 | ## Usage 69 | 70 | ### Examples 71 | Here are some useful example prompts you can ask Claude about your Kubernetes cluster and its resources: 72 | 73 | #### General Cluster Status 74 | - "What's the overall health of my cluster?" 75 | - "Show me all namespaces in my cluster" 76 | - "What nodes are available in my cluster and what's their status?" 77 | - "How is resource utilization across my nodes?" 78 | 79 | #### Pods and Deployments 80 | - "List all pods in the production namespace" 81 | - "Are there any pods in CrashLoopBackOff state?" 82 | - "Show me pods with high restart counts" 83 | - "List all deployments across all namespaces" 84 | - "What deployments are failing to progress?" 85 | 86 | #### Debugging Issues 87 | - "Why is my pod in the staging namespace failing?" 88 | - "Get the YAML configuration for the service in the production namespace" 89 | - "Show me recent events in the default namespace" 90 | - "Are there any pods stuck in Pending state?" 91 | - "What's causing ImagePullBackOff errors in my cluster?" 92 | 93 | #### Resource Management 94 | - "Show me the resource consumption of nodes in my cluster" 95 | - "Are there any orphaned resources I should clean up?" 96 | - "List all services in the production namespace" 97 | - "Compare resource requests between staging and production" 98 | 99 | #### Specific Resource Inspection 100 | - "Show me the config for the coredns deployment in kube-system" 101 | - "Get details of the reverse-proxy service in staging" 102 | - "What containers are running in the pod xyz?" 103 | - "Show me the logs for the failing pod" 104 | 105 | ## API Reference 106 | 107 | ### Namespaces 108 | 109 | - `get_namespaces()`: List all available namespaces in the cluster 110 | 111 | ### Pods 112 | 113 | - `list_pods(namespace=None)`: List all pods, optionally filtered by namespace 114 | - `failed_pods()`: List all pods in Failed or Error state 115 | - `pending_pods()`: List all pods in Pending state with reasons 116 | - `high_restart_pods(restart_threshold=5)`: Find pods with restart counts above threshold 117 | 118 | ### Nodes 119 | 120 | - `list_nodes()`: List all nodes and their status 121 | - `node_capacity()`: Show available capacity on all nodes 122 | 123 | ### Deployments & Services 124 | 125 | - `list_deployments(namespace=None)`: List all deployments 126 | - `list_services(namespace=None)`: List all services 127 | - `list_events(namespace=None)`: List all events 128 | 129 | ### Resource Management 130 | 131 | - `orphaned_resources()`: List resources without owner references 132 | - `get_resource_yaml(namespace, resource_type, resource_name)`: Get YAML configuration for a specific resource 133 | 134 | ## License 135 | 136 | [MIT License](LICENSE) 137 | 138 | ## Contributing 139 | 140 | Contributions are welcome! Please feel free to submit a Pull Request. 141 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "k8s-mcp" 3 | version = "0.1.0" 4 | description = "A read-only Model Context Protocol (MCP) for querying Kubernetes clusters." 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "kubernetes>=32.0.1", 9 | "mcp[cli]>=1.3.0", 10 | "pytest>=8.3.5", 11 | "pyyaml>=6.0.2", 12 | ] 13 | 14 | [dependency-groups] 15 | dev = [ 16 | "ruff>=0.9.9", 17 | ] 18 | 19 | [tool.ruff.lint] 20 | select = [ 21 | # isort 22 | "I", 23 | ] 24 | ``` -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- ``` 1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 400 400"> 2 | <!-- Background --> 3 | <rect width="400" height="400" rx="12" ry="12" fill="#f8fafc" /> 4 | 5 | <!-- Server stack background --> 6 | <rect x="120" y="100" width="160" height="180" rx="8" ry="8" fill="#ebf5ff" stroke="#2563eb" stroke-width="2" /> 7 | 8 | <!-- Server components --> 9 | <rect x="140" y="125" width="120" height="30" rx="4" ry="4" fill="#ffffff" stroke="#2563eb" stroke-width="2" /> 10 | <rect x="140" y="165" width="120" height="30" rx="4" ry="4" fill="#ffffff" stroke="#2563eb" stroke-width="2" /> 11 | <rect x="140" y="205" width="120" height="30" rx="4" ry="4" fill="#ffffff" stroke="#2563eb" stroke-width="2" /> 12 | 13 | <!-- Data points on servers --> 14 | <circle cx="155" cy="140" r="4" fill="#2563eb" /> 15 | <circle cx="170" cy="140" r="4" fill="#2563eb" /> 16 | <circle cx="155" cy="180" r="4" fill="#2563eb" /> 17 | <circle cx="170" cy="180" r="4" fill="#2563eb" /> 18 | <circle cx="155" cy="220" r="4" fill="#2563eb" /> 19 | <circle cx="170" cy="220" r="4" fill="#2563eb" /> 20 | 21 | <!-- Status indicators --> 22 | <circle cx="245" cy="140" r="6" fill="#10b981" /> 23 | <circle cx="245" cy="180" r="6" fill="#10b981" /> 24 | <circle cx="245" cy="220" r="6" fill="#10b981" /> 25 | 26 | <!-- Python logo (simplified) --> 27 | <!-- <g transform="translate(200, 80) scale(0.6)"> 28 | <path d="M-20,-35 C-20,-43.28 -13.28,-50 -5,-50 L5,-50 C13.28,-50 20,-43.28 20,-35 L20,-15 C20,-6.72 13.28,0 5,0 L-5,0 C-13.28,0 -20,-6.72 -20,-15 Z" fill="#366a96" /> 29 | <path d="M-20,35 C-20,43.28 -13.28,50 -5,50 L5,50 C13.28,50 20,43.28 20,35 L20,15 C20,6.72 13.28,0 5,0 L-5,0 C-13.28,0 -20,6.72 -20,15 Z" fill="#ffd43b" /> 30 | <circle cx="-10" cy="-25" r="5" fill="#ffffff" /> 31 | <circle cx="-10" cy="25" r="5" fill="#ffffff" /> 32 | </g> --> 33 | 34 | <!-- Kubernetes wheel (simplified) --> 35 | <g transform="translate(200, 260) scale(0.7)"> 36 | <circle cx="0" cy="0" r="40" fill="#326ce5" opacity="0.2" /> 37 | <circle cx="0" cy="0" r="30" fill="#326ce5" opacity="0.3" /> 38 | <g stroke="#326ce5" stroke-width="6" stroke-linecap="round"> 39 | <line x1="0" y1="-40" x2="0" y2="-60" /> 40 | <line x1="28" y1="-28" x2="42" y2="-42" /> 41 | <line x1="40" y1="0" x2="60" y2="0" /> 42 | <line x1="28" y1="28" x2="42" y2="42" /> 43 | <line x1="0" y1="40" x2="0" y2="60" /> 44 | <line x1="-28" y1="28" x2="-42" y2="42" /> 45 | <line x1="-40" y1="0" x2="-60" y2="0" /> 46 | <line x1="-28" y1="-28" x2="-42" y2="-42" /> 47 | </g> 48 | </g> 49 | 50 | <!-- API connections --> 51 | <path d="M95,150 C75,150 75,180 95,180" fill="none" stroke="#7c3aed" stroke-width="2.5" stroke-dasharray="5,3" /> 52 | <path d="M95,180 C75,180 75,210 95,210" fill="none" stroke="#7c3aed" stroke-width="2.5" stroke-dasharray="5,3" /> 53 | <path d="M305,150 C325,150 325,180 305,180" fill="none" stroke="#7c3aed" stroke-width="2.5" stroke-dasharray="5,3" /> 54 | <path d="M305,180 C325,180 325,210 305,210" fill="none" stroke="#7c3aed" stroke-width="2.5" stroke-dasharray="5,3" /> 55 | 56 | <!-- API endpoint indicators --> 57 | <circle cx="95" cy="150" r="4" fill="#7c3aed" /> 58 | <circle cx="95" cy="180" r="4" fill="#7c3aed" /> 59 | <circle cx="95" cy="210" r="4" fill="#7c3aed" /> 60 | <circle cx="305" cy="150" r="4" fill="#7c3aed" /> 61 | <circle cx="305" cy="180" r="4" fill="#7c3aed" /> 62 | <circle cx="305" cy="210" r="4" fill="#7c3aed" /> 63 | 64 | <!-- MCP Badge --> 65 | <g transform="translate(200,260)"> 66 | <rect x="-45" y="-20" width="90" height="40" rx="20" ry="20" fill="#ffffff" stroke="#7c3aed" stroke-width="2.5" /> 67 | <text x="0" y="8" font-family="Arial, sans-serif" font-size="18" font-weight="bold" text-anchor="middle" fill="#7c3aed">MCP</text> 68 | </g> 69 | 70 | <!-- Title and description --> 71 | <text x="200" y="330" font-family="Arial, sans-serif" font-weight="bold" font-size="28" text-anchor="middle" fill="#1e293b">k8s-mcp</text> 72 | <text x="200" y="355" font-family="Arial, sans-serif" font-size="14" text-anchor="middle" fill="#475569">Read-only MCP server for Kubernetes clusters</text> 73 | </svg> 74 | ``` -------------------------------------------------------------------------------- /test_server.py: -------------------------------------------------------------------------------- ```python 1 | import unittest 2 | from unittest.mock import patch, MagicMock 3 | import json 4 | import datetime 5 | import asyncio 6 | from kubernetes.client.rest import ApiException 7 | 8 | # Import the module to be tested 9 | import server 10 | 11 | 12 | class AsyncTestCase(unittest.TestCase): 13 | """Base class for testing async functions.""" 14 | 15 | def run_async(self, coro): 16 | """Helper method to run coroutines in tests.""" 17 | return asyncio.run(coro) 18 | 19 | 20 | class TestKubernetesServer(AsyncTestCase): 21 | """Test cases for Kubernetes monitoring server functions.""" 22 | 23 | def setUp(self): 24 | """Set up test fixtures.""" 25 | # Mock configuration and API clients 26 | self.mock_config = patch("server.config").start() 27 | self.mock_core_v1 = patch("server.core_v1").start() 28 | self.mock_apps_v1 = patch("server.apps_v1").start() 29 | self.mock_batch_v1 = patch("server.batch_v1").start() 30 | self.mock_custom_objects = patch("server.custom_objects").start() 31 | 32 | # Mock FastMCP server 33 | self.mock_mcp = patch("server.mcp").start() 34 | 35 | def tearDown(self): 36 | """Tear down test fixtures.""" 37 | patch.stopall() 38 | 39 | def test_get_namespaces(self): 40 | """Test get_namespaces function.""" 41 | # Create mock namespace items 42 | mock_namespace1 = MagicMock() 43 | mock_namespace1.metadata.name = "default" 44 | mock_namespace1.status.phase = "Active" 45 | mock_namespace1.metadata.creation_timestamp = datetime.datetime( 46 | 2023, 1, 1, 12, 0, 0 47 | ) 48 | 49 | mock_namespace2 = MagicMock() 50 | mock_namespace2.metadata.name = "kube-system" 51 | mock_namespace2.status.phase = "Active" 52 | mock_namespace2.metadata.creation_timestamp = datetime.datetime( 53 | 2023, 1, 1, 12, 0, 0 54 | ) 55 | 56 | # Set up mock response 57 | mock_response = MagicMock() 58 | mock_response.items = [mock_namespace1, mock_namespace2] 59 | self.mock_core_v1.list_namespace.return_value = mock_response 60 | 61 | # Call the async function 62 | result = asyncio.run(server.get_namespaces()) 63 | 64 | # Verify the response 65 | namespaces = json.loads(result) 66 | self.assertEqual(len(namespaces), 2) 67 | self.assertEqual(namespaces[0]["name"], "default") 68 | self.assertEqual(namespaces[1]["name"], "kube-system") 69 | 70 | # Verify the API was called 71 | self.mock_core_v1.list_namespace.assert_called_once() 72 | 73 | def test_get_namespaces_error(self): 74 | """Test get_namespaces function with API error.""" 75 | # Simulate API exception 76 | self.mock_core_v1.list_namespace.side_effect = ApiException( 77 | status=403, reason="Forbidden" 78 | ) 79 | 80 | # Call the function 81 | result_tuple = asyncio.run(server.get_namespaces()) 82 | # If the function returns a tuple 83 | if isinstance(result_tuple, tuple): 84 | result, status_code = result_tuple 85 | else: 86 | # If function returns just the error JSON 87 | result = result_tuple 88 | status_code = 500 # Assuming default error code 89 | 90 | # Verify error response 91 | error_response = json.loads(result) 92 | self.assertEqual(status_code, 500) 93 | self.assertIn("error", error_response) 94 | 95 | def test_list_pods(self): 96 | """Test list_pods function with namespace parameter.""" 97 | # Create mock pod items 98 | mock_pod = MagicMock() 99 | mock_pod.metadata.name = "test-pod" 100 | mock_pod.metadata.namespace = "default" 101 | mock_pod.status.phase = "Running" 102 | mock_pod.status.pod_ip = "10.0.0.1" 103 | mock_pod.spec.node_name = "node1" 104 | mock_pod.metadata.creation_timestamp = datetime.datetime(2023, 1, 1, 12, 0, 0) 105 | 106 | # Create mock container 107 | mock_container = MagicMock() 108 | mock_container.name = "test-container" 109 | mock_container.image = "nginx:latest" 110 | mock_pod.spec.containers = [mock_container] 111 | 112 | # Create mock container status 113 | mock_container_status = MagicMock() 114 | mock_container_status.name = "test-container" 115 | mock_container_status.container_id = "container123" 116 | mock_pod.status.container_statuses = [mock_container_status] 117 | 118 | # Set up mock response 119 | mock_response = MagicMock() 120 | mock_response.items = [mock_pod] 121 | self.mock_core_v1.list_namespaced_pod.return_value = mock_response 122 | 123 | # Call the function with namespace 124 | result = server.list_pods(namespace="default") 125 | 126 | # Verify the response 127 | pods = json.loads(result) 128 | self.assertEqual(len(pods), 1) 129 | self.assertEqual(pods[0]["name"], "test-pod") 130 | self.assertEqual(pods[0]["namespace"], "default") 131 | self.assertEqual(pods[0]["containers"][0]["name"], "test-container") 132 | self.assertTrue(pods[0]["containers"][0]["ready"]) 133 | 134 | # Verify the API was called with correct namespace 135 | self.mock_core_v1.list_namespaced_pod.assert_called_once_with("default") 136 | 137 | def test_list_pods_all_namespaces(self): 138 | """Test list_pods function without namespace parameter.""" 139 | # Set up mock response 140 | mock_response = MagicMock() 141 | mock_response.items = [] 142 | self.mock_core_v1.list_pod_for_all_namespaces.return_value = mock_response 143 | 144 | # Call the function without namespace 145 | result = server.list_pods() # noqa: F841 146 | 147 | # Verify the API was called for all namespaces 148 | self.mock_core_v1.list_pod_for_all_namespaces.assert_called_once() 149 | 150 | def test_list_nodes(self): 151 | """Test list_nodes function.""" 152 | # Create mock node 153 | mock_node = MagicMock() 154 | mock_node.metadata.name = "node1" 155 | 156 | # Mock node conditions 157 | mock_condition = MagicMock() 158 | mock_condition.type = "Ready" 159 | mock_condition.status = "True" 160 | mock_node.status.conditions = [mock_condition] 161 | 162 | # Mock node addresses 163 | mock_address = MagicMock() 164 | mock_address.type = "InternalIP" 165 | mock_address.address = "192.168.1.1" 166 | mock_node.status.addresses = [mock_address] 167 | 168 | # Mock node capacity 169 | mock_node.status.capacity = {"cpu": "4", "memory": "8Gi", "pods": "110"} 170 | mock_node.status.allocatable = {"cpu": "3800m", "memory": "7Gi", "pods": "100"} 171 | 172 | # Mock node info 173 | mock_node.status.node_info = MagicMock() 174 | mock_node.status.node_info.kubelet_version = "v1.25.0" 175 | 176 | # Set up mock response 177 | mock_response = MagicMock() 178 | mock_response.items = [mock_node] 179 | self.mock_core_v1.list_node.return_value = mock_response 180 | 181 | # Call the function 182 | result = server.list_nodes() 183 | 184 | # Verify the response 185 | nodes = json.loads(result) 186 | self.assertEqual(len(nodes), 1) 187 | self.assertEqual(nodes[0]["name"], "node1") 188 | self.assertEqual(nodes[0]["conditions"]["Ready"], "True") 189 | self.assertEqual(nodes[0]["addresses"]["InternalIP"], "192.168.1.1") 190 | self.assertEqual(nodes[0]["capacity"]["cpu"], "4") 191 | self.assertEqual(nodes[0]["allocatable"]["memory"], "7Gi") 192 | self.assertEqual(nodes[0]["kubelet_version"], "v1.25.0") 193 | 194 | # Verify the API was called 195 | self.mock_core_v1.list_node.assert_called_once() 196 | 197 | def test_failed_pods(self): 198 | """Test failed_pods function.""" 199 | # Create mock failed pod 200 | mock_pod = MagicMock() 201 | mock_pod.metadata.name = "failed-pod" 202 | mock_pod.metadata.namespace = "default" 203 | mock_pod.status.phase = "Failed" 204 | mock_pod.spec.node_name = "node1" 205 | mock_pod.status.message = "Pod failed" 206 | mock_pod.status.reason = "Error" 207 | 208 | # Create mock container status 209 | mock_container_status = MagicMock() 210 | mock_container_status.name = "test-container" 211 | mock_container_status.restart_count = 3 212 | 213 | # Create mock container state 214 | mock_container_status.state = MagicMock() 215 | mock_container_status.state.waiting = MagicMock() 216 | mock_container_status.state.waiting.reason = "CrashLoopBackOff" 217 | mock_container_status.state.waiting.message = "Container crashed" 218 | mock_container_status.state.terminated = None 219 | 220 | mock_pod.status.container_statuses = [mock_container_status] 221 | 222 | # Set up mock response 223 | mock_response = MagicMock() 224 | mock_response.items = [mock_pod] 225 | self.mock_core_v1.list_pod_for_all_namespaces.return_value = mock_response 226 | 227 | # Call the function 228 | result = server.failed_pods() 229 | 230 | # Verify the response 231 | failed = json.loads(result) 232 | self.assertEqual(len(failed), 1) 233 | self.assertEqual(failed[0]["name"], "failed-pod") 234 | self.assertEqual(failed[0]["phase"], "Failed") 235 | self.assertEqual(failed[0]["container_statuses"][0]["name"], "test-container") 236 | self.assertEqual( 237 | failed[0]["container_statuses"][0]["state"]["reason"], "CrashLoopBackOff" 238 | ) 239 | 240 | # Verify the API was called 241 | self.mock_core_v1.list_pod_for_all_namespaces.assert_called_once() 242 | 243 | def test_get_resource_yaml(self): 244 | """Test get_resource_yaml function.""" 245 | # Create mock API client 246 | mock_api_client = MagicMock() 247 | server.client.ApiClient.return_value = mock_api_client 248 | 249 | # Create mock resource 250 | mock_resource = MagicMock() 251 | self.mock_core_v1.read_namespaced_pod.return_value = mock_resource 252 | 253 | # Set up serialization 254 | mock_dict = { 255 | "apiVersion": "v1", 256 | "kind": "Pod", 257 | "metadata": {"name": "test-pod"}, 258 | } 259 | mock_api_client.sanitize_for_serialization.return_value = mock_dict 260 | 261 | # Mock the yaml dump function to ensure consistent output 262 | with patch("server.yaml.dump") as mock_yaml_dump: 263 | mock_yaml_dump.return_value = ( 264 | "apiVersion: v1\nkind: Pod\nmetadata:\n name: test-pod\n" 265 | ) 266 | 267 | # Call the function 268 | result = server.get_resource_yaml("default", "pod", "test-pod") 269 | 270 | # Verify YAML output is what we expect based on our mock 271 | self.assertEqual( 272 | result, "apiVersion: v1\nkind: Pod\nmetadata:\n name: test-pod\n" 273 | ) 274 | 275 | # Verify yaml.dump was called with the correct parameters 276 | mock_yaml_dump.assert_called_once_with(mock_dict, default_flow_style=False) 277 | 278 | # Verify API calls 279 | self.mock_core_v1.read_namespaced_pod.assert_called_once_with( 280 | "test-pod", "default" 281 | ) 282 | mock_api_client.sanitize_for_serialization.assert_called_once_with( 283 | mock_resource 284 | ) 285 | 286 | def test_get_resource_yaml_unsupported_type(self): 287 | """Test get_resource_yaml function with unsupported resource type.""" 288 | # Call the function with unsupported type 289 | result, status_code = server.get_resource_yaml( 290 | "default", "unknown", "resource-name" 291 | ) 292 | 293 | # Verify error response 294 | error_response = json.loads(result) 295 | self.assertEqual(status_code, 400) 296 | self.assertIn("error", error_response) 297 | self.assertIn("Unsupported resource type", error_response["error"]) 298 | 299 | def test_format_bytes(self): 300 | """Test format_bytes helper function.""" 301 | # Test various sizes 302 | self.assertEqual(server.format_bytes(500), "500 B") 303 | self.assertEqual(server.format_bytes(1024), "1024 B") 304 | self.assertEqual(server.format_bytes(1536), "1.5 KiB") 305 | self.assertEqual(server.format_bytes(2 * 1024 * 1024), "2.0 MiB") 306 | self.assertEqual(server.format_bytes(3 * 1024 * 1024 * 1024), "3.0 GiB") 307 | 308 | 309 | if __name__ == "__main__": 310 | unittest.main() 311 | ``` -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Kubernetes Monitoring API Server 3 | 4 | This module provides a FastMCP server that exposes Kubernetes monitoring APIs. 5 | It connects to a Kubernetes cluster and provides endpoints to query various 6 | cluster resources including pods, services, deployments, nodes, and events. 7 | 8 | Dependencies: 9 | - kubernetes: Python client for Kubernetes 10 | - yaml: For YAML serialization 11 | - FastMCP: Server framework for API endpoints 12 | """ 13 | 14 | import yaml 15 | import json 16 | from datetime import datetime 17 | from kubernetes import client, config 18 | from kubernetes.client.rest import ApiException 19 | from mcp.server.fastmcp import FastMCP 20 | 21 | 22 | # Initialize FastMCP server 23 | mcp = FastMCP("k8s") 24 | 25 | # Kubernetes client configuration 26 | try: 27 | # Try to load from default kubeconfig 28 | config.load_kube_config() 29 | except Exception: 30 | # If running inside a pod 31 | try: 32 | config.load_incluster_config() 33 | except Exception as e: 34 | print(f"Failed to configure Kubernetes client: {e}") 35 | exit(1) 36 | 37 | # Initialize API clients 38 | core_v1 = client.CoreV1Api() 39 | apps_v1 = client.AppsV1Api() 40 | batch_v1 = client.BatchV1Api() 41 | custom_objects = client.CustomObjectsApi() 42 | 43 | 44 | @mcp.tool() 45 | def get_namespaces(): 46 | """ 47 | List all namespaces in the Kubernetes cluster. 48 | 49 | Returns: 50 | str: JSON string containing an array of namespace objects with fields: 51 | - name (str): Name of the namespace 52 | - status (str): Phase of the namespace (Active, Terminating) 53 | - creation_time (str): Timestamp when namespace was created 54 | 55 | Raises: 56 | ApiException: If there is an error communicating with the Kubernetes API 57 | """ 58 | try: 59 | namespaces = core_v1.list_namespace() 60 | result = [] 61 | for ns in namespaces.items: 62 | result.append( 63 | { 64 | "name": ns.metadata.name, 65 | "status": ns.status.phase, 66 | "creation_time": ns.metadata.creation_timestamp.strftime( 67 | "%Y-%m-%d %H:%M:%S" 68 | ) 69 | if ns.metadata.creation_timestamp 70 | else None, 71 | } 72 | ) 73 | return json.dumps(result) 74 | except ApiException as e: 75 | return json.dumps({"error": str(e)}), 500 76 | 77 | 78 | @mcp.tool() 79 | def list_pods(namespace=None): 80 | """ 81 | Lists all pods in the specified Kubernetes namespace or across all namespaces. 82 | 83 | Retrieves detailed information about pods including their status, containers, 84 | and hosting node. 85 | 86 | Args: 87 | namespace (str, optional): The namespace to filter pods by. 88 | If None, pods from all namespaces will be returned. Defaults to None. 89 | 90 | Returns: 91 | str: JSON string containing an array of pod objects with fields: 92 | - name (str): Name of the pod 93 | - namespace (str): Namespace where the pod is running 94 | - phase (str): Current phase of the pod (Running, Pending, etc.) 95 | - ip (str): Pod IP address 96 | - node (str): Name of the node running this pod 97 | - containers (list): List of containers in the pod with their status 98 | - creation_time (str): Timestamp when pod was created 99 | 100 | Raises: 101 | ApiException: If there is an error communicating with the Kubernetes API 102 | """ 103 | 104 | try: 105 | if namespace: 106 | pods = core_v1.list_namespaced_pod(namespace) 107 | else: 108 | pods = core_v1.list_pod_for_all_namespaces() 109 | 110 | result = [] 111 | for pod in pods.items: 112 | containers = [] 113 | for container in pod.spec.containers: 114 | containers.append( 115 | { 116 | "name": container.name, 117 | "image": container.image, 118 | "ready": any( 119 | s.container_id is not None and s.name == container.name 120 | for s in pod.status.container_statuses 121 | ) 122 | if pod.status.container_statuses 123 | else False, 124 | } 125 | ) 126 | 127 | result.append( 128 | { 129 | "name": pod.metadata.name, 130 | "namespace": pod.metadata.namespace, 131 | "phase": pod.status.phase, 132 | "ip": pod.status.pod_ip, 133 | "node": pod.spec.node_name, 134 | "containers": containers, 135 | "creation_time": pod.metadata.creation_timestamp.strftime( 136 | "%Y-%m-%d %H:%M:%S" 137 | ) 138 | if pod.metadata.creation_timestamp 139 | else None, 140 | } 141 | ) 142 | return json.dumps(result) 143 | except ApiException as e: 144 | return json.dumps({"error": str(e)}), 500 145 | 146 | 147 | @mcp.tool() 148 | def list_nodes(): 149 | """List all nodes and their status""" 150 | try: 151 | nodes = core_v1.list_node() 152 | result = [] 153 | for node in nodes.items: 154 | conditions = {} 155 | for condition in node.status.conditions: 156 | conditions[condition.type] = condition.status 157 | 158 | addresses = {} 159 | for address in node.status.addresses: 160 | addresses[address.type] = address.address 161 | 162 | # Get capacity and allocatable resources 163 | capacity = { 164 | "cpu": node.status.capacity.get("cpu"), 165 | "memory": node.status.capacity.get("memory"), 166 | "pods": node.status.capacity.get("pods"), 167 | } 168 | 169 | allocatable = { 170 | "cpu": node.status.allocatable.get("cpu"), 171 | "memory": node.status.allocatable.get("memory"), 172 | "pods": node.status.allocatable.get("pods"), 173 | } 174 | 175 | result.append( 176 | { 177 | "name": node.metadata.name, 178 | "conditions": conditions, 179 | "addresses": addresses, 180 | "capacity": capacity, 181 | "allocatable": allocatable, 182 | "kubelet_version": node.status.node_info.kubelet_version 183 | if node.status.node_info 184 | else None, 185 | } 186 | ) 187 | return json.dumps(result) 188 | except ApiException as e: 189 | return json.dumps({"error": str(e)}), 500 190 | 191 | 192 | @mcp.tool() 193 | def list_deployments(namespace=None): 194 | """ 195 | List deployments with optional namespace filter 196 | 197 | Args: 198 | namespaces (list, optional): A list of namespace names to filter pods by. 199 | If None, pods from all namespaces will be returned. Defaults to None. 200 | """ 201 | try: 202 | if namespace: 203 | deployments = apps_v1.list_namespaced_deployment(namespace) 204 | else: 205 | deployments = apps_v1.list_deployment_for_all_namespaces() 206 | 207 | result = [] 208 | for deployment in deployments.items: 209 | result.append( 210 | { 211 | "name": deployment.metadata.name, 212 | "namespace": deployment.metadata.namespace, 213 | "replicas": deployment.spec.replicas, 214 | "available_replicas": deployment.status.available_replicas, 215 | "ready_replicas": deployment.status.ready_replicas, 216 | "strategy": deployment.spec.strategy.type, 217 | "creation_time": deployment.metadata.creation_timestamp.strftime( 218 | "%Y-%m-%d %H:%M:%S" 219 | ) 220 | if deployment.metadata.creation_timestamp 221 | else None, 222 | } 223 | ) 224 | return json.dumps(result) 225 | except ApiException as e: 226 | return json.dumps({"error": str(e)}), 500 227 | 228 | 229 | @mcp.tool() 230 | def list_services(namespace=None): 231 | """ 232 | List services with optional namespace filter 233 | 234 | Args: 235 | namespaces (list, optional): A list of namespace names to filter pods by. 236 | If None, pods from all namespaces will be returned. Defaults to None. 237 | """ 238 | try: 239 | if namespace: 240 | services = core_v1.list_namespaced_service(namespace) 241 | else: 242 | services = core_v1.list_service_for_all_namespaces() 243 | 244 | result = [] 245 | for service in services.items: 246 | ports = [] 247 | for port in service.spec.ports: 248 | ports.append( 249 | { 250 | "name": port.name, 251 | "port": port.port, 252 | "target_port": port.target_port, 253 | "protocol": port.protocol, 254 | "node_port": port.node_port 255 | if hasattr(port, "node_port") 256 | else None, 257 | } 258 | ) 259 | 260 | result.append( 261 | { 262 | "name": service.metadata.name, 263 | "namespace": service.metadata.namespace, 264 | "type": service.spec.type, 265 | "cluster_ip": service.spec.cluster_ip, 266 | "external_ip": service.spec.external_i_ps 267 | if hasattr(service.spec, "external_i_ps") 268 | else None, 269 | "ports": ports, 270 | "selector": service.spec.selector, 271 | "creation_time": service.metadata.creation_timestamp.strftime( 272 | "%Y-%m-%d %H:%M:%S" 273 | ) 274 | if service.metadata.creation_timestamp 275 | else None, 276 | } 277 | ) 278 | return json.dumps(result) 279 | except ApiException as e: 280 | return json.dumps({"error": str(e)}), 500 281 | 282 | 283 | @mcp.tool() 284 | def list_events(namespace=None): 285 | """ 286 | List events with optional namespace filter 287 | 288 | Args: 289 | namespaces (list, optional): A list of namespace names to filter pods by. 290 | If None, pods from all namespaces will be returned. Defaults to None. 291 | """ 292 | try: 293 | if namespace: 294 | events = core_v1.list_namespaced_event(namespace) 295 | else: 296 | events = core_v1.list_event_for_all_namespaces() 297 | 298 | result = [] 299 | for event in events.items: 300 | result.append( 301 | { 302 | "type": event.type, 303 | "reason": event.reason, 304 | "message": event.message, 305 | "object": f"{event.involved_object.kind}/{event.involved_object.name}", 306 | "namespace": event.metadata.namespace, 307 | "count": event.count, 308 | "first_time": event.first_timestamp.strftime("%Y-%m-%d %H:%M:%S") 309 | if event.first_timestamp 310 | else None, 311 | "last_time": event.last_timestamp.strftime("%Y-%m-%d %H:%M:%S") 312 | if event.last_timestamp 313 | else None, 314 | } 315 | ) 316 | # Sort by last_time (newest first) 317 | # TODO: fix issue with sorting 318 | # result.sort(key=lambda x: x.get("last_time", ""), reverse=True) 319 | return json.dumps(result) 320 | except ApiException as e: 321 | return json.dumps({"error": str(e)}), 500 322 | 323 | 324 | @mcp.tool() 325 | def failed_pods(): 326 | """ 327 | List all pods in Failed or Error state across all namespaces. 328 | 329 | Identifies pods that are in a failed state, including those in CrashLoopBackOff, 330 | ImagePullBackOff, or other error states. Provides detailed container status 331 | information to aid in troubleshooting. 332 | 333 | Returns: 334 | str: JSON string containing an array of failed pod objects with fields: 335 | - name (str): Name of the pod 336 | - namespace (str): Namespace where the pod is running 337 | - phase (str): Current phase of the pod 338 | - container_statuses (list): Detailed status of each container 339 | including state, reason, exit codes, and restart counts 340 | - node (str): Name of the node running this pod 341 | - message (str): Status message from the pod, if any 342 | - reason (str): Reason for the current status, if any 343 | 344 | Raises: 345 | ApiException: If there is an error communicating with the Kubernetes API 346 | """ 347 | try: 348 | pods = core_v1.list_pod_for_all_namespaces() 349 | failed = [] 350 | 351 | for pod in pods.items: 352 | if pod.status.phase in ["Failed", "Error"] or any( 353 | s.state 354 | and s.state.waiting 355 | and s.state.waiting.reason 356 | in ["CrashLoopBackOff", "ImagePullBackOff", "ErrImagePull"] 357 | for s in pod.status.container_statuses 358 | if s.state and s.state.waiting 359 | ): 360 | container_statuses = [] 361 | if pod.status.container_statuses: 362 | for s in pod.status.container_statuses: 363 | state = {} 364 | if s.state.waiting: 365 | state = { 366 | "status": "waiting", 367 | "reason": s.state.waiting.reason, 368 | "message": s.state.waiting.message, 369 | } 370 | elif s.state.terminated: 371 | state = { 372 | "status": "terminated", 373 | "reason": s.state.terminated.reason, 374 | "exit_code": s.state.terminated.exit_code, 375 | "message": s.state.terminated.message, 376 | } 377 | container_statuses.append( 378 | { 379 | "name": s.name, 380 | "state": state, 381 | "restart_count": s.restart_count, 382 | } 383 | ) 384 | 385 | failed.append( 386 | { 387 | "name": pod.metadata.name, 388 | "namespace": pod.metadata.namespace, 389 | "phase": pod.status.phase, 390 | "container_statuses": container_statuses, 391 | "node": pod.spec.node_name, 392 | "message": pod.status.message if pod.status.message else None, 393 | "reason": pod.status.reason if pod.status.reason else None, 394 | } 395 | ) 396 | 397 | return json.dumps(failed) 398 | except ApiException as e: 399 | return json.dumps({"error": str(e)}), 500 400 | 401 | 402 | @mcp.tool() 403 | def pending_pods(): 404 | """List all pods in Pending state and why they're pending""" 405 | try: 406 | pods = core_v1.list_pod_for_all_namespaces() 407 | pending = [] 408 | 409 | for pod in pods.items: 410 | if pod.status.phase == "Pending": 411 | # Check for events related to this pod 412 | events = core_v1.list_namespaced_event( 413 | pod.metadata.namespace, 414 | field_selector=f"involvedObject.name={pod.metadata.name},involvedObject.kind=Pod", 415 | ) 416 | 417 | pending_reason = "Unknown" 418 | pending_message = None 419 | 420 | # Get the latest event that might explain why it's pending 421 | if events.items: 422 | latest_event = max( 423 | events.items, 424 | key=lambda e: e.last_timestamp 425 | if e.last_timestamp 426 | else datetime.min, 427 | ) 428 | pending_reason = latest_event.reason 429 | pending_message = latest_event.message 430 | 431 | pending.append( 432 | { 433 | "name": pod.metadata.name, 434 | "namespace": pod.metadata.namespace, 435 | "node": pod.spec.node_name, 436 | "reason": pending_reason, 437 | "message": pending_message, 438 | "creation_time": pod.metadata.creation_timestamp.strftime( 439 | "%Y-%m-%d %H:%M:%S" 440 | ) 441 | if pod.metadata.creation_timestamp 442 | else None, 443 | } 444 | ) 445 | 446 | return json.dumps(pending) 447 | except ApiException as e: 448 | return json.dumps({"error": str(e)}), 500 449 | 450 | 451 | @mcp.tool() 452 | def high_restart_pods(restart_threshold=5): 453 | """ 454 | Find pods with high restart counts (>5) 455 | 456 | Args: 457 | restart_threshold (int, optional): The minimum number of restarts 458 | required to include a pod in the results. Defaults to 5. 459 | """ 460 | 461 | try: 462 | pods = core_v1.list_pod_for_all_namespaces() 463 | high_restart = [] 464 | 465 | for pod in pods.items: 466 | high_restart_containers = [] 467 | 468 | if pod.status.container_statuses: 469 | for status in pod.status.container_statuses: 470 | if status.restart_count > restart_threshold: 471 | high_restart_containers.append( 472 | { 473 | "name": status.name, 474 | "restart_count": status.restart_count, 475 | "ready": status.ready, 476 | "image": status.image, 477 | } 478 | ) 479 | 480 | if high_restart_containers: 481 | high_restart.append( 482 | { 483 | "name": pod.metadata.name, 484 | "namespace": pod.metadata.namespace, 485 | "node": pod.spec.node_name, 486 | "containers": high_restart_containers, 487 | } 488 | ) 489 | 490 | return json.dumps(high_restart) 491 | except ApiException as e: 492 | return json.dumps({"error": str(e)}), 500 493 | 494 | 495 | @mcp.tool() 496 | def node_capacity(): 497 | """ 498 | Show available capacity and resource utilization on all nodes. 499 | 500 | Calculates the current resource usage across all nodes, including: 501 | - Pod count vs. maximum pods per node 502 | - CPU requests vs. allocatable CPU 503 | - Memory requests vs. allocatable memory 504 | 505 | The function provides both raw values and percentage utilization to help 506 | identify nodes approaching resource limits. 507 | 508 | Returns: 509 | str: JSON string containing an array of node capacity objects with fields: 510 | - name (str): Name of the node 511 | - pods (dict): Pod capacity information 512 | - used (int): Number of pods running on the node 513 | - capacity (int): Maximum number of pods the node can run 514 | - percent_used (float): Percentage of pod capacity in use 515 | - cpu (dict): CPU resource information 516 | - requested (float): CPU cores requested by pods 517 | - allocatable (float): CPU cores available on the node 518 | - percent_used (float): Percentage of CPU capacity in use 519 | - memory (dict): Memory resource information 520 | - requested (int): Memory requested by pods in bytes 521 | - requested_human (str): Human-readable memory requested 522 | - allocatable (int): Memory available on the node in bytes 523 | - allocatable_human (str): Human-readable allocatable memory 524 | - percent_used (float): Percentage of memory capacity in use 525 | - conditions (dict): Node condition statuses 526 | 527 | Raises: 528 | ApiException: If there is an error communicating with the Kubernetes API 529 | """ 530 | try: 531 | nodes = core_v1.list_node() 532 | pods = core_v1.list_pod_for_all_namespaces() 533 | 534 | # Group pods by node 535 | node_pods = {} 536 | for pod in pods.items: 537 | if pod.spec.node_name: 538 | if pod.spec.node_name not in node_pods: 539 | node_pods[pod.spec.node_name] = [] 540 | node_pods[pod.spec.node_name].append(pod) 541 | 542 | results = [] 543 | for node in nodes.items: 544 | # Calculate pod count 545 | pod_count = len(node_pods.get(node.metadata.name, [])) 546 | max_pods = int(node.status.allocatable.get("pods", 0)) 547 | 548 | # Calculate CPU and memory utilization (rough estimate) 549 | node_pods_list = node_pods.get(node.metadata.name, []) 550 | cpu_request = 0 551 | memory_request = 0 552 | 553 | for pod in node_pods_list: 554 | for container in pod.spec.containers: 555 | if container.resources and container.resources.requests: 556 | if container.resources.requests.get("cpu"): 557 | cpu_str = container.resources.requests.get("cpu") 558 | if cpu_str.endswith("m"): 559 | cpu_request += int(cpu_str[:-1]) / 1000 560 | else: 561 | cpu_request += float(cpu_str) 562 | 563 | if container.resources.requests.get("memory"): 564 | mem_str = container.resources.requests.get("memory") 565 | # Convert to bytes (rough approximation) 566 | if mem_str.endswith("Ki"): 567 | memory_request += int(mem_str[:-2]) * 1024 568 | elif mem_str.endswith("Mi"): 569 | memory_request += int(mem_str[:-2]) * 1024 * 1024 570 | elif mem_str.endswith("Gi"): 571 | memory_request += int(mem_str[:-2]) * 1024 * 1024 * 1024 572 | else: 573 | memory_request += int(mem_str) 574 | 575 | # Convert allocatable CPU to cores 576 | cpu_allocatable = node.status.allocatable.get("cpu", "0") 577 | if cpu_allocatable.endswith("m"): 578 | cpu_allocatable = int(cpu_allocatable[:-1]) / 1000 579 | else: 580 | cpu_allocatable = float(cpu_allocatable) 581 | 582 | # Convert allocatable memory to bytes 583 | mem_allocatable = node.status.allocatable.get("memory", "0") 584 | mem_bytes = 0 585 | if mem_allocatable.endswith("Ki"): 586 | mem_bytes = int(mem_allocatable[:-2]) * 1024 587 | elif mem_allocatable.endswith("Mi"): 588 | mem_bytes = int(mem_allocatable[:-2]) * 1024 * 1024 589 | elif mem_allocatable.endswith("Gi"): 590 | mem_bytes = int(mem_allocatable[:-2]) * 1024 * 1024 * 1024 591 | else: 592 | mem_bytes = int(mem_allocatable) 593 | 594 | results.append( 595 | { 596 | "name": node.metadata.name, 597 | "pods": { 598 | "used": pod_count, 599 | "capacity": max_pods, 600 | "percent_used": round((pod_count / max_pods) * 100, 2) 601 | if max_pods > 0 602 | else 0, 603 | }, 604 | "cpu": { 605 | "requested": round(cpu_request, 2), 606 | "allocatable": round(cpu_allocatable, 2), 607 | "percent_used": round((cpu_request / cpu_allocatable) * 100, 2) 608 | if cpu_allocatable > 0 609 | else 0, 610 | }, 611 | "memory": { 612 | "requested": memory_request, 613 | "requested_human": format_bytes(memory_request), 614 | "allocatable": mem_bytes, 615 | "allocatable_human": format_bytes(mem_bytes), 616 | "percent_used": round((memory_request / mem_bytes) * 100, 2) 617 | if mem_bytes > 0 618 | else 0, 619 | }, 620 | "conditions": { 621 | cond.type: cond.status for cond in node.status.conditions 622 | }, 623 | } 624 | ) 625 | 626 | return json.dumps(results) 627 | except ApiException as e: 628 | return json.dumps({"error": str(e)}), 500 629 | 630 | 631 | @mcp.tool() 632 | def orphaned_resources(): 633 | """List resources that might be orphaned (no owner references)""" 634 | try: 635 | results = { 636 | "pods": [], 637 | "services": [], 638 | "persistent_volume_claims": [], 639 | "config_maps": [], 640 | "secrets": [], 641 | } 642 | 643 | # Check for orphaned pods 644 | pods = core_v1.list_pod_for_all_namespaces() 645 | for pod in pods.items: 646 | if ( 647 | not pod.metadata.owner_references 648 | and not pod.metadata.name.startswith("kube-") 649 | and pod.metadata.namespace != "kube-system" 650 | ): 651 | results["pods"].append( 652 | { 653 | "name": pod.metadata.name, 654 | "namespace": pod.metadata.namespace, 655 | "creation_time": pod.metadata.creation_timestamp.strftime( 656 | "%Y-%m-%d %H:%M:%S" 657 | ) 658 | if pod.metadata.creation_timestamp 659 | else None, 660 | } 661 | ) 662 | 663 | # Check for orphaned services 664 | services = core_v1.list_service_for_all_namespaces() 665 | for service in services.items: 666 | if ( 667 | not service.metadata.owner_references 668 | and not service.metadata.name.startswith("kube-") 669 | and service.metadata.namespace != "kube-system" 670 | and service.metadata.name != "kubernetes" 671 | ): 672 | results["services"].append( 673 | { 674 | "name": service.metadata.name, 675 | "namespace": service.metadata.namespace, 676 | "creation_time": service.metadata.creation_timestamp.strftime( 677 | "%Y-%m-%d %H:%M:%S" 678 | ) 679 | if service.metadata.creation_timestamp 680 | else None, 681 | } 682 | ) 683 | 684 | # Check for orphaned PVCs 685 | pvcs = core_v1.list_persistent_volume_claim_for_all_namespaces() 686 | for pvc in pvcs.items: 687 | if not pvc.metadata.owner_references: 688 | results["persistent_volume_claims"].append( 689 | { 690 | "name": pvc.metadata.name, 691 | "namespace": pvc.metadata.namespace, 692 | "creation_time": pvc.metadata.creation_timestamp.strftime( 693 | "%Y-%m-%d %H:%M:%S" 694 | ) 695 | if pvc.metadata.creation_timestamp 696 | else None, 697 | } 698 | ) 699 | 700 | # Check for orphaned ConfigMaps 701 | config_maps = core_v1.list_config_map_for_all_namespaces() 702 | for cm in config_maps.items: 703 | if ( 704 | not cm.metadata.owner_references 705 | and not cm.metadata.name.startswith("kube-") 706 | and cm.metadata.namespace != "kube-system" 707 | ): 708 | results["config_maps"].append( 709 | { 710 | "name": cm.metadata.name, 711 | "namespace": cm.metadata.namespace, 712 | "creation_time": cm.metadata.creation_timestamp.strftime( 713 | "%Y-%m-%d %H:%M:%S" 714 | ) 715 | if cm.metadata.creation_timestamp 716 | else None, 717 | } 718 | ) 719 | 720 | # Check for orphaned Secrets 721 | secrets = core_v1.list_secret_for_all_namespaces() 722 | for secret in secrets.items: 723 | if ( 724 | not secret.metadata.owner_references 725 | and not secret.metadata.name.startswith("kube-") 726 | and secret.metadata.namespace != "kube-system" 727 | and not secret.type.startswith("kubernetes.io/") 728 | ): 729 | results["secrets"].append( 730 | { 731 | "name": secret.metadata.name, 732 | "namespace": secret.metadata.namespace, 733 | "type": secret.type, 734 | "creation_time": secret.metadata.creation_timestamp.strftime( 735 | "%Y-%m-%d %H:%M:%S" 736 | ) 737 | if secret.metadata.creation_timestamp 738 | else None, 739 | } 740 | ) 741 | 742 | return json.dumps(results) 743 | except ApiException as e: 744 | return json.dumps({"error": str(e)}), 500 745 | 746 | 747 | @mcp.tool() 748 | def get_resource_yaml(namespace, resource_type, resource_name): 749 | """ 750 | Retrieves the YAML configuration for a specified Kubernetes resource. 751 | 752 | Fetches the complete configuration of a resource, which can be useful for 753 | debugging, documentation, or backup purposes. 754 | 755 | Args: 756 | namespace (str): The Kubernetes namespace containing the resource. 757 | resource_type (str): The type of resource to retrieve. 758 | Supported types: 'pod', 'deployment', 'service', 'configmap', 759 | 'secret', 'job' 760 | resource_name (str): The name of the specific resource to retrieve. 761 | 762 | Returns: 763 | str: YAML string representation of the resource configuration. 764 | 765 | Raises: 766 | ApiException: If there is an error communicating with the Kubernetes API 767 | ValueError: If an unsupported resource type is specified 768 | """ 769 | try: 770 | resource_data = None 771 | 772 | if resource_type == "pod": 773 | resource_data = core_v1.read_namespaced_pod(resource_name, namespace) 774 | elif resource_type == "deployment": 775 | resource_data = apps_v1.read_namespaced_deployment(resource_name, namespace) 776 | elif resource_type == "service": 777 | resource_data = core_v1.read_namespaced_service(resource_name, namespace) 778 | elif resource_type == "configmap": 779 | resource_data = core_v1.read_namespaced_config_map(resource_name, namespace) 780 | elif resource_type == "secret": 781 | resource_data = core_v1.read_namespaced_secret(resource_name, namespace) 782 | elif resource_type == "job": 783 | resource_data = batch_v1.read_namespaced_job(resource_name, namespace) 784 | else: 785 | return json.dumps( 786 | {"error": f"Unsupported resource type: {resource_type}"} 787 | ), 400 788 | 789 | # Convert to dict and then to YAML 790 | resource_dict = client.ApiClient().sanitize_for_serialization(resource_data) 791 | yaml_str = yaml.dump(resource_dict, default_flow_style=False) 792 | 793 | return yaml_str 794 | except ApiException as e: 795 | return json.dumps({"error": str(e)}), 500 796 | 797 | 798 | # Helper function to format bytes into human-readable format 799 | def format_bytes(size): 800 | """ 801 | Format bytes to human readable string. 802 | 803 | Converts a byte value to a human-readable string with appropriate 804 | units (B, KiB, MiB, GiB, TiB). 805 | 806 | Args: 807 | size (int): Size in bytes 808 | 809 | Returns: 810 | str: Human-readable string representation of the size 811 | (e.g., "2.5 MiB") 812 | """ 813 | power = 2**10 814 | n = 0 815 | power_labels = {0: "B", 1: "KiB", 2: "MiB", 3: "GiB", 4: "TiB"} 816 | while size > power: 817 | size /= power 818 | n += 1 819 | return f"{round(size, 2)} {power_labels[n]}" 820 | 821 | 822 | if __name__ == "__main__": 823 | # # Initialize and run the server 824 | mcp.run(transport="stdio") 825 | ```