clickhouse/mcp-clickhouse # codebase.md

# Directory Structure

```
├── .dockerignore
├── .editorconfig
├── .github
│   └── workflows
│       ├── ci.yaml
│       └── publish.yml
├── .gitignore
├── .python-version
├── Dockerfile
├── fastmcp.json
├── LICENSE
├── mcp_clickhouse
│   ├── __init__.py
│   ├── chdb_prompt.py
│   ├── main.py
│   ├── mcp_env.py
│   └── mcp_server.py
├── pyproject.toml
├── README.md
├── test-services
│   └── docker-compose.yaml
├── tests
│   ├── test_chdb_tool.py
│   ├── test_config_interface.py
│   ├── test_mcp_server.py
│   └── test_tool.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------

```
1 | 3.10
2 | 
```

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------

```
1 | [*]
2 | trim_trailing_whitespace = true
3 | end_of_line = lf
```

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------

```
 1 | # Git
 2 | .git/
 3 | .gitignore
 4 | .github/
 5 | 
 6 | # Documentation
 7 | LICENSE
 8 | # README.md is needed for package build
 9 | # *.md files may be needed for package metadata
10 | 
11 | # Development environment
12 | .venv/
13 | .env
14 | .envrc
15 | .python-version
16 | 
17 | # IDE and editor files
18 | .vscode/
19 | .idea/
20 | *.swp
21 | *.swo
22 | *~
23 | 
24 | # Cache directories
25 | .ruff_cache/
26 | .pytest_cache/
27 | __pycache__/
28 | *.pyc
29 | *.pyo
30 | *.pyd
31 | .mypy_cache/
32 | .dmypy.json
33 | dmypy.json
34 | 
35 | # Test files and directories
36 | tests/
37 | test-services/
38 | .coverage
39 | .coverage.*
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | coverage.xml
44 | *.cover
45 | *.py,cover
46 | .hypothesis/
47 | cover/
48 | 
49 | # Build artifacts
50 | build/
51 | dist/
52 | *.egg-info/
53 | .eggs/
54 | *.egg
55 | MANIFEST
56 | 
57 | # Temporary files
58 | *.tmp
59 | *.temp
60 | *.log
61 | .DS_Store
62 | Thumbs.db
63 | 
64 | # Docker files (avoid copying Dockerfile into itself)
65 | Dockerfile
66 | .dockerignore
67 | 
68 | # Editor config (not needed at runtime)
69 | .editorconfig
70 | 
71 | # Claude AI files
72 | .claude/ 
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
  1 | .envrc
  2 | .ruff_cache/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | cover/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | .pybuilder/
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | #   For a library or package, you might want to ignore these files since the code is
 90 | #   intended to run in multiple environments; otherwise, check them in:
 91 | # .python-version
 92 | 
 93 | # pipenv
 94 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 95 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 96 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 97 | #   install all needed dependencies.
 98 | #Pipfile.lock
 99 | 
100 | # UV
101 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
103 | #   commonly ignored for libraries.
104 | #uv.lock
105 | 
106 | # poetry
107 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
109 | #   commonly ignored for libraries.
110 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111 | #poetry.lock
112 | 
113 | # pdm
114 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115 | #pdm.lock
116 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
117 | #   in version control.
118 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
119 | .pdm.toml
120 | .pdm-python
121 | .pdm-build/
122 | 
123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124 | __pypackages__/
125 | 
126 | # Celery stuff
127 | celerybeat-schedule
128 | celerybeat.pid
129 | 
130 | # SageMath parsed files
131 | *.sage.py
132 | 
133 | # Environments
134 | .env
135 | .venv
136 | env/
137 | venv/
138 | ENV/
139 | env.bak/
140 | venv.bak/
141 | 
142 | # Spyder project settings
143 | .spyderproject
144 | .spyproject
145 | 
146 | # Rope project settings
147 | .ropeproject
148 | 
149 | # mkdocs documentation
150 | /site
151 | 
152 | # mypy
153 | .mypy_cache/
154 | .dmypy.json
155 | dmypy.json
156 | 
157 | # Pyre type checker
158 | .pyre/
159 | 
160 | # pytype static type analyzer
161 | .pytype/
162 | 
163 | # Cython debug symbols
164 | cython_debug/
165 | 
166 | # PyCharm
167 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
170 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
171 | #.idea/
172 | 
173 | # PyPI configuration file
174 | .pypirc
175 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | # ClickHouse MCP Server
  2 | 
  3 | [![PyPI - Version](https://img.shields.io/pypi/v/mcp-clickhouse)](https://pypi.org/project/mcp-clickhouse)
  4 | 
  5 | An MCP server for ClickHouse.
  6 | 
  7 | <a href="https://glama.ai/mcp/servers/yvjy4csvo1"><img width="380" height="200" src="https://glama.ai/mcp/servers/yvjy4csvo1/badge" alt="mcp-clickhouse MCP server" /></a>
  8 | 
  9 | ## Features
 10 | 
 11 | ### ClickHouse Tools
 12 | 
 13 | * `run_select_query`
 14 |   * Execute SQL queries on your ClickHouse cluster.
 15 |   * Input: `sql` (string): The SQL query to execute.
 16 |   * All ClickHouse queries are run with `readonly = 1` to ensure they are safe.
 17 | 
 18 | * `list_databases`
 19 |   * List all databases on your ClickHouse cluster.
 20 | 
 21 | * `list_tables`
 22 |   * List all tables in a database.
 23 |   * Input: `database` (string): The name of the database.
 24 | 
 25 | ### chDB Tools
 26 | 
 27 | * `run_chdb_select_query`
 28 |   * Execute SQL queries using [chDB](https://github.com/chdb-io/chdb)'s embedded ClickHouse engine.
 29 |   * Input: `sql` (string): The SQL query to execute.
 30 |   * Query data directly from various sources (files, URLs, databases) without ETL processes.
 31 | 
 32 | ### Health Check Endpoint
 33 | 
 34 | When running with HTTP or SSE transport, a health check endpoint is available at `/health`. This endpoint:
 35 | - Returns `200 OK` with the ClickHouse version if the server is healthy and can connect to ClickHouse
 36 | - Returns `503 Service Unavailable` if the server cannot connect to ClickHouse
 37 | 
 38 | Example:
 39 | ```bash
 40 | curl http://localhost:8000/health
 41 | # Response: OK - Connected to ClickHouse 24.3.1
 42 | ```
 43 | 
 44 | ## Configuration
 45 | 
 46 | This MCP server supports both ClickHouse and chDB. You can enable either or both depending on your needs.
 47 | 
 48 | 1. Open the Claude Desktop configuration file located at:
 49 |    * On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
 50 |    * On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
 51 | 
 52 | 2. Add the following:
 53 | 
 54 | ```json
 55 | {
 56 |   "mcpServers": {
 57 |     "mcp-clickhouse": {
 58 |       "command": "uv",
 59 |       "args": [
 60 |         "run",
 61 |         "--with",
 62 |         "mcp-clickhouse",
 63 |         "--python",
 64 |         "3.10",
 65 |         "mcp-clickhouse"
 66 |       ],
 67 |       "env": {
 68 |         "CLICKHOUSE_HOST": "<clickhouse-host>",
 69 |         "CLICKHOUSE_PORT": "<clickhouse-port>",
 70 |         "CLICKHOUSE_USER": "<clickhouse-user>",
 71 |         "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
 72 |         "CLICKHOUSE_SECURE": "true",
 73 |         "CLICKHOUSE_VERIFY": "true",
 74 |         "CLICKHOUSE_CONNECT_TIMEOUT": "30",
 75 |         "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
 76 |       }
 77 |     }
 78 |   }
 79 | }
 80 | ```
 81 | 
 82 | Update the environment variables to point to your own ClickHouse service.
 83 | 
 84 | Or, if you'd like to try it out with the [ClickHouse SQL Playground](https://sql.clickhouse.com/), you can use the following config:
 85 | 
 86 | ```json
 87 | {
 88 |   "mcpServers": {
 89 |     "mcp-clickhouse": {
 90 |       "command": "uv",
 91 |       "args": [
 92 |         "run",
 93 |         "--with",
 94 |         "mcp-clickhouse",
 95 |         "--python",
 96 |         "3.10",
 97 |         "mcp-clickhouse"
 98 |       ],
 99 |       "env": {
100 |         "CLICKHOUSE_HOST": "sql-clickhouse.clickhouse.com",
101 |         "CLICKHOUSE_PORT": "8443",
102 |         "CLICKHOUSE_USER": "demo",
103 |         "CLICKHOUSE_PASSWORD": "",
104 |         "CLICKHOUSE_SECURE": "true",
105 |         "CLICKHOUSE_VERIFY": "true",
106 |         "CLICKHOUSE_CONNECT_TIMEOUT": "30",
107 |         "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
108 |       }
109 |     }
110 |   }
111 | }
112 | ```
113 | 
114 | For chDB (embedded ClickHouse engine), add the following configuration:
115 | 
116 | ```json
117 | {
118 |   "mcpServers": {
119 |     "mcp-clickhouse": {
120 |       "command": "uv",
121 |       "args": [
122 |         "run",
123 |         "--with",
124 |         "mcp-clickhouse",
125 |         "--python",
126 |         "3.10",
127 |         "mcp-clickhouse"
128 |       ],
129 |       "env": {
130 |         "CHDB_ENABLED": "true",
131 |         "CLICKHOUSE_ENABLED": "false",
132 |         "CHDB_DATA_PATH": "/path/to/chdb/data"
133 |       }
134 |     }
135 |   }
136 | }
137 | ```
138 | 
139 | You can also enable both ClickHouse and chDB simultaneously:
140 | 
141 | ```json
142 | {
143 |   "mcpServers": {
144 |     "mcp-clickhouse": {
145 |       "command": "uv",
146 |       "args": [
147 |         "run",
148 |         "--with",
149 |         "mcp-clickhouse",
150 |         "--python",
151 |         "3.10",
152 |         "mcp-clickhouse"
153 |       ],
154 |       "env": {
155 |         "CLICKHOUSE_HOST": "<clickhouse-host>",
156 |         "CLICKHOUSE_PORT": "<clickhouse-port>",
157 |         "CLICKHOUSE_USER": "<clickhouse-user>",
158 |         "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
159 |         "CLICKHOUSE_SECURE": "true",
160 |         "CLICKHOUSE_VERIFY": "true",
161 |         "CLICKHOUSE_CONNECT_TIMEOUT": "30",
162 |         "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30",
163 |         "CHDB_ENABLED": "true",
164 |         "CHDB_DATA_PATH": "/path/to/chdb/data"
165 |       }
166 |     }
167 |   }
168 | }
169 | ```
170 | 
171 | 3. Locate the command entry for `uv` and replace it with the absolute path to the `uv` executable. This ensures that the correct version of `uv` is used when starting the server. On a mac, you can find this path using `which uv`.
172 | 
173 | 4. Restart Claude Desktop to apply the changes.
174 | 
175 | ### Running Without uv (Using System Python)
176 | 
177 | If you prefer to use the system Python installation instead of uv, you can install the package from PyPI and run it directly:
178 | 
179 | 1. Install the package using pip:
180 |    ```bash
181 |    python3 -m pip install mcp-clickhouse
182 |    ```
183 | 
184 |    To upgrade to the latest version:
185 |    ```bash
186 |    python3 -m pip install --upgrade mcp-clickhouse
187 |    ```
188 | 
189 | 2. Update your Claude Desktop configuration to use Python directly:
190 | 
191 | ```json
192 | {
193 |   "mcpServers": {
194 |     "mcp-clickhouse": {
195 |       "command": "python3",
196 |       "args": [
197 |         "-m",
198 |         "mcp_clickhouse.main"
199 |       ],
200 |       "env": {
201 |         "CLICKHOUSE_HOST": "<clickhouse-host>",
202 |         "CLICKHOUSE_PORT": "<clickhouse-port>",
203 |         "CLICKHOUSE_USER": "<clickhouse-user>",
204 |         "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
205 |         "CLICKHOUSE_SECURE": "true",
206 |         "CLICKHOUSE_VERIFY": "true",
207 |         "CLICKHOUSE_CONNECT_TIMEOUT": "30",
208 |         "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
209 |       }
210 |     }
211 |   }
212 | }
213 | ```
214 | 
215 | Alternatively, you can use the installed script directly:
216 | 
217 | ```json
218 | {
219 |   "mcpServers": {
220 |     "mcp-clickhouse": {
221 |       "command": "mcp-clickhouse",
222 |       "env": {
223 |         "CLICKHOUSE_HOST": "<clickhouse-host>",
224 |         "CLICKHOUSE_PORT": "<clickhouse-port>",
225 |         "CLICKHOUSE_USER": "<clickhouse-user>",
226 |         "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
227 |         "CLICKHOUSE_SECURE": "true",
228 |         "CLICKHOUSE_VERIFY": "true",
229 |         "CLICKHOUSE_CONNECT_TIMEOUT": "30",
230 |         "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
231 |       }
232 |     }
233 |   }
234 | }
235 | ```
236 | 
237 | Note: Make sure to use the full path to the Python executable or the `mcp-clickhouse` script if they are not in your system PATH. You can find the paths using:
238 | - `which python3` for the Python executable
239 | - `which mcp-clickhouse` for the installed script
240 | 
241 | ## Development
242 | 
243 | 1. In `test-services` directory run `docker compose up -d` to start the ClickHouse cluster.
244 | 
245 | 2. Add the following variables to a `.env` file in the root of the repository.
246 | 
247 | *Note: The use of the `default` user in this context is intended solely for local development purposes.*
248 | 
249 | ```bash
250 | CLICKHOUSE_HOST=localhost
251 | CLICKHOUSE_PORT=8123
252 | CLICKHOUSE_USER=default
253 | CLICKHOUSE_PASSWORD=clickhouse
254 | ```
255 | 
256 | 3. Run `uv sync` to install the dependencies. To install `uv` follow the instructions [here](https://docs.astral.sh/uv/). Then do `source .venv/bin/activate`.
257 | 
258 | 4. For easy testing with the MCP Inspector, run `fastmcp dev mcp_clickhouse/mcp_server.py` to start the MCP server.
259 | 
260 | 5. To test with HTTP transport and the health check endpoint:
261 |    ```bash
262 |    # Using default port 8000
263 |    CLICKHOUSE_MCP_SERVER_TRANSPORT=http python -m mcp_clickhouse.main
264 | 
265 |    # Or with a custom port
266 |    CLICKHOUSE_MCP_SERVER_TRANSPORT=http CLICKHOUSE_MCP_BIND_PORT=4200 python -m mcp_clickhouse.main
267 | 
268 |    # Then in another terminal:
269 |    curl http://localhost:8000/health  # or http://localhost:4200/health for custom port
270 |    ```
271 | 
272 | ### Environment Variables
273 | 
274 | The following environment variables are used to configure the ClickHouse and chDB connections:
275 | 
276 | #### ClickHouse Variables
277 | 
278 | ##### Required Variables
279 | 
280 | * `CLICKHOUSE_HOST`: The hostname of your ClickHouse server
281 | * `CLICKHOUSE_USER`: The username for authentication
282 | * `CLICKHOUSE_PASSWORD`: The password for authentication
283 | 
284 | > [!CAUTION]
285 | > It is important to treat your MCP database user as you would any external client connecting to your database, granting only the minimum necessary privileges required for its operation. The use of default or administrative users should be strictly avoided at all times.
286 | 
287 | ##### Optional Variables
288 | 
289 | * `CLICKHOUSE_PORT`: The port number of your ClickHouse server
290 |   * Default: `8443` if HTTPS is enabled, `8123` if disabled
291 |   * Usually doesn't need to be set unless using a non-standard port
292 | * `CLICKHOUSE_SECURE`: Enable/disable HTTPS connection
293 |   * Default: `"true"`
294 |   * Set to `"false"` for non-secure connections
295 | * `CLICKHOUSE_VERIFY`: Enable/disable SSL certificate verification
296 |   * Default: `"true"`
297 |   * Set to `"false"` to disable certificate verification (not recommended for production)
298 |   * TLS certificates: The package uses your operating system trust store for TLS certificate verification via `truststore`. We call `truststore.inject_into_ssl()` at startup to ensure proper certificate handling. Python’s default SSL behavior is used as a fallback only if an unexpected error occurs.
299 | * `CLICKHOUSE_CONNECT_TIMEOUT`: Connection timeout in seconds
300 |   * Default: `"30"`
301 |   * Increase this value if you experience connection timeouts
302 | * `CLICKHOUSE_SEND_RECEIVE_TIMEOUT`: Send/receive timeout in seconds
303 |   * Default: `"300"`
304 |   * Increase this value for long-running queries
305 | * `CLICKHOUSE_DATABASE`: Default database to use
306 |   * Default: None (uses server default)
307 |   * Set this to automatically connect to a specific database
308 | * `CLICKHOUSE_MCP_SERVER_TRANSPORT`: Sets the transport method for the MCP server.
309 |   * Default: `"stdio"`
310 |   * Valid options: `"stdio"`, `"http"`, `"sse"`. This is useful for local development with tools like MCP Inspector.
311 | * `CLICKHOUSE_MCP_BIND_HOST`: Host to bind the MCP server to when using HTTP or SSE transport
312 |   * Default: `"127.0.0.1"`
313 |   * Set to `"0.0.0.0"` to bind to all network interfaces (useful for Docker or remote access)
314 |   * Only used when transport is `"http"` or `"sse"`
315 | * `CLICKHOUSE_MCP_BIND_PORT`: Port to bind the MCP server to when using HTTP or SSE transport
316 |   * Default: `"8000"`
317 |   * Only used when transport is `"http"` or `"sse"`
318 | * `CLICKHOUSE_MCP_QUERY_TIMEOUT`: Timeout in seconds for SELECT tools
319 |   * Default: `"30"`
320 |   * Increase this if you see `Query timed out after ...` errors for heavy queries
321 | * `CLICKHOUSE_ENABLED`: Enable/disable ClickHouse functionality
322 |   * Default: `"true"`
323 |   * Set to `"false"` to disable ClickHouse tools when using chDB only
324 | 
325 | #### chDB Variables
326 | 
327 | * `CHDB_ENABLED`: Enable/disable chDB functionality
328 |   * Default: `"false"`
329 |   * Set to `"true"` to enable chDB tools
330 | * `CHDB_DATA_PATH`: The path to the chDB data directory
331 |   * Default: `":memory:"` (in-memory database)
332 |   * Use `:memory:` for in-memory database
333 |   * Use a file path for persistent storage (e.g., `/path/to/chdb/data`)
334 | 
335 | #### Example Configurations
336 | 
337 | For local development with Docker:
338 | 
339 | ```env
340 | # Required variables
341 | CLICKHOUSE_HOST=localhost
342 | CLICKHOUSE_USER=default
343 | CLICKHOUSE_PASSWORD=clickhouse
344 | 
345 | # Optional: Override defaults for local development
346 | CLICKHOUSE_SECURE=false  # Uses port 8123 automatically
347 | CLICKHOUSE_VERIFY=false
348 | ```
349 | 
350 | For ClickHouse Cloud:
351 | 
352 | ```env
353 | # Required variables
354 | CLICKHOUSE_HOST=your-instance.clickhouse.cloud
355 | CLICKHOUSE_USER=default
356 | CLICKHOUSE_PASSWORD=your-password
357 | 
358 | # Optional: These use secure defaults
359 | # CLICKHOUSE_SECURE=true  # Uses port 8443 automatically
360 | # CLICKHOUSE_DATABASE=your_database
361 | ```
362 | 
363 | For ClickHouse SQL Playground:
364 | 
365 | ```env
366 | CLICKHOUSE_HOST=sql-clickhouse.clickhouse.com
367 | CLICKHOUSE_USER=demo
368 | CLICKHOUSE_PASSWORD=
369 | # Uses secure defaults (HTTPS on port 8443)
370 | ```
371 | 
372 | For chDB only (in-memory):
373 | 
374 | ```env
375 | # chDB configuration
376 | CHDB_ENABLED=true
377 | CLICKHOUSE_ENABLED=false
378 | # CHDB_DATA_PATH defaults to :memory:
379 | ```
380 | 
381 | For chDB with persistent storage:
382 | 
383 | ```env
384 | # chDB configuration
385 | CHDB_ENABLED=true
386 | CLICKHOUSE_ENABLED=false
387 | CHDB_DATA_PATH=/path/to/chdb/data
388 | ```
389 | 
390 | For MCP Inspector or remote access with HTTP transport:
391 | 
392 | ```env
393 | CLICKHOUSE_HOST=localhost
394 | CLICKHOUSE_USER=default
395 | CLICKHOUSE_PASSWORD=clickhouse
396 | CLICKHOUSE_MCP_SERVER_TRANSPORT=http
397 | CLICKHOUSE_MCP_BIND_HOST=0.0.0.0  # Bind to all interfaces
398 | CLICKHOUSE_MCP_BIND_PORT=4200  # Custom port (default: 8000)
399 | ```
400 | 
401 | When using HTTP transport, the server will run on the configured port (default 8000). For example, with the above configuration:
402 | - MCP endpoint: `http://localhost:4200/mcp`
403 | - Health check: `http://localhost:4200/health`
404 | 
405 | You can set these variables in your environment, in a `.env` file, or in the Claude Desktop configuration:
406 | 
407 | ```json
408 | {
409 |   "mcpServers": {
410 |     "mcp-clickhouse": {
411 |       "command": "uv",
412 |       "args": [
413 |         "run",
414 |         "--with",
415 |         "mcp-clickhouse",
416 |         "--python",
417 |         "3.10",
418 |         "mcp-clickhouse"
419 |       ],
420 |       "env": {
421 |         "CLICKHOUSE_HOST": "<clickhouse-host>",
422 |         "CLICKHOUSE_USER": "<clickhouse-user>",
423 |         "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
424 |         "CLICKHOUSE_DATABASE": "<optional-database>",
425 |         "CLICKHOUSE_MCP_SERVER_TRANSPORT": "stdio",
426 |         "CLICKHOUSE_MCP_BIND_HOST": "127.0.0.1",
427 |         "CLICKHOUSE_MCP_BIND_PORT": "8000"
428 |       }
429 |     }
430 |   }
431 | }
432 | ```
433 | 
434 | Note: The bind host and port settings are only used when transport is set to "http" or "sse".
435 | 
436 | ### Running tests
437 | 
438 | ```bash
439 | uv sync --all-extras --dev # install dev dependencies
440 | uv run ruff check . # run linting
441 | 
442 | docker compose up -d test_services # start ClickHouse
443 | uv run pytest -v tests
444 | uv run pytest -v tests/test_tool.py # ClickHouse only
445 | uv run pytest -v tests/test_chdb_tool.py # chDB only
446 | ```
447 | 
448 | ## YouTube Overview
449 | 
450 | [![YouTube](http://i.ytimg.com/vi/y9biAm_Fkqw/hqdefault.jpg)](https://www.youtube.com/watch?v=y9biAm_Fkqw)
451 | 
```

--------------------------------------------------------------------------------
/fastmcp.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "$schema": "https://gofastmcp.com/public/schemas/fastmcp.json/v1.json",
 3 |   "source": {
 4 |     "path": "mcp_clickhouse/mcp_server.py",
 5 |     "entrypoint": "mcp"
 6 |   },
 7 |   "environment": {
 8 |     "dependencies": [
 9 |       "clickhouse-connect",
10 |       "python-dotenv",
11 |       "truststore",
12 |       "chdb"
13 |     ]
14 |   }
15 | }
16 | 
```

--------------------------------------------------------------------------------
/test-services/docker-compose.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | services:
 2 |   clickhouse:
 3 |     image: clickhouse/clickhouse-server:latest
 4 |     ports:
 5 |       - "8123:8123"
 6 |       - "9000:9000"
 7 |     volumes:
 8 |       - clickhouse-data:/var/lib/clickhouse
 9 |     environment:
10 |       - CLICKHOUSE_USER=default
11 |       - CLICKHOUSE_PASSWORD=clickhouse
12 |       - CLICKHOUSE_DB=default
13 | 
14 | volumes:
15 |   clickhouse-data:
16 | 
```

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------

```yaml
 1 | on:
 2 |   workflow_dispatch:
 3 | 
 4 | jobs:
 5 |   publish:
 6 |     name: Upload release to PyPI
 7 |     runs-on: ubuntu-latest
 8 |     environment:
 9 |       name: pypi
10 |       url: "https://pypi.org/p/mcp-clickhouse"
11 |     permissions:
12 |       id-token: write
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: astral-sh/setup-uv@v5
16 |       - run: uv python install
17 |       - run: uv build
18 |       - uses: pypa/gh-action-pypi-publish@release/v1
```

--------------------------------------------------------------------------------
/mcp_clickhouse/__init__.py:
--------------------------------------------------------------------------------

```python
 1 | import os
 2 | 
 3 | from .mcp_server import (
 4 |     create_clickhouse_client,
 5 |     list_databases,
 6 |     list_tables,
 7 |     run_select_query,
 8 |     create_chdb_client,
 9 |     run_chdb_select_query,
10 |     chdb_initial_prompt,
11 | )
12 | 
13 | 
14 | if os.getenv("MCP_CLICKHOUSE_TRUSTSTORE_DISABLE", None) != "1":
15 |     try:
16 |         import truststore
17 |         truststore.inject_into_ssl()
18 |     except Exception:
19 |         pass
20 | 
21 | __all__ = [
22 |     "list_databases",
23 |     "list_tables",
24 |     "run_select_query",
25 |     "create_clickhouse_client",
26 |     "create_chdb_client",
27 |     "run_chdb_select_query",
28 |     "chdb_initial_prompt",
29 | ]
30 | 
```

--------------------------------------------------------------------------------
/mcp_clickhouse/main.py:
--------------------------------------------------------------------------------

```python
 1 | from .mcp_server import mcp
 2 | from .mcp_env import get_mcp_config, TransportType
 3 | 
 4 | 
 5 | def main():
 6 |     mcp_config = get_mcp_config()
 7 |     transport = mcp_config.server_transport
 8 | 
 9 |     # For HTTP and SSE transports, we need to specify host and port
10 |     http_transports = [TransportType.HTTP.value, TransportType.SSE.value]
11 |     if transport in http_transports:
12 |         # Use the configured bind host (defaults to 127.0.0.1, can be set to 0.0.0.0)
13 |         # and bind port (defaults to 8000)
14 |         mcp.run(transport=transport, host=mcp_config.bind_host, port=mcp_config.bind_port)
15 |     else:
16 |         # For stdio transport, no host or port is needed
17 |         mcp.run(transport=transport)
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     main()
22 | 
```

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

```toml
 1 | [project]
 2 | name = "mcp-clickhouse"
 3 | version = "0.1.12"
 4 | description = "An MCP server for ClickHouse."
 5 | readme = "README.md"
 6 | license = "Apache-2.0"
 7 | license-files = ["LICENSE"]
 8 | requires-python = ">=3.10"
 9 | dependencies = [
10 |      "fastmcp>=2.0.0",
11 |      "python-dotenv>=1.0.1",
12 |      "clickhouse-connect>=0.8.16",
13 |      "truststore>=0.10",
14 |      "chdb>=3.3.0",
15 | ]
16 | 
17 | [project.scripts]
18 | mcp-clickhouse = "mcp_clickhouse.main:main"
19 | 
20 | [project.urls]
21 | Home = "https://github.com/ClickHouse/mcp-clickhouse"
22 | 
23 | [project.optional-dependencies]
24 | dev = [
25 |     "ruff",
26 |     "pytest",
27 |     "pytest-asyncio"
28 | ]
29 | 
30 | [tool.hatch.build.targets.wheel]
31 | packages = ["mcp_clickhouse"]
32 | 
33 | [tool.ruff]
34 | line-length = 100
35 | 
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 | 
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # Build stage - Use a Python image with uv pre-installed
 2 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS builder
 3 | 
 4 | # Install the project into `/app`
 5 | WORKDIR /app
 6 | 
 7 | # Enable bytecode compilation
 8 | ENV UV_COMPILE_BYTECODE=1
 9 | 
10 | # Copy from the cache instead of linking since it's a mounted volume
11 | ENV UV_LINK_MODE=copy
12 | 
13 | # Install git and build dependencies for ClickHouse client
14 | RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
15 |     --mount=type=cache,target=/var/lib/apt,sharing=locked \
16 |     apt-get update && apt-get install -y --no-install-recommends git build-essential
17 | 
18 | # Install the project's dependencies using the lockfile and settings
19 | RUN --mount=type=cache,target=/root/.cache/uv \
20 |     --mount=type=bind,source=uv.lock,target=uv.lock \
21 |     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
22 |     --mount=type=bind,source=README.md,target=README.md \
23 |     uv sync --locked --no-install-project --no-dev
24 | 
25 | # Then, add the rest of the project source code and install it
26 | # Installing separately from its dependencies allows optimal layer caching
27 | COPY . /app
28 | RUN --mount=type=cache,target=/root/.cache/uv \
29 |     uv sync --locked --no-dev --no-editable
30 | 
31 | # Production stage - Use minimal Python image
32 | FROM python:3.13-slim-bookworm
33 | 
34 | # Set the working directory
35 | WORKDIR /app
36 | 
37 | # Copy the virtual environment from the builder stage
38 | COPY --from=builder /app/.venv /app/.venv
39 | 
40 | # Place executables in the environment at the front of the path
41 | ENV PATH="/app/.venv/bin:$PATH"
42 | 
43 | # Run the MCP ClickHouse server by default
44 | CMD ["python", "-m", "mcp_clickhouse.main"]
45 | 
```

--------------------------------------------------------------------------------
/tests/test_chdb_tool.py:
--------------------------------------------------------------------------------

```python
 1 | import unittest
 2 | 
 3 | from dotenv import load_dotenv
 4 | 
 5 | from mcp_clickhouse import create_chdb_client, run_chdb_select_query
 6 | 
 7 | load_dotenv()
 8 | 
 9 | 
10 | class TestChDBTools(unittest.TestCase):
11 |     @classmethod
12 |     def setUpClass(cls):
13 |         """Set up the environment before chDB tests."""
14 |         cls.client = create_chdb_client()
15 | 
16 |     def test_run_chdb_select_query_simple(self):
17 |         """Test running a simple SELECT query in chDB."""
18 |         query = "SELECT 1 as test_value"
19 |         result = run_chdb_select_query(query)
20 |         self.assertIsInstance(result, list)
21 |         self.assertIn("test_value", str(result))
22 | 
23 |     def test_run_chdb_select_query_with_url_table_function(self):
24 |         """Test running a SELECT query with url table function in chDB."""
25 |         query = "SELECT COUNT(1) FROM url('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_0.parquet', 'Parquet')"
26 |         result = run_chdb_select_query(query)
27 |         print(result)
28 |         self.assertIsInstance(result, list)
29 |         self.assertIn("1000000", str(result))
30 | 
31 |     def test_run_chdb_select_query_failure(self):
32 |         """Test running a SELECT query with an error in chDB."""
33 |         query = "SELECT * FROM non_existent_table_chDB"
34 |         result = run_chdb_select_query(query)
35 |         print(result)
36 |         self.assertIsInstance(result, dict)
37 |         self.assertEqual(result["status"], "error")
38 |         self.assertIn("message", result)
39 | 
40 |     def test_run_chdb_select_query_empty_result(self):
41 |         """Test running a SELECT query that returns empty result in chDB."""
42 |         query = "SELECT 1 WHERE 1 = 0"
43 |         result = run_chdb_select_query(query)
44 |         print(result)
45 |         self.assertIsInstance(result, list)
46 |         self.assertEqual(len(result), 0)
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     unittest.main()
51 | 
```

--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 | 
13 |     services:
14 |       clickhouse:
15 |         image: clickhouse/clickhouse-server:24.10
16 |         ports:
17 |           - 9000:9000
18 |           - 8123:8123
19 | 
20 |     steps:
21 |       - name: Checkout repository
22 |         uses: actions/checkout@v4
23 | 
24 |       - name: Install uv
25 |         uses: astral-sh/setup-uv@v4
26 | 
27 |       - name: Setup Python
28 |         run: uv python install 3.13
29 | 
30 |       - name: Install Project
31 |         run: uv sync --all-extras --dev
32 | 
33 |       - name: Run tests
34 |         env:
35 |           CLICKHOUSE_HOST: "localhost"
36 |           CLICKHOUSE_PORT: "8123"
37 |           CLICKHOUSE_USER: "default"
38 |           CLICKHOUSE_PASSWORD: ""
39 |           CLICKHOUSE_SECURE: "false"
40 |           CLICKHOUSE_VERIFY: "false"
41 |           CHDB_ENABLED: "true"
42 |         run: |
43 |           uv run pytest tests
44 | 
45 |       - name: Lint with Ruff
46 |         run: uv run ruff check .
47 | 
48 |   docker-build:
49 |     runs-on: ubuntu-latest
50 | 
51 |     steps:
52 |       - name: Checkout repository
53 |         uses: actions/checkout@v4
54 | 
55 |       - name: Set up Docker Buildx
56 |         uses: docker/setup-buildx-action@v3
57 | 
58 |       - name: Build Docker image
59 |         uses: docker/build-push-action@v5
60 |         with:
61 |           context: .
62 |           push: false
63 |           load: true
64 |           tags: mcp-clickhouse:test
65 |           cache-from: type=gha
66 |           cache-to: type=gha,mode=max
67 | 
68 |       - name: Test Docker image import
69 |         run: |
70 |           docker run --rm mcp-clickhouse:test python -c "import mcp_clickhouse; print('✅ MCP ClickHouse Docker image works!')"
71 | 
72 |       - name: Test Docker image default command
73 |         run: |
74 |           timeout 10s docker run --rm \
75 |             -e CLICKHOUSE_HOST=localhost \
76 |             -e CLICKHOUSE_USER=default \
77 |             -e CLICKHOUSE_PASSWORD="" \
78 |             mcp-clickhouse:test || [ $? = 124 ] && echo "✅ Docker container starts successfully"
79 | 
```

--------------------------------------------------------------------------------
/tests/test_config_interface.py:
--------------------------------------------------------------------------------

```python
 1 | import pytest
 2 | 
 3 | from mcp_clickhouse.mcp_env import ClickHouseConfig
 4 | 
 5 | 
 6 | def test_interface_http_when_secure_false(monkeypatch: pytest.MonkeyPatch):
 7 |     """Test that interface is set to 'http' when CLICKHOUSE_SECURE=false."""
 8 |     monkeypatch.setenv("CLICKHOUSE_HOST", "localhost")
 9 |     monkeypatch.setenv("CLICKHOUSE_USER", "test")
10 |     monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
11 |     monkeypatch.setenv("CLICKHOUSE_SECURE", "false")
12 |     monkeypatch.setenv("CLICKHOUSE_PORT", "8123")
13 | 
14 |     config = ClickHouseConfig()
15 |     client_config = config.get_client_config()
16 | 
17 |     assert client_config["interface"] == "http"
18 |     assert client_config["secure"] is False
19 |     assert client_config["port"] == 8123
20 | 
21 | 
22 | def test_interface_https_when_secure_true(monkeypatch: pytest.MonkeyPatch):
23 |     """Test that interface is set to 'https' when CLICKHOUSE_SECURE=true."""
24 |     monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
25 |     monkeypatch.setenv("CLICKHOUSE_USER", "test")
26 |     monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
27 |     monkeypatch.setenv("CLICKHOUSE_SECURE", "true")
28 |     monkeypatch.setenv("CLICKHOUSE_PORT", "8443")
29 | 
30 |     config = ClickHouseConfig()
31 |     client_config = config.get_client_config()
32 | 
33 |     assert client_config["interface"] == "https"
34 |     assert client_config["secure"] is True
35 |     assert client_config["port"] == 8443
36 | 
37 | 
38 | def test_interface_https_by_default(monkeypatch: pytest.MonkeyPatch):
39 |     """Test that interface defaults to 'https' when CLICKHOUSE_SECURE is not set."""
40 |     monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
41 |     monkeypatch.setenv("CLICKHOUSE_USER", "test")
42 |     monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
43 |     monkeypatch.delenv("CLICKHOUSE_SECURE", raising=False)
44 |     monkeypatch.delenv("CLICKHOUSE_PORT", raising=False)
45 | 
46 |     config = ClickHouseConfig()
47 |     client_config = config.get_client_config()
48 | 
49 |     assert client_config["interface"] == "https"
50 |     assert client_config["secure"] is True
51 |     assert client_config["port"] == 8443
52 | 
53 | 
54 | def test_interface_http_with_custom_port(monkeypatch: pytest.MonkeyPatch):
55 |     """Test that interface is 'http' with custom port when CLICKHOUSE_SECURE=false."""
56 |     monkeypatch.setenv("CLICKHOUSE_HOST", "localhost")
57 |     monkeypatch.setenv("CLICKHOUSE_USER", "test")
58 |     monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
59 |     monkeypatch.setenv("CLICKHOUSE_SECURE", "false")
60 |     monkeypatch.setenv("CLICKHOUSE_PORT", "9000")
61 | 
62 |     config = ClickHouseConfig()
63 |     client_config = config.get_client_config()
64 | 
65 |     assert client_config["interface"] == "http"
66 |     assert client_config["secure"] is False
67 |     assert client_config["port"] == 9000
68 | 
69 | 
70 | def test_interface_https_with_custom_port(monkeypatch: pytest.MonkeyPatch):
71 |     """Test that interface is 'https' with custom port when CLICKHOUSE_SECURE=true."""
72 |     monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
73 |     monkeypatch.setenv("CLICKHOUSE_USER", "test")
74 |     monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
75 |     monkeypatch.setenv("CLICKHOUSE_SECURE", "true")
76 |     monkeypatch.setenv("CLICKHOUSE_PORT", "9443")
77 | 
78 |     config = ClickHouseConfig()
79 |     client_config = config.get_client_config()
80 | 
81 |     assert client_config["interface"] == "https"
82 |     assert client_config["secure"] is True
83 |     assert client_config["port"] == 9443
84 | 
```

--------------------------------------------------------------------------------
/tests/test_tool.py:
--------------------------------------------------------------------------------

```python
  1 | import unittest
  2 | import json
  3 | 
  4 | from dotenv import load_dotenv
  5 | from fastmcp.exceptions import ToolError
  6 | 
  7 | from mcp_clickhouse import create_clickhouse_client, list_databases, list_tables, run_select_query
  8 | 
  9 | load_dotenv()
 10 | 
 11 | 
 12 | class TestClickhouseTools(unittest.TestCase):
 13 |     @classmethod
 14 |     def setUpClass(cls):
 15 |         """Set up the environment before tests."""
 16 |         cls.client = create_clickhouse_client()
 17 | 
 18 |         # Prepare test database and table
 19 |         cls.test_db = "test_tool_db"
 20 |         cls.test_table = "test_table"
 21 |         cls.client.command(f"CREATE DATABASE IF NOT EXISTS {cls.test_db}")
 22 | 
 23 |         # Drop table if exists to ensure clean state
 24 |         cls.client.command(f"DROP TABLE IF EXISTS {cls.test_db}.{cls.test_table}")
 25 | 
 26 |         # Create table with comments
 27 |         cls.client.command(f"""
 28 |             CREATE TABLE {cls.test_db}.{cls.test_table} (
 29 |                 id UInt32 COMMENT 'Primary identifier',
 30 |                 name String COMMENT 'User name field'
 31 |             ) ENGINE = MergeTree()
 32 |             ORDER BY id
 33 |             COMMENT 'Test table for unit testing'
 34 |         """)
 35 |         cls.client.command(f"""
 36 |             INSERT INTO {cls.test_db}.{cls.test_table} (id, name) VALUES (1, 'Alice'), (2, 'Bob')
 37 |         """)
 38 | 
 39 |     @classmethod
 40 |     def tearDownClass(cls):
 41 |         """Clean up the environment after tests."""
 42 |         cls.client.command(f"DROP DATABASE IF EXISTS {cls.test_db}")
 43 | 
 44 |     def test_list_databases(self):
 45 |         """Test listing databases."""
 46 |         result = list_databases()
 47 |         # Parse JSON response
 48 |         databases = json.loads(result)
 49 |         self.assertIn(self.test_db, databases)
 50 | 
 51 |     def test_list_tables_without_like(self):
 52 |         """Test listing tables without a 'LIKE' filter."""
 53 |         result = list_tables(self.test_db)
 54 |         self.assertIsInstance(result, list)
 55 |         self.assertEqual(len(result), 1)
 56 |         self.assertEqual(result[0]["name"], self.test_table)
 57 | 
 58 |     def test_list_tables_with_like(self):
 59 |         """Test listing tables with a 'LIKE' filter."""
 60 |         result = list_tables(self.test_db, like=f"{self.test_table}%")
 61 |         self.assertIsInstance(result, list)
 62 |         self.assertEqual(len(result), 1)
 63 |         self.assertEqual(result[0]["name"], self.test_table)
 64 | 
 65 |     def test_run_select_query_success(self):
 66 |         """Test running a SELECT query successfully."""
 67 |         query = f"SELECT * FROM {self.test_db}.{self.test_table}"
 68 |         result = run_select_query(query)
 69 |         self.assertIsInstance(result, dict)
 70 |         self.assertEqual(len(result["rows"]), 2)
 71 |         self.assertEqual(result["rows"][0][0], 1)
 72 |         self.assertEqual(result["rows"][0][1], "Alice")
 73 | 
 74 |     def test_run_select_query_failure(self):
 75 |         """Test running a SELECT query with an error."""
 76 |         query = f"SELECT * FROM {self.test_db}.non_existent_table"
 77 | 
 78 |         # Should raise ToolError
 79 |         with self.assertRaises(ToolError) as context:
 80 |             run_select_query(query)
 81 | 
 82 |         self.assertIn("Query execution failed", str(context.exception))
 83 | 
 84 |     def test_table_and_column_comments(self):
 85 |         """Test that table and column comments are correctly retrieved."""
 86 |         result = list_tables(self.test_db)
 87 |         self.assertIsInstance(result, list)
 88 |         self.assertEqual(len(result), 1)
 89 | 
 90 |         table_info = result[0]
 91 |         # Verify table comment
 92 |         self.assertEqual(table_info["comment"], "Test table for unit testing")
 93 | 
 94 |         # Get columns by name for easier testing
 95 |         columns = {col["name"]: col for col in table_info["columns"]}
 96 | 
 97 |         # Verify column comments
 98 |         self.assertEqual(columns["id"]["comment"], "Primary identifier")
 99 |         self.assertEqual(columns["name"]["comment"], "User name field")
100 | 
101 | 
102 | if __name__ == "__main__":
103 |     unittest.main()
104 | 
```

--------------------------------------------------------------------------------
/mcp_clickhouse/chdb_prompt.py:
--------------------------------------------------------------------------------

```python
  1 | """chDB prompts for MCP server."""
  2 | 
  3 | CHDB_PROMPT = """
  4 | # chDB MCP System Prompt
  5 | 
  6 | ## Available Tools
  7 | - **run_chdb_select_query**: Execute SELECT queries using chDB's table functions
  8 | 
  9 | ## Core Principles
 10 | You are a chDB assistant, specialized in helping users query data sources directly through table functions, **avoiding data imports**.
 11 | 
 12 | ### 🚨 Important Constraints
 13 | #### Data Processing Constraints
 14 | - **No large data display**: Don't show more than 10 rows of raw data in responses
 15 | - **Use analysis tool**: All data processing must be completed in the analysis tool
 16 | - **Result-oriented output**: Only provide query results and key insights, not intermediate processing data
 17 | - **Avoid context explosion**: Don't paste large amounts of raw data or complete tables
 18 | 
 19 | #### Query Strategy Constraints
 20 | - **Prioritize table functions**: When users mention import/load/insert, immediately recommend table functions
 21 | - **Direct querying**: All data should be queried in place through table functions
 22 | - **Fallback option**: When no suitable table function exists, use Python to download temporary files then process with file()
 23 | - **Concise responses**: Avoid lengthy explanations, provide executable SQL directly
 24 | 
 25 | ## Table Functions
 26 | 
 27 | ### File Types
 28 | ```sql
 29 | -- Local files (auto format detection)
 30 | file('path/to/file.csv')
 31 | file('data.parquet', 'Parquet')
 32 | 
 33 | -- Remote files
 34 | url('https://example.com/data.csv', 'CSV')
 35 | url('https://example.com/data.parquet')
 36 | 
 37 | -- S3 storage
 38 | s3('s3://bucket/path/file.csv', 'CSV')
 39 | s3('s3://bucket/path/*.parquet', 'access_key', 'secret_key', 'Parquet')
 40 | 
 41 | -- HDFS
 42 | hdfs('hdfs://namenode:9000/path/file.parquet')
 43 | ```
 44 | 
 45 | ### Database Types
 46 | ```sql
 47 | -- PostgreSQL
 48 | postgresql('host:port', 'database', 'table', 'user', 'password')
 49 | 
 50 | -- MySQL
 51 | mysql('host:port', 'database', 'table', 'user', 'password')
 52 | 
 53 | -- SQLite
 54 | sqlite('path/to/database.db', 'table')
 55 | ```
 56 | 
 57 | ### Common Formats
 58 | - `CSV`, `CSVWithNames`, `TSV`, `TSVWithNames`
 59 | - `JSON`, `JSONEachRow`, `JSONCompact`
 60 | - `Parquet`, `ORC`, `Avro`
 61 | 
 62 | ## Workflow
 63 | 
 64 | ### 1. Identify Data Source
 65 | - User mentions URL → `url()`
 66 | - User mentions S3 → `s3()`
 67 | - User mentions local file → `file()`
 68 | - User mentions database → corresponding database function
 69 | - **No suitable table function** → Use Python to download as temporary file
 70 | 
 71 | ### 2. Fallback: Python Download
 72 | When no suitable table function exists:
 73 | ```python
 74 | # Execute in analysis tool
 75 | import requests
 76 | import tempfile
 77 | import os
 78 | 
 79 | # Download data to temporary file
 80 | response = requests.get('your_data_url')
 81 | 
 82 | with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
 83 |     f.write(response.text)
 84 |     temp_file = f.name
 85 | 
 86 | # Execute chDB query immediately within the block
 87 | try:
 88 |     # Use run_chdb_select_query to execute query
 89 |     result = run_chdb_select_query(f"SELECT * FROM file('{temp_file}', 'CSV') LIMIT 10")
 90 |     print(result)
 91 | finally:
 92 |     # Ensure temporary file deletion
 93 |     if os.path.exists(temp_file):
 94 |         os.unlink(temp_file)
 95 | ```
 96 | 
 97 | ### 3. Quick Testing
 98 | ```sql
 99 | -- Test connection (default LIMIT 10)
100 | SELECT * FROM table_function(...) LIMIT 10;
101 | 
102 | -- View structure
103 | DESCRIBE table_function(...);
104 | ```
105 | 
106 | ### 4. Build Queries
107 | ```sql
108 | -- Basic query (default LIMIT 10)
109 | SELECT column1, column2 FROM table_function(...) WHERE condition LIMIT 10;
110 | 
111 | -- Aggregation analysis
112 | SELECT category, COUNT(*), AVG(price) 
113 | FROM table_function(...) 
114 | GROUP BY category 
115 | LIMIT 10;
116 | 
117 | -- Multi-source join
118 | SELECT a.id, b.name 
119 | FROM file('data1.csv') a 
120 | JOIN url('https://example.com/data2.csv', 'CSV') b ON a.id = b.id
121 | LIMIT 10;
122 | ```
123 | 
124 | ## Response Patterns
125 | 
126 | ### When Users Ask About Data Import
127 | 1. **Immediate stop**: "No need to import data, chDB can query directly"
128 | 2. **Recommend solution**: Provide corresponding table function based on data source type
129 | 3. **Fallback option**: If no suitable table function, explain using Python to download temporary file
130 | 4. **Provide examples**: Give specific SQL statements
131 | 5. **Follow constraints**: Complete all data processing in analysis tool, only output key results
132 | 
133 | ### Example Dialogues
134 | ```
135 | User: "How to import this CSV file into chDB?"
136 | Assistant: "No need to import! Query directly:
137 | SELECT * FROM file('your_file.csv') LIMIT 10;
138 | What analysis do you want?"
139 | 
140 | User: "This API endpoint doesn't have direct table function support"
141 | Assistant: "I'll use Python to download data to a temporary file, then query with file().
142 | Let me process the data in the analysis tool first..."
143 | ```
144 | 
145 | ## Output Constraints
146 | - **Avoid**: Displaying large amounts of raw data, complete tables, intermediate processing steps
147 | - **Recommend**: Concise statistical summaries, key insights, executable SQL
148 | - **Interaction**: Provide overview first, ask for specific needs before deep analysis
149 | 
150 | ## Optimization Tips
151 | - Use WHERE filtering to reduce data transfer
152 | - SELECT specific columns to avoid full table scans
153 | - **Default use LIMIT 10** to prevent large data output
154 | - Test connection with LIMIT 1 for large datasets first
155 | """
156 | 
```

--------------------------------------------------------------------------------
/mcp_clickhouse/mcp_env.py:
--------------------------------------------------------------------------------

```python
  1 | """Environment configuration for the MCP ClickHouse server.
  2 | 
  3 | This module handles all environment variable configuration with sensible defaults
  4 | and type conversion.
  5 | """
  6 | 
  7 | from dataclasses import dataclass
  8 | import os
  9 | from typing import Optional
 10 | from enum import Enum
 11 | 
 12 | 
 13 | class TransportType(str, Enum):
 14 |     """Supported MCP server transport types."""
 15 | 
 16 |     STDIO = "stdio"
 17 |     HTTP = "http"
 18 |     SSE = "sse"
 19 | 
 20 |     @classmethod
 21 |     def values(cls) -> list[str]:
 22 |         """Get all valid transport values."""
 23 |         return [transport.value for transport in cls]
 24 | 
 25 | 
 26 | @dataclass
 27 | class ClickHouseConfig:
 28 |     """Configuration for ClickHouse connection settings.
 29 | 
 30 |     This class handles all environment variable configuration with sensible defaults
 31 |     and type conversion. It provides typed methods for accessing each configuration value.
 32 | 
 33 |     Required environment variables (only when CLICKHOUSE_ENABLED=true):
 34 |         CLICKHOUSE_HOST: The hostname of the ClickHouse server
 35 |         CLICKHOUSE_USER: The username for authentication
 36 |         CLICKHOUSE_PASSWORD: The password for authentication
 37 | 
 38 |     Optional environment variables (with defaults):
 39 |         CLICKHOUSE_PORT: The port number (default: 8443 if secure=True, 8123 if secure=False)
 40 |         CLICKHOUSE_SECURE: Enable HTTPS (default: true)
 41 |         CLICKHOUSE_VERIFY: Verify SSL certificates (default: true)
 42 |         CLICKHOUSE_CONNECT_TIMEOUT: Connection timeout in seconds (default: 30)
 43 |         CLICKHOUSE_SEND_RECEIVE_TIMEOUT: Send/receive timeout in seconds (default: 300)
 44 |         CLICKHOUSE_DATABASE: Default database to use (default: None)
 45 |         CLICKHOUSE_PROXY_PATH: Path to be added to the host URL. For instance, for servers behind an HTTP proxy (default: None)
 46 |         CLICKHOUSE_ENABLED: Enable ClickHouse server (default: true)
 47 |     """
 48 | 
 49 |     def __init__(self):
 50 |         """Initialize the configuration from environment variables."""
 51 |         if self.enabled:
 52 |             self._validate_required_vars()
 53 | 
 54 |     @property
 55 |     def enabled(self) -> bool:
 56 |         """Get whether ClickHouse server is enabled.
 57 | 
 58 |         Default: True
 59 |         """
 60 |         return os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true"
 61 | 
 62 |     @property
 63 |     def host(self) -> str:
 64 |         """Get the ClickHouse host."""
 65 |         return os.environ["CLICKHOUSE_HOST"]
 66 | 
 67 |     @property
 68 |     def port(self) -> int:
 69 |         """Get the ClickHouse port.
 70 | 
 71 |         Defaults to 8443 if secure=True, 8123 if secure=False.
 72 |         Can be overridden by CLICKHOUSE_PORT environment variable.
 73 |         """
 74 |         if "CLICKHOUSE_PORT" in os.environ:
 75 |             return int(os.environ["CLICKHOUSE_PORT"])
 76 |         return 8443 if self.secure else 8123
 77 | 
 78 |     @property
 79 |     def username(self) -> str:
 80 |         """Get the ClickHouse username."""
 81 |         return os.environ["CLICKHOUSE_USER"]
 82 | 
 83 |     @property
 84 |     def password(self) -> str:
 85 |         """Get the ClickHouse password."""
 86 |         return os.environ["CLICKHOUSE_PASSWORD"]
 87 | 
 88 |     @property
 89 |     def database(self) -> Optional[str]:
 90 |         """Get the default database name if set."""
 91 |         return os.getenv("CLICKHOUSE_DATABASE")
 92 | 
 93 |     @property
 94 |     def secure(self) -> bool:
 95 |         """Get whether HTTPS is enabled.
 96 | 
 97 |         Default: True
 98 |         """
 99 |         return os.getenv("CLICKHOUSE_SECURE", "true").lower() == "true"
100 | 
101 |     @property
102 |     def verify(self) -> bool:
103 |         """Get whether SSL certificate verification is enabled.
104 | 
105 |         Default: True
106 |         """
107 |         return os.getenv("CLICKHOUSE_VERIFY", "true").lower() == "true"
108 | 
109 |     @property
110 |     def connect_timeout(self) -> int:
111 |         """Get the connection timeout in seconds.
112 | 
113 |         Default: 30
114 |         """
115 |         return int(os.getenv("CLICKHOUSE_CONNECT_TIMEOUT", "30"))
116 | 
117 |     @property
118 |     def send_receive_timeout(self) -> int:
119 |         """Get the send/receive timeout in seconds.
120 | 
121 |         Default: 300 (ClickHouse default)
122 |         """
123 |         return int(os.getenv("CLICKHOUSE_SEND_RECEIVE_TIMEOUT", "300"))
124 | 
125 |     @property
126 |     def proxy_path(self) -> str:
127 |         return os.getenv("CLICKHOUSE_PROXY_PATH")
128 | 
129 |     def get_client_config(self) -> dict:
130 |         """Get the configuration dictionary for clickhouse_connect client.
131 | 
132 |         Returns:
133 |             dict: Configuration ready to be passed to clickhouse_connect.get_client()
134 |         """
135 |         config = {
136 |             "host": self.host,
137 |             "port": self.port,
138 |             "username": self.username,
139 |             "password": self.password,
140 |             "interface": "https" if self.secure else "http",
141 |             "secure": self.secure,
142 |             "verify": self.verify,
143 |             "connect_timeout": self.connect_timeout,
144 |             "send_receive_timeout": self.send_receive_timeout,
145 |             "client_name": "mcp_clickhouse",
146 |         }
147 | 
148 |         # Add optional database if set
149 |         if self.database:
150 |             config["database"] = self.database
151 | 
152 |         if self.proxy_path:
153 |             config["proxy_path"] = self.proxy_path
154 | 
155 |         return config
156 | 
157 |     def _validate_required_vars(self) -> None:
158 |         """Validate that all required environment variables are set.
159 | 
160 |         Raises:
161 |             ValueError: If any required environment variable is missing.
162 |         """
163 |         missing_vars = []
164 |         for var in ["CLICKHOUSE_HOST", "CLICKHOUSE_USER", "CLICKHOUSE_PASSWORD"]:
165 |             if var not in os.environ:
166 |                 missing_vars.append(var)
167 | 
168 |         if missing_vars:
169 |             raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
170 | 
171 | 
172 | @dataclass
173 | class ChDBConfig:
174 |     """Configuration for chDB connection settings.
175 | 
176 |     This class handles all environment variable configuration with sensible defaults
177 |     and type conversion. It provides typed methods for accessing each configuration value.
178 | 
179 |     Required environment variables:
180 |         CHDB_DATA_PATH: The path to the chDB data directory (only required if CHDB_ENABLED=true)
181 |     """
182 | 
183 |     def __init__(self):
184 |         """Initialize the configuration from environment variables."""
185 |         if self.enabled:
186 |             self._validate_required_vars()
187 | 
188 |     @property
189 |     def enabled(self) -> bool:
190 |         """Get whether chDB is enabled.
191 | 
192 |         Default: False
193 |         """
194 |         return os.getenv("CHDB_ENABLED", "false").lower() == "true"
195 | 
196 |     @property
197 |     def data_path(self) -> str:
198 |         """Get the chDB data path."""
199 |         return os.getenv("CHDB_DATA_PATH", ":memory:")
200 | 
201 |     def get_client_config(self) -> dict:
202 |         """Get the configuration dictionary for chDB client.
203 | 
204 |         Returns:
205 |             dict: Configuration ready to be passed to chDB client
206 |         """
207 |         return {
208 |             "data_path": self.data_path,
209 |         }
210 | 
211 |     def _validate_required_vars(self) -> None:
212 |         """Validate that all required environment variables are set.
213 | 
214 |         Raises:
215 |             ValueError: If any required environment variable is missing.
216 |         """
217 |         pass
218 | 
219 | 
220 | # Global instance placeholders for the singleton pattern
221 | _CONFIG_INSTANCE = None
222 | _CHDB_CONFIG_INSTANCE = None
223 | 
224 | 
225 | def get_config():
226 |     """
227 |     Gets the singleton instance of ClickHouseConfig.
228 |     Instantiates it on the first call.
229 |     """
230 |     global _CONFIG_INSTANCE
231 |     if _CONFIG_INSTANCE is None:
232 |         # Instantiate the config object here, ensuring load_dotenv() has likely run
233 |         _CONFIG_INSTANCE = ClickHouseConfig()
234 |     return _CONFIG_INSTANCE
235 | 
236 | 
237 | def get_chdb_config() -> ChDBConfig:
238 |     """
239 |     Gets the singleton instance of ChDBConfig.
240 |     Instantiates it on the first call.
241 | 
242 |     Returns:
243 |         ChDBConfig: The chDB configuration instance
244 |     """
245 |     global _CHDB_CONFIG_INSTANCE
246 |     if _CHDB_CONFIG_INSTANCE is None:
247 |         _CHDB_CONFIG_INSTANCE = ChDBConfig()
248 |     return _CHDB_CONFIG_INSTANCE
249 | 
250 | 
251 | @dataclass
252 | class MCPServerConfig:
253 |     """Configuration for MCP server-level settings.
254 | 
255 |     These settings control the server transport and tool behavior and are
256 |     intentionally independent of ClickHouse connection validation.
257 | 
258 |     Optional environment variables (with defaults):
259 |         CLICKHOUSE_MCP_SERVER_TRANSPORT: "stdio", "http", or "sse" (default: stdio)
260 |         CLICKHOUSE_MCP_BIND_HOST: Bind host for HTTP/SSE (default: 127.0.0.1)
261 |         CLICKHOUSE_MCP_BIND_PORT: Bind port for HTTP/SSE (default: 8000)
262 |         CLICKHOUSE_MCP_QUERY_TIMEOUT: SELECT tool timeout in seconds (default: 30)
263 |     """
264 | 
265 |     @property
266 |     def server_transport(self) -> str:
267 |         transport = os.getenv("CLICKHOUSE_MCP_SERVER_TRANSPORT", TransportType.STDIO.value).lower()
268 |         if transport not in TransportType.values():
269 |             valid_options = ", ".join(f'"{t}"' for t in TransportType.values())
270 |             raise ValueError(f"Invalid transport '{transport}'. Valid options: {valid_options}")
271 |         return transport
272 | 
273 |     @property
274 |     def bind_host(self) -> str:
275 |         return os.getenv("CLICKHOUSE_MCP_BIND_HOST", "127.0.0.1")
276 | 
277 |     @property
278 |     def bind_port(self) -> int:
279 |         return int(os.getenv("CLICKHOUSE_MCP_BIND_PORT", "8000"))
280 | 
281 |     @property
282 |     def query_timeout(self) -> int:
283 |         return int(os.getenv("CLICKHOUSE_MCP_QUERY_TIMEOUT", "30"))
284 | 
285 | 
286 | _MCP_CONFIG_INSTANCE = None
287 | 
288 | 
289 | def get_mcp_config() -> MCPServerConfig:
290 |     """Gets the singleton instance of MCPServerConfig."""
291 |     global _MCP_CONFIG_INSTANCE
292 |     if _MCP_CONFIG_INSTANCE is None:
293 |         _MCP_CONFIG_INSTANCE = MCPServerConfig()
294 |     return _MCP_CONFIG_INSTANCE
295 | 
```

--------------------------------------------------------------------------------
/tests/test_mcp_server.py:
--------------------------------------------------------------------------------

```python
  1 | import pytest
  2 | import pytest_asyncio
  3 | from fastmcp import Client
  4 | from fastmcp.exceptions import ToolError
  5 | import asyncio
  6 | from mcp_clickhouse.mcp_server import mcp, create_clickhouse_client
  7 | from dotenv import load_dotenv
  8 | import json
  9 | 
 10 | # Load environment variables
 11 | load_dotenv()
 12 | 
 13 | 
 14 | @pytest.fixture(scope="module")
 15 | def event_loop():
 16 |     """Create an instance of the default event loop for the test session."""
 17 |     loop = asyncio.get_event_loop_policy().new_event_loop()
 18 |     yield loop
 19 |     loop.close()
 20 | 
 21 | 
 22 | @pytest_asyncio.fixture(scope="module")
 23 | async def setup_test_database():
 24 |     """Set up test database and tables before running tests."""
 25 |     client = create_clickhouse_client()
 26 | 
 27 |     # Test database and table names
 28 |     test_db = "test_mcp_db"
 29 |     test_table = "test_table"
 30 |     test_table2 = "another_test_table"
 31 | 
 32 |     # Create test database
 33 |     client.command(f"CREATE DATABASE IF NOT EXISTS {test_db}")
 34 | 
 35 |     # Drop tables if they exist
 36 |     client.command(f"DROP TABLE IF EXISTS {test_db}.{test_table}")
 37 |     client.command(f"DROP TABLE IF EXISTS {test_db}.{test_table2}")
 38 | 
 39 |     # Create first test table with comments
 40 |     client.command(f"""
 41 |         CREATE TABLE {test_db}.{test_table} (
 42 |             id UInt32 COMMENT 'Primary identifier',
 43 |             name String COMMENT 'User name field',
 44 |             age UInt8 COMMENT 'User age',
 45 |             created_at DateTime DEFAULT now() COMMENT 'Record creation timestamp'
 46 |         ) ENGINE = MergeTree()
 47 |         ORDER BY id
 48 |         COMMENT 'Test table for MCP server testing'
 49 |     """)
 50 | 
 51 |     # Create second test table
 52 |     client.command(f"""
 53 |         CREATE TABLE {test_db}.{test_table2} (
 54 |             event_id UInt64,
 55 |             event_type String,
 56 |             timestamp DateTime
 57 |         ) ENGINE = MergeTree()
 58 |         ORDER BY (event_type, timestamp)
 59 |         COMMENT 'Event tracking table'
 60 |     """)
 61 | 
 62 |     # Insert test data
 63 |     client.command(f"""
 64 |         INSERT INTO {test_db}.{test_table} (id, name, age) VALUES
 65 |         (1, 'Alice', 30),
 66 |         (2, 'Bob', 25),
 67 |         (3, 'Charlie', 35),
 68 |         (4, 'Diana', 28)
 69 |     """)
 70 | 
 71 |     client.command(f"""
 72 |         INSERT INTO {test_db}.{test_table2} (event_id, event_type, timestamp) VALUES
 73 |         (1001, 'login', '2024-01-01 10:00:00'),
 74 |         (1002, 'logout', '2024-01-01 11:00:00'),
 75 |         (1003, 'login', '2024-01-01 12:00:00')
 76 |     """)
 77 | 
 78 |     yield test_db, test_table, test_table2
 79 | 
 80 |     # Cleanup after tests
 81 |     client.command(f"DROP DATABASE IF EXISTS {test_db}")
 82 | 
 83 | 
 84 | @pytest.fixture
 85 | def mcp_server():
 86 |     """Return the MCP server instance for testing."""
 87 |     return mcp
 88 | 
 89 | 
 90 | @pytest.mark.asyncio
 91 | async def test_list_databases(mcp_server, setup_test_database):
 92 |     """Test the list_databases tool."""
 93 |     test_db, _, _ = setup_test_database
 94 | 
 95 |     async with Client(mcp_server) as client:
 96 |         result = await client.call_tool("list_databases", {})
 97 | 
 98 |         # The result should be a list containing at least one item
 99 |         assert len(result.content) >= 1
100 |         assert isinstance(result.content[0].text, str)
101 | 
102 |         # Parse the result text (it's a JSON list of database names)
103 |         databases = json.loads(result.content[0].text)
104 |         assert test_db in databases
105 |         assert "system" in databases  # System database should always exist
106 | 
107 | 
108 | @pytest.mark.asyncio
109 | async def test_list_tables_basic(mcp_server, setup_test_database):
110 |     """Test the list_tables tool without filters."""
111 |     test_db, test_table, test_table2 = setup_test_database
112 | 
113 |     async with Client(mcp_server) as client:
114 |         result = await client.call_tool("list_tables", {"database": test_db})
115 | 
116 |         assert len(result.content) >= 1
117 |         tables = json.loads(result.content[0].text)
118 | 
119 |         # Should have exactly 2 tables
120 |         assert len(tables) == 2
121 | 
122 |         # Get table names
123 |         table_names = [table["name"] for table in tables]
124 |         assert test_table in table_names
125 |         assert test_table2 in table_names
126 | 
127 |         # Check table details
128 |         for table in tables:
129 |             assert table["database"] == test_db
130 |             assert "columns" in table
131 |             assert "total_rows" in table
132 |             assert "engine" in table
133 |             assert "comment" in table
134 | 
135 |             # Verify column information exists
136 |             assert len(table["columns"]) > 0
137 |             for column in table["columns"]:
138 |                 assert "name" in column
139 |                 assert "column_type" in column
140 |                 assert "comment" in column
141 | 
142 | 
143 | @pytest.mark.asyncio
144 | async def test_list_tables_with_like_filter(mcp_server, setup_test_database):
145 |     """Test the list_tables tool with LIKE filter."""
146 |     test_db, test_table, _ = setup_test_database
147 | 
148 |     async with Client(mcp_server) as client:
149 |         # Test with LIKE filter
150 |         result = await client.call_tool("list_tables", {"database": test_db, "like": "test_%"})
151 | 
152 |         tables_data = json.loads(result.content[0].text)
153 | 
154 |         # Handle both single dict and list of dicts
155 |         if isinstance(tables_data, dict):
156 |             tables = [tables_data]
157 |         else:
158 |             tables = tables_data
159 | 
160 |         assert len(tables) == 1
161 |         assert tables[0]["name"] == test_table
162 | 
163 | 
164 | @pytest.mark.asyncio
165 | async def test_list_tables_with_not_like_filter(mcp_server, setup_test_database):
166 |     """Test the list_tables tool with NOT LIKE filter."""
167 |     test_db, _, test_table2 = setup_test_database
168 | 
169 |     async with Client(mcp_server) as client:
170 |         # Test with NOT LIKE filter
171 |         result = await client.call_tool("list_tables", {"database": test_db, "not_like": "test_%"})
172 | 
173 |         tables_data = json.loads(result.content[0].text)
174 | 
175 |         # Handle both single dict and list of dicts
176 |         if isinstance(tables_data, dict):
177 |             tables = [tables_data]
178 |         else:
179 |             tables = tables_data
180 | 
181 |         assert len(tables) == 1
182 |         assert tables[0]["name"] == test_table2
183 | 
184 | 
185 | @pytest.mark.asyncio
186 | async def test_run_select_query_success(mcp_server, setup_test_database):
187 |     """Test running a successful SELECT query."""
188 |     test_db, test_table, _ = setup_test_database
189 | 
190 |     async with Client(mcp_server) as client:
191 |         query = f"SELECT id, name, age FROM {test_db}.{test_table} ORDER BY id"
192 |         result = await client.call_tool("run_select_query", {"query": query})
193 | 
194 |         query_result = json.loads(result.content[0].text)
195 | 
196 |         # Check structure
197 |         assert "columns" in query_result
198 |         assert "rows" in query_result
199 | 
200 |         # Check columns
201 |         assert query_result["columns"] == ["id", "name", "age"]
202 | 
203 |         # Check rows
204 |         assert len(query_result["rows"]) == 4
205 |         assert query_result["rows"][0] == [1, "Alice", 30]
206 |         assert query_result["rows"][1] == [2, "Bob", 25]
207 |         assert query_result["rows"][2] == [3, "Charlie", 35]
208 |         assert query_result["rows"][3] == [4, "Diana", 28]
209 | 
210 | 
211 | @pytest.mark.asyncio
212 | async def test_run_select_query_with_aggregation(mcp_server, setup_test_database):
213 |     """Test running a SELECT query with aggregation."""
214 |     test_db, test_table, _ = setup_test_database
215 | 
216 |     async with Client(mcp_server) as client:
217 |         query = f"SELECT COUNT(*) as count, AVG(age) as avg_age FROM {test_db}.{test_table}"
218 |         result = await client.call_tool("run_select_query", {"query": query})
219 | 
220 |         query_result = json.loads(result.content[0].text)
221 | 
222 |         assert query_result["columns"] == ["count", "avg_age"]
223 |         assert len(query_result["rows"]) == 1
224 |         assert query_result["rows"][0][0] == 4  # count
225 |         assert query_result["rows"][0][1] == 29.5  # average age
226 | 
227 | 
228 | @pytest.mark.asyncio
229 | async def test_run_select_query_with_join(mcp_server, setup_test_database):
230 |     """Test running a SELECT query with JOIN."""
231 |     test_db, test_table, test_table2 = setup_test_database
232 | 
233 |     async with Client(mcp_server) as client:
234 |         # Insert related data for join
235 |         client_direct = create_clickhouse_client()
236 |         client_direct.command(f"""
237 |             INSERT INTO {test_db}.{test_table2} (event_id, event_type, timestamp) VALUES
238 |             (2001, 'purchase', '2024-01-01 14:00:00')
239 |         """)
240 | 
241 |         query = f"""
242 |         SELECT
243 |             COUNT(DISTINCT event_type) as event_types_count
244 |         FROM {test_db}.{test_table2}
245 |         """
246 |         result = await client.call_tool("run_select_query", {"query": query})
247 | 
248 |         query_result = json.loads(result.content[0].text)
249 |         assert query_result["rows"][0][0] == 3  # login, logout, purchase
250 | 
251 | 
252 | @pytest.mark.asyncio
253 | async def test_run_select_query_error(mcp_server, setup_test_database):
254 |     """Test running a SELECT query that results in an error."""
255 |     test_db, _, _ = setup_test_database
256 | 
257 |     async with Client(mcp_server) as client:
258 |         # Query non-existent table
259 |         query = f"SELECT * FROM {test_db}.non_existent_table"
260 | 
261 |         # Should raise ToolError
262 |         with pytest.raises(ToolError) as exc_info:
263 |             await client.call_tool("run_select_query", {"query": query})
264 | 
265 |         assert "Query execution failed" in str(exc_info.value)
266 | 
267 | 
268 | @pytest.mark.asyncio
269 | async def test_run_select_query_syntax_error(mcp_server):
270 |     """Test running a SELECT query with syntax error."""
271 |     async with Client(mcp_server) as client:
272 |         # Invalid SQL syntax
273 |         query = "SELECT FROM WHERE"
274 | 
275 |         # Should raise ToolError
276 |         with pytest.raises(ToolError) as exc_info:
277 |             await client.call_tool("run_select_query", {"query": query})
278 | 
279 |         assert "Query execution failed" in str(exc_info.value)
280 | 
281 | 
282 | @pytest.mark.asyncio
283 | async def test_table_metadata_details(mcp_server, setup_test_database):
284 |     """Test that table metadata is correctly retrieved."""
285 |     test_db, test_table, _ = setup_test_database
286 | 
287 |     async with Client(mcp_server) as client:
288 |         result = await client.call_tool("list_tables", {"database": test_db})
289 |         tables = json.loads(result.content[0].text)
290 | 
291 |         # Find our test table
292 |         test_table_info = next(t for t in tables if t["name"] == test_table)
293 | 
294 |         # Check table comment
295 |         assert test_table_info["comment"] == "Test table for MCP server testing"
296 | 
297 |         # Check engine info
298 |         assert test_table_info["engine"] == "MergeTree"
299 |         assert "MergeTree" in test_table_info["engine_full"]
300 | 
301 |         # Check row count
302 |         assert test_table_info["total_rows"] == 4
303 | 
304 |         # Check columns and their comments
305 |         columns_by_name = {col["name"]: col for col in test_table_info["columns"]}
306 | 
307 |         assert columns_by_name["id"]["comment"] == "Primary identifier"
308 |         assert columns_by_name["id"]["column_type"] == "UInt32"
309 | 
310 |         assert columns_by_name["name"]["comment"] == "User name field"
311 |         assert columns_by_name["name"]["column_type"] == "String"
312 | 
313 |         assert columns_by_name["age"]["comment"] == "User age"
314 |         assert columns_by_name["age"]["column_type"] == "UInt8"
315 | 
316 |         assert columns_by_name["created_at"]["comment"] == "Record creation timestamp"
317 |         assert columns_by_name["created_at"]["column_type"] == "DateTime"
318 |         assert columns_by_name["created_at"]["default_expression"] == "now()"
319 | 
320 | 
321 | @pytest.mark.asyncio
322 | async def test_system_database_access(mcp_server):
323 |     """Test that we can access system databases."""
324 |     async with Client(mcp_server) as client:
325 |         # List tables in system database
326 |         result = await client.call_tool("list_tables", {"database": "system"})
327 |         tables = json.loads(result.content[0].text)
328 | 
329 |         # System database should have many tables
330 |         assert len(tables) > 10
331 | 
332 |         # Check for some common system tables
333 |         table_names = [t["name"] for t in tables]
334 |         assert "tables" in table_names
335 |         assert "columns" in table_names
336 |         assert "databases" in table_names
337 | 
338 | 
339 | @pytest.mark.asyncio
340 | async def test_concurrent_queries(mcp_server, setup_test_database):
341 |     """Test running multiple queries concurrently."""
342 |     test_db, test_table, test_table2 = setup_test_database
343 | 
344 |     async with Client(mcp_server) as client:
345 |         # Run multiple queries concurrently
346 |         queries = [
347 |             f"SELECT COUNT(*) FROM {test_db}.{test_table}",
348 |             f"SELECT COUNT(*) FROM {test_db}.{test_table2}",
349 |             f"SELECT MAX(id) FROM {test_db}.{test_table}",
350 |             f"SELECT MIN(event_id) FROM {test_db}.{test_table2}",
351 |         ]
352 | 
353 |         # Execute all queries concurrently
354 |         results = await asyncio.gather(
355 |             *[client.call_tool("run_select_query", {"query": query}) for query in queries]
356 |         )
357 | 
358 |         # Verify all queries succeeded
359 |         assert len(results) == 4
360 | 
361 |         # Check each result
362 |         for i, result in enumerate(results):
363 |             query_result = json.loads(result.content[0].text)
364 |             assert "rows" in query_result
365 |             assert len(query_result["rows"]) == 1
366 | 
```

--------------------------------------------------------------------------------
/mcp_clickhouse/mcp_server.py:
--------------------------------------------------------------------------------

```python
  1 | import logging
  2 | import json
  3 | from typing import Optional, List, Any
  4 | import concurrent.futures
  5 | import atexit
  6 | import os
  7 | 
  8 | import clickhouse_connect
  9 | import chdb.session as chs
 10 | from clickhouse_connect.driver.binding import format_query_value
 11 | from dotenv import load_dotenv
 12 | from fastmcp import FastMCP
 13 | from fastmcp.tools import Tool
 14 | from fastmcp.prompts import Prompt
 15 | from fastmcp.exceptions import ToolError
 16 | from dataclasses import dataclass, field, asdict, is_dataclass
 17 | from starlette.requests import Request
 18 | from starlette.responses import PlainTextResponse
 19 | 
 20 | from mcp_clickhouse.mcp_env import get_config, get_chdb_config, get_mcp_config
 21 | from mcp_clickhouse.chdb_prompt import CHDB_PROMPT
 22 | 
 23 | 
 24 | @dataclass
 25 | class Column:
 26 |     database: str
 27 |     table: str
 28 |     name: str
 29 |     column_type: str
 30 |     default_kind: Optional[str]
 31 |     default_expression: Optional[str]
 32 |     comment: Optional[str]
 33 | 
 34 | 
 35 | @dataclass
 36 | class Table:
 37 |     database: str
 38 |     name: str
 39 |     engine: str
 40 |     create_table_query: str
 41 |     dependencies_database: str
 42 |     dependencies_table: str
 43 |     engine_full: str
 44 |     sorting_key: str
 45 |     primary_key: str
 46 |     total_rows: int
 47 |     total_bytes: int
 48 |     total_bytes_uncompressed: int
 49 |     parts: int
 50 |     active_parts: int
 51 |     total_marks: int
 52 |     comment: Optional[str] = None
 53 |     columns: List[Column] = field(default_factory=list)
 54 | 
 55 | 
 56 | MCP_SERVER_NAME = "mcp-clickhouse"
 57 | 
 58 | # Configure logging
 59 | logging.basicConfig(
 60 |     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 61 | )
 62 | logger = logging.getLogger(MCP_SERVER_NAME)
 63 | 
 64 | QUERY_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10)
 65 | atexit.register(lambda: QUERY_EXECUTOR.shutdown(wait=True))
 66 | 
 67 | load_dotenv()
 68 | 
 69 | mcp = FastMCP(name=MCP_SERVER_NAME)
 70 | 
 71 | 
 72 | @mcp.custom_route("/health", methods=["GET"])
 73 | async def health_check(request: Request) -> PlainTextResponse:
 74 |     """Health check endpoint for monitoring server status.
 75 | 
 76 |     Returns OK if the server is running and can connect to ClickHouse.
 77 |     """
 78 |     try:
 79 |         # Check if ClickHouse is enabled by trying to create config
 80 |         # If ClickHouse is disabled, this will succeed but connection will fail
 81 |         clickhouse_enabled = os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true"
 82 | 
 83 |         if not clickhouse_enabled:
 84 |             # If ClickHouse is disabled, check chDB status
 85 |             chdb_config = get_chdb_config()
 86 |             if chdb_config.enabled:
 87 |                 return PlainTextResponse("OK - MCP server running with chDB enabled")
 88 |             else:
 89 |                 # Both ClickHouse and chDB are disabled - this is an error
 90 |                 return PlainTextResponse(
 91 |                     "ERROR - Both ClickHouse and chDB are disabled. At least one must be enabled.",
 92 |                     status_code=503,
 93 |                 )
 94 | 
 95 |         # Try to create a client connection to verify ClickHouse connectivity
 96 |         client = create_clickhouse_client()
 97 |         version = client.server_version
 98 |         return PlainTextResponse(f"OK - Connected to ClickHouse {version}")
 99 |     except Exception as e:
100 |         # Return 503 Service Unavailable if we can't connect to ClickHouse
101 |         return PlainTextResponse(f"ERROR - Cannot connect to ClickHouse: {str(e)}", status_code=503)
102 | 
103 | 
104 | def result_to_table(query_columns, result) -> List[Table]:
105 |     return [Table(**dict(zip(query_columns, row))) for row in result]
106 | 
107 | 
108 | def result_to_column(query_columns, result) -> List[Column]:
109 |     return [Column(**dict(zip(query_columns, row))) for row in result]
110 | 
111 | 
112 | def to_json(obj: Any) -> str:
113 |     if is_dataclass(obj):
114 |         return json.dumps(asdict(obj), default=to_json)
115 |     elif isinstance(obj, list):
116 |         return [to_json(item) for item in obj]
117 |     elif isinstance(obj, dict):
118 |         return {key: to_json(value) for key, value in obj.items()}
119 |     return obj
120 | 
121 | 
122 | def list_databases():
123 |     """List available ClickHouse databases"""
124 |     logger.info("Listing all databases")
125 |     client = create_clickhouse_client()
126 |     result = client.command("SHOW DATABASES")
127 | 
128 |     # Convert newline-separated string to list and trim whitespace
129 |     if isinstance(result, str):
130 |         databases = [db.strip() for db in result.strip().split("\n")]
131 |     else:
132 |         databases = [result]
133 | 
134 |     logger.info(f"Found {len(databases)} databases")
135 |     return json.dumps(databases)
136 | 
137 | 
138 | def list_tables(database: str, like: Optional[str] = None, not_like: Optional[str] = None):
139 |     """List available ClickHouse tables in a database, including schema, comment,
140 |     row count, and column count."""
141 |     logger.info(f"Listing tables in database '{database}'")
142 |     client = create_clickhouse_client()
143 |     query = f"SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment FROM system.tables WHERE database = {format_query_value(database)}"
144 |     if like:
145 |         query += f" AND name LIKE {format_query_value(like)}"
146 | 
147 |     if not_like:
148 |         query += f" AND name NOT LIKE {format_query_value(not_like)}"
149 | 
150 |     result = client.query(query)
151 | 
152 |     # Deserialize result as Table dataclass instances
153 |     tables = result_to_table(result.column_names, result.result_rows)
154 | 
155 |     for table in tables:
156 |         column_data_query = f"SELECT database, table, name, type AS column_type, default_kind, default_expression, comment FROM system.columns WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"
157 |         column_data_query_result = client.query(column_data_query)
158 |         table.columns = [
159 |             c
160 |             for c in result_to_column(
161 |                 column_data_query_result.column_names,
162 |                 column_data_query_result.result_rows,
163 |             )
164 |         ]
165 | 
166 |     logger.info(f"Found {len(tables)} tables")
167 |     return [asdict(table) for table in tables]
168 | 
169 | 
170 | def execute_query(query: str):
171 |     client = create_clickhouse_client()
172 |     try:
173 |         read_only = get_readonly_setting(client)
174 |         res = client.query(query, settings={"readonly": read_only})
175 |         logger.info(f"Query returned {len(res.result_rows)} rows")
176 |         return {"columns": res.column_names, "rows": res.result_rows}
177 |     except Exception as err:
178 |         logger.error(f"Error executing query: {err}")
179 |         raise ToolError(f"Query execution failed: {str(err)}")
180 | 
181 | 
182 | def run_select_query(query: str):
183 |     """Run a SELECT query in a ClickHouse database"""
184 |     logger.info(f"Executing SELECT query: {query}")
185 |     try:
186 |         future = QUERY_EXECUTOR.submit(execute_query, query)
187 |         try:
188 |             timeout_secs = get_mcp_config().query_timeout
189 |             result = future.result(timeout=timeout_secs)
190 |             # Check if we received an error structure from execute_query
191 |             if isinstance(result, dict) and "error" in result:
192 |                 logger.warning(f"Query failed: {result['error']}")
193 |                 # MCP requires structured responses; string error messages can cause
194 |                 # serialization issues leading to BrokenResourceError
195 |                 return {
196 |                     "status": "error",
197 |                     "message": f"Query failed: {result['error']}",
198 |                 }
199 |             return result
200 |         except concurrent.futures.TimeoutError:
201 |             logger.warning(f"Query timed out after {timeout_secs} seconds: {query}")
202 |             future.cancel()
203 |             raise ToolError(f"Query timed out after {timeout_secs} seconds")
204 |     except ToolError:
205 |         raise
206 |     except Exception as e:
207 |         logger.error(f"Unexpected error in run_select_query: {str(e)}")
208 |         raise RuntimeError(f"Unexpected error during query execution: {str(e)}")
209 | 
210 | 
211 | def create_clickhouse_client():
212 |     client_config = get_config().get_client_config()
213 |     logger.info(
214 |         f"Creating ClickHouse client connection to {client_config['host']}:{client_config['port']} "
215 |         f"as {client_config['username']} "
216 |         f"(secure={client_config['secure']}, verify={client_config['verify']}, "
217 |         f"connect_timeout={client_config['connect_timeout']}s, "
218 |         f"send_receive_timeout={client_config['send_receive_timeout']}s)"
219 |     )
220 | 
221 |     try:
222 |         client = clickhouse_connect.get_client(**client_config)
223 |         # Test the connection
224 |         version = client.server_version
225 |         logger.info(f"Successfully connected to ClickHouse server version {version}")
226 |         return client
227 |     except Exception as e:
228 |         logger.error(f"Failed to connect to ClickHouse: {str(e)}")
229 |         raise
230 | 
231 | 
232 | def get_readonly_setting(client) -> str:
233 |     """Get the appropriate readonly setting value to use for queries.
234 | 
235 |     This function handles potential conflicts between server and client readonly settings:
236 |     - readonly=0: No read-only restrictions
237 |     - readonly=1: Only read queries allowed, settings cannot be changed
238 |     - readonly=2: Only read queries allowed, settings can be changed (except readonly itself)
239 | 
240 |     If server has readonly=2 and client tries to set readonly=1, it would cause:
241 |     "Setting readonly is unknown or readonly" error
242 | 
243 |     This function preserves the server's readonly setting unless it's 0, in which case
244 |     we enforce readonly=1 to ensure queries are read-only.
245 | 
246 |     Args:
247 |         client: ClickHouse client connection
248 | 
249 |     Returns:
250 |         String value of readonly setting to use
251 |     """
252 |     read_only = client.server_settings.get("readonly")
253 |     if read_only:
254 |         if read_only == "0":
255 |             return "1"  # Force read-only mode if server has it disabled
256 |         else:
257 |             return read_only.value  # Respect server's readonly setting (likely 2)
258 |     else:
259 |         return "1"  # Default to basic read-only mode if setting isn't present
260 | 
261 | 
262 | def create_chdb_client():
263 |     """Create a chDB client connection."""
264 |     if not get_chdb_config().enabled:
265 |         raise ValueError("chDB is not enabled. Set CHDB_ENABLED=true to enable it.")
266 |     return _chdb_client
267 | 
268 | 
269 | def execute_chdb_query(query: str):
270 |     """Execute a query using chDB client."""
271 |     client = create_chdb_client()
272 |     try:
273 |         res = client.query(query, "JSON")
274 |         if res.has_error():
275 |             error_msg = res.error_message()
276 |             logger.error(f"Error executing chDB query: {error_msg}")
277 |             return {"error": error_msg}
278 | 
279 |         result_data = res.data()
280 |         if not result_data:
281 |             return []
282 | 
283 |         result_json = json.loads(result_data)
284 | 
285 |         return result_json.get("data", [])
286 | 
287 |     except Exception as err:
288 |         logger.error(f"Error executing chDB query: {err}")
289 |         return {"error": str(err)}
290 | 
291 | 
292 | def run_chdb_select_query(query: str):
293 |     """Run SQL in chDB, an in-process ClickHouse engine"""
294 |     logger.info(f"Executing chDB SELECT query: {query}")
295 |     try:
296 |         future = QUERY_EXECUTOR.submit(execute_chdb_query, query)
297 |         try:
298 |             timeout_secs = get_mcp_config().query_timeout
299 |             result = future.result(timeout=timeout_secs)
300 |             # Check if we received an error structure from execute_chdb_query
301 |             if isinstance(result, dict) and "error" in result:
302 |                 logger.warning(f"chDB query failed: {result['error']}")
303 |                 return {
304 |                     "status": "error",
305 |                     "message": f"chDB query failed: {result['error']}",
306 |                 }
307 |             return result
308 |         except concurrent.futures.TimeoutError:
309 |             logger.warning(
310 |                 f"chDB query timed out after {timeout_secs} seconds: {query}"
311 |             )
312 |             future.cancel()
313 |             return {
314 |                 "status": "error",
315 |                 "message": f"chDB query timed out after {timeout_secs} seconds",
316 |             }
317 |     except Exception as e:
318 |         logger.error(f"Unexpected error in run_chdb_select_query: {e}")
319 |         return {"status": "error", "message": f"Unexpected error: {e}"}
320 | 
321 | 
322 | def chdb_initial_prompt() -> str:
323 |     """This prompt helps users understand how to interact and perform common operations in chDB"""
324 |     return CHDB_PROMPT
325 | 
326 | 
327 | def _init_chdb_client():
328 |     """Initialize the global chDB client instance."""
329 |     try:
330 |         if not get_chdb_config().enabled:
331 |             logger.info("chDB is disabled, skipping client initialization")
332 |             return None
333 | 
334 |         client_config = get_chdb_config().get_client_config()
335 |         data_path = client_config["data_path"]
336 |         logger.info(f"Creating chDB client with data_path={data_path}")
337 |         client = chs.Session(path=data_path)
338 |         logger.info(f"Successfully connected to chDB with data_path={data_path}")
339 |         return client
340 |     except Exception as e:
341 |         logger.error(f"Failed to initialize chDB client: {e}")
342 |         return None
343 | 
344 | 
345 | # Register tools based on configuration
346 | if os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true":
347 |     mcp.add_tool(Tool.from_function(list_databases))
348 |     mcp.add_tool(Tool.from_function(list_tables))
349 |     mcp.add_tool(Tool.from_function(run_select_query))
350 |     logger.info("ClickHouse tools registered")
351 | 
352 | 
353 | if os.getenv("CHDB_ENABLED", "false").lower() == "true":
354 |     _chdb_client = _init_chdb_client()
355 |     if _chdb_client:
356 |         atexit.register(lambda: _chdb_client.close())
357 | 
358 |     mcp.add_tool(Tool.from_function(run_chdb_select_query))
359 |     chdb_prompt = Prompt.from_function(
360 |         chdb_initial_prompt,
361 |         name="chdb_initial_prompt",
362 |         description="This prompt helps users understand how to interact and perform common operations in chDB",
363 |     )
364 |     mcp.add_prompt(chdb_prompt)
365 |     logger.info("chDB tools and prompts registered")
366 | 
```