# Directory Structure
```
├── .dockerignore
├── .editorconfig
├── .github
│   └── workflows
│       ├── ci.yaml
│       └── publish.yml
├── .gitignore
├── .python-version
├── Dockerfile
├── fastmcp.json
├── LICENSE
├── mcp_clickhouse
│   ├── __init__.py
│   ├── chdb_prompt.py
│   ├── main.py
│   ├── mcp_env.py
│   └── mcp_server.py
├── pyproject.toml
├── README.md
├── test-services
│   └── docker-compose.yaml
├── tests
│   ├── test_chdb_tool.py
│   ├── test_config_interface.py
│   ├── test_mcp_server.py
│   └── test_tool.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.10
2 |
```
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
```
1 | [*]
2 | trim_trailing_whitespace = true
3 | end_of_line = lf
```
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
```
1 | # Git
2 | .git/
3 | .gitignore
4 | .github/
5 |
6 | # Documentation
7 | LICENSE
8 | # README.md is needed for package build
9 | # *.md files may be needed for package metadata
10 |
11 | # Development environment
12 | .venv/
13 | .env
14 | .envrc
15 | .python-version
16 |
17 | # IDE and editor files
18 | .vscode/
19 | .idea/
20 | *.swp
21 | *.swo
22 | *~
23 |
24 | # Cache directories
25 | .ruff_cache/
26 | .pytest_cache/
27 | __pycache__/
28 | *.pyc
29 | *.pyo
30 | *.pyd
31 | .mypy_cache/
32 | .dmypy.json
33 | dmypy.json
34 |
35 | # Test files and directories
36 | tests/
37 | test-services/
38 | .coverage
39 | .coverage.*
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | coverage.xml
44 | *.cover
45 | *.py,cover
46 | .hypothesis/
47 | cover/
48 |
49 | # Build artifacts
50 | build/
51 | dist/
52 | *.egg-info/
53 | .eggs/
54 | *.egg
55 | MANIFEST
56 |
57 | # Temporary files
58 | *.tmp
59 | *.temp
60 | *.log
61 | .DS_Store
62 | Thumbs.db
63 |
64 | # Docker files (avoid copying Dockerfile into itself)
65 | Dockerfile
66 | .dockerignore
67 |
68 | # Editor config (not needed at runtime)
69 | .editorconfig
70 |
71 | # Claude AI files
72 | .claude/
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | .envrc
2 | .ruff_cache/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | cover/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | .pybuilder/
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # UV
101 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102 | # This is especially recommended for binary packages to ensure reproducibility, and is more
103 | # commonly ignored for libraries.
104 | #uv.lock
105 |
106 | # poetry
107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108 | # This is especially recommended for binary packages to ensure reproducibility, and is more
109 | # commonly ignored for libraries.
110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111 | #poetry.lock
112 |
113 | # pdm
114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115 | #pdm.lock
116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
117 | # in version control.
118 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
119 | .pdm.toml
120 | .pdm-python
121 | .pdm-build/
122 |
123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124 | __pypackages__/
125 |
126 | # Celery stuff
127 | celerybeat-schedule
128 | celerybeat.pid
129 |
130 | # SageMath parsed files
131 | *.sage.py
132 |
133 | # Environments
134 | .env
135 | .venv
136 | env/
137 | venv/
138 | ENV/
139 | env.bak/
140 | venv.bak/
141 |
142 | # Spyder project settings
143 | .spyderproject
144 | .spyproject
145 |
146 | # Rope project settings
147 | .ropeproject
148 |
149 | # mkdocs documentation
150 | /site
151 |
152 | # mypy
153 | .mypy_cache/
154 | .dmypy.json
155 | dmypy.json
156 |
157 | # Pyre type checker
158 | .pyre/
159 |
160 | # pytype static type analyzer
161 | .pytype/
162 |
163 | # Cython debug symbols
164 | cython_debug/
165 |
166 | # PyCharm
167 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169 | # and can be added to the global gitignore or merged into this file. For a more nuclear
170 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
171 | #.idea/
172 |
173 | # PyPI configuration file
174 | .pypirc
175 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # ClickHouse MCP Server
2 |
3 | [PyPI](https://pypi.org/project/mcp-clickhouse)
4 |
5 | An MCP server for ClickHouse.
6 |
7 | <a href="https://glama.ai/mcp/servers/yvjy4csvo1"><img width="380" height="200" src="https://glama.ai/mcp/servers/yvjy4csvo1/badge" alt="mcp-clickhouse MCP server" /></a>
8 |
9 | ## Features
10 |
11 | ### ClickHouse Tools
12 |
13 | * `run_select_query`
14 | * Execute SQL queries on your ClickHouse cluster.
15 | * Input: `sql` (string): The SQL query to execute.
16 | * All ClickHouse queries are run with `readonly = 1` to ensure they are safe.
17 |
18 | * `list_databases`
19 | * List all databases on your ClickHouse cluster.
20 |
21 | * `list_tables`
22 | * List all tables in a database.
23 | * Input: `database` (string): The name of the database.
24 |
25 | ### chDB Tools
26 |
27 | * `run_chdb_select_query`
28 | * Execute SQL queries using [chDB](https://github.com/chdb-io/chdb)'s embedded ClickHouse engine.
29 | * Input: `sql` (string): The SQL query to execute.
30 | * Query data directly from various sources (files, URLs, databases) without ETL processes.
31 |
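These tools are normally invoked by an MCP client such as Claude Desktop, but they can also be exercised in-process with the FastMCP client, which is how the test suite drives the server. A minimal sketch, assuming the ClickHouse connection variables described under Configuration are already set:

```python
import asyncio
import json

from fastmcp import Client

from mcp_clickhouse.mcp_server import mcp


async def main():
    # In-process session against the server object; no separate transport needed.
    async with Client(mcp) as client:
        databases = await client.call_tool("list_databases", {})
        print(json.loads(databases.content[0].text))

        result = await client.call_tool("run_select_query", {"sql": "SELECT 1"})
        print(result.content[0].text)


asyncio.run(main())
```
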
32 | ### Health Check Endpoint
33 |
34 | When running with HTTP or SSE transport, a health check endpoint is available at `/health`. This endpoint:
35 | - Returns `200 OK` with the ClickHouse version if the server is healthy and can connect to ClickHouse
36 | - Returns `503 Service Unavailable` if the server cannot connect to ClickHouse
37 |
38 | Example:
39 | ```bash
40 | curl http://localhost:8000/health
41 | # Response: OK - Connected to ClickHouse 24.3.1
42 | ```
43 |
44 | ## Configuration
45 |
46 | This MCP server supports both ClickHouse and chDB. You can enable either or both depending on your needs.
47 |
48 | 1. Open the Claude Desktop configuration file located at:
49 | * On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
50 | * On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
51 |
52 | 2. Add the following:
53 |
54 | ```json
55 | {
56 | "mcpServers": {
57 | "mcp-clickhouse": {
58 | "command": "uv",
59 | "args": [
60 | "run",
61 | "--with",
62 | "mcp-clickhouse",
63 | "--python",
64 | "3.10",
65 | "mcp-clickhouse"
66 | ],
67 | "env": {
68 | "CLICKHOUSE_HOST": "<clickhouse-host>",
69 | "CLICKHOUSE_PORT": "<clickhouse-port>",
70 | "CLICKHOUSE_USER": "<clickhouse-user>",
71 | "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
72 | "CLICKHOUSE_SECURE": "true",
73 | "CLICKHOUSE_VERIFY": "true",
74 | "CLICKHOUSE_CONNECT_TIMEOUT": "30",
75 | "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
76 | }
77 | }
78 | }
79 | }
80 | ```
81 |
82 | Update the environment variables to point to your own ClickHouse service.
83 |
84 | Or, if you'd like to try it out with the [ClickHouse SQL Playground](https://sql.clickhouse.com/), you can use the following config:
85 |
86 | ```json
87 | {
88 | "mcpServers": {
89 | "mcp-clickhouse": {
90 | "command": "uv",
91 | "args": [
92 | "run",
93 | "--with",
94 | "mcp-clickhouse",
95 | "--python",
96 | "3.10",
97 | "mcp-clickhouse"
98 | ],
99 | "env": {
100 | "CLICKHOUSE_HOST": "sql-clickhouse.clickhouse.com",
101 | "CLICKHOUSE_PORT": "8443",
102 | "CLICKHOUSE_USER": "demo",
103 | "CLICKHOUSE_PASSWORD": "",
104 | "CLICKHOUSE_SECURE": "true",
105 | "CLICKHOUSE_VERIFY": "true",
106 | "CLICKHOUSE_CONNECT_TIMEOUT": "30",
107 | "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
108 | }
109 | }
110 | }
111 | }
112 | ```
113 |
114 | For chDB (embedded ClickHouse engine), add the following configuration:
115 |
116 | ```json
117 | {
118 | "mcpServers": {
119 | "mcp-clickhouse": {
120 | "command": "uv",
121 | "args": [
122 | "run",
123 | "--with",
124 | "mcp-clickhouse",
125 | "--python",
126 | "3.10",
127 | "mcp-clickhouse"
128 | ],
129 | "env": {
130 | "CHDB_ENABLED": "true",
131 | "CLICKHOUSE_ENABLED": "false",
132 | "CHDB_DATA_PATH": "/path/to/chdb/data"
133 | }
134 | }
135 | }
136 | }
137 | ```
138 |
139 | You can also enable both ClickHouse and chDB simultaneously:
140 |
141 | ```json
142 | {
143 | "mcpServers": {
144 | "mcp-clickhouse": {
145 | "command": "uv",
146 | "args": [
147 | "run",
148 | "--with",
149 | "mcp-clickhouse",
150 | "--python",
151 | "3.10",
152 | "mcp-clickhouse"
153 | ],
154 | "env": {
155 | "CLICKHOUSE_HOST": "<clickhouse-host>",
156 | "CLICKHOUSE_PORT": "<clickhouse-port>",
157 | "CLICKHOUSE_USER": "<clickhouse-user>",
158 | "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
159 | "CLICKHOUSE_SECURE": "true",
160 | "CLICKHOUSE_VERIFY": "true",
161 | "CLICKHOUSE_CONNECT_TIMEOUT": "30",
162 | "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30",
163 | "CHDB_ENABLED": "true",
164 | "CHDB_DATA_PATH": "/path/to/chdb/data"
165 | }
166 | }
167 | }
168 | }
169 | ```
170 |
171 | 3. Locate the command entry for `uv` and replace it with the absolute path to the `uv` executable. This ensures the correct version of `uv` is used when starting the server. On macOS, you can find this path by running `which uv`.
172 |
173 | 4. Restart Claude Desktop to apply the changes.
174 |
175 | ### Running Without uv (Using System Python)
176 |
177 | If you prefer to use the system Python installation instead of uv, you can install the package from PyPI and run it directly:
178 |
179 | 1. Install the package using pip:
180 | ```bash
181 | python3 -m pip install mcp-clickhouse
182 | ```
183 |
184 | To upgrade to the latest version:
185 | ```bash
186 | python3 -m pip install --upgrade mcp-clickhouse
187 | ```
188 |
189 | 2. Update your Claude Desktop configuration to use Python directly:
190 |
191 | ```json
192 | {
193 | "mcpServers": {
194 | "mcp-clickhouse": {
195 | "command": "python3",
196 | "args": [
197 | "-m",
198 | "mcp_clickhouse.main"
199 | ],
200 | "env": {
201 | "CLICKHOUSE_HOST": "<clickhouse-host>",
202 | "CLICKHOUSE_PORT": "<clickhouse-port>",
203 | "CLICKHOUSE_USER": "<clickhouse-user>",
204 | "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
205 | "CLICKHOUSE_SECURE": "true",
206 | "CLICKHOUSE_VERIFY": "true",
207 | "CLICKHOUSE_CONNECT_TIMEOUT": "30",
208 | "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
209 | }
210 | }
211 | }
212 | }
213 | ```
214 |
215 | Alternatively, you can use the installed script directly:
216 |
217 | ```json
218 | {
219 | "mcpServers": {
220 | "mcp-clickhouse": {
221 | "command": "mcp-clickhouse",
222 | "env": {
223 | "CLICKHOUSE_HOST": "<clickhouse-host>",
224 | "CLICKHOUSE_PORT": "<clickhouse-port>",
225 | "CLICKHOUSE_USER": "<clickhouse-user>",
226 | "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
227 | "CLICKHOUSE_SECURE": "true",
228 | "CLICKHOUSE_VERIFY": "true",
229 | "CLICKHOUSE_CONNECT_TIMEOUT": "30",
230 | "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "30"
231 | }
232 | }
233 | }
234 | }
235 | ```
236 |
237 | Note: Make sure to use the full path to the Python executable or the `mcp-clickhouse` script if they are not in your system PATH. You can find the paths using:
238 | - `which python3` for the Python executable
239 | - `which mcp-clickhouse` for the installed script
240 |
241 | ## Development
242 |
243 | 1. In `test-services` directory run `docker compose up -d` to start the ClickHouse cluster.
244 |
245 | 2. Add the following variables to a `.env` file in the root of the repository.
246 |
247 | *Note: The use of the `default` user in this context is intended solely for local development purposes.*
248 |
249 | ```bash
250 | CLICKHOUSE_HOST=localhost
251 | CLICKHOUSE_PORT=8123
252 | CLICKHOUSE_USER=default
253 | CLICKHOUSE_PASSWORD=clickhouse
254 | ```
255 |
256 | 3. Run `uv sync` to install the dependencies. To install `uv`, follow the instructions [here](https://docs.astral.sh/uv/). Then run `source .venv/bin/activate` to activate the virtual environment.
257 |
258 | 4. For easy testing with the MCP Inspector, run `fastmcp dev mcp_clickhouse/mcp_server.py` to start the MCP server.
259 |
260 | 5. To test with HTTP transport and the health check endpoint:
261 | ```bash
262 | # Using default port 8000
263 | CLICKHOUSE_MCP_SERVER_TRANSPORT=http python -m mcp_clickhouse.main
264 |
265 | # Or with a custom port
266 | CLICKHOUSE_MCP_SERVER_TRANSPORT=http CLICKHOUSE_MCP_BIND_PORT=4200 python -m mcp_clickhouse.main
267 |
268 | # Then in another terminal:
269 | curl http://localhost:8000/health # or http://localhost:4200/health for custom port
270 | ```
271 |
272 | ### Environment Variables
273 |
274 | The following environment variables are used to configure the ClickHouse and chDB connections:
275 |
276 | #### ClickHouse Variables
277 |
278 | ##### Required Variables
279 |
280 | * `CLICKHOUSE_HOST`: The hostname of your ClickHouse server
281 | * `CLICKHOUSE_USER`: The username for authentication
282 | * `CLICKHOUSE_PASSWORD`: The password for authentication
283 |
284 | > [!CAUTION]
285 | > Treat your MCP database user as you would any external client connecting to your database, granting only the minimum privileges required for its operation. Avoid using default or administrative users.
286 |
287 | ##### Optional Variables
288 |
289 | * `CLICKHOUSE_PORT`: The port number of your ClickHouse server
290 | * Default: `8443` if HTTPS is enabled, `8123` if disabled
291 | * Usually doesn't need to be set unless using a non-standard port
292 | * `CLICKHOUSE_SECURE`: Enable/disable HTTPS connection
293 | * Default: `"true"`
294 | * Set to `"false"` for non-secure connections
295 | * `CLICKHOUSE_VERIFY`: Enable/disable SSL certificate verification
296 | * Default: `"true"`
297 | * Set to `"false"` to disable certificate verification (not recommended for production)
298 | * TLS certificates: The package uses your operating system trust store for TLS certificate verification via `truststore`. We call `truststore.inject_into_ssl()` at startup to ensure proper certificate handling. Python’s default SSL behavior is used as a fallback only if an unexpected error occurs.
299 | * `CLICKHOUSE_CONNECT_TIMEOUT`: Connection timeout in seconds
300 | * Default: `"30"`
301 | * Increase this value if you experience connection timeouts
302 | * `CLICKHOUSE_SEND_RECEIVE_TIMEOUT`: Send/receive timeout in seconds
303 | * Default: `"300"`
304 | * Increase this value for long-running queries
305 | * `CLICKHOUSE_DATABASE`: Default database to use
306 | * Default: None (uses server default)
307 | * Set this to automatically connect to a specific database
308 | * `CLICKHOUSE_MCP_SERVER_TRANSPORT`: Sets the transport method for the MCP server.
309 | * Default: `"stdio"`
310 | * Valid options: `"stdio"`, `"http"`, `"sse"`. This is useful for local development with tools like MCP Inspector.
311 | * `CLICKHOUSE_MCP_BIND_HOST`: Host to bind the MCP server to when using HTTP or SSE transport
312 | * Default: `"127.0.0.1"`
313 | * Set to `"0.0.0.0"` to bind to all network interfaces (useful for Docker or remote access)
314 | * Only used when transport is `"http"` or `"sse"`
315 | * `CLICKHOUSE_MCP_BIND_PORT`: Port to bind the MCP server to when using HTTP or SSE transport
316 | * Default: `"8000"`
317 | * Only used when transport is `"http"` or `"sse"`
318 | * `CLICKHOUSE_MCP_QUERY_TIMEOUT`: Timeout in seconds for SELECT tools
319 | * Default: `"30"`
320 | * Increase this if you see `Query timed out after ...` errors for heavy queries
321 | * `CLICKHOUSE_ENABLED`: Enable/disable ClickHouse functionality
322 | * Default: `"true"`
323 | * Set to `"false"` to disable ClickHouse tools when using chDB only
324 |
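For reference, the trust-store injection mentioned under `CLICKHOUSE_VERIFY` above happens at import time in `mcp_clickhouse/__init__.py`, and setting `MCP_CLICKHOUSE_TRUSTSTORE_DISABLE=1` skips it. A condensed sketch of that startup logic:

```python
import os

# Condensed from mcp_clickhouse/__init__.py; set MCP_CLICKHOUSE_TRUSTSTORE_DISABLE=1 to opt out.
if os.getenv("MCP_CLICKHOUSE_TRUSTSTORE_DISABLE") != "1":
    try:
        import truststore

        truststore.inject_into_ssl()  # use the OS trust store for TLS verification
    except Exception:
        pass  # fall back to Python's default SSL behavior
```
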
325 | #### chDB Variables
326 |
327 | * `CHDB_ENABLED`: Enable/disable chDB functionality
328 | * Default: `"false"`
329 | * Set to `"true"` to enable chDB tools
330 | * `CHDB_DATA_PATH`: The path to the chDB data directory
331 | * Default: `":memory:"` (in-memory database)
332 | * Use `:memory:` for in-memory database
333 | * Use a file path for persistent storage (e.g., `/path/to/chdb/data`)
334 |
335 | #### Example Configurations
336 |
337 | For local development with Docker:
338 |
339 | ```env
340 | # Required variables
341 | CLICKHOUSE_HOST=localhost
342 | CLICKHOUSE_USER=default
343 | CLICKHOUSE_PASSWORD=clickhouse
344 |
345 | # Optional: Override defaults for local development
346 | CLICKHOUSE_SECURE=false # Uses port 8123 automatically
347 | CLICKHOUSE_VERIFY=false
348 | ```
349 |
350 | For ClickHouse Cloud:
351 |
352 | ```env
353 | # Required variables
354 | CLICKHOUSE_HOST=your-instance.clickhouse.cloud
355 | CLICKHOUSE_USER=default
356 | CLICKHOUSE_PASSWORD=your-password
357 |
358 | # Optional: These use secure defaults
359 | # CLICKHOUSE_SECURE=true # Uses port 8443 automatically
360 | # CLICKHOUSE_DATABASE=your_database
361 | ```
362 |
363 | For ClickHouse SQL Playground:
364 |
365 | ```env
366 | CLICKHOUSE_HOST=sql-clickhouse.clickhouse.com
367 | CLICKHOUSE_USER=demo
368 | CLICKHOUSE_PASSWORD=
369 | # Uses secure defaults (HTTPS on port 8443)
370 | ```
371 |
372 | For chDB only (in-memory):
373 |
374 | ```env
375 | # chDB configuration
376 | CHDB_ENABLED=true
377 | CLICKHOUSE_ENABLED=false
378 | # CHDB_DATA_PATH defaults to :memory:
379 | ```
380 |
381 | For chDB with persistent storage:
382 |
383 | ```env
384 | # chDB configuration
385 | CHDB_ENABLED=true
386 | CLICKHOUSE_ENABLED=false
387 | CHDB_DATA_PATH=/path/to/chdb/data
388 | ```
389 |
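The chDB tool can also be called directly from Python without going through an MCP client, which is how the test suite exercises it. A minimal sketch, assuming `CHDB_ENABLED=true` is set as above:

```python
from mcp_clickhouse import run_chdb_select_query

# Query a remote Parquet file in place; no import or ETL step is needed.
result = run_chdb_select_query(
    "SELECT COUNT(1) AS row_count "
    "FROM url('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_0.parquet', 'Parquet')"
)
print(result)  # a list of rows on success, or {"status": "error", ...} on failure
```
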
390 | For MCP Inspector or remote access with HTTP transport:
391 |
392 | ```env
393 | CLICKHOUSE_HOST=localhost
394 | CLICKHOUSE_USER=default
395 | CLICKHOUSE_PASSWORD=clickhouse
396 | CLICKHOUSE_MCP_SERVER_TRANSPORT=http
397 | CLICKHOUSE_MCP_BIND_HOST=0.0.0.0 # Bind to all interfaces
398 | CLICKHOUSE_MCP_BIND_PORT=4200 # Custom port (default: 8000)
399 | ```
400 |
401 | When using HTTP transport, the server will run on the configured port (default 8000). For example, with the above configuration:
402 | - MCP endpoint: `http://localhost:4200/mcp`
403 | - Health check: `http://localhost:4200/health`
404 |
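For example, a small client script can probe the health endpoint and then open an MCP session over HTTP; a minimal sketch for the configuration above (port 4200), assuming the FastMCP client infers the HTTP transport from the URL:

```python
import asyncio
import urllib.request

from fastmcp import Client


async def main():
    # Health probe: plain HTTP, no MCP session required.
    with urllib.request.urlopen("http://localhost:4200/health") as resp:
        print(resp.status, resp.read().decode())

    # MCP session over the HTTP endpoint.
    async with Client("http://localhost:4200/mcp") as client:
        result = await client.call_tool("list_databases", {})
        print(result.content[0].text)


asyncio.run(main())
```
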
405 | You can set these variables in your environment, in a `.env` file, or in the Claude Desktop configuration:
406 |
407 | ```json
408 | {
409 | "mcpServers": {
410 | "mcp-clickhouse": {
411 | "command": "uv",
412 | "args": [
413 | "run",
414 | "--with",
415 | "mcp-clickhouse",
416 | "--python",
417 | "3.10",
418 | "mcp-clickhouse"
419 | ],
420 | "env": {
421 | "CLICKHOUSE_HOST": "<clickhouse-host>",
422 | "CLICKHOUSE_USER": "<clickhouse-user>",
423 | "CLICKHOUSE_PASSWORD": "<clickhouse-password>",
424 | "CLICKHOUSE_DATABASE": "<optional-database>",
425 | "CLICKHOUSE_MCP_SERVER_TRANSPORT": "stdio",
426 | "CLICKHOUSE_MCP_BIND_HOST": "127.0.0.1",
427 | "CLICKHOUSE_MCP_BIND_PORT": "8000"
428 | }
429 | }
430 | }
431 | }
432 | ```
433 |
434 | Note: The bind host and port settings are only used when transport is set to "http" or "sse".
435 |
436 | ### Running tests
437 |
438 | ```bash
439 | uv sync --all-extras --dev # install dev dependencies
440 | uv run ruff check . # run linting
441 |
442 | docker compose -f test-services/docker-compose.yaml up -d  # start ClickHouse
443 | uv run pytest -v tests
444 | uv run pytest -v tests/test_tool.py # ClickHouse only
445 | uv run pytest -v tests/test_chdb_tool.py # chDB only
446 | ```
447 |
448 | ## YouTube Overview
449 |
450 | [Watch the overview on YouTube](https://www.youtube.com/watch?v=y9biAm_Fkqw)
451 |
```
--------------------------------------------------------------------------------
/fastmcp.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "$schema": "https://gofastmcp.com/public/schemas/fastmcp.json/v1.json",
3 | "source": {
4 | "path": "mcp_clickhouse/mcp_server.py",
5 | "entrypoint": "mcp"
6 | },
7 | "environment": {
8 | "dependencies": [
9 | "clickhouse-connect",
10 | "python-dotenv",
11 | "truststore",
12 | "chdb"
13 | ]
14 | }
15 | }
16 |
```
--------------------------------------------------------------------------------
/test-services/docker-compose.yaml:
--------------------------------------------------------------------------------
```yaml
1 | services:
2 | clickhouse:
3 | image: clickhouse/clickhouse-server:latest
4 | ports:
5 | - "8123:8123"
6 | - "9000:9000"
7 | volumes:
8 | - clickhouse-data:/var/lib/clickhouse
9 | environment:
10 | - CLICKHOUSE_USER=default
11 | - CLICKHOUSE_PASSWORD=clickhouse
12 | - CLICKHOUSE_DB=default
13 |
14 | volumes:
15 | clickhouse-data:
16 |
```
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
```yaml
1 | on:
2 | workflow_dispatch:
3 |
4 | jobs:
5 | publish:
6 | name: Upload release to PyPI
7 | runs-on: ubuntu-latest
8 | environment:
9 | name: pypi
10 | url: "https://pypi.org/p/mcp-clickhouse"
11 | permissions:
12 | id-token: write
13 | steps:
14 | - uses: actions/checkout@v4
15 | - uses: astral-sh/setup-uv@v5
16 | - run: uv python install
17 | - run: uv build
18 | - uses: pypa/gh-action-pypi-publish@release/v1
```
--------------------------------------------------------------------------------
/mcp_clickhouse/__init__.py:
--------------------------------------------------------------------------------
```python
1 | import os
2 |
3 | from .mcp_server import (
4 | create_clickhouse_client,
5 | list_databases,
6 | list_tables,
7 | run_select_query,
8 | create_chdb_client,
9 | run_chdb_select_query,
10 | chdb_initial_prompt,
11 | )
12 |
13 |
14 | if os.getenv("MCP_CLICKHOUSE_TRUSTSTORE_DISABLE", None) != "1":
15 | try:
16 | import truststore
17 | truststore.inject_into_ssl()
18 | except Exception:
19 | pass
20 |
21 | __all__ = [
22 | "list_databases",
23 | "list_tables",
24 | "run_select_query",
25 | "create_clickhouse_client",
26 | "create_chdb_client",
27 | "run_chdb_select_query",
28 | "chdb_initial_prompt",
29 | ]
30 |
```
--------------------------------------------------------------------------------
/mcp_clickhouse/main.py:
--------------------------------------------------------------------------------
```python
1 | from .mcp_server import mcp
2 | from .mcp_env import get_mcp_config, TransportType
3 |
4 |
5 | def main():
6 | mcp_config = get_mcp_config()
7 | transport = mcp_config.server_transport
8 |
9 | # For HTTP and SSE transports, we need to specify host and port
10 | http_transports = [TransportType.HTTP.value, TransportType.SSE.value]
11 | if transport in http_transports:
12 | # Use the configured bind host (defaults to 127.0.0.1, can be set to 0.0.0.0)
13 | # and bind port (defaults to 8000)
14 | mcp.run(transport=transport, host=mcp_config.bind_host, port=mcp_config.bind_port)
15 | else:
16 | # For stdio transport, no host or port is needed
17 | mcp.run(transport=transport)
18 |
19 |
20 | if __name__ == "__main__":
21 | main()
22 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [project]
2 | name = "mcp-clickhouse"
3 | version = "0.1.12"
4 | description = "An MCP server for ClickHouse."
5 | readme = "README.md"
6 | license = "Apache-2.0"
7 | license-files = ["LICENSE"]
8 | requires-python = ">=3.10"
9 | dependencies = [
10 | "fastmcp>=2.0.0",
11 | "python-dotenv>=1.0.1",
12 | "clickhouse-connect>=0.8.16",
13 | "truststore>=0.10",
14 | "chdb>=3.3.0",
15 | ]
16 |
17 | [project.scripts]
18 | mcp-clickhouse = "mcp_clickhouse.main:main"
19 |
20 | [project.urls]
21 | Home = "https://github.com/ClickHouse/mcp-clickhouse"
22 |
23 | [project.optional-dependencies]
24 | dev = [
25 | "ruff",
26 | "pytest",
27 | "pytest-asyncio"
28 | ]
29 |
30 | [tool.hatch.build.targets.wheel]
31 | packages = ["mcp_clickhouse"]
32 |
33 | [tool.ruff]
34 | line-length = 100
35 |
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | # Build stage - Use a Python image with uv pre-installed
2 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS builder
3 |
4 | # Install the project into `/app`
5 | WORKDIR /app
6 |
7 | # Enable bytecode compilation
8 | ENV UV_COMPILE_BYTECODE=1
9 |
10 | # Copy from the cache instead of linking since it's a mounted volume
11 | ENV UV_LINK_MODE=copy
12 |
13 | # Install git and build dependencies for ClickHouse client
14 | RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
15 | --mount=type=cache,target=/var/lib/apt,sharing=locked \
16 | apt-get update && apt-get install -y --no-install-recommends git build-essential
17 |
18 | # Install the project's dependencies using the lockfile and settings
19 | RUN --mount=type=cache,target=/root/.cache/uv \
20 | --mount=type=bind,source=uv.lock,target=uv.lock \
21 | --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
22 | --mount=type=bind,source=README.md,target=README.md \
23 | uv sync --locked --no-install-project --no-dev
24 |
25 | # Then, add the rest of the project source code and install it
26 | # Installing separately from its dependencies allows optimal layer caching
27 | COPY . /app
28 | RUN --mount=type=cache,target=/root/.cache/uv \
29 | uv sync --locked --no-dev --no-editable
30 |
31 | # Production stage - Use minimal Python image
32 | FROM python:3.13-slim-bookworm
33 |
34 | # Set the working directory
35 | WORKDIR /app
36 |
37 | # Copy the virtual environment from the builder stage
38 | COPY --from=builder /app/.venv /app/.venv
39 |
40 | # Place executables in the environment at the front of the path
41 | ENV PATH="/app/.venv/bin:$PATH"
42 |
43 | # Run the MCP ClickHouse server by default
44 | CMD ["python", "-m", "mcp_clickhouse.main"]
45 |
```
--------------------------------------------------------------------------------
/tests/test_chdb_tool.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 |
3 | from dotenv import load_dotenv
4 |
5 | from mcp_clickhouse import create_chdb_client, run_chdb_select_query
6 |
7 | load_dotenv()
8 |
9 |
10 | class TestChDBTools(unittest.TestCase):
11 | @classmethod
12 | def setUpClass(cls):
13 | """Set up the environment before chDB tests."""
14 | cls.client = create_chdb_client()
15 |
16 | def test_run_chdb_select_query_simple(self):
17 | """Test running a simple SELECT query in chDB."""
18 | query = "SELECT 1 as test_value"
19 | result = run_chdb_select_query(query)
20 | self.assertIsInstance(result, list)
21 | self.assertIn("test_value", str(result))
22 |
23 | def test_run_chdb_select_query_with_url_table_function(self):
24 | """Test running a SELECT query with url table function in chDB."""
25 | query = "SELECT COUNT(1) FROM url('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_0.parquet', 'Parquet')"
26 | result = run_chdb_select_query(query)
27 | print(result)
28 | self.assertIsInstance(result, list)
29 | self.assertIn("1000000", str(result))
30 |
31 | def test_run_chdb_select_query_failure(self):
32 | """Test running a SELECT query with an error in chDB."""
33 | query = "SELECT * FROM non_existent_table_chDB"
34 | result = run_chdb_select_query(query)
35 | print(result)
36 | self.assertIsInstance(result, dict)
37 | self.assertEqual(result["status"], "error")
38 | self.assertIn("message", result)
39 |
40 | def test_run_chdb_select_query_empty_result(self):
41 | """Test running a SELECT query that returns empty result in chDB."""
42 | query = "SELECT 1 WHERE 1 = 0"
43 | result = run_chdb_select_query(query)
44 | print(result)
45 | self.assertIsInstance(result, list)
46 | self.assertEqual(len(result), 0)
47 |
48 |
49 | if __name__ == "__main__":
50 | unittest.main()
51 |
```
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
```yaml
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 |
13 | services:
14 | clickhouse:
15 | image: clickhouse/clickhouse-server:24.10
16 | ports:
17 | - 9000:9000
18 | - 8123:8123
19 |
20 | steps:
21 | - name: Checkout repository
22 | uses: actions/checkout@v4
23 |
24 | - name: Install uv
25 | uses: astral-sh/setup-uv@v4
26 |
27 | - name: Setup Python
28 | run: uv python install 3.13
29 |
30 | - name: Install Project
31 | run: uv sync --all-extras --dev
32 |
33 | - name: Run tests
34 | env:
35 | CLICKHOUSE_HOST: "localhost"
36 | CLICKHOUSE_PORT: "8123"
37 | CLICKHOUSE_USER: "default"
38 | CLICKHOUSE_PASSWORD: ""
39 | CLICKHOUSE_SECURE: "false"
40 | CLICKHOUSE_VERIFY: "false"
41 | CHDB_ENABLED: "true"
42 | run: |
43 | uv run pytest tests
44 |
45 | - name: Lint with Ruff
46 | run: uv run ruff check .
47 |
48 | docker-build:
49 | runs-on: ubuntu-latest
50 |
51 | steps:
52 | - name: Checkout repository
53 | uses: actions/checkout@v4
54 |
55 | - name: Set up Docker Buildx
56 | uses: docker/setup-buildx-action@v3
57 |
58 | - name: Build Docker image
59 | uses: docker/build-push-action@v5
60 | with:
61 | context: .
62 | push: false
63 | load: true
64 | tags: mcp-clickhouse:test
65 | cache-from: type=gha
66 | cache-to: type=gha,mode=max
67 |
68 | - name: Test Docker image import
69 | run: |
70 | docker run --rm mcp-clickhouse:test python -c "import mcp_clickhouse; print('✅ MCP ClickHouse Docker image works!')"
71 |
72 | - name: Test Docker image default command
73 | run: |
74 | timeout 10s docker run --rm \
75 | -e CLICKHOUSE_HOST=localhost \
76 | -e CLICKHOUSE_USER=default \
77 | -e CLICKHOUSE_PASSWORD="" \
78 | mcp-clickhouse:test || [ $? = 124 ] && echo "✅ Docker container starts successfully"
79 |
```
--------------------------------------------------------------------------------
/tests/test_config_interface.py:
--------------------------------------------------------------------------------
```python
1 | import pytest
2 |
3 | from mcp_clickhouse.mcp_env import ClickHouseConfig
4 |
5 |
6 | def test_interface_http_when_secure_false(monkeypatch: pytest.MonkeyPatch):
7 | """Test that interface is set to 'http' when CLICKHOUSE_SECURE=false."""
8 | monkeypatch.setenv("CLICKHOUSE_HOST", "localhost")
9 | monkeypatch.setenv("CLICKHOUSE_USER", "test")
10 | monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
11 | monkeypatch.setenv("CLICKHOUSE_SECURE", "false")
12 | monkeypatch.setenv("CLICKHOUSE_PORT", "8123")
13 |
14 | config = ClickHouseConfig()
15 | client_config = config.get_client_config()
16 |
17 | assert client_config["interface"] == "http"
18 | assert client_config["secure"] is False
19 | assert client_config["port"] == 8123
20 |
21 |
22 | def test_interface_https_when_secure_true(monkeypatch: pytest.MonkeyPatch):
23 | """Test that interface is set to 'https' when CLICKHOUSE_SECURE=true."""
24 | monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
25 | monkeypatch.setenv("CLICKHOUSE_USER", "test")
26 | monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
27 | monkeypatch.setenv("CLICKHOUSE_SECURE", "true")
28 | monkeypatch.setenv("CLICKHOUSE_PORT", "8443")
29 |
30 | config = ClickHouseConfig()
31 | client_config = config.get_client_config()
32 |
33 | assert client_config["interface"] == "https"
34 | assert client_config["secure"] is True
35 | assert client_config["port"] == 8443
36 |
37 |
38 | def test_interface_https_by_default(monkeypatch: pytest.MonkeyPatch):
39 | """Test that interface defaults to 'https' when CLICKHOUSE_SECURE is not set."""
40 | monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
41 | monkeypatch.setenv("CLICKHOUSE_USER", "test")
42 | monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
43 | monkeypatch.delenv("CLICKHOUSE_SECURE", raising=False)
44 | monkeypatch.delenv("CLICKHOUSE_PORT", raising=False)
45 |
46 | config = ClickHouseConfig()
47 | client_config = config.get_client_config()
48 |
49 | assert client_config["interface"] == "https"
50 | assert client_config["secure"] is True
51 | assert client_config["port"] == 8443
52 |
53 |
54 | def test_interface_http_with_custom_port(monkeypatch: pytest.MonkeyPatch):
55 | """Test that interface is 'http' with custom port when CLICKHOUSE_SECURE=false."""
56 | monkeypatch.setenv("CLICKHOUSE_HOST", "localhost")
57 | monkeypatch.setenv("CLICKHOUSE_USER", "test")
58 | monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
59 | monkeypatch.setenv("CLICKHOUSE_SECURE", "false")
60 | monkeypatch.setenv("CLICKHOUSE_PORT", "9000")
61 |
62 | config = ClickHouseConfig()
63 | client_config = config.get_client_config()
64 |
65 | assert client_config["interface"] == "http"
66 | assert client_config["secure"] is False
67 | assert client_config["port"] == 9000
68 |
69 |
70 | def test_interface_https_with_custom_port(monkeypatch: pytest.MonkeyPatch):
71 | """Test that interface is 'https' with custom port when CLICKHOUSE_SECURE=true."""
72 | monkeypatch.setenv("CLICKHOUSE_HOST", "example.com")
73 | monkeypatch.setenv("CLICKHOUSE_USER", "test")
74 | monkeypatch.setenv("CLICKHOUSE_PASSWORD", "test")
75 | monkeypatch.setenv("CLICKHOUSE_SECURE", "true")
76 | monkeypatch.setenv("CLICKHOUSE_PORT", "9443")
77 |
78 | config = ClickHouseConfig()
79 | client_config = config.get_client_config()
80 |
81 | assert client_config["interface"] == "https"
82 | assert client_config["secure"] is True
83 | assert client_config["port"] == 9443
84 |
```
--------------------------------------------------------------------------------
/tests/test_tool.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 | import json
3 |
4 | from dotenv import load_dotenv
5 | from fastmcp.exceptions import ToolError
6 |
7 | from mcp_clickhouse import create_clickhouse_client, list_databases, list_tables, run_select_query
8 |
9 | load_dotenv()
10 |
11 |
12 | class TestClickhouseTools(unittest.TestCase):
13 | @classmethod
14 | def setUpClass(cls):
15 | """Set up the environment before tests."""
16 | cls.client = create_clickhouse_client()
17 |
18 | # Prepare test database and table
19 | cls.test_db = "test_tool_db"
20 | cls.test_table = "test_table"
21 | cls.client.command(f"CREATE DATABASE IF NOT EXISTS {cls.test_db}")
22 |
23 | # Drop table if exists to ensure clean state
24 | cls.client.command(f"DROP TABLE IF EXISTS {cls.test_db}.{cls.test_table}")
25 |
26 | # Create table with comments
27 | cls.client.command(f"""
28 | CREATE TABLE {cls.test_db}.{cls.test_table} (
29 | id UInt32 COMMENT 'Primary identifier',
30 | name String COMMENT 'User name field'
31 | ) ENGINE = MergeTree()
32 | ORDER BY id
33 | COMMENT 'Test table for unit testing'
34 | """)
35 | cls.client.command(f"""
36 | INSERT INTO {cls.test_db}.{cls.test_table} (id, name) VALUES (1, 'Alice'), (2, 'Bob')
37 | """)
38 |
39 | @classmethod
40 | def tearDownClass(cls):
41 | """Clean up the environment after tests."""
42 | cls.client.command(f"DROP DATABASE IF EXISTS {cls.test_db}")
43 |
44 | def test_list_databases(self):
45 | """Test listing databases."""
46 | result = list_databases()
47 | # Parse JSON response
48 | databases = json.loads(result)
49 | self.assertIn(self.test_db, databases)
50 |
51 | def test_list_tables_without_like(self):
52 | """Test listing tables without a 'LIKE' filter."""
53 | result = list_tables(self.test_db)
54 | self.assertIsInstance(result, list)
55 | self.assertEqual(len(result), 1)
56 | self.assertEqual(result[0]["name"], self.test_table)
57 |
58 | def test_list_tables_with_like(self):
59 | """Test listing tables with a 'LIKE' filter."""
60 | result = list_tables(self.test_db, like=f"{self.test_table}%")
61 | self.assertIsInstance(result, list)
62 | self.assertEqual(len(result), 1)
63 | self.assertEqual(result[0]["name"], self.test_table)
64 |
65 | def test_run_select_query_success(self):
66 | """Test running a SELECT query successfully."""
67 | query = f"SELECT * FROM {self.test_db}.{self.test_table}"
68 | result = run_select_query(query)
69 | self.assertIsInstance(result, dict)
70 | self.assertEqual(len(result["rows"]), 2)
71 | self.assertEqual(result["rows"][0][0], 1)
72 | self.assertEqual(result["rows"][0][1], "Alice")
73 |
74 | def test_run_select_query_failure(self):
75 | """Test running a SELECT query with an error."""
76 | query = f"SELECT * FROM {self.test_db}.non_existent_table"
77 |
78 | # Should raise ToolError
79 | with self.assertRaises(ToolError) as context:
80 | run_select_query(query)
81 |
82 | self.assertIn("Query execution failed", str(context.exception))
83 |
84 | def test_table_and_column_comments(self):
85 | """Test that table and column comments are correctly retrieved."""
86 | result = list_tables(self.test_db)
87 | self.assertIsInstance(result, list)
88 | self.assertEqual(len(result), 1)
89 |
90 | table_info = result[0]
91 | # Verify table comment
92 | self.assertEqual(table_info["comment"], "Test table for unit testing")
93 |
94 | # Get columns by name for easier testing
95 | columns = {col["name"]: col for col in table_info["columns"]}
96 |
97 | # Verify column comments
98 | self.assertEqual(columns["id"]["comment"], "Primary identifier")
99 | self.assertEqual(columns["name"]["comment"], "User name field")
100 |
101 |
102 | if __name__ == "__main__":
103 | unittest.main()
104 |
```
--------------------------------------------------------------------------------
/mcp_clickhouse/chdb_prompt.py:
--------------------------------------------------------------------------------
```python
1 | """chDB prompts for MCP server."""
2 |
3 | CHDB_PROMPT = """
4 | # chDB MCP System Prompt
5 |
6 | ## Available Tools
7 | - **run_chdb_select_query**: Execute SELECT queries using chDB's table functions
8 |
9 | ## Core Principles
10 | You are a chDB assistant, specialized in helping users query data sources directly through table functions, **avoiding data imports**.
11 |
12 | ### 🚨 Important Constraints
13 | #### Data Processing Constraints
14 | - **No large data display**: Don't show more than 10 rows of raw data in responses
15 | - **Use analysis tool**: All data processing must be completed in the analysis tool
16 | - **Result-oriented output**: Only provide query results and key insights, not intermediate processing data
17 | - **Avoid context explosion**: Don't paste large amounts of raw data or complete tables
18 |
19 | #### Query Strategy Constraints
20 | - **Prioritize table functions**: When users mention import/load/insert, immediately recommend table functions
21 | - **Direct querying**: All data should be queried in place through table functions
22 | - **Fallback option**: When no suitable table function exists, use Python to download temporary files then process with file()
23 | - **Concise responses**: Avoid lengthy explanations, provide executable SQL directly
24 |
25 | ## Table Functions
26 |
27 | ### File Types
28 | ```sql
29 | -- Local files (auto format detection)
30 | file('path/to/file.csv')
31 | file('data.parquet', 'Parquet')
32 |
33 | -- Remote files
34 | url('https://example.com/data.csv', 'CSV')
35 | url('https://example.com/data.parquet')
36 |
37 | -- S3 storage
38 | s3('s3://bucket/path/file.csv', 'CSV')
39 | s3('s3://bucket/path/*.parquet', 'access_key', 'secret_key', 'Parquet')
40 |
41 | -- HDFS
42 | hdfs('hdfs://namenode:9000/path/file.parquet')
43 | ```
44 |
45 | ### Database Types
46 | ```sql
47 | -- PostgreSQL
48 | postgresql('host:port', 'database', 'table', 'user', 'password')
49 |
50 | -- MySQL
51 | mysql('host:port', 'database', 'table', 'user', 'password')
52 |
53 | -- SQLite
54 | sqlite('path/to/database.db', 'table')
55 | ```
56 |
57 | ### Common Formats
58 | - `CSV`, `CSVWithNames`, `TSV`, `TSVWithNames`
59 | - `JSON`, `JSONEachRow`, `JSONCompact`
60 | - `Parquet`, `ORC`, `Avro`
61 |
62 | ## Workflow
63 |
64 | ### 1. Identify Data Source
65 | - User mentions URL → `url()`
66 | - User mentions S3 → `s3()`
67 | - User mentions local file → `file()`
68 | - User mentions database → corresponding database function
69 | - **No suitable table function** → Use Python to download as temporary file
70 |
71 | ### 2. Fallback: Python Download
72 | When no suitable table function exists:
73 | ```python
74 | # Execute in analysis tool
75 | import requests
76 | import tempfile
77 | import os
78 |
79 | # Download data to temporary file
80 | response = requests.get('your_data_url')
81 |
82 | with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
83 | f.write(response.text)
84 | temp_file = f.name
85 |
86 | # Execute chDB query immediately within the block
87 | try:
88 | # Use run_chdb_select_query to execute query
89 | result = run_chdb_select_query(f"SELECT * FROM file('{temp_file}', 'CSV') LIMIT 10")
90 | print(result)
91 | finally:
92 | # Ensure temporary file deletion
93 | if os.path.exists(temp_file):
94 | os.unlink(temp_file)
95 | ```
96 |
97 | ### 3. Quick Testing
98 | ```sql
99 | -- Test connection (default LIMIT 10)
100 | SELECT * FROM table_function(...) LIMIT 10;
101 |
102 | -- View structure
103 | DESCRIBE table_function(...);
104 | ```
105 |
106 | ### 4. Build Queries
107 | ```sql
108 | -- Basic query (default LIMIT 10)
109 | SELECT column1, column2 FROM table_function(...) WHERE condition LIMIT 10;
110 |
111 | -- Aggregation analysis
112 | SELECT category, COUNT(*), AVG(price)
113 | FROM table_function(...)
114 | GROUP BY category
115 | LIMIT 10;
116 |
117 | -- Multi-source join
118 | SELECT a.id, b.name
119 | FROM file('data1.csv') a
120 | JOIN url('https://example.com/data2.csv', 'CSV') b ON a.id = b.id
121 | LIMIT 10;
122 | ```
123 |
124 | ## Response Patterns
125 |
126 | ### When Users Ask About Data Import
127 | 1. **Immediate stop**: "No need to import data, chDB can query directly"
128 | 2. **Recommend solution**: Provide corresponding table function based on data source type
129 | 3. **Fallback option**: If no suitable table function, explain using Python to download temporary file
130 | 4. **Provide examples**: Give specific SQL statements
131 | 5. **Follow constraints**: Complete all data processing in analysis tool, only output key results
132 |
133 | ### Example Dialogues
134 | ```
135 | User: "How to import this CSV file into chDB?"
136 | Assistant: "No need to import! Query directly:
137 | SELECT * FROM file('your_file.csv') LIMIT 10;
138 | What analysis do you want?"
139 |
140 | User: "This API endpoint doesn't have direct table function support"
141 | Assistant: "I'll use Python to download data to a temporary file, then query with file().
142 | Let me process the data in the analysis tool first..."
143 | ```
144 |
145 | ## Output Constraints
146 | - **Avoid**: Displaying large amounts of raw data, complete tables, intermediate processing steps
147 | - **Recommend**: Concise statistical summaries, key insights, executable SQL
148 | - **Interaction**: Provide overview first, ask for specific needs before deep analysis
149 |
150 | ## Optimization Tips
151 | - Use WHERE filtering to reduce data transfer
152 | - SELECT specific columns to avoid full table scans
153 | - **Use LIMIT 10 by default** to prevent large data output
154 | - Test connection with LIMIT 1 for large datasets first
155 | """
156 |
```
--------------------------------------------------------------------------------
/mcp_clickhouse/mcp_env.py:
--------------------------------------------------------------------------------
```python
1 | """Environment configuration for the MCP ClickHouse server.
2 |
3 | This module handles all environment variable configuration with sensible defaults
4 | and type conversion.
5 | """
6 |
7 | from dataclasses import dataclass
8 | import os
9 | from typing import Optional
10 | from enum import Enum
11 |
12 |
13 | class TransportType(str, Enum):
14 | """Supported MCP server transport types."""
15 |
16 | STDIO = "stdio"
17 | HTTP = "http"
18 | SSE = "sse"
19 |
20 | @classmethod
21 | def values(cls) -> list[str]:
22 | """Get all valid transport values."""
23 | return [transport.value for transport in cls]
24 |
25 |
26 | @dataclass
27 | class ClickHouseConfig:
28 | """Configuration for ClickHouse connection settings.
29 |
30 | This class handles all environment variable configuration with sensible defaults
31 | and type conversion. It provides typed methods for accessing each configuration value.
32 |
33 | Required environment variables (only when CLICKHOUSE_ENABLED=true):
34 | CLICKHOUSE_HOST: The hostname of the ClickHouse server
35 | CLICKHOUSE_USER: The username for authentication
36 | CLICKHOUSE_PASSWORD: The password for authentication
37 |
38 | Optional environment variables (with defaults):
39 | CLICKHOUSE_PORT: The port number (default: 8443 if secure=True, 8123 if secure=False)
40 | CLICKHOUSE_SECURE: Enable HTTPS (default: true)
41 | CLICKHOUSE_VERIFY: Verify SSL certificates (default: true)
42 | CLICKHOUSE_CONNECT_TIMEOUT: Connection timeout in seconds (default: 30)
43 | CLICKHOUSE_SEND_RECEIVE_TIMEOUT: Send/receive timeout in seconds (default: 300)
44 | CLICKHOUSE_DATABASE: Default database to use (default: None)
45 | CLICKHOUSE_PROXY_PATH: Path to be added to the host URL. For instance, for servers behind an HTTP proxy (default: None)
46 | CLICKHOUSE_ENABLED: Enable ClickHouse server (default: true)
47 | """
48 |
49 | def __init__(self):
50 | """Initialize the configuration from environment variables."""
51 | if self.enabled:
52 | self._validate_required_vars()
53 |
54 | @property
55 | def enabled(self) -> bool:
56 | """Get whether ClickHouse server is enabled.
57 |
58 | Default: True
59 | """
60 | return os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true"
61 |
62 | @property
63 | def host(self) -> str:
64 | """Get the ClickHouse host."""
65 | return os.environ["CLICKHOUSE_HOST"]
66 |
67 | @property
68 | def port(self) -> int:
69 | """Get the ClickHouse port.
70 |
71 | Defaults to 8443 if secure=True, 8123 if secure=False.
72 | Can be overridden by CLICKHOUSE_PORT environment variable.
73 | """
74 | if "CLICKHOUSE_PORT" in os.environ:
75 | return int(os.environ["CLICKHOUSE_PORT"])
76 | return 8443 if self.secure else 8123
77 |
78 | @property
79 | def username(self) -> str:
80 | """Get the ClickHouse username."""
81 | return os.environ["CLICKHOUSE_USER"]
82 |
83 | @property
84 | def password(self) -> str:
85 | """Get the ClickHouse password."""
86 | return os.environ["CLICKHOUSE_PASSWORD"]
87 |
88 | @property
89 | def database(self) -> Optional[str]:
90 | """Get the default database name if set."""
91 | return os.getenv("CLICKHOUSE_DATABASE")
92 |
93 | @property
94 | def secure(self) -> bool:
95 | """Get whether HTTPS is enabled.
96 |
97 | Default: True
98 | """
99 | return os.getenv("CLICKHOUSE_SECURE", "true").lower() == "true"
100 |
101 | @property
102 | def verify(self) -> bool:
103 | """Get whether SSL certificate verification is enabled.
104 |
105 | Default: True
106 | """
107 | return os.getenv("CLICKHOUSE_VERIFY", "true").lower() == "true"
108 |
109 | @property
110 | def connect_timeout(self) -> int:
111 | """Get the connection timeout in seconds.
112 |
113 | Default: 30
114 | """
115 | return int(os.getenv("CLICKHOUSE_CONNECT_TIMEOUT", "30"))
116 |
117 | @property
118 | def send_receive_timeout(self) -> int:
119 | """Get the send/receive timeout in seconds.
120 |
121 | Default: 300 (ClickHouse default)
122 | """
123 | return int(os.getenv("CLICKHOUSE_SEND_RECEIVE_TIMEOUT", "300"))
124 |
125 | @property
126 |     def proxy_path(self) -> Optional[str]:
127 | return os.getenv("CLICKHOUSE_PROXY_PATH")
128 |
129 | def get_client_config(self) -> dict:
130 | """Get the configuration dictionary for clickhouse_connect client.
131 |
132 | Returns:
133 | dict: Configuration ready to be passed to clickhouse_connect.get_client()
134 | """
135 | config = {
136 | "host": self.host,
137 | "port": self.port,
138 | "username": self.username,
139 | "password": self.password,
140 | "interface": "https" if self.secure else "http",
141 | "secure": self.secure,
142 | "verify": self.verify,
143 | "connect_timeout": self.connect_timeout,
144 | "send_receive_timeout": self.send_receive_timeout,
145 | "client_name": "mcp_clickhouse",
146 | }
147 |
148 | # Add optional database if set
149 | if self.database:
150 | config["database"] = self.database
151 |
152 | if self.proxy_path:
153 | config["proxy_path"] = self.proxy_path
154 |
155 | return config
156 |
157 | def _validate_required_vars(self) -> None:
158 | """Validate that all required environment variables are set.
159 |
160 | Raises:
161 | ValueError: If any required environment variable is missing.
162 | """
163 | missing_vars = []
164 | for var in ["CLICKHOUSE_HOST", "CLICKHOUSE_USER", "CLICKHOUSE_PASSWORD"]:
165 | if var not in os.environ:
166 | missing_vars.append(var)
167 |
168 | if missing_vars:
169 | raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
170 |
171 |
172 | @dataclass
173 | class ChDBConfig:
174 | """Configuration for chDB connection settings.
175 |
176 | This class handles all environment variable configuration with sensible defaults
177 | and type conversion. It provides typed methods for accessing each configuration value.
178 |
179 | Required environment variables:
180 | CHDB_DATA_PATH: The path to the chDB data directory (only required if CHDB_ENABLED=true)
181 | """
182 |
183 | def __init__(self):
184 | """Initialize the configuration from environment variables."""
185 | if self.enabled:
186 | self._validate_required_vars()
187 |
188 | @property
189 | def enabled(self) -> bool:
190 | """Get whether chDB is enabled.
191 |
192 | Default: False
193 | """
194 | return os.getenv("CHDB_ENABLED", "false").lower() == "true"
195 |
196 | @property
197 | def data_path(self) -> str:
198 | """Get the chDB data path."""
199 | return os.getenv("CHDB_DATA_PATH", ":memory:")
200 |
201 | def get_client_config(self) -> dict:
202 | """Get the configuration dictionary for chDB client.
203 |
204 | Returns:
205 | dict: Configuration ready to be passed to chDB client
206 | """
207 | return {
208 | "data_path": self.data_path,
209 | }
210 |
211 | def _validate_required_vars(self) -> None:
212 | """Validate that all required environment variables are set.
213 |
214 |         CHDB_DATA_PATH has an in-memory default, so there is currently nothing
215 |         to validate; this method is kept for parity with ClickHouseConfig.
216 | """
217 | pass
218 |
219 |
220 | # Global instance placeholders for the singleton pattern
221 | _CONFIG_INSTANCE = None
222 | _CHDB_CONFIG_INSTANCE = None
223 |
224 |
225 | def get_config():
226 | """
227 | Gets the singleton instance of ClickHouseConfig.
228 | Instantiates it on the first call.
229 | """
230 | global _CONFIG_INSTANCE
231 | if _CONFIG_INSTANCE is None:
232 | # Instantiate the config object here, ensuring load_dotenv() has likely run
233 | _CONFIG_INSTANCE = ClickHouseConfig()
234 | return _CONFIG_INSTANCE
235 |
236 |
237 | def get_chdb_config() -> ChDBConfig:
238 | """
239 | Gets the singleton instance of ChDBConfig.
240 | Instantiates it on the first call.
241 |
242 | Returns:
243 | ChDBConfig: The chDB configuration instance
244 | """
245 | global _CHDB_CONFIG_INSTANCE
246 | if _CHDB_CONFIG_INSTANCE is None:
247 | _CHDB_CONFIG_INSTANCE = ChDBConfig()
248 | return _CHDB_CONFIG_INSTANCE
249 |
250 |
251 | @dataclass
252 | class MCPServerConfig:
253 | """Configuration for MCP server-level settings.
254 |
255 | These settings control the server transport and tool behavior and are
256 | intentionally independent of ClickHouse connection validation.
257 |
258 | Optional environment variables (with defaults):
259 | CLICKHOUSE_MCP_SERVER_TRANSPORT: "stdio", "http", or "sse" (default: stdio)
260 | CLICKHOUSE_MCP_BIND_HOST: Bind host for HTTP/SSE (default: 127.0.0.1)
261 | CLICKHOUSE_MCP_BIND_PORT: Bind port for HTTP/SSE (default: 8000)
262 | CLICKHOUSE_MCP_QUERY_TIMEOUT: SELECT tool timeout in seconds (default: 30)
263 | """
264 |
265 | @property
266 | def server_transport(self) -> str:
267 | transport = os.getenv("CLICKHOUSE_MCP_SERVER_TRANSPORT", TransportType.STDIO.value).lower()
268 | if transport not in TransportType.values():
269 | valid_options = ", ".join(f'"{t}"' for t in TransportType.values())
270 | raise ValueError(f"Invalid transport '{transport}'. Valid options: {valid_options}")
271 | return transport
272 |
273 | @property
274 | def bind_host(self) -> str:
275 | return os.getenv("CLICKHOUSE_MCP_BIND_HOST", "127.0.0.1")
276 |
277 | @property
278 | def bind_port(self) -> int:
279 | return int(os.getenv("CLICKHOUSE_MCP_BIND_PORT", "8000"))
280 |
281 | @property
282 | def query_timeout(self) -> int:
283 | return int(os.getenv("CLICKHOUSE_MCP_QUERY_TIMEOUT", "30"))
284 |
285 |
286 | _MCP_CONFIG_INSTANCE = None
287 |
288 |
289 | def get_mcp_config() -> MCPServerConfig:
290 | """Gets the singleton instance of MCPServerConfig."""
291 | global _MCP_CONFIG_INSTANCE
292 | if _MCP_CONFIG_INSTANCE is None:
293 | _MCP_CONFIG_INSTANCE = MCPServerConfig()
294 | return _MCP_CONFIG_INSTANCE
295 |
```
--------------------------------------------------------------------------------
/tests/test_mcp_server.py:
--------------------------------------------------------------------------------
```python
1 | import pytest
2 | import pytest_asyncio
3 | from fastmcp import Client
4 | from fastmcp.exceptions import ToolError
5 | import asyncio
6 | from mcp_clickhouse.mcp_server import mcp, create_clickhouse_client
7 | from dotenv import load_dotenv
8 | import json
9 |
10 | # Load environment variables
11 | load_dotenv()
12 |
13 |
14 | @pytest.fixture(scope="module")
15 | def event_loop():
16 | """Create an instance of the default event loop for the test session."""
17 | loop = asyncio.get_event_loop_policy().new_event_loop()
18 | yield loop
19 | loop.close()
20 |
21 |
22 | @pytest_asyncio.fixture(scope="module")
23 | async def setup_test_database():
24 | """Set up test database and tables before running tests."""
25 | client = create_clickhouse_client()
26 |
27 | # Test database and table names
28 | test_db = "test_mcp_db"
29 | test_table = "test_table"
30 | test_table2 = "another_test_table"
31 |
32 | # Create test database
33 | client.command(f"CREATE DATABASE IF NOT EXISTS {test_db}")
34 |
35 | # Drop tables if they exist
36 | client.command(f"DROP TABLE IF EXISTS {test_db}.{test_table}")
37 | client.command(f"DROP TABLE IF EXISTS {test_db}.{test_table2}")
38 |
39 | # Create first test table with comments
40 | client.command(f"""
41 | CREATE TABLE {test_db}.{test_table} (
42 | id UInt32 COMMENT 'Primary identifier',
43 | name String COMMENT 'User name field',
44 | age UInt8 COMMENT 'User age',
45 | created_at DateTime DEFAULT now() COMMENT 'Record creation timestamp'
46 | ) ENGINE = MergeTree()
47 | ORDER BY id
48 | COMMENT 'Test table for MCP server testing'
49 | """)
50 |
51 | # Create second test table
52 | client.command(f"""
53 | CREATE TABLE {test_db}.{test_table2} (
54 | event_id UInt64,
55 | event_type String,
56 | timestamp DateTime
57 | ) ENGINE = MergeTree()
58 | ORDER BY (event_type, timestamp)
59 | COMMENT 'Event tracking table'
60 | """)
61 |
62 | # Insert test data
63 | client.command(f"""
64 | INSERT INTO {test_db}.{test_table} (id, name, age) VALUES
65 | (1, 'Alice', 30),
66 | (2, 'Bob', 25),
67 | (3, 'Charlie', 35),
68 | (4, 'Diana', 28)
69 | """)
70 |
71 | client.command(f"""
72 | INSERT INTO {test_db}.{test_table2} (event_id, event_type, timestamp) VALUES
73 | (1001, 'login', '2024-01-01 10:00:00'),
74 | (1002, 'logout', '2024-01-01 11:00:00'),
75 | (1003, 'login', '2024-01-01 12:00:00')
76 | """)
77 |
78 | yield test_db, test_table, test_table2
79 |
80 | # Cleanup after tests
81 | client.command(f"DROP DATABASE IF EXISTS {test_db}")
82 |
83 |
84 | @pytest.fixture
85 | def mcp_server():
86 | """Return the MCP server instance for testing."""
87 | return mcp
88 |
89 |
90 | @pytest.mark.asyncio
91 | async def test_list_databases(mcp_server, setup_test_database):
92 | """Test the list_databases tool."""
93 | test_db, _, _ = setup_test_database
94 |
95 | async with Client(mcp_server) as client:
96 | result = await client.call_tool("list_databases", {})
97 |
98 | # The result should be a list containing at least one item
99 | assert len(result.content) >= 1
100 | assert isinstance(result.content[0].text, str)
101 |
102 | # Parse the result text (it's a JSON list of database names)
103 | databases = json.loads(result.content[0].text)
104 | assert test_db in databases
105 | assert "system" in databases # System database should always exist
106 |
107 |
108 | @pytest.mark.asyncio
109 | async def test_list_tables_basic(mcp_server, setup_test_database):
110 | """Test the list_tables tool without filters."""
111 | test_db, test_table, test_table2 = setup_test_database
112 |
113 | async with Client(mcp_server) as client:
114 | result = await client.call_tool("list_tables", {"database": test_db})
115 |
116 | assert len(result.content) >= 1
117 | tables = json.loads(result.content[0].text)
118 |
119 | # Should have exactly 2 tables
120 | assert len(tables) == 2
121 |
122 | # Get table names
123 | table_names = [table["name"] for table in tables]
124 | assert test_table in table_names
125 | assert test_table2 in table_names
126 |
127 | # Check table details
128 | for table in tables:
129 | assert table["database"] == test_db
130 | assert "columns" in table
131 | assert "total_rows" in table
132 | assert "engine" in table
133 | assert "comment" in table
134 |
135 | # Verify column information exists
136 | assert len(table["columns"]) > 0
137 | for column in table["columns"]:
138 | assert "name" in column
139 | assert "column_type" in column
140 | assert "comment" in column
141 |
142 |
143 | @pytest.mark.asyncio
144 | async def test_list_tables_with_like_filter(mcp_server, setup_test_database):
145 | """Test the list_tables tool with LIKE filter."""
146 | test_db, test_table, _ = setup_test_database
147 |
148 | async with Client(mcp_server) as client:
149 | # Test with LIKE filter
150 | result = await client.call_tool("list_tables", {"database": test_db, "like": "test_%"})
151 |
152 | tables_data = json.loads(result.content[0].text)
153 |
154 | # Handle both single dict and list of dicts
155 | if isinstance(tables_data, dict):
156 | tables = [tables_data]
157 | else:
158 | tables = tables_data
159 |
160 | assert len(tables) == 1
161 | assert tables[0]["name"] == test_table
162 |
163 |
164 | @pytest.mark.asyncio
165 | async def test_list_tables_with_not_like_filter(mcp_server, setup_test_database):
166 | """Test the list_tables tool with NOT LIKE filter."""
167 | test_db, _, test_table2 = setup_test_database
168 |
169 | async with Client(mcp_server) as client:
170 | # Test with NOT LIKE filter
171 | result = await client.call_tool("list_tables", {"database": test_db, "not_like": "test_%"})
172 |
173 | tables_data = json.loads(result.content[0].text)
174 |
175 | # Handle both single dict and list of dicts
176 | if isinstance(tables_data, dict):
177 | tables = [tables_data]
178 | else:
179 | tables = tables_data
180 |
181 | assert len(tables) == 1
182 | assert tables[0]["name"] == test_table2
183 |
184 |
185 | @pytest.mark.asyncio
186 | async def test_run_select_query_success(mcp_server, setup_test_database):
187 | """Test running a successful SELECT query."""
188 | test_db, test_table, _ = setup_test_database
189 |
190 | async with Client(mcp_server) as client:
191 | query = f"SELECT id, name, age FROM {test_db}.{test_table} ORDER BY id"
192 | result = await client.call_tool("run_select_query", {"query": query})
193 |
194 | query_result = json.loads(result.content[0].text)
195 |
196 | # Check structure
197 | assert "columns" in query_result
198 | assert "rows" in query_result
199 |
200 | # Check columns
201 | assert query_result["columns"] == ["id", "name", "age"]
202 |
203 | # Check rows
204 | assert len(query_result["rows"]) == 4
205 | assert query_result["rows"][0] == [1, "Alice", 30]
206 | assert query_result["rows"][1] == [2, "Bob", 25]
207 | assert query_result["rows"][2] == [3, "Charlie", 35]
208 | assert query_result["rows"][3] == [4, "Diana", 28]
209 |
210 |
211 | @pytest.mark.asyncio
212 | async def test_run_select_query_with_aggregation(mcp_server, setup_test_database):
213 | """Test running a SELECT query with aggregation."""
214 | test_db, test_table, _ = setup_test_database
215 |
216 | async with Client(mcp_server) as client:
217 | query = f"SELECT COUNT(*) as count, AVG(age) as avg_age FROM {test_db}.{test_table}"
218 | result = await client.call_tool("run_select_query", {"query": query})
219 |
220 | query_result = json.loads(result.content[0].text)
221 |
222 | assert query_result["columns"] == ["count", "avg_age"]
223 | assert len(query_result["rows"]) == 1
224 | assert query_result["rows"][0][0] == 4 # count
225 | assert query_result["rows"][0][1] == 29.5 # average age
226 |
227 |
228 | @pytest.mark.asyncio
229 | async def test_run_select_query_with_join(mcp_server, setup_test_database):
230 |     """Test running a SELECT aggregation query after inserting additional event data."""
231 | test_db, test_table, test_table2 = setup_test_database
232 |
233 | async with Client(mcp_server) as client:
234 | # Insert related data for join
235 | client_direct = create_clickhouse_client()
236 | client_direct.command(f"""
237 | INSERT INTO {test_db}.{test_table2} (event_id, event_type, timestamp) VALUES
238 | (2001, 'purchase', '2024-01-01 14:00:00')
239 | """)
240 |
241 | query = f"""
242 | SELECT
243 | COUNT(DISTINCT event_type) as event_types_count
244 | FROM {test_db}.{test_table2}
245 | """
246 | result = await client.call_tool("run_select_query", {"query": query})
247 |
248 | query_result = json.loads(result.content[0].text)
249 | assert query_result["rows"][0][0] == 3 # login, logout, purchase
250 |
251 |
252 | @pytest.mark.asyncio
253 | async def test_run_select_query_error(mcp_server, setup_test_database):
254 | """Test running a SELECT query that results in an error."""
255 | test_db, _, _ = setup_test_database
256 |
257 | async with Client(mcp_server) as client:
258 | # Query non-existent table
259 | query = f"SELECT * FROM {test_db}.non_existent_table"
260 |
261 | # Should raise ToolError
262 | with pytest.raises(ToolError) as exc_info:
263 | await client.call_tool("run_select_query", {"query": query})
264 |
265 | assert "Query execution failed" in str(exc_info.value)
266 |
267 |
268 | @pytest.mark.asyncio
269 | async def test_run_select_query_syntax_error(mcp_server):
270 | """Test running a SELECT query with syntax error."""
271 | async with Client(mcp_server) as client:
272 | # Invalid SQL syntax
273 | query = "SELECT FROM WHERE"
274 |
275 | # Should raise ToolError
276 | with pytest.raises(ToolError) as exc_info:
277 | await client.call_tool("run_select_query", {"query": query})
278 |
279 | assert "Query execution failed" in str(exc_info.value)
280 |
281 |
282 | @pytest.mark.asyncio
283 | async def test_table_metadata_details(mcp_server, setup_test_database):
284 | """Test that table metadata is correctly retrieved."""
285 | test_db, test_table, _ = setup_test_database
286 |
287 | async with Client(mcp_server) as client:
288 | result = await client.call_tool("list_tables", {"database": test_db})
289 | tables = json.loads(result.content[0].text)
290 |
291 | # Find our test table
292 | test_table_info = next(t for t in tables if t["name"] == test_table)
293 |
294 | # Check table comment
295 | assert test_table_info["comment"] == "Test table for MCP server testing"
296 |
297 | # Check engine info
298 | assert test_table_info["engine"] == "MergeTree"
299 | assert "MergeTree" in test_table_info["engine_full"]
300 |
301 | # Check row count
302 | assert test_table_info["total_rows"] == 4
303 |
304 | # Check columns and their comments
305 | columns_by_name = {col["name"]: col for col in test_table_info["columns"]}
306 |
307 | assert columns_by_name["id"]["comment"] == "Primary identifier"
308 | assert columns_by_name["id"]["column_type"] == "UInt32"
309 |
310 | assert columns_by_name["name"]["comment"] == "User name field"
311 | assert columns_by_name["name"]["column_type"] == "String"
312 |
313 | assert columns_by_name["age"]["comment"] == "User age"
314 | assert columns_by_name["age"]["column_type"] == "UInt8"
315 |
316 | assert columns_by_name["created_at"]["comment"] == "Record creation timestamp"
317 | assert columns_by_name["created_at"]["column_type"] == "DateTime"
318 | assert columns_by_name["created_at"]["default_expression"] == "now()"
319 |
320 |
321 | @pytest.mark.asyncio
322 | async def test_system_database_access(mcp_server):
323 | """Test that we can access system databases."""
324 | async with Client(mcp_server) as client:
325 | # List tables in system database
326 | result = await client.call_tool("list_tables", {"database": "system"})
327 | tables = json.loads(result.content[0].text)
328 |
329 | # System database should have many tables
330 | assert len(tables) > 10
331 |
332 | # Check for some common system tables
333 | table_names = [t["name"] for t in tables]
334 | assert "tables" in table_names
335 | assert "columns" in table_names
336 | assert "databases" in table_names
337 |
338 |
339 | @pytest.mark.asyncio
340 | async def test_concurrent_queries(mcp_server, setup_test_database):
341 | """Test running multiple queries concurrently."""
342 | test_db, test_table, test_table2 = setup_test_database
343 |
344 | async with Client(mcp_server) as client:
345 | # Run multiple queries concurrently
346 | queries = [
347 | f"SELECT COUNT(*) FROM {test_db}.{test_table}",
348 | f"SELECT COUNT(*) FROM {test_db}.{test_table2}",
349 | f"SELECT MAX(id) FROM {test_db}.{test_table}",
350 | f"SELECT MIN(event_id) FROM {test_db}.{test_table2}",
351 | ]
352 |
353 | # Execute all queries concurrently
354 | results = await asyncio.gather(
355 | *[client.call_tool("run_select_query", {"query": query}) for query in queries]
356 | )
357 |
358 | # Verify all queries succeeded
359 | assert len(results) == 4
360 |
361 | # Check each result
362 | for i, result in enumerate(results):
363 | query_result = json.loads(result.content[0].text)
364 | assert "rows" in query_result
365 | assert len(query_result["rows"]) == 1
366 |
```
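
Outside of pytest, the same in-process pattern the tests use can be reproduced in a few lines. This is a minimal sketch (not part of the test suite) and assumes a reachable ClickHouse instance configured through the usual environment variables:

```python
# Standalone sketch mirroring the tests above: call the list_databases
# tool in-process through the FastMCP client and print the result.
import asyncio
import json

from fastmcp import Client
from mcp_clickhouse.mcp_server import mcp


async def main() -> None:
    async with Client(mcp) as client:
        result = await client.call_tool("list_databases", {})
        # The tool returns a JSON-encoded list of database names.
        print(json.loads(result.content[0].text))


asyncio.run(main())
```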
--------------------------------------------------------------------------------
/mcp_clickhouse/mcp_server.py:
--------------------------------------------------------------------------------
```python
1 | import logging
2 | import json
3 | from typing import Optional, List, Any
4 | import concurrent.futures
5 | import atexit
6 | import os
7 |
8 | import clickhouse_connect
9 | import chdb.session as chs
10 | from clickhouse_connect.driver.binding import format_query_value
11 | from dotenv import load_dotenv
12 | from fastmcp import FastMCP
13 | from fastmcp.tools import Tool
14 | from fastmcp.prompts import Prompt
15 | from fastmcp.exceptions import ToolError
16 | from dataclasses import dataclass, field, asdict, is_dataclass
17 | from starlette.requests import Request
18 | from starlette.responses import PlainTextResponse
19 |
20 | from mcp_clickhouse.mcp_env import get_config, get_chdb_config, get_mcp_config
21 | from mcp_clickhouse.chdb_prompt import CHDB_PROMPT
22 |
23 |
24 | @dataclass
25 | class Column:
26 | database: str
27 | table: str
28 | name: str
29 | column_type: str
30 | default_kind: Optional[str]
31 | default_expression: Optional[str]
32 | comment: Optional[str]
33 |
34 |
35 | @dataclass
36 | class Table:
37 | database: str
38 | name: str
39 | engine: str
40 | create_table_query: str
41 | dependencies_database: str
42 | dependencies_table: str
43 | engine_full: str
44 | sorting_key: str
45 | primary_key: str
46 | total_rows: int
47 | total_bytes: int
48 | total_bytes_uncompressed: int
49 | parts: int
50 | active_parts: int
51 | total_marks: int
52 | comment: Optional[str] = None
53 | columns: List[Column] = field(default_factory=list)
54 |
55 |
56 | MCP_SERVER_NAME = "mcp-clickhouse"
57 |
58 | # Configure logging
59 | logging.basicConfig(
60 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
61 | )
62 | logger = logging.getLogger(MCP_SERVER_NAME)
63 |
64 | QUERY_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10)
65 | atexit.register(lambda: QUERY_EXECUTOR.shutdown(wait=True))
66 |
67 | load_dotenv()
68 |
69 | mcp = FastMCP(name=MCP_SERVER_NAME)
70 |
71 |
72 | @mcp.custom_route("/health", methods=["GET"])
73 | async def health_check(request: Request) -> PlainTextResponse:
74 | """Health check endpoint for monitoring server status.
75 |
76 | Returns OK if the server is running and can connect to ClickHouse.
77 | """
78 | try:
79 |         # Check whether the ClickHouse tools are enabled via CLICKHOUSE_ENABLED (default: true)
80 |         # If disabled, report chDB status below instead of attempting a ClickHouse connection
81 | clickhouse_enabled = os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true"
82 |
83 | if not clickhouse_enabled:
84 | # If ClickHouse is disabled, check chDB status
85 | chdb_config = get_chdb_config()
86 | if chdb_config.enabled:
87 | return PlainTextResponse("OK - MCP server running with chDB enabled")
88 | else:
89 | # Both ClickHouse and chDB are disabled - this is an error
90 | return PlainTextResponse(
91 | "ERROR - Both ClickHouse and chDB are disabled. At least one must be enabled.",
92 | status_code=503,
93 | )
94 |
95 | # Try to create a client connection to verify ClickHouse connectivity
96 | client = create_clickhouse_client()
97 | version = client.server_version
98 | return PlainTextResponse(f"OK - Connected to ClickHouse {version}")
99 | except Exception as e:
100 | # Return 503 Service Unavailable if we can't connect to ClickHouse
101 | return PlainTextResponse(f"ERROR - Cannot connect to ClickHouse: {str(e)}", status_code=503)
102 |
103 |
104 | def result_to_table(query_columns, result) -> List[Table]:
105 | return [Table(**dict(zip(query_columns, row))) for row in result]
106 |
107 |
108 | def result_to_column(query_columns, result) -> List[Column]:
109 | return [Column(**dict(zip(query_columns, row))) for row in result]
110 |
111 |
112 | def to_json(obj: Any) -> str:
113 | if is_dataclass(obj):
114 | return json.dumps(asdict(obj), default=to_json)
115 | elif isinstance(obj, list):
116 | return [to_json(item) for item in obj]
117 | elif isinstance(obj, dict):
118 | return {key: to_json(value) for key, value in obj.items()}
119 | return obj
120 |
121 |
122 | def list_databases():
123 | """List available ClickHouse databases"""
124 | logger.info("Listing all databases")
125 | client = create_clickhouse_client()
126 | result = client.command("SHOW DATABASES")
127 |
128 | # Convert newline-separated string to list and trim whitespace
129 | if isinstance(result, str):
130 | databases = [db.strip() for db in result.strip().split("\n")]
131 | else:
132 | databases = [result]
133 |
134 | logger.info(f"Found {len(databases)} databases")
135 | return json.dumps(databases)
136 |
137 |
138 | def list_tables(database: str, like: Optional[str] = None, not_like: Optional[str] = None):
139 |     """List available ClickHouse tables in a database, including schema, comment,
140 |     row count, and column details."""
141 | logger.info(f"Listing tables in database '{database}'")
142 | client = create_clickhouse_client()
143 | query = f"SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment FROM system.tables WHERE database = {format_query_value(database)}"
144 | if like:
145 | query += f" AND name LIKE {format_query_value(like)}"
146 |
147 | if not_like:
148 | query += f" AND name NOT LIKE {format_query_value(not_like)}"
149 |
150 | result = client.query(query)
151 |
152 | # Deserialize result as Table dataclass instances
153 | tables = result_to_table(result.column_names, result.result_rows)
154 |
155 | for table in tables:
156 | column_data_query = f"SELECT database, table, name, type AS column_type, default_kind, default_expression, comment FROM system.columns WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"
157 | column_data_query_result = client.query(column_data_query)
158 | table.columns = [
159 | c
160 | for c in result_to_column(
161 | column_data_query_result.column_names,
162 | column_data_query_result.result_rows,
163 | )
164 | ]
165 |
166 | logger.info(f"Found {len(tables)} tables")
167 | return [asdict(table) for table in tables]
168 |
169 |
170 | def execute_query(query: str):
171 | client = create_clickhouse_client()
172 | try:
173 | read_only = get_readonly_setting(client)
174 | res = client.query(query, settings={"readonly": read_only})
175 | logger.info(f"Query returned {len(res.result_rows)} rows")
176 | return {"columns": res.column_names, "rows": res.result_rows}
177 | except Exception as err:
178 | logger.error(f"Error executing query: {err}")
179 | raise ToolError(f"Query execution failed: {str(err)}")
180 |
181 |
182 | def run_select_query(query: str):
183 | """Run a SELECT query in a ClickHouse database"""
184 | logger.info(f"Executing SELECT query: {query}")
185 | try:
186 | future = QUERY_EXECUTOR.submit(execute_query, query)
187 | try:
188 | timeout_secs = get_mcp_config().query_timeout
189 | result = future.result(timeout=timeout_secs)
190 | # Check if we received an error structure from execute_query
191 | if isinstance(result, dict) and "error" in result:
192 | logger.warning(f"Query failed: {result['error']}")
193 | # MCP requires structured responses; string error messages can cause
194 | # serialization issues leading to BrokenResourceError
195 | return {
196 | "status": "error",
197 | "message": f"Query failed: {result['error']}",
198 | }
199 | return result
200 | except concurrent.futures.TimeoutError:
201 | logger.warning(f"Query timed out after {timeout_secs} seconds: {query}")
202 | future.cancel()
203 | raise ToolError(f"Query timed out after {timeout_secs} seconds")
204 | except ToolError:
205 | raise
206 | except Exception as e:
207 | logger.error(f"Unexpected error in run_select_query: {str(e)}")
208 | raise RuntimeError(f"Unexpected error during query execution: {str(e)}")
209 |
210 |
211 | def create_clickhouse_client():
212 | client_config = get_config().get_client_config()
213 | logger.info(
214 | f"Creating ClickHouse client connection to {client_config['host']}:{client_config['port']} "
215 | f"as {client_config['username']} "
216 | f"(secure={client_config['secure']}, verify={client_config['verify']}, "
217 | f"connect_timeout={client_config['connect_timeout']}s, "
218 | f"send_receive_timeout={client_config['send_receive_timeout']}s)"
219 | )
220 |
221 | try:
222 | client = clickhouse_connect.get_client(**client_config)
223 | # Test the connection
224 | version = client.server_version
225 | logger.info(f"Successfully connected to ClickHouse server version {version}")
226 | return client
227 | except Exception as e:
228 | logger.error(f"Failed to connect to ClickHouse: {str(e)}")
229 | raise
230 |
231 |
232 | def get_readonly_setting(client) -> str:
233 | """Get the appropriate readonly setting value to use for queries.
234 |
235 | This function handles potential conflicts between server and client readonly settings:
236 | - readonly=0: No read-only restrictions
237 | - readonly=1: Only read queries allowed, settings cannot be changed
238 | - readonly=2: Only read queries allowed, settings can be changed (except readonly itself)
239 |
240 | If server has readonly=2 and client tries to set readonly=1, it would cause:
241 | "Setting readonly is unknown or readonly" error
242 |
243 | This function preserves the server's readonly setting unless it's 0, in which case
244 | we enforce readonly=1 to ensure queries are read-only.
245 |
246 | Args:
247 | client: ClickHouse client connection
248 |
249 | Returns:
250 | String value of readonly setting to use
251 | """
252 | read_only = client.server_settings.get("readonly")
253 | if read_only:
254 |         if read_only.value == "0":
255 | return "1" # Force read-only mode if server has it disabled
256 | else:
257 | return read_only.value # Respect server's readonly setting (likely 2)
258 | else:
259 | return "1" # Default to basic read-only mode if setting isn't present
260 |
261 |
262 | def create_chdb_client():
263 | """Create a chDB client connection."""
264 | if not get_chdb_config().enabled:
265 | raise ValueError("chDB is not enabled. Set CHDB_ENABLED=true to enable it.")
266 | return _chdb_client
267 |
268 |
269 | def execute_chdb_query(query: str):
270 | """Execute a query using chDB client."""
271 | client = create_chdb_client()
272 | try:
273 | res = client.query(query, "JSON")
274 | if res.has_error():
275 | error_msg = res.error_message()
276 | logger.error(f"Error executing chDB query: {error_msg}")
277 | return {"error": error_msg}
278 |
279 | result_data = res.data()
280 | if not result_data:
281 | return []
282 |
283 | result_json = json.loads(result_data)
284 |
285 | return result_json.get("data", [])
286 |
287 | except Exception as err:
288 | logger.error(f"Error executing chDB query: {err}")
289 | return {"error": str(err)}
290 |
291 |
292 | def run_chdb_select_query(query: str):
293 | """Run SQL in chDB, an in-process ClickHouse engine"""
294 | logger.info(f"Executing chDB SELECT query: {query}")
295 | try:
296 | future = QUERY_EXECUTOR.submit(execute_chdb_query, query)
297 | try:
298 | timeout_secs = get_mcp_config().query_timeout
299 | result = future.result(timeout=timeout_secs)
300 | # Check if we received an error structure from execute_chdb_query
301 | if isinstance(result, dict) and "error" in result:
302 | logger.warning(f"chDB query failed: {result['error']}")
303 | return {
304 | "status": "error",
305 | "message": f"chDB query failed: {result['error']}",
306 | }
307 | return result
308 | except concurrent.futures.TimeoutError:
309 | logger.warning(
310 | f"chDB query timed out after {timeout_secs} seconds: {query}"
311 | )
312 | future.cancel()
313 | return {
314 | "status": "error",
315 | "message": f"chDB query timed out after {timeout_secs} seconds",
316 | }
317 | except Exception as e:
318 | logger.error(f"Unexpected error in run_chdb_select_query: {e}")
319 | return {"status": "error", "message": f"Unexpected error: {e}"}
320 |
321 |
322 | def chdb_initial_prompt() -> str:
323 |     """This prompt helps users understand how to interact with chDB and perform common operations"""
324 | return CHDB_PROMPT
325 |
326 |
327 | def _init_chdb_client():
328 | """Initialize the global chDB client instance."""
329 | try:
330 | if not get_chdb_config().enabled:
331 | logger.info("chDB is disabled, skipping client initialization")
332 | return None
333 |
334 | client_config = get_chdb_config().get_client_config()
335 | data_path = client_config["data_path"]
336 | logger.info(f"Creating chDB client with data_path={data_path}")
337 | client = chs.Session(path=data_path)
338 | logger.info(f"Successfully connected to chDB with data_path={data_path}")
339 | return client
340 | except Exception as e:
341 | logger.error(f"Failed to initialize chDB client: {e}")
342 | return None
343 |
344 |
345 | # Register tools based on configuration
346 | if os.getenv("CLICKHOUSE_ENABLED", "true").lower() == "true":
347 | mcp.add_tool(Tool.from_function(list_databases))
348 | mcp.add_tool(Tool.from_function(list_tables))
349 | mcp.add_tool(Tool.from_function(run_select_query))
350 | logger.info("ClickHouse tools registered")
351 |
352 |
353 | if os.getenv("CHDB_ENABLED", "false").lower() == "true":
354 | _chdb_client = _init_chdb_client()
355 | if _chdb_client:
356 | atexit.register(lambda: _chdb_client.close())
357 |
358 | mcp.add_tool(Tool.from_function(run_chdb_select_query))
359 | chdb_prompt = Prompt.from_function(
360 | chdb_initial_prompt,
361 | name="chdb_initial_prompt",
362 |         description="This prompt helps users understand how to interact with chDB and perform common operations",
363 | )
364 | mcp.add_prompt(chdb_prompt)
365 | logger.info("chDB tools and prompts registered")
366 |
```
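
As a closing illustration (not one of the repository files shown above), the transport settings from MCPServerConfig could be wired into an entry point roughly as follows. The repository's actual main.py is not reproduced here, and the exact FastMCP run() keyword arguments for the HTTP/SSE transports are an assumption:

```python
# Hypothetical launcher sketch, not the repository's main.py.
# Assumes FastMCP's run() accepts host/port keyword arguments for the
# "http" and "sse" transports; treat this as illustrative only.
from mcp_clickhouse.mcp_env import get_mcp_config
from mcp_clickhouse.mcp_server import mcp


def main() -> None:
    config = get_mcp_config()
    transport = config.server_transport  # "stdio", "http", or "sse"
    if transport == "stdio":
        mcp.run(transport="stdio")
    else:
        mcp.run(transport=transport, host=config.bind_host, port=config.bind_port)


if __name__ == "__main__":
    main()
```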