# Directory Structure
```
├── .dockerignore
├── .github
│   ├── CODEOWNERS
│   ├── dependabot.yml
│   └── workflows
│       ├── contributors-list.yml
│       ├── test.yml
│       ├── trunk_check.yml
│       └── trunk_upgrade.yml
├── .gitignore
├── .python-version
├── .trunk
│   ├── .gitignore
│   ├── configs
│   │   ├── .checkov.yml
│   │   ├── .isort.cfg
│   │   ├── .markdownlint.yaml
│   │   ├── .shellcheckrc
│   │   ├── .yamllint.yaml
│   │   └── ruff.toml
│   └── trunk.yaml
├── config.yml.template
├── dev
│   ├── build.sh
│   ├── clean.sh
│   ├── publish.sh
│   ├── setup.sh
│   └── test_python.sh
├── Dockerfile
├── docs
│   └── img
│       └── archirecture.png
├── LICENSE
├── Makefile
├── pyproject.toml
├── README.md
├── requirements.setup.txt
├── src
│   ├── mcp_vertexai_search
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── agent.py
│   │   ├── cli.py
│   │   ├── config.py
│   │   ├── google_cloud.py
│   │   ├── server.py
│   │   └── utils.py
│   └── research_agent
│       ├── __init__.py
│       ├── chat.py
│       ├── mcp_client.py
│       └── utils.py
├── tests
│   ├── __init__.py
│   ├── test_config.py
│   └── test_utils.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.12
2 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.isort.cfg:
--------------------------------------------------------------------------------
```
1 | [settings]
2 | profile=black
3 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.checkov.yml:
--------------------------------------------------------------------------------
```yaml
1 | skip-check:
2 | - CKV2_GHA_1
3 |
```
--------------------------------------------------------------------------------
/.trunk/.gitignore:
--------------------------------------------------------------------------------
```
1 | *out
2 | *logs
3 | *actions
4 | *notifications
5 | *tools
6 | plugins
7 | user_trunk.yaml
8 | user.yaml
9 | tmp
10 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.markdownlint.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Prettier friendly markdownlint config (all formatting rules disabled)
2 | extends: markdownlint/style/prettier
3 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.yamllint.yaml:
--------------------------------------------------------------------------------
```yaml
1 | rules:
2 | quoted-strings:
3 | required: only-when-needed
4 | extra-allowed: ["{|}"]
5 | key-duplicates: {}
6 | octal-values:
7 | forbid-implicit-octal: true
8 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.shellcheckrc:
--------------------------------------------------------------------------------
```
1 | enable=all
2 | source-path=SCRIPTDIR
3 | disable=SC2154
4 |
5 | # If you're having issues with shellcheck following source, disable the errors via:
6 | # disable=SC1090
7 | # disable=SC1091
8 |
```
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
```
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
173 | # server config
174 | config.yml
175 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP Server for Vertex AI Search
2 |
3 | This is an MCP server that searches documents using Vertex AI Search.
4 |
5 | ## Architecture
6 |
7 | This solution uses Gemini with Vertex AI grounding to search documents using your private data.
8 | Grounding improves the quality of search results by grounding Gemini's responses in your data stored in Vertex AI Datastore.
9 | One or more Vertex AI data stores can be integrated with the MCP server.
10 | For more details on grounding, refer to [Vertex AI Grounding Documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-with-your-data).
11 |
12 | 
13 |
14 | ## How to use
15 |
16 | There are two ways to use this MCP server.
17 | If you want to run the server on Docker, the first approach is a good fit because a Dockerfile is provided in the project.
18 |
19 | ### 1. Clone the repository
20 |
21 | ```shell
22 | # Clone the repository
23 | git clone git@github.com:ubie-oss/mcp-vertexai-search.git
24 |
25 | # Create a virtual environment
26 | uv venv
27 | # Install the dependencies
28 | uv sync --all-extras
29 |
30 | # Check the command
31 | uv run mcp-vertexai-search
32 | ```
33 |
34 | ### 2. Install the python package
35 |
36 | The package isn't published to PyPI yet, but we can install it from the repository.
37 | We need a config file derived from [config.yml.template](./config.yml.template) to run the MCP server, because the python package doesn't include the config template.
38 | Please refer to [Appendix A: Config file](#appendix-a-config-file) for the details of the config file.
39 |
40 | ```shell
41 | # Install the package
42 | pip install git+https://github.com/ubie-oss/mcp-vertexai-search.git
43 |
44 | # Check the command
45 | mcp-vertexai-search --help
46 | ```
47 |
48 | ## Development
49 |
50 | ### Prerequisites
51 |
52 | - [uv](https://docs.astral.sh/uv/getting-started/installation/)
53 | - Vertex AI data store
54 | - Please look into [the official documentation about data stores](https://cloud.google.com/generative-ai-app-builder/docs/create-datastore-ingest) for more information
55 |
56 | ### Set up Local Environment
57 |
58 | ```shell
59 | # Optional: Install uv
60 | python -m pip install -r requirements.setup.txt
61 |
62 | # Create a virtual environment
63 | uv venv
64 | uv sync --all-extras
65 | ```
66 |
67 | ### Run the MCP server
68 |
69 | The server supports two transports: SSE (Server-Sent Events) and stdio (standard input/output).
70 | We can control the transport by setting the `--transport` flag.
71 |
72 | We can configure the MCP server with a YAML file.
73 | [config.yml.template](./config.yml.template) is a template for the config file.
74 | Please modify the config file to fit your needs.
75 |
76 | ```bash
77 | uv run mcp-vertexai-search serve \
78 | --config config.yml \
79 | --transport <stdio|sse>
80 | ```
81 |
82 | ### Test the Vertex AI Search
83 |
84 | We can test Vertex AI Search without running the MCP server by using the `mcp-vertexai-search search` command.
85 |
86 | ```bash
87 | uv run mcp-vertexai-search search \
88 | --config config.yml \
89 | --query <your-query>
90 | ```
91 |
92 | ## Appendix A: Config file
93 |
94 | [config.yml.template](./config.yml.template) is a template for the config file.
95 |
96 | - `server`
97 | - `server.name`: The name of the MCP server
98 | - `model`
99 | - `model.model_name`: The name of the Vertex AI model
100 | - `model.project_id`: The project ID of the Vertex AI model
101 | - `model.location`: The location of the model (e.g. us-central1)
102 | - `model.impersonate_service_account`: The service account to impersonate
103 | - `model.generate_content_config`: The configuration for the generate content API
104 | - `data_stores`: The list of Vertex AI data stores
105 | - `data_stores.project_id`: The project ID of the Vertex AI data store
106 | - `data_stores.location`: The location of the Vertex AI data store (e.g. us)
107 | - `data_stores.datastore_id`: The ID of the Vertex AI data store
108 | - `data_stores.tool_name`: The name of the tool
109 | - `data_stores.description`: The description of the Vertex AI data store
110 |
```
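
As a concrete illustration of the keys listed in Appendix A, the sketch below builds the same structure in code and validates it with the `Config` model from `mcp_vertexai_search.config`. The project ID, datastore ID, and model name are placeholder assumptions, not values taken from `config.yml.template`.

```python
# Minimal sketch: validate an Appendix A-style config with the project's pydantic models.
# All IDs and the model name below are placeholder assumptions.
import yaml

from mcp_vertexai_search.config import Config

EXAMPLE_CONFIG = """
server:
  name: document-search
model:
  model_name: gemini-2.0-flash      # assumption: any Vertex AI Gemini model name
  project_id: your-gcp-project      # assumption
  location: us-central1
  generate_content_config:
    temperature: 0.7
    top_p: 0.95
data_stores:
  - project_id: your-gcp-project    # assumption
    location: us
    datastore_id: your-datastore-id # assumption
    tool_name: document-search
    description: Searches internal documents.
"""

config = Config(**yaml.safe_load(EXAMPLE_CONFIG))
print(config.model_dump_json(indent=2))
```

The `mcp-vertexai-search validate-config --config config.yml` command performs the same validation against a file on disk.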
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/src/research_agent/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/requirements.setup.txt:
--------------------------------------------------------------------------------
```
1 | uv>=0.6
2 |
```
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
```python
1 | __version__ = "0.0.1"
2 |
```
--------------------------------------------------------------------------------
/dev/build.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/bin/bash
2 | set -Eo pipefail
3 |
4 | uv build
5 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/__main__.py:
--------------------------------------------------------------------------------
```python
1 | from mcp_vertexai_search.cli import serve
2 |
3 | serve()
4 |
```
--------------------------------------------------------------------------------
/.trunk/configs/ruff.toml:
--------------------------------------------------------------------------------
```toml
1 | # Generic, formatter-friendly config.
2 | select = ["B", "D3", "E", "F"]
3 |
4 | # Never enforce `E501` (line length violations). This should be handled by formatters.
5 | ignore = ["E501"]
6 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | FROM python:3.12-slim
2 |
3 | WORKDIR /app
4 |
5 | COPY requirements.setup.txt pyproject.toml uv.lock /app/
6 | RUN python -m pip install --no-cache-dir -r requirements.setup.txt \
7 | && uv venv \
8 | && uv sync
9 |
10 |
11 | COPY . /app
12 |
13 | ENTRYPOINT ["uv", "run", "mcp-vertexai-search"]
14 |
```
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 |
3 | from mcp_vertexai_search.utils import to_mcp_tool
4 |
5 |
6 | class TestUtils(unittest.TestCase):
7 | def test_to_mcp_tool(self):
8 | tool = to_mcp_tool("test-tool", "test-description")
9 | self.assertEqual(tool.name, "test-tool")
10 | self.assertEqual(tool.description, "test-description")
11 |
```
--------------------------------------------------------------------------------
/.github/workflows/contributors-list.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Generate contributors list
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | # SEE https://github.com/marketplace/actions/contribute-list
10 | contrib-readme-job:
11 | runs-on: ubuntu-latest
12 | name: A job to automate contrib in readme
13 | steps:
14 | - name: Contribute List
15 | uses: akhilmhdh/[email protected]
16 | env:
17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |
```
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
```yaml
1 | # See GitHub's documentation for more information on this file:
2 | # https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
3 | version: 2
4 | updates:
5 | - package-ecosystem: github-actions
6 | directory: /
7 | schedule:
8 | interval: weekly
9 | # - package-ecosystem: pip
10 | # directory: /
11 | # schedule:
12 | # interval: weekly
13 |
```
--------------------------------------------------------------------------------
/.github/workflows/trunk_check.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Trunk Check
2 |
3 | on:
4 | pull_request:
5 | workflow_dispatch:
6 |
7 | concurrency:
8 | group: ${{ github.head_ref || github.run_id }}
9 | cancel-in-progress: true
10 |
11 | permissions: read-all
12 |
13 | jobs:
14 | trunk_check:
15 | name: Trunk Check Runner
16 | runs-on: ubuntu-latest
17 | permissions:
18 | checks: write # For trunk to post annotations
19 | contents: read # For repo checkout
20 |
21 | steps:
22 | - name: Checkout
23 | uses: actions/checkout@v5
24 |
25 | - name: Trunk Check
26 | uses: trunk-io/trunk-action@v1
27 |
```
--------------------------------------------------------------------------------
/dev/publish.sh:
--------------------------------------------------------------------------------
```bash
1 | set -Eo pipefail
2 | set -x
3 |
4 | # Constants
5 | SCRIPT_FILE="$(readlink -f "$0")"
6 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
7 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
8 |
9 | cd "${MODULE_DIR}" || exit
10 |
11 | # Arguments
12 | target=${1:?"target is not set"}
13 |
14 | # Ensure uv is installed
15 | pip install uv
16 |
17 | # Build the package first
18 | uv build
19 |
20 | # Publish to the specified target
21 | if [[ ${target} == "pypi" ]]; then
22 | uv publish
23 | elif [[ ${target} == "testpypi" ]]; then
24 | uv publish --publish-url "https://test.pypi.org/legacy/"
25 | else
26 | echo "No such target ${target}"
27 | exit 1
28 | fi
29 |
```
--------------------------------------------------------------------------------
/.github/workflows/trunk_upgrade.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Upgrade Trunk
2 |
3 | on:
4 | workflow_dispatch: {}
5 | schedule:
6 | # Runs the first day of every month (in the UTC timezone)
7 | - cron: 0 0 1 * *
8 |
9 | permissions: read-all
10 |
11 | jobs:
12 | trunk_upgrade:
13 | name: Upgrade Trunk
14 | runs-on: ubuntu-latest
15 | permissions:
16 | contents: write # For trunk to create PRs
17 | pull-requests: write # For trunk to create PRs
18 | steps:
19 | - name: Checkout
20 | uses: actions/checkout@v5
21 | # >>> Install your own deps here (npm install, etc) <<<
22 | # SEE https://github.com/trunk-io/trunk-action
23 | - name: Trunk Upgrade
24 | uses: trunk-io/trunk-action/upgrade@v1
25 | with:
26 | signoff: true
27 |
```
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Test python
2 |
3 | on:
4 | pull_request:
5 | paths:
6 | - .github/workflows/test.yml
7 | - pyproject.toml
8 |       - src/**/*.py
9 | - tests/**/*.py
10 | - pylintrc
11 | push:
12 | branches:
13 | - main
14 |
15 | jobs:
16 | test:
17 | runs-on: ubuntu-latest
18 | strategy:
19 | matrix:
20 | python-version: ["3.11", "3.12"]
21 | fail-fast: false
22 |
23 | defaults:
24 | run:
25 | shell: bash
26 |
27 | steps:
28 | - uses: actions/checkout@v5
29 | - name: Set up Python
30 | uses: actions/setup-python@v6
31 | with:
32 | python-version: ${{ matrix.python-version }}
33 | - name: Install dependencies
34 | run: |
35 | python -m pip install -r requirements.setup.txt
36 | bash dev/setup.sh --deps "development"
37 | - name: Run tests
38 | run: bash dev/test_python.sh
39 | - name: Test build
40 | run: |
41 | bash dev/build.sh
42 |
```
--------------------------------------------------------------------------------
/dev/test_python.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/bin/bash
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | set -Eeuo pipefail
17 |
18 | # Constants
19 | SCRIPT_FILE="$(readlink -f "$0")"
20 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
21 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
22 |
23 | pytest -v -s --cache-clear "${MODULE_DIR}/tests"
24 |
```
--------------------------------------------------------------------------------
/src/research_agent/utils.py:
--------------------------------------------------------------------------------
```python
1 | from google import genai
2 | from google.genai import types as genai_types
3 | from mcp import types as mcp_types
4 |
5 |
6 | def to_gemini_tool(mcp_tool: mcp_types.Tool) -> genai_types.Tool:
7 | """
8 | Converts an MCP tool schema to a Gemini tool.
9 |
10 |     Args:
11 |         mcp_tool: The MCP tool to convert. Its name, description, and
12 |             input schema are mapped onto the corresponding fields of the
13 |             Gemini function declaration.
14 |
15 | Returns:
16 | A Gemini tool.
17 | """
18 | required_params: list[str] = mcp_tool.inputSchema.get("required", [])
19 | properties = {}
20 | for key, value in mcp_tool.inputSchema.get("properties", {}).items():
21 | schema_dict = {
22 | "type": value.get("type", "STRING").upper(),
23 | "description": value.get("description", ""),
24 | }
25 | properties[key] = genai_types.Schema(**schema_dict)
26 |
27 | function = genai.types.FunctionDeclaration(
28 | name=mcp_tool.name,
29 | description=mcp_tool.description,
30 | parameters=genai.types.Schema(
31 | type="OBJECT",
32 | properties=properties,
33 | required=required_params,
34 | ),
35 | )
36 | return genai_types.Tool(function_declarations=[function])
37 |
```
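
A brief usage sketch of `to_gemini_tool`. The tool definition below is hand-written for illustration; its name and schema are assumptions, though they mirror what `mcp_vertexai_search.utils.to_mcp_tool` produces.

```python
# Usage sketch: convert a hand-written MCP tool definition to a Gemini tool.
from mcp import types as mcp_types

from research_agent.utils import to_gemini_tool

mcp_tool = mcp_types.Tool(
    name="document-search",  # assumption: matches the default server tool name
    description="Searches documents in a Vertex AI data store.",
    inputSchema={
        "type": "object",
        "required": ["query"],
        "properties": {
            "query": {"type": "string", "description": "A natural language question."},
        },
    },
)

gemini_tool = to_gemini_tool(mcp_tool)
# The result wraps a single FunctionDeclaration whose parameters mirror inputSchema.
print(gemini_tool.function_declarations[0].name)
```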
--------------------------------------------------------------------------------
/dev/clean.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | set -e
17 | set -x
18 |
19 | # Constants
20 | SCRIPT_FILE="$(readlink -f "$0")"
21 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
22 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
23 |
24 | cleaned_dirs=(
25 | dist
26 | sdist
27 | .pytest_cache
28 | )
29 |
30 | for cleaned_dir in "${cleaned_dirs[@]}"; do
31 | if [[ -d "${MODULE_DIR}/${cleaned_dir}" ]]; then
32 | rm -r "${MODULE_DIR:?}/${cleaned_dir}"
33 | fi
34 | done
35 |
```
--------------------------------------------------------------------------------
/.trunk/trunk.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # This file controls the behavior of Trunk: https://docs.trunk.io/cli
2 | # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml
3 | version: 0.1
4 | cli:
5 | version: 1.24.0
6 | # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins)
7 | plugins:
8 | sources:
9 | - id: trunk
10 | ref: v1.7.0
11 | uri: https://github.com/trunk-io/plugins
12 | # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
13 | runtimes:
14 | enabled:
15 | - [email protected]
16 | - [email protected]
17 | - [email protected]
18 | # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration)
19 | lint:
20 | disabled:
21 | - black
22 | enabled:
23 | - [email protected]
24 | - [email protected]
25 | - [email protected]
26 | - [email protected]
27 | - [email protected]
28 | - [email protected]
29 | - [email protected]
30 | - [email protected]
31 | - [email protected]
32 | - [email protected]
33 | - [email protected]
34 | - [email protected]
35 | - [email protected]
36 | - git-diff-check
37 | - [email protected]
38 | - [email protected]
39 | - [email protected]
40 | actions:
41 | enabled:
42 | - trunk-announce
43 | - trunk-check-pre-push
44 | - trunk-fmt-pre-commit
45 | - trunk-upgrade-available
46 |
```
--------------------------------------------------------------------------------
/dev/setup.sh:
--------------------------------------------------------------------------------
```bash
1 | # Constants
2 | SCRIPT_FILE="$(readlink -f "$0")"
3 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
4 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
5 |
6 | # Arguments
7 | deps="production"
8 | use_venv=false
9 | while (($# > 0)); do
10 | if [[ $1 == "--use-venv" ]]; then
11 | use_venv=true
12 | shift 1
13 | elif [[ $1 == "--deps" ]]; then
14 | if [[ $2 != "production" && $2 != "development" ]]; then
15 | echo "Error: deps must be one of 'production' or 'development'"
16 | exit 1
17 | fi
18 | deps="$2"
19 | shift 2
20 | else
21 | echo "Unknown argument: $1"
22 | exit 1
23 | fi
24 | done
25 |
26 | # Change to the module directory
27 | cd "${MODULE_DIR}"
28 |
29 | # Install uv and dependencies
30 | pip install --force-reinstall -r "${MODULE_DIR}/requirements.setup.txt"
31 |
32 | UV_PIP_OPTIONS=("--force-reinstall")
33 | if [[ ${use_venv} == true ]]; then
34 | # Create virtual environment
35 | uv venv
36 | # Activate virtual environment
37 | if [[ -f .venv/bin/activate ]]; then
38 | # shellcheck disable=SC1091
39 | source .venv/bin/activate
40 | else
41 | echo "Error: .venv/bin/activate not found"
42 | exit 1
43 | fi
44 | else
45 | UV_PIP_OPTIONS+=("--system")
46 | fi
47 |
48 | # Install package and dependencies
49 | if [[ ${deps} == "production" ]]; then
50 | uv pip install "${UV_PIP_OPTIONS[@]}" -e "."
51 | else
52 | uv pip install "${UV_PIP_OPTIONS[@]}" -e ".[dev,test]"
53 | fi
54 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/utils.py:
--------------------------------------------------------------------------------
```python
1 | from typing import Dict, List
2 |
3 | from mcp import types as mcp_types
4 |
5 | from mcp_vertexai_search.config import DataStoreConfig
6 |
7 |
8 | def to_mcp_tool(tool_name: str, description: str) -> mcp_types.Tool:
9 | """Convert a tool name and description to an MCP Tool"""
10 | return mcp_types.Tool(
11 | name=tool_name,
12 | description=description,
13 | inputSchema={
14 | "type": "object",
15 | "required": ["query"],
16 | "properties": {
17 | "query": {
18 | "type": "string",
19 | "description": """\
20 | A natural language question, not search keywords, used to query the documents.
21 | The query question should be sentence(s), not search keywords.
22 | """.strip(),
23 | },
24 | },
25 | },
26 | )
27 |
28 |
29 | def to_mcp_tools_map(
30 | data_store_configs: List[DataStoreConfig],
31 | ) -> Dict[str, mcp_types.Tool]:
32 | """Convert a list of DataStoreConfigs to a tool map"""
33 | return {
34 | data_store_config.tool_name: to_mcp_tool(
35 | data_store_config.tool_name, data_store_config.description
36 | )
37 | for data_store_config in data_store_configs
38 | }
39 |
```
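
A short sketch of how the server's tool map is built from data store configs; each configured data store becomes one MCP tool keyed by its `tool_name`. The project and datastore IDs below are placeholders.

```python
# Usage sketch: one DataStoreConfig in, one MCP tool out, keyed by tool_name.
from mcp_vertexai_search.config import DataStoreConfig
from mcp_vertexai_search.utils import to_mcp_tools_map

data_stores = [
    DataStoreConfig(
        project_id="your-gcp-project",     # assumption
        location="us",
        datastore_id="your-datastore-id",  # assumption
        tool_name="document-search",
        description="Searches internal documents.",
    ),
]

tools_map = to_mcp_tools_map(data_stores)
print(list(tools_map))                                       # ['document-search']
print(tools_map["document-search"].inputSchema["required"])  # ['query']
```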
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [tool.hatch.build.targets.wheel]
6 | packages = ["src/mcp_vertexai_search"]
7 |
8 | [project]
9 | name = "mcp-vertexai-search"
10 | version = "0.1.0"
11 | authors = [{ name = "ubie" }]
12 | readme = "README.md"
13 | license = { file = "LICENSE" }
14 | requires-python = ">=3.10.0"
15 | classifiers = [
16 | "Development Status :: 4 - Beta",
17 | "Intended Audience :: Information Technology",
18 | "Intended Audience :: System Administrators",
19 | "Operating System :: OS Independent",
20 | "Topic :: Software Development :: Libraries",
21 | "Topic :: Software Development :: Libraries :: Python Modules",
22 | "License :: OSI Approved :: Apache Software License",
23 | "Programming Language :: Python",
24 | "Programming Language :: Python :: 3",
25 | "Programming Language :: Python :: 3 :: Only",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | "Typing :: Typed",
30 | ]
31 | description = "An MCP server to search documents using Vertex AI Search"
32 | dependencies = [
33 | "click>=8.1.8",
34 | "google-cloud-aiplatform>=1.96.0",
35 | "google-cloud-discoveryengine>=0.13.8",
36 | "loguru>=0.7.3",
37 | "mcp[cli]>=1.9.2",
38 | "pydantic>=2.10.6",
39 | "pyyaml>=6.0.2",
40 | "uvicorn>=0.34.0",
41 | "vertexai>=1.43.0",
42 | ]
43 |
44 | [project.optional-dependencies]
45 | dev = [
46 | "autopep8>=2.3.2",
47 | "bandit>=1.8.3",
48 | "black>=25.1.0",
49 | "google-genai>=1.2.0",
50 | "isort>=6.0.0",
51 | "langgraph>=0.2.74",
52 | "pytest>=8.3.4",
53 | "ruff>=0.9.6",
54 | ]
55 |
56 |
57 | [project.scripts]
58 | mcp-vertexai-search = "mcp_vertexai_search.cli:cli"
59 |
```
--------------------------------------------------------------------------------
/src/research_agent/mcp_client.py:
--------------------------------------------------------------------------------
```python
1 | from contextlib import AsyncExitStack
2 | from typing import Optional
3 |
4 | from mcp.client.session import ClientSession
5 | from mcp.client.sse import sse_client
6 |
7 |
8 | class MCPClient:
9 |     def __init__(self, name: str, server_url: Optional[str] = None):
10 | # Initialize session and client objects
11 | self.name = name
12 | self.session: Optional[ClientSession] = None
13 | self.exit_stack = AsyncExitStack()
14 |
15 |         # NOTE: connect_to_server() is async; callers must await it (see main() below).
16 |         self.server_url = server_url
17 |
18 | async def connect_to_server(self, server_url: str):
19 | """Connect to an MCP server running with SSE transport"""
20 | # Use AsyncExitStack to manage the contexts
21 | _sse_client = sse_client(url=server_url)
22 | streams = await self.exit_stack.enter_async_context(_sse_client)
23 |
24 | _session_context = ClientSession(*streams)
25 | self.session: ClientSession = await self.exit_stack.enter_async_context(
26 | _session_context
27 | )
28 |
29 | # Initialize
30 | await self.session.initialize()
31 |
32 | async def cleanup(self):
33 | """Properly clean up the session and streams"""
34 | await self.exit_stack.aclose()
35 |
36 | async def list_tools(self):
37 | return await self.session.list_tools()
38 |
39 | async def call_tool(self, tool_name: str, tool_arguments: Optional[dict] = None):
40 | return await self.session.call_tool(tool_name, tool_arguments)
41 |
42 |
43 | if __name__ == "__main__":
44 |
45 | async def main():
46 |         client = MCPClient(name="document-search")
47 | await client.connect_to_server(server_url="http://0.0.0.0:8080/sse")
48 | tools = await client.list_tools()
49 | print(tools)
50 |         tool_call = await client.call_tool("document-search", {"query": "What is a cpp segment?"})
51 | print(tool_call)
52 | await client.cleanup() # Ensure cleanup is called
53 |
54 | import asyncio
55 |
56 | asyncio.run(main())
57 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/google_cloud.py:
--------------------------------------------------------------------------------
```python
1 | from typing import List, Optional
2 |
3 | from google import auth
4 | from google.auth import impersonated_credentials
5 |
6 |
7 | def get_credentials(
8 | project_id: Optional[str] = None,
9 | impersonate_service_account: Optional[str] = None,
10 | scopes: Optional[List[str]] = None,
11 | lifetime: Optional[int] = None,
12 | ) -> auth.credentials.Credentials:
13 | """Get the credentials"""
14 | if impersonate_service_account is not None:
15 | return get_impersonate_credentials(
16 | impersonate_service_account, project_id, scopes, lifetime
17 | )
18 | return get_default_credentials(project_id)
19 |
20 |
21 | def get_default_credentials(
22 | project_id: Optional[str] = None,
23 | ) -> auth.credentials.Credentials:
24 | """Get the default credentials"""
25 | if project_id is not None:
26 | credentials, _ = auth.default(quota_project_id=project_id)
27 | else:
28 | credentials, _ = auth.default()
29 | return credentials
30 |
31 |
32 | def get_impersonate_credentials(
33 | impersonate_service_account: str,
34 | quoted_project_id: Optional[str] = None,
35 | scopes: Optional[List[str]] = None,
36 | lifetime: Optional[int] = None,
37 | ) -> impersonated_credentials.Credentials:
38 |     """Get impersonated credentials"""
39 |     # Create impersonated credentials for the target service account
40 | if scopes is None:
41 | scopes = ["https://www.googleapis.com/auth/cloud-platform"]
42 | if lifetime is None:
43 |         # NOTE: The maximum lifetime is 3600s. If an operation cannot finish within
44 |         # 1 hour, we have to consider an alternative approach.
45 | lifetime = 3600
46 |
47 | source_credentials, _ = auth.default()
48 | if quoted_project_id is not None:
49 | source_credentials, quoted_project_id = auth.default(
50 | quota_project_id=quoted_project_id
51 | )
52 | target_credentials = impersonated_credentials.Credentials(
53 | source_credentials=source_credentials,
54 | target_principal=impersonate_service_account,
55 | target_scopes=scopes,
56 | lifetime=lifetime,
57 | )
58 | return target_credentials
59 |
```
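
A usage sketch of `get_credentials`, assuming Application Default Credentials are configured locally. The project ID and service account email are placeholders.

```python
# Usage sketch: default vs. impersonated credentials (placeholder project/account).
from mcp_vertexai_search.google_cloud import get_credentials

# Application Default Credentials, optionally pinned to a quota project
default_creds = get_credentials(project_id="your-gcp-project")  # assumption

# Short-lived impersonated credentials for a target service account
impersonated_creds = get_credentials(
    project_id="your-gcp-project",  # assumption
    impersonate_service_account="search-agent@your-gcp-project.iam.gserviceaccount.com",
    lifetime=1800,  # seconds; the token service caps this at 3600
)
```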
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/config.py:
--------------------------------------------------------------------------------
```python
1 | from typing import List, Optional
2 |
3 | import yaml
4 | from pydantic import BaseModel, Field
5 |
6 |
7 | class GenerateContentConfig(BaseModel):
8 | """The configuration for the generate content API."""
9 |
10 | temperature: float = Field(
11 | description="The temperature for the generate content API",
12 | default=0.7,
13 | )
14 | top_p: float = Field(
15 | description="The top p for the generate content API",
16 | default=0.95,
17 | )
18 |
19 |
20 | class VertexAIModelConfig(BaseModel):
21 | """The configuration for a Vertex AI model."""
22 |
23 | model_name: str = Field(..., description="The name of the Vertex AI model")
24 | project_id: str = Field(..., description="The project ID of the Vertex AI model")
25 | location: str = Field(..., description="The location of the model")
26 | impersonate_service_account: Optional[str] = Field(
27 | None, description="The service account to impersonate"
28 | )
29 | generate_content_config: Optional[GenerateContentConfig] = Field(
30 | description="The configuration for the generate content API",
31 | default_factory=GenerateContentConfig,
32 | )
33 |
34 |
35 | class DataStoreConfig(BaseModel):
36 | """The configuration for a Vertex AI data store."""
37 |
38 | project_id: str = Field(
39 | ..., description="The project ID of the Vertex AI data store"
40 | )
41 | location: str = Field(..., description="The location of the Vertex AI data store")
42 | datastore_id: str = Field(..., description="The ID of the Vertex AI data store")
43 | tool_name: str = Field(
44 | ...,
45 |         description="The name of the MCP tool that searches this data store",
46 | )
47 | description: str = Field(
48 | description="The description of the Vertex AI data store",
49 | default="",
50 | )
51 |
52 |
53 | class MCPServerConfig(BaseModel):
54 | """The configuration for an MCP server."""
55 |
56 | name: str = Field(
57 | description="The name of the MCP server", default="document-search"
58 | )
59 |
60 |
61 | class Config(BaseModel):
62 | """The configuration for the application."""
63 |
64 | server: MCPServerConfig = Field(
65 | description="The server configuration", default_factory=MCPServerConfig
66 | )
67 | model: VertexAIModelConfig = Field(
68 | description="The model configuration", default_factory=VertexAIModelConfig
69 | )
70 | data_stores: List[DataStoreConfig] = Field(
71 | description="The data stores configuration", default_factory=list
72 | )
73 |
74 |
75 | def load_yaml_config(file_path: str) -> Config:
76 | """Load a YAML config file"""
77 | with open(file_path, "r") as f:
78 | return Config(**yaml.safe_load(f))
79 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/cli.py:
--------------------------------------------------------------------------------
```python
1 | import asyncio
2 |
3 | import click
4 | import vertexai
5 |
6 | from mcp_vertexai_search.agent import (
7 | VertexAISearchAgent,
8 | create_model,
9 | create_vertex_ai_tools,
10 | get_default_safety_settings,
11 | get_generation_config,
12 | get_system_instruction,
13 | )
14 | from mcp_vertexai_search.config import load_yaml_config
15 | from mcp_vertexai_search.google_cloud import get_credentials
16 | from mcp_vertexai_search.server import create_server, run_sse_server, run_stdio_server
17 |
18 | cli = click.Group()
19 |
20 |
21 | @cli.command("serve")
22 | # trunk-ignore(bandit/B104)
23 | @click.option("--host", type=str, default="0.0.0.0", help="The host to listen on")
24 | @click.option("--port", type=int, default=8080, help="The port to listen on")
25 | @click.option(
26 | "--transport",
27 | type=click.Choice(["stdio", "sse"]),
28 | default="stdio",
29 | help="The transport to use",
30 | )
31 | @click.option("--config", type=click.Path(exists=True), help="The config file")
32 | def serve(
33 | host: str,
34 | port: int,
35 | transport: str,
36 | config: str,
37 | ):
38 | server_config = load_yaml_config(config)
39 | vertexai.init(
40 | project=server_config.model.project_id, location=server_config.model.location
41 | )
42 |
43 | search_tools = create_vertex_ai_tools(server_config.data_stores)
44 | model = create_model(
45 | model_name=server_config.model.model_name,
46 | tools=search_tools,
47 | system_instruction=get_system_instruction(),
48 | )
49 | agent = VertexAISearchAgent(model=model)
50 |
51 | app = create_server(agent, server_config)
52 |     if transport == "stdio":
53 |         run_stdio_server(app)
54 |     elif transport == "sse":
55 |         run_sse_server(app, host, port)
56 | else:
57 | raise ValueError(f"Invalid transport: {transport}")
58 |
59 |
60 | @cli.command("search")
61 | @click.option("--config", type=click.Path(exists=True), help="The config file")
62 | @click.option("--query", type=str, help="The query to search for")
63 | def search(
64 | config: str,
65 | query: str,
66 | ):
67 | # Load the config
68 | server_config = load_yaml_config(config)
69 |
70 | # Initialize the Vertex AI client
71 | credentials = get_credentials(
72 | impersonate_service_account=server_config.model.impersonate_service_account,
73 | )
74 | vertexai.init(
75 | project=server_config.model.project_id,
76 | location=server_config.model.location,
77 | credentials=credentials,
78 | )
79 |
80 | # Create the search agent
81 | search_tools = create_vertex_ai_tools(server_config.data_stores)
82 | model = create_model(
83 | model_name=server_config.model.model_name,
84 | tools=search_tools,
85 | system_instruction=get_system_instruction(),
86 | )
87 | agent = VertexAISearchAgent(
88 | model=model,
89 | )
90 |
91 | # Generate the response
92 | generation_config = get_generation_config()
93 | safety_settings = get_default_safety_settings()
94 | response = agent.search(
95 | query,
96 | generation_config=generation_config,
97 | safety_settings=safety_settings,
98 | )
99 | print(response)
100 |
101 |
102 | @cli.command("validate-config")
103 | @click.option("--config", type=click.Path(exists=True), help="The config file")
104 | @click.option("--verbose", type=bool, default=False, help="Verbose output")
105 | def validate_config(config: str, verbose: bool):
106 | try:
107 | server_config = load_yaml_config(config)
108 | if verbose:
109 | print(server_config.model_dump_json(indent=2))
110 | # pylint: disable=broad-exception-caught
111 | except Exception as e:
112 | raise ValueError(f"Invalid config: {e}") from e
113 |
```
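
A small sketch of exercising the CLI in-process with click's test runner. It assumes a valid `config.yml` exists in the working directory; the same command is normally run via `uv run mcp-vertexai-search validate-config ...` as shown in the README.

```python
# Usage sketch: invoke the validate-config command in-process (assumes ./config.yml exists).
from click.testing import CliRunner

from mcp_vertexai_search.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli, ["validate-config", "--config", "config.yml", "--verbose", "true"]
)
print(result.exit_code)  # 0 if the config is valid
print(result.output)     # the parsed config as JSON when --verbose is true
```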
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/server.py:
--------------------------------------------------------------------------------
```python
1 | import anyio
2 | import mcp.types as types
3 | from mcp.server.lowlevel import Server
4 | from mcp.shared.exceptions import ErrorData, McpError
5 |
6 | from mcp_vertexai_search.agent import (
7 | VertexAISearchAgent,
8 | get_default_safety_settings,
9 | get_generation_config,
10 | )
11 | from mcp_vertexai_search.config import Config
12 | from mcp_vertexai_search.utils import to_mcp_tools_map
13 |
14 |
15 | def create_server(
16 | agent: VertexAISearchAgent,
17 | config: Config,
18 | ) -> Server:
19 | """Create the MCP server."""
20 | app = Server("document-search")
21 |
22 | # Create a map of tools for the MCP server
23 | tools_map = to_mcp_tools_map(config.data_stores)
24 |
25 | # TODO Add @app.list_prompts()
26 |
27 | @app.call_tool()
28 | async def call_tool(
29 | name: str, arguments: dict
30 | ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
31 | if name not in tools_map:
32 | raise McpError(
33 | ErrorData(code=types.INVALID_PARAMS, message=f"Unknown tool: {name}")
34 | )
35 | if "query" not in arguments:
36 | raise McpError(
37 | ErrorData(code=types.INVALID_PARAMS, message="query is required")
38 | )
39 | # pylint: disable=broad-exception-caught
40 | try:
41 | # TODO handle retry logic
42 | generation_config = get_generation_config(
43 | temperature=config.model.generate_content_config.temperature,
44 | top_p=config.model.generate_content_config.top_p,
45 | )
46 | safety_settings = get_default_safety_settings()
47 | response = agent.search(
48 | query=arguments["query"],
49 | generation_config=generation_config,
50 | safety_settings=safety_settings,
51 | )
52 | return [types.TextContent(type="text", text=response)]
53 | # pylint: disable=broad-exception-caught
54 | except Exception as e:
55 | raise McpError(ErrorData(code=types.INVALID_PARAMS, message=str(e))) from e
56 |
57 | @app.list_tools()
58 | async def list_tools() -> list[types.Tool]:
59 | return [tools_map[tool_name] for tool_name in tools_map]
60 |
61 | return app
62 |
63 |
64 | def run_stdio_server(app: Server) -> None:
65 | """Run the server using the stdio transport."""
66 | try:
67 | from mcp.server.stdio import stdio_server
68 | except ImportError as e:
69 | raise ImportError("stdio transport is not available") from e
70 |
71 | async def arun():
72 | async with stdio_server() as streams:
73 | await app.run(streams[0], streams[1], app.create_initialization_options())
74 |
75 | anyio.run(arun)
76 |
77 |
78 | def run_sse_server(app: Server, host: str, port: int) -> None:
79 | """Run the server using the SSE transport."""
80 | try:
81 | import uvicorn
82 | from mcp.server.sse import SseServerTransport
83 | from starlette.applications import Starlette
84 | from starlette.routing import Mount, Route
85 | except ImportError as e:
86 | raise ImportError("SSE transport is not available") from e
87 |
88 | # Handle SSE connections
89 | sse = SseServerTransport("/messages/")
90 |
91 | async def handle_sse(request):
92 | async with sse.connect_sse(
93 | request.scope, request.receive, request._send
94 | ) as streams:
95 | await app.run(streams[0], streams[1], app.create_initialization_options())
96 |
97 | # Create the Starlette app
98 | starlette_app = Starlette(
99 | debug=True,
100 | routes=[
101 | Route("/sse", endpoint=handle_sse),
102 | Mount("/messages/", app=sse.handle_post_message),
103 | ],
104 | )
105 | # Serve the Starlette app
106 | uvicorn.run(starlette_app, host=host, port=port)
107 |
```
--------------------------------------------------------------------------------
/src/research_agent/chat.py:
--------------------------------------------------------------------------------
```python
1 | import argparse
2 | import asyncio
3 | import json
4 | import textwrap
5 | from typing import List
6 |
7 | from google import genai
8 | from google.genai import chats, types
9 | from loguru import logger
10 | from pydantic import BaseModel, Field
11 |
12 | from research_agent.mcp_client import MCPClient
13 | from research_agent.utils import to_gemini_tool
14 |
15 |
16 | class Reference(BaseModel):
17 | """A reference to a document."""
18 |
19 | title: str = Field(..., description="The title of the document.")
20 | raw_text: str = Field(..., description="The raw text of the document.")
21 |
22 |
23 | class SearchResponse(BaseModel):
24 | """The response from the search tool."""
25 |
26 | answer: str = Field(..., description="The answer to the user's question.")
27 | references: List[Reference] = Field(
28 | ...,
29 | description="The references to the documents that are used to answer the user's question.",
30 | )
31 |
32 | @classmethod
33 | def from_json_string(cls, json_string: str) -> "SearchResponse":
34 | """Deserialize the search response from a JSON string."""
35 | return cls(**json.loads(json_string))
36 |
37 | def __str__(self) -> str:
38 | return textwrap.dedent(f"""
39 | Answer: {self.answer}
40 |
41 | References:
42 | {"\n".join([f" - {ref.title}: {ref.raw_text}" for ref in self.references])}
43 | """)
44 |
45 |
46 | async def process_query(
47 | chat_client: chats.Chat,
48 | mcp_client: MCPClient,
49 | query: str,
50 | ) -> str:
51 | """Process the user query using Gemini and MCP tools."""
52 | response = chat_client.send_message(message=[query])
53 | if not response.candidates:
54 | raise RuntimeError("No response from Gemini")
55 |
56 | response_text = []
57 | for candidate in response.candidates:
58 | if not candidate.content:
59 | logger.debug(f"No content in candidate {candidate}")
60 | continue
61 |
62 | for part in candidate.content.parts:
63 | if part.text:
64 | response_text.append(part.text)
65 | elif part.function_call:
66 | tool_name = part.function_call.name
67 | tool_args = part.function_call.args
68 | logger.debug(f"Tool name: {tool_name}, tool args: {tool_args}")
69 | tool_call = await mcp_client.call_tool(tool_name, tool_args)
70 |
71 | if tool_call and tool_call.content:
72 | for content in tool_call.content:
73 | text = content.text
74 | if not text:
75 | logger.info(f"No text in tool call content {content}")
76 | continue
77 |
78 | try:
79 | parsed_content = SearchResponse.from_json_string(text)
80 | response_text.append(str(parsed_content))
81 | except Exception as e: # pylint: disable=broad-except
82 | logger.error(
83 | f"Failed to deserialize tool call content {content}: {e}"
84 | )
85 | response_text.append(text)
86 | else:
87 | raise RuntimeError(f"No tool call content {tool_call}")
88 | else:
89 | raise RuntimeError(f"Unknown part type {part}")
90 | return "\n".join(response_text)
91 |
92 |
93 | async def chat(server_url: str):
94 | """
95 | Run the chat server.
96 | """
97 | # Why do we use google-genai, not vertexai?
98 | # Because it is easier to convert MCP tools to GenAI tools in google-genai.
99 | genai_client = genai.Client(vertexai=True, location="us-central1")
100 | mcp_client = MCPClient(name="document-search")
101 | await mcp_client.connect_to_server(server_url=server_url)
102 |
103 | # Collect tools from MCP server
104 | mcp_tools = await mcp_client.list_tools()
105 | # Convert MCP tools to GenAI tools
106 | genai_tools = [to_gemini_tool(tool) for tool in mcp_tools.tools]
107 |
108 | # Create chat client
109 | chat_client = genai_client.chats.create(
110 | model="gemini-2.0-flash",
111 | config=types.GenerateContentConfig(
112 | tools=genai_tools,
113 | system_instruction="""
114 | You are a helpful assistant to search documents.
115 |             Pass the user's question to the tool as naturally phrased sentences, not search keywords.
116 | """,
117 | ),
118 | )
119 |
120 | print("If you want to quit, please enter 'bye'")
121 | try:
122 | while True:
123 | # Get user query
124 | query = input("Enter your query: ")
125 | if query == "bye":
126 | break
127 |
128 | # Get response from GenAI
129 | response = await process_query(chat_client, mcp_client, query)
130 | print(response)
131 | # pylint: disable=broad-except
132 | except Exception as e:
133 | await mcp_client.cleanup()
134 | raise RuntimeError from e
135 |
136 |
137 | if __name__ == "__main__":
138 | # Parse command line arguments
139 | parser = argparse.ArgumentParser()
140 | # trunk-ignore(bandit/B104)
141 | parser.add_argument("--host", type=str, default="0.0.0.0")
142 | parser.add_argument("--port", type=int, default=8080)
143 | args = parser.parse_args()
144 | # Run the chat server
145 | server_url = f"http://{args.host}:{args.port}/sse"
146 | asyncio.run(chat(server_url))
147 |
```
--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 |
3 | from mcp_vertexai_search.config import (
4 | Config,
5 | DataStoreConfig,
6 | GenerateContentConfig,
7 | MCPServerConfig,
8 | VertexAIModelConfig,
9 | )
10 |
11 |
12 | class TestConfig(unittest.TestCase):
13 | def test_default_config(self):
14 | """Test that default Config values are set correctly."""
15 | config = Config(
16 | model=VertexAIModelConfig(
17 | project_id="test-project",
18 | model_name="test-model",
19 | location="test-location",
20 | ),
21 | )
22 | self.assertIsInstance(config.server, MCPServerConfig)
23 | self.assertIsInstance(config.model, VertexAIModelConfig)
24 | self.assertEqual(config.data_stores, [])
25 |
26 | def test_custom_config(self):
27 | """Test that Config can be initialized with custom values."""
28 | custom_server = MCPServerConfig(name="test-server")
29 | custom_model = VertexAIModelConfig(
30 | project_id="test-project",
31 | model_name="test-model",
32 | location="test-location",
33 | )
34 | custom_data_store = DataStoreConfig(
35 | project_id="test-project",
36 | location="test-location",
37 | datastore_id="test-datastore",
38 | tool_name="test-tool",
39 | )
40 |
41 | config = Config(
42 | server=custom_server,
43 | model=custom_model,
44 | data_stores=[custom_data_store]
45 | )
46 |
47 | self.assertEqual(config.server.name, "test-server")
48 | self.assertEqual(config.model.model_name, "test-model")
49 | self.assertEqual(config.model.location, "test-location")
50 | self.assertEqual(len(config.data_stores), 1)
51 | self.assertEqual(config.data_stores[0].datastore_id, "test-datastore")
52 |
53 | def test_default_mcpserverconfig(self):
54 | """Test MCPServerConfig default values."""
55 | server_config = MCPServerConfig()
56 | self.assertEqual(server_config.name, "document-search")
57 |
58 | def test_custom_mcpserverconfig(self):
59 | """Test MCPServerConfig with custom values."""
60 | server_config = MCPServerConfig(name="custom-server")
61 | self.assertEqual(server_config.name, "custom-server")
62 |
63 | def test_default_vertexaimodelconfig(self):
64 | """Test VertexAIModelConfig default values."""
65 | model_config = VertexAIModelConfig(
66 | project_id="test-project",
67 | location="test-location",
68 | model_name="test-model",
69 | )
70 | self.assertIsInstance(model_config.generate_content_config, GenerateContentConfig)
71 | self.assertEqual(model_config.project_id, "test-project")
72 | self.assertEqual(model_config.location, "test-location")
73 | self.assertEqual(model_config.model_name, "test-model")
74 | self.assertEqual(model_config.generate_content_config.temperature, 0.7)
75 | self.assertEqual(model_config.generate_content_config.top_p, 0.95)
76 |
77 | def test_custom_vertexaimodelconfig(self):
78 | """Test VertexAIModelConfig with custom values."""
79 | custom_gen_config = GenerateContentConfig(temperature=0.8, top_p=0.9)
80 | model_config = VertexAIModelConfig(
81 | model_name="custom-model",
82 | location="custom-location",
83 | project_id="custom-project",
84 | generate_content_config=custom_gen_config,
85 | )
86 | self.assertEqual(model_config.model_name, "custom-model")
87 | self.assertEqual(model_config.location, "custom-location")
88 | self.assertEqual(model_config.project_id, "custom-project")
89 | self.assertEqual(model_config.generate_content_config.temperature, 0.8)
90 | self.assertEqual(model_config.generate_content_config.top_p, 0.9)
91 |
92 | def test_default_generatecontentconfig(self):
93 | """Test GenerateContentConfig default values."""
94 | gen_config = GenerateContentConfig()
95 | self.assertEqual(gen_config.temperature, 0.7)
96 | self.assertEqual(gen_config.top_p, 0.95)
97 |
98 | def test_custom_generatecontentconfig(self):
99 | """Test GenerateContentConfig with custom values."""
100 | gen_config = GenerateContentConfig(temperature=0.6, top_p=0.8)
101 | self.assertEqual(gen_config.temperature, 0.6)
102 | self.assertEqual(gen_config.top_p, 0.8)
103 |
104 | def test_default_datastoreconfig(self):
105 | """Test DataStoreConfig default values."""
106 | datastore_config = DataStoreConfig(
107 | project_id="test-project",
108 | location="test-location",
109 | datastore_id="test-datastore",
110 | tool_name="test-tool",
111 | )
112 | self.assertEqual(datastore_config.description, "")
113 | self.assertEqual(datastore_config.tool_name, "test-tool")
114 |
115 | def test_custom_datastoreconfig(self):
116 | """Test DataStoreConfig with custom values."""
117 | datastore_config = DataStoreConfig(
118 | project_id="custom-project",
119 | location="custom-location",
120 | datastore_id="custom-datastore",
121 | description="custom-description",
122 | tool_name="custom-tool",
123 | )
124 | self.assertEqual(datastore_config.project_id, "custom-project")
125 | self.assertEqual(datastore_config.location, "custom-location")
126 | self.assertEqual(datastore_config.datastore_id, "custom-datastore")
127 | self.assertEqual(datastore_config.description, "custom-description")
128 | self.assertEqual(datastore_config.tool_name, "custom-tool")
129 |
130 | def test_computed_tool_name_datastoreconfig(self):
131 |         """Test DataStoreConfig tool_name when explicitly set to the default name."""
132 | datastore_config = DataStoreConfig(
133 | project_id="custom-project",
134 | location="custom-location",
135 | datastore_id="custom-datastore",
136 | description="custom-description",
137 |             tool_name="document-search",
138 | )
139 | expected_tool_name = "document-search"
140 | self.assertEqual(datastore_config.tool_name, expected_tool_name)
141 |
```
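
Because the tests import `src.mcp_vertexai_search.config`, they are expected to run from the repository root. A minimal sketch of driving this module with the standard `unittest` loader (equivalent to `python -m unittest tests.test_config`):

```python
# Run the config tests programmatically from the repository root (sketch).
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover("tests", pattern="test_config.py")
    unittest.TextTestRunner(verbosity=2).run(suite)
```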
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/agent.py:
--------------------------------------------------------------------------------
```python
1 | import textwrap
2 | from typing import List, Optional
3 |
4 | from vertexai import generative_models
5 |
6 | from mcp_vertexai_search.config import DataStoreConfig
7 |
8 | # class Reference(BaseModel):
9 | # """Reference"""
10 |
11 | # title: str = Field(..., description="Title of the reference snippet")
12 | # raw_text: str = Field(..., description="Content of the reference raw text")
13 |
14 |
15 | # class SearchResponse(BaseModel):
16 | # """Search response"""
17 |
18 | # answer: str = Field(..., description="The answer to the query")
19 | # references: List[Reference] = Field(
20 | # ..., description="References used to generate the answer"
21 | # )
22 |
23 |
24 | def get_generation_config(
25 | temperature: Optional[float] = None,
26 | top_p: Optional[float] = None,
27 | ) -> generative_models.GenerationConfig:
28 | """Default generation config
29 |
30 | TODO: We should customize this based on the use case.
31 | """
32 | return generative_models.GenerationConfig(
33 | temperature=temperature,
34 | top_p=top_p,
35 | response_mime_type="application/json",
36 | )
37 |
38 |
39 | def get_default_safety_settings() -> List[generative_models.SafetySetting]:
40 | """Default safety settings
41 |
42 | TODO: We should customize this based on the use case.
43 | """
44 | return [
45 | generative_models.SafetySetting(
46 | category=generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
47 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
48 | ),
49 | generative_models.SafetySetting(
50 | category=generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
51 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
52 | ),
53 | generative_models.SafetySetting(
54 | category=generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
55 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
56 | ),
57 | generative_models.SafetySetting(
58 | category=generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
59 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
60 | ),
61 | ]
62 |
63 |
64 | def create_model(
65 | model_name: str,
66 | tools: List[generative_models.Tool],
67 | system_instruction: str,
68 | ) -> generative_models.GenerativeModel:
69 | return generative_models.GenerativeModel(
70 | model_name=model_name,
71 | tools=tools,
72 | system_instruction=[
73 | system_instruction,
74 | ],
75 | )
76 |
77 |
78 | def create_vertexai_search_tool(
79 | project_id: str,
80 | location: str,
81 | datastore_id: str,
82 | ) -> generative_models.Tool:
83 | """Create a Vertex AI search tool"""
84 | return generative_models.Tool.from_retrieval(
85 | retrieval=generative_models.grounding.Retrieval(
86 | source=generative_models.grounding.VertexAISearch(
87 | project=project_id,
88 | location=location,
89 | datastore=datastore_id,
90 | ),
91 | )
92 | )
93 |
94 |
95 | def create_vertex_ai_tools(
96 | data_stores: List[DataStoreConfig],
97 | ) -> List[generative_models.Tool]:
98 | """Create a list of Vertex AI search tools"""
99 | return [
100 | create_vertexai_search_tool(
101 | data_store.project_id, data_store.location, data_store.datastore_id
102 | )
103 | for data_store in data_stores
104 | ]
105 |
106 |
107 | def get_system_instruction() -> str:
108 | return textwrap.dedent(
109 | """
110 |         You are a helpful assistant knowledgeable about Alphabet quarterly earnings reports.
111 | Help users with their queries related to Alphabet by only responding with information available in the Grounding Knowledge store.
112 |
113 | Respond in the same language as the user's query.
114 | For instance, if the user's query is in Japanese, your response should be in Japanese.
115 |
116 | - Always refer to the tool and ground your answers in it.
117 | - Understand the retrieved snippet by the tool and only use that information to help users.
118 |         - For supporting references, you may quote the Grounding tool snippets verbatim, along with other details such as the page number.
119 | - If information is not available in the tool, mention you don't have access to the information and do not try to make up an answer.
120 | - Leave "references" as an empty list if you are unsure about the page and text snippet or if no relevant snippet is found.
121 | - Output "answer" should be "I don't know" when the user question is irrelevant or outside the scope of the knowledge base.
122 |
123 |         The Grounding tool finds the most relevant snippets from the Alphabet earnings reports data store.
124 | Use the information provided by the tool as your knowledge base.
125 |
126 | - ONLY use information available from the Grounding tool.
127 | - DO NOT make up information or invent details not present in the retrieved snippets.
128 |
129 | Response should ALWAYS be in the following JSON format:
130 | ## JSON schema
131 | {
132 | "answer": {
133 | "type": "string",
134 | "description": "The answer to the user's query"
135 | },
136 | "references": [
137 | {
138 | "title": {
139 | "type": "string",
140 | "description": "The title of the reference"
141 | },
142 | "raw_text": {
143 | "type": "string",
144 | "description": "The raw text in the reference"
145 | }
146 | }
147 | ]
148 | }
149 | """
150 | ).strip()
151 |
152 |
153 | class VertexAISearchAgent:
154 | def __init__(
155 | self,
156 | model: generative_models.GenerativeModel,
157 | ):
158 | # pylint: disable=line-too-long
159 | self.model = model
160 |
161 | async def asearch(
162 | self,
163 | query: str,
164 | generation_config: generative_models.GenerationConfig,
165 | safety_settings: Optional[List[generative_models.SafetySetting]],
166 | ) -> str:
167 | """Asynchronous search"""
168 | response = await self.model.generate_content_async(
169 | contents=[query],
170 | generation_config=generation_config,
171 | safety_settings=safety_settings,
172 |             stream=False,  # with stream=True the awaited result is an async iterator and has no .text
173 | )
174 | return response.text
175 |
176 | def search(
177 | self,
178 | query: str,
179 | generation_config: generative_models.GenerationConfig,
180 | safety_settings: Optional[List[generative_models.SafetySetting]],
181 | ) -> str:
182 | """Synchronous search"""
183 |         # TODO: Allow further customization of the generation config and safety settings
184 | response = self.model.generate_content(
185 | contents=[query],
186 | generation_config=generation_config,
187 | safety_settings=safety_settings,
188 | stream=False,
189 | )
190 | return response.text
191 |
```
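
The helpers in `agent.py` compose into a grounded search agent: build one Vertex AI Search tool per `DataStoreConfig`, attach the tools and system instruction to a `GenerativeModel`, and query it through `VertexAISearchAgent`. A minimal sketch, assuming `vertexai.init` has valid credentials; the project, location, datastore, and model identifiers below are placeholders:

```python
# End-to-end composition of the agent helpers (sketch; identifiers are placeholders).
import vertexai

from mcp_vertexai_search.agent import (
    VertexAISearchAgent,
    create_model,
    create_vertex_ai_tools,
    get_default_safety_settings,
    get_generation_config,
    get_system_instruction,
)
from mcp_vertexai_search.config import DataStoreConfig

vertexai.init(project="my-project", location="us-central1")

data_store = DataStoreConfig(
    project_id="my-project",
    location="global",
    datastore_id="my-datastore",
    tool_name="document-search",
)
tools = create_vertex_ai_tools([data_store])
model = create_model(
    model_name="gemini-1.5-pro",  # placeholder model name
    tools=tools,
    system_instruction=get_system_instruction(),
)

agent = VertexAISearchAgent(model)
answer = agent.search(
    "What was Alphabet's revenue in the most recent quarter?",
    generation_config=get_generation_config(temperature=0.7, top_p=0.95),
    safety_settings=get_default_safety_settings(),
)
print(answer)  # JSON string with "answer" and "references" per the system instruction
```

The synchronous `search` path is used here because a non-streaming call returns a single response whose `.text` carries the JSON payload described in the system instruction.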