# Directory Structure
```
├── .dockerignore
├── .github
│   ├── CODEOWNERS
│   ├── dependabot.yml
│   └── workflows
│       ├── contributors-list.yml
│       ├── test.yml
│       ├── trunk_check.yml
│       └── trunk_upgrade.yml
├── .gitignore
├── .python-version
├── .trunk
│   ├── .gitignore
│   ├── configs
│   │   ├── .checkov.yml
│   │   ├── .isort.cfg
│   │   ├── .markdownlint.yaml
│   │   ├── .shellcheckrc
│   │   ├── .yamllint.yaml
│   │   └── ruff.toml
│   └── trunk.yaml
├── config.yml.template
├── dev
│   ├── build.sh
│   ├── clean.sh
│   ├── publish.sh
│   ├── setup.sh
│   └── test_python.sh
├── Dockerfile
├── docs
│   └── img
│       └── archirecture.png
├── LICENSE
├── Makefile
├── pyproject.toml
├── README.md
├── requirements.setup.txt
├── src
│   ├── mcp_vertexai_search
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── agent.py
│   │   ├── cli.py
│   │   ├── config.py
│   │   ├── google_cloud.py
│   │   ├── server.py
│   │   └── utils.py
│   └── research_agent
│       ├── __init__.py
│       ├── chat.py
│       ├── mcp_client.py
│       └── utils.py
├── tests
│   ├── __init__.py
│   ├── test_config.py
│   └── test_utils.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.12
2 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.isort.cfg:
--------------------------------------------------------------------------------
```
1 | [settings]
2 | profile=black
3 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.checkov.yml:
--------------------------------------------------------------------------------
```yaml
1 | skip-check:
2 | - CKV2_GHA_1
3 |
```
--------------------------------------------------------------------------------
/.trunk/.gitignore:
--------------------------------------------------------------------------------
```
1 | *out
2 | *logs
3 | *actions
4 | *notifications
5 | *tools
6 | plugins
7 | user_trunk.yaml
8 | user.yaml
9 | tmp
10 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.markdownlint.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Prettier friendly markdownlint config (all formatting rules disabled)
2 | extends: markdownlint/style/prettier
3 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.yamllint.yaml:
--------------------------------------------------------------------------------
```yaml
1 | rules:
2 | quoted-strings:
3 | required: only-when-needed
4 | extra-allowed: ["{|}"]
5 | key-duplicates: {}
6 | octal-values:
7 | forbid-implicit-octal: true
8 |
```
--------------------------------------------------------------------------------
/.trunk/configs/.shellcheckrc:
--------------------------------------------------------------------------------
```
1 | enable=all
2 | source-path=SCRIPTDIR
3 | disable=SC2154
4 |
5 | # If you're having issues with shellcheck following source, disable the errors via:
6 | # disable=SC1090
7 | # disable=SC1091
8 |
```
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
```
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
173 | # server config
174 | config.yml
175 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP Server for Vertex AI Search
2 |
3 | This is an MCP server that searches documents using Vertex AI Search.
4 |
5 | ## Architecture
6 |
7 | This solution uses Gemini with Vertex AI grounding to search documents using your private data.
8 | Grounding improves the quality of search results by grounding Gemini's responses in your data stored in Vertex AI Datastore.
9 | One or more Vertex AI data stores can be integrated with the MCP server.
10 | For more details on grounding, refer to [Vertex AI Grounding Documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-with-your-data).
11 |
12 | 
13 |
14 | ## How to use
15 |
16 | There are two ways to use this MCP server.
17 | If you want to run the server on Docker, the first approach is a good fit because a Dockerfile is provided in the project.
18 |
19 | ### 1. Clone the repository
20 |
21 | ```shell
22 | # Clone the repository
23 | git clone git@github.com:ubie-oss/mcp-vertexai-search.git
24 |
25 | # Create a virtual environment
26 | uv venv
27 | # Install the dependencies
28 | uv sync --all-extras
29 |
30 | # Check the command
31 | uv run mcp-vertexai-search
32 | ```
33 |
34 | ### 2. Install the python package
35 |
36 | The package isn't published to PyPI yet, but we can install it from the repository.
37 | We need a config file derived from [config.yml.template](./config.yml.template) to run the MCP server, because the python package doesn't include the config template.
38 | Please refer to [Appendix A: Config file](#appendix-a-config-file) for the details of the config file.
39 |
40 | ```shell
41 | # Install the package
42 | pip install git+https://github.com/ubie-oss/mcp-vertexai-search.git
43 |
44 | # Check the command
45 | mcp-vertexai-search --help
46 | ```
47 |
48 | ## Development
49 |
50 | ### Prerequisites
51 |
52 | - [uv](https://docs.astral.sh/uv/getting-started/installation/)
53 | - Vertex AI data store
54 | - Please look into [the official documentation about data stores](https://cloud.google.com/generative-ai-app-builder/docs/create-datastore-ingest) for more information
55 |
56 | ### Set up Local Environment
57 |
58 | ```shell
59 | # Optional: Install uv
60 | python -m pip install -r requirements.setup.txt
61 |
62 | # Create a virtual environment
63 | uv venv
64 | uv sync --all-extras
65 | ```
66 |
67 | ### Run the MCP server
68 |
69 | The server supports two transports: SSE (Server-Sent Events) and stdio (standard input/output).
70 | We can control the transport by setting the `--transport` flag.
71 |
72 | We can configure the MCP server with a YAML file.
73 | [config.yml.template](./config.yml.template) is a template for the config file.
74 | Please modify the config file to fit your needs.
75 |
76 | ```bash
77 | uv run mcp-vertexai-search serve \
78 | --config config.yml \
79 | --transport <stdio|sse>
80 | ```
81 |
82 | ### Test the Vertex AI Search
83 |
84 | We can test Vertex AI Search without running the MCP server by using the `mcp-vertexai-search search` command.
85 |
86 | ```bash
87 | uv run mcp-vertexai-search search \
88 | --config config.yml \
89 | --query <your-query>
90 | ```
91 |
92 | ## Appendix A: Config file
93 |
94 | [config.yml.template](./config.yml.template) is a template for the config file.
95 |
96 | - `server`
97 | - `server.name`: The name of the MCP server
98 | - `model`
99 | - `model.model_name`: The name of the Vertex AI model
100 | - `model.project_id`: The project ID of the Vertex AI model
101 | - `model.location`: The location of the model (e.g. us-central1)
102 | - `model.impersonate_service_account`: The service account to impersonate
103 | - `model.generate_content_config`: The configuration for the generate content API
104 | - `data_stores`: The list of Vertex AI data stores
105 | - `data_stores.project_id`: The project ID of the Vertex AI data store
106 | - `data_stores.location`: The location of the Vertex AI data store (e.g. us)
107 | - `data_stores.datastore_id`: The ID of the Vertex AI data store
108 | - `data_stores.tool_name`: The name of the tool
109 | - `data_stores.description`: The description of the Vertex AI data store
110 |
```
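
As a concrete illustration of the keys listed in Appendix A, the sketch below builds the same structure in code and validates it with the `Config` model from `mcp_vertexai_search.config`. The project ID, datastore ID, and model name are placeholder assumptions, not values taken from `config.yml.template`.

```python
# Minimal sketch: validate an Appendix A-style config with the project's pydantic models.
# All IDs and the model name below are placeholder assumptions.
import yaml

from mcp_vertexai_search.config import Config

EXAMPLE_CONFIG = """
server:
  name: document-search
model:
  model_name: gemini-2.0-flash      # assumption: any Vertex AI Gemini model name
  project_id: your-gcp-project      # assumption
  location: us-central1
  generate_content_config:
    temperature: 0.7
    top_p: 0.95
data_stores:
  - project_id: your-gcp-project    # assumption
    location: us
    datastore_id: your-datastore-id # assumption
    tool_name: document-search
    description: Searches internal documents.
"""

config = Config(**yaml.safe_load(EXAMPLE_CONFIG))
print(config.model_dump_json(indent=2))
```

The `mcp-vertexai-search validate-config --config config.yml` command performs the same validation against a file on disk.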
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/src/research_agent/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/requirements.setup.txt:
--------------------------------------------------------------------------------
```
1 | uv>=0.6
2 |
```
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
```python
1 | __version__ = "0.0.1"
2 |
```
--------------------------------------------------------------------------------
/dev/build.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/bin/bash
2 | set -Eo pipefail
3 |
4 | uv build
5 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/__main__.py:
--------------------------------------------------------------------------------
```python
1 | from mcp_vertexai_search.cli import serve
2 |
3 | serve()
4 |
```
--------------------------------------------------------------------------------
/.trunk/configs/ruff.toml:
--------------------------------------------------------------------------------
```toml
1 | # Generic, formatter-friendly config.
2 | select = ["B", "D3", "E", "F"]
3 |
4 | # Never enforce `E501` (line length violations). This should be handled by formatters.
5 | ignore = ["E501"]
6 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | FROM python:3.12-slim
2 |
3 | WORKDIR /app
4 |
5 | COPY requirements.setup.txt pyproject.toml uv.lock /app/
6 | RUN python -m pip install --no-cache-dir -r requirements.setup.txt \
7 | && uv venv \
8 | && uv sync
9 |
10 |
11 | COPY . /app
12 |
13 | ENTRYPOINT ["uv", "run", "mcp-vertexai-search"]
14 |
```
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 |
3 | from mcp_vertexai_search.utils import to_mcp_tool
4 |
5 |
6 | class TestUtils(unittest.TestCase):
7 | def test_to_mcp_tool(self):
8 | tool = to_mcp_tool("test-tool", "test-description")
9 | self.assertEqual(tool.name, "test-tool")
10 | self.assertEqual(tool.description, "test-description")
11 |
```
--------------------------------------------------------------------------------
/.github/workflows/contributors-list.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Generate contributors list
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | # SEE https://github.com/marketplace/actions/contribute-list
10 | contrib-readme-job:
11 | runs-on: ubuntu-latest
12 | name: A job to automate contrib in readme
13 | steps:
14 | - name: Contribute List
15 | uses: akhilmhdh/[email protected]
16 | env:
17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |
```
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
```yaml
1 | # See GitHub's documentation for more information on this file:
2 | # https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
3 | version: 2
4 | updates:
5 | - package-ecosystem: github-actions
6 | directory: /
7 | schedule:
8 | interval: weekly
9 | # - package-ecosystem: pip
10 | # directory: /
11 | # schedule:
12 | # interval: weekly
13 |
```
--------------------------------------------------------------------------------
/.github/workflows/trunk_check.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Trunk Check
2 |
3 | on:
4 | pull_request:
5 | workflow_dispatch:
6 |
7 | concurrency:
8 | group: ${{ github.head_ref || github.run_id }}
9 | cancel-in-progress: true
10 |
11 | permissions: read-all
12 |
13 | jobs:
14 | trunk_check:
15 | name: Trunk Check Runner
16 | runs-on: ubuntu-latest
17 | permissions:
18 | checks: write # For trunk to post annotations
19 | contents: read # For repo checkout
20 |
21 | steps:
22 | - name: Checkout
23 | uses: actions/checkout@v5
24 |
25 | - name: Trunk Check
26 | uses: trunk-io/trunk-action@v1
27 |
```
--------------------------------------------------------------------------------
/dev/publish.sh:
--------------------------------------------------------------------------------
```bash
1 | set -Eo pipefail
2 | set -x
3 |
4 | # Constants
5 | SCRIPT_FILE="$(readlink -f "$0")"
6 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
7 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
8 |
9 | cd "${MODULE_DIR}" || exit
10 |
11 | # Arguments
12 | target=${1:?"target is not set"}
13 |
14 | # Ensure uv is installed
15 | pip install uv
16 |
17 | # Build the package first
18 | uv build
19 |
20 | # Publish to the specified target
21 | if [[ ${target} == "pypi" ]]; then
22 | uv publish
23 | elif [[ ${target} == "testpypi" ]]; then
24 | uv publish --publish-url "https://test.pypi.org/legacy/"
25 | else
26 | echo "No such target ${target}"
27 | exit 1
28 | fi
29 |
```
--------------------------------------------------------------------------------
/.github/workflows/trunk_upgrade.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Upgrade Trunk
2 |
3 | on:
4 | workflow_dispatch: {}
5 | schedule:
6 | # Runs the first day of every month (in the UTC timezone)
7 | - cron: 0 0 1 * *
8 |
9 | permissions: read-all
10 |
11 | jobs:
12 | trunk_upgrade:
13 | name: Upgrade Trunk
14 | runs-on: ubuntu-latest
15 | permissions:
16 | contents: write # For trunk to create PRs
17 | pull-requests: write # For trunk to create PRs
18 | steps:
19 | - name: Checkout
20 | uses: actions/checkout@v5
21 | # >>> Install your own deps here (npm install, etc) <<<
22 | # SEE https://github.com/trunk-io/trunk-action
23 | - name: Trunk Upgrade
24 | uses: trunk-io/trunk-action/upgrade@v1
25 | with:
26 | signoff: true
27 |
```
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: Test python
2 |
3 | on:
4 | pull_request:
5 | paths:
6 | - .github/workflows/test.yml
7 | - pyproject.toml
8 |       - src/**/*.py
9 | - tests/**/*.py
10 | - pylintrc
11 | push:
12 | branches:
13 | - main
14 |
15 | jobs:
16 | test:
17 | runs-on: ubuntu-latest
18 | strategy:
19 | matrix:
20 | python-version: ["3.11", "3.12"]
21 | fail-fast: false
22 |
23 | defaults:
24 | run:
25 | shell: bash
26 |
27 | steps:
28 | - uses: actions/checkout@v5
29 | - name: Set up Python
30 | uses: actions/setup-python@v6
31 | with:
32 | python-version: ${{ matrix.python-version }}
33 | - name: Install dependencies
34 | run: |
35 | python -m pip install -r requirements.setup.txt
36 | bash dev/setup.sh --deps "development"
37 | - name: Run tests
38 | run: bash dev/test_python.sh
39 | - name: Test build
40 | run: |
41 | bash dev/build.sh
42 |
```
--------------------------------------------------------------------------------
/dev/test_python.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/bin/bash
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | set -Eeuo pipefail
17 |
18 | # Constants
19 | SCRIPT_FILE="$(readlink -f "$0")"
20 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
21 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
22 |
23 | pytest -v -s --cache-clear "${MODULE_DIR}/tests"
24 |
```
--------------------------------------------------------------------------------
/src/research_agent/utils.py:
--------------------------------------------------------------------------------
```python
1 | from google import genai
2 | from google.genai import types as genai_types
3 | from mcp import types as mcp_types
4 |
5 |
6 | def to_gemini_tool(mcp_tool: mcp_types.Tool) -> genai_types.Tool:
7 | """
8 | Converts an MCP tool schema to a Gemini tool.
9 |
10 |     Args:
11 |         mcp_tool: The MCP tool to convert. Its name, description, and
12 |             input schema are mapped onto the corresponding fields of the
13 |             Gemini function declaration.
14 |
15 | Returns:
16 | A Gemini tool.
17 | """
18 | required_params: list[str] = mcp_tool.inputSchema.get("required", [])
19 | properties = {}
20 | for key, value in mcp_tool.inputSchema.get("properties", {}).items():
21 | schema_dict = {
22 | "type": value.get("type", "STRING").upper(),
23 | "description": value.get("description", ""),
24 | }
25 | properties[key] = genai_types.Schema(**schema_dict)
26 |
27 | function = genai.types.FunctionDeclaration(
28 | name=mcp_tool.name,
29 | description=mcp_tool.description,
30 | parameters=genai.types.Schema(
31 | type="OBJECT",
32 | properties=properties,
33 | required=required_params,
34 | ),
35 | )
36 | return genai_types.Tool(function_declarations=[function])
37 |
```
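
A brief usage sketch of `to_gemini_tool`. The tool definition below is hand-written for illustration; its name and schema are assumptions, though they mirror what `mcp_vertexai_search.utils.to_mcp_tool` produces.

```python
# Usage sketch: convert a hand-written MCP tool definition to a Gemini tool.
from mcp import types as mcp_types

from research_agent.utils import to_gemini_tool

mcp_tool = mcp_types.Tool(
    name="document-search",  # assumption: matches the default server tool name
    description="Searches documents in a Vertex AI data store.",
    inputSchema={
        "type": "object",
        "required": ["query"],
        "properties": {
            "query": {"type": "string", "description": "A natural language question."},
        },
    },
)

gemini_tool = to_gemini_tool(mcp_tool)
# The result wraps a single FunctionDeclaration whose parameters mirror inputSchema.
print(gemini_tool.function_declarations[0].name)
```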
--------------------------------------------------------------------------------
/dev/clean.sh:
--------------------------------------------------------------------------------
```bash
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | set -e
17 | set -x
18 |
19 | # Constants
20 | SCRIPT_FILE="$(readlink -f "$0")"
21 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
22 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
23 |
24 | cleaned_dirs=(
25 | dist
26 | sdist
27 | .pytest_cache
28 | )
29 |
30 | for cleaned_dir in "${cleaned_dirs[@]}"; do
31 | if [[ -d "${MODULE_DIR}/${cleaned_dir}" ]]; then
32 | rm -r "${MODULE_DIR:?}/${cleaned_dir}"
33 | fi
34 | done
35 |
```
--------------------------------------------------------------------------------
/.trunk/trunk.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # This file controls the behavior of Trunk: https://docs.trunk.io/cli
2 | # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml
3 | version: 0.1
4 | cli:
5 | version: 1.24.0
6 | # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins)
7 | plugins:
8 | sources:
9 | - id: trunk
10 | ref: v1.7.0
11 | uri: https://github.com/trunk-io/plugins
12 | # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
13 | runtimes:
14 | enabled:
15 | - [email protected]
16 | - [email protected]
17 | - [email protected]
18 | # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration)
19 | lint:
20 | disabled:
21 | - black
22 | enabled:
23 | - [email protected]
24 | - [email protected]
25 | - [email protected]
26 | - [email protected]
27 | - [email protected]
28 | - [email protected]
29 | - [email protected]
30 | - [email protected]
31 | - [email protected]
32 | - [email protected]
33 | - [email protected]
34 | - [email protected]
35 | - [email protected]
36 | - git-diff-check
37 | - [email protected]
38 | - [email protected]
39 | - [email protected]
40 | actions:
41 | enabled:
42 | - trunk-announce
43 | - trunk-check-pre-push
44 | - trunk-fmt-pre-commit
45 | - trunk-upgrade-available
46 |
```
--------------------------------------------------------------------------------
/dev/setup.sh:
--------------------------------------------------------------------------------
```bash
1 | # Constants
2 | SCRIPT_FILE="$(readlink -f "$0")"
3 | SCRIPT_DIR="$(dirname "${SCRIPT_FILE}")"
4 | MODULE_DIR="$(dirname "${SCRIPT_DIR}")"
5 |
6 | # Arguments
7 | deps="production"
8 | use_venv=false
9 | while (($# > 0)); do
10 | if [[ $1 == "--use-venv" ]]; then
11 | use_venv=true
12 | shift 1
13 | elif [[ $1 == "--deps" ]]; then
14 | if [[ $2 != "production" && $2 != "development" ]]; then
15 | echo "Error: deps must be one of 'production' or 'development'"
16 | exit 1
17 | fi
18 | deps="$2"
19 | shift 2
20 | else
21 | echo "Unknown argument: $1"
22 | exit 1
23 | fi
24 | done
25 |
26 | # Change to the module directory
27 | cd "${MODULE_DIR}"
28 |
29 | # Install uv and dependencies
30 | pip install --force-reinstall -r "${MODULE_DIR}/requirements.setup.txt"
31 |
32 | UV_PIP_OPTIONS=("--force-reinstall")
33 | if [[ ${use_venv} == true ]]; then
34 | # Create virtual environment
35 | uv venv
36 | # Activate virtual environment
37 | if [[ -f .venv/bin/activate ]]; then
38 | # shellcheck disable=SC1091
39 | source .venv/bin/activate
40 | else
41 | echo "Error: .venv/bin/activate not found"
42 | exit 1
43 | fi
44 | else
45 | UV_PIP_OPTIONS+=("--system")
46 | fi
47 |
48 | # Install package and dependencies
49 | if [[ ${deps} == "production" ]]; then
50 | uv pip install "${UV_PIP_OPTIONS[@]}" -e "."
51 | else
52 | uv pip install "${UV_PIP_OPTIONS[@]}" -e ".[dev,test]"
53 | fi
54 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/utils.py:
--------------------------------------------------------------------------------
```python
1 | from typing import Dict, List
2 |
3 | from mcp import types as mcp_types
4 |
5 | from mcp_vertexai_search.config import DataStoreConfig
6 |
7 |
8 | def to_mcp_tool(tool_name: str, description: str) -> mcp_types.Tool:
9 | """Convert a tool name and description to an MCP Tool"""
10 | return mcp_types.Tool(
11 | name=tool_name,
12 | description=description,
13 | inputSchema={
14 | "type": "object",
15 | "required": ["query"],
16 | "properties": {
17 | "query": {
18 | "type": "string",
19 | "description": """\
20 | A natural language question, not search keywords, used to query the documents.
21 | The query question should be sentence(s), not search keywords.
22 | """.strip(),
23 | },
24 | },
25 | },
26 | )
27 |
28 |
29 | def to_mcp_tools_map(
30 | data_store_configs: List[DataStoreConfig],
31 | ) -> Dict[str, mcp_types.Tool]:
32 | """Convert a list of DataStoreConfigs to a tool map"""
33 | return {
34 | data_store_config.tool_name: to_mcp_tool(
35 | data_store_config.tool_name, data_store_config.description
36 | )
37 | for data_store_config in data_store_configs
38 | }
39 |
```
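
A short sketch of how the server's tool map is built from data store configs; each configured data store becomes one MCP tool keyed by its `tool_name`. The project and datastore IDs below are placeholders.

```python
# Usage sketch: one DataStoreConfig in, one MCP tool out, keyed by tool_name.
from mcp_vertexai_search.config import DataStoreConfig
from mcp_vertexai_search.utils import to_mcp_tools_map

data_stores = [
    DataStoreConfig(
        project_id="your-gcp-project",     # assumption
        location="us",
        datastore_id="your-datastore-id",  # assumption
        tool_name="document-search",
        description="Searches internal documents.",
    ),
]

tools_map = to_mcp_tools_map(data_stores)
print(list(tools_map))                                       # ['document-search']
print(tools_map["document-search"].inputSchema["required"])  # ['query']
```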
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [tool.hatch.build.targets.wheel]
6 | packages = ["src/mcp_vertexai_search"]
7 |
8 | [project]
9 | name = "mcp-vertexai-search"
10 | version = "0.1.0"
11 | authors = [{ name = "ubie" }]
12 | readme = "README.md"
13 | license = { file = "LICENSE" }
14 | requires-python = ">=3.10.0"
15 | classifiers = [
16 | "Development Status :: 4 - Beta",
17 | "Intended Audience :: Information Technology",
18 | "Intended Audience :: System Administrators",
19 | "Operating System :: OS Independent",
20 | "Topic :: Software Development :: Libraries",
21 | "Topic :: Software Development :: Libraries :: Python Modules",
22 | "License :: OSI Approved :: Apache Software License",
23 | "Programming Language :: Python",
24 | "Programming Language :: Python :: 3",
25 | "Programming Language :: Python :: 3 :: Only",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | "Typing :: Typed",
30 | ]
31 | description = "An MCP server to search documents using Vertex AI Search"
32 | dependencies = [
33 | "click>=8.1.8",
34 | "google-cloud-aiplatform>=1.96.0",
35 | "google-cloud-discoveryengine>=0.13.8",
36 | "loguru>=0.7.3",
37 | "mcp[cli]>=1.9.2",
38 | "pydantic>=2.10.6",
39 | "pyyaml>=6.0.2",
40 | "uvicorn>=0.34.0",
41 | "vertexai>=1.43.0",
42 | ]
43 |
44 | [project.optional-dependencies]
45 | dev = [
46 | "autopep8>=2.3.2",
47 | "bandit>=1.8.3",
48 | "black>=25.1.0",
49 | "google-genai>=1.2.0",
50 | "isort>=6.0.0",
51 | "langgraph>=0.2.74",
52 | "pytest>=8.3.4",
53 | "ruff>=0.9.6",
54 | ]
55 |
56 |
57 | [project.scripts]
58 | mcp-vertexai-search = "mcp_vertexai_search.cli:cli"
59 |
```
--------------------------------------------------------------------------------
/src/research_agent/mcp_client.py:
--------------------------------------------------------------------------------
```python
1 | from contextlib import AsyncExitStack
2 | from typing import Optional
3 |
4 | from mcp.client.session import ClientSession
5 | from mcp.client.sse import sse_client
6 |
7 |
8 | class MCPClient:
9 |     def __init__(self, name: str, server_url: Optional[str] = None):
10 | # Initialize session and client objects
11 | self.name = name
12 | self.session: Optional[ClientSession] = None
13 | self.exit_stack = AsyncExitStack()
14 |
15 |         # NOTE: connect_to_server() is async; callers must await it (see main() below).
16 |         self.server_url = server_url
17 |
18 | async def connect_to_server(self, server_url: str):
19 | """Connect to an MCP server running with SSE transport"""
20 | # Use AsyncExitStack to manage the contexts
21 | _sse_client = sse_client(url=server_url)
22 | streams = await self.exit_stack.enter_async_context(_sse_client)
23 |
24 | _session_context = ClientSession(*streams)
25 | self.session: ClientSession = await self.exit_stack.enter_async_context(
26 | _session_context
27 | )
28 |
29 | # Initialize
30 | await self.session.initialize()
31 |
32 | async def cleanup(self):
33 | """Properly clean up the session and streams"""
34 | await self.exit_stack.aclose()
35 |
36 | async def list_tools(self):
37 | return await self.session.list_tools()
38 |
39 | async def call_tool(self, tool_name: str, tool_arguments: Optional[dict] = None):
40 | return await self.session.call_tool(tool_name, tool_arguments)
41 |
42 |
43 | if __name__ == "__main__":
44 |
45 | async def main():
46 |         client = MCPClient(name="document-search")
47 | await client.connect_to_server(server_url="http://0.0.0.0:8080/sse")
48 | tools = await client.list_tools()
49 | print(tools)
50 |         tool_call = await client.call_tool("document-search", {"query": "What is a cpp segment?"})
51 | print(tool_call)
52 | await client.cleanup() # Ensure cleanup is called
53 |
54 | import asyncio
55 |
56 | asyncio.run(main())
57 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/google_cloud.py:
--------------------------------------------------------------------------------
```python
1 | from typing import List, Optional
2 |
3 | from google import auth
4 | from google.auth import impersonated_credentials
5 |
6 |
7 | def get_credentials(
8 | project_id: Optional[str] = None,
9 | impersonate_service_account: Optional[str] = None,
10 | scopes: Optional[List[str]] = None,
11 | lifetime: Optional[int] = None,
12 | ) -> auth.credentials.Credentials:
13 | """Get the credentials"""
14 | if impersonate_service_account is not None:
15 | return get_impersonate_credentials(
16 | impersonate_service_account, project_id, scopes, lifetime
17 | )
18 | return get_default_credentials(project_id)
19 |
20 |
21 | def get_default_credentials(
22 | project_id: Optional[str] = None,
23 | ) -> auth.credentials.Credentials:
24 | """Get the default credentials"""
25 | if project_id is not None:
26 | credentials, _ = auth.default(quota_project_id=project_id)
27 | else:
28 | credentials, _ = auth.default()
29 | return credentials
30 |
31 |
32 | def get_impersonate_credentials(
33 | impersonate_service_account: str,
34 | quoted_project_id: Optional[str] = None,
35 | scopes: Optional[List[str]] = None,
36 | lifetime: Optional[int] = None,
37 | ) -> impersonated_credentials.Credentials:
38 |     """Get impersonated credentials"""
39 |     # Create impersonated credentials for the target service account
40 | if scopes is None:
41 | scopes = ["https://www.googleapis.com/auth/cloud-platform"]
42 | if lifetime is None:
43 |         # NOTE: The maximum lifetime is 3600s. If an operation cannot finish within
44 |         # 1 hour, we have to consider an alternative approach.
45 | lifetime = 3600
46 |
47 | source_credentials, _ = auth.default()
48 | if quoted_project_id is not None:
49 | source_credentials, quoted_project_id = auth.default(
50 | quota_project_id=quoted_project_id
51 | )
52 | target_credentials = impersonated_credentials.Credentials(
53 | source_credentials=source_credentials,
54 | target_principal=impersonate_service_account,
55 | target_scopes=scopes,
56 | lifetime=lifetime,
57 | )
58 | return target_credentials
59 |
```
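
A usage sketch of `get_credentials`, assuming Application Default Credentials are configured locally. The project ID and service account email are placeholders.

```python
# Usage sketch: default vs. impersonated credentials (placeholder project/account).
from mcp_vertexai_search.google_cloud import get_credentials

# Application Default Credentials, optionally pinned to a quota project
default_creds = get_credentials(project_id="your-gcp-project")  # assumption

# Short-lived impersonated credentials for a target service account
impersonated_creds = get_credentials(
    project_id="your-gcp-project",  # assumption
    impersonate_service_account="search-agent@your-gcp-project.iam.gserviceaccount.com",
    lifetime=1800,  # seconds; the token service caps this at 3600
)
```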
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/config.py:
--------------------------------------------------------------------------------
```python
1 | from typing import List, Optional
2 |
3 | import yaml
4 | from pydantic import BaseModel, Field
5 |
6 |
7 | class GenerateContentConfig(BaseModel):
8 | """The configuration for the generate content API."""
9 |
10 | temperature: float = Field(
11 | description="The temperature for the generate content API",
12 | default=0.7,
13 | )
14 | top_p: float = Field(
15 | description="The top p for the generate content API",
16 | default=0.95,
17 | )
18 |
19 |
20 | class VertexAIModelConfig(BaseModel):
21 | """The configuration for a Vertex AI model."""
22 |
23 | model_name: str = Field(..., description="The name of the Vertex AI model")
24 | project_id: str = Field(..., description="The project ID of the Vertex AI model")
25 | location: str = Field(..., description="The location of the model")
26 | impersonate_service_account: Optional[str] = Field(
27 | None, description="The service account to impersonate"
28 | )
29 | generate_content_config: Optional[GenerateContentConfig] = Field(
30 | description="The configuration for the generate content API",
31 | default_factory=GenerateContentConfig,
32 | )
33 |
34 |
35 | class DataStoreConfig(BaseModel):
36 | """The configuration for a Vertex AI data store."""
37 |
38 | project_id: str = Field(
39 | ..., description="The project ID of the Vertex AI data store"
40 | )
41 | location: str = Field(..., description="The location of the Vertex AI data store")
42 | datastore_id: str = Field(..., description="The ID of the Vertex AI data store")
43 | tool_name: str = Field(
44 | ...,
45 |         description="The name of the MCP tool that searches this data store",
46 | )
47 | description: str = Field(
48 | description="The description of the Vertex AI data store",
49 | default="",
50 | )
51 |
52 |
53 | class MCPServerConfig(BaseModel):
54 | """The configuration for an MCP server."""
55 |
56 | name: str = Field(
57 | description="The name of the MCP server", default="document-search"
58 | )
59 |
60 |
61 | class Config(BaseModel):
62 | """The configuration for the application."""
63 |
64 | server: MCPServerConfig = Field(
65 | description="The server configuration", default_factory=MCPServerConfig
66 | )
67 | model: VertexAIModelConfig = Field(
68 | description="The model configuration", default_factory=VertexAIModelConfig
69 | )
70 | data_stores: List[DataStoreConfig] = Field(
71 | description="The data stores configuration", default_factory=list
72 | )
73 |
74 |
75 | def load_yaml_config(file_path: str) -> Config:
76 | """Load a YAML config file"""
77 | with open(file_path, "r") as f:
78 | return Config(**yaml.safe_load(f))
79 |
```
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/cli.py:
--------------------------------------------------------------------------------
```python
1 | import asyncio
2 |
3 | import click
4 | import vertexai
5 |
6 | from mcp_vertexai_search.agent import (
7 | VertexAISearchAgent,
8 | create_model,
9 | create_vertex_ai_tools,
10 | get_default_safety_settings,
11 | get_generation_config,
12 | get_system_instruction,
13 | )
14 | from mcp_vertexai_search.config import load_yaml_config
15 | from mcp_vertexai_search.google_cloud import get_credentials
16 | from mcp_vertexai_search.server import create_server, run_sse_server, run_stdio_server
17 |
18 | cli = click.Group()
19 |
20 |
21 | @cli.command("serve")
22 | # trunk-ignore(bandit/B104)
23 | @click.option("--host", type=str, default="0.0.0.0", help="The host to listen on")
24 | @click.option("--port", type=int, default=8080, help="The port to listen on")
25 | @click.option(
26 | "--transport",
27 | type=click.Choice(["stdio", "sse"]),
28 | default="stdio",
29 | help="The transport to use",
30 | )
31 | @click.option("--config", type=click.Path(exists=True), help="The config file")
32 | def serve(
33 | host: str,
34 | port: int,
35 | transport: str,
36 | config: str,
37 | ):
38 | server_config = load_yaml_config(config)
39 | vertexai.init(
40 | project=server_config.model.project_id, location=server_config.model.location
41 | )
42 |
43 | search_tools = create_vertex_ai_tools(server_config.data_stores)
44 | model = create_model(
45 | model_name=server_config.model.model_name,
46 | tools=search_tools,
47 | system_instruction=get_system_instruction(),
48 | )
49 | agent = VertexAISearchAgent(model=model)
50 |
51 | app = create_server(agent, server_config)
52 |     if transport == "stdio":
53 |         run_stdio_server(app)
54 |     elif transport == "sse":
55 |         run_sse_server(app, host, port)
56 | else:
57 | raise ValueError(f"Invalid transport: {transport}")
58 |
59 |
60 | @cli.command("search")
61 | @click.option("--config", type=click.Path(exists=True), help="The config file")
62 | @click.option("--query", type=str, help="The query to search for")
63 | def search(
64 | config: str,
65 | query: str,
66 | ):
67 | # Load the config
68 | server_config = load_yaml_config(config)
69 |
70 | # Initialize the Vertex AI client
71 | credentials = get_credentials(
72 | impersonate_service_account=server_config.model.impersonate_service_account,
73 | )
74 | vertexai.init(
75 | project=server_config.model.project_id,
76 | location=server_config.model.location,
77 | credentials=credentials,
78 | )
79 |
80 | # Create the search agent
81 | search_tools = create_vertex_ai_tools(server_config.data_stores)
82 | model = create_model(
83 | model_name=server_config.model.model_name,
84 | tools=search_tools,
85 | system_instruction=get_system_instruction(),
86 | )
87 | agent = VertexAISearchAgent(
88 | model=model,
89 | )
90 |
91 | # Generate the response
92 | generation_config = get_generation_config()
93 | safety_settings = get_default_safety_settings()
94 | response = agent.search(
95 | query,
96 | generation_config=generation_config,
97 | safety_settings=safety_settings,
98 | )
99 | print(response)
100 |
101 |
102 | @cli.command("validate-config")
103 | @click.option("--config", type=click.Path(exists=True), help="The config file")
104 | @click.option("--verbose", type=bool, default=False, help="Verbose output")
105 | def validate_config(config: str, verbose: bool):
106 | try:
107 | server_config = load_yaml_config(config)
108 | if verbose:
109 | print(server_config.model_dump_json(indent=2))
110 | # pylint: disable=broad-exception-caught
111 | except Exception as e:
112 | raise ValueError(f"Invalid config: {e}") from e
113 |
```
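
A small sketch of exercising the CLI in-process with click's test runner. It assumes a valid `config.yml` exists in the working directory; the same command is normally run via `uv run mcp-vertexai-search validate-config ...` as shown in the README.

```python
# Usage sketch: invoke the validate-config command in-process (assumes ./config.yml exists).
from click.testing import CliRunner

from mcp_vertexai_search.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli, ["validate-config", "--config", "config.yml", "--verbose", "true"]
)
print(result.exit_code)  # 0 if the config is valid
print(result.output)     # the parsed config as JSON when --verbose is true
```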
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/server.py:
--------------------------------------------------------------------------------
```python
1 | import anyio
2 | import mcp.types as types
3 | from mcp.server.lowlevel import Server
4 | from mcp.shared.exceptions import ErrorData, McpError
5 |
6 | from mcp_vertexai_search.agent import (
7 | VertexAISearchAgent,
8 | get_default_safety_settings,
9 | get_generation_config,
10 | )
11 | from mcp_vertexai_search.config import Config
12 | from mcp_vertexai_search.utils import to_mcp_tools_map
13 |
14 |
15 | def create_server(
16 | agent: VertexAISearchAgent,
17 | config: Config,
18 | ) -> Server:
19 | """Create the MCP server."""
20 | app = Server("document-search")
21 |
22 | # Create a map of tools for the MCP server
23 | tools_map = to_mcp_tools_map(config.data_stores)
24 |
25 | # TODO Add @app.list_prompts()
26 |
27 | @app.call_tool()
28 | async def call_tool(
29 | name: str, arguments: dict
30 | ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
31 | if name not in tools_map:
32 | raise McpError(
33 | ErrorData(code=types.INVALID_PARAMS, message=f"Unknown tool: {name}")
34 | )
35 | if "query" not in arguments:
36 | raise McpError(
37 | ErrorData(code=types.INVALID_PARAMS, message="query is required")
38 | )
39 | # pylint: disable=broad-exception-caught
40 | try:
41 | # TODO handle retry logic
42 | generation_config = get_generation_config(
43 | temperature=config.model.generate_content_config.temperature,
44 | top_p=config.model.generate_content_config.top_p,
45 | )
46 | safety_settings = get_default_safety_settings()
47 | response = agent.search(
48 | query=arguments["query"],
49 | generation_config=generation_config,
50 | safety_settings=safety_settings,
51 | )
52 | return [types.TextContent(type="text", text=response)]
53 | # pylint: disable=broad-exception-caught
54 | except Exception as e:
55 | raise McpError(ErrorData(code=types.INVALID_PARAMS, message=str(e))) from e
56 |
57 | @app.list_tools()
58 | async def list_tools() -> list[types.Tool]:
59 | return [tools_map[tool_name] for tool_name in tools_map]
60 |
61 | return app
62 |
63 |
64 | def run_stdio_server(app: Server) -> None:
65 | """Run the server using the stdio transport."""
66 | try:
67 | from mcp.server.stdio import stdio_server
68 | except ImportError as e:
69 | raise ImportError("stdio transport is not available") from e
70 |
71 | async def arun():
72 | async with stdio_server() as streams:
73 | await app.run(streams[0], streams[1], app.create_initialization_options())
74 |
75 | anyio.run(arun)
76 |
77 |
78 | def run_sse_server(app: Server, host: str, port: int) -> None:
79 | """Run the server using the SSE transport."""
80 | try:
81 | import uvicorn
82 | from mcp.server.sse import SseServerTransport
83 | from starlette.applications import Starlette
84 | from starlette.routing import Mount, Route
85 | except ImportError as e:
86 | raise ImportError("SSE transport is not available") from e
87 |
88 | # Handle SSE connections
89 | sse = SseServerTransport("/messages/")
90 |
91 | async def handle_sse(request):
92 | async with sse.connect_sse(
93 | request.scope, request.receive, request._send
94 | ) as streams:
95 | await app.run(streams[0], streams[1], app.create_initialization_options())
96 |
97 | # Create the Starlette app
98 | starlette_app = Starlette(
99 | debug=True,
100 | routes=[
101 | Route("/sse", endpoint=handle_sse),
102 | Mount("/messages/", app=sse.handle_post_message),
103 | ],
104 | )
105 | # Serve the Starlette app
106 | uvicorn.run(starlette_app, host=host, port=port)
107 |
```
--------------------------------------------------------------------------------
/src/research_agent/chat.py:
--------------------------------------------------------------------------------
```python
1 | import argparse
2 | import asyncio
3 | import json
4 | import textwrap
5 | from typing import List
6 |
7 | from google import genai
8 | from google.genai import chats, types
9 | from loguru import logger
10 | from pydantic import BaseModel, Field
11 |
12 | from research_agent.mcp_client import MCPClient
13 | from research_agent.utils import to_gemini_tool
14 |
15 |
16 | class Reference(BaseModel):
17 | """A reference to a document."""
18 |
19 | title: str = Field(..., description="The title of the document.")
20 | raw_text: str = Field(..., description="The raw text of the document.")
21 |
22 |
23 | class SearchResponse(BaseModel):
24 | """The response from the search tool."""
25 |
26 | answer: str = Field(..., description="The answer to the user's question.")
27 | references: List[Reference] = Field(
28 | ...,
29 | description="The references to the documents that are used to answer the user's question.",
30 | )
31 |
32 | @classmethod
33 | def from_json_string(cls, json_string: str) -> "SearchResponse":
34 | """Deserialize the search response from a JSON string."""
35 | return cls(**json.loads(json_string))
36 |
37 | def __str__(self) -> str:
38 | return textwrap.dedent(f"""
39 | Answer: {self.answer}
40 |
41 | References:
42 | {"\n".join([f" - {ref.title}: {ref.raw_text}" for ref in self.references])}
43 | """)
44 |
45 |
46 | async def process_query(
47 | chat_client: chats.Chat,
48 | mcp_client: MCPClient,
49 | query: str,
50 | ) -> str:
51 | """Process the user query using Gemini and MCP tools."""
52 | response = chat_client.send_message(message=[query])
53 | if not response.candidates:
54 | raise RuntimeError("No response from Gemini")
55 |
56 | response_text = []
57 | for candidate in response.candidates:
58 | if not candidate.content:
59 | logger.debug(f"No content in candidate {candidate}")
60 | continue
61 |
62 | for part in candidate.content.parts:
63 | if part.text:
64 | response_text.append(part.text)
65 | elif part.function_call:
66 | tool_name = part.function_call.name
67 | tool_args = part.function_call.args
68 | logger.debug(f"Tool name: {tool_name}, tool args: {tool_args}")
69 | tool_call = await mcp_client.call_tool(tool_name, tool_args)
70 |
71 | if tool_call and tool_call.content:
72 | for content in tool_call.content:
73 | text = content.text
74 | if not text:
75 | logger.info(f"No text in tool call content {content}")
76 | continue
77 |
78 | try:
79 | parsed_content = SearchResponse.from_json_string(text)
80 | response_text.append(str(parsed_content))
81 | except Exception as e: # pylint: disable=broad-except
82 | logger.error(
83 | f"Failed to deserialize tool call content {content}: {e}"
84 | )
85 | response_text.append(text)
86 | else:
87 | raise RuntimeError(f"No tool call content {tool_call}")
88 | else:
89 | raise RuntimeError(f"Unknown part type {part}")
90 | return "\n".join(response_text)
91 |
92 |
93 | async def chat(server_url: str):
94 | """
95 | Run the chat server.
96 | """
97 | # Why do we use google-genai, not vertexai?
98 | # Because it is easier to convert MCP tools to GenAI tools in google-genai.
99 | genai_client = genai.Client(vertexai=True, location="us-central1")
100 | mcp_client = MCPClient(name="document-search")
101 | await mcp_client.connect_to_server(server_url=server_url)
102 |
103 | # Collect tools from MCP server
104 | mcp_tools = await mcp_client.list_tools()
105 | # Convert MCP tools to GenAI tools
106 | genai_tools = [to_gemini_tool(tool) for tool in mcp_tools.tools]
107 |
108 | # Create chat client
109 | chat_client = genai_client.chats.create(
110 | model="gemini-2.0-flash",
111 | config=types.GenerateContentConfig(
112 | tools=genai_tools,
113 | system_instruction="""
114 | You are a helpful assistant to search documents.
115 |             Pass the user's question to the tool as naturally phrased sentences, not search keywords.
116 | """,
117 | ),
118 | )
119 |
120 | print("If you want to quit, please enter 'bye'")
121 | try:
122 | while True:
123 | # Get user query
124 | query = input("Enter your query: ")
125 | if query == "bye":
126 | break
127 |
128 | # Get response from GenAI
129 | response = await process_query(chat_client, mcp_client, query)
130 | print(response)
131 | # pylint: disable=broad-except
132 | except Exception as e:
133 | await mcp_client.cleanup()
134 | raise RuntimeError from e
135 |
136 |
137 | if __name__ == "__main__":
138 | # Parse command line arguments
139 | parser = argparse.ArgumentParser()
140 | # trunk-ignore(bandit/B104)
141 | parser.add_argument("--host", type=str, default="0.0.0.0")
142 | parser.add_argument("--port", type=int, default=8080)
143 | args = parser.parse_args()
144 | # Run the chat server
145 | server_url = f"http://{args.host}:{args.port}/sse"
146 | asyncio.run(chat(server_url))
147 |
```
--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
```python
1 | import unittest
2 |
3 | from mcp_vertexai_search.config import (
4 | Config,
5 | DataStoreConfig,
6 | GenerateContentConfig,
7 | MCPServerConfig,
8 | VertexAIModelConfig,
9 | )
10 |
11 |
12 | class TestConfig(unittest.TestCase):
13 | def test_default_config(self):
14 | """Test that default Config values are set correctly."""
15 | config = Config(
16 | model=VertexAIModelConfig(
17 | project_id="test-project",
18 | model_name="test-model",
19 | location="test-location",
20 | ),
21 | )
22 | self.assertIsInstance(config.server, MCPServerConfig)
23 | self.assertIsInstance(config.model, VertexAIModelConfig)
24 | self.assertEqual(config.data_stores, [])
25 |
26 | def test_custom_config(self):
27 | """Test that Config can be initialized with custom values."""
28 | custom_server = MCPServerConfig(name="test-server")
29 | custom_model = VertexAIModelConfig(
30 | project_id="test-project",
31 | model_name="test-model",
32 | location="test-location",
33 | )
34 | custom_data_store = DataStoreConfig(
35 | project_id="test-project",
36 | location="test-location",
37 | datastore_id="test-datastore",
38 | tool_name="test-tool",
39 | )
40 |
41 | config = Config(
42 | server=custom_server,
43 | model=custom_model,
44 | data_stores=[custom_data_store]
45 | )
46 |
47 | self.assertEqual(config.server.name, "test-server")
48 | self.assertEqual(config.model.model_name, "test-model")
49 | self.assertEqual(config.model.location, "test-location")
50 | self.assertEqual(len(config.data_stores), 1)
51 | self.assertEqual(config.data_stores[0].datastore_id, "test-datastore")
52 |
53 | def test_default_mcpserverconfig(self):
54 | """Test MCPServerConfig default values."""
55 | server_config = MCPServerConfig()
56 | self.assertEqual(server_config.name, "document-search")
57 |
58 | def test_custom_mcpserverconfig(self):
59 | """Test MCPServerConfig with custom values."""
60 | server_config = MCPServerConfig(name="custom-server")
61 | self.assertEqual(server_config.name, "custom-server")
62 |
63 | def test_default_vertexaimodelconfig(self):
64 | """Test VertexAIModelConfig default values."""
65 | model_config = VertexAIModelConfig(
66 | project_id="test-project",
67 | location="test-location",
68 | model_name="test-model",
69 | )
70 | self.assertIsInstance(model_config.generate_content_config, GenerateContentConfig)
71 | self.assertEqual(model_config.project_id, "test-project")
72 | self.assertEqual(model_config.location, "test-location")
73 | self.assertEqual(model_config.model_name, "test-model")
74 | self.assertEqual(model_config.generate_content_config.temperature, 0.7)
75 | self.assertEqual(model_config.generate_content_config.top_p, 0.95)
76 |
77 | def test_custom_vertexaimodelconfig(self):
78 | """Test VertexAIModelConfig with custom values."""
79 | custom_gen_config = GenerateContentConfig(temperature=0.8, top_p=0.9)
80 | model_config = VertexAIModelConfig(
81 | model_name="custom-model",
82 | location="custom-location",
83 | project_id="custom-project",
84 | generate_content_config=custom_gen_config,
85 | )
86 | self.assertEqual(model_config.model_name, "custom-model")
87 | self.assertEqual(model_config.location, "custom-location")
88 | self.assertEqual(model_config.project_id, "custom-project")
89 | self.assertEqual(model_config.generate_content_config.temperature, 0.8)
90 | self.assertEqual(model_config.generate_content_config.top_p, 0.9)
91 |
92 | def test_default_generatecontentconfig(self):
93 | """Test GenerateContentConfig default values."""
94 | gen_config = GenerateContentConfig()
95 | self.assertEqual(gen_config.temperature, 0.7)
96 | self.assertEqual(gen_config.top_p, 0.95)
97 |
98 | def test_custom_generatecontentconfig(self):
99 | """Test GenerateContentConfig with custom values."""
100 | gen_config = GenerateContentConfig(temperature=0.6, top_p=0.8)
101 | self.assertEqual(gen_config.temperature, 0.6)
102 | self.assertEqual(gen_config.top_p, 0.8)
103 |
104 | def test_default_datastoreconfig(self):
105 | """Test DataStoreConfig default values."""
106 | datastore_config = DataStoreConfig(
107 | project_id="test-project",
108 | location="test-location",
109 | datastore_id="test-datastore",
110 | tool_name="test-tool",
111 | )
112 | self.assertEqual(datastore_config.description, "")
113 | self.assertEqual(datastore_config.tool_name, "test-tool")
114 |
115 | def test_custom_datastoreconfig(self):
116 | """Test DataStoreConfig with custom values."""
117 | datastore_config = DataStoreConfig(
118 | project_id="custom-project",
119 | location="custom-location",
120 | datastore_id="custom-datastore",
121 | description="custom-description",
122 | tool_name="custom-tool",
123 | )
124 | self.assertEqual(datastore_config.project_id, "custom-project")
125 | self.assertEqual(datastore_config.location, "custom-location")
126 | self.assertEqual(datastore_config.datastore_id, "custom-datastore")
127 | self.assertEqual(datastore_config.description, "custom-description")
128 | self.assertEqual(datastore_config.tool_name, "custom-tool")
129 |
130 | def test_computed_tool_name_datastoreconfig(self):
131 |         """Test DataStoreConfig tool_name when explicitly set to the default name."""
132 | datastore_config = DataStoreConfig(
133 | project_id="custom-project",
134 | location="custom-location",
135 | datastore_id="custom-datastore",
136 | description="custom-description",
137 |             tool_name="document-search",
138 | )
139 | expected_tool_name = "document-search"
140 | self.assertEqual(datastore_config.tool_name, expected_tool_name)
141 |
```
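
Because the tests import `src.mcp_vertexai_search.config`, they are expected to run from the repository root. A minimal sketch of driving this module with the standard `unittest` loader (equivalent to `python -m unittest tests.test_config`):

```python
# Run the config tests programmatically from the repository root (sketch).
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover("tests", pattern="test_config.py")
    unittest.TextTestRunner(verbosity=2).run(suite)
```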
--------------------------------------------------------------------------------
/src/mcp_vertexai_search/agent.py:
--------------------------------------------------------------------------------
```python
1 | import textwrap
2 | from typing import List, Optional
3 |
4 | from vertexai import generative_models
5 |
6 | from mcp_vertexai_search.config import DataStoreConfig
7 |
8 | # class Reference(BaseModel):
9 | # """Reference"""
10 |
11 | # title: str = Field(..., description="Title of the reference snippet")
12 | # raw_text: str = Field(..., description="Content of the reference raw text")
13 |
14 |
15 | # class SearchResponse(BaseModel):
16 | # """Search response"""
17 |
18 | # answer: str = Field(..., description="The answer to the query")
19 | # references: List[Reference] = Field(
20 | # ..., description="References used to generate the answer"
21 | # )
22 |
23 |
24 | def get_generation_config(
25 | temperature: Optional[float] = None,
26 | top_p: Optional[float] = None,
27 | ) -> generative_models.GenerationConfig:
28 | """Default generation config
29 |
30 | TODO: We should customize this based on the use case.
31 | """
32 | return generative_models.GenerationConfig(
33 | temperature=temperature,
34 | top_p=top_p,
35 | response_mime_type="application/json",
36 | )
37 |
38 |
39 | def get_default_safety_settings() -> List[generative_models.SafetySetting]:
40 | """Default safety settings
41 |
42 | TODO: We should customize this based on the use case.
43 | """
44 | return [
45 | generative_models.SafetySetting(
46 | category=generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
47 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
48 | ),
49 | generative_models.SafetySetting(
50 | category=generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
51 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
52 | ),
53 | generative_models.SafetySetting(
54 | category=generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
55 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
56 | ),
57 | generative_models.SafetySetting(
58 | category=generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
59 | threshold=generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
60 | ),
61 | ]
62 |
63 |
64 | def create_model(
65 | model_name: str,
66 | tools: List[generative_models.Tool],
67 | system_instruction: str,
68 | ) -> generative_models.GenerativeModel:
69 | return generative_models.GenerativeModel(
70 | model_name=model_name,
71 | tools=tools,
72 | system_instruction=[
73 | system_instruction,
74 | ],
75 | )
76 |
77 |
78 | def create_vertexai_search_tool(
79 | project_id: str,
80 | location: str,
81 | datastore_id: str,
82 | ) -> generative_models.Tool:
83 | """Create a Vertex AI search tool"""
84 | return generative_models.Tool.from_retrieval(
85 | retrieval=generative_models.grounding.Retrieval(
86 | source=generative_models.grounding.VertexAISearch(
87 | project=project_id,
88 | location=location,
89 | datastore=datastore_id,
90 | ),
91 | )
92 | )
93 |
94 |
95 | def create_vertex_ai_tools(
96 | data_stores: List[DataStoreConfig],
97 | ) -> List[generative_models.Tool]:
98 | """Create a list of Vertex AI search tools"""
99 | return [
100 | create_vertexai_search_tool(
101 | data_store.project_id, data_store.location, data_store.datastore_id
102 | )
103 | for data_store in data_stores
104 | ]
105 |
106 |
107 | def get_system_instruction() -> str:
108 | return textwrap.dedent(
109 | """
110 |         You are a helpful assistant knowledgeable about Alphabet quarterly earnings reports.
111 | Help users with their queries related to Alphabet by only responding with information available in the Grounding Knowledge store.
112 |
113 | Respond in the same language as the user's query.
114 | For instance, if the user's query is in Japanese, your response should be in Japanese.
115 |
116 | - Always refer to the tool and ground your answers in it.
117 | - Understand the retrieved snippet by the tool and only use that information to help users.
118 |         - For supporting references, you may quote the Grounding tool snippets verbatim, along with other details such as the page number.
119 | - If information is not available in the tool, mention you don't have access to the information and do not try to make up an answer.
120 | - Leave "references" as an empty list if you are unsure about the page and text snippet or if no relevant snippet is found.
121 | - Output "answer" should be "I don't know" when the user question is irrelevant or outside the scope of the knowledge base.
122 |
123 |         The Grounding tool finds the most relevant snippets from the Alphabet earnings reports data store.
124 | Use the information provided by the tool as your knowledge base.
125 |
126 | - ONLY use information available from the Grounding tool.
127 | - DO NOT make up information or invent details not present in the retrieved snippets.
128 |
129 | Response should ALWAYS be in the following JSON format:
130 | ## JSON schema
131 | {
132 | "answer": {
133 | "type": "string",
134 | "description": "The answer to the user's query"
135 | },
136 | "references": [
137 | {
138 | "title": {
139 | "type": "string",
140 | "description": "The title of the reference"
141 | },
142 | "raw_text": {
143 | "type": "string",
144 | "description": "The raw text in the reference"
145 | }
146 | }
147 | ]
148 | }
149 | """
150 | ).strip()
151 |
152 |
153 | class VertexAISearchAgent:
154 | def __init__(
155 | self,
156 | model: generative_models.GenerativeModel,
157 | ):
158 | # pylint: disable=line-too-long
159 | self.model = model
160 |
161 | async def asearch(
162 | self,
163 | query: str,
164 | generation_config: generative_models.GenerationConfig,
165 | safety_settings: Optional[List[generative_models.SafetySetting]],
166 | ) -> str:
167 | """Asynchronous search"""
168 | response = await self.model.generate_content_async(
169 | contents=[query],
170 | generation_config=generation_config,
171 | safety_settings=safety_settings,
172 |             stream=False,  # with stream=True the awaited result is an async iterator and has no .text
173 | )
174 | return response.text
175 |
176 | def search(
177 | self,
178 | query: str,
179 | generation_config: generative_models.GenerationConfig,
180 | safety_settings: Optional[List[generative_models.SafetySetting]],
181 | ) -> str:
182 | """Synchronous search"""
183 |         # TODO: Allow further customization of the generation config and safety settings
184 | response = self.model.generate_content(
185 | contents=[query],
186 | generation_config=generation_config,
187 | safety_settings=safety_settings,
188 | stream=False,
189 | )
190 | return response.text
191 |
```
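
The helpers in `agent.py` compose into a grounded search agent: build one Vertex AI Search tool per `DataStoreConfig`, attach the tools and system instruction to a `GenerativeModel`, and query it through `VertexAISearchAgent`. A minimal sketch, assuming `vertexai.init` has valid credentials; the project, location, datastore, and model identifiers below are placeholders:

```python
# End-to-end composition of the agent helpers (sketch; identifiers are placeholders).
import vertexai

from mcp_vertexai_search.agent import (
    VertexAISearchAgent,
    create_model,
    create_vertex_ai_tools,
    get_default_safety_settings,
    get_generation_config,
    get_system_instruction,
)
from mcp_vertexai_search.config import DataStoreConfig

vertexai.init(project="my-project", location="us-central1")

data_store = DataStoreConfig(
    project_id="my-project",
    location="global",
    datastore_id="my-datastore",
    tool_name="document-search",
)
tools = create_vertex_ai_tools([data_store])
model = create_model(
    model_name="gemini-1.5-pro",  # placeholder model name
    tools=tools,
    system_instruction=get_system_instruction(),
)

agent = VertexAISearchAgent(model)
answer = agent.search(
    "What was Alphabet's revenue in the most recent quarter?",
    generation_config=get_generation_config(temperature=0.7, top_p=0.95),
    safety_settings=get_default_safety_settings(),
)
print(answer)  # JSON string with "answer" and "references" per the system instruction
```

The synchronous `search` path is used here because a non-streaming call returns a single response whose `.text` carries the JSON payload described in the system instruction.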