#
tokens: 38338/50000 25/26 files (page 1/2)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 1 of 2. Use http://codebase.md/saik0s/mcp-browser-use?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .env.example
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── assets
│   └── header.png
├── CLAUDE.md
├── LICENSE
├── pyproject.toml
├── README.md
├── src
│   └── mcp_server_browser_use
│       ├── __init__.py
│       ├── __main__.py
│       ├── _internal
│       │   ├── __init__.py
│       │   ├── agent
│       │   │   ├── __init__.py
│       │   │   ├── browser_use
│       │   │   │   └── browser_use_agent.py
│       │   │   └── deep_research
│       │   │       └── deep_research_agent.py
│       │   ├── browser
│       │   │   ├── __init__.py
│       │   │   ├── custom_browser.py
│       │   │   └── custom_context.py
│       │   ├── controller
│       │   │   ├── __init__.py
│       │   │   └── custom_controller.py
│       │   └── utils
│       │       ├── __init__.py
│       │       ├── config.py
│       │       ├── llm_provider.py
│       │       ├── mcp_client.py
│       │       └── utils.py
│       ├── cli.py
│       ├── config.py
│       └── server.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------

```
1 | 3.11
2 | 
```

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | fail_fast: true
 2 | 
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/mirrors-prettier
 5 |     rev: v3.1.0
 6 |     hooks:
 7 |       - id: prettier
 8 |         types_or: [yaml, json5]
 9 | 
10 |   # - repo: https://github.com/astral-sh/ruff-pre-commit
11 |   #   rev: v0.8.1
12 |   #   hooks:
13 |   #     - id: ruff-format
14 |   #     - id: ruff
15 |   #       args: [--fix, --exit-non-zero-on-fix]
16 | 
17 |   - repo: local
18 |     hooks:
19 |       - id: uv-lock-check
20 |         name: Check uv.lock is up to date
21 |         entry: uv lock --check
22 |         language: system
23 |         files: ^(pyproject\.toml|uv\.lock)$
24 |         pass_filenames: false
25 | 
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
  1 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
  2 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,python
  3 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,macos,python
  4 | 
  5 | ### macOS ###
  6 | # General
  7 | .DS_Store
  8 | .AppleDouble
  9 | .LSOverride
 10 | 
 11 | # Icon must end with two \r
 12 | Icon
 13 | 
 14 | 
 15 | # Thumbnails
 16 | ._*
 17 | 
 18 | # Files that might appear in the root of a volume
 19 | .DocumentRevisions-V100
 20 | .fseventsd
 21 | .Spotlight-V100
 22 | .TemporaryItems
 23 | .Trashes
 24 | .VolumeIcon.icns
 25 | .com.apple.timemachine.donotpresent
 26 | 
 27 | # Directories potentially created on remote AFP share
 28 | .AppleDB
 29 | .AppleDesktop
 30 | Network Trash Folder
 31 | Temporary Items
 32 | .apdisk
 33 | 
 34 | ### macOS Patch ###
 35 | # iCloud generated files
 36 | *.icloud
 37 | 
 38 | ### Python ###
 39 | # Byte-compiled / optimized / DLL files
 40 | __pycache__/
 41 | *.py[cod]
 42 | *$py.class
 43 | 
 44 | # C extensions
 45 | *.so
 46 | 
 47 | # Distribution / packaging
 48 | .Python
 49 | build/
 50 | develop-eggs/
 51 | dist/
 52 | downloads/
 53 | eggs/
 54 | .eggs/
 55 | lib/
 56 | lib64/
 57 | parts/
 58 | sdist/
 59 | var/
 60 | wheels/
 61 | share/python-wheels/
 62 | *.egg-info/
 63 | .installed.cfg
 64 | *.egg
 65 | MANIFEST
 66 | 
 67 | # PyInstaller
 68 | #  Usually these files are written by a python script from a template
 69 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 70 | *.manifest
 71 | *.spec
 72 | 
 73 | # Installer logs
 74 | pip-log.txt
 75 | pip-delete-this-directory.txt
 76 | 
 77 | # Unit test / coverage reports
 78 | htmlcov/
 79 | .tox/
 80 | .nox/
 81 | .coverage
 82 | .coverage.*
 83 | .cache
 84 | nosetests.xml
 85 | coverage.xml
 86 | *.cover
 87 | *.py,cover
 88 | .hypothesis/
 89 | .pytest_cache/
 90 | cover/
 91 | 
 92 | # Translations
 93 | *.mo
 94 | *.pot
 95 | 
 96 | # Django stuff:
 97 | *.log
 98 | local_settings.py
 99 | db.sqlite3
100 | db.sqlite3-journal
101 | 
102 | # Flask stuff:
103 | instance/
104 | .webassets-cache
105 | 
106 | # Scrapy stuff:
107 | .scrapy
108 | 
109 | # Sphinx documentation
110 | docs/_build/
111 | 
112 | # PyBuilder
113 | .pybuilder/
114 | target/
115 | 
116 | # Jupyter Notebook
117 | .ipynb_checkpoints
118 | 
119 | # IPython
120 | profile_default/
121 | ipython_config.py
122 | 
123 | # pyenv
124 | #   For a library or package, you might want to ignore these files since the code is
125 | #   intended to run in multiple environments; otherwise, check them in:
126 | # .python-version
127 | 
128 | # pipenv
129 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
130 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
131 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
132 | #   install all needed dependencies.
133 | #Pipfile.lock
134 | 
135 | # poetry
136 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
137 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
138 | #   commonly ignored for libraries.
139 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
140 | #poetry.lock
141 | 
142 | # pdm
143 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
144 | #pdm.lock
145 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
146 | #   in version control.
147 | #   https://pdm.fming.dev/#use-with-ide
148 | .pdm.toml
149 | 
150 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
151 | __pypackages__/
152 | 
153 | # Celery stuff
154 | celerybeat-schedule
155 | celerybeat.pid
156 | 
157 | # SageMath parsed files
158 | *.sage.py
159 | 
160 | # Environments
161 | .env
162 | .venv
163 | env/
164 | venv/
165 | ENV/
166 | env.bak/
167 | venv.bak/
168 | 
169 | # Spyder project settings
170 | .spyderproject
171 | .spyproject
172 | 
173 | # Rope project settings
174 | .ropeproject
175 | 
176 | # mkdocs documentation
177 | /site
178 | 
179 | # mypy
180 | .mypy_cache/
181 | .dmypy.json
182 | dmypy.json
183 | 
184 | # Pyre type checker
185 | .pyre/
186 | 
187 | # pytype static type analyzer
188 | .pytype/
189 | 
190 | # Cython debug symbols
191 | cython_debug/
192 | 
193 | # PyCharm
194 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
195 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
196 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
197 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
198 | #.idea/
199 | 
200 | ### Python Patch ###
201 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
202 | poetry.toml
203 | 
204 | # ruff
205 | .ruff_cache/
206 | 
207 | # LSP config files
208 | pyrightconfig.json
209 | 
210 | ### VisualStudioCode ###
211 | .vscode/*
212 | !.vscode/settings.json
213 | !.vscode/tasks.json
214 | !.vscode/launch.json
215 | !.vscode/extensions.json
216 | !.vscode/*.code-snippets
217 | 
218 | # Local History for Visual Studio Code
219 | .history/
220 | 
221 | # Built Visual Studio Code Extensions
222 | *.vsix
223 | 
224 | ### VisualStudioCode Patch ###
225 | # Ignore all local history of files
226 | .history
227 | .ionide
228 | 
229 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,python
230 | 
231 | # Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
232 | 
233 | agent_history.gif
234 | trace.json
235 | recording.mp4
236 | temp/
237 | tmp/
238 | .vscode/
239 | 
```

--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------

```
  1 | # This is an example .env file. Copy it to .env and fill in your values.
  2 | # Lines starting with # are comments.
  3 | 
  4 | # === Main LLM Configuration (MCP_LLM_*) ===
  5 | # Select the primary LLM provider
  6 | # Options: openai, azure_openai, anthropic, google, mistral, ollama, deepseek, openrouter, alibaba, moonshot, unbound
  7 | MCP_LLM_PROVIDER=google
  8 | # Specify the model name for the selected provider
  9 | MCP_LLM_MODEL_NAME=gemini-2.5-flash-preview-04-17
 10 | # LLM temperature (0.0-2.0). Controls randomness.
 11 | MCP_LLM_TEMPERATURE=0.0
 12 | # Optional: Generic override for the LLM provider's base URL
 13 | # MCP_LLM_BASE_URL=
 14 | # Optional: Generic override for the LLM provider's API key. Takes precedence over provider-specific keys.
 15 | # MCP_LLM_API_KEY=
 16 | 
 17 | # --- Provider Specific API Keys (MCP_LLM_*) ---
 18 | # Required unless using Ollama locally without auth or generic MCP_LLM_API_KEY is set
 19 | # MCP_LLM_OPENAI_API_KEY=YOUR_OPENAI_API_KEY
 20 | # MCP_LLM_ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY
 21 | # MCP_LLM_GOOGLE_API_KEY=YOUR_GOOGLE_API_KEY
 22 | # MCP_LLM_AZURE_OPENAI_API_KEY=YOUR_AZURE_OPENAI_API_KEY
 23 | # MCP_LLM_DEEPSEEK_API_KEY=YOUR_DEEPSEEK_API_KEY
 24 | # MCP_LLM_MISTRAL_API_KEY=YOUR_MISTRAL_API_KEY
 25 | # MCP_LLM_OPENROUTER_API_KEY=YOUR_OPENROUTER_API_KEY
 26 | # MCP_LLM_ALIBABA_API_KEY=YOUR_ALIBABA_API_KEY
 27 | # MCP_LLM_MOONSHOT_API_KEY=YOUR_MOONSHOT_API_KEY
 28 | # MCP_LLM_UNBOUND_API_KEY=YOUR_UNBOUND_API_KEY
 29 | 
 30 | # --- Provider Specific Endpoints (MCP_LLM_*) ---
 31 | # Optional: Override default API endpoints.
 32 | # MCP_LLM_OPENAI_ENDPOINT=https://api.openai.com/v1
 33 | # MCP_LLM_ANTHROPIC_ENDPOINT=https://api.anthropic.com
 34 | # MCP_LLM_AZURE_OPENAI_ENDPOINT=YOUR_AZURE_ENDPOINT # Required if using Azure, e.g., https://your-resource.openai.azure.com/
 35 | # MCP_LLM_AZURE_OPENAI_API_VERSION=2025-01-01-preview
 36 | # MCP_LLM_DEEPSEEK_ENDPOINT=https://api.deepseek.com
 37 | # MCP_LLM_MISTRAL_ENDPOINT=https://api.mistral.ai/v1
 38 | # MCP_LLM_OLLAMA_ENDPOINT=http://localhost:11434
 39 | # MCP_LLM_OPENROUTER_ENDPOINT=https://openrouter.ai/api/v1
 40 | # MCP_LLM_ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
 41 | # MCP_LLM_MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
 42 | # MCP_LLM_UNBOUND_ENDPOINT=https://api.getunbound.ai
 43 | 
 44 | # --- Ollama Specific (MCP_LLM_*) ---
 45 | # MCP_LLM_OLLAMA_NUM_CTX=32000
 46 | # MCP_LLM_OLLAMA_NUM_PREDICT=1024
 47 | 
 48 | # === Planner LLM Configuration (Optional, MCP_LLM_PLANNER_*) ===
 49 | # If you want to use a different LLM for planning tasks within agents.
 50 | # Defaults to main LLM settings if not specified.
 51 | # MCP_LLM_PLANNER_PROVIDER=
 52 | # MCP_LLM_PLANNER_MODEL_NAME=
 53 | # MCP_LLM_PLANNER_TEMPERATURE=
 54 | # MCP_LLM_PLANNER_BASE_URL=
 55 | # MCP_LLM_PLANNER_API_KEY= # Generic planner API key, or use provider-specific below
 56 | # MCP_LLM_PLANNER_OPENAI_API_KEY=
 57 | # ... (similar provider-specific keys and endpoints for planner if needed)
 58 | 
 59 | # === Browser Configuration (MCP_BROWSER_*) ===
 60 | # General browser headless mode (true/false)
 61 | MCP_BROWSER_HEADLESS=false
 62 | # General browser disable security features (use cautiously) (true/false)
 63 | MCP_BROWSER_DISABLE_SECURITY=false
 64 | # Optional: Path to Chrome/Chromium executable
 65 | # MCP_BROWSER_BINARY_PATH=/usr/bin/chromium-browser
 66 | # Optional: Path to Chrome user data directory (for persistent sessions)
 67 | # MCP_BROWSER_USER_DATA_DIR=~/.config/google-chrome/Profile 1
 68 | MCP_BROWSER_WINDOW_WIDTH=1280
 69 | MCP_BROWSER_WINDOW_HEIGHT=1080
 70 | # Set to true to connect to user's browser via MCP_BROWSER_CDP_URL
 71 | MCP_BROWSER_USE_OWN_BROWSER=false
 72 | # Optional: Connect to existing Chrome via DevTools Protocol URL. Required if MCP_BROWSER_USE_OWN_BROWSER=true.
 73 | # MCP_BROWSER_CDP_URL=http://localhost:9222
 74 | # MCP_BROWSER_WSS_URL= # Optional: WSS URL if CDP URL is not sufficient
 75 | # Keep browser managed by server open between MCP tool calls (if MCP_BROWSER_USE_OWN_BROWSER=false)
 76 | MCP_BROWSER_KEEP_OPEN=false
 77 | # Optional: Directory to save Playwright trace files (useful for debugging). If not set, tracing to file is disabled.
 78 | # MCP_BROWSER_TRACE_PATH=./tmp/trace
 79 | 
 80 | # === Agent Tool Configuration (`run_browser_agent` tool, MCP_AGENT_TOOL_*) ===
 81 | MCP_AGENT_TOOL_MAX_STEPS=100
 82 | MCP_AGENT_TOOL_MAX_ACTIONS_PER_STEP=5
 83 | # Method for tool invocation ('auto', 'json_schema', 'function_calling')
 84 | MCP_AGENT_TOOL_TOOL_CALLING_METHOD=auto
 85 | MCP_AGENT_TOOL_MAX_INPUT_TOKENS=128000
 86 | # Enable vision capabilities (screenshot analysis)
 87 | MCP_AGENT_TOOL_USE_VISION=true
 88 | # Override general browser headless mode for this tool (true/false/empty for general setting)
 89 | # MCP_AGENT_TOOL_HEADLESS=
 90 | # Override general browser disable security for this tool (true/false/empty for general setting)
 91 | # MCP_AGENT_TOOL_DISABLE_SECURITY=
 92 | # Enable Playwright video recording (true/false)
 93 | MCP_AGENT_TOOL_ENABLE_RECORDING=false
 94 | # Optional: Path to save agent run video recordings. If not set, recording to file is disabled even if ENABLE_RECORDING=true.
 95 | # MCP_AGENT_TOOL_SAVE_RECORDING_PATH=./tmp/recordings
 96 | # Optional: Directory to save agent history JSON files. If not set, history saving is disabled.
 97 | # MCP_AGENT_TOOL_HISTORY_PATH=./tmp/agent_history
 98 | 
 99 | # === Deep Research Tool Configuration (`run_deep_research` tool, MCP_RESEARCH_TOOL_*) ===
100 | MCP_RESEARCH_TOOL_MAX_PARALLEL_BROWSERS=3
101 | # MANDATORY: Base directory to save research artifacts (report, results). Task ID will be appended.
102 | # Example: MCP_RESEARCH_TOOL_SAVE_DIR=/mnt/data/research_outputs
103 | # Example: MCP_RESEARCH_TOOL_SAVE_DIR=C:\\Users\\YourUser\\Documents\\ResearchData
104 | MCP_RESEARCH_TOOL_SAVE_DIR=./tmp/deep_research
105 | 
106 | # === Path Configuration (MCP_PATHS_*) ===
107 | # Optional: Directory for downloaded files. If not set, persistent downloads to a specific path are disabled.
108 | # MCP_PATHS_DOWNLOADS=./tmp/downloads
109 | 
110 | # === Server Configuration (MCP_SERVER_*) ===
111 | # Path for the server log file. Leave empty for stdout.
112 | # MCP_SERVER_LOG_FILE=mcp_server_browser_use.log
113 | # Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL
114 | MCP_SERVER_LOGGING_LEVEL=INFO
115 | # Enable/disable anonymized telemetry (true/false)
116 | MCP_SERVER_ANONYMIZED_TELEMETRY=true
117 | # Optional: JSON string for MCP client configuration for the controller
118 | # MCP_SERVER_MCP_CONFIG='{"client_name": "mcp-browser-use-controller"}'
119 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | <img src="./assets/header.png" alt="Browser Use Web UI" width="full"/>
  2 | 
  3 | <br/>
  4 | 
  5 | # browser-use MCP server & CLI
  6 | [![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.browser-use.com)
  7 | [![License](https://img.shields.io/badge/License-MIT-green)](LICENSE)
  8 | 
  9 | > **Project Note**: This MCP server implementation builds upon the [browser-use/web-ui](https://github.com/browser-use/web-ui) foundation. Core browser automation logic and configuration patterns are adapted from the original project.
 10 | 
 11 | AI-driven browser automation server implementing the Model Context Protocol (MCP) for natural language browser control and web research. Also provides CLI access to its core functionalities.
 12 | 
 13 | <a href="https://glama.ai/mcp/servers/@Saik0s/mcp-browser-use"><img width="380" height="200" src="https://glama.ai/mcp/servers/@Saik0s/mcp-browser-use/badge" alt="Browser-Use MCP server" /></a>
 14 | 
 15 | ## Features
 16 | 
 17 | -   🧠 **MCP Integration** - Full protocol implementation for AI agent communication.
 18 | -   🌐 **Browser Automation** - Page navigation, form filling, element interaction via natural language (`run_browser_agent` tool).
 19 | -   👁️ **Visual Understanding** - Optional screenshot analysis for vision-capable LLMs.
 20 | -   🔄 **State Persistence** - Option to manage a server browser session across multiple MCP calls or connect to user's browser.
 21 | -   🔌 **Multi-LLM Support** - Integrates with OpenAI, Anthropic, Azure, DeepSeek, Google, Mistral, Ollama, OpenRouter, Alibaba, Moonshot, Unbound AI.
 22 | -   🔍 **Deep Research Tool** - Dedicated tool for multi-step web research and report generation (`run_deep_research` tool).
 23 | -   ⚙️ **Environment Variable Configuration** - Fully configurable via environment variables using a structured Pydantic model.
 24 | -   🔗 **CDP Connection** - Ability to connect to and control a user-launched Chrome/Chromium instance via Chrome DevTools Protocol.
 25 | -   ⌨️ **CLI Interface** - Access core agent functionalities (`run_browser_agent`, `run_deep_research`) directly from the command line for testing and scripting.
 26 | 
 27 | ## Quick Start
 28 | 
 29 | ### The Essentials
 30 | 
 31 | 1. Install UV - the rocket-powered Python installer:
 32 | `curl -LsSf https://astral.sh/uv/install.sh | sh`
 33 | 
 34 | 2. Get Playwright browsers (required for automation):
 35 | `uvx --from mcp-server-browser-use@latest python -m playwright install`
 36 | 
 37 | ### Integration Patterns
 38 | 
 39 | For MCP clients like Claude Desktop, add a server configuration that's as simple as:
 40 | 
 41 | ```json
 42 | // Example 1: One-Line Latest Version (Always Fresh)
 43 | "mcpServers": {
 44 |     "browser-use": {
 45 |       "command": "uvx",
 46 |       "args": ["mcp-server-browser-use@latest"],
 47 |       "env": {
 48 |         "MCP_LLM_GOOGLE_API_KEY": "YOUR_KEY_HERE_IF_USING_GOOGLE",
 49 |         "MCP_LLM_PROVIDER": "google",
 50 |         "MCP_LLM_MODEL_NAME": "gemini-2.5-flash-preview-04-17",
 51 |         "MCP_BROWSER_HEADLESS": "true"
 52 |       }
 53 |     }
 54 | }
 55 | ```
 56 | 
 57 | ```json
 58 | // Example 2: Advanced Configuration with CDP
 59 | "mcpServers": {
 60 |     "browser-use": {
 61 |       "command": "uvx",
 62 |       "args": ["mcp-server-browser-use@latest"],
 63 |       "env": {
 64 |         "MCP_LLM_OPENROUTER_API_KEY": "YOUR_KEY_HERE_IF_USING_OPENROUTER",
 65 |         "MCP_LLM_PROVIDER": "openrouter",
 66 |         "MCP_LLM_MODEL_NAME": "anthropic/claude-3.5-haiku",
 67 |         "MCP_LLM_TEMPERATURE": "0.4",
 68 | 
 69 |         "MCP_BROWSER_HEADLESS": "false",
 70 |         "MCP_BROWSER_WINDOW_WIDTH": "1440",
 71 |         "MCP_BROWSER_WINDOW_HEIGHT": "1080",
 72 |         "MCP_AGENT_TOOL_USE_VISION": "true",
 73 | 
 74 |         "MCP_RESEARCH_TOOL_SAVE_DIR": "/path/to/your/research",
 75 |         "MCP_RESEARCH_TOOL_MAX_PARALLEL_BROWSERS": "5",
 76 | 
 77 |         "MCP_PATHS_DOWNLOADS": "/path/to/your/downloads",
 78 | 
 79 |         "MCP_BROWSER_USE_OWN_BROWSER": "true",
 80 |         "MCP_BROWSER_CDP_URL": "http://localhost:9222",
 81 | 
 82 |         "MCP_AGENT_TOOL_HISTORY_PATH": "/path/to/your/history",
 83 | 
 84 |         "MCP_SERVER_LOGGING_LEVEL": "DEBUG",
 85 |         "MCP_SERVER_LOG_FILE": "/path/to/your/log/mcp_server_browser_use.log"
 86 |       }
 87 |     }
 88 | }
 89 | ```
 90 | 
 91 | ```json
 92 | // Example 3: Advanced Configuration with User Data and custom chrome path
 93 | "mcpServers": {
 94 |     "browser-use": {
 95 |       "command": "uvx",
 96 |       "args": ["mcp-server-browser-use@latest"],
 97 |       "env": {
 98 |         "MCP_LLM_OPENAI_API_KEY": "YOUR_KEY_HERE_IF_USING_OPENAI",
 99 |         "MCP_LLM_PROVIDER": "openai",
100 |         "MCP_LLM_MODEL_NAME": "gpt-4.1-mini",
101 |         "MCP_LLM_TEMPERATURE": "0.2",
102 | 
103 |         "MCP_BROWSER_HEADLESS": "false",
104 | 
105 |         "MCP_BROWSER_BINARY_PATH": "/path/to/your/chrome/binary",
106 |         "MCP_BROWSER_USER_DATA_DIR": "/path/to/your/user/data",
107 |         "MCP_BROWSER_DISABLE_SECURITY": "true",
108 |         "MCP_BROWSER_KEEP_OPEN": "true",
109 |         "MCP_BROWSER_TRACE_PATH": "/path/to/your/trace",
110 | 
111 |         "MCP_AGENT_TOOL_HISTORY_PATH": "/path/to/your/history",
112 | 
113 |         "MCP_SERVER_LOGGING_LEVEL": "DEBUG",
114 |         "MCP_SERVER_LOG_FILE": "/path/to/your/log/mcp_server_browser_use.log"
115 |       }
116 |     }
117 | }
118 | ```
119 | 
120 | ```json
121 | // Example 4: Local Development Flow
122 | "mcpServers": {
123 |     "browser-use": {
124 |       "command": "uv",
125 |       "args": [
126 |         "--directory",
127 |         "/your/dev/path",
128 |         "run",
129 |         "mcp-server-browser-use"
130 |       ],
131 |       "env": {
132 |         "MCP_LLM_OPENROUTER_API_KEY": "YOUR_KEY_HERE_IF_USING_OPENROUTER",
133 |         "MCP_LLM_PROVIDER": "openrouter",
134 |         "MCP_LLM_MODEL_NAME": "openai/gpt-4o-mini",
135 |         "MCP_BROWSER_HEADLESS": "true"
136 |       }
137 |     }
138 | }
139 | ```
140 | 
141 | **Key Insight:** The best configurations emerge from starting simple (Example 1). The .env.example file contains all possible dials.
142 | 
143 | ## MCP Tools
144 | 
145 | This server exposes the following tools via the Model Context Protocol:
146 | 
147 | ### Synchronous Tools (Wait for Completion)
148 | 
149 | 1.  **`run_browser_agent`**
150 |     *   **Description:** Executes a browser automation task based on natural language instructions and waits for it to complete. Uses settings from `MCP_AGENT_TOOL_*`, `MCP_LLM_*`, and `MCP_BROWSER_*` environment variables.
151 |     *   **Arguments:**
152 |         *   `task` (string, required): The primary task or objective.
153 |     *   **Returns:** (string) The final result extracted by the agent or an error message. Agent history (JSON, optional GIF) saved if `MCP_AGENT_TOOL_HISTORY_PATH` is set.
154 | 
155 | 2.  **`run_deep_research`**
156 |     *   **Description:** Performs in-depth web research on a topic, generates a report, and waits for completion. Uses settings from `MCP_RESEARCH_TOOL_*`, `MCP_LLM_*`, and `MCP_BROWSER_*` environment variables. If `MCP_RESEARCH_TOOL_SAVE_DIR` is set, outputs are saved to a subdirectory within it; otherwise, operates in memory-only mode.
157 |     *   **Arguments:**
158 |         *   `research_task` (string, required): The topic or question for the research.
159 |         *   `max_parallel_browsers` (integer, optional): Overrides `MCP_RESEARCH_TOOL_MAX_PARALLEL_BROWSERS` from environment.
160 |     *   **Returns:** (string) The generated research report in Markdown format, including the file path (if saved), or an error message.
161 | 
162 | ## CLI Usage
163 | 
164 | This package also provides a command-line interface `mcp-browser-cli` for direct testing and scripting.
165 | 
166 | **Global Options:**
167 | *   `--env-file PATH, -e PATH`: Path to a `.env` file to load configurations from.
168 | *   `--log-level LEVEL, -l LEVEL`: Override the logging level (e.g., `DEBUG`, `INFO`).
169 | 
170 | **Commands:**
171 | 
172 | 1.  **`mcp-browser-cli run-browser-agent [OPTIONS] TASK`**
173 |     *   **Description:** Runs a browser agent task.
174 |     *   **Arguments:**
175 |         *   `TASK` (string, required): The primary task for the agent.
176 |     *   **Example:**
177 |         ```bash
178 |         mcp-browser-cli run-browser-agent "Go to example.com and find the title." -e .env
179 |         ```
180 | 
181 | 2.  **`mcp-browser-cli run-deep-research [OPTIONS] RESEARCH_TASK`**
182 |     *   **Description:** Performs deep web research.
183 |     *   **Arguments:**
184 |         *   `RESEARCH_TASK` (string, required): The topic or question for research.
185 |     *   **Options:**
186 |         *   `--max-parallel-browsers INTEGER, -p INTEGER`: Override `MCP_RESEARCH_TOOL_MAX_PARALLEL_BROWSERS`.
187 |     *   **Example:**
188 |         ```bash
189 |         mcp-browser-cli run-deep-research "What are the latest advancements in AI-driven browser automation?" --max-parallel-browsers 5 -e .env
190 |         ```
191 | 
192 | All other configurations (LLM keys, paths, browser settings) are picked up from environment variables (or the specified `.env` file) as detailed in the Configuration section.
193 | 
194 | ## Configuration (Environment Variables)
195 | 
196 | Configure the server and CLI using environment variables. You can set these in your system or place them in a `.env` file in the project root (use `--env-file` for CLI). Variables are structured with prefixes.
197 | 
198 | | Variable Group (Prefix)             | Example Variable                               | Description                                                                                                | Default Value                     |
199 | | :---------------------------------- | :--------------------------------------------- | :--------------------------------------------------------------------------------------------------------- | :-------------------------------- |
200 | | **Main LLM (MCP_LLM_)**             |                                                | Settings for the primary LLM used by agents.                                                               |                                   |
201 | |                                     | `MCP_LLM_PROVIDER`                             | LLM provider. Options: `openai`, `azure_openai`, `anthropic`, `google`, `mistral`, `ollama`, etc.         | `openai`                          |
202 | |                                     | `MCP_LLM_MODEL_NAME`                           | Specific model name for the provider.                                                                      | `gpt-4.1`                         |
203 | |                                     | `MCP_LLM_TEMPERATURE`                          | LLM temperature (0.0-2.0).                                                                                 | `0.0`                             |
204 | |                                     | `MCP_LLM_BASE_URL`                             | Optional: Generic override for LLM provider's base URL.                                                    | Provider-specific                 |
205 | |                                     | `MCP_LLM_API_KEY`                              | Optional: Generic LLM API key (takes precedence).                                                          | -                                 |
206 | |                                     | `MCP_LLM_OPENAI_API_KEY`                       | API Key for OpenAI (if provider is `openai`).                                                              | -                                 |
207 | |                                     | `MCP_LLM_ANTHROPIC_API_KEY`                    | API Key for Anthropic.                                                                                     | -                                 |
208 | |                                     | `MCP_LLM_GOOGLE_API_KEY`                       | API Key for Google AI (Gemini).                                                                            | -                                 |
209 | |                                     | `MCP_LLM_AZURE_OPENAI_API_KEY`                 | API Key for Azure OpenAI.                                                                                  | -                                 |
210 | |                                     | `MCP_LLM_AZURE_OPENAI_ENDPOINT`                | **Required if using Azure.** Your Azure resource endpoint.                                                 | -                                 |
211 | |                                     | `MCP_LLM_OLLAMA_ENDPOINT`                      | Ollama API endpoint URL.                                                                                   | `http://localhost:11434`          |
212 | |                                     | `MCP_LLM_OLLAMA_NUM_CTX`                       | Context window size for Ollama models.                                                                     | `32000`                           |
213 | | **Planner LLM (MCP_LLM_PLANNER_)**  |                                                | Optional: Settings for a separate LLM for agent planning. Defaults to Main LLM if not set.                |                                   |
214 | |                                     | `MCP_LLM_PLANNER_PROVIDER`                     | Planner LLM provider.                                                                                      | Main LLM Provider                 |
215 | |                                     | `MCP_LLM_PLANNER_MODEL_NAME`                   | Planner LLM model name.                                                                                    | Main LLM Model                    |
216 | | **Browser (MCP_BROWSER_)**          |                                                | General browser settings.                                                                                  |                                   |
217 | |                                     | `MCP_BROWSER_HEADLESS`                         | Run browser without UI (general setting).                                                                  | `false`                           |
218 | |                                     | `MCP_BROWSER_DISABLE_SECURITY`                 | Disable browser security features (general setting, use cautiously).                                       | `false`                           |
219 | |                                     | `MCP_BROWSER_BINARY_PATH`                      | Path to Chrome/Chromium executable.                                                                        | -                                 |
220 | |                                     | `MCP_BROWSER_USER_DATA_DIR`                    | Path to Chrome user data directory.                                                                        | -                                 |
221 | |                                     | `MCP_BROWSER_WINDOW_WIDTH`                     | Browser window width (pixels).                                                                             | `1280`                            |
222 | |                                     | `MCP_BROWSER_WINDOW_HEIGHT`                    | Browser window height (pixels).                                                                            | `1080`                            |
223 | |                                     | `MCP_BROWSER_USE_OWN_BROWSER`                  | Connect to user's browser via CDP URL.                                                                     | `false`                           |
224 | |                                     | `MCP_BROWSER_CDP_URL`                          | CDP URL (e.g., `http://localhost:9222`). Required if `MCP_BROWSER_USE_OWN_BROWSER=true`.                  | -                                 |
225 | |                                     | `MCP_BROWSER_KEEP_OPEN`                        | Keep server-managed browser open between MCP calls (if `MCP_BROWSER_USE_OWN_BROWSER=false`).               | `false`                           |
226 | |                                     | `MCP_BROWSER_TRACE_PATH`                       | Optional: Directory to save Playwright trace files. If not set, tracing to file is disabled.               | ` ` (empty, tracing disabled)     |
227 | | **Agent Tool (MCP_AGENT_TOOL_)**    |                                                | Settings for the `run_browser_agent` tool.                                                                 |                                   |
228 | |                                     | `MCP_AGENT_TOOL_MAX_STEPS`                     | Max steps per agent run.                                                                                   | `100`                             |
229 | |                                     | `MCP_AGENT_TOOL_MAX_ACTIONS_PER_STEP`          | Max actions per agent step.                                                                                | `5`                               |
230 | |                                     | `MCP_AGENT_TOOL_TOOL_CALLING_METHOD`           | Method for tool invocation ('auto', 'json_schema', 'function_calling').                                    | `auto`                            |
231 | |                                     | `MCP_AGENT_TOOL_MAX_INPUT_TOKENS`              | Max input tokens for LLM context.                                                                          | `128000`                          |
232 | |                                     | `MCP_AGENT_TOOL_USE_VISION`                    | Enable vision capabilities (screenshot analysis).                                                          | `true`                            |
233 | |                                     | `MCP_AGENT_TOOL_HEADLESS`                      | Override `MCP_BROWSER_HEADLESS` for this tool (true/false/empty).                                          | ` ` (uses general)                |
234 | |                                     | `MCP_AGENT_TOOL_DISABLE_SECURITY`              | Override `MCP_BROWSER_DISABLE_SECURITY` for this tool (true/false/empty).                                  | ` ` (uses general)                |
235 | |                                     | `MCP_AGENT_TOOL_ENABLE_RECORDING`              | Enable Playwright video recording.                                                                         | `false`                           |
236 | |                                     | `MCP_AGENT_TOOL_SAVE_RECORDING_PATH`           | Optional: Path to save recordings. If not set, recording to file is disabled even if `ENABLE_RECORDING=true`. | ` ` (empty, recording disabled)   |
237 | |                                     | `MCP_AGENT_TOOL_HISTORY_PATH`                  | Optional: Directory to save agent history JSON files. If not set, history saving is disabled.              | ` ` (empty, history saving disabled) |
238 | | **Research Tool (MCP_RESEARCH_TOOL_)** |                                             | Settings for the `run_deep_research` tool.                                                                 |                                   |
239 | |                                     | `MCP_RESEARCH_TOOL_MAX_PARALLEL_BROWSERS`      | Max parallel browser instances for deep research.                                                          | `3`                               |
240 | |                                     | `MCP_RESEARCH_TOOL_SAVE_DIR`                   | Optional: Base directory to save research artifacts. Task ID will be appended. If not set, operates in memory-only mode. | `None`                           |
241 | | **Paths (MCP_PATHS_)**              |                                                | General path settings.                                                                                     |                                   |
242 | |                                     | `MCP_PATHS_DOWNLOADS`                          | Optional: Directory for downloaded files. If not set, persistent downloads to a specific path are disabled.  | ` ` (empty, downloads disabled)  |
243 | | **Server (MCP_SERVER_)**            |                                                | Server-specific settings.                                                                                  |                                   |
244 | |                                     | `MCP_SERVER_LOG_FILE`                          | Path for the server log file. Empty for stdout.                                                            | ` ` (empty, logs to stdout)       |
245 | |                                     | `MCP_SERVER_LOGGING_LEVEL`                     | Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`).                                           | `ERROR`                           |
246 | |                                     | `MCP_SERVER_ANONYMIZED_TELEMETRY`              | Enable/disable anonymized telemetry (`true`/`false`).                                                      | `true`                            |
247 | |                                     | `MCP_SERVER_MCP_CONFIG`                        | Optional: JSON string for MCP client config used by the internal controller.                               | `null`                            |
248 | 
249 | **Supported LLM Providers (`MCP_LLM_PROVIDER`):**
250 | `openai`, `azure_openai`, `anthropic`, `google`, `mistral`, `ollama`, `deepseek`, `openrouter`, `alibaba`, `moonshot`, `unbound`
251 | 
252 | *(Refer to `.env.example` for a comprehensive list of all supported environment variables and their specific provider keys/endpoints.)*
253 | 
254 | ## Connecting to Your Own Browser (CDP)
255 | 
256 | Instead of having the server launch and manage its own browser instance, you can connect it to a Chrome/Chromium browser that you launch and manage yourself.
257 | 
258 | **Steps:**
259 | 
260 | 1.  **Launch Chrome/Chromium with Remote Debugging Enabled:**
261 |     (Commands for macOS, Linux, Windows as previously listed, e.g., `google-chrome --remote-debugging-port=9222`)
262 | 
263 | 2.  **Configure Environment Variables:**
264 |     Set the following environment variables:
265 |     ```dotenv
266 |     MCP_BROWSER_USE_OWN_BROWSER=true
267 |     MCP_BROWSER_CDP_URL=http://localhost:9222 # Use the same port
268 |     # Optional: MCP_BROWSER_USER_DATA_DIR=/path/to/your/profile
269 |     ```
270 | 
271 | 3.  **Run the MCP Server or CLI:**
272 |     Start the server (`uv run mcp-server-browser-use`) or CLI (`mcp-browser-cli ...`) as usual.
273 | 
274 | **Important Considerations:**
275 | *   The browser launched with `--remote-debugging-port` must remain open.
276 | *   Settings like `MCP_BROWSER_HEADLESS` and `MCP_BROWSER_KEEP_OPEN` are ignored when `MCP_BROWSER_USE_OWN_BROWSER=true`.
277 | 
278 | ## Development
279 | 
280 | ```bash
281 | # Install dev dependencies and sync project deps
282 | uv sync --dev
283 | 
284 | # Install playwright browsers
285 | uv run playwright install
286 | 
287 | # Run MCP server with debugger (Example connecting to own browser via CDP)
288 | # 1. Launch Chrome: google-chrome --remote-debugging-port=9222 --user-data-dir="optional/path/to/user/profile"
289 | # 2. Run inspector command with environment variables:
290 | npx @modelcontextprotocol/inspector@latest \
291 |   -e MCP_LLM_GOOGLE_API_KEY=$GOOGLE_API_KEY \
292 |   -e MCP_LLM_PROVIDER=google \
293 |   -e MCP_LLM_MODEL_NAME=gemini-2.5-flash-preview-04-17 \
294 |   -e MCP_BROWSER_USE_OWN_BROWSER=true \
295 |   -e MCP_BROWSER_CDP_URL=http://localhost:9222 \
296 |   -e MCP_RESEARCH_TOOL_SAVE_DIR=./tmp/dev_research_output \
297 |   uv --directory . run mcp-server-browser-use
298 | 
299 | # Note: Change timeout in inspector's config panel if needed (default is 10 seconds)
300 | 
301 | # Run CLI example
302 | # Create a .env file with your settings (including MCP_RESEARCH_TOOL_SAVE_DIR) or use environment variables
303 | uv run mcp-browser-cli -e .env run-browser-agent "What is the title of example.com?"
304 | uv run mcp-browser-cli -e .env run-deep-research "What is the best material for a pan for everyday use on amateur kitchen and dishwasher?"
305 | ```
306 | 
307 | ## Troubleshooting
308 | 
309 | -   **Configuration Error on Startup**: If the application fails to start with an error about a missing setting, ensure all **mandatory** environment variables (like `MCP_RESEARCH_TOOL_SAVE_DIR`) are set correctly in your environment or `.env` file.
310 | -   **Browser Conflicts**: If *not* using CDP (`MCP_BROWSER_USE_OWN_BROWSER=false`), ensure no conflicting Chrome instances are running with the same user data directory if `MCP_BROWSER_USER_DATA_DIR` is specified.
311 | -   **CDP Connection Issues**: If using `MCP_BROWSER_USE_OWN_BROWSER=true`:
312 |     *   Verify Chrome was launched with `--remote-debugging-port`.
313 |     *   Ensure the port in `MCP_BROWSER_CDP_URL` matches.
314 |     *   Check firewalls and ensure the browser is running.
315 | -   **API Errors**: Double-check API keys (`MCP_LLM_<PROVIDER>_API_KEY` or `MCP_LLM_API_KEY`) and endpoints (e.g., `MCP_LLM_AZURE_OPENAI_ENDPOINT` for Azure).
316 | -   **Vision Issues**: Ensure `MCP_AGENT_TOOL_USE_VISION=true` and your LLM supports vision.
317 | -   **Dependency Problems**: Run `uv sync` and `uv run playwright install`.
318 | -   **File/Path Issues**:
319 |     *   If optional features like history saving, tracing, or downloads are not working, ensure the corresponding path variables (`MCP_AGENT_TOOL_HISTORY_PATH`, `MCP_BROWSER_TRACE_PATH`, `MCP_PATHS_DOWNLOADS`) are set and the application has write permissions to those locations.
320 |     *   For deep research, ensure `MCP_RESEARCH_TOOL_SAVE_DIR` is set to a valid, writable directory.
321 | -   **Logging**: Check the log file (`MCP_SERVER_LOG_FILE`, if set) or console output. Increase `MCP_SERVER_LOGGING_LEVEL` to `DEBUG` for more details. For CLI, use `--log-level DEBUG`.
322 | 
323 | ## License
324 | 
325 | MIT - See [LICENSE](LICENSE) for details.
326 | 
```

--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Development Guidelines
 2 | 
 3 | This document contains critical information about working with this codebase. Follow these guidelines precisely.
 4 | 
 5 | ## Core Development Rules
 6 | 
 7 | 1. Package Management
 8 |    - ONLY use uv, NEVER pip
 9 |    - Installation: `uv add package`
10 |    - Running tools: `uv run tool`
11 |    - Upgrading: `uv add --dev package --upgrade-package package`
12 |    - FORBIDDEN: `uv pip install`, `@latest` syntax
13 | 
14 | 2. Code Quality
15 |    - Type hints required for all code
16 |    - Public APIs must have docstrings
17 |    - Functions must be focused and small
18 |    - Follow existing patterns exactly
19 |    - Line length: 150 chars maximum
20 | 
21 | 3. Testing Requirements
22 |    - Framework: `uv run pytest`
23 |    - Async testing: use anyio, not asyncio
24 |    - Coverage: test edge cases and errors
25 |    - New features require tests
26 |    - Bug fixes require regression tests
27 | 
28 | ## Python Tools
29 | 
30 | ## Code Formatting
31 | 
32 | 1. Ruff
33 |    - Format: `uv run ruff format .`
34 |    - Check: `uv run ruff check .`
35 |    - Fix: `uv run ruff check . --fix`
36 |    - Critical issues:
37 |      - Line length (150 chars)
38 |      - Import sorting (I001)
39 |      - Unused imports
40 |    - Line wrapping:
41 |      - Strings: use parentheses
42 |      - Function calls: multi-line with proper indent
43 |      - Imports: split into multiple lines
44 | 
45 | 2. Type Checking
46 |    - Tool: `uv run pyright`
47 |    - Requirements:
48 |      - Explicit None checks for Optional
49 |      - Type narrowing for strings
50 |      - Version warnings can be ignored if checks pass
51 | 
52 | 3. Pre-commit
53 |    - Config: `.pre-commit-config.yaml`
54 |    - Runs: on git commit
55 |    - Tools: Prettier (YAML/JSON), Ruff (Python)
56 |    - Ruff updates:
57 |      - Check PyPI versions
58 |      - Update config rev
59 |      - Commit config first
60 | 
61 | ## Error Resolution
62 | 
63 | 1. CI Failures
64 |    - Fix order:
65 |      1. Formatting
66 |      2. Type errors
67 |      3. Linting
68 |    - Type errors:
69 |      - Get full line context
70 |      - Check Optional types
71 |      - Add type narrowing
72 |      - Verify function signatures
73 | 
74 | 2. Common Issues
 75 |    - Line length (150 chars): break strings with parentheses
76 |      - Multi-line function calls
77 |      - Split imports
78 |    - Types:
79 |      - Add None checks
80 |      - Narrow string types
81 |      - Match existing patterns
82 | 
83 | 3. Best Practices
84 |    - Check git status before commits
85 |    - Run formatters before type checks
86 |    - Keep changes minimal
87 |    - Follow existing patterns
88 |    - Document public APIs
89 |    - Test thoroughly
90 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/__init__.py:
--------------------------------------------------------------------------------

```python
1 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/agent/__init__.py:
--------------------------------------------------------------------------------

```python
1 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/browser/__init__.py:
--------------------------------------------------------------------------------

```python
1 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/controller/__init__.py:
--------------------------------------------------------------------------------

```python
1 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/utils/__init__.py:
--------------------------------------------------------------------------------

```python
1 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/__init__.py:
--------------------------------------------------------------------------------

```python
1 | from mcp_server_browser_use.server import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/__main__.py:
--------------------------------------------------------------------------------

```python
1 | from mcp_server_browser_use.server import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/utils/utils.py:
--------------------------------------------------------------------------------

```python
 1 | import base64
 2 | import os
 3 | import time
 4 | from pathlib import Path
 5 | from typing import Dict, Optional
 6 | import requests
 7 | import json
 8 | import uuid
 9 | 
10 | 
11 | def encode_image(img_path):
12 |     if not img_path:
13 |         return None
14 |     with open(img_path, "rb") as fin:
15 |         image_data = base64.b64encode(fin.read()).decode("utf-8")
16 |     return image_data
17 | 
18 | 
19 | def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Dict[str, Optional[str]]:
20 |     """Get the latest recording and trace files"""
21 |     latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}
22 | 
23 |     if not os.path.exists(directory):
24 |         os.makedirs(directory, exist_ok=True)
25 |         return latest_files
26 | 
27 |     for file_type in file_types:
28 |         try:
29 |             matches = list(Path(directory).rglob(f"*{file_type}"))
30 |             if matches:
31 |                 latest = max(matches, key=lambda p: p.stat().st_mtime)
32 |                 # Only return files that are complete (not being written)
33 |                 if time.time() - latest.stat().st_mtime > 1.0:
34 |                     latest_files[file_type] = str(latest)
35 |         except Exception as e:
36 |             print(f"Error getting latest {file_type} file: {e}")
37 | 
38 |     return latest_files
39 | 
```

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

```toml
 1 | [project]
 2 | name = "mcp_server_browser_use"
 3 | version = "0.1.8"
 4 | description = "MCP server for browser-use"
 5 | readme = "README.md"
 6 | requires-python = ">=3.11"
 7 | authors = [{ name = "Igor Tarasenko" }]
 8 | license = { text = "MIT" }
 9 | classifiers = [
10 |   "Development Status :: 4 - Beta",
11 |   "Programming Language :: Python :: 3",
12 |   "Programming Language :: Python :: 3.11",
13 |   "Operating System :: OS Independent",
14 | ]
15 | 
16 | dependencies = [
17 |   "pydantic-settings>=2.0.0",
18 |   "mcp>=1.6.0",
19 |   "typer>=0.12.0",
20 |   "browser-use==0.1.41",
21 |   "pyperclip==1.9.0",
22 |   "json-repair",
23 |   "langchain-mistralai==0.2.4",
24 |   "MainContentExtractor==0.0.4",
25 |   "langchain-ibm==0.3.10",
26 |   "langchain_mcp_adapters==0.0.9",
27 |   "langgraph==0.3.34",
28 |   "langchain-community",
29 | ]
30 | 
31 | [build-system]
32 | requires = ["hatchling"]
33 | build-backend = "hatchling.build"
34 | 
35 | [tool.hatch.build.targets.wheel]
36 | packages = ["src/mcp_server_browser_use"]
37 | 
38 | [project.scripts]
39 | mcp-server-browser-use = "mcp_server_browser_use.server:main"
40 | mcp-browser-cli = "mcp_server_browser_use.cli:app"
41 | 
42 | [tool.pyright]
43 | include = ["src/mcp_server_browser_use"]
44 | venvPath = "."
45 | venv = ".venv"
46 | 
47 | [tool.ruff.lint]
48 | select = ["E", "F", "I"]
49 | ignore = []
50 | 
51 | [tool.ruff]
52 | line-length = 150
53 | target-version = "py311"
54 | 
55 | [tool.uv]
56 | dev-dependencies = ["pyright>=1.1.378", "pytest>=8.3.3", "ruff>=0.6.9"]
57 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/utils/config.py:
--------------------------------------------------------------------------------

```python
 1 | PROVIDER_DISPLAY_NAMES = {
 2 |     "openai": "OpenAI",
 3 |     "azure_openai": "Azure OpenAI",
 4 |     "anthropic": "Anthropic",
 5 |     "deepseek": "DeepSeek",
 6 |     "google": "Google",
 7 |     "alibaba": "Alibaba",
 8 |     "moonshot": "MoonShot",
 9 |     "unbound": "Unbound AI",
10 |     "ibm": "IBM"
11 | }
12 | 
13 | # Predefined model names for common providers
14 | model_names = {
15 |     "anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
16 |     "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
17 |     "deepseek": ["deepseek-chat", "deepseek-reasoner"],
18 |     "google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest",
19 |                "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05",
20 |                "gemini-2.5-pro-preview-03-25", "gemini-2.5-flash-preview-04-17"],
21 |     "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
22 |                "deepseek-r1:14b", "deepseek-r1:32b"],
23 |     "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
24 |     "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
25 |     "alibaba": ["qwen-plus", "qwen-max", "qwen-vl-max", "qwen-vl-plus", "qwen-turbo", "qwen-long"],
26 |     "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
27 |     "unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
28 |     "siliconflow": [
29 |         "deepseek-ai/DeepSeek-R1",
30 |         "deepseek-ai/DeepSeek-V3",
31 |         "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
32 |         "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
33 |         "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
34 |         "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
35 |         "deepseek-ai/DeepSeek-V2.5",
36 |         "deepseek-ai/deepseek-vl2",
37 |         "Qwen/Qwen2.5-72B-Instruct-128K",
38 |         "Qwen/Qwen2.5-72B-Instruct",
39 |         "Qwen/Qwen2.5-32B-Instruct",
40 |         "Qwen/Qwen2.5-14B-Instruct",
41 |         "Qwen/Qwen2.5-7B-Instruct",
42 |         "Qwen/Qwen2.5-Coder-32B-Instruct",
43 |         "Qwen/Qwen2.5-Coder-7B-Instruct",
44 |         "Qwen/Qwen2-7B-Instruct",
45 |         "Qwen/Qwen2-1.5B-Instruct",
46 |         "Qwen/QwQ-32B-Preview",
47 |         "Qwen/Qwen2-VL-72B-Instruct",
48 |         "Qwen/Qwen2.5-VL-32B-Instruct",
49 |         "Qwen/Qwen2.5-VL-72B-Instruct",
50 |         "TeleAI/TeleChat2",
51 |         "THUDM/glm-4-9b-chat",
52 |         "Vendor-A/Qwen/Qwen2.5-72B-Instruct",
53 |         "internlm/internlm2_5-7b-chat",
54 |         "internlm/internlm2_5-20b-chat",
55 |         "Pro/Qwen/Qwen2.5-7B-Instruct",
56 |         "Pro/Qwen/Qwen2-7B-Instruct",
57 |         "Pro/Qwen/Qwen2-1.5B-Instruct",
58 |         "Pro/THUDM/chatglm3-6b",
59 |         "Pro/THUDM/glm-4-9b-chat",
60 |     ],
61 |     "ibm": ["ibm/granite-vision-3.1-2b-preview", "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
62 |             "meta-llama/llama-3-2-90b-vision-instruct"]
63 | }
64 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/browser/custom_context.py:
--------------------------------------------------------------------------------

```python
  1 | import json
  2 | import logging
  3 | import os
  4 | 
  5 | from browser_use.browser.browser import Browser, IN_DOCKER
  6 | from browser_use.browser.context import BrowserContext, BrowserContextConfig
  7 | from playwright.async_api import Browser as PlaywrightBrowser
  8 | from playwright.async_api import BrowserContext as PlaywrightBrowserContext
  9 | from typing import Optional
 10 | from browser_use.browser.context import BrowserContextState
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | class CustomBrowserContextConfig(BrowserContextConfig):
 16 |     force_new_context: bool = False  # force to create new context
 17 | 
 18 | 
 19 | class CustomBrowserContext(BrowserContext):
 20 |     def __init__(
 21 |             self,
 22 |             browser: 'Browser',
 23 |             config: BrowserContextConfig | None = None,
 24 |             state: Optional[BrowserContextState] = None,
 25 |     ):
 26 |         super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state)
 27 | 
 28 |     async def _create_context(self, browser: PlaywrightBrowser):
 29 |         """Creates a new browser context with anti-detection measures and loads cookies if available."""
 30 |         if not self.config.force_new_context and self.browser.config.cdp_url and len(browser.contexts) > 0:
 31 |             context = browser.contexts[0]
 32 |         elif not self.config.force_new_context and self.browser.config.browser_binary_path and len(
 33 |                 browser.contexts) > 0:
 34 |             # Connect to existing Chrome instance instead of creating new one
 35 |             context = browser.contexts[0]
 36 |         else:
 37 |             # Original code for creating new context
 38 |             context = await browser.new_context(
 39 |                 no_viewport=True,
 40 |                 user_agent=self.config.user_agent,
 41 |                 java_script_enabled=True,
 42 |                 bypass_csp=self.config.disable_security,
 43 |                 ignore_https_errors=self.config.disable_security,
 44 |                 record_video_dir=self.config.save_recording_path,
 45 |                 record_video_size=self.config.browser_window_size.model_dump(),
 46 |                 record_har_path=self.config.save_har_path,
 47 |                 locale=self.config.locale,
 48 |                 http_credentials=self.config.http_credentials,
 49 |                 is_mobile=self.config.is_mobile,
 50 |                 has_touch=self.config.has_touch,
 51 |                 geolocation=self.config.geolocation,
 52 |                 permissions=self.config.permissions,
 53 |                 timezone_id=self.config.timezone_id,
 54 |             )
 55 | 
 56 |         if self.config.trace_path:
 57 |             await context.tracing.start(screenshots=True, snapshots=True, sources=True)
 58 | 
 59 |         # Load cookies if they exist
 60 |         if self.config.cookies_file and os.path.exists(self.config.cookies_file):
 61 |             with open(self.config.cookies_file, 'r') as f:
 62 |                 try:
 63 |                     cookies = json.load(f)
 64 | 
 65 |                     valid_same_site_values = ['Strict', 'Lax', 'None']
 66 |                     for cookie in cookies:
 67 |                         if 'sameSite' in cookie:
 68 |                             if cookie['sameSite'] not in valid_same_site_values:
 69 |                                 logger.warning(
 70 |                                     f"Fixed invalid sameSite value '{cookie['sameSite']}' to 'None' for cookie {cookie.get('name')}"
 71 |                                 )
 72 |                                 cookie['sameSite'] = 'None'
 73 |                     logger.info(f'🍪  Loaded {len(cookies)} cookies from {self.config.cookies_file}')
 74 |                     await context.add_cookies(cookies)
 75 | 
 76 |                 except json.JSONDecodeError as e:
 77 |                     logger.error(f'Failed to parse cookies file: {str(e)}')
 78 | 
 79 |         # Expose anti-detection scripts
 80 |         await context.add_init_script(
 81 |             """
 82 |             // Webdriver property
 83 |             Object.defineProperty(navigator, 'webdriver', {
 84 |                 get: () => undefined
 85 |             });
 86 | 
 87 |             // Languages
 88 |             Object.defineProperty(navigator, 'languages', {
 89 |                 get: () => ['en-US']
 90 |             });
 91 | 
 92 |             // Plugins
 93 |             Object.defineProperty(navigator, 'plugins', {
 94 |                 get: () => [1, 2, 3, 4, 5]
 95 |             });
 96 | 
 97 |             // Chrome runtime
 98 |             window.chrome = { runtime: {} };
 99 | 
100 |             // Permissions
101 |             const originalQuery = window.navigator.permissions.query;
102 |             window.navigator.permissions.query = (parameters) => (
103 |                 parameters.name === 'notifications' ?
104 |                     Promise.resolve({ state: Notification.permission }) :
105 |                     originalQuery(parameters)
106 |             );
107 |             (function () {
108 |                 const originalAttachShadow = Element.prototype.attachShadow;
109 |                 Element.prototype.attachShadow = function attachShadow(options) {
110 |                     return originalAttachShadow.call(this, { ...options, mode: "open" });
111 |                 };
112 |             })();
113 |             """
114 |         )
115 | 
116 |         return context
117 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/browser/custom_browser.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import gc
  3 | import pdb
  4 | 
  5 | from playwright.async_api import Browser as PlaywrightBrowser
  6 | from playwright.async_api import (
  7 |     BrowserContext as PlaywrightBrowserContext,
  8 | )
  9 | from playwright.async_api import (
 10 |     Playwright,
 11 |     async_playwright,
 12 | )
 13 | from browser_use.browser.browser import Browser, IN_DOCKER
 14 | from browser_use.browser.context import BrowserContext, BrowserContextConfig
 15 | from playwright.async_api import BrowserContext as PlaywrightBrowserContext
 16 | import logging
 17 | 
 18 | from browser_use.browser.chrome import (
 19 |     CHROME_ARGS,
 20 |     CHROME_DETERMINISTIC_RENDERING_ARGS,
 21 |     CHROME_DISABLE_SECURITY_ARGS,
 22 |     CHROME_DOCKER_ARGS,
 23 |     CHROME_HEADLESS_ARGS,
 24 | )
 25 | from browser_use.browser.context import BrowserContext, BrowserContextConfig
 26 | from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
 27 | from browser_use.utils import time_execution_async
 28 | import socket
 29 | 
 30 | from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
 31 | 
 32 | logger = logging.getLogger(__name__)
 33 | 
 34 | 
 35 | class CustomBrowser(Browser):
 36 | 
 37 |     async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
 38 |         """Create a browser context"""
 39 |         browser_config = self.config.model_dump() if self.config else {}
 40 |         context_config = config.model_dump() if config else {}
 41 |         merged_config = {**browser_config, **context_config}
 42 |         return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
 43 | 
 44 |     async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
 45 |         """Sets up and returns a Playwright Browser instance with anti-detection measures."""
 46 |         assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
 47 | 
 48 |         if self.config.headless:
 49 |             screen_size = {'width': 1920, 'height': 1080}
 50 |             offset_x, offset_y = 0, 0
 51 |         else:
 52 |             screen_size = get_screen_resolution()
 53 |             offset_x, offset_y = get_window_adjustments()
 54 | 
 55 |         chrome_args = {
 56 |             *CHROME_ARGS,
 57 |             *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
 58 |             *(CHROME_HEADLESS_ARGS if self.config.headless else []),
 59 |             *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
 60 |             *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
 61 |             f'--window-position={offset_x},{offset_y}',
 62 |             *self.config.extra_browser_args,
 63 |         }
 64 |         contain_window_size = False
 65 |         for arg in self.config.extra_browser_args:
 66 |             if "--window-size" in arg:
 67 |                 contain_window_size = True
 68 |                 break
 69 |         if not contain_window_size:
 70 |             chrome_args.add(f'--window-size={screen_size["width"]},{screen_size["height"]}')
 71 | 
 72 |         # check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
 73 |         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
 74 |             if s.connect_ex(('localhost', 9222)) == 0:
 75 |                 chrome_args.remove('--remote-debugging-port=9222')
 76 | 
 77 |         browser_class = getattr(playwright, self.config.browser_class)
 78 |         args = {
 79 |             'chromium': list(chrome_args),
 80 |             'firefox': [
 81 |                 *{
 82 |                     '-no-remote',
 83 |                     *self.config.extra_browser_args,
 84 |                 }
 85 |             ],
 86 |             'webkit': [
 87 |                 *{
 88 |                     '--no-startup-window',
 89 |                     *self.config.extra_browser_args,
 90 |                 }
 91 |             ],
 92 |         }
 93 | 
 94 |         browser = await browser_class.launch(
 95 |             headless=self.config.headless,
 96 |             args=args[self.config.browser_class],
 97 |             proxy=self.config.proxy.model_dump() if self.config.proxy else None,
 98 |             handle_sigterm=False,
 99 |             handle_sigint=False,
100 |         )
101 |         return browser
102 | 
103 |     async def _close_without_httpxclients(self):
104 |         if self.config.keep_alive:
105 |             return
106 | 
107 |         try:
108 |             if self.playwright_browser:
109 |                 await self.playwright_browser.close()
110 |                 del self.playwright_browser
111 |             if self.playwright:
112 |                 await self.playwright.stop()
113 |                 del self.playwright
114 |             if chrome_proc := getattr(self, '_chrome_subprocess', None):
115 |                 try:
116 |                     # always kill all children processes, otherwise chrome leaves a bunch of zombie processes
117 |                     for proc in chrome_proc.children(recursive=True):
118 |                         proc.kill()
119 |                     chrome_proc.kill()
120 |                 except Exception as e:
121 |                     logger.debug(f'Failed to terminate chrome subprocess: {e}')
122 | 
123 |         except Exception as e:
124 |             logger.debug(f'Failed to close browser properly: {e}')
125 | 
126 |         finally:
127 |             self.playwright_browser = None
128 |             self.playwright = None
129 |             self._chrome_subprocess = None
130 |             gc.collect()
131 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/agent/browser_use/browser_use_agent.py:
--------------------------------------------------------------------------------

```python
  1 | from __future__ import annotations
  2 | 
  3 | import asyncio
  4 | import gc
  5 | import inspect
  6 | import json
  7 | import logging
  8 | import os
  9 | import re
 10 | import time
 11 | from pathlib import Path
 12 | from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union
 13 | 
 14 | from dotenv import load_dotenv
 15 | from langchain_core.language_models.chat_models import BaseChatModel
 16 | from langchain_core.messages import (
 17 |     BaseMessage,
 18 |     HumanMessage,
 19 |     SystemMessage,
 20 | )
 21 | 
 22 | # from lmnr.sdk.decorators import observe
 23 | from pydantic import BaseModel, ValidationError
 24 | 
 25 | from browser_use.agent.gif import create_history_gif
 26 | from browser_use.agent.memory.service import Memory, MemorySettings
 27 | from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
 28 | from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
 29 | from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
 30 | from browser_use.agent.views import (
 31 |     REQUIRED_LLM_API_ENV_VARS,
 32 |     ActionResult,
 33 |     AgentError,
 34 |     AgentHistory,
 35 |     AgentHistoryList,
 36 |     AgentOutput,
 37 |     AgentSettings,
 38 |     AgentState,
 39 |     AgentStepInfo,
 40 |     StepMetadata,
 41 |     ToolCallingMethod,
 42 | )
 43 | from browser_use.browser.browser import Browser
 44 | from browser_use.browser.context import BrowserContext
 45 | from browser_use.browser.views import BrowserState, BrowserStateHistory
 46 | from browser_use.controller.registry.views import ActionModel
 47 | from browser_use.controller.service import Controller
 48 | from browser_use.dom.history_tree_processor.service import (
 49 |     DOMHistoryElement,
 50 |     HistoryTreeProcessor,
 51 | )
 52 | from browser_use.exceptions import LLMException
 53 | from browser_use.telemetry.service import ProductTelemetry
 54 | from browser_use.telemetry.views import (
 55 |     AgentEndTelemetryEvent,
 56 |     AgentRunTelemetryEvent,
 57 |     AgentStepTelemetryEvent,
 58 | )
 59 | from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
 60 | from browser_use.agent.service import Agent, AgentHookFunc
 61 | 
 62 | load_dotenv()
 63 | logger = logging.getLogger(__name__)
 64 | 
 65 | SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1'
 66 | 
 67 | 
class BrowserUseAgent(Agent):
    """Agent subclass whose ``run`` loop adds interactive control handling.

    On top of the base ``Agent`` step loop this override:
      * registers a Ctrl+C ``SignalHandler`` (first interrupt pauses the
        agent, second exits),
      * waits for a pending ``_verification_task`` (LLM API-key check)
        before starting,
      * honours ``state.paused`` / ``state.stopped`` flags between steps, and
      * always captures end-of-run telemetry and an optional history GIF in
        the ``finally`` block, even on interrupt.
    """

    @time_execution_async('--run (agent)')
    async def run(
            self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
            on_step_end: AgentHookFunc | None = None
    ) -> AgentHistoryList:
        """Execute the task with maximum number of steps.

        Args:
            max_steps: Hard cap on the number of agent steps before giving up.
            on_step_start: Optional async hook awaited before every step.
            on_step_end: Optional async hook awaited after every step.

        Returns:
            The accumulated ``AgentHistoryList``, even when interrupted or
            stopped early.
        """

        loop = asyncio.get_event_loop()

        # Set up the Ctrl+C signal handler with callbacks specific to this agent
        from browser_use.utils import SignalHandler

        signal_handler = SignalHandler(
            loop=loop,
            pause_callback=self.pause,
            resume_callback=self.resume,
            custom_exit_callback=None,  # No special cleanup needed on forced exit
            exit_on_second_int=True,
        )
        signal_handler.register()

        # Wait for verification task to complete if it exists
        if hasattr(self, '_verification_task') and self._verification_task and not self._verification_task.done():
            try:
                await self._verification_task
            except Exception:
                # Error already logged in the task
                pass

        try:
            self._log_agent_run()

            # Execute initial actions if provided
            if self.initial_actions:
                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                self.state.last_result = result

            for step in range(max_steps):
                # Check if waiting for user input after Ctrl+C
                while self.state.paused:
                    await asyncio.sleep(0.5)
                    if self.state.stopped:
                        break

                # Check if we should stop due to too many failures
                if self.state.consecutive_failures >= self.settings.max_failures:
                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                    break

                # Check control flags before each step
                if self.state.stopped:
                    logger.info('Agent stopped')
                    break

                while self.state.paused:
                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
                    if self.state.stopped:  # Allow stopping while paused
                        break

                if on_step_start is not None:
                    await on_step_start(self)

                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
                await self.step(step_info)

                if on_step_end is not None:
                    await on_step_end(self)

                if self.state.history.is_done():
                    # Optionally let the agent validate its own output before
                    # accepting completion; a failed validation retries the loop.
                    if self.settings.validate_output and step < max_steps - 1:
                        if not await self._validate_output():
                            continue

                    await self.log_completion()
                    break
            else:
                # for/else: reached only when the loop exhausted max_steps
                # without an explicit break (i.e. the task never finished).
                logger.info('❌ Failed to complete task in maximum steps')

            return self.state.history

        except KeyboardInterrupt:
            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
            logger.info('Got KeyboardInterrupt during execution, returning current history')
            return self.state.history

        finally:
            # Unregister signal handlers before cleanup
            signal_handler.unregister()

            self.telemetry.capture(
                AgentEndTelemetryEvent(
                    agent_id=self.state.agent_id,
                    is_done=self.state.history.is_done(),
                    success=self.state.history.is_successful(),
                    steps=self.state.n_steps,
                    max_steps_reached=self.state.n_steps >= max_steps,
                    errors=self.state.history.errors(),
                    total_input_tokens=self.state.history.total_input_tokens(),
                    total_duration_seconds=self.state.history.total_duration_seconds(),
                )
            )

            await self.close()

            if self.settings.generate_gif:
                # generate_gif may be True (use the default path) or a str path.
                output_path: str = 'agent_history.gif'
                if isinstance(self.settings.generate_gif, str):
                    output_path = self.settings.generate_gif

                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/controller/custom_controller.py:
--------------------------------------------------------------------------------

```python
  1 | import pdb
  2 | 
  3 | import pyperclip
  4 | from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
  5 | from pydantic import BaseModel
  6 | from browser_use.agent.views import ActionResult
  7 | from browser_use.browser.context import BrowserContext
  8 | from browser_use.controller.service import Controller, DoneAction
  9 | from browser_use.controller.registry.service import Registry, RegisteredAction
 10 | from main_content_extractor import MainContentExtractor
 11 | from browser_use.controller.views import (
 12 |     ClickElementAction,
 13 |     DoneAction,
 14 |     ExtractPageContentAction,
 15 |     GoToUrlAction,
 16 |     InputTextAction,
 17 |     OpenTabAction,
 18 |     ScrollAction,
 19 |     SearchGoogleAction,
 20 |     SendKeysAction,
 21 |     SwitchTabAction,
 22 | )
 23 | import logging
 24 | import inspect
 25 | import asyncio
 26 | import os
 27 | from langchain_core.language_models.chat_models import BaseChatModel
 28 | from browser_use.agent.views import ActionModel, ActionResult
 29 | 
 30 | from ..utils.mcp_client import create_tool_param_model, setup_mcp_client_and_tools
 31 | 
 32 | from browser_use.utils import time_execution_sync
 33 | 
 34 | logger = logging.getLogger(__name__)
 35 | 
 36 | Context = TypeVar('Context')
 37 | 
 38 | 
 39 | class CustomController(Controller):
 40 |     def __init__(self, exclude_actions: list[str] = [],
 41 |                  output_model: Optional[Type[BaseModel]] = None,
 42 |                  ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[
 43 |                      [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None,
 44 |                  ):
 45 |         super().__init__(exclude_actions=exclude_actions, output_model=output_model)
 46 |         self._register_custom_actions()
 47 |         self.ask_assistant_callback = ask_assistant_callback
 48 |         self.mcp_client = None
 49 |         self.mcp_server_config = None
 50 | 
 51 |     def _register_custom_actions(self):
 52 |         """Register all custom browser actions"""
 53 | 
 54 |         @self.registry.action(
 55 |             "When executing tasks, prioritize autonomous completion. However, if you encounter a definitive blocker "
 56 |             "that prevents you from proceeding independently – such as needing credentials you don't possess, "
 57 |             "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, "
 58 |             "or facing limitations in your capabilities – you must request human assistance."
 59 |         )
 60 |         async def ask_for_assistant(query: str, browser: BrowserContext):
 61 |             if self.ask_assistant_callback:
 62 |                 if inspect.iscoroutinefunction(self.ask_assistant_callback):
 63 |                     user_response = await self.ask_assistant_callback(query, browser)
 64 |                 else:
 65 |                     user_response = self.ask_assistant_callback(query, browser)
 66 |                 msg = f"AI ask: {query}. User response: {user_response['response']}"
 67 |                 logger.info(msg)
 68 |                 return ActionResult(extracted_content=msg, include_in_memory=True)
 69 |             else:
 70 |                 return ActionResult(extracted_content="Human cannot help you. Please try another way.",
 71 |                                     include_in_memory=True)
 72 | 
 73 |         @self.registry.action(
 74 |             'Upload file to interactive element with file path ',
 75 |         )
 76 |         async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
 77 |             if path not in available_file_paths:
 78 |                 return ActionResult(error=f'File path {path} is not available')
 79 | 
 80 |             if not os.path.exists(path):
 81 |                 return ActionResult(error=f'File {path} does not exist')
 82 | 
 83 |             dom_el = await browser.get_dom_element_by_index(index)
 84 | 
 85 |             file_upload_dom_el = dom_el.get_file_upload_element()
 86 | 
 87 |             if file_upload_dom_el is None:
 88 |                 msg = f'No file upload element found at index {index}'
 89 |                 logger.info(msg)
 90 |                 return ActionResult(error=msg)
 91 | 
 92 |             file_upload_el = await browser.get_locate_element(file_upload_dom_el)
 93 | 
 94 |             if file_upload_el is None:
 95 |                 msg = f'No file upload element found at index {index}'
 96 |                 logger.info(msg)
 97 |                 return ActionResult(error=msg)
 98 | 
 99 |             try:
100 |                 await file_upload_el.set_input_files(path)
101 |                 msg = f'Successfully uploaded file to index {index}'
102 |                 logger.info(msg)
103 |                 return ActionResult(extracted_content=msg, include_in_memory=True)
104 |             except Exception as e:
105 |                 msg = f'Failed to upload file to index {index}: {str(e)}'
106 |                 logger.info(msg)
107 |                 return ActionResult(error=msg)
108 | 
109 |     @time_execution_sync('--act')
110 |     async def act(
111 |             self,
112 |             action: ActionModel,
113 |             browser_context: Optional[BrowserContext] = None,
114 |             #
115 |             page_extraction_llm: Optional[BaseChatModel] = None,
116 |             sensitive_data: Optional[Dict[str, str]] = None,
117 |             available_file_paths: Optional[list[str]] = None,
118 |             #
119 |             context: Context | None = None,
120 |     ) -> ActionResult:
121 |         """Execute an action"""
122 | 
123 |         try:
124 |             for action_name, params in action.model_dump(exclude_unset=True).items():
125 |                 if params is not None:
126 |                     if action_name.startswith("mcp"):
127 |                         # this is a mcp tool
128 |                         logger.debug(f"Invoke MCP tool: {action_name}")
129 |                         mcp_tool = self.registry.registry.actions.get(action_name).function
130 |                         result = await mcp_tool.ainvoke(params)
131 |                     else:
132 |                         result = await self.registry.execute_action(
133 |                             action_name,
134 |                             params,
135 |                             browser=browser_context,
136 |                             page_extraction_llm=page_extraction_llm,
137 |                             sensitive_data=sensitive_data,
138 |                             available_file_paths=available_file_paths,
139 |                             context=context,
140 |                         )
141 | 
142 |                     if isinstance(result, str):
143 |                         return ActionResult(extracted_content=result)
144 |                     elif isinstance(result, ActionResult):
145 |                         return result
146 |                     elif result is None:
147 |                         return ActionResult()
148 |                     else:
149 |                         raise ValueError(f'Invalid action result type: {type(result)} of {result}')
150 |             return ActionResult()
151 |         except Exception as e:
152 |             raise e
153 | 
154 |     async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
155 |         self.mcp_server_config = mcp_server_config
156 |         if self.mcp_server_config:
157 |             self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
158 |             self.register_mcp_tools()
159 | 
160 |     def register_mcp_tools(self):
161 |         """
162 |         Register the MCP tools used by this controller.
163 |         """
164 |         if self.mcp_client:
165 |             for server_name in self.mcp_client.server_name_to_tools:
166 |                 for tool in self.mcp_client.server_name_to_tools[server_name]:
167 |                     tool_name = f"mcp.{server_name}.{tool.name}"
168 |                     self.registry.registry.actions[tool_name] = RegisteredAction(
169 |                         name=tool_name,
170 |                         description=tool.description,
171 |                         function=tool,
172 |                         param_model=create_tool_param_model(tool),
173 |                     )
174 |                     logger.info(f"Add mcp tool: {tool_name}")
175 | 
176 |     async def close_mcp_client(self):
177 |         if self.mcp_client:
178 |             await self.mcp_client.__aexit__(None, None, None)
179 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/utils/mcp_client.py:
--------------------------------------------------------------------------------

```python
  1 | import os
  2 | import asyncio
  3 | import base64
  4 | import pdb
  5 | from typing import List, Tuple, Optional
  6 | from langchain_core.tools import BaseTool
  7 | from langchain_mcp_adapters.client import MultiServerMCPClient
  8 | import base64
  9 | import json
 10 | import logging
 11 | from typing import Optional, Dict, Any, Type
 12 | from langchain_core.tools import BaseTool
 13 | from pydantic.v1 import BaseModel, Field
 14 | from langchain_core.runnables import RunnableConfig
 15 | from pydantic import BaseModel, Field, create_model
 16 | from typing import Type, Dict, Any, Optional, get_type_hints, List, Union, Annotated, Set
 17 | from pydantic import BaseModel, ConfigDict, create_model, Field
 18 | from langchain.tools import BaseTool
 19 | import inspect
 20 | from datetime import datetime, date, time
 21 | import uuid
 22 | from enum import Enum
 23 | import inspect
 24 | from browser_use.controller.registry.views import ActionModel
 25 | from typing import Type, Dict, Any, Optional, get_type_hints
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | 
 29 | 
 30 | async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optional[MultiServerMCPClient]:
 31 |     """
 32 |     Initializes the MultiServerMCPClient, connects to servers, fetches tools,
 33 |     filters them, and returns a flat list of usable tools and the client instance.
 34 | 
 35 |     Returns:
 36 |         A tuple containing:
 37 |         - list[BaseTool]: The filtered list of usable LangChain tools.
 38 |         - MultiServerMCPClient | None: The initialized and started client instance, or None on failure.
 39 |     """
 40 | 
 41 |     logger.info("Initializing MultiServerMCPClient...")
 42 | 
 43 |     if not mcp_server_config:
 44 |         logger.error("No MCP server configuration provided.")
 45 |         return None
 46 | 
 47 |     try:
 48 |         if "mcpServers" in mcp_server_config:
 49 |             mcp_server_config = mcp_server_config["mcpServers"]
 50 |         client = MultiServerMCPClient(mcp_server_config)
 51 |         await client.__aenter__()
 52 |         return client
 53 | 
 54 |     except Exception as e:
 55 |         logger.error(f"Failed to setup MCP client or fetch tools: {e}", exc_info=True)
 56 |         return None
 57 | 
 58 | 
def create_tool_param_model(tool: BaseTool) -> Type[BaseModel]:
    """Creates a Pydantic model from a LangChain tool's schema.

    The returned model subclasses ``ActionModel`` so the tool can be
    registered as a browser-use action. When the tool exposes an
    ``args_schema`` it is read as a JSON-schema dict (properties, required
    list, numeric/string constraints); otherwise the parameters are
    reflected from the signature of the tool's ``_run`` method.

    NOTE(review): ``args_schema`` is indexed like a plain dict here. Some
    LangChain tools carry a BaseModel subclass instead — confirm the MCP
    adapter always supplies a dict-shaped schema.
    """

    # Get tool schema information
    json_schema = tool.args_schema
    tool_name = tool.name

    # If the tool already has a schema defined, convert it to a new param_model
    if json_schema is not None:

        # Create new parameter model
        params = {}

        # Process properties if they exist
        if 'properties' in json_schema:
            # Find required fields
            required_fields: Set[str] = set(json_schema.get('required', []))

            for prop_name, prop_details in json_schema['properties'].items():
                # Recursively map the JSON-schema type to a Python type.
                field_type = resolve_type(prop_details, f"{tool_name}_{prop_name}")

                # Check if parameter is required
                is_required = prop_name in required_fields

                # Get default value and description; `...` marks a required
                # field for pydantic.
                default_value = prop_details.get('default', ... if is_required else None)
                description = prop_details.get('description', '')

                # Add field constraints
                field_kwargs = {'default': default_value}
                if description:
                    field_kwargs['description'] = description

                # Add additional constraints if present
                if 'minimum' in prop_details:
                    field_kwargs['ge'] = prop_details['minimum']
                if 'maximum' in prop_details:
                    field_kwargs['le'] = prop_details['maximum']
                if 'minLength' in prop_details:
                    field_kwargs['min_length'] = prop_details['minLength']
                if 'maxLength' in prop_details:
                    field_kwargs['max_length'] = prop_details['maxLength']
                if 'pattern' in prop_details:
                    field_kwargs['pattern'] = prop_details['pattern']

                # Add to parameters dictionary
                params[prop_name] = (field_type, Field(**field_kwargs))

        return create_model(
            f'{tool_name}_parameters',
            __base__=ActionModel,
            **params,  # type: ignore
        )

    # If no schema is defined, extract parameters from the _run method
    run_method = tool._run
    sig = inspect.signature(run_method)

    # Get type hints for better type information
    try:
        type_hints = get_type_hints(run_method)
    except Exception:
        # Unresolvable forward refs etc.: fall back to raw annotations below.
        type_hints = {}

    params = {}
    for name, param in sig.parameters.items():
        # Skip 'self' parameter and any other parameters you want to exclude
        if name == 'self':
            continue

        # Get annotation from type hints if available, otherwise from signature
        annotation = type_hints.get(name, param.annotation)
        if annotation == inspect.Parameter.empty:
            annotation = Any

        # Use default value if available, otherwise make it required
        if param.default != param.empty:
            params[name] = (annotation, param.default)
        else:
            params[name] = (annotation, ...)

    return create_model(
        f'{tool_name}_parameters',
        __base__=ActionModel,
        **params,  # type: ignore
    )
145 | 
146 | 
def resolve_type(prop_details: Dict[str, Any], prefix: str = "") -> Any:
    """Map a JSON-schema property description onto a Python/Pydantic type.

    Handles (in order): ``$ref``, formatted strings, enums, arrays, nested
    objects, ``oneOf``/``anyOf`` unions, ``allOf`` compositions, nullable
    type lists, and plain primitives. Unknown constructs fall back to
    ``Any``; a missing ``type`` defaults to ``string``.
    """

    # Reference resolution is out of scope here; treat $ref as opaque.
    if '$ref' in prop_details:
        return Any

    primitive_types = {
        'string': str,
        'integer': int,
        'number': float,
        'boolean': bool,
        'array': List,
        'object': Dict,
        'null': type(None),
    }

    declared = prop_details.get('type')

    # Formatted strings map to richer Python types where one exists.
    if declared == 'string' and 'format' in prop_details:
        by_format = {
            'date-time': datetime,
            'date': date,
            'time': time,
            'email': str,
            'uri': str,
            'url': str,
            'uuid': uuid.UUID,
            'binary': bytes,
        }
        return by_format.get(prop_details['format'], str)

    # Enums become dynamically created Enum classes with sanitized names.
    if 'enum' in prop_details:
        members = {}
        for idx, value in enumerate(prop_details['enum']):
            if isinstance(value, str):
                candidate = value.upper().replace(' ', '_').replace('-', '_')
                if not candidate.isidentifier():
                    candidate = f"VALUE_{idx}"
            else:
                candidate = f"VALUE_{idx}"
            members[candidate] = value

        if members:
            return Enum(f"{prefix}_Enum", members)
        return str  # empty enum: fall back to plain string

    # Arrays recurse into their item schema.
    if declared == 'array' and 'items' in prop_details:
        return List[resolve_type(prop_details['items'], f"{prefix}_item")]  # type: ignore

    # Objects with declared properties become nested Pydantic models.
    if declared == 'object' and 'properties' in prop_details:
        fields = {}
        for child_name, child_schema in prop_details['properties'].items():
            child_type = resolve_type(child_schema, f"{prefix}_{child_name}")
            required = prop_details.get('required', [])
            default = child_schema.get('default', ... if child_name in required else None)
            kwargs = {'default': default}
            if child_schema.get('description', ''):
                kwargs['description'] = child_schema['description']
            fields[child_name] = (child_type, Field(**kwargs))
        return create_model(f"{prefix}_Model", **fields)

    # oneOf/anyOf become a Union of the resolved alternatives.
    if 'oneOf' in prop_details or 'anyOf' in prop_details:
        alternatives = prop_details.get('oneOf') or prop_details.get('anyOf')
        resolved = [resolve_type(alt, f"{prefix}_{i}") for i, alt in enumerate(alternatives)]
        if resolved:
            return Union.__getitem__(tuple(resolved))  # type: ignore
        return Any

    # allOf: merge all declared properties into one composite model.
    if 'allOf' in prop_details:
        merged = {}
        for part_idx, part in enumerate(prop_details['allOf']):
            for child_name, child_schema in part.get('properties', {}).items():
                child_type = resolve_type(child_schema, f"{prefix}_allOf_{part_idx}_{child_name}")
                required = part.get('required', [])
                merged[child_name] = (child_type, ... if child_name in required else None)
        if merged:
            return create_model(f"{prefix}_CompositeModel", **merged)
        return Dict

    # Plain primitives; a type list like ["string", "null"] maps to Optional.
    schema_type = prop_details.get('type', 'string')
    if isinstance(schema_type, list):
        concrete = [t for t in schema_type if t != 'null']
        if not concrete:
            return Any
        base = primitive_types.get(concrete[0], Any)
        if 'null' in schema_type:
            return Optional[base]  # type: ignore
        return base

    return primitive_types.get(schema_type, Any)
268 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/_internal/utils/llm_provider.py:
--------------------------------------------------------------------------------

```python
  1 | from openai import OpenAI
  2 | import pdb
  3 | from langchain_openai import ChatOpenAI
  4 | from langchain_core.globals import get_llm_cache
  5 | from langchain_core.language_models.base import (
  6 |     BaseLanguageModel,
  7 |     LangSmithParams,
  8 |     LanguageModelInput,
  9 | )
 10 | import os
 11 | from langchain_core.load import dumpd, dumps
 12 | from langchain_core.messages import (
 13 |     AIMessage,
 14 |     SystemMessage,
 15 |     AnyMessage,
 16 |     BaseMessage,
 17 |     BaseMessageChunk,
 18 |     HumanMessage,
 19 |     convert_to_messages,
 20 |     message_chunk_to_message,
 21 | )
 22 | from langchain_core.outputs import (
 23 |     ChatGeneration,
 24 |     ChatGenerationChunk,
 25 |     ChatResult,
 26 |     LLMResult,
 27 |     RunInfo,
 28 | )
 29 | from langchain_ollama import ChatOllama
 30 | from langchain_core.output_parsers.base import OutputParserLike
 31 | from langchain_core.runnables import Runnable, RunnableConfig
 32 | from langchain_core.tools import BaseTool
 33 | 
 34 | from typing import (
 35 |     TYPE_CHECKING,
 36 |     Any,
 37 |     Callable,
 38 |     Literal,
 39 |     Optional,
 40 |     Union,
 41 |     cast, List,
 42 | )
 43 | from langchain_anthropic import ChatAnthropic
 44 | from langchain_mistralai import ChatMistralAI
 45 | from langchain_google_genai import ChatGoogleGenerativeAI
 46 | from langchain_ollama import ChatOllama
 47 | from langchain_openai import AzureChatOpenAI, ChatOpenAI
 48 | from langchain_ibm import ChatWatsonx
 49 | from langchain_aws import ChatBedrock
 50 | from pydantic import SecretStr
 51 | 
 52 | from ..utils import config
 53 | 
 54 | 
 55 | class DeepSeekR1ChatOpenAI(ChatOpenAI):
 56 | 
 57 |     def __init__(self, *args: Any, **kwargs: Any) -> None:
 58 |         super().__init__(*args, **kwargs)
 59 |         self.client = OpenAI(
 60 |             base_url=kwargs.get("base_url"),
 61 |             api_key=kwargs.get("api_key")
 62 |         )
 63 | 
 64 |     async def ainvoke(
 65 |             self,
 66 |             input: LanguageModelInput,
 67 |             config: Optional[RunnableConfig] = None,
 68 |             *,
 69 |             stop: Optional[list[str]] = None,
 70 |             **kwargs: Any,
 71 |     ) -> AIMessage:
 72 |         message_history = []
 73 |         for input_ in input:
 74 |             if isinstance(input_, SystemMessage):
 75 |                 message_history.append({"role": "system", "content": input_.content})
 76 |             elif isinstance(input_, AIMessage):
 77 |                 message_history.append({"role": "assistant", "content": input_.content})
 78 |             else:
 79 |                 message_history.append({"role": "user", "content": input_.content})
 80 | 
 81 |         response = self.client.chat.completions.create(
 82 |             model=self.model_name,
 83 |             messages=message_history
 84 |         )
 85 | 
 86 |         reasoning_content = response.choices[0].message.reasoning_content
 87 |         content = response.choices[0].message.content
 88 |         return AIMessage(content=content, reasoning_content=reasoning_content)
 89 | 
 90 |     def invoke(
 91 |             self,
 92 |             input: LanguageModelInput,
 93 |             config: Optional[RunnableConfig] = None,
 94 |             *,
 95 |             stop: Optional[list[str]] = None,
 96 |             **kwargs: Any,
 97 |     ) -> AIMessage:
 98 |         message_history = []
 99 |         for input_ in input:
100 |             if isinstance(input_, SystemMessage):
101 |                 message_history.append({"role": "system", "content": input_.content})
102 |             elif isinstance(input_, AIMessage):
103 |                 message_history.append({"role": "assistant", "content": input_.content})
104 |             else:
105 |                 message_history.append({"role": "user", "content": input_.content})
106 | 
107 |         response = self.client.chat.completions.create(
108 |             model=self.model_name,
109 |             messages=message_history
110 |         )
111 | 
112 |         reasoning_content = response.choices[0].message.reasoning_content
113 |         content = response.choices[0].message.content
114 |         return AIMessage(content=content, reasoning_content=reasoning_content)
115 | 
116 | 
class DeepSeekR1ChatOllama(ChatOllama):
    """ChatOllama wrapper for DeepSeek R1 models served via Ollama.

    R1 emits its chain-of-thought wrapped in ``<think>...</think>`` tags
    inside the normal content stream. This subclass splits that out into
    ``reasoning_content`` on the returned AIMessage.
    """

    @staticmethod
    def _split_reasoning(org_content: str) -> AIMessage:
        """Split '<think>reasoning</think>answer' into an AIMessage.

        Fix: previously a missing '</think>' tag raised IndexError; now the
        whole output is treated as the answer with no reasoning.
        """
        if "</think>" in org_content:
            reasoning_content, content = org_content.split("</think>", 1)
            reasoning_content = reasoning_content.replace("<think>", "")
        else:
            reasoning_content, content = None, org_content
        # Some prompts make the model label its answer; keep only the JSON part.
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)

    async def ainvoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = await super().ainvoke(input=input)
        return self._split_reasoning(org_ai_message.content)

    def invoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = super().invoke(input=input)
        return self._split_reasoning(org_ai_message.content)
150 | 
151 | 
def get_llm_model(provider: str, **kwargs):
    """
    Build a LangChain chat model for the requested provider.

    :param provider: Provider id, e.g. "openai", "anthropic", "google",
        "ollama", "deepseek", "azure_openai", "mistral", "alibaba", "ibm",
        "moonshot", "unbound", "siliconflow".
    :param kwargs: Optional overrides such as model_name, temperature,
        base_url, api_key, num_ctx / num_predict (Ollama), api_version (Azure).
    :return: A configured LangChain chat-model instance.
    :raises ValueError: If a required API key is missing or the provider id
        is not supported.
    """
    # Resolve the API key up front. "ollama" (local server) and "bedrock"
    # (AWS credential chain) do not use a per-provider API key.
    if provider not in ["ollama", "bedrock"]:
        env_var = f"{provider.upper()}_API_KEY"
        api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
        if not api_key:
            provider_display = config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
            error_msg = f"💥 {provider_display} API key not found! 🔑 Please set the `{env_var}` environment variable or provide it in the UI."
            raise ValueError(error_msg)
        kwargs["api_key"] = api_key

    if provider == "anthropic":
        if not kwargs.get("base_url", ""):
            base_url = "https://api.anthropic.com"
        else:
            base_url = kwargs.get("base_url")

        return ChatAnthropic(
            model=kwargs.get("model_name", "claude-3-5-sonnet-20241022"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == 'mistral':
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("MISTRAL_ENDPOINT", "https://api.mistral.ai/v1")
        else:
            base_url = kwargs.get("base_url")
        # NOTE(review): redundant — the generic block above already fell back
        # to MISTRAL_API_KEY; kept as-is.
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("MISTRAL_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")

        return ChatMistralAI(
            model=kwargs.get("model_name", "mistral-large-latest"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "deepseek":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")

        # The reasoner model returns a separate `reasoning_content` field,
        # which requires the dedicated wrapper class to surface.
        if kwargs.get("model_name", "deepseek-chat") == "deepseek-reasoner":
            return DeepSeekR1ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-reasoner"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
        else:
            return ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-chat"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
    elif provider == "google":
        return ChatGoogleGenerativeAI(
            model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
            temperature=kwargs.get("temperature", 0.0),
            api_key=api_key,
        )
    elif provider == "ollama":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
        else:
            base_url = kwargs.get("base_url")

        # DeepSeek R1 models wrap chain-of-thought in <think> tags; the
        # dedicated wrapper splits that out of the answer.
        if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
            return DeepSeekR1ChatOllama(
                model=kwargs.get("model_name", "deepseek-r1:14b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                base_url=base_url,
            )
        else:
            return ChatOllama(
                model=kwargs.get("model_name", "qwen2.5:7b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                num_predict=kwargs.get("num_predict", 1024),
                base_url=base_url,
            )
    elif provider == "azure_openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
        return AzureChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            api_version=api_version,
            azure_endpoint=base_url,
            api_key=api_key,
        )
    elif provider == "alibaba":
        # DashScope's OpenAI-compatible endpoint.
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "qwen-plus"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "ibm":
        parameters = {
            "temperature": kwargs.get("temperature", 0.0),
            "max_tokens": kwargs.get("num_ctx", 32000)
        }
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("IBM_ENDPOINT", "https://us-south.ml.cloud.ibm.com")
        else:
            base_url = kwargs.get("base_url")

        # NOTE(review): reads IBM_PROJECT_ID / IBM_API_KEY from the environment
        # directly, ignoring the api_key resolved above — confirm intended.
        return ChatWatsonx(
            model_id=kwargs.get("model_name", "ibm/granite-vision-3.1-2b-preview"),
            url=base_url,
            project_id=os.getenv("IBM_PROJECT_ID"),
            apikey=os.getenv("IBM_API_KEY"),
            params=parameters
        )
    elif provider == "moonshot":
        # NOTE(review): endpoint and key come from env vars only; kwargs
        # overrides for base_url/api_key are ignored for this provider.
        return ChatOpenAI(
            model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("MOONSHOT_ENDPOINT"),
            api_key=os.getenv("MOONSHOT_API_KEY"),
        )
    elif provider == "unbound":
        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o-mini"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
            api_key=api_key,
        )
    elif provider == "siliconflow":
        # NOTE(review): env var names use mixed case ("SiliconFLOW_..."), which
        # differs from the SILICONFLOW_API_KEY checked by the generic block
        # above — confirm which spelling deployments actually set.
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("SiliconFLOW_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("SiliconFLOW_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        return ChatOpenAI(
            api_key=api_key,
            base_url=base_url,
            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
            temperature=kwargs.get("temperature", 0.0),
        )
    else:
        raise ValueError(f"Unsupported provider: {provider}")
328 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/config.py:
--------------------------------------------------------------------------------

```python
  1 | from typing import Any, Dict, List, Optional, Union
  2 | 
  3 | from pydantic import Field, SecretStr, field_validator, ValidationInfo
  4 | from pydantic_settings import BaseSettings, SettingsConfigDict
  5 | 
  6 | 
class LLMSettings(BaseSettings):
    """LLM configuration, read from MCP_LLM_* environment variables.

    Covers the main provider/model selection, per-provider API keys and
    endpoints, Ollama context sizing, and optional overrides for a separate
    planner LLM.

    NOTE(review): with pydantic-settings v2 the effective variable name is
    env_prefix + field name (e.g. MCP_LLM_PROVIDER); the `env=` kwargs on
    Field are pydantic-v1 style and may be inert here — confirm.
    """
    model_config = SettingsConfigDict(env_prefix="MCP_LLM_")

    provider: str = Field(default="google", env="PROVIDER")
    model_name: str = Field(default="gemini-2.5-flash-preview-04-17", env="MODEL_NAME")
    temperature: float = Field(default=0.0, env="TEMPERATURE")
    base_url: Optional[str] = Field(default=None, env="BASE_URL")
    api_key: Optional[SecretStr] = Field(default=None, env="API_KEY") # Generic API key

    # Provider-specific API keys
    openai_api_key: Optional[SecretStr] = Field(default=None, env="OPENAI_API_KEY")
    anthropic_api_key: Optional[SecretStr] = Field(default=None, env="ANTHROPIC_API_KEY")
    google_api_key: Optional[SecretStr] = Field(default=None, env="GOOGLE_API_KEY")
    azure_openai_api_key: Optional[SecretStr] = Field(default=None, env="AZURE_OPENAI_API_KEY")
    deepseek_api_key: Optional[SecretStr] = Field(default=None, env="DEEPSEEK_API_KEY")
    mistral_api_key: Optional[SecretStr] = Field(default=None, env="MISTRAL_API_KEY")
    openrouter_api_key: Optional[SecretStr] = Field(default=None, env="OPENROUTER_API_KEY")
    alibaba_api_key: Optional[SecretStr] = Field(default=None, env="ALIBABA_API_KEY")
    moonshot_api_key: Optional[SecretStr] = Field(default=None, env="MOONSHOT_API_KEY")
    unbound_api_key: Optional[SecretStr] = Field(default=None, env="UNBOUND_API_KEY")

    # Provider-specific endpoints
    openai_endpoint: Optional[str] = Field(default=None, env="OPENAI_ENDPOINT")
    anthropic_endpoint: Optional[str] = Field(default=None, env="ANTHROPIC_ENDPOINT")
    azure_openai_endpoint: Optional[str] = Field(default=None, env="AZURE_OPENAI_ENDPOINT")
    azure_openai_api_version: str = Field(default="2025-01-01-preview", env="AZURE_OPENAI_API_VERSION")
    deepseek_endpoint: Optional[str] = Field(default=None, env="DEEPSEEK_ENDPOINT")
    mistral_endpoint: Optional[str] = Field(default=None, env="MISTRAL_ENDPOINT")
    ollama_endpoint: str = Field(default="http://localhost:11434", env="OLLAMA_ENDPOINT")
    openrouter_endpoint: str = Field(default="https://openrouter.ai/api/v1", env="OPENROUTER_ENDPOINT")
    alibaba_endpoint: Optional[str] = Field(default=None, env="ALIBABA_ENDPOINT")
    moonshot_endpoint: Optional[str] = Field(default=None, env="MOONSHOT_ENDPOINT")
    unbound_endpoint: Optional[str] = Field(default=None, env="UNBOUND_ENDPOINT")

    # Ollama-specific context window and generation length limits
    ollama_num_ctx: Optional[int] = Field(default=32000, env="OLLAMA_NUM_CTX")
    ollama_num_predict: Optional[int] = Field(default=1024, env="OLLAMA_NUM_PREDICT")

    # Planner LLM settings (optional, defaults to main LLM if not set)
    planner_provider: Optional[str] = Field(default=None, env="PLANNER_PROVIDER")
    planner_model_name: Optional[str] = Field(default=None, env="PLANNER_MODEL_NAME")
    planner_temperature: Optional[float] = Field(default=None, env="PLANNER_TEMPERATURE")
    planner_base_url: Optional[str] = Field(default=None, env="PLANNER_BASE_URL")
    planner_api_key: Optional[SecretStr] = Field(default=None, env="PLANNER_API_KEY")
 51 | 
class BrowserSettings(BaseSettings):
    """Browser launch/connection configuration (MCP_BROWSER_* env vars)."""
    model_config = SettingsConfigDict(env_prefix="MCP_BROWSER_")

    headless: bool = Field(default=False, env="HEADLESS") # General headless
    disable_security: bool = Field(default=False, env="DISABLE_SECURITY") # General disable security
    binary_path: Optional[str] = Field(default=None, env="BINARY_PATH")
    user_data_dir: Optional[str] = Field(default=None, env="USER_DATA_DIR")
    window_width: int = Field(default=1280, env="WINDOW_WIDTH")
    window_height: int = Field(default=1080, env="WINDOW_HEIGHT")
    # When true, connect to an externally managed browser via cdp_url/wss_url
    # instead of launching one.
    use_own_browser: bool = Field(default=False, env="USE_OWN_BROWSER")
    cdp_url: Optional[str] = Field(default=None, env="CDP_URL")
    wss_url: Optional[str] = Field(default=None, env="WSS_URL") # For CDP connection if needed
    keep_open: bool = Field(default=False, env="KEEP_OPEN") # Server-managed browser persistence
    trace_path: Optional[str] = Field(default=None, env="TRACE_PATH")
 66 | 
 67 | 
class AgentToolSettings(BaseSettings):
    """Settings for the browser-agent tool (MCP_AGENT_TOOL_* env vars)."""
    model_config = SettingsConfigDict(env_prefix="MCP_AGENT_TOOL_")

    max_steps: int = Field(default=100, env="MAX_STEPS")
    max_actions_per_step: int = Field(default=5, env="MAX_ACTIONS_PER_STEP")
    tool_calling_method: Optional[str] = Field(default="auto", env="TOOL_CALLING_METHOD")
    max_input_tokens: Optional[int] = Field(default=128000, env="MAX_INPUT_TOKENS")
    use_vision: bool = Field(default=True, env="USE_VISION")

    # Browser settings specific to this tool, can override general MCP_BROWSER_ settings
    headless: Optional[bool] = Field(default=None, env="HEADLESS")
    disable_security: Optional[bool] = Field(default=None, env="DISABLE_SECURITY")

    enable_recording: bool = Field(default=False, env="ENABLE_RECORDING")
    save_recording_path: Optional[str] = Field(default=None, env="SAVE_RECORDING_PATH") # e.g. ./tmp/recordings
    history_path: Optional[str] = Field(default=None, env="HISTORY_PATH") # e.g. ./tmp/agent_history
 84 | 
 85 | 
class DeepResearchToolSettings(BaseSettings):
    """Settings for the deep-research tool (MCP_RESEARCH_TOOL_* env vars)."""
    model_config = SettingsConfigDict(env_prefix="MCP_RESEARCH_TOOL_")

    max_parallel_browsers: int = Field(default=3, env="MAX_PARALLEL_BROWSERS")
    save_dir: Optional[str] = Field(default=None, env="SAVE_DIR") # Base dir, task_id will be appended. Optional now.
 91 | 
 92 | 
class PathSettings(BaseSettings):
    """Filesystem path configuration (MCP_PATHS_* env vars)."""
    model_config = SettingsConfigDict(env_prefix="MCP_PATHS_")
    downloads: Optional[str] = Field(default=None, env="DOWNLOADS") # e.g. ./tmp/downloads
 96 | 
 97 | 
class ServerSettings(BaseSettings):
    """MCP server configuration (MCP_SERVER_* env vars)."""
    model_config = SettingsConfigDict(env_prefix="MCP_SERVER_")
    log_file: Optional[str] = Field(default=None, env="LOG_FILE")
    logging_level: str = Field(default="ERROR", env="LOGGING_LEVEL")
    anonymized_telemetry: bool = Field(default=True, env="ANONYMIZED_TELEMETRY")
    mcp_config: Optional[Dict[str, Any]] = Field(default=None, env="MCP_CONFIG") # For controller's MCP client
104 | 
105 | 
class AppSettings(BaseSettings):
    """Root application settings, aggregating every MCP_* settings group.

    Provides helpers that resolve the effective API key, endpoint and full
    LLM configuration for the main LLM or the optional planner LLM.
    """
    model_config = SettingsConfigDict(env_prefix="MCP_", extra='ignore') # Root prefix

    llm: LLMSettings = Field(default_factory=LLMSettings)
    browser: BrowserSettings = Field(default_factory=BrowserSettings)
    agent_tool: AgentToolSettings = Field(default_factory=AgentToolSettings)
    research_tool: DeepResearchToolSettings = Field(default_factory=DeepResearchToolSettings)
    paths: PathSettings = Field(default_factory=PathSettings)
    server: ServerSettings = Field(default_factory=ServerSettings)

    @field_validator('server', 'llm', 'browser', 'agent_tool', 'research_tool', 'paths', mode='before')
    @classmethod
    def ensure_nested_defaults(cls, v: Any) -> Any:
        """Coerce None into {} so nested settings objects are always built."""
        # This ensures that even if MCP_SERVER__LOG_FILE is set but MCP_SERVER is not,
        # the ServerSettings object is still created.
        # Pydantic-settings usually handles this, but being explicit can help.
        if v is None:
            return {}
        return v

    def get_api_key_for_provider(self, provider_name: Optional[str], is_planner: bool = False) -> Optional[str]:
        """Retrieves the API key for a given provider, checking generic, then specific.

        Resolution order:
          1. (planner only) ``llm.planner_api_key`` if set.
          2. (main only) the generic ``llm.api_key`` if set.
          3. The provider-specific field, e.g. ``llm.openai_api_key``.
        Returns None when no matching key is configured.
        """
        llm_settings_to_use = self.llm
        provider_to_use = provider_name if provider_name else (self.llm.planner_provider if is_planner else self.llm.provider)

        if is_planner:
            if self.llm.planner_api_key:
                return self.llm.planner_api_key.get_secret_value()
            # Fallback to main LLM settings if planner-specific key is not set, but provider is
            # NOTE(review): this assignment is a no-op — llm_settings_to_use is already self.llm.
            if self.llm.planner_provider and not self.llm.planner_api_key:
                 llm_settings_to_use = self.llm # Check main llm settings for this provider
            # if no planner provider, it will use main llm provider and its key

        if not provider_to_use: # Should not happen if called correctly
            return None

        # Check generic API key first for the relevant LLM settings (main or planner if planner_api_key was set)
        if not is_planner and llm_settings_to_use.api_key: # only main LLM has generic api_key
             return llm_settings_to_use.api_key.get_secret_value()

        provider_specific_key_name = f"{provider_to_use.lower()}_api_key"
        if hasattr(llm_settings_to_use, provider_specific_key_name):
            key_val = getattr(llm_settings_to_use, provider_specific_key_name)
            if key_val and isinstance(key_val, SecretStr):
                return key_val.get_secret_value()
        return None

    def get_endpoint_for_provider(self, provider_name: Optional[str], is_planner: bool = False) -> Optional[str]:
        """Retrieves the endpoint for a given provider.

        Mirrors get_api_key_for_provider: planner base_url first (planner),
        then the generic ``llm.base_url`` (main), then the provider-specific
        ``<provider>_endpoint`` field; None if nothing matches.
        """
        llm_settings_to_use = self.llm
        provider_to_use = provider_name if provider_name else (self.llm.planner_provider if is_planner else self.llm.provider)

        if is_planner:
            if self.llm.planner_base_url:
                return self.llm.planner_base_url
            # NOTE(review): no-op assignment, kept to mirror get_api_key_for_provider.
            if self.llm.planner_provider and not self.llm.planner_base_url:
                llm_settings_to_use = self.llm # Check main llm settings for this provider

        if not provider_to_use:
            return None

        if not is_planner and llm_settings_to_use.base_url: # only main LLM has generic base_url
            return llm_settings_to_use.base_url

        provider_specific_endpoint_name = f"{provider_to_use.lower()}_endpoint"
        if hasattr(llm_settings_to_use, provider_specific_endpoint_name):
            return getattr(llm_settings_to_use, provider_specific_endpoint_name)
        return None

    def get_llm_config(self, is_planner: bool = False) -> Dict[str, Any]:
        """Returns a dictionary of LLM settings suitable for llm_provider.get_llm_model.

        Planner-specific overrides (provider/model/temperature) win when
        is_planner is True and they are set; otherwise main LLM values apply.
        """
        provider = self.llm.planner_provider if is_planner and self.llm.planner_provider else self.llm.provider
        model_name = self.llm.planner_model_name if is_planner and self.llm.planner_model_name else self.llm.model_name
        temperature = self.llm.planner_temperature if is_planner and self.llm.planner_temperature is not None else self.llm.temperature

        api_key = self.get_api_key_for_provider(provider, is_planner=is_planner)
        base_url = self.get_endpoint_for_provider(provider, is_planner=is_planner)

        config = {
            "provider": provider,
            "model_name": model_name,
            "temperature": temperature,
            "api_key": api_key,
            "base_url": base_url,
            "use_vision": self.agent_tool.use_vision if not is_planner else False, # Planners typically don't need vision
            "tool_calling_method": self.agent_tool.tool_calling_method if not is_planner else "auto",
            "max_input_tokens": self.agent_tool.max_input_tokens if not is_planner else None,
        }

        if provider == "azure_openai":
            config["azure_openai_api_version"] = self.llm.azure_openai_api_version
        elif provider == "ollama":
            config["ollama_num_ctx"] = self.llm.ollama_num_ctx
            config["ollama_num_predict"] = self.llm.ollama_num_predict
        elif provider == "openrouter":
            # OpenRouter speaks the OpenAI wire protocol; the endpoint/key were
            # already resolved with the "openrouter" name above.
            config["provider"] = "openai"

        return config
204 | 
# Global settings instance, to be imported by other modules
settings = AppSettings()

# Example usage (for testing this file directly):
if __name__ == "__main__":
    try:
        print("Loaded AppSettings:")
        print(settings.model_dump_json(indent=2))
        print(f"\nLLM API Key for main provider ({settings.llm.provider}): {settings.get_api_key_for_provider(settings.llm.provider)}")
        if settings.llm.planner_provider:
            print(f"LLM API Key for planner provider ({settings.llm.planner_provider}): {settings.get_api_key_for_provider(settings.llm.planner_provider, is_planner=True)}")

        print("\nMain LLM Config for get_llm_model:")
        print(settings.get_llm_config())
        if settings.llm.planner_provider:
            print("\nPlanner LLM Config for get_llm_model:")
            print(settings.get_llm_config(is_planner=True))
    except Exception as e:
        # Settings load can fail when mandatory env vars are missing; print the
        # most common culprit to aid debugging.
        print(f"Error during settings load or test: {e}")
        import os
        print("MCP_RESEARCH_TOOL_SAVE_DIR:", os.getenv("MCP_RESEARCH_TOOL_SAVE_DIR"))
226 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/cli.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import json
  3 | import logging
  4 | import os
  5 | import sys
  6 | import traceback
  7 | import uuid
  8 | from pathlib import Path
  9 | from typing import Any, Dict, Optional
 10 | 
 11 | import typer
 12 | from dotenv import load_dotenv
 13 | 
 14 | from .config import AppSettings, settings as global_settings # Import AppSettings and the global instance
 15 | # Import from _internal
 16 | from ._internal.agent.browser_use.browser_use_agent import BrowserUseAgent, AgentHistoryList
 17 | from ._internal.agent.deep_research.deep_research_agent import DeepResearchAgent
 18 | from ._internal.browser.custom_browser import CustomBrowser
 19 | from ._internal.browser.custom_context import (
 20 |     CustomBrowserContext,
 21 |     CustomBrowserContextConfig,
 22 | )
 23 | from ._internal.controller.custom_controller import CustomController
 24 | from ._internal.utils import llm_provider as internal_llm_provider
 25 | from browser_use.browser.browser import BrowserConfig
 26 | from browser_use.agent.views import AgentOutput
 27 | from browser_use.browser.views import BrowserState
 28 | 
# Typer application and module-level logger for the CLI entry points.
app = typer.Typer(name="mcp-browser-cli", help="CLI for mcp-browser-use tools.")
logger = logging.getLogger("mcp_browser_cli")

class CLIState:
    """Holds the AppSettings loaded by main_callback for use by commands."""
    # Populated in main_callback after the optional .env file is applied.
    settings: Optional[AppSettings] = None

cli_state = CLIState()
 36 | 
 37 | def setup_logging(level_str: str, log_file: Optional[str]):
 38 |     numeric_level = getattr(logging, level_str.upper(), logging.INFO)
 39 |     for handler in logging.root.handlers[:]:
 40 |         logging.root.removeHandler(handler)
 41 |     logging.basicConfig(
 42 |         level=numeric_level,
 43 |         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 44 |         filename=log_file if log_file else None,
 45 |         filemode="a" if log_file else None,
 46 |         force=True
 47 |     )
 48 | 
 49 | @app.callback()
 50 | def main_callback(
 51 |     ctx: typer.Context,
 52 |     env_file: Optional[Path] = typer.Option(
 53 |         None, "--env-file", "-e", help="Path to .env file to load.", exists=True, dir_okay=False, resolve_path=True
 54 |     ),
 55 |     log_level: Optional[str] = typer.Option(
 56 |         None, "--log-level", "-l", help="Override logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)."
 57 |     )
 58 | ):
 59 |     """
 60 |     MCP Browser Use CLI. Settings are loaded from environment variables.
 61 |     You can use an .env file for convenience.
 62 |     """
 63 |     if env_file:
 64 |         load_dotenv(env_file, override=True)
 65 |         logger.info(f"Loaded environment variables from: {env_file}")
 66 | 
 67 |     # Reload settings after .env might have been loaded and to apply overrides
 68 |     try:
 69 |         cli_state.settings = AppSettings()
 70 |     except Exception as e:
 71 |         # This can happen if mandatory fields (like MCP_RESEARCH_TOOL_SAVE_DIR) are not set
 72 |         sys.stderr.write(f"Error loading application settings: {e}\n")
 73 |         sys.stderr.write("Please ensure all mandatory environment variables are set (e.g., MCP_RESEARCH_TOOL_SAVE_DIR).\n")
 74 |         raise typer.Exit(code=1)
 75 | 
 76 |     # Setup logging based on final settings (env file, then env vars, then CLI override)
 77 |     final_log_level = log_level if log_level else cli_state.settings.server.logging_level
 78 |     final_log_file = cli_state.settings.server.log_file
 79 |     setup_logging(final_log_level, final_log_file)
 80 | 
 81 |     logger.info(f"CLI initialized. Effective log level: {final_log_level.upper()}")
 82 |     if not cli_state.settings: # Should not happen if AppSettings() worked
 83 |         logger.error("Failed to load application settings.")
 84 |         raise typer.Exit(code=1)
 85 | 
 86 | 
 87 | async def cli_ask_human_callback(query: str, browser_context: Any) -> Dict[str, Any]:
 88 |     """Callback for agent to ask human for input via CLI."""
 89 |     # browser_context is part of the signature from browser-use, might not be needed here
 90 |     print(typer.style(f"\n🤖 AGENT ASKS: {query}", fg=typer.colors.YELLOW))
 91 |     response_text = typer.prompt(typer.style("Your response", fg=typer.colors.CYAN))
 92 |     return {"response": response_text}
 93 | 
 94 | def cli_on_step_callback(browser_state: BrowserState, agent_output: AgentOutput, step_num: int):
 95 |     """CLI callback for BrowserUseAgent steps."""
 96 |     print(typer.style(f"\n--- Step {step_num} ---", fg=typer.colors.BLUE, bold=True))
 97 |     # Print current state if available
 98 |     if hasattr(agent_output, "current_state") and agent_output.current_state:
 99 |         print(typer.style("🧠 Agent State:", fg=typer.colors.MAGENTA))
100 |         print(agent_output.current_state)
101 |     # Print actions
102 |     if hasattr(agent_output, "action") and agent_output.action:
103 |         print(typer.style("🎬 Actions:", fg=typer.colors.GREEN))
104 |         for action in agent_output.action:
105 |             # Try to get action_type and action_input if present, else print the action itself
106 |             action_type = getattr(action, "action_type", None)
107 |             action_input = getattr(action, "action_input", None)
108 |             if action_type is not None or action_input is not None:
109 |                 print(f"  - {action_type or 'Unknown action'}: {action_input or ''}")
110 |             else:
111 |                 print(f"  - {action}")
112 |     # Optionally print observation if present in browser_state
113 |     if hasattr(browser_state, "observation") and browser_state.observation:
114 |         obs = browser_state.observation
115 |         print(typer.style("👀 Observation:", fg=typer.colors.CYAN))
116 |         print(str(obs)[:200] + "..." if obs and len(str(obs)) > 200 else obs)
117 | 
118 | 
async def _run_browser_agent_logic_cli(task_str: str, current_settings: AppSettings) -> str:
    """Run a single BrowserUseAgent task from the CLI and return its final result.

    Builds the LLM(s), controller, browser and context from *current_settings*,
    runs the agent to completion, and always tears down the resources this
    function created (a user-owned browser is left running).

    Args:
        task_str: The task/objective handed to the agent.
        current_settings: Fully-loaded application settings.

    Returns:
        The agent's final result string, or an ``"Error: ..."`` message on failure.
    """
    logger.info(f"CLI: Starting run_browser_agent task: {task_str[:100]}...")
    agent_task_id = str(uuid.uuid4())
    final_result = "Error: Agent execution failed."

    browser_instance: Optional[CustomBrowser] = None
    context_instance: Optional[CustomBrowserContext] = None
    controller_instance: Optional[CustomController] = None

    try:
        # LLM Setup: main model plus an optional planner model.
        main_llm_config = current_settings.get_llm_config()
        main_llm = internal_llm_provider.get_llm_model(**main_llm_config)
        planner_llm = None
        if current_settings.llm.planner_provider and current_settings.llm.planner_model_name:
            planner_llm_config = current_settings.get_llm_config(is_planner=True)
            planner_llm = internal_llm_provider.get_llm_model(**planner_llm_config)

        # Controller Setup: the interactive CLI prompt serves as the ask-human hook.
        controller_instance = CustomController(ask_assistant_callback=cli_ask_human_callback)
        if current_settings.server.mcp_config:
            mcp_dict_config = current_settings.server.mcp_config
            if isinstance(current_settings.server.mcp_config, str):
                # The MCP config may arrive as a JSON string from the environment.
                mcp_dict_config = json.loads(current_settings.server.mcp_config)
            await controller_instance.setup_mcp_client(mcp_dict_config)

        # Browser and Context Setup: agent_tool overrides win over browser defaults.
        agent_headless_override = current_settings.agent_tool.headless
        browser_headless = agent_headless_override if agent_headless_override is not None else current_settings.browser.headless
        agent_disable_security_override = current_settings.agent_tool.disable_security
        browser_disable_security = agent_disable_security_override if agent_disable_security_override is not None else current_settings.browser.disable_security

        if current_settings.browser.use_own_browser and current_settings.browser.cdp_url:
            # Attach to a user-launched browser over CDP; that browser's own
            # launch settings (headless, binary, window size) apply.
            browser_cfg = BrowserConfig(cdp_url=current_settings.browser.cdp_url, wss_url=current_settings.browser.wss_url, user_data_dir=current_settings.browser.user_data_dir)
        else:
            browser_cfg = BrowserConfig(
                headless=browser_headless,
                disable_security=browser_disable_security,
                browser_binary_path=current_settings.browser.binary_path,
                user_data_dir=current_settings.browser.user_data_dir,
                window_width=current_settings.browser.window_width,
                window_height=current_settings.browser.window_height,
            )
        browser_instance = CustomBrowser(config=browser_cfg)

        context_cfg = CustomBrowserContextConfig(
            trace_path=current_settings.browser.trace_path,
            save_downloads_path=current_settings.paths.downloads,
            save_recording_path=current_settings.agent_tool.save_recording_path if current_settings.agent_tool.enable_recording else None,
            force_new_context=True # CLI always gets a new context
        )
        context_instance = await browser_instance.new_context(config=context_cfg)

        # History persistence is optional: only when a base path is configured.
        agent_history_json_file = None
        task_history_base_path = current_settings.agent_tool.history_path

        if task_history_base_path:
            task_specific_history_dir = Path(task_history_base_path) / agent_task_id
            task_specific_history_dir.mkdir(parents=True, exist_ok=True)
            agent_history_json_file = str(task_specific_history_dir / f"{agent_task_id}.json")
            logger.info(f"Agent history will be saved to: {agent_history_json_file}")

        # Agent Instantiation
        agent_instance = BrowserUseAgent(
            task=task_str, llm=main_llm,
            browser=browser_instance, browser_context=context_instance, controller=controller_instance,
            planner_llm=planner_llm,
            max_actions_per_step=current_settings.agent_tool.max_actions_per_step,
            use_vision=current_settings.agent_tool.use_vision,
            register_new_step_callback=cli_on_step_callback,
        )

        # Run Agent
        history: AgentHistoryList = await agent_instance.run(max_steps=current_settings.agent_tool.max_steps)
        # Bugfix: only persist history when a history path was configured.
        # Previously save_history() was called unconditionally with None
        # (the server-side tool already guards this).
        if agent_history_json_file:
            agent_instance.save_history(agent_history_json_file)
        final_result = history.final_result() or "Agent finished without a final result."
        logger.info(f"CLI Agent task {agent_task_id} completed.")

    except Exception as e:
        logger.error(f"CLI Error in run_browser_agent: {e}\n{traceback.format_exc()}")
        final_result = f"Error: {e}"
    finally:
        # Tear down in reverse order of creation; keep a user-owned browser alive.
        if context_instance: await context_instance.close()
        if browser_instance and not current_settings.browser.use_own_browser : await browser_instance.close() # Only close if we launched it
        if controller_instance: await controller_instance.close_mcp_client()

    return final_result
206 | 
207 | 
async def _run_deep_research_logic_cli(research_task_str: str, max_parallel_browsers_override: Optional[int], current_settings: AppSettings) -> str:
    """Run a DeepResearchAgent task from the CLI and return the report text.

    Args:
        research_task_str: The topic or question to research.
        max_parallel_browsers_override: When given, overrides
            ``research_tool.max_parallel_browsers`` from settings.
        current_settings: Fully-loaded application settings.

    Returns:
        The generated markdown report (prefixed with its file path), or an
        ``"Error: ..."`` message on failure.
    """
    logger.info(f"CLI: Starting run_deep_research task: {research_task_str[:100]}...")
    task_id = str(uuid.uuid4())
    report_content = "Error: Deep research failed."

    try:
        # The research agent reuses the main (non-planner) LLM configuration.
        main_llm_config = current_settings.get_llm_config()
        research_llm = internal_llm_provider.get_llm_model(**main_llm_config)

        # Browser settings handed down to each parallel research sub-agent.
        dr_browser_cfg = {
            "headless": current_settings.browser.headless,
            "disable_security": current_settings.browser.disable_security,
            "browser_binary_path": current_settings.browser.binary_path,
            "user_data_dir": current_settings.browser.user_data_dir,
            "window_width": current_settings.browser.window_width,
            "window_height": current_settings.browser.window_height,
            "trace_path": current_settings.browser.trace_path,
            "save_downloads_path": current_settings.paths.downloads,
        }
        if current_settings.browser.use_own_browser and current_settings.browser.cdp_url:
            # Sub-agents attach to the same user-owned browser over CDP.
            dr_browser_cfg["cdp_url"] = current_settings.browser.cdp_url
            dr_browser_cfg["wss_url"] = current_settings.browser.wss_url

        # MCP config may arrive as a JSON string from the environment.
        mcp_server_config_for_agent = None
        if current_settings.server.mcp_config:
            mcp_server_config_for_agent = current_settings.server.mcp_config
            if isinstance(current_settings.server.mcp_config, str):
                 mcp_server_config_for_agent = json.loads(current_settings.server.mcp_config)

        agent_instance = DeepResearchAgent(
            llm=research_llm, browser_config=dr_browser_cfg,
            mcp_server_config=mcp_server_config_for_agent,
        )

        current_max_parallel_browsers = max_parallel_browsers_override if max_parallel_browsers_override is not None else current_settings.research_tool.max_parallel_browsers

        # NOTE(review): unlike the server tool, this assumes
        # research_tool.save_dir is set; os.path.join would raise on None.
        # Confirm save_dir is mandatory for CLI runs (the __main__ guard
        # only defaults it for direct script execution).
        save_dir_for_task = os.path.join(current_settings.research_tool.save_dir, task_id)
        os.makedirs(save_dir_for_task, exist_ok=True)

        logger.info(f"CLI Deep research save directory: {save_dir_for_task}")
        logger.info(f"CLI Using max_parallel_browsers: {current_max_parallel_browsers}")

        result_dict = await agent_instance.run(
            topic=research_task_str, task_id=task_id,
            save_dir=save_dir_for_task, max_parallel_browsers=current_max_parallel_browsers
        )

        # Prefer returning the on-disk report; fall back to the raw result dict.
        report_file_path = result_dict.get("report_file_path")
        if report_file_path and os.path.exists(report_file_path):
            with open(report_file_path, "r", encoding="utf-8") as f:
                markdown_content = f.read()
            report_content = f"Deep research report generated successfully at {report_file_path}\n\n{markdown_content}"
            logger.info(f"CLI Deep research task {task_id} completed. Report at {report_file_path}")
        else:
            report_content = f"Deep research completed, but report file not found. Result: {result_dict}"
            logger.warning(f"CLI Deep research task {task_id} result: {result_dict}, report file path missing or invalid.")

    except Exception as e:
        logger.error(f"CLI Error in run_deep_research: {e}\n{traceback.format_exc()}")
        report_content = f"Error: {e}"

    return report_content
270 | 
271 | 
@app.command()
def run_browser_agent(
    task: str = typer.Argument(..., help="The primary task or objective for the browser agent."),
):
    """Runs a browser agent task and prints the result."""
    # Guard: settings must have been loaded by the CLI callback.
    if not cli_state.settings:
        typer.secho("Error: Application settings not loaded. Use --env-file or set environment variables.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    typer.secho(f"Executing browser agent task: {task}", fg=typer.colors.GREEN)
    try:
        outcome = asyncio.run(_run_browser_agent_logic_cli(task, cli_state.settings))
        typer.secho("\n--- Agent Final Result ---", fg=typer.colors.BLUE, bold=True)
        print(outcome)
    except Exception as e:
        typer.secho(f"CLI command failed: {e}", fg=typer.colors.RED)
        logger.error(f"CLI run_browser_agent command failed: {e}\n{traceback.format_exc()}")
        raise typer.Exit(code=1)
290 | 
@app.command()
def run_deep_research(
    research_task: str = typer.Argument(..., help="The topic or question for deep research."),
    max_parallel_browsers: Optional[int] = typer.Option(None, "--max-parallel-browsers", "-p", help="Override max parallel browsers from settings.")
):
    """Performs deep web research and prints the report."""
    # Guard: settings must have been loaded by the CLI callback.
    if not cli_state.settings:
        typer.secho("Error: Application settings not loaded. Use --env-file or set environment variables.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    typer.secho(f"Executing deep research task: {research_task}", fg=typer.colors.GREEN)
    try:
        report = asyncio.run(_run_deep_research_logic_cli(research_task, max_parallel_browsers, cli_state.settings))
        typer.secho("\n--- Deep Research Final Report ---", fg=typer.colors.BLUE, bold=True)
        print(report)
    except Exception as e:
        typer.secho(f"CLI command failed: {e}", fg=typer.colors.RED)
        logger.error(f"CLI run_deep_research command failed: {e}\n{traceback.format_exc()}")
        raise typer.Exit(code=1)
310 | 
311 | if __name__ == "__main__":
312 |     # This allows running `python src/mcp_server_browser_use/cli.py ...`
313 |     # Set a default log level if run directly for dev purposes, can be overridden by CLI args
314 |     if not os.getenv("MCP_SERVER_LOGGING_LEVEL"): # Check if already set
315 |         os.environ["MCP_SERVER_LOGGING_LEVEL"] = "DEBUG"
316 |     if not os.getenv("MCP_RESEARCH_TOOL_SAVE_DIR"): # Ensure mandatory var is set for local dev
317 |         print("Warning: MCP_RESEARCH_TOOL_SAVE_DIR not set. Defaulting to './tmp/deep_research_cli_default' for this run.", file=sys.stderr)
318 |         os.environ["MCP_RESEARCH_TOOL_SAVE_DIR"] = "./tmp/deep_research_cli_default"
319 | 
320 |     app()
321 | 
```

--------------------------------------------------------------------------------
/src/mcp_server_browser_use/server.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import json
  3 | import logging
  4 | import os
  5 | import traceback
  6 | import uuid
  7 | from typing import Any, Dict, Optional
  8 | from pathlib import Path
  9 | 
 10 | 
 11 | from .config import settings # Import global AppSettings instance
 12 | 
# Configure logging using settings. This runs at import time, before the
# browser_use / mcp imports below, so their loggers inherit this config.
log_level_str = settings.server.logging_level.upper()
# Fall back to INFO if the configured level name is not a logging attribute.
numeric_level = getattr(logging, log_level_str, logging.INFO)

# Remove any existing handlers from the root logger to avoid duplicate messages
# if basicConfig was called elsewhere or by a library.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    level=numeric_level,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    # Log to the configured file (append mode) or to stderr when unset.
    filename=settings.server.log_file if settings.server.log_file else None,
    filemode="a" if settings.server.log_file else None, # only use filemode if filename is set
    force=True # Override any previous basicConfig
)

logger = logging.getLogger("mcp_server_browser_use")
# Prevent log propagation if other loggers are configured higher up
# logging.getLogger().propagate = False # This might be too aggressive, let's rely on basicConfig force
 33 | 
 34 | from browser_use.browser.browser import BrowserConfig
 35 | from mcp.server.fastmcp import Context, FastMCP
 36 | 
 37 | # Import from _internal
 38 | from ._internal.agent.browser_use.browser_use_agent import BrowserUseAgent
 39 | from ._internal.agent.deep_research.deep_research_agent import DeepResearchAgent
 40 | from ._internal.browser.custom_browser import CustomBrowser
 41 | from ._internal.browser.custom_context import (
 42 |     CustomBrowserContext,
 43 |     CustomBrowserContextConfig,
 44 | )
 45 | from ._internal.controller.custom_controller import CustomController
 46 | from ._internal.utils import llm_provider as internal_llm_provider # aliased
 47 | 
 48 | from browser_use.agent.views import (
 49 |     AgentHistoryList,
 50 | )
 51 | 
# Shared resources for MCP_BROWSER_KEEP_OPEN: when settings.browser.keep_open
# is true, the browser/context/controller created for the first tool call are
# cached here and reused by subsequent calls.
shared_browser_instance: Optional[CustomBrowser] = None
shared_context_instance: Optional[CustomBrowserContext] = None
shared_controller_instance: Optional[CustomController] = None # Controller might also be shared
# Serializes creation/reuse of the shared instances across concurrent tool calls.
resource_lock: asyncio.Lock = asyncio.Lock()
 57 | 
 58 | 
 59 | async def get_controller(ask_human_callback: Optional[Any] = None) -> CustomController:
 60 |     """Gets or creates a shared controller instance if keep_open is true, or a new one."""
 61 |     global shared_controller_instance
 62 |     if settings.browser.keep_open and shared_controller_instance:
 63 |         # Potentially update callback if it can change per call, though usually fixed for server
 64 |         return shared_controller_instance
 65 | 
 66 |     controller = CustomController(ask_assistant_callback=ask_human_callback)
 67 |     if settings.server.mcp_config:
 68 |         try:
 69 |             mcp_dict_config = settings.server.mcp_config
 70 |             if isinstance(settings.server.mcp_config, str): # if passed as JSON string
 71 |                 mcp_dict_config = json.loads(settings.server.mcp_config)
 72 |             await controller.setup_mcp_client(mcp_dict_config)
 73 |         except Exception as e:
 74 |             logger.error(f"Failed to setup MCP client for controller: {e}")
 75 | 
 76 |     if settings.browser.keep_open:
 77 |         shared_controller_instance = controller
 78 |     return controller
 79 | 
 80 | 
async def get_browser_and_context() -> tuple[CustomBrowser, CustomBrowserContext]:
    """
    Manages creation/reuse of CustomBrowser and CustomBrowserContext
    based on settings.browser.keep_open and settings.browser.use_own_browser.

    Three modes:
      1. use_own_browser + cdp_url: attach to a user-launched browser via CDP
         (a fresh context per call).
      2. keep_open: create shared browser/context once and reuse them,
         recreating both if the shared browser has disconnected.
      3. default: fresh browser and context for every call.

    Callers are expected to hold ``resource_lock`` when this may touch the
    shared instances.

    Returns:
        (browser, context) tuple; never None (raises RuntimeError instead).

    Raises:
        RuntimeError: if either resource could not be initialized.
    """
    global shared_browser_instance, shared_context_instance

    current_browser: Optional[CustomBrowser] = None
    current_context: Optional[CustomBrowserContext] = None

    # agent_tool settings, when set, override the general browser settings.
    agent_headless_override = settings.agent_tool.headless
    browser_headless = agent_headless_override if agent_headless_override is not None else settings.browser.headless

    agent_disable_security_override = settings.agent_tool.disable_security
    browser_disable_security = agent_disable_security_override if agent_disable_security_override is not None else settings.browser.disable_security

    if settings.browser.use_own_browser and settings.browser.cdp_url:
        # Mode 1: attach to the user's browser over CDP.
        logger.info(f"Connecting to own browser via CDP: {settings.browser.cdp_url}")
        browser_cfg = BrowserConfig(
            cdp_url=settings.browser.cdp_url,
            wss_url=settings.browser.wss_url,
            user_data_dir=settings.browser.user_data_dir, # Useful for CDP
            # Headless, binary_path etc. are controlled by the user-launched browser
        )
        current_browser = CustomBrowser(config=browser_cfg)
        # For CDP, context config is minimal, trace/recording might not apply or be harder to manage
        context_cfg = CustomBrowserContextConfig(
            trace_path=settings.browser.trace_path,
            save_downloads_path=settings.paths.downloads,
            save_recording_path=settings.agent_tool.save_recording_path if settings.agent_tool.enable_recording else None,
        )
        current_context = await current_browser.new_context(config=context_cfg)

    elif settings.browser.keep_open:
        # Mode 2: reuse shared resources, validating the browser connection first.
        if shared_browser_instance and shared_context_instance:
            logger.info("Reusing shared browser and context.")
            # Ensure browser is still connected
            if not shared_browser_instance.is_connected():
                logger.warning("Shared browser was disconnected. Recreating.")
                if shared_context_instance: await shared_context_instance.close() # Close old context too
                await shared_browser_instance.close() # Close browser after context
                shared_browser_instance = None
                shared_context_instance = None
            else:
                current_browser = shared_browser_instance
                # For shared browser, we might want a new context or reuse.
                # For simplicity, let's reuse the context if keep_open is true.
                # If new context per call is needed, this logic would change.
                current_context = shared_context_instance

        if not current_browser or not current_context : # If shared instances were not valid or not yet created
            logger.info("Creating new shared browser and context.")
            # NOTE: trace/recording settings are captured at first creation and
            # remain fixed for the lifetime of the shared context.
            browser_cfg = BrowserConfig(
                headless=browser_headless,
                disable_security=browser_disable_security,
                browser_binary_path=settings.browser.binary_path,
                user_data_dir=settings.browser.user_data_dir,
                window_width=settings.browser.window_width,
                window_height=settings.browser.window_height,
            )
            shared_browser_instance = CustomBrowser(config=browser_cfg)
            context_cfg = CustomBrowserContextConfig(
                trace_path=settings.browser.trace_path,
                save_downloads_path=settings.paths.downloads,
                save_recording_path=settings.agent_tool.save_recording_path if settings.agent_tool.enable_recording else None,
                force_new_context=False # Important for shared context
            )
            shared_context_instance = await shared_browser_instance.new_context(config=context_cfg)
            current_browser = shared_browser_instance
            current_context = shared_context_instance
    else: # Create new resources per call (not using own browser, not keeping open)
        # Mode 3: per-call browser and context; caller closes them after use.
        logger.info("Creating new browser and context for this call.")
        browser_cfg = BrowserConfig(
            headless=browser_headless,
            disable_security=browser_disable_security,
            browser_binary_path=settings.browser.binary_path,
            user_data_dir=settings.browser.user_data_dir,
            window_width=settings.browser.window_width,
            window_height=settings.browser.window_height,
        )
        current_browser = CustomBrowser(config=browser_cfg)
        context_cfg = CustomBrowserContextConfig(
            trace_path=settings.browser.trace_path,
            save_downloads_path=settings.paths.downloads,
            save_recording_path=settings.agent_tool.save_recording_path if settings.agent_tool.enable_recording else None,
            force_new_context=True
        )
        current_context = await current_browser.new_context(config=context_cfg)

    if not current_browser or not current_context:
        raise RuntimeError("Failed to initialize browser or context")

    return current_browser, current_context
174 | 
175 | 
def serve() -> FastMCP:
    """Build the FastMCP server and register the two browser tools.

    Tools:
        run_browser_agent: run a single BrowserUseAgent task.
        run_deep_research: run a DeepResearchAgent task and return the report.
    """
    server = FastMCP("mcp_server_browser_use")

    @server.tool()
    async def run_browser_agent(ctx: Context, task: str) -> str:
        # Executes one agent task; resource lifetime depends on keep_open /
        # use_own_browser settings (see the finally block below).
        logger.info(f"Received run_browser_agent task: {task[:100]}...")
        agent_task_id = str(uuid.uuid4())
        final_result = "Error: Agent execution failed."

        browser_instance: Optional[CustomBrowser] = None
        context_instance: Optional[CustomBrowserContext] = None
        controller_instance: Optional[CustomController] = None

        try:
            async with resource_lock: # Protect shared resource access/creation
                browser_instance, context_instance = await get_browser_and_context()
                # For server, ask_human_callback is likely not interactive, can be None or a placeholder
                controller_instance = await get_controller(ask_human_callback=None)

            if not browser_instance or not context_instance or not controller_instance:
                 raise RuntimeError("Failed to acquire browser resources or controller.")

            # Main LLM plus optional planner LLM.
            main_llm_config = settings.get_llm_config()
            main_llm = internal_llm_provider.get_llm_model(**main_llm_config)

            planner_llm = None
            if settings.llm.planner_provider and settings.llm.planner_model_name:
                planner_llm_config = settings.get_llm_config(is_planner=True)
                planner_llm = internal_llm_provider.get_llm_model(**planner_llm_config)

            # History persistence is optional, keyed by a per-task UUID.
            agent_history_json_file = None
            task_history_base_path = settings.agent_tool.history_path

            if task_history_base_path:
                task_specific_history_dir = Path(task_history_base_path) / agent_task_id
                task_specific_history_dir.mkdir(parents=True, exist_ok=True)
                agent_history_json_file = str(task_specific_history_dir / f"{agent_task_id}.json")
                logger.info(f"Agent history will be saved to: {agent_history_json_file}")

            agent_instance = BrowserUseAgent(
                task=task,
                llm=main_llm,
                browser=browser_instance,
                browser_context=context_instance,
                controller=controller_instance,
                planner_llm=planner_llm,
                max_actions_per_step=settings.agent_tool.max_actions_per_step,
                use_vision=settings.agent_tool.use_vision,
            )

            history: AgentHistoryList = await agent_instance.run(max_steps=settings.agent_tool.max_steps)

            if agent_history_json_file:
                agent_instance.save_history(agent_history_json_file)

            final_result = history.final_result() or "Agent finished without a final result."
            logger.info(f"Agent task completed. Result: {final_result[:100]}...")

        except Exception as e:
            logger.error(f"Error in run_browser_agent: {e}\n{traceback.format_exc()}")
            final_result = f"Error: {e}"
        finally:
            # Per-call resources are closed here; shared (keep_open) resources
            # are intentionally left alive for the next call.
            if not settings.browser.keep_open and not settings.browser.use_own_browser:
                logger.info("Closing browser resources for this call.")
                if context_instance:
                    await context_instance.close()
                if browser_instance:
                    await browser_instance.close()
                if controller_instance: # Close controller only if not shared
                    await controller_instance.close_mcp_client()
            elif settings.browser.use_own_browser: # Own browser, only close controller if not shared
                 # NOTE(review): in this branch the per-call CDP context created
                 # by get_browser_and_context() is never closed here — possible
                 # context leak across calls; confirm intended.
                 if controller_instance and not (settings.browser.keep_open and controller_instance == shared_controller_instance):
                    await controller_instance.close_mcp_client()
        return final_result

    @server.tool()
    async def run_deep_research(
        ctx: Context,
        research_task: str,
        max_parallel_browsers_override: Optional[int] = None,
    ) -> str:
        # Runs the multi-browser research agent; report may be persisted to
        # disk (when save_dir is configured) or returned from memory.
        logger.info(f"Received run_deep_research task: {research_task[:100]}...")
        task_id = str(uuid.uuid4()) # This task_id is used for the sub-directory name
        report_content = "Error: Deep research failed."

        try:
            main_llm_config = settings.get_llm_config() # Deep research uses main LLM config
            research_llm = internal_llm_provider.get_llm_model(**main_llm_config)

            # Prepare browser_config dict for DeepResearchAgent's sub-agents
            dr_browser_cfg = {
                "headless": settings.browser.headless, # Use general browser headless for sub-tasks
                "disable_security": settings.browser.disable_security,
                "browser_binary_path": settings.browser.binary_path,
                "user_data_dir": settings.browser.user_data_dir,
                "window_width": settings.browser.window_width,
                "window_height": settings.browser.window_height,
                "trace_path": settings.browser.trace_path, # For sub-agent traces
                "save_downloads_path": settings.paths.downloads, # For sub-agent downloads
            }
            if settings.browser.use_own_browser and settings.browser.cdp_url:
                # If main browser is CDP, sub-agents should also use it
                dr_browser_cfg["cdp_url"] = settings.browser.cdp_url
                dr_browser_cfg["wss_url"] = settings.browser.wss_url

            # MCP config may be a JSON string from the environment.
            mcp_server_config_for_agent = None
            if settings.server.mcp_config:
                mcp_server_config_for_agent = settings.server.mcp_config
                if isinstance(settings.server.mcp_config, str):
                     mcp_server_config_for_agent = json.loads(settings.server.mcp_config)

            agent_instance = DeepResearchAgent(
                llm=research_llm,
                browser_config=dr_browser_cfg,
                mcp_server_config=mcp_server_config_for_agent,
            )

            current_max_parallel_browsers = max_parallel_browsers_override if max_parallel_browsers_override is not None else settings.research_tool.max_parallel_browsers

            # Check if save_dir is provided, otherwise use in-memory approach
            save_dir_for_this_task = None
            if settings.research_tool.save_dir:
                # If save_dir is provided, construct the full save directory path for this specific task
                save_dir_for_this_task = str(Path(settings.research_tool.save_dir) / task_id)
                logger.info(f"Deep research save directory for this task: {save_dir_for_this_task}")
            else:
                logger.info("No save_dir configured. Deep research will operate in memory-only mode.")

            logger.info(f"Using max_parallel_browsers: {current_max_parallel_browsers}")

            result_dict = await agent_instance.run(
                topic=research_task,
                save_dir=save_dir_for_this_task, # Can be None now
                task_id=task_id, # Pass the generated task_id
                max_parallel_browsers=current_max_parallel_browsers
            )

            # Handle the result based on if files were saved or not
            if save_dir_for_this_task and result_dict.get("report_file_path") and Path(result_dict["report_file_path"]).exists():
                with open(result_dict["report_file_path"], "r", encoding="utf-8") as f:
                    markdown_content = f.read()
                report_content = f"Deep research report generated successfully at {result_dict['report_file_path']}\n\n{markdown_content}"
                logger.info(f"Deep research task {task_id} completed. Report at {result_dict['report_file_path']}")
            elif result_dict.get("status") == "completed" and result_dict.get("final_report"):
                # Memory-only mode: report content comes back in the result dict.
                report_content = f"Deep research completed. Report content:\n\n{result_dict['final_report']}"
                if result_dict.get("report_file_path"):
                     report_content += f"\n(Expected report file at: {result_dict['report_file_path']})"
                logger.info(f"Deep research task {task_id} completed. Report content retrieved directly.")
            else:
                report_content = f"Deep research task {task_id} result: {result_dict}. Report file not found or content not available."
                logger.warning(report_content)


        except Exception as e:
            logger.error(f"Error in run_deep_research: {e}\n{traceback.format_exc()}")
            report_content = f"Error: {e}"

        return report_content

    return server
336 | 
# Module-level server instance, created at import time so MCP tooling can
# discover it without calling main().
server_instance = serve() # Renamed from 'server' to avoid conflict with 'settings.server'
338 | 
def main():
    """Log the effective configuration, then start the MCP server loop."""
    logger.info("Starting MCP server for browser-use...")
    try:
        # Just log the Research tool save directory if it's configured
        if settings.research_tool.save_dir:
            logger.info(f"Research tool save directory configured: {settings.research_tool.save_dir}")
        else:
            logger.info("Research tool save directory not configured. Deep research will operate in memory-only mode.")
    except Exception as e:
        logger.error(f"Configuration error: {e}")
        return  # Abort startup on a configuration error.

    logger.info(f"Loaded settings with LLM provider: {settings.llm.provider}, Model: {settings.llm.model_name}")
    logger.info(f"Browser keep_open: {settings.browser.keep_open}, Use own browser: {settings.browser.use_own_browser}")
    if settings.browser.use_own_browser:
        logger.info(f"Connecting to own browser via CDP: {settings.browser.cdp_url}")
    server_instance.run()
356 | 
357 | if __name__ == "__main__":
358 |     main()
359 | 
```
Page 1/2FirstPrevNextLast