# Directory Structure
```
├── .github
│ └── workflows
│ └── release.yml
├── .gitignore
├── LICENSE
├── pyproject.toml
├── README.md
├── src
│ └── mcp_browser_use
│ ├── __init__.py
│ ├── server.py
│ └── utils.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# mcp-browser-use: MCP server for browser-use
[](https://pypi.org/project/mcp-browser-use/) [](https://pypi.org/project/mcp-browser-use/) [](https://pypi.org/project/mcp-browser-use/)
**mcp-browser-use** is the easiest way to connect any MCP client (like Claude or Cursor) with the browser using [browser-use](https://github.com/browser-use/browser-use).
Unlike other `browser-use` MCPs that make you pay for an LLM API key, this one just uses the LLM that's already set up in your MCP client.
[📺 Demo](https://x.com/vortex_ape/status/1900953901588729864)
## Quickstart
You can start using `mcp-browser-use` with an MCP client by putting the following command in the relevant config:
```bash
uvx mcp-browser-use
```
**Note**: Provide the full path to uvx to prevent MCP client failing to start the server.
## Contributing
Contributions are welcome! Please feel free to submit a pull request.
## Versioning
`mcp-browser-use` uses [Semantic Versioning](https://semver.org/). For the available versions, see the tags on the GitHub repository.
## License
This project is licensed under the Apache 2.0 License, see the [LICENSE](https://github.com/vinayak-mehta/mcp-browser-use/blob/master/LICENSE) file for details.
```
--------------------------------------------------------------------------------
/src/mcp_browser_use/__init__.py:
--------------------------------------------------------------------------------
```python
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
[project]
name = "mcp-browser-use"
version = "0.1.5"
description = "MCP server for browser-use"
requires-python = ">=3.10"
authors = [
{name = "Vinayak Mehta", email = "[email protected]"},
]
dependencies = [
"mcp>=0.1.0",
"python-dotenv>=1.0.0",
"fastapi>=0.109.0",
"uvicorn>=0.27.0",
"playwright>=1.50.0",
"browser-use>=0.1.40",
]
readme = "README.md"
license = {text = "Apache-2.0"}
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: Apache Software License",
]
[project.urls]
"Homepage" = "https://github.com/vinayak-mehta/mcp-browser-use"
"Bug Tracker" = "https://github.com/vinayak-mehta/mcp-browser-use/issues"
[project.scripts]
mcp-browser-use = "mcp_browser_use.server:main"
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools]
package-dir = {"" = "src"}
packages = ["mcp_browser_use"]
```
--------------------------------------------------------------------------------
/src/mcp_browser_use/utils.py:
--------------------------------------------------------------------------------
```python
# ruff: noqa: E402
import logging
import os
import subprocess
import sys
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logging.getLogger("browser_use").setLevel(logging.CRITICAL)
logging.getLogger("playwright").setLevel(logging.CRITICAL)
def check_playwright_installation():
"""
Check if Playwright is installed and properly set up.
Returns:
bool: True if Playwright is installed, False otherwise
"""
try:
# Try to import playwright to check if it's installed
import playwright
# Check if browsers are installed
try:
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
# Try to launch a browser to verify installation
browser = p.chromium.launch(headless=True)
browser.close()
return True
except Exception as e:
if "Executable doesn't exist" in str(e):
logger.error("Playwright browsers are not installed. Installing now...")
try:
# Redirect stdout and stderr to /dev/null to suppress progress bars
with open(os.devnull, "w") as devnull:
subprocess.run(
[sys.executable, "-m", "playwright", "install", "chromium"],
stdout=devnull,
stderr=devnull,
check=True,
)
logger.info("Playwright browsers installed successfully.")
return True
except subprocess.CalledProcessError:
logger.error(
"Failed to install Playwright browsers. Please run 'playwright install' manually."
)
return False
else:
logger.error(f"Error checking Playwright installation: {e}")
return False
except ImportError:
logger.error(
"Playwright is not installed. Please install it with 'pip install playwright'"
)
return False
```
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
```yaml
name: Publish Python 🐍 distribution 📦 to PyPI
on: push
jobs:
build:
name: Build distribution 📦
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
- name: Install pypa/build
run: >-
python3 -m
pip install
build
--user
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v4
with:
name: python-package-distributions
path: dist/
publish-to-pypi:
name: >-
Publish Python 🐍 distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
needs:
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/mcp-browser-use
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
- name: Download all the dists
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
github-release:
name: >-
Sign the Python 🐍 distribution 📦 with Sigstore
and upload them to GitHub Release
needs:
- publish-to-pypi
runs-on: ubuntu-latest
permissions:
contents: write # IMPORTANT: mandatory for making GitHub Releases
id-token: write # IMPORTANT: mandatory for sigstore
steps:
- name: Download all the dists
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
- name: Sign the dists with Sigstore
uses: sigstore/[email protected]
with:
inputs: >-
./dist/*.tar.gz
./dist/*.whl
- name: Create GitHub Release
env:
GITHUB_TOKEN: ${{ github.token }}
run: >-
gh release create
"$GITHUB_REF_NAME"
--repo "$GITHUB_REPOSITORY"
--notes ""
- name: Upload artifact signatures to GitHub Release
env:
GITHUB_TOKEN: ${{ github.token }}
# Upload to GitHub Release using the `gh` CLI.
# `dist/` contains the built packages, and the
# sigstore-produced signatures and certificates.
run: >-
gh release upload
"$GITHUB_REF_NAME" dist/**
--repo "$GITHUB_REPOSITORY"
```
--------------------------------------------------------------------------------
/src/mcp_browser_use/server.py:
--------------------------------------------------------------------------------
```python
# ruff: noqa: E402
import asyncio
import logging
import sys
from typing import Optional
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logging.getLogger("browser_use").setLevel(logging.CRITICAL)
logging.getLogger("playwright").setLevel(logging.CRITICAL)
import json
import markdownify
from browser_use.agent.message_manager.service import MessageManager
from browser_use.agent.prompts import AgentMessagePrompt, SystemPrompt
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext
from mcp.server.fastmcp import FastMCP
from .utils import check_playwright_installation
mcp = FastMCP("browser_use")
browser: Optional[Browser] = None
browser_context: Optional[BrowserContext] = None
message_manager: Optional[MessageManager] = None
@mcp.tool()
async def initialize_browser(headless: bool = False, task: str = "") -> str:
"""Initialize a new browser instance.
Args:
headless: Whether to run browser in headless mode
task: The task to be performed
Returns:
Status message
"""
global browser, browser_context
if browser:
await close_browser()
config = BrowserConfig(headless=headless)
browser = Browser(config=config)
browser_context = BrowserContext(browser=browser)
system_prompt = SystemPrompt(
action_description=(
"Available actions: initialize_browser, close_browser, search_google, go_to_url, go_back, wait, click_element, input_text, "
"switch_tab, open_tab, inspect_page, scroll_down, scroll_up, send_keys, scroll_to_text, "
"get_dropdown_options, select_dropdown_option, validate_page, done"
)
).get_system_message()
browser_system_prompt = f"""
{system_prompt.text()}
Your ultimate task is: {task}.
If you achieved your ultimate task, stop everything and use the done tool to complete the task.
If not, continue as usual.
"""
return browser_system_prompt
@mcp.tool()
async def close_browser() -> str:
"""Close the current browser instance.
Returns:
Status message
"""
global browser, browser_context
if browser_context:
await browser_context.close()
browser_context = None
if browser:
await browser.close()
browser = None
return "Browser closed successfully"
@mcp.tool()
async def search_google(query: str) -> str:
"""
Search the query in Google in the current tab.
Args:
query (str): The search query to use in Google
Returns:
str: A message confirming the search was performed
"""
page = await browser_context.get_current_page()
await page.goto(f"https://www.google.com/search?q={query}&udm=14")
await page.wait_for_load_state()
return f'🔍 Searched for "{query}" in Google'
@mcp.tool()
async def go_to_url(url: str) -> str:
"""
Navigate to URL in the current tab.
Args:
url (str): The URL to navigate to
Returns:
str: A message confirming navigation
"""
page = await browser_context.get_current_page()
await page.goto(url)
await page.wait_for_load_state()
return f"🔗 Navigated to {url}"
@mcp.tool()
async def go_back() -> str:
"""
Go back to the previous page.
Returns:
str: A message confirming navigation back
"""
await browser_context.go_back()
return "🔙 Navigated back"
@mcp.tool()
async def wait(seconds: int = 3) -> str:
"""
Wait for the specified number of seconds.
Args:
seconds (int, optional): Number of seconds to wait. Defaults to 3.
Returns:
str: A message confirming the wait
"""
await asyncio.sleep(seconds)
return f"🕒 Waiting for {seconds} seconds"
@mcp.tool()
async def click_element(index: int) -> str:
"""
Click the element with the specified index.
Args:
index (int): The index of the element to click
Returns:
str: A message describing the result of the click action
"""
if index not in await browser_context.get_selector_map():
raise Exception(
f"Element with index {index} does not exist - retry or use alternative actions"
)
element_node = await browser_context.get_dom_element_by_index(index)
session = await browser_context.get_session()
initial_pages = len(session.context.pages)
# Check if element is a file uploader
if await browser_context.is_file_uploader(element_node):
return f"Index {index} - has an element which opens file upload dialog. Use a dedicated function for file uploads"
try:
download_path = await browser_context._click_element_node(element_node)
if download_path:
msg = f"💾 Downloaded file to {download_path}"
else:
msg = f"🖱️ Clicked button with index {index}: {element_node.get_all_text_till_next_clickable_element(max_depth=2)}"
# Handle new tab opening
if len(session.context.pages) > initial_pages:
msg += " - New tab opened - switching to it"
await browser_context.switch_to_tab(-1)
return msg
except Exception as e:
if "Element not found" in str(e) or "Failed to click element" in str(e):
# Wait a moment and try again
await asyncio.sleep(1)
try:
download_path = await browser_context._click_element_node(element_node)
if download_path:
msg = f"💾 Downloaded file to {download_path}"
else:
msg = f"🖱️ Clicked button with index {index}: {element_node.get_all_text_till_next_clickable_element(max_depth=2)}"
# Handle new tab opening
if len(session.context.pages) > initial_pages:
msg += " - New tab opened - switching to it"
await browser_context.switch_to_tab(-1)
return msg
except Exception:
raise Exception(
f"Failed to click element with index {index} even after waiting: {str(e)}"
)
else:
return f"Error clicking element with index {index}: {str(e)}. Call inspect_page() and try finding the element again."
@mcp.tool()
async def input_text(index: int, text: str, has_sensitive_data: bool = False) -> str:
"""
Input text into an interactive element at the specified index.
Args:
index (int): The index of the element to input text into
text (str): The text to input
has_sensitive_data (bool, optional): Whether the text is sensitive data. Defaults to False.
Returns:
str: A message confirming the text input
"""
if index not in await browser_context.get_selector_map():
raise Exception(
f"Element index {index} does not exist - retry or use alternative actions"
)
element_node = await browser_context.get_dom_element_by_index(index)
await browser_context._input_text_element_node(element_node, text)
if not has_sensitive_data:
return f"⌨️ Input {text} into index {index}"
else:
return f"⌨️ Input sensitive data into index {index}"
@mcp.tool()
async def switch_tab(page_id: int) -> str:
"""
Switch to the tab with the specified page ID.
Args:
page_id (int): The ID of the page to switch to
Returns:
str: A message confirming the tab switch
"""
await browser_context.switch_to_tab(page_id)
page = await browser_context.get_current_page()
await page.wait_for_load_state()
return f"🔄 Switched to tab {page_id}"
@mcp.tool()
async def open_tab(url: str) -> str:
"""
Open a URL in a new tab.
Args:
url (str): The URL to open in the new tab
Returns:
str: A message confirming the new tab was opened
"""
await browser_context.create_new_tab(url)
return f"🔗 Opened new tab with {url}"
@mcp.tool()
async def inspect_page() -> str:
"""
Lists interactive elements and extracts content from the current page.
Returns:
str: A formatted string that lists all interactive elements (if any) along with the content.
"""
# Get the current state to inspect interactive elements
state = await browser_context.get_state()
prompt_message = AgentMessagePrompt(
state,
include_attributes=["type", "role", "placeholder", "aria-label", "title"],
).get_user_message(use_vision=False)
return prompt_message.content
@mcp.tool()
async def scroll_down(amount: int = None) -> str:
"""
Scroll down the page by the specified amount.
Args:
amount (int, optional): Pixels to scroll down. If None, scrolls one page.
Returns:
str: A message confirming the scroll action
"""
page = await browser_context.get_current_page()
if amount is not None:
await page.evaluate(f"window.scrollBy(0, {amount});")
else:
await page.evaluate("window.scrollBy(0, window.innerHeight);")
amount_str = f"{amount} pixels" if amount is not None else "one page"
return f"🔍 Scrolled down the page by {amount_str}"
@mcp.tool()
async def scroll_up(amount: int = None) -> str:
"""
Scroll up the page by the specified amount.
Args:
amount (int, optional): Pixels to scroll up. If None, scrolls one page.
Returns:
str: A message confirming the scroll action
"""
page = await browser_context.get_current_page()
if amount is not None:
await page.evaluate(f"window.scrollBy(0, -{amount});")
else:
await page.evaluate("window.scrollBy(0, -window.innerHeight);")
amount_str = f"{amount} pixels" if amount is not None else "one page"
return f"🔍 Scrolled up the page by {amount_str}"
@mcp.tool()
async def send_keys(keys: str) -> str:
"""
Send keyboard keys or shortcuts to the current page.
Args:
keys (str): Keys to send, e.g. "Escape", "Enter", "Control+o"
Returns:
str: A message confirming the keys were sent
"""
page = await browser_context.get_current_page()
try:
await page.keyboard.press(keys)
except Exception as e:
if "Unknown key" in str(e):
for key in keys:
await page.keyboard.press(key)
else:
raise e
return f"⌨️ Sent keys: {keys}"
@mcp.tool()
async def scroll_to_text(text: str) -> str:
"""
Scroll to an element containing the specified text.
Args:
text (str): The text to find and scroll to.
Returns:
str: A message confirming the scroll action or indicating failure.
"""
page = await browser_context.get_current_page()
locators = [
page.get_by_text(text, exact=False),
page.locator(f"text={text}"),
page.locator(f"//*[contains(text(), '{text}')]"),
]
for locator in locators:
try:
if await locator.count() > 0 and await locator.first.is_visible():
await locator.first.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
return f"🔍 Scrolled to text: {text}"
except Exception:
continue
return f"Text '{text}' not found or not visible on page"
@mcp.tool()
async def get_dropdown_options(index: int) -> str:
"""
Get all options from a dropdown element.
Args:
index (int): The index of the dropdown element.
Returns:
str: A formatted string listing all dropdown options.
"""
page = await browser_context.get_current_page()
selector_map = await browser_context.get_selector_map()
dom_element = selector_map[index]
all_options = []
for frame in page.frames:
try:
options = await frame.evaluate(
"""
(xpath) => {
const select = document.evaluate(xpath, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
if (!select) return null;
return {
options: Array.from(select.options).map(opt => ({
text: opt.text,
value: opt.value,
index: opt.index
})),
id: select.id,
name: select.name
};
}
""",
dom_element.xpath,
)
if options:
formatted_options = []
for opt in options["options"]:
encoded_text = json.dumps(opt["text"])
formatted_options.append(f'{opt["index"]}: text={encoded_text}')
all_options.extend(formatted_options)
except Exception:
pass
if all_options:
msg = "\n".join(all_options)
msg += "\nUse the exact text string in select_dropdown_option"
return msg
else:
return "No options found in any frame for dropdown"
@mcp.tool()
async def select_dropdown_option(index: int, text: str) -> str:
"""
Select an option from a dropdown by its text.
Args:
index (int): The index of the dropdown element.
text (str): The exact text of the option to select.
Returns:
str: A message confirming the option was selected.
"""
page = await browser_context.get_current_page()
selector_map = await browser_context.get_selector_map()
dom_element = selector_map[index]
if dom_element.tag_name != "select":
return f"Cannot select option: Element with index {index} is a {dom_element.tag_name}, not a select"
for frame in page.frames:
try:
find_dropdown_js = """
(xpath) => {
try {
const select = document.evaluate(xpath, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
if (!select) return null;
if (select.tagName.toLowerCase() !== 'select') {
return { error: `Found element but it's a ${select.tagName}, not a SELECT`, found: false };
}
return {
id: select.id,
name: select.name,
found: true,
tagName: select.tagName,
optionCount: select.options.length,
currentValue: select.value,
availableOptions: Array.from(select.options).map(o => o.text.trim())
};
} catch (e) {
return { error: e.toString(), found: false };
}
}
"""
dropdown_info = await frame.evaluate(find_dropdown_js, dom_element.xpath)
if dropdown_info and dropdown_info.get("found"):
selected_option_values = (
await frame.locator("//" + dom_element.xpath)
.nth(0)
.select_option(label=text, timeout=1000)
)
return f"Selected option {text} with value {selected_option_values}"
except Exception:
pass
return f"Could not select option '{text}' in any frame"
@mcp.tool()
async def validate_page(expected_text: str = "") -> str:
"""
Validate the current page state by extracting content and optionally checking for expected text.
Args:
expected_text (str): Optional text expected to be present on the page.
Returns:
str: A message indicating whether the expected text was found or showing an extracted snippet.
"""
page = await browser_context.get_current_page()
content = markdownify.markdownify(await page.content())
if expected_text and expected_text.lower() in content.lower():
return (
f"✅ Validation successful: Expected text '{expected_text}' found on page."
)
elif expected_text:
return f"⚠ Validation warning: Expected text '{expected_text}' not found. Extracted snippet: {content[:200]}..."
else:
return f"Page content extracted:\n{content[:500]}..."
@mcp.tool()
async def done(success: bool = True, text: str = "") -> dict:
"""
Complete the task with a success flag and optional text.
Returns:
dict: A dictionary indicating completion status.
"""
return {"is_done": True, "success": success, "extracted_content": text}
def main():
"""Run the MCP server"""
if not check_playwright_installation():
logger.error("Playwright is not properly installed. Exiting.")
sys.exit(1)
logger.info("Starting MCP server for browser-use")
mcp.run(transport="stdio")
if __name__ == "__main__":
main()
```