# Directory Structure
```
├── .gitignore
├── bin
│ └── mcp-selenium.js
├── Dockerfile
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
└── src
└── lib
└── server.js
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual Environment
venv/
ENV/
env/
# IDE
.idea/
.vscode/
*.swp
*.swo
# Node
node_modules/
npm-debug.log*
# Misc
.DS_Store
.env
.env.local
.env.*.local
# Selenium
geckodriver.log
chromedriver.log
.goose/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
[MseeP.ai Security Assessment](https://mseep.ai/app/angiejones-mcp-selenium)
# MCP Selenium Server
A Model Context Protocol (MCP) server implementation for Selenium WebDriver, enabling browser automation through standardized MCP clients.
## Video Demo (Click to Watch)
[Watch the video demo on YouTube](https://youtu.be/mRV0N8hcgYA)
## Features
- Start browser sessions with customizable options
- Navigate to URLs
- Find elements using various locator strategies
- Click, type, and interact with elements
- Perform mouse actions (hover, drag and drop)
- Handle keyboard input
- Take screenshots
- Upload files
- Support for headless mode
## Supported Browsers
- Chrome
- Firefox
- MS Edge
## Use with Goose
### Option 1: One-click install
Copy and paste the link below into a browser address bar to add this extension to goose desktop:
```
goose://extension?cmd=npx&arg=-y&arg=%40angiejones%2Fmcp-selenium&id=selenium-mcp&name=Selenium%20MCP&description=automates%20browser%20interactions
```
### Option 2: Add manually to desktop or CLI
* Name: `Selenium MCP`
* Description: `automates browser interactions`
* Command: `npx -y @angiejones/mcp-selenium`
## Use with other MCP clients (e.g. Claude Desktop, etc)
```json
{
"mcpServers": {
"selenium": {
"command": "npx",
"args": ["-y", "@angiejones/mcp-selenium"]
}
}
}
```
---
## Development
To work on this project:
1. Clone the repository
2. Install dependencies: `npm install`
3. Run the server: `node src/lib/server.js`
### Installation
#### Installing via Smithery
To install MCP Selenium for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@angiejones/mcp-selenium):
```bash
npx -y @smithery/cli install @angiejones/mcp-selenium --client claude
```
#### Manual Installation
```bash
npm install -g @angiejones/mcp-selenium
```
### Usage
Start the server by running:
```bash
mcp-selenium
```
Or use with NPX in your MCP configuration:
```json
{
"mcpServers": {
"selenium": {
"command": "npx",
"args": [
"-y",
"@angiejones/mcp-selenium"
]
}
}
}
```
## Tools
### start_browser
Launches a browser session.
**Parameters:**
- `browser` (required): Browser to launch
- Type: string
- Enum: ["chrome", "firefox", "edge"]
- `options`: Browser configuration options
- Type: object
- Properties:
- `headless`: Run browser in headless mode
- Type: boolean
- `arguments`: Additional browser arguments
- Type: array of strings
**Example:**
```json
{
"tool": "start_browser",
"parameters": {
"browser": "chrome",
"options": {
"headless": true,
"arguments": ["--no-sandbox"]
}
}
}
```
### navigate
Navigates to a URL.
**Parameters:**
- `url` (required): URL to navigate to
- Type: string
**Example:**
```json
{
"tool": "navigate",
"parameters": {
"url": "https://www.example.com"
}
}
```
### find_element
Finds an element on the page.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "find_element",
"parameters": {
"by": "id",
"value": "search-input",
"timeout": 5000
}
}
```
### click_element
Clicks an element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "click_element",
"parameters": {
"by": "css",
"value": ".submit-button"
}
}
```
### send_keys
Sends keys to an element (typing). Any existing content of the element is cleared before the text is entered.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `text` (required): Text to enter into the element
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "send_keys",
"parameters": {
"by": "name",
"value": "username",
"text": "testuser"
}
}
```
### get_element_text
Gets the text() of an element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "get_element_text",
"parameters": {
"by": "css",
"value": ".message"
}
}
```
### hover
Moves the mouse to hover over an element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "hover",
"parameters": {
"by": "css",
"value": ".dropdown-menu"
}
}
```
### drag_and_drop
Drags an element and drops it onto another element.
**Parameters:**
- `by` (required): Locator strategy for source element
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the source locator strategy
- Type: string
- `targetBy` (required): Locator strategy for target element
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `targetValue` (required): Value for the target locator strategy
- Type: string
- `timeout`: Maximum time to wait for elements in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "drag_and_drop",
"parameters": {
"by": "id",
"value": "draggable",
"targetBy": "id",
"targetValue": "droppable"
}
}
```
### double_click
Performs a double click on an element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "double_click",
"parameters": {
"by": "css",
"value": ".editable-text"
}
}
```
### right_click
Performs a right click (context click) on an element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "right_click",
"parameters": {
"by": "css",
"value": ".context-menu-trigger"
}
}
```
### press_key
Simulates pressing a keyboard key.
**Parameters:**
- `key` (required): Key to press (e.g., 'Enter', 'Tab', 'a', etc.)
- Type: string
**Example:**
```json
{
"tool": "press_key",
"parameters": {
"key": "Enter"
}
}
```
### upload_file
Uploads a file using a file input element.
**Parameters:**
- `by` (required): Locator strategy
- Type: string
- Enum: ["id", "css", "xpath", "name", "tag", "class"]
- `value` (required): Value for the locator strategy
- Type: string
- `filePath` (required): Absolute path to the file to upload
- Type: string
- `timeout`: Maximum time to wait for element in milliseconds
- Type: number
- Default: 10000
**Example:**
```json
{
"tool": "upload_file",
"parameters": {
"by": "id",
"value": "file-input",
"filePath": "/path/to/file.pdf"
}
}
```
### take_screenshot
Captures a screenshot of the current page.
**Parameters:**
- `outputPath` (optional): Path where to save the screenshot. If not provided, returns base64 data.
- Type: string
**Example:**
```json
{
"tool": "take_screenshot",
"parameters": {
"outputPath": "/path/to/screenshot.png"
}
}
```
### close_session
Closes the current browser session and cleans up resources.
**Parameters:**
None required
**Example:**
```json
{
"tool": "close_session",
"parameters": {}
}
```
## License
MIT
```
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
```yaml
# Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
startCommand:
type: stdio
configSchema:
# JSON Schema defining the configuration options for the MCP.
type: object
required: []
properties: {}
commandFunction:
# A function that produces the CLI command to start the MCP on stdio.
|-
(config) => ({command:'node', args:['src/lib/server.js'], env:{}})
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "@angiejones/mcp-selenium",
"version": "0.1.21",
"description": "Selenium WebDriver MCP Server",
"type": "module",
"main": "src/lib/server.js",
"bin": {
"mcp-selenium": "./src/lib/server.js"
},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.7.0",
"selenium-webdriver": "^4.18.1"
}
}
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
FROM node:18-alpine

# Install Chromium, its WebDriver binary and the libraries/fonts it needs.
# `--no-cache` fetches a fresh package index without storing it in the image,
# so a separate `apk update` step is not needed.
RUN apk add --no-cache \
    chromium \
    chromium-chromedriver \
    nss \
    freetype \
    freetype-dev \
    harfbuzz \
    ca-certificates \
    ttf-freefont \
    udev \
    ttf-opensans

# Set Chrome environment variables
ENV CHROME_BIN=/usr/bin/chromium-browser
ENV CHROME_PATH=/usr/lib/chromium/
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true

WORKDIR /app

# Copy package files first so the dependency layer is reused when only
# application code changes
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy application code
COPY . .

# Start the MCP server
CMD ["node", "src/lib/server.js"]
```
--------------------------------------------------------------------------------
/bin/mcp-selenium.js:
--------------------------------------------------------------------------------
```javascript
#!/usr/bin/env node
import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';
import { spawn } from 'child_process';
// Resolve the server entry point relative to this wrapper script.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const serverPath = resolve(__dirname, '../src/lib/server.js');

// Start the server with the same Node binary that runs this wrapper, so it
// works even when `node` is not on PATH or resolves to a different version.
const child = spawn(process.execPath, [serverPath], {
  stdio: 'inherit',
});

child.on('error', (error) => {
  console.error(`Error starting server: ${error.message}`);
  process.exit(1);
});

// Mirror the server's outcome: without this the wrapper would exit with
// status 0 even when the server crashed. `code` is null when the child was
// terminated by a signal, which is reported as a generic failure.
child.on('exit', (code) => {
  process.exit(code ?? 1);
});

// Handle process termination by forwarding the signal to the server
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, () => {
    child.kill(signal);
  });
}
```
--------------------------------------------------------------------------------
/src/lib/server.js:
--------------------------------------------------------------------------------
```javascript
#!/usr/bin/env node
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import pkg from 'selenium-webdriver';
const { Builder, By, Key, until, Actions } = pkg;
import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
import { Options as EdgeOptions } from 'selenium-webdriver/edge.js';
// Create an MCP server
// The name and version below are handed to the MCP SDK as this server's identity.
const server = new McpServer({ name: "MCP Selenium", version: "1.0.0" });
// Server state
/**
 * Mutable state shared by every tool handler.
 * - `currentSession`: id of the session the tools act on, or null when none.
 * - `drivers`: session id -> WebDriver instance for every started browser.
 */
const state = {
  currentSession: null,
  drivers: new Map(),
};
// Helper functions
/**
 * Returns the WebDriver registered for the current session.
 *
 * @returns The driver stored in `state.drivers` under `state.currentSession`.
 * @throws {Error} 'No active browser session' when no driver is registered
 *   for the current session id (including when no session was started).
 */
const getDriver = () => {
  const { drivers, currentSession } = state;
  const activeDriver = drivers.get(currentSession);
  if (activeDriver) {
    return activeDriver;
  }
  throw new Error('No active browser session');
};
/**
 * Builds a Selenium locator from a strategy name and its value.
 *
 * @param {string} by - Strategy name, matched case-insensitively:
 *   id, css, xpath, name, tag or class.
 * @param {string} value - Value handed to the chosen `By` factory.
 * @returns The locator produced by the matching `By` factory.
 * @throws {Error} `Unsupported locator strategy: <by>` for any other name.
 */
const getLocator = (by, value) => {
  // Strategy name -> name of the `By` factory that builds the locator.
  // 'tag' goes through By.css: the value is passed as the CSS selector.
  const factoryNames = new Map([
    ['id', 'id'],
    ['css', 'css'],
    ['xpath', 'xpath'],
    ['name', 'name'],
    ['tag', 'css'],
    ['class', 'className'],
  ]);

  const factoryName = factoryNames.get(by.toLowerCase());
  if (factoryName === undefined) {
    throw new Error(`Unsupported locator strategy: ${by}`);
  }
  return By[factoryName](value);
};
// Common schemas
// Strategy names accepted by the `by` parameter.
const LOCATOR_STRATEGIES = ["id", "css", "xpath", "name", "tag", "class"];

/** Optional launch settings for start_browser: headless flag and extra arguments. */
const browserOptionsSchema = z
  .object({
    headless: z.boolean().optional().describe("Run browser in headless mode"),
    arguments: z.array(z.string()).optional().describe("Additional browser arguments"),
  })
  .optional();

/**
 * Parameter shape shared by the element tools. It is a plain object of zod
 * fields (not a z.object) so tools can spread it into their own parameter shape.
 */
const locatorSchema = {
  by: z.enum(LOCATOR_STRATEGIES).describe("Locator strategy to find element"),
  value: z.string().describe("Value for the locator strategy"),
  timeout: z.number().optional().describe("Maximum time to wait for element in milliseconds"),
};
// Browser Management Tools
/**
 * Tool: start_browser
 * Launches a Chrome, Edge or Firefox session, stores its driver in
 * `state.drivers` under a generated id and makes it the current session.
 *
 * @param {"chrome"|"firefox"|"edge"} browser - Browser to launch.
 * @param {object} [options] - Optional `headless` flag and extra `arguments`.
 * @returns A text result containing the new session id; on failure an error
 *   text result flagged with `isError: true` so MCP clients can tell it apart
 *   from a success.
 */
server.tool(
  "start_browser",
  "launches browser",
  {
    browser: z.enum(["chrome", "firefox", "edge"]).describe("Browser to launch (chrome or firefox or microsoft edge)"),
    options: browserOptionsSchema
  },
  async ({ browser, options = {} }) => {
    // Per-browser wiring: Selenium browser name, headless flag, options class
    // and the Builder setter that receives the options.
    const launchers = new Map([
      ['chrome', {
        browserName: 'chrome',
        headlessFlag: '--headless=new',
        createOptions: () => new ChromeOptions(),
        applyOptions: (builder, browserOptions) => builder.setChromeOptions(browserOptions),
      }],
      ['edge', {
        // Selenium's browser name for Edge is 'MicrosoftEdge'; with 'edge'
        // the Builder neither applies the Edge options nor knows which
        // driver to create.
        browserName: 'MicrosoftEdge',
        headlessFlag: '--headless=new',
        createOptions: () => new EdgeOptions(),
        applyOptions: (builder, browserOptions) => builder.setEdgeOptions(browserOptions),
      }],
      ['firefox', {
        browserName: 'firefox',
        headlessFlag: '--headless',
        createOptions: () => new FirefoxOptions(),
        applyOptions: (builder, browserOptions) => builder.setFirefoxOptions(browserOptions),
      }],
    ]);

    try {
      const launcher = launchers.get(browser);
      if (!launcher) {
        throw new Error(`Unsupported browser: ${browser}`);
      }

      const browserOptions = launcher.createOptions();
      if (options.headless) {
        browserOptions.addArguments(launcher.headlessFlag);
      }
      if (options.arguments) {
        for (const arg of options.arguments) {
          browserOptions.addArguments(arg);
        }
      }

      const builder = new Builder().forBrowser(launcher.browserName);
      const driver = await launcher.applyOptions(builder, browserOptions).build();

      // The id keeps the tool-level browser name ('edge', not 'MicrosoftEdge').
      const sessionId = `${browser}_${Date.now()}`;
      state.drivers.set(sessionId, driver);
      state.currentSession = sessionId;
      return {
        content: [{ type: 'text', text: `Browser started with session_id: ${sessionId}` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error starting browser: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: navigate
 * Loads `url` in the current browser session.
 *
 * @param {string} url - URL to navigate to.
 * @returns A text result confirming the navigation; when there is no active
 *   session or the navigation fails, an error text result flagged with
 *   `isError: true` so MCP clients can tell it apart from a success.
 */
server.tool(
  "navigate",
  "navigates to a URL",
  {
    url: z.string().describe("URL to navigate to")
  },
  async ({ url }) => {
    try {
      const driver = getDriver();
      await driver.get(url);
      return {
        content: [{ type: 'text', text: `Navigated to ${url}` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error navigating: ${e.message}` }],
        isError: true
      };
    }
  }
);
// Element Interaction Tools
/**
 * Tool: find_element
 * Waits until an element matching the locator is present in the page.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait in milliseconds.
 * @returns The text result 'Element found'; when there is no active session
 *   or the wait fails, an error text result flagged with `isError: true`.
 */
server.tool(
  "find_element",
  "finds an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      await driver.wait(until.elementLocated(locator), timeout);
      return {
        content: [{ type: 'text', text: 'Element found' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error finding element: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: click_element
 * Waits for the element matching the locator, then clicks it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns The text result 'Element clicked'; on any failure an error text
 *   result flagged with `isError: true`.
 */
server.tool(
  "click_element",
  "clicks an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      await element.click();
      return {
        content: [{ type: 'text', text: 'Element clicked' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error clicking element: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: send_keys
 * Waits for the element matching the locator, clears it, then types `text`
 * into it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {string} text - Text to enter into the element.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns A text result echoing the entered text; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "send_keys",
  "sends keys to an element, aka typing",
  {
    ...locatorSchema,
    text: z.string().describe("Text to enter into the element")
  },
  async ({ by, value, text, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      // Existing content is removed first, so the element ends up holding only `text`.
      await element.clear();
      await element.sendKeys(text);
      return {
        content: [{ type: 'text', text: `Text "${text}" entered into element` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error entering text: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: get_element_text
 * Waits for the element matching the locator and returns its text.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns A text result holding the element's text; on any failure an error
 *   text result flagged with `isError: true`, which keeps an error message
 *   from being mistaken for page content.
 */
server.tool(
  "get_element_text",
  "gets the text() of an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const text = await element.getText();
      return {
        content: [{ type: 'text', text }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error getting element text: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: hover
 * Waits for the element matching the locator and moves the mouse pointer
 * onto it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns The text result 'Hovered over element'; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "hover",
  "moves the mouse to hover over an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.move({ origin: element }).perform();
      return {
        content: [{ type: 'text', text: 'Hovered over element' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error hovering over element: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: drag_and_drop
 * Waits for the source and target elements, then drags the source onto the
 * target.
 *
 * @param {string} by - Locator strategy for the source element.
 * @param {string} value - Value for the source locator strategy.
 * @param {string} targetBy - Locator strategy for the target element.
 * @param {string} targetValue - Value for the target locator strategy.
 * @param {number} [timeout=10000] - Maximum wait, applied to each element in
 *   turn, in milliseconds.
 * @returns The text result 'Drag and drop completed'; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "drag_and_drop",
  "drags an element and drops it onto another element",
  {
    ...locatorSchema,
    targetBy: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find target element"),
    targetValue: z.string().describe("Value for the target locator strategy")
  },
  async ({ by, value, targetBy, targetValue, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const sourceLocator = getLocator(by, value);
      const targetLocator = getLocator(targetBy, targetValue);
      const sourceElement = await driver.wait(until.elementLocated(sourceLocator), timeout);
      const targetElement = await driver.wait(until.elementLocated(targetLocator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.dragAndDrop(sourceElement, targetElement).perform();
      return {
        content: [{ type: 'text', text: 'Drag and drop completed' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error performing drag and drop: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: double_click
 * Waits for the element matching the locator and double-clicks it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns The text result 'Double click performed'; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "double_click",
  "performs a double click on an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.doubleClick(element).perform();
      return {
        content: [{ type: 'text', text: 'Double click performed' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error performing double click: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: right_click
 * Waits for the element matching the locator and performs a context click
 * on it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns The text result 'Right click performed'; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "right_click",
  "performs a right click (context click) on an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.contextClick(element).perform();
      return {
        content: [{ type: 'text', text: 'Right click performed' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error performing right click: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Translates a client-supplied key into the value handed to the key actions.
 *
 * Selenium's key actions accept exactly one code point, so a name such as
 * 'Enter' or 'ArrowUp' has to be mapped to the matching `Key` constant.
 * Names are compared ignoring case, spaces, hyphens and underscores.
 *
 * @param {string} key - A single character, or the name of a special key.
 * @returns {string} The single character itself, or the `Key` constant.
 * @throws {Error} `Unsupported key: <key>` when the name matches no constant.
 */
function resolveKeyValue(key) {
  // A lone character (counted in code points, not UTF-16 units) is sent unchanged.
  if (Array.from(key).length === 1) {
    return key;
  }

  const canonical = (name) => name.replace(/[^a-z0-9]/gi, '').toLowerCase();
  // Common spellings that differ from the names of the `Key` constants.
  const aliases = new Map([
    ['esc', 'escape'],
    ['del', 'delete'],
    ['ctrl', 'control'],
  ]);

  const requested = canonical(key);
  const wanted = aliases.get(requested) ?? requested;
  for (const [name, keyValue] of Object.entries(Key)) {
    // `Key` also carries helper functions; only string members are key values.
    if (typeof keyValue === 'string' && canonical(name) === wanted) {
      return keyValue;
    }
  }
  throw new Error(`Unsupported key: ${key}`);
}

/**
 * Tool: press_key
 * Presses and releases one keyboard key in the current browser session.
 *
 * @param {string} key - Key to press: a single character ('a') or a key name
 *   ('Enter', 'Tab', 'ArrowUp', ...).
 * @returns A text result naming the pressed key; when there is no active
 *   session, the key is unknown or the action fails, an error text result
 *   flagged with `isError: true`.
 */
server.tool(
  "press_key",
  "simulates pressing a keyboard key",
  {
    key: z.string().describe("Key to press (e.g., 'Enter', 'Tab', 'a', etc.)")
  },
  async ({ key }) => {
    try {
      const driver = getDriver();
      const keyValue = resolveKeyValue(key);
      const actions = driver.actions({ bridge: true });
      await actions.keyDown(keyValue).keyUp(keyValue).perform();
      return {
        content: [{ type: 'text', text: `Key '${key}' pressed` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error pressing key: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: upload_file
 * Waits for the file input matching the locator and sends `filePath` to it.
 *
 * @param {string} by - Locator strategy (id, css, xpath, name, tag, class).
 * @param {string} value - Value for the locator strategy.
 * @param {string} filePath - Absolute path to the file to upload.
 * @param {number} [timeout=10000] - Maximum wait for the element in milliseconds.
 * @returns The text result 'File upload initiated'; on any failure an error
 *   text result flagged with `isError: true`.
 */
server.tool(
  "upload_file",
  "uploads a file using a file input element",
  {
    ...locatorSchema,
    filePath: z.string().describe("Absolute path to the file to upload")
  },
  async ({ by, value, filePath, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      // The path is typed into the input element rather than picked in a dialog.
      await element.sendKeys(filePath);
      return {
        content: [{ type: 'text', text: 'File upload initiated' }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error uploading file: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: take_screenshot
 * Captures a screenshot of the current page.
 *
 * @param {string} [outputPath] - Where to save the screenshot. When omitted,
 *   the base64 data is returned in the result instead.
 * @returns With `outputPath`: a text result confirming the saved path.
 *   Without it: two text items, a label followed by the base64 data.
 *   On any failure: an error text result flagged with `isError: true`.
 */
server.tool(
  "take_screenshot",
  "captures a screenshot of the current page",
  {
    outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data.")
  },
  async ({ outputPath }) => {
    try {
      const driver = getDriver();
      const screenshot = await driver.takeScreenshot();
      if (outputPath) {
        // Loaded on demand: only this branch touches the file system.
        const fs = await import('fs');
        // The 'base64' encoding makes writeFile decode the string into bytes.
        await fs.promises.writeFile(outputPath, screenshot, 'base64');
        return {
          content: [{ type: 'text', text: `Screenshot saved to ${outputPath}` }]
        };
      } else {
        return {
          content: [
            { type: 'text', text: 'Screenshot captured as base64:' },
            { type: 'text', text: screenshot }
          ]
        };
      }
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error taking screenshot: ${e.message}` }],
        isError: true
      };
    }
  }
);
/**
 * Tool: close_session
 * Quits the current browser and removes its session from the server state.
 *
 * The session is dropped even when quitting fails (for example because the
 * browser already died); otherwise the dead session would stay current and
 * every later tool call would keep targeting it.
 *
 * @returns A text result naming the closed session; when there is no active
 *   session or quitting fails, an error text result flagged with
 *   `isError: true`.
 */
server.tool(
  "close_session",
  "closes the current browser session",
  {},
  async () => {
    try {
      const driver = getDriver();
      // Capture the id before awaiting so the bookkeeping below targets the
      // session that is actually being closed.
      const sessionId = state.currentSession;
      try {
        await driver.quit();
      } finally {
        state.drivers.delete(sessionId);
        // Leave the pointer alone if another session became current meanwhile.
        if (state.currentSession === sessionId) {
          state.currentSession = null;
        }
      }
      return {
        content: [{ type: 'text', text: `Browser session ${sessionId} closed` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error closing session: ${e.message}` }],
        isError: true
      };
    }
  }
);
// Resources
/**
 * Resource: browser-status (browser-status://current)
 * Reports whether a browser session is active and, if so, its id.
 *
 * The URI contains no template variables, so it is registered as a fixed
 * resource. Wrapping it in a ResourceTemplate without the template's
 * callbacks object breaks resource listing.
 *
 * @param {URL} uri - The requested resource URI, echoed back in the result.
 * @returns A single text content entry describing the current session.
 */
server.resource(
  "browser-status",
  "browser-status://current",
  async (uri) => ({
    contents: [{
      uri: uri.href,
      text: state.currentSession
        ? `Active browser session: ${state.currentSession}`
        : "No active browser session"
    }]
  })
);
// Cleanup handler
/**
 * Quits every registered browser one after another, resets the server state
 * and exits the process with status 0.
 *
 * A browser that fails to quit is logged to stderr and skipped so the
 * remaining sessions are still closed.
 */
async function cleanup() {
  for (const sessionId of state.drivers.keys()) {
    const browser = state.drivers.get(sessionId);
    try {
      await browser.quit();
    } catch (quitError) {
      console.error(`Error closing browser session ${sessionId}:`, quitError);
    }
  }

  state.drivers.clear();
  state.currentSession = null;
  process.exit(0);
}

// Run the cleanup on both termination signals.
['SIGTERM', 'SIGINT'].forEach((signal) => process.on(signal, cleanup));
// Start the server
// Connect the server to a stdio transport; top-level await holds the module
// here until the connection is set up.
await server.connect(new StdioServerTransport());
```