# Directory Structure
```
├── .gitignore
├── bin
│ └── mcp-selenium.js
├── Dockerfile
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
└── src
└── lib
└── server.js
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # Virtual Environment
24 | venv/
25 | ENV/
26 | env/
27 |
28 | # IDE
29 | .idea/
30 | .vscode/
31 | *.swp
32 | *.swo
33 |
34 | # Node
35 | node_modules/
36 | npm-debug.log*
37 |
38 | # Misc
39 | .DS_Store
40 | .env
41 | .env.local
42 | .env.*.local
43 |
44 | # Selenium
45 | geckodriver.log
46 | chromedriver.log
47 | .goose/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | [MCP Selenium on MseeP.ai](https://mseep.ai/app/angiejones-mcp-selenium)
2 |
3 | # MCP Selenium Server
4 |
5 | A Model Context Protocol (MCP) server implementation for Selenium WebDriver, enabling browser automation through standardized MCP clients.
6 |
7 | ## Video Demo (Click to Watch)
8 |
9 | [Watch the video demo on YouTube](https://youtu.be/mRV0N8hcgYA)
10 |
11 |
12 | ## Features
13 |
14 | - Start browser sessions with customizable options
15 | - Navigate to URLs
16 | - Find elements using various locator strategies
17 | - Click, type, and interact with elements
18 | - Perform mouse actions (hover, drag and drop)
19 | - Handle keyboard input
20 | - Take screenshots
21 | - Upload files
22 | - Support for headless mode
23 |
24 | ## Supported Browsers
25 |
26 | - Chrome
27 | - Firefox
28 | - MS Edge
29 |
30 | ## Use with Goose
31 |
32 | ### Option 1: One-click install
33 | Copy and paste the link below into a browser address bar to add this extension to goose desktop:
34 |
35 | ```
36 | goose://extension?cmd=npx&arg=-y&arg=%40angiejones%2Fmcp-selenium&id=selenium-mcp&name=Selenium%20MCP&description=automates%20browser%20interactions
37 | ```
38 |
39 |
40 | ### Option 2: Add manually to desktop or CLI
41 |
42 | * Name: `Selenium MCP`
43 | * Description: `automates browser interactions`
44 | * Command: `npx -y @angiejones/mcp-selenium`
45 |
46 | ## Use with other MCP clients (e.g. Claude Desktop, etc)
47 | ```json
48 | {
49 | "mcpServers": {
50 | "selenium": {
51 | "command": "npx",
52 | "args": ["-y", "@angiejones/mcp-selenium"]
53 | }
54 | }
55 | }
56 | ```
57 |
58 | ---
59 |
60 | ## Development
61 |
62 | To work on this project:
63 |
64 | 1. Clone the repository
65 | 2. Install dependencies: `npm install`
66 | 3. Run the server: `node src/lib/server.js`
67 |
68 | ### Installation
69 |
70 | #### Installing via Smithery
71 |
72 | To install MCP Selenium for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@angiejones/mcp-selenium):
73 |
74 | ```bash
75 | npx -y @smithery/cli install @angiejones/mcp-selenium --client claude
76 | ```
77 |
78 | #### Manual Installation
79 | ```bash
80 | npm install -g @angiejones/mcp-selenium
81 | ```
82 |
83 |
84 | ### Usage
85 |
86 | Start the server by running:
87 |
88 | ```bash
89 | mcp-selenium
90 | ```
91 |
92 | Or use with NPX in your MCP configuration:
93 |
94 | ```json
95 | {
96 | "mcpServers": {
97 | "selenium": {
98 | "command": "npx",
99 | "args": [
100 | "-y",
101 | "@angiejones/mcp-selenium"
102 | ]
103 | }
104 | }
105 | }
106 | ```
107 |
108 |
109 |
110 | ## Tools
111 |
112 | ### start_browser
113 | Launches a browser session.
114 |
115 | **Parameters:**
116 | - `browser` (required): Browser to launch
117 | - Type: string
118 | - Enum: ["chrome", "firefox", "edge"]
119 | - `options`: Browser configuration options
120 | - Type: object
121 | - Properties:
122 | - `headless`: Run browser in headless mode
123 | - Type: boolean
124 | - `arguments`: Additional browser arguments
125 | - Type: array of strings
126 |
127 | **Example:**
128 | ```json
129 | {
130 | "tool": "start_browser",
131 | "parameters": {
132 | "browser": "chrome",
133 | "options": {
134 | "headless": true,
135 | "arguments": ["--no-sandbox"]
136 | }
137 | }
138 | }
139 | ```
140 |
141 | ### navigate
142 | Navigates to a URL.
143 |
144 | **Parameters:**
145 | - `url` (required): URL to navigate to
146 | - Type: string
147 |
148 | **Example:**
149 | ```json
150 | {
151 | "tool": "navigate",
152 | "parameters": {
153 | "url": "https://www.example.com"
154 | }
155 | }
156 | ```
157 |
158 | ### find_element
159 | Finds an element on the page.
160 |
161 | **Parameters:**
162 | - `by` (required): Locator strategy
163 | - Type: string
164 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
165 | - `value` (required): Value for the locator strategy
166 | - Type: string
167 | - `timeout`: Maximum time to wait for element in milliseconds
168 | - Type: number
169 | - Default: 10000
170 |
171 | **Example:**
172 | ```json
173 | {
174 | "tool": "find_element",
175 | "parameters": {
176 | "by": "id",
177 | "value": "search-input",
178 | "timeout": 5000
179 | }
180 | }
181 | ```
182 |
183 | ### click_element
184 | Clicks an element.
185 |
186 | **Parameters:**
187 | - `by` (required): Locator strategy
188 | - Type: string
189 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
190 | - `value` (required): Value for the locator strategy
191 | - Type: string
192 | - `timeout`: Maximum time to wait for element in milliseconds
193 | - Type: number
194 | - Default: 10000
195 |
196 | **Example:**
197 | ```json
198 | {
199 | "tool": "click_element",
200 | "parameters": {
201 | "by": "css",
202 | "value": ".submit-button"
203 | }
204 | }
205 | ```
206 |
207 | ### send_keys
208 | Sends keys to an element (typing).
209 |
210 | **Parameters:**
211 | - `by` (required): Locator strategy
212 | - Type: string
213 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
214 | - `value` (required): Value for the locator strategy
215 | - Type: string
216 | - `text` (required): Text to enter into the element
217 | - Type: string
218 | - `timeout`: Maximum time to wait for element in milliseconds
219 | - Type: number
220 | - Default: 10000
221 |
222 | **Example:**
223 | ```json
224 | {
225 | "tool": "send_keys",
226 | "parameters": {
227 | "by": "name",
228 | "value": "username",
229 | "text": "testuser"
230 | }
231 | }
232 | ```
233 |
234 | ### get_element_text
235 | Gets the text() of an element.
236 |
237 | **Parameters:**
238 | - `by` (required): Locator strategy
239 | - Type: string
240 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
241 | - `value` (required): Value for the locator strategy
242 | - Type: string
243 | - `timeout`: Maximum time to wait for element in milliseconds
244 | - Type: number
245 | - Default: 10000
246 |
247 | **Example:**
248 | ```json
249 | {
250 | "tool": "get_element_text",
251 | "parameters": {
252 | "by": "css",
253 | "value": ".message"
254 | }
255 | }
256 | ```
257 |
258 | ### hover
259 | Moves the mouse to hover over an element.
260 |
261 | **Parameters:**
262 | - `by` (required): Locator strategy
263 | - Type: string
264 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
265 | - `value` (required): Value for the locator strategy
266 | - Type: string
267 | - `timeout`: Maximum time to wait for element in milliseconds
268 | - Type: number
269 | - Default: 10000
270 |
271 | **Example:**
272 | ```json
273 | {
274 | "tool": "hover",
275 | "parameters": {
276 | "by": "css",
277 | "value": ".dropdown-menu"
278 | }
279 | }
280 | ```
281 |
282 | ### drag_and_drop
283 | Drags an element and drops it onto another element.
284 |
285 | **Parameters:**
286 | - `by` (required): Locator strategy for source element
287 | - Type: string
288 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
289 | - `value` (required): Value for the source locator strategy
290 | - Type: string
291 | - `targetBy` (required): Locator strategy for target element
292 | - Type: string
293 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
294 | - `targetValue` (required): Value for the target locator strategy
295 | - Type: string
296 | - `timeout`: Maximum time to wait for elements in milliseconds
297 | - Type: number
298 | - Default: 10000
299 |
300 | **Example:**
301 | ```json
302 | {
303 | "tool": "drag_and_drop",
304 | "parameters": {
305 | "by": "id",
306 | "value": "draggable",
307 | "targetBy": "id",
308 | "targetValue": "droppable"
309 | }
310 | }
311 | ```
312 |
313 | ### double_click
314 | Performs a double click on an element.
315 |
316 | **Parameters:**
317 | - `by` (required): Locator strategy
318 | - Type: string
319 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
320 | - `value` (required): Value for the locator strategy
321 | - Type: string
322 | - `timeout`: Maximum time to wait for element in milliseconds
323 | - Type: number
324 | - Default: 10000
325 |
326 | **Example:**
327 | ```json
328 | {
329 | "tool": "double_click",
330 | "parameters": {
331 | "by": "css",
332 | "value": ".editable-text"
333 | }
334 | }
335 | ```
336 |
337 | ### right_click
338 | Performs a right click (context click) on an element.
339 |
340 | **Parameters:**
341 | - `by` (required): Locator strategy
342 | - Type: string
343 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
344 | - `value` (required): Value for the locator strategy
345 | - Type: string
346 | - `timeout`: Maximum time to wait for element in milliseconds
347 | - Type: number
348 | - Default: 10000
349 |
350 | **Example:**
351 | ```json
352 | {
353 | "tool": "right_click",
354 | "parameters": {
355 | "by": "css",
356 | "value": ".context-menu-trigger"
357 | }
358 | }
359 | ```
360 |
361 | ### press_key
362 | Simulates pressing a keyboard key.
363 |
364 | **Parameters:**
365 | - `key` (required): Key to press (e.g., 'Enter', 'Tab', 'a', etc.)
366 | - Type: string
367 |
368 | **Example:**
369 | ```json
370 | {
371 | "tool": "press_key",
372 | "parameters": {
373 | "key": "Enter"
374 | }
375 | }
376 | ```
377 |
378 | ### upload_file
379 | Uploads a file using a file input element.
380 |
381 | **Parameters:**
382 | - `by` (required): Locator strategy
383 | - Type: string
384 | - Enum: ["id", "css", "xpath", "name", "tag", "class"]
385 | - `value` (required): Value for the locator strategy
386 | - Type: string
387 | - `filePath` (required): Absolute path to the file to upload
388 | - Type: string
389 | - `timeout`: Maximum time to wait for element in milliseconds
390 | - Type: number
391 | - Default: 10000
392 |
393 | **Example:**
394 | ```json
395 | {
396 | "tool": "upload_file",
397 | "parameters": {
398 | "by": "id",
399 | "value": "file-input",
400 | "filePath": "/path/to/file.pdf"
401 | }
402 | }
403 | ```
404 |
405 | ### take_screenshot
406 | Captures a screenshot of the current page.
407 |
408 | **Parameters:**
409 | - `outputPath` (optional): Path where to save the screenshot. If not provided, returns base64 data.
410 | - Type: string
411 |
412 | **Example:**
413 | ```json
414 | {
415 | "tool": "take_screenshot",
416 | "parameters": {
417 | "outputPath": "/path/to/screenshot.png"
418 | }
419 | }
420 | ```
421 |
422 | ### close_session
423 | Closes the current browser session and cleans up resources.
424 |
425 | **Parameters:**
426 | None required
427 |
428 | **Example:**
429 | ```json
430 | {
431 | "tool": "close_session",
432 | "parameters": {}
433 | }
434 | ```
435 |
436 |
437 | ## License
438 |
439 | MIT
440 |
```
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
2 |
3 | startCommand:
4 | type: stdio
5 | configSchema:
6 | # JSON Schema defining the configuration options for the MCP.
7 | type: object
8 | required: []
9 | properties: {}
10 | commandFunction:
11 | # A function that produces the CLI command to start the MCP on stdio.
12 | |-
13 | (config) => ({command:'node', args:['src/lib/server.js'], env:{}})
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "@angiejones/mcp-selenium",
3 | "version": "0.1.21",
4 | "description": "Selenium WebDriver MCP Server",
5 | "type": "module",
6 | "main": "src/lib/server.js",
7 | "bin": {
8 | "mcp-selenium": "./src/lib/server.js"
9 | },
10 | "scripts": {
11 | "test": "echo \"Error: no test specified\" && exit 1"
12 | },
13 | "keywords": [],
14 | "author": "",
15 | "license": "ISC",
16 | "dependencies": {
17 | "@modelcontextprotocol/sdk": "^1.7.0",
18 | "selenium-webdriver": "^4.18.1"
19 | }
20 | }
21 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
FROM node:18-alpine

# Install Chromium, its WebDriver binary, and the fonts/libraries they rely on.
# Each package is listed once.
RUN apk update && apk add --no-cache \
    chromium \
    chromium-chromedriver \
    nss \
    freetype \
    freetype-dev \
    harfbuzz \
    ca-certificates \
    ttf-freefont \
    udev \
    ttf-opensans

# Set Chrome environment variables
ENV CHROME_BIN=/usr/bin/chromium-browser
ENV CHROME_PATH=/usr/lib/chromium/
# NOTE(review): package.json declares no puppeteer dependency, so this variable
# looks unused; kept for backward compatibility.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true

WORKDIR /app

# Copy package files first so the dependency layer is reused when only source changes
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy application code
COPY . .

# Start the MCP server
CMD ["node", "src/lib/server.js"]
```
--------------------------------------------------------------------------------
/bin/mcp-selenium.js:
--------------------------------------------------------------------------------
```javascript
1 | #!/usr/bin/env node
2 |
3 | import { fileURLToPath } from 'url';
4 | import { dirname, resolve } from 'path';
5 | import { spawn } from 'child_process';
6 |
// Locate the server entry point relative to this launcher file, so the command
// works regardless of the caller's current working directory.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const serverPath = resolve(__dirname, '../src/lib/server.js');

// Start the server with the same Node binary that is running this launcher.
// A bare `node` would be resolved through PATH, which may point at a different
// installation or at nothing at all.
const child = spawn(process.execPath, [serverPath], {
  stdio: 'inherit',
});

child.on('error', (error) => {
  console.error(`Error starting server: ${error.message}`);
  process.exit(1);
});

// Propagate the server's exit status. `code` is null when the child was ended
// by a signal; report that as a generic failure.
child.on('exit', (code) => {
  process.exit(code ?? 1);
});

// Handle process termination: forward the signal so the server can run its own cleanup.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, () => {
    child.kill(signal);
  });
}
```
--------------------------------------------------------------------------------
/src/lib/server.js:
--------------------------------------------------------------------------------
```javascript
1 | #!/usr/bin/env node
2 |
3 | import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
5 | import { z } from "zod";
6 | import pkg from 'selenium-webdriver';
7 | const { Builder, By, Key, until, Actions } = pkg;
8 | import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
9 | import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
10 | import { Options as EdgeOptions } from 'selenium-webdriver/edge.js';
11 |
12 |
// Create an MCP server
// Identity passed to the McpServer constructor.
const serverInfo = { name: "MCP Selenium", version: "1.0.0" };
const server = new McpServer(serverInfo);

// Server state
// `drivers` holds the WebDriver instances keyed by session id;
// `currentSession` is the id of the session the tools operate on (null when none).
const state = { drivers: new Map(), currentSession: null };
24 |
// Helper functions

/**
 * Look up the driver stored in `state.drivers` under `state.currentSession`.
 *
 * @returns the driver registered for the current session
 * @throws {Error} 'No active browser session' when the lookup yields nothing
 */
const getDriver = () => {
  const activeDriver = state.drivers.get(state.currentSession);
  if (activeDriver) {
    return activeDriver;
  }
  throw new Error('No active browser session');
};
33 |
/**
 * Turn a (strategy, value) pair into a Selenium `By` locator.
 *
 * @param {string} by - strategy name, matched case-insensitively:
 *   id, css, xpath, name, tag or class
 * @param {string} value - the value handed to the chosen strategy
 * @returns the `By` locator for that strategy
 * @throws {Error} `Unsupported locator strategy: <by>` for any other name
 */
const getLocator = (by, value) => {
  // Strategy name -> locator factory. 'tag' goes through By.css, like 'css'.
  const factories = new Map([
    ['id', (v) => By.id(v)],
    ['css', (v) => By.css(v)],
    ['xpath', (v) => By.xpath(v)],
    ['name', (v) => By.name(v)],
    ['tag', (v) => By.css(v)],
    ['class', (v) => By.className(v)],
  ]);

  const buildLocator = factories.get(by.toLowerCase());
  if (buildLocator === undefined) {
    throw new Error(`Unsupported locator strategy: ${by}`);
  }
  return buildLocator(value);
};
45 |
// Common schemas

// Optional launch settings: a headless flag and a list of extra browser arguments.
const headlessField = z.boolean().optional().describe("Run browser in headless mode");
const argumentsField = z.array(z.string()).optional().describe("Additional browser arguments");
const browserOptionsSchema = z
  .object({ headless: headlessField, arguments: argumentsField })
  .optional();

// Locator fields, written as a plain object of zod fields so a tool can spread
// them into its own parameter shape.
const strategyField = z
  .enum(["id", "css", "xpath", "name", "tag", "class"])
  .describe("Locator strategy to find element");
const locatorSchema = {
  by: strategyField,
  value: z.string().describe("Value for the locator strategy"),
  timeout: z.number().optional().describe("Maximum time to wait for element in milliseconds"),
};
57 |
// Browser Management Tools

// Tool: start_browser — builds a WebDriver for the requested browser, stores it in
// `state.drivers` under a new session id and makes that session the current one.
// Parameters: `browser` (chrome | firefox | edge) and optional `options`
// ({ headless, arguments }). Returns a text result carrying the session id, or an
// error result (isError: true) when the browser could not be started.
server.tool(
  "start_browser",
  "launches browser",
  {
    browser: z.enum(["chrome", "firefox", "edge"]).describe("Browser to launch (chrome or firefox or microsoft edge)"),
    options: browserOptionsSchema
  },
  async ({ browser, options = {} }) => {
    // Everything that differs between browsers lives in this table, so the
    // launch sequence itself is written once.
    const launchers = new Map([
      ['chrome', {
        browserName: 'chrome',
        Options: ChromeOptions,
        headlessFlag: '--headless=new',
        attach: (builder, opts) => builder.setChromeOptions(opts),
      }],
      ['edge', {
        // Selenium identifies Edge by the browser name 'MicrosoftEdge'; the
        // tool's own `edge` value is only the user-facing name.
        browserName: 'MicrosoftEdge',
        Options: EdgeOptions,
        headlessFlag: '--headless=new',
        attach: (builder, opts) => builder.setEdgeOptions(opts),
      }],
      ['firefox', {
        browserName: 'firefox',
        Options: FirefoxOptions,
        headlessFlag: '--headless',
        attach: (builder, opts) => builder.setFirefoxOptions(opts),
      }],
    ]);

    try {
      const launcher = launchers.get(browser);
      if (!launcher) {
        throw new Error(`Unsupported browser: ${browser}`);
      }

      const browserOptions = new launcher.Options();
      if (options.headless) {
        browserOptions.addArguments(launcher.headlessFlag);
      }
      if (options.arguments) {
        for (const arg of options.arguments) {
          browserOptions.addArguments(arg);
        }
      }

      const builder = new Builder().forBrowser(launcher.browserName);
      const driver = await launcher.attach(builder, browserOptions).build();

      // The session id keeps the user-facing browser name plus a timestamp.
      const sessionId = `${browser}_${Date.now()}`;
      state.drivers.set(sessionId, driver);
      state.currentSession = sessionId;

      return {
        content: [{ type: 'text', text: `Browser started with session_id: ${sessionId}` }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error starting browser: ${e.message}` }],
        isError: true
      };
    }
  }
);
131 |
// Tool: navigate — loads `url` in the current browser session.
// Returns a confirmation text, or an error result (isError: true) when there is
// no active session or the navigation fails.
server.tool(
  "navigate",
  "navigates to a URL",
  {
    url: z.string().describe("URL to navigate to")
  },
  async ({ url }) => {
    try {
      const driver = getDriver();
      await driver.get(url);
      return {
        content: [{ type: 'text', text: `Navigated to ${url}` }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error navigating: ${e.message}` }],
        isError: true
      };
    }
  }
);
152 |
// Element Interaction Tools

// Tool: find_element — waits up to `timeout` ms (default 10000) for the locator
// to match an element. Returns 'Element found', or an error result
// (isError: true) when the wait fails or no session is active.
server.tool(
  "find_element",
  "finds an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      await driver.wait(until.elementLocated(locator), timeout);
      return {
        content: [{ type: 'text', text: 'Element found' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error finding element: ${e.message}` }],
        isError: true
      };
    }
  }
);
175 |
// Tool: click_element — waits up to `timeout` ms (default 10000) for the
// element to be located, then clicks it. Failures come back as an error result
// (isError: true).
server.tool(
  "click_element",
  "clicks an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      await element.click();
      return {
        content: [{ type: 'text', text: 'Element clicked' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error clicking element: ${e.message}` }],
        isError: true
      };
    }
  }
);
198 |
// Tool: send_keys — locates the element (waiting up to `timeout` ms, default
// 10000), clears it, then sends `text` to it. Failures come back as an error
// result (isError: true).
server.tool(
  "send_keys",
  "sends keys to an element, aka typing",
  {
    ...locatorSchema,
    text: z.string().describe("Text to enter into the element")
  },
  async ({ by, value, text, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      // Clear first so the element ends up holding exactly `text`.
      await element.clear();
      await element.sendKeys(text);
      return {
        content: [{ type: 'text', text: `Text "${text}" entered into element` }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error entering text: ${e.message}` }],
        isError: true
      };
    }
  }
);
223 |
// Tool: get_element_text — locates the element (waiting up to `timeout` ms,
// default 10000) and returns the result of its getText() as the text content.
// Failures come back as an error result (isError: true), which is what lets a
// client tell an error message apart from element text.
server.tool(
  "get_element_text",
  "gets the text() of an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const text = await element.getText();
      return {
        content: [{ type: 'text', text }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error getting element text: ${e.message}` }],
        isError: true
      };
    }
  }
);
246 |
// Tool: hover — locates the element (waiting up to `timeout` ms, default 10000)
// and moves the pointer onto it through an actions sequence. Failures come back
// as an error result (isError: true).
server.tool(
  "hover",
  "moves the mouse to hover over an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.move({ origin: element }).perform();
      return {
        content: [{ type: 'text', text: 'Hovered over element' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error hovering over element: ${e.message}` }],
        isError: true
      };
    }
  }
);
270 |
// Tool: drag_and_drop — locates the source element (`by`/`value`) and the target
// element (`targetBy`/`targetValue`), then drags the first onto the second.
// `timeout` (default 10000 ms) is applied to each of the two lookups separately.
// Failures come back as an error result (isError: true).
server.tool(
  "drag_and_drop",
  "drags an element and drops it onto another element",
  {
    ...locatorSchema,
    targetBy: z.enum(["id", "css", "xpath", "name", "tag", "class"]).describe("Locator strategy to find target element"),
    targetValue: z.string().describe("Value for the target locator strategy")
  },
  async ({ by, value, targetBy, targetValue, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const sourceLocator = getLocator(by, value);
      const targetLocator = getLocator(targetBy, targetValue);
      const sourceElement = await driver.wait(until.elementLocated(sourceLocator), timeout);
      const targetElement = await driver.wait(until.elementLocated(targetLocator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.dragAndDrop(sourceElement, targetElement).perform();
      return {
        content: [{ type: 'text', text: 'Drag and drop completed' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error performing drag and drop: ${e.message}` }],
        isError: true
      };
    }
  }
);
298 |
// Tool: double_click — locates the element (waiting up to `timeout` ms, default
// 10000) and double-clicks it through an actions sequence. Failures come back
// as an error result (isError: true).
server.tool(
  "double_click",
  "performs a double click on an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.doubleClick(element).perform();
      return {
        content: [{ type: 'text', text: 'Double click performed' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error performing double click: ${e.message}` }],
        isError: true
      };
    }
  }
);
322 |
// Tool: right_click — locates the element (waiting up to `timeout` ms, default
// 10000) and context-clicks it through an actions sequence. Failures come back
// as an error result (isError: true).
server.tool(
  "right_click",
  "performs a right click (context click) on an element",
  {
    ...locatorSchema
  },
  async ({ by, value, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      const actions = driver.actions({ bridge: true });
      await actions.contextClick(element).perform();
      return {
        content: [{ type: 'text', text: 'Right click performed' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error performing right click: ${e.message}` }],
        isError: true
      };
    }
  }
);
346 |
// Tool: press_key — presses and releases one key in the current session.
// `key` is either a single character ('a') or the name of a special key
// ('Enter', 'Tab', 'ArrowUp', 'Page Down', ...). Names are matched against
// Selenium's `Key` constants, ignoring case, spaces, dashes and underscores.
// Failures, including an unknown key name, come back as an error result
// (isError: true).
server.tool(
  "press_key",
  "simulates pressing a keyboard key",
  {
    key: z.string().describe("Key to press (e.g., 'Enter', 'Tab', 'a', etc.)")
  },
  async ({ key }) => {
    try {
      const driver = getDriver();

      // keyDown/keyUp only accept a single code point, so a name such as
      // 'Enter' has to be translated to the matching Key constant first.
      let keyValue = key;
      if (Array.from(key).length !== 1) {
        const wanted = key.replace(/[^a-z0-9]/gi, '').toUpperCase();
        const match = Object.entries(Key).find(
          ([name, code]) => typeof code === 'string' && name.replace(/_/g, '') === wanted
        );
        if (!match) {
          throw new Error(`Unsupported key: ${key}`);
        }
        keyValue = match[1];
      }

      const actions = driver.actions({ bridge: true });
      await actions.keyDown(keyValue).keyUp(keyValue).perform();
      return {
        content: [{ type: 'text', text: `Key '${key}' pressed` }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error pressing key: ${e.message}` }],
        isError: true
      };
    }
  }
);
368 |
// Tool: upload_file — locates the element (waiting up to `timeout` ms, default
// 10000) and sends `filePath` to it as keystrokes. Failures come back as an
// error result (isError: true).
server.tool(
  "upload_file",
  "uploads a file using a file input element",
  {
    ...locatorSchema,
    filePath: z.string().describe("Absolute path to the file to upload")
  },
  async ({ by, value, filePath, timeout = 10000 }) => {
    try {
      const driver = getDriver();
      const locator = getLocator(by, value);
      const element = await driver.wait(until.elementLocated(locator), timeout);
      await element.sendKeys(filePath);
      return {
        content: [{ type: 'text', text: 'File upload initiated' }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error uploading file: ${e.message}` }],
        isError: true
      };
    }
  }
);
392 |
// Tool: take_screenshot — captures the current page. With `outputPath` the
// base64 data is decoded and written to that file; without it the base64 string
// is returned as a second text item. Failures, including a failed write, come
// back as an error result (isError: true).
server.tool(
  "take_screenshot",
  "captures a screenshot of the current page",
  {
    outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data.")
  },
  async ({ outputPath }) => {
    try {
      const driver = getDriver();
      const screenshot = await driver.takeScreenshot();

      if (!outputPath) {
        return {
          content: [
            { type: 'text', text: 'Screenshot captured as base64:' },
            { type: 'text', text: screenshot }
          ]
        };
      }

      // Loaded on demand: the file system is only needed when saving to disk.
      const { writeFile } = await import('node:fs/promises');
      await writeFile(outputPath, screenshot, 'base64');
      return {
        content: [{ type: 'text', text: `Screenshot saved to ${outputPath}` }]
      };
    } catch (e) {
      // Mark the result as a failure so MCP clients do not read it as success.
      return {
        content: [{ type: 'text', text: `Error taking screenshot: ${e.message}` }],
        isError: true
      };
    }
  }
);
424 |
// Tool: close_session — quits the current browser and forgets its session.
// The session is removed from `state` even when quit() fails, so a broken
// browser does not remain the current session. Failures come back as an error
// result (isError: true).
server.tool(
  "close_session",
  "closes the current browser session",
  {},
  async () => {
    try {
      const driver = getDriver();
      const sessionId = state.currentSession;
      try {
        await driver.quit();
      } finally {
        state.drivers.delete(sessionId);
        // Only reset the pointer if it still refers to the session being closed.
        if (state.currentSession === sessionId) {
          state.currentSession = null;
        }
      }
      return {
        content: [{ type: 'text', text: `Browser session ${sessionId} closed` }]
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error closing session: ${e.message}` }],
        isError: true
      };
    }
  }
);
446 |
// Resources

// Resource: browser-status — reports which browser session, if any, is current.
// The URI has no variables, so it is registered as a fixed-URI resource. A
// ResourceTemplate built without its callbacks object would fail as soon as a
// client asks the server to list resources.
server.resource(
  "browser-status",
  "browser-status://current",
  async (uri) => ({
    contents: [{
      uri: uri.href,
      text: state.currentSession
        ? `Active browser session: ${state.currentSession}`
        : "No active browser session"
    }]
  })
);
460 |
// Cleanup handler

/**
 * Quit every tracked browser one after another, reset the server state, then
 * exit the process with status 0. A session that fails to quit is logged to
 * stderr and does not prevent the remaining sessions from being closed.
 */
async function cleanup() {
  const quitAndLog = async (sessionId, driver) => {
    try {
      await driver.quit();
    } catch (e) {
      console.error(`Error closing browser session ${sessionId}:`, e);
    }
  };

  for (const entry of state.drivers) {
    await quitAndLog(entry[0], entry[1]);
  }

  state.drivers.clear();
  state.currentSession = null;
  process.exit(0);
}

// Run the cleanup on both termination signals.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, cleanup);
}
477 |
// Start the server by connecting it to a stdio transport
await server.connect(new StdioServerTransport());
```