# Directory Structure ``` ├── .gitignore ├── dist │ ├── chrome-api.d.ts │ ├── chrome-api.js │ ├── index.d.ts │ ├── index.js │ ├── types.d.ts │ └── types.js ├── LICENSE ├── package-lock.json ├── package.json ├── README.md ├── src │ ├── chrome-api.ts │ ├── image-utils.ts │ ├── index.ts │ └── types.ts └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Dependencies 2 | node_modules/ 3 | package-lock.json 4 | 5 | # Build output 6 | dist/ 7 | build/ 8 | *.tsbuildinfo 9 | 10 | # IDE and editor files 11 | .vscode/ 12 | .idea/ 13 | *.swp 14 | *.swo 15 | *~ 16 | 17 | # Logs 18 | *.log 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | 23 | # Environment files 24 | .env 25 | .env.local 26 | .env.*.local 27 | 28 | # Operating System 29 | .DS_Store 30 | Thumbs.db 31 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # Chrome Tools MCP Server 2 | 3 | An MCP server that provides tools for interacting with Chrome through its DevTools Protocol. This server enables remote control of Chrome tabs, including executing JavaScript, capturing screenshots, monitoring network traffic, and more. 4 | 5 | ## Why use an MCP server like this? 6 | This type of MCP Server is useful when you need to manually configure your browser to be in a certain state before you let an AI tool like Cline poke at it. You can also use this tool to listen to and pull network events into the AI tool's context. 
7 | 8 | ## Features 9 | 10 | - List Chrome tabs 11 | - Execute JavaScript in tabs 12 | - Capture screenshots 13 | - Monitor network traffic 14 | - Navigate tabs to URLs 15 | - Query DOM elements 16 | - Click elements with console output capture 17 | 18 | ## Installation 19 | 20 | ```bash 21 | npm install @nicholmikey/chrome-tools 22 | ``` 23 | 24 | ## Configuration 25 | 26 | The server can be configured through environment variables in your MCP settings: 27 | 28 | ```json 29 | { 30 | "chrome-tools": { 31 | "command": "node", 32 | "args": ["path/to/chrome-tools/dist/index.js"], 33 | "env": { 34 | "CHROME_DEBUG_URL": "http://localhost:9222", 35 | "CHROME_CONNECTION_TYPE": "direct", 36 | "CHROME_ERROR_HELP": "custom error message" 37 | } 38 | } 39 | } 40 | ``` 41 | 42 | ### Environment Variables 43 | 44 | - `CHROME_DEBUG_URL`: The URL where Chrome's remote debugging interface is available (default: http://localhost:9222) 45 | - `CHROME_CONNECTION_TYPE`: Connection type identifier for logging (e.g., "direct", "ssh-tunnel", "docker") 46 | - `CHROME_ERROR_HELP`: Custom error message shown when connection fails 47 | 48 | ## Setup Guide 49 | 50 | ### Native Setup (Windows/Mac/Linux) 51 | 52 | 1. Launch Chrome with remote debugging enabled: 53 | ```bash 54 | # Windows 55 | "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 56 | 57 | # Mac 58 | /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 59 | 60 | # Linux 61 | google-chrome --remote-debugging-port=9222 62 | ``` 63 | 64 | 2. Configure MCP settings: 65 | ```json 66 | { 67 | "env": { 68 | "CHROME_DEBUG_URL": "http://localhost:9222", 69 | "CHROME_CONNECTION_TYPE": "direct" 70 | } 71 | } 72 | ``` 73 | 74 | ### WSL Setup 75 | 76 | When running in WSL, you'll need to set up an SSH tunnel to connect to Chrome running on Windows: 77 | 78 | 1. Launch Chrome on Windows with remote debugging enabled 79 | 2. 
Create an SSH tunnel: 80 | ```bash 81 | ssh -N -L 9222:localhost:9222 windowsuser@host 82 | ``` 83 | 3. Configure MCP settings: 84 | ```json 85 | { 86 | "env": { 87 | "CHROME_DEBUG_URL": "http://localhost:9222", 88 | "CHROME_CONNECTION_TYPE": "ssh-tunnel", 89 | "CHROME_ERROR_HELP": "Make sure the SSH tunnel is running: ssh -N -L 9222:localhost:9222 windowsuser@host" 90 | } 91 | } 92 | ``` 93 | 94 | ### Docker Setup 95 | 96 | When running Chrome in Docker: 97 | 98 | 1. Launch Chrome container: 99 | ```bash 100 | docker run -d --name chrome -p 9222:9222 chromedp/headless-shell 101 | ``` 102 | 103 | 2. Configure MCP settings: 104 | ```json 105 | { 106 | "env": { 107 | "CHROME_DEBUG_URL": "http://localhost:9222", 108 | "CHROME_CONNECTION_TYPE": "docker" 109 | } 110 | } 111 | ``` 112 | 113 | ## Tools 114 | 115 | ### list_tabs 116 | Lists all available Chrome tabs. 117 | 118 | ### execute_script 119 | Executes JavaScript code in a specified tab. 120 | Parameters: 121 | - `tabId`: ID of the Chrome tab 122 | - `script`: JavaScript code to execute 123 | 124 | ### capture_screenshot 125 | Captures a screenshot of a specified tab, automatically optimizing it for AI model consumption. 126 | Parameters: 127 | - `tabId`: ID of the Chrome tab 128 | - `format`: Image format (jpeg/png) - Note: This is only for initial capture. Final output uses WebP with PNG fallback 129 | - `quality`: JPEG quality (1-100) - Note: For initial capture only 130 | - `fullPage`: Capture full scrollable page 131 | 132 | Image Processing: 133 | 1. WebP Optimization (Primary Format): 134 | - First attempt: WebP with quality 80 and high compression effort 135 | - Second attempt: WebP with quality 60 and near-lossless compression if first attempt exceeds 1MB 136 | 2. PNG Fallback: 137 | - Used if WebP processing fails or both WebP attempts still exceed 1MB 138 | - Includes maximum compression and color palette optimization 139 | 3. 
Size Constraints: 140 | - Maximum dimensions: 900x600 (maintains aspect ratio) 141 | - Maximum file size: 1MB 142 | - Progressive size reduction if needed 143 | 144 | ### capture_network_events 145 | Monitors and captures network events from a specified tab. 146 | Parameters: 147 | - `tabId`: ID of the Chrome tab 148 | - `duration`: Duration in seconds to capture 149 | - `filters`: Optional type and URL pattern filters 150 | 151 | ### load_url 152 | Navigates a tab to a specified URL. 153 | Parameters: 154 | - `tabId`: ID of the Chrome tab 155 | - `url`: URL to load 156 | 157 | ### query_dom_elements 158 | Queries and retrieves detailed information about DOM elements matching a CSS selector. 159 | Parameters: 160 | - `tabId`: ID of the Chrome tab 161 | - `selector`: CSS selector to find elements 162 | Returns: 163 | - Array of DOM elements with properties including: 164 | - `nodeId`: Unique identifier for the node 165 | - `tagName`: HTML tag name 166 | - `textContent`: Text content of the element 167 | - `attributes`: Object containing all element attributes 168 | - `boundingBox`: Position and dimensions of the element 169 | - `isVisible`: Whether the element is visible 170 | - `ariaAttributes`: ARIA attributes for accessibility 171 | 172 | ### click_element 173 | Clicks on a DOM element and captures any console output triggered by the click. 
174 | Parameters: 175 | - `tabId`: ID of the Chrome tab 176 | - `selector`: CSS selector to find the element to click 177 | Returns: 178 | - Object containing: 179 | - `message`: Success/failure message 180 | - `consoleOutput`: Array of console messages triggered by the click 181 | 182 | ## License 183 | 184 | MIT 185 | ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "esModuleInterop": true, 7 | "strict": true, 8 | "outDir": "./dist", 9 | "rootDir": "./src", 10 | "declaration": true 11 | }, 12 | "include": ["src/**/*"], 13 | "exclude": ["node_modules", "dist"] 14 | } 15 | ``` -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- ```typescript 1 | // Re-export the ChromeTab type from chrome-remote-interface for compatibility 2 | export type { Target as ChromeTab } from 'chrome-remote-interface'; 3 | 4 | // Interface for DOM element information 5 | export interface DOMElement { 6 | nodeId: number; 7 | tagName: string; 8 | textContent: string | null; 9 | attributes: Record<string, string>; 10 | boundingBox: { 11 | x: number; 12 | y: number; 13 | width: number; 14 | height: number; 15 | } | null; 16 | isVisible: boolean; 17 | ariaAttributes: Record<string, string>; 18 | } 19 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@nicholmikey/chrome-tools", 3 | "version": "1.3.0", 4 | "description": "MCP server for Chrome DevTools Protocol integration - control Chrome tabs, execute JavaScript, capture screenshots, and monitor network 
traffic", 5 | "main": "dist/index.js", 6 | "type": "module", 7 | "scripts": { 8 | "build": "tsc", 9 | "start": "node dist/index.js", 10 | "dev": "ts-node --esm src/index.ts", 11 | "watch": "tsc --watch", 12 | "prepare": "npm run build", 13 | "test": "echo \"No tests specified\" && exit 0", 14 | "lint": "eslint src --ext .ts", 15 | "format": "prettier --write \"src/**/*.ts\"" 16 | }, 17 | "bin": { 18 | "mcp-chrome-tools": "./dist/index.js" 19 | }, 20 | "files": [ 21 | "dist", 22 | "README.md", 23 | "LICENSE" 24 | ], 25 | "keywords": [ 26 | "mcp", 27 | "chrome", 28 | "devtools", 29 | "debugging", 30 | "automation", 31 | "testing", 32 | "screenshots", 33 | "network-monitoring", 34 | "browser-automation", 35 | "chrome-devtools-protocol" 36 | ], 37 | "author": { 38 | "name": "nicholmikey", 39 | "url": "https://github.com/nicholmikey" 40 | }, 41 | "repository": { 42 | "type": "git", 43 | "url": "https://github.com/nicholmikey/chrome-tools-MCP.git" 44 | }, 45 | "bugs": { 46 | "url": "https://github.com/nicholmikey/chrome-tools-MCP/issues" 47 | }, 48 | "homepage": "https://github.com/nicholmikey/chrome-tools-MCP#readme", 49 | "license": "MIT", 50 | "dependencies": { 51 | "@modelcontextprotocol/sdk": "^1.5.0", 52 | "@types/ws": "^8.5.14", 53 | "axios": "^1.7.9", 54 | "chrome-remote-interface": "^0.33.2", 55 | "sharp": "^0.32.6", 56 | "ts-node": "^10.9.2", 57 | "typescript": "^5.7.3", 58 | "ws": "^8.18.0", 59 | "zod": "^3.24.2" 60 | }, 61 | "devDependencies": { 62 | "@types/chrome-remote-interface": "^0.31.14" 63 | } 64 | } 65 | ``` -------------------------------------------------------------------------------- /src/image-utils.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { createRequire } from 'module'; 2 | import path from 'path'; 3 | import fs from 'fs/promises'; 4 | const require = createRequire(import.meta.url); 5 | const sharp = require('sharp'); 6 | 7 | export const SCREENSHOT_DIR = path.join('/tmp', 
'chrome-tools-screenshots'); 8 | 9 | export interface ProcessedImage { 10 | data: string; 11 | format: 'png'; 12 | size: number; 13 | } 14 | 15 | export async function saveImage(processedImage: ProcessedImage): Promise<string> { 16 | // Ensure screenshots directory exists 17 | await fs.mkdir(SCREENSHOT_DIR, { recursive: true }); 18 | 19 | const filename = `screenshot_${Date.now()}.webp`; 20 | const filepath = path.join(SCREENSHOT_DIR, filename); 21 | 22 | // Extract the base64 data after the "data:image/webp;base64," prefix 23 | const base64Data = processedImage.data.split(',')[1]; 24 | const imageBuffer = Buffer.from(base64Data, 'base64'); 25 | 26 | await fs.writeFile(filepath, imageBuffer); 27 | return filepath; 28 | } 29 | 30 | export async function processImage(base64Data: string): Promise<ProcessedImage> { 31 | try { 32 | // Convert base64 to buffer 33 | const buffer = Buffer.from(base64Data, 'base64'); 34 | 35 | // Create Sharp instance and resize maintaining aspect ratio 36 | const image = sharp(buffer).resize(900, 600, { 37 | fit: 'inside', 38 | withoutEnlargement: true 39 | }); 40 | 41 | // Try WebP first with good quality 42 | try { 43 | const webpBuffer = await image 44 | .webp({ 45 | quality: 80, 46 | effort: 6, // Higher compression effort 47 | lossless: false 48 | }) 49 | .toBuffer(); 50 | 51 | if (webpBuffer.length <= 1024 * 1024) { 52 | return { 53 | data: `data:image/webp;base64,${webpBuffer.toString('base64')}`, 54 | format: 'png', // Keep format as 'png' in interface for backward compatibility 55 | size: webpBuffer.length 56 | }; 57 | } 58 | 59 | // If still too large, try WebP with more aggressive compression 60 | const compressedWebpBuffer = await image 61 | .webp({ 62 | quality: 60, 63 | effort: 6, 64 | lossless: false, 65 | nearLossless: true 66 | }) 67 | .toBuffer(); 68 | 69 | if (compressedWebpBuffer.length <= 1024 * 1024) { 70 | return { 71 | data: `data:image/webp;base64,${compressedWebpBuffer.toString('base64')}`, 72 | format: 'png', // 
Keep format as 'png' in interface for backward compatibility 73 | size: compressedWebpBuffer.length 74 | }; 75 | } 76 | } catch (webpError) { 77 | console.error('WebP processing failed, falling back to PNG:', webpError); 78 | } 79 | 80 | // Fallback to PNG with compression if WebP fails or is too large 81 | const pngBuffer = await image 82 | .png({ 83 | compressionLevel: 9, 84 | palette: true 85 | }) 86 | .toBuffer(); 87 | 88 | if (pngBuffer.length > 1024 * 1024) { 89 | // If still too large, reduce dimensions further 90 | const scaleFactor = Math.sqrt(1024 * 1024 / pngBuffer.length); 91 | const resizedImage = sharp(buffer).resize( 92 | Math.floor(900 * scaleFactor), 93 | Math.floor(600 * scaleFactor), 94 | { 95 | fit: 'inside', 96 | withoutEnlargement: true 97 | } 98 | ); 99 | 100 | const compressedPngBuffer = await resizedImage 101 | .png({ 102 | compressionLevel: 9, 103 | palette: true, 104 | colors: 128 // Reduce color palette for smaller size 105 | }) 106 | .toBuffer(); 107 | 108 | if (compressedPngBuffer.length > 1024 * 1024) { 109 | throw new Error('Image is too large even after compression'); 110 | } 111 | 112 | return { 113 | data: `data:image/png;base64,${compressedPngBuffer.toString('base64')}`, 114 | format: 'png', 115 | size: compressedPngBuffer.length 116 | }; 117 | } 118 | 119 | return { 120 | data: `data:image/png;base64,${pngBuffer.toString('base64')}`, 121 | format: 'png', 122 | size: pngBuffer.length 123 | }; 124 | } catch (error) { 125 | throw new Error(`Failed to process image: ${error instanceof Error ? 
error.message : 'Unknown error'}`); 126 | } 127 | } 128 | ``` -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | #!/usr/bin/env node 2 | import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; 3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 4 | import { ChromeAPI } from './chrome-api.js'; 5 | import { processImage, saveImage } from './image-utils.js'; 6 | import { z } from 'zod'; 7 | 8 | // Get Chrome debug URL from environment variable or use default 9 | const chromeDebugUrl = process.env.CHROME_DEBUG_URL || 'http://localhost:9222'; 10 | console.error(`Using Chrome debug URL: ${chromeDebugUrl}`); 11 | 12 | const chromeApi = new ChromeAPI({ baseUrl: chromeDebugUrl }); 13 | 14 | // Create the MCP server 15 | const server = new McpServer({ 16 | name: 'chrome-tools', 17 | version: '1.3.0' 18 | }); 19 | 20 | // Add the list_tabs tool 21 | server.tool( 22 | 'list_tabs', 23 | {}, // No input parameters needed 24 | async () => { 25 | try { 26 | console.error('Attempting to list Chrome tabs...'); 27 | const tabs = await chromeApi.listTabs(); 28 | console.error(`Successfully found ${tabs.length} tabs`); 29 | return { 30 | content: [{ 31 | type: 'text', 32 | text: JSON.stringify(tabs, null, 2) 33 | }] 34 | }; 35 | } catch (error) { 36 | console.error('Error in list_tabs tool:', error); 37 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; 38 | return { 39 | content: [{ 40 | type: 'text', 41 | text: `Error: ${errorMessage}` 42 | }], 43 | isError: true 44 | }; 45 | } 46 | } 47 | ); 48 | 49 | // Add the capture_screenshot tool 50 | server.tool( 51 | 'capture_screenshot', 52 | { 53 | tabId: z.string().describe('ID of the Chrome tab to capture. 
Only send this unless you are having issues with the result.'), 54 | format: z.enum(['jpeg', 'png']).optional() 55 | .describe('Initial capture format (jpeg/png). Note: Final output will be WebP with PNG fallback'), 56 | quality: z.number().min(1).max(100).optional() 57 | .describe('Initial capture quality (1-100). Note: Final output uses WebP quality settings'), 58 | fullPage: z.boolean().optional() 59 | .describe('Capture full scrollable page') 60 | }, 61 | async (params) => { 62 | try { 63 | console.error(`Attempting to capture screenshot of tab ${params.tabId}...`); 64 | const rawBase64Data = await chromeApi.captureScreenshot(params.tabId, { 65 | format: params.format, 66 | quality: params.quality, 67 | fullPage: params.fullPage 68 | }); 69 | console.error('Screenshot captured, optimizing with WebP...'); 70 | 71 | try { 72 | // Process image with the following strategy: 73 | // 1. Try WebP with quality 80 (best balance of quality/size) 74 | // 2. If >1MB, try WebP with quality 60 and near-lossless 75 | // 3. If WebP fails, fall back to PNG with maximum compression 76 | const processedImage = await processImage(rawBase64Data); 77 | console.error(`Image optimized successfully (${processedImage.data.startsWith('data:image/webp') ? 'WebP' : 'PNG'}, ${Math.round(processedImage.size / 1024)}KB)`); 78 | 79 | // Save the image and get the filepath 80 | const filepath = await saveImage(processedImage); 81 | console.error(`Screenshot saved to: ${filepath}`); 82 | 83 | return { 84 | content: [{ 85 | type: 'text', 86 | text: JSON.stringify({ 87 | status: 'Screenshot successful.', 88 | path: filepath 89 | }) 90 | }] 91 | }; 92 | } catch (error) { 93 | console.error('Image processing failed:', error); 94 | return { 95 | content: [{ 96 | type: 'text', 97 | text: `Error processing screenshot: ${error instanceof Error ? 
error.message : 'Unknown error'}` 98 | }], 99 | isError: true 100 | }; 101 | } 102 | } catch (error) { 103 | console.error('Error in capture_screenshot tool:', error); 104 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; 105 | return { 106 | content: [{ 107 | type: 'text', 108 | text: `Error: ${errorMessage}` 109 | }], 110 | isError: true 111 | }; 112 | } 113 | } 114 | ); 115 | 116 | // Add the execute_script tool 117 | server.tool( 118 | 'execute_script', 119 | { 120 | tabId: z.string().describe('ID of the Chrome tab to execute the script in'), 121 | script: z.string().describe('JavaScript code to execute in the tab') 122 | }, 123 | async (params) => { 124 | try { 125 | console.error(`Attempting to execute script in tab ${params.tabId}...`); 126 | const result = await chromeApi.executeScript(params.tabId, params.script); 127 | console.error('Script execution successful'); 128 | return { 129 | content: [{ 130 | type: 'text', 131 | text: result || 'undefined' 132 | }] 133 | }; 134 | } catch (error) { 135 | console.error('Error in execute_script tool:', error); 136 | const errorMessage = error instanceof Error ? 
error.message : 'Unknown error occurred'; 137 | return { 138 | content: [{ 139 | type: 'text', 140 | text: `Error: ${errorMessage}` 141 | }], 142 | isError: true 143 | }; 144 | } 145 | } 146 | ); 147 | 148 | // Log when server starts 149 | console.error('Chrome Tools MCP Server starting...'); 150 | 151 | // Start the server 152 | const transport = new StdioServerTransport(); 153 | server.connect(transport).catch(console.error); 154 | 155 | // Add the load_url tool 156 | server.tool( 157 | 'load_url', 158 | { 159 | tabId: z.string().describe('ID of the Chrome tab to load the URL in'), 160 | url: z.string().url().describe('URL to load in the tab') 161 | }, 162 | async (params) => { 163 | try { 164 | console.error(`Attempting to load URL ${params.url} in tab ${params.tabId}...`); 165 | await chromeApi.loadUrl(params.tabId, params.url); 166 | console.error('URL loading successful'); 167 | return { 168 | content: [{ 169 | type: 'text', 170 | text: `Successfully loaded ${params.url} in tab ${params.tabId}` 171 | }] 172 | }; 173 | } catch (error) { 174 | console.error('Error in load_url tool:', error); 175 | const errorMessage = error instanceof Error ? 
error.message : 'Unknown error occurred'; 176 | return { 177 | content: [{ 178 | type: 'text', 179 | text: `Error: ${errorMessage}` 180 | }], 181 | isError: true 182 | }; 183 | } 184 | } 185 | ); 186 | 187 | // Add the capture_network_events tool 188 | server.tool( 189 | 'capture_network_events', 190 | { 191 | tabId: z.string().describe('ID of the Chrome tab to monitor'), 192 | duration: z.number().min(1).max(60).optional() 193 | .describe('Duration in seconds to capture events (default: 10)'), 194 | filters: z.object({ 195 | types: z.array(z.enum(['fetch', 'xhr'])).optional() 196 | .describe('Types of requests to capture'), 197 | urlPattern: z.string().optional() 198 | .describe('Only capture URLs matching this pattern') 199 | }).optional() 200 | }, 201 | async (params) => { 202 | try { 203 | console.error(`Attempting to capture network events from tab ${params.tabId}...`); 204 | const events = await chromeApi.captureNetworkEvents(params.tabId, { 205 | duration: params.duration, 206 | filters: params.filters 207 | }); 208 | console.error(`Network event capture successful, captured ${events.length} events`); 209 | return { 210 | content: [{ 211 | type: 'text', 212 | text: JSON.stringify(events, null, 2) 213 | }] 214 | }; 215 | } catch (error) { 216 | console.error('Error in capture_network_events tool:', error); 217 | const errorMessage = error instanceof Error ? 
error.message : 'Unknown error occurred'; 218 | return { 219 | content: [{ 220 | type: 'text', 221 | text: `Error: ${errorMessage}` 222 | }], 223 | isError: true 224 | }; 225 | } 226 | } 227 | ); 228 | 229 | // Add the query_dom_elements tool 230 | server.tool( 231 | 'query_dom_elements', 232 | { 233 | tabId: z.string().describe('ID of the Chrome tab to query'), 234 | selector: z.string().describe('CSS selector to find elements') 235 | }, 236 | async (params) => { 237 | try { 238 | console.error(`Attempting to query DOM elements in tab ${params.tabId}...`); 239 | const elements = await chromeApi.queryDOMElements(params.tabId, params.selector); 240 | console.error(`Successfully found ${elements.length} elements matching selector`); 241 | return { 242 | content: [{ 243 | type: 'text', 244 | text: JSON.stringify(elements, null, 2) 245 | }] 246 | }; 247 | } catch (error) { 248 | console.error('Error in query_dom_elements tool:', error); 249 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; 250 | return { 251 | content: [{ 252 | type: 'text', 253 | text: `Error: ${errorMessage}` 254 | }], 255 | isError: true 256 | }; 257 | } 258 | } 259 | ); 260 | 261 | // Add the click_element tool 262 | server.tool( 263 | 'click_element', 264 | { 265 | tabId: z.string().describe('ID of the Chrome tab containing the element'), 266 | selector: z.string().describe('CSS selector to find the element to click') 267 | }, 268 | async (params) => { 269 | try { 270 | console.error(`Attempting to click element in tab ${params.tabId}...`); 271 | const result = await chromeApi.clickElement(params.tabId, params.selector); 272 | console.error('Successfully clicked element'); 273 | return { 274 | content: [{ 275 | type: 'text', 276 | text: JSON.stringify({ 277 | message: 'Successfully clicked element', 278 | consoleOutput: result.consoleOutput 279 | }, null, 2) 280 | }] 281 | }; 282 | } catch (error) { 283 | console.error('Error in click_element tool:', error); 
284 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; 285 | return { 286 | content: [{ 287 | type: 'text', 288 | text: `Error: ${errorMessage}` 289 | }], 290 | isError: true 291 | }; 292 | } 293 | } 294 | ); 295 | 296 | // Handle process termination 297 | process.on('SIGINT', () => { 298 | server.close().catch(console.error); 299 | process.exit(0); 300 | }); 301 | ``` -------------------------------------------------------------------------------- /src/chrome-api.ts: -------------------------------------------------------------------------------- ```typescript 1 | import CDP from 'chrome-remote-interface'; 2 | import type { Client } from 'chrome-remote-interface'; 3 | import { ChromeTab, DOMElement } from './types.js'; 4 | 5 | type MouseButton = 'none' | 'left' | 'middle' | 'right' | 'back' | 'forward'; 6 | type MouseEventType = 'mousePressed' | 'mouseReleased'; 7 | 8 | export class ChromeAPI { 9 | private baseUrl: string; 10 | 11 | constructor(options: { port?: number; baseUrl?: string } = {}) { 12 | const { port = 9222, baseUrl } = options; 13 | this.baseUrl = baseUrl || `http://localhost:${port}`; 14 | const connectionType = process.env.CHROME_CONNECTION_TYPE || 'direct'; 15 | console.error(`ChromeAPI: Connecting to ${this.baseUrl} (${connectionType} connection)`); 16 | } 17 | 18 | /** 19 | * List all available Chrome tabs 20 | * @returns Promise<ChromeTab[]> 21 | * @throws Error if Chrome is not accessible or returns an error 22 | */ 23 | async listTabs(): Promise<ChromeTab[]> { 24 | try { 25 | console.error(`ChromeAPI: Attempting to list tabs on port ${this.port}`); 26 | const targets = await CDP.List({ port: this.port }); 27 | console.error(`ChromeAPI: Successfully found ${targets.length} tabs`); 28 | return targets; 29 | } catch (error) { 30 | console.error(`ChromeAPI: Failed to list tabs:`, error instanceof Error ? 
error.message : error); 31 | const errorHelp = process.env.CHROME_ERROR_HELP || 'Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222)'; 32 | throw new Error(`Failed to connect to Chrome DevTools. ${errorHelp}`); 33 | } 34 | } 35 | 36 | /** 37 | * Execute JavaScript in a specific Chrome tab 38 | * @param tabId The ID of the tab to execute the script in 39 | * @param script The JavaScript code to execute 40 | * @returns Promise with the result of the script execution 41 | * @throws Error if the tab is not found or script execution fails 42 | */ 43 | async executeScript(tabId: string, script: string): Promise<string> { 44 | console.error(`ChromeAPI: Attempting to execute script in tab ${tabId}`); 45 | let client: Client | undefined; 46 | try { 47 | // Connect to the specific tab 48 | client = await CDP({ target: tabId, port: this.port }); 49 | 50 | if (!client) { 51 | throw new Error('Failed to connect to Chrome DevTools'); 52 | } 53 | 54 | // Enable Runtime and set up console listener 55 | await client.Runtime.enable(); 56 | 57 | let consoleMessages: string[] = []; 58 | client.Runtime.consoleAPICalled(({ type, args }) => { 59 | const message = args.map(arg => arg.value || arg.description).join(' '); 60 | consoleMessages.push(`[${type}] ${message}`); 61 | console.error(`Chrome Console: ${type}:`, message); 62 | }); 63 | 64 | // Execute the script using Runtime.evaluate 65 | const result = await client.Runtime.evaluate({ 66 | expression: script, 67 | returnByValue: true, 68 | includeCommandLineAPI: true 69 | }); 70 | 71 | console.error('ChromeAPI: Script execution successful'); 72 | return JSON.stringify({ 73 | result: result.result, 74 | consoleOutput: consoleMessages 75 | }, null, 2); 76 | } catch (error) { 77 | console.error('ChromeAPI: Script execution failed:', error instanceof Error ? 
error.message : error); 78 | throw error; 79 | } finally { 80 | if (client) { 81 | await client.close(); 82 | } 83 | } 84 | } 85 | 86 | /** 87 | * Check if Chrome debugging port is accessible 88 | * @returns Promise<boolean> 89 | */ 90 | async isAvailable(): Promise<boolean> { 91 | try { 92 | await this.listTabs(); 93 | return true; 94 | } catch { 95 | return false; 96 | } 97 | } 98 | 99 | /** 100 | * Capture a screenshot of a specific Chrome tab 101 | * @param tabId The ID of the tab to capture 102 | * @param options Screenshot options (format, quality, fullPage) 103 | * @returns Promise with the base64-encoded screenshot data 104 | * @throws Error if the tab is not found or screenshot capture fails 105 | */ 106 | async captureScreenshot( 107 | tabId: string, 108 | options: { 109 | format?: 'jpeg' | 'png'; 110 | quality?: number; 111 | fullPage?: boolean; 112 | } = {} 113 | ): Promise<string> { 114 | console.error(`ChromeAPI: Attempting to capture screenshot of tab ${tabId}`); 115 | let client: Client | undefined; 116 | try { 117 | // Connect to the specific tab 118 | client = await CDP({ target: tabId, port: this.port }); 119 | 120 | if (!client) { 121 | throw new Error('Failed to connect to Chrome DevTools'); 122 | } 123 | 124 | // Enable Page domain for screenshot capabilities 125 | await client.Page.enable(); 126 | 127 | // If fullPage is requested, we need to get the full page dimensions 128 | if (options.fullPage) { 129 | // Get the full page dimensions 130 | const { root } = await client.DOM.getDocument(); 131 | const { model } = await client.DOM.getBoxModel({ nodeId: root.nodeId }); 132 | const height = model.height; 133 | 134 | // Set viewport to full page height 135 | await client.Emulation.setDeviceMetricsOverride({ 136 | width: 1920, // Standard width 137 | height: Math.ceil(height), 138 | deviceScaleFactor: 1, 139 | mobile: false 140 | }); 141 | } 142 | 143 | // Capture the screenshot 144 | const result = await client.Page.captureScreenshot({ 145 | 
format: options.format || 'png', 146 | quality: options.format === 'jpeg' ? options.quality || 80 : undefined, 147 | fromSurface: true, 148 | captureBeyondViewport: options.fullPage || false 149 | }); 150 | 151 | console.error('ChromeAPI: Screenshot capture successful'); 152 | return result.data; 153 | } catch (error) { 154 | console.error('ChromeAPI: Screenshot capture failed:', error instanceof Error ? error.message : error); 155 | throw error; 156 | } finally { 157 | if (client) { 158 | // Reset device metrics if we modified them 159 | if (options.fullPage) { 160 | await client.Emulation.clearDeviceMetricsOverride(); 161 | } 162 | await client.close(); 163 | } 164 | } 165 | } 166 | 167 | /** 168 | * Capture network events (XHR/Fetch) from a specific Chrome tab 169 | * @param tabId The ID of the tab to capture events from 170 | * @param options Capture options (duration, filters) 171 | * @returns Promise with the captured network events 172 | * @throws Error if the tab is not found or capture fails 173 | */ 174 | async captureNetworkEvents( 175 | tabId: string, 176 | options: { 177 | duration?: number; 178 | filters?: { 179 | types?: Array<'fetch' | 'xhr'>; 180 | urlPattern?: string; 181 | }; 182 | } = {} 183 | ): Promise<Array<{ 184 | type: 'fetch' | 'xhr'; 185 | method: string; 186 | url: string; 187 | status: number; 188 | statusText: string; 189 | requestHeaders: Record<string, string>; 190 | responseHeaders: Record<string, string>; 191 | timing: { 192 | requestTime: number; 193 | responseTime: number; 194 | }; 195 | }>> { 196 | console.error(`ChromeAPI: Attempting to capture network events from tab ${tabId}`); 197 | let client: Client | undefined; 198 | try { 199 | // Connect to the specific tab 200 | client = await CDP({ target: tabId, port: this.port }); 201 | 202 | if (!client) { 203 | throw new Error('Failed to connect to Chrome DevTools'); 204 | } 205 | 206 | // Enable Network domain 207 | await client.Network.enable(); 208 | 209 | const events: 
Array<any> = []; 210 | const requests = new Map(); 211 | 212 | // Set up event handlers 213 | const requestHandler = (params: any) => { 214 | const request = { 215 | type: (params.type?.toLowerCase() === 'xhr' ? 'xhr' : 'fetch') as 'xhr' | 'fetch', 216 | method: params.request.method, 217 | url: params.request.url, 218 | requestHeaders: params.request.headers, 219 | timing: { 220 | requestTime: params.timestamp 221 | } 222 | }; 223 | 224 | // Apply filters if specified 225 | if (options.filters) { 226 | if (options.filters.types && !options.filters.types.includes(request.type)) { 227 | return; 228 | } 229 | if (options.filters.urlPattern && !request.url.match(options.filters.urlPattern)) { 230 | return; 231 | } 232 | } 233 | 234 | requests.set(params.requestId, request); 235 | }; 236 | 237 | const responseHandler = (params: any) => { 238 | const request = requests.get(params.requestId); 239 | if (request) { 240 | request.status = params.response.status; 241 | request.statusText = params.response.statusText; 242 | request.responseHeaders = params.response.headers; 243 | request.timing.responseTime = params.timestamp; 244 | events.push(request); 245 | } 246 | }; 247 | 248 | // Register event handlers 249 | client.Network.requestWillBeSent(requestHandler); 250 | client.Network.responseReceived(responseHandler); 251 | 252 | // Wait for specified duration 253 | const duration = options.duration || 10; 254 | await new Promise(resolve => setTimeout(resolve, duration * 1000)); 255 | 256 | console.error('ChromeAPI: Network event capture successful'); 257 | return events; 258 | } catch (error) { 259 | console.error('ChromeAPI: Network event capture failed:', error instanceof Error ? 
/**
 * Navigate a Chrome tab to a specific URL and wait for its load event.
 *
 * @param tabId The ID of the tab to load the URL in
 * @param url The URL to load
 * @returns Promise that resolves once the page's load event has fired
 * @throws Error if the connection fails or Chrome reports a navigation error
 */
async loadUrl(tabId: string, url: string): Promise<void> {
  console.error(`ChromeAPI: Attempting to load URL ${url} in tab ${tabId}`);
  let client: Client | undefined;
  try {
    // Connect to the specific tab
    client = await CDP({ target: tabId, port: this.port });

    if (!client) {
      throw new Error('Failed to connect to Chrome DevTools');
    }

    // Enable Page domain for navigation
    await client.Page.enable();

    // Start waiting for the load event BEFORE navigating, so a page that
    // finishes loading before `navigate` resolves cannot be missed.
    const loaded = client.Page.loadEventFired();

    // Chrome reports failed navigations through `errorText`. No load event
    // follows in that case, so fail now instead of waiting forever.
    const { errorText } = await client.Page.navigate({ url });
    if (errorText) {
      throw new Error(`Navigation to ${url} failed: ${errorText}`);
    }

    await loaded;

    console.error('ChromeAPI: URL loading successful');
  } catch (error) {
    console.error('ChromeAPI: URL loading failed:', error instanceof Error ? error.message : error);
    throw error;
  } finally {
    // Always release the DevTools connection.
    if (client) {
      await client.close();
    }
  }
}
/**
 * Query DOM elements using a CSS selector.
 *
 * For every match this collects the node description, its box model (when it
 * has one), its attributes, and a visibility flag derived from the element's
 * own computed style (`display`, `visibility`, `opacity`).
 *
 * @param tabId The ID of the tab to query
 * @param selector CSS selector to find elements
 * @returns Promise<DOMElement[]> Array of matching DOM elements with their properties
 * @throws Error if the connection or the query fails; the message includes the selector
 */
async queryDOMElements(tabId: string, selector: string): Promise<DOMElement[]> {
  console.error(`ChromeAPI: Attempting to query DOM elements in tab ${tabId} with selector "${selector}"`);
  let client: Client | undefined;
  try {
    // Connect to the specific tab
    client = await CDP({ target: tabId, port: this.port });

    if (!client) {
      throw new Error('Failed to connect to Chrome DevTools');
    }

    // Non-undefined alias so the per-element closures below need no re-check.
    const cdp = client;

    // Enable necessary domains
    await cdp.DOM.enable();
    await cdp.Runtime.enable();

    // Get the document root
    const { root } = await cdp.DOM.getDocument();

    // Find elements matching the selector
    const { nodeIds } = await cdp.DOM.querySelectorAll({
      nodeId: root.nodeId,
      selector
    });

    // Get detailed information for each element
    const elements: DOMElement[] = await Promise.all(
      nodeIds.map(async (nodeId) => {
        // Get node details
        const { node } = await cdp.DOM.describeNode({ nodeId });

        // Get node box model for position and dimensions
        const boxModel = await cdp.DOM.getBoxModel({ nodeId })
          .catch(() => null); // Some elements might not have a box model

        // Check visibility on THIS node. Resolving the node to a remote object
        // and calling a function on it avoids splicing the selector into page
        // JavaScript (which broke on selectors containing quotes) and avoids
        // re-querying, which always inspected the first match only.
        let isVisible = false;
        try {
          const { object } = await cdp.DOM.resolveNode({ nodeId });
          const { result } = await cdp.Runtime.callFunctionOn({
            objectId: object.objectId,
            functionDeclaration: `
              function() {
                const style = window.getComputedStyle(this);
                return style.display !== 'none' &&
                       style.visibility !== 'hidden' &&
                       style.opacity !== '0';
              }
            `,
            returnByValue: true
          });
          isVisible = result.value === true;
        } catch {
          // The node could not be resolved or inspected: report it as not
          // visible, as the previous check did when it found no element.
          isVisible = false;
        }

        // `node.attributes` is a flat [name, value, name, value, ...] array.
        // Build the attribute map and the ARIA subset in one pass.
        const attributes: Record<string, string> = {};
        const ariaAttributes: Record<string, string> = {};
        const flatAttributes = node.attributes ?? [];
        for (let i = 0; i + 1 < flatAttributes.length; i += 2) {
          const name = flatAttributes[i];
          const value = flatAttributes[i + 1];
          attributes[name] = value;
          if (name.startsWith('aria-')) {
            ariaAttributes[name] = value;
          }
        }

        return {
          nodeId,
          tagName: node.nodeName.toLowerCase(),
          // NOTE(review): nodeValue is presumably empty for element nodes, so this is likely always null — confirm.
          textContent: node.nodeValue || null,
          attributes,
          boundingBox: boxModel ? {
            x: boxModel.model.content[0],
            y: boxModel.model.content[1],
            width: boxModel.model.width,
            height: boxModel.model.height
          } : null,
          isVisible,
          ariaAttributes
        };
      })
    );

    console.error(`ChromeAPI: Successfully found ${elements.length} elements matching selector`);
    return elements;
  } catch (error) {
    console.error('ChromeAPI: DOM query failed:', error instanceof Error ? error.message : error);
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    throw new Error(`Failed to query DOM elements with selector "${selector}": ${errorMessage}. Note: :contains() is not a valid CSS selector. Use a valid CSS selector like tag names, classes, or IDs.`);
  } finally {
    // Always release the DevTools connection.
    if (client) {
      await client.close();
    }
  }
}
/**
 * Click the first DOM element matching a CSS selector and collect console output.
 *
 * A synthetic `click` MouseEvent is dispatched at the centre of the element's
 * content box. The call then waits until the first console message arrives or
 * one second passes, whichever comes first.
 *
 * @param tabId The ID of the tab containing the element
 * @param selector CSS selector to find the element to click
 * @returns Promise with the console messages observed after the click, each formatted as `[type] message`
 * @throws Error if the connection fails, no element matches, or the click script fails in the page
 */
async clickElement(tabId: string, selector: string): Promise<{consoleOutput: string[]}> {
  console.error(`ChromeAPI: Attempting to click element in tab ${tabId} with selector "${selector}"`);
  let client: Client | undefined;
  try {
    // Connect to the specific tab
    client = await CDP({ target: tabId, port: this.port });

    if (!client) {
      throw new Error('Failed to connect to Chrome DevTools');
    }

    // Non-undefined alias so the listener closure below needs no re-check.
    const cdp = client;

    // Enable necessary domains
    await cdp.DOM.enable();
    await cdp.Runtime.enable();

    // Get the document root
    const { root } = await cdp.DOM.getDocument();

    // Find the element
    const { nodeIds } = await cdp.DOM.querySelectorAll({
      nodeId: root.nodeId,
      selector
    });

    if (nodeIds.length === 0) {
      throw new Error(`No element found matching selector: ${selector}`);
    }

    // Get element's box model for coordinates
    const { model } = await cdp.DOM.getBoxModel({ nodeId: nodeIds[0] });

    // Calculate center point
    const centerX = model.content[0] + (model.width / 2);
    const centerY = model.content[1] + (model.height / 2);

    // Set up the console listener BEFORE clicking so no output is missed.
    const consoleMessages: string[] = [];
    const consolePromise = new Promise<void>((resolve) => {
      cdp.Runtime.consoleAPICalled(({ type, args }) => {
        // `??` keeps falsy primitives (0, false, '') instead of dropping them.
        const message = args.map(arg => arg.value ?? arg.description).join(' ');
        consoleMessages.push(`[${type}] ${message}`);
        console.error(`Chrome Console: ${type}:`, message);
        resolve(); // Resolve when we get a console message
      });
    });

    // Upper bound on how long to wait for console output
    const timeoutPromise = new Promise<void>((resolve) => {
      setTimeout(resolve, 1000);
    });

    // JSON.stringify yields a valid JS string literal, so selectors that
    // contain quotes or backslashes cannot break the injected script.
    const selectorLiteral = JSON.stringify(selector);

    // Click the element exactly once.
    const { exceptionDetails } = await cdp.Runtime.evaluate({
      expression: `
        (() => {
          const element = document.querySelector(${selectorLiteral});
          if (!element) throw new Error('Element not found');

          const clickEvent = new MouseEvent('click', {
            bubbles: true,
            cancelable: true,
            view: window,
            clientX: ${Math.round(centerX)},
            clientY: ${Math.round(centerY)}
          });

          element.dispatchEvent(clickEvent);
        })()
      `,
      awaitPromise: true
    });

    // Runtime.evaluate reports page-side exceptions in its result instead of
    // rejecting, so surface them explicitly.
    if (exceptionDetails) {
      const reason = exceptionDetails.exception?.description ?? exceptionDetails.text;
      throw new Error(`Click script failed for selector "${selector}": ${reason}`);
    }

    // Wait for either a console message or timeout
    await Promise.race([consolePromise, timeoutPromise]);

    console.error('ChromeAPI: Successfully clicked element');
    return { consoleOutput: consoleMessages };
  } catch (error) {
    console.error('ChromeAPI: Element click failed:', error instanceof Error ? error.message : error);
    throw error;
  } finally {
    // Always release the DevTools connection.
    if (client) {
      await client.close();
    }
  }
}
/**
 * Remote-debugging port taken from `baseUrl`, or 9222 when it names none.
 *
 * The port is read from the host part only, so a trailing slash, path, query
 * or fragment (e.g. `http://localhost:9333/json`) no longer causes a silent
 * fallback to the default. Scheme-less values such as `localhost:9333` are
 * still accepted.
 */
private get port(): number {
  const defaultPort = 9222;

  // Drop an optional "scheme://" prefix, then keep everything up to the first
  // '/', '?' or '#': what remains is the authority ("host" or "host:port").
  const withoutScheme = this.baseUrl.replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');
  const authority = withoutScheme.split(/[/?#]/, 1)[0] ?? '';

  const portMatch = authority.match(/:(\d+)$/);
  return portMatch ? Number.parseInt(portMatch[1], 10) : defaultPort;
}