# Directory Structure ``` ├── .github │ └── workflows │ └── ci.yaml ├── .gitignore ├── .python-version ├── client.py ├── Dockerfile ├── main.py ├── pyproject.toml ├── README.md ├── requirements.txt ├── smithery.yaml └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 1 | 3.10 2 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | # Virtual environments 10 | .venv 11 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # 🤖 Browser Automation Agent 2 | 3 | A powerful browser automation tool built with MCP (Model Controlled Program) that combines web scraping capabilities with LLM-powered intelligence. This agent can search Google, navigate to webpages, and intelligently scrape content from various websites including GitHub, Stack Overflow, and documentation sites. 4 | 5 | ## 🚀 Features 6 | 7 | - **🔍 Google Search Integration**: Finds and retrieves top search results for any query 8 | - **🕸️ Intelligent Web Scraping**: Tailored scraping strategies for different website types: 9 | - 📂 GitHub repositories 10 | - 💬 Stack Overflow questions and answers 11 | - 📚 Documentation pages 12 | - 🌐 Generic websites 13 | - **🧠 AI-Powered Processing**: Uses Mistral AI for understanding and processing scraped content 14 | - **🥷 Stealth Mode**: Implements browser fingerprint protection to avoid detection 15 | - **💾 Content Saving**: Automatically saves both screenshots and text content from scraped pages 16 | 17 | ## 🏗️ Architecture 18 | 19 | This project uses a client-server architecture powered by MCP: 20 | 21 | - **🖥️ Server**: Handles browser automation and web scraping tasks 22 | - **👤 Client**: Provides the AI interface using Mistral AI and LangGraph 23 | - **📡 Communication**: Uses stdio for client-server communication 24 | 25 | ## ⚙️ Requirements 26 | 27 | - 🐍 Python 3.8+ 28 | - 🎭 Playwright 29 | - 🧩 MCP (Model Controlled Program) 30 | - 🔑 Mistral AI API key 31 | 32 | ## 📥 Installation 33 | 34 | 1. Clone the repository: 35 | 36 | ```bash 37 | git clone https://github.com/yourusername/browser-automation-agent.git 38 | cd browser-automation-agent 39 | ``` 40 | 41 | 2. Install dependencies: 42 | 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | 3. Install Playwright browsers: 48 | 49 | ```bash 50 | playwright install 51 | ``` 52 | 53 | 4. Create a `.env` file in the project root and add your Mistral AI API key: 54 | 55 | ``` 56 | MISTRAL_API_KEY=your_api_key_here 57 | ``` 58 | 59 | ## 📋 Usage 60 | 61 | ### Running the Server 62 | 63 | ```bash 64 | python main.py 65 | ``` 66 | 67 | ### Running the Client 68 | 69 | ```bash 70 | python client.py 71 | ``` 72 | 73 | ### Sample Interaction 74 | 75 | Once both the server and client are running: 76 | 77 | 1. Enter your query when prompted 78 | 2. The agent will: 79 | - 🔍 Search Google for relevant results 80 | - 🧭 Navigate to the top result 81 | - 📊 Scrape content based on the website type 82 | - 📸 Save screenshots and content to files 83 | - 📤 Return processed information 84 | 85 | ## 🛠️ Tool Functions 86 | 87 | ### `get_top_google_url` 88 | 🔍 Searches Google and returns the top result URL for a given query. 89 | 90 | ### `browse_and_scrape` 91 | 🌐 Navigates to a URL and scrapes content based on the website type. 92 | 93 | ### `scrape_github` 94 | 📂 Specializes in extracting README content and code blocks from GitHub repositories. 95 | 96 | ### `scrape_stackoverflow` 97 | 💬 Extracts questions, answers, comments, and code blocks from Stack Overflow pages. 98 | 99 | ### `scrape_documentation` 100 | 📚 Optimized for extracting documentation content and code examples. 101 | 102 | ### `scrape_generic` 103 | 🌐 Extracts paragraph text and code blocks from generic websites. 104 | 105 | ## 📁 File Structure 106 | 107 | ``` 108 | browser-automation-agent/ 109 | ├── main.py # MCP server implementation 110 | ├── client.py # Mistral AI client implementation 111 | ├── requirements.txt # Project dependencies 112 | ├── .env # Environment variables (API keys) 113 | └── README.md # Project documentation 114 | ``` 115 | 116 | ## 📤 Output Files 117 | 118 | The agent generates two types of output files with timestamps: 119 | 120 | - 📸 `final_page_YYYYMMDD_HHMMSS.png`: Screenshot of the final page state 121 | - 📄 `scraped_content_YYYYMMDD_HHMMSS.txt`: Extracted text content from the page 122 | 123 | ## ⚙️ Customization 124 | 125 | You can modify the following parameters in the code: 126 | 127 | - 🖥️ Browser window size: Adjust `width` and `height` in `browse_and_scrape` 128 | - 👻 Headless mode: Set `headless=True` for invisible browser operation 129 | - 🔢 Number of Google results: Change `num_results` in `get_top_google_url` 130 | 131 | ## ❓ Troubleshooting 132 | 133 | - **🔌 Connection Issues**: Ensure both server and client are running in separate terminals 134 | - **🎭 Playwright Errors**: Make sure browsers are installed with `playwright install` 135 | - **🔑 API Key Errors**: Verify your Mistral API key is correctly set in the `.env` file 136 | - **🛣️ Path Errors**: Update the path to `main.py` in `client.py` if needed 137 | 138 | ## 📜 License 139 | 140 | [MIT License](LICENSE) 141 | 142 | ## 🤝 Contributing 143 | 144 | Contributions are welcome! Please feel free to submit a Pull Request. 145 | 146 | --- 147 | 148 | Built with 🧩 MCP, 🎭 Playwright, and 🧠 Mistral AI 149 | ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- ``` 1 | playwright 2 | playwright-stealth 3 | langchain_mistralai 4 | python-dotenv 5 | mcp 6 | langchain-mcp-adapters 7 | asyncio 8 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | FROM python:3.10-slim 2 | 3 | # Set working directory 4 | WORKDIR /app 5 | 6 | # Copy files 7 | COPY . /app 8 | 9 | # Install dependencies 10 | RUN pip install --no-cache-dir \ 11 | fastmcp \ 12 | firecrawl \ 13 | tavily-python \ 14 | rich \ 15 | beautifulsoup4 \ 16 | python-dotenv \ 17 | requests 18 | 19 | # Expose the port if needed (optional) 20 | EXPOSE 8080 21 | 22 | # Default command 23 | CMD ["python", "main.py"] 24 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "browsing-mcp" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | dependencies = [ 8 | "googlesearch-python>=1.3.0", 9 | "langchain-mcp-adapters>=0.0.9", 10 | "langchain-openai>=0.3.14", 11 | "playwright-stealth>=1.0.6", 12 | "playwright>=1.51.0", 13 | "python-dotenv>=1.1.0", 14 | "setuptools>=78.1.0", 15 | ] 16 | ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | build: 2 | dockerfile: Dockerfile 3 | dockerBuildPath: . 4 | 5 | startCommand: 6 | type: stdio 7 | configSchema: 8 | type: object 9 | required: 10 | - MISTRAL_API_KEY 11 | - FIRECRAWL_API_KEY 12 | - TAVILY_SEARCH_API 13 | properties: 14 | MISTRAL_API_KEY: 15 | type: string 16 | description: API key for Mistral AI 17 | FIRECRAWL_API_KEY: 18 | type: string 19 | description: API key for Firecrawl service 20 | TAVILY_SEARCH_API: 21 | type: string 22 | description: API key for Tavily search service 23 | commandFunction: | 24 | (config) => ({ 25 | command: 'python', 26 | args: ['main.py'], 27 | env: { 28 | MISTRAL_API_KEY: config.MISTRAL_API_KEY, 29 | FIRECRAWL_API_KEY: config.FIRECRAWL_API_KEY, 30 | TAVILY_SEARCH_API: config.TAVILY_SEARCH_API 31 | } 32 | }) 33 | ``` -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- ```python 1 | # Create server parameters for stdio connection 2 | from mcp import ClientSession, StdioServerParameters 3 | from mcp.client.stdio import stdio_client 4 | import asyncio 5 | from langchain_mcp_adapters.tools import load_mcp_tools 6 | from langgraph.prebuilt import create_react_agent 7 | from langchain_mistralai import ChatMistralAI 8 | import os 9 | from dotenv import load_dotenv 10 | 11 | # Load environment variables 12 | load_dotenv() 13 | 14 | # Initialize the Mistral AI model 15 | model = ChatMistralAI( 16 | model="mistral-small-latest", 17 | temperature=0.4, 18 | api_key=os.getenv("MISTRAL_API_KEY") # Ensure the API key is loaded 19 | ) 20 | 21 | # Define server parameters 22 | server_params = StdioServerParameters( 23 | command="python", 24 | args=["main.py"], # Path to your server script 25 | ) 26 | 27 | async def run_agent(): 28 | try: 29 | # Connect to the server 30 | async with stdio_client(server_params) as (read, write): 31 | async with ClientSession(read, write) as session: 32 | await session.initialize() 33 | print("Client session initialized successfully.") 34 | 35 | # Load tools from the server 36 | tools = await load_mcp_tools(session) 37 | print("Tools loaded successfully.") 38 | 39 | # Create the agent 40 | agent = create_react_agent(model, tools) 41 | 42 | # Main loop for user interaction 43 | while True: 44 | query = input("Enter the query (or type 'exit' to quit): ") 45 | if query.lower() == 'exit': 46 | print("Exiting...") 47 | break 48 | 49 | # Invoke the agent with the user's query 50 | agent_response = await agent.ainvoke({"messages": query}) 51 | print("Agent response:", agent_response["messages"][3].content) 52 | except Exception as e: 53 | print(f"Error during client execution: {e}") 54 | raise 55 | finally: 56 | print("Client execution complete.") 57 | 58 | if __name__ == "__main__": 59 | # Run the agent in an asyncio event loop 60 | asyncio.run(run_agent()) 61 | 62 | ``` -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- ```yaml 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | branches: [ main, develop ] 8 | workflow_dispatch: # Allows manual triggering 9 | 10 | jobs: 11 | lint: 12 | name: Code Linting 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.10' 21 | cache: 'pip' 22 | 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 black isort 27 | pip install -r requirements.txt 28 | 29 | - name: Check formatting with Black 30 | run: black --check . 31 | 32 | - name: Check imports with isort 33 | run: isort --check-only --profile black . 34 | 35 | - name: Lint with flake8 36 | run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 37 | 38 | test: 39 | name: Run Tests 40 | runs-on: ubuntu-latest 41 | needs: lint 42 | steps: 43 | - uses: actions/checkout@v3 44 | 45 | - name: Set up Python 46 | uses: actions/setup-python@v4 47 | with: 48 | python-version: '3.10' 49 | cache: 'pip' 50 | 51 | - name: Install dependencies 52 | run: | 53 | python -m pip install --upgrade pip 54 | pip install pytest pytest-asyncio pytest-cov 55 | pip install -r requirements.txt 56 | 57 | - name: Install Playwright browsers 58 | run: playwright install --with-deps chromium 59 | 60 | - name: Create .env file 61 | run: | 62 | echo "MISTRAL_API_KEY=${{ secrets.MISTRAL_API_KEY }}" > .env 63 | 64 | - name: Run tests 65 | run: pytest --cov=. --cov-report=xml 66 | 67 | - name: Upload coverage to Codecov 68 | uses: codecov/codecov-action@v3 69 | with: 70 | file: ./coverage.xml 71 | fail_ci_if_error: false 72 | 73 | build: 74 | name: Build Docker Image 75 | runs-on: ubuntu-latest 76 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 77 | needs: test 78 | steps: 79 | - uses: actions/checkout@v3 80 | 81 | - name: Set up Docker Buildx 82 | uses: docker/setup-buildx-action@v2 83 | 84 | - name: Login to GitHub Container Registry 85 | uses: docker/login-action@v2 86 | with: 87 | registry: ghcr.io 88 | username: ${{ github.actor }} 89 | password: ${{ secrets.GITHUB_TOKEN }} 90 | 91 | - name: Extract metadata 92 | id: meta 93 | uses: docker/metadata-action@v4 94 | with: 95 | images: ghcr.io/${{ github.repository }} 96 | tags: | 97 | type=sha,format=long 98 | type=ref,event=branch 99 | type=semver,pattern={{version}} 100 | latest 101 | 102 | - name: Build and push 103 | uses: docker/build-push-action@v4 104 | with: 105 | context: . 106 | push: true 107 | tags: ${{ steps.meta.outputs.tags }} 108 | labels: ${{ steps.meta.outputs.labels }} 109 | cache-from: type=gha 110 | cache-to: type=gha,mode=max 111 | 112 | deploy: 113 | name: Deploy to Dev Environment 114 | runs-on: ubuntu-latest 115 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 116 | needs: build 117 | environment: development 118 | steps: 119 | - name: Install SSH key 120 | uses: shimataro/ssh-key-action@v2 121 | with: 122 | key: ${{ secrets.SSH_PRIVATE_KEY }} 123 | known_hosts: ${{ secrets.KNOWN_HOSTS }} 124 | 125 | - name: Deploy to server 126 | run: | 127 | ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} << 'EOF' 128 | cd /path/to/deployment 129 | docker pull ghcr.io/${{ github.repository }}:latest 130 | docker-compose down 131 | docker-compose up -d 132 | docker system prune -af 133 | EOF 134 | ``` -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- ```python 1 | from mcp.server.fastmcp import FastMCP 2 | from firecrawl import FirecrawlApp 3 | from tavily import TavilyClient 4 | import re 5 | import requests 6 | from bs4 import BeautifulSoup 7 | from rich.markdown import Markdown 8 | from rich.console import Console 9 | from dotenv import load_dotenv 10 | import os 11 | import io 12 | 13 | load_dotenv() 14 | 15 | mcp = FastMCP("Framework Summarizer") 16 | 17 | app = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY')) 18 | 19 | tavily_client = TavilyClient(api_key=os.getenv("TAVILY_SEARCH_API")) 20 | 21 | def render_markdown(markdown_text: str) -> str: 22 | """Render markdown text into formatted output. 23 | 24 | This function uses the rich library to render markdown content with proper formatting. 25 | It captures the output in a string and handles various markdown elements. 26 | 27 | Args: 28 | markdown_text (str): The markdown text to render. 29 | 30 | Returns: 31 | str: The rendered markdown content with proper formatting. 32 | 33 | Example: 34 | >>> content = "# Hello World\n\nThis is **bold** text." 35 | >>> rendered = render_markdown(content) 36 | >>> print(rendered) 37 | """ 38 | try: 39 | # Create a console that writes to a string buffer 40 | console = Console(file=io.StringIO()) 41 | 42 | # Create and render the markdown 43 | md = Markdown(markdown_text) 44 | console.print(md) 45 | 46 | # Get the rendered content from the buffer 47 | rendered = console.file.getvalue() 48 | console.file.close() 49 | 50 | return rendered 51 | except Exception as e: 52 | return f"Error rendering markdown: {str(e)}" 53 | 54 | @mcp.tool() 55 | def search_and_scrape(query:str): 56 | """Search for content using Tavily and scrape the most relevant result. 57 | 58 | This function performs a two-step process: 59 | 1. Uses Tavily search API to find the most relevant URLs for a given query 60 | 2. Scrapes the content from the top-ranked URL using Firecrawl 61 | 62 | Args: 63 | query (str): The search query to find relevant content. This query will be used 64 | to search for and retrieve the most relevant webpage content. 65 | 66 | Returns: 67 | str: The scraped content in markdown format from the most relevant webpage. 68 | 69 | Example: 70 | >>> content = search_and_scrape("What is Python programming language?") 71 | >>> print(content) 72 | 73 | Raises: 74 | Exception: If the search fails or if the scraping process fails. 75 | """ 76 | response = tavily_client.search(query, max_results=5) 77 | top_5_urls = [result['url'] for result in response.get('results', [])] 78 | url = top_5_urls[0] 79 | response = app.scrape_url(url=url, params={ 80 | 'formats': [ 'markdown' ], 81 | }) 82 | return response['markdown'] 83 | 84 | @mcp.tool() 85 | def list_directory(path: str = ".") -> list: 86 | """List contents of a directory. 87 | 88 | This tool lists all files and directories in the specified path. 89 | If no path is provided, it lists the current directory. 90 | 91 | Args: 92 | path (str, optional): The directory path to list. Defaults to current directory ("."). 93 | 94 | Returns: 95 | list: A list of dictionaries containing information about each item: 96 | - name: The name of the file/directory 97 | - type: Either "file" or "directory" 98 | - size: File size in bytes (for files only) 99 | - modified: Last modification timestamp 100 | 101 | Example: 102 | >>> contents = list_directory("/path/to/directory") 103 | >>> print(contents) 104 | """ 105 | try: 106 | items = [] 107 | for item in os.listdir(path): 108 | full_path = os.path.join(path, item) 109 | item_info = { 110 | "name": item, 111 | "type": "directory" if os.path.isdir(full_path) else "file", 112 | "modified": os.path.getmtime(full_path) 113 | } 114 | if item_info["type"] == "file": 115 | item_info["size"] = os.path.getsize(full_path) 116 | items.append(item_info) 117 | return items 118 | except Exception as e: 119 | return {"error": str(e)} 120 | 121 | @mcp.tool() 122 | def get_current_directory() -> str: 123 | """Get the current working directory. 124 | 125 | Returns: 126 | str: The absolute path of the current working directory. 127 | 128 | Example: 129 | >>> current_dir = get_current_directory() 130 | >>> print(current_dir) 131 | """ 132 | return os.getcwd() 133 | 134 | @mcp.tool() 135 | def change_directory(path: str) -> str: 136 | """Change the current working directory. 137 | 138 | Args: 139 | path (str): The directory path to change to. 140 | 141 | Returns: 142 | str: The new current working directory path. 143 | 144 | Raises: 145 | Exception: If the directory doesn't exist or is not accessible. 146 | 147 | Example: 148 | >>> new_dir = change_directory("/path/to/directory") 149 | >>> print(new_dir) 150 | """ 151 | try: 152 | os.chdir(path) 153 | return os.getcwd() 154 | except Exception as e: 155 | return {"error": str(e)} 156 | 157 | @mcp.tool() 158 | def file_info(path: str) -> dict: 159 | """Get detailed information about a file or directory. 160 | 161 | Args: 162 | path (str): The path to the file or directory. Can be obtained from list_all_files()["files"][i]["path"]. 163 | 164 | Returns: 165 | dict: A dictionary containing: 166 | - exists: Whether the path exists 167 | - type: "file" or "directory" 168 | - size: Size in bytes (for files) 169 | - created: Creation timestamp 170 | - modified: Last modification timestamp 171 | - accessed: Last access timestamp 172 | - absolute_path: Full absolute path 173 | 174 | Example: 175 | >>> # Get all files first 176 | >>> all_files = list_all_files() 177 | >>> # Get info for first file 178 | >>> info = file_info(all_files["files"][0]["path"]) 179 | >>> print(info) 180 | """ 181 | try: 182 | info = { 183 | "exists": os.path.exists(path), 184 | "absolute_path": os.path.abspath(path) 185 | } 186 | 187 | if info["exists"]: 188 | info.update({ 189 | "type": "directory" if os.path.isdir(path) else "file", 190 | "created": os.path.getctime(path), 191 | "modified": os.path.getmtime(path), 192 | "accessed": os.path.getatime(path) 193 | }) 194 | 195 | if info["type"] == "file": 196 | info["size"] = os.path.getsize(path) 197 | 198 | return info 199 | except Exception as e: 200 | return {"error": str(e)} 201 | 202 | @mcp.tool() 203 | def create_directory(path: str) -> dict: 204 | """Create a new directory. 205 | 206 | Args: 207 | path (str): The path where the directory should be created. 208 | 209 | Returns: 210 | dict: A dictionary containing: 211 | - success: Boolean indicating if creation was successful 212 | - path: The created directory path 213 | - error: Error message if creation failed 214 | 215 | Example: 216 | >>> result = create_directory("/path/to/new/directory") 217 | >>> print(result) 218 | """ 219 | try: 220 | os.makedirs(path, exist_ok=True) 221 | return { 222 | "success": True, 223 | "path": os.path.abspath(path) 224 | } 225 | except Exception as e: 226 | return { 227 | "success": False, 228 | "error": str(e) 229 | } 230 | 231 | @mcp.tool() 232 | def scrape_content(url): 233 | """Scrape content from a given URL and return it in markdown format. 234 | 235 | This tool uses Firecrawl to extract content from a webpage and convert it to markdown format. 236 | It's designed to handle various types of web content and convert them into a consistent markdown representation. 237 | 238 | Args: 239 | url (str): The URL of the webpage to scrape. Must be a valid HTTP/HTTPS URL. 240 | 241 | Returns: 242 | str: The scraped content in markdown format. 243 | 244 | Example: 245 | >>> content = scrape_content("https://example.com") 246 | >>> print(content) 247 | 248 | Raises: 249 | Exception: If the URL is invalid or if the scraping process fails. 250 | """ 251 | headers = {"User-Agent": "Mozilla/5.0"} # Bypass simple bot detection 252 | response = requests.get(url, headers=headers,timeout=10) 253 | 254 | if response.status_code == 200: 255 | soup = BeautifulSoup(response.text, "html.parser") 256 | 257 | # Remove all <a> (links) and <script> tags 258 | for tag in soup(["a", "script", "style", "noscript"]): 259 | tag.decompose() 260 | 261 | # Extract clean text from <p> tags 262 | paragraphs = [p.get_text(strip=True) for p in soup.find_all("p")] 263 | 264 | return "\n".join(paragraphs) 265 | 266 | else: 267 | return f"Error: Unable to scrape. Status code {response.status_code}" 268 | 269 | @mcp.tool() 270 | def read_file_content(file_path: str, start_line: int = 1, end_line: int = None) -> dict: 271 | """Read and display the contents of a file with proper formatting. 272 | 273 | This tool reads a file and returns its contents with metadata. For text files, 274 | it can optionally return specific line ranges. For markdown files, it includes 275 | rendered content. 276 | 277 | Args: 278 | file_path (str): The path to the file to read. Can be obtained from list_all_files()["files"][i]["path"]. 279 | start_line (int, optional): Starting line number to read. Defaults to 1. 280 | end_line (int, optional): Ending line number to read. If None, reads entire file. 281 | 282 | Returns: 283 | dict: A dictionary containing: 284 | - content: The file contents 285 | - rendered_content: Rendered markdown if applicable 286 | - metadata: File information (size, type, etc.) 287 | - error: Error message if reading fails 288 | 289 | Example: 290 | >>> # Get all files first 291 | >>> all_files = list_all_files() 292 | >>> # Read content of first file 293 | >>> result = read_file_content(all_files["files"][0]["path"]) 294 | >>> print(result["content"]) 295 | """ 296 | try: 297 | # Get file information 298 | info = file_info(file_path) 299 | if not info["exists"]: 300 | return {"error": f"File not found: {file_path}"} 301 | 302 | # Read file content 303 | with open(file_path, 'r', encoding='utf-8') as file: 304 | if end_line is None: 305 | content = file.read() 306 | else: 307 | lines = file.readlines() 308 | content = ''.join(lines[start_line-1:end_line]) 309 | 310 | result = { 311 | "content": content, 312 | "metadata": info 313 | } 314 | 315 | # If it's a markdown file, add rendered content 316 | if file_path.lower().endswith(('.md', '.markdown')): 317 | result["rendered_content"] = render_markdown(content) 318 | 319 | return result 320 | 321 | except Exception as e: 322 | return {"error": f"Error reading file: {str(e)}"} 323 | 324 | @mcp.tool() 325 | def preview_file(file_path: str, num_lines: int = 10) -> dict: 326 | """Preview the beginning of a file. 327 | 328 | This tool reads and displays the first few lines of a file, useful for 329 | quick file content inspection. 330 | 331 | Args: 332 | file_path (str): The path to the file to preview. Can be obtained from list_all_files()["files"][i]["path"]. 333 | num_lines (int, optional): Number of lines to preview. Defaults to 10. 334 | 335 | Returns: 336 | dict: A dictionary containing: 337 | - preview: The first few lines of the file 338 | - total_lines: Total number of lines in the file 339 | - metadata: File information 340 | - error: Error message if reading fails 341 | 342 | Example: 343 | >>> # Get all files first 344 | >>> all_files = list_all_files() 345 | >>> # Preview first file 346 | >>> preview = preview_file(all_files["files"][0]["path"], num_lines=5) 347 | >>> print(preview["preview"]) 348 | """ 349 | try: 350 | # Get file information 351 | info = file_info(file_path) 352 | if not info["exists"]: 353 | return {"error": f"File not found: {file_path}"} 354 | 355 | # Read first few lines 356 | with open(file_path, 'r', encoding='utf-8') as file: 357 | lines = file.readlines() 358 | preview = ''.join(lines[:num_lines]) 359 | 360 | return { 361 | "preview": preview, 362 | "total_lines": len(lines), 363 | "metadata": info 364 | } 365 | 366 | except Exception as e: 367 | return {"error": f"Error previewing file: {str(e)}"} 368 | 369 | @mcp.tool() 370 | def list_all_files(path: str = ".", exclude_dirs: list = None) -> dict: 371 | """Recursively list all files in a directory and its subdirectories. 372 | 373 | This tool walks through all directories and subdirectories to find all files, 374 | with options to exclude specific directories and file types. 375 | 376 | Args: 377 | path (str, optional): The root directory to start from. Defaults to current directory ("."). 378 | exclude_dirs (list, optional): List of directory names to exclude (e.g., ['node_modules', '.git']). 379 | 380 | Returns: 381 | dict: A dictionary containing: 382 | - files: List of dictionaries with file information: 383 | - path: Full path to the file 384 | - name: File name 385 | - size: File size in bytes 386 | - type: File type (extension) 387 | - modified: Last modification timestamp 388 | - total_files: Total number of files found 389 | - total_size: Total size of all files in bytes 390 | - error: Error message if operation fails 391 | 392 | Example: 393 | >>> result = list_all_files("/path/to/directory", exclude_dirs=['node_modules']) 394 | >>> print(result["files"]) 395 | """ 396 | try: 397 | if exclude_dirs is None: 398 | exclude_dirs = ['.git', 'node_modules', '__pycache__', '.venv', 'venv'] 399 | 400 | files = [] 401 | total_size = 0 402 | 403 | for root, dirs, files_in_dir in os.walk(path): 404 | # Skip excluded directories 405 | dirs[:] = [d for d in dirs if d not in exclude_dirs] 406 | 407 | for file in files_in_dir: 408 | file_path = os.path.join(root, file) 409 | file_info = { 410 | "path": file_path, 411 | "name": file, 412 | "size": os.path.getsize(file_path), 413 | "type": os.path.splitext(file)[1], 414 | "modified": os.path.getmtime(file_path) 415 | } 416 | files.append(file_info) 417 | total_size += file_info["size"] 418 | 419 | return { 420 | "files": files, 421 | "total_files": len(files), 422 | "total_size": total_size, 423 | "excluded_dirs": exclude_dirs 424 | } 425 | 426 | except Exception as e: 427 | return {"error": f"Error listing files: {str(e)}"} 428 | 429 | @mcp.tool() 430 | def find_files_by_type(path: str = ".", file_type: str = None) -> dict: 431 | """Find all files of a specific type in a directory and its subdirectories. 432 | 433 | Args: 434 | path (str, optional): The root directory to start from. Defaults to current directory ("."). 435 | file_type (str, optional): The file extension to search for (e.g., '.py', '.js', '.md'). 436 | 437 | Returns: 438 | dict: A dictionary containing: 439 | - files: List of matching files with their details 440 | - total_matches: Number of files found 441 | - file_type: The type of files searched for 442 | 443 | Example: 444 | >>> result = find_files_by_type("/path/to/directory", file_type=".py") 445 | >>> print(result["files"]) 446 | """ 447 | try: 448 | all_files = list_all_files(path) 449 | if "error" in all_files: 450 | return all_files 451 | 452 | if file_type: 453 | if not file_type.startswith('.'): 454 | file_type = '.' + file_type 455 | 456 | matching_files = [ 457 | file for file in all_files["files"] 458 | if file["type"].lower() == file_type.lower() 459 | ] 460 | else: 461 | matching_files = all_files["files"] 462 | 463 | return { 464 | "files": matching_files, 465 | "total_matches": len(matching_files), 466 | "file_type": file_type 467 | } 468 | 469 | except Exception as e: 470 | return {"error": f"Error finding files: {str(e)}"} 471 | 472 | if __name__ == "__main__": 473 | print("Starting MCP server...") 474 | print("MCP server is running.") 475 | mcp.run(transport='stdio') 476 | ```