# Directory Structure ``` ├── .github │ ├── dependabot.yaml │ └── workflows │ └── build.yml ├── .gitignore ├── docker-compose.yml ├── docs │ ├── images │ │ └── web-usage.png │ ├── search.md │ └── web.md ├── LICENSE ├── README.md └── server ├── .dockerignore ├── .python-version ├── Dockerfile ├── entrypoint.sh ├── mcp_server │ ├── __init__.py │ ├── __main__.py │ ├── server.py │ └── tools │ ├── __init__.py │ ├── helpers.py │ ├── search.py │ └── web.py ├── pyproject.toml ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── test_server.py │ └── test_web.py ├── tools.yaml └── uv.lock ``` # Files -------------------------------------------------------------------------------- /server/.python-version: -------------------------------------------------------------------------------- ``` 3.13 ``` -------------------------------------------------------------------------------- /server/.dockerignore: -------------------------------------------------------------------------------- ``` .dockerignore .git .github .gitignore .venv .ruff_cache .pytest_cache __pycache__ docs Dockerfile ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ .ruff_cache/ # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Temporary logs *.log # Sphinx documentation docs/_build/ ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # MCP Server Give your AI assistants the power to help you more effectively. This server lets them safely access websites and search the web - with clear feedback about what's happening and helpful error messages when things go wrong. - [🛠️ What tools does this server offer?](#️-what-tools-does-this-server-offer) - [🏎️ How can I run it?](#️-how-can-i-run-it) - [🐋 Using Docker (recommended)](#-using-docker-recommended) - [💻 Running locally](#-running-locally) - [🔌 How to connect](#-how-to-connect) - [📚 Learn more about MCP](#-learn-more-about-mcp) - [📄 License](#-license) ## 🛠️ What tools does this server offer? The server provides two powerful tools that help AI assistants solve real-world problems: | Tool | What it can do | | ------------------ | --------------------------------------------------------------------------- | | [Search](docs/search.md) | Search the web via SearXNG for current information, specific resources, or to perform calculations. | | [Web](docs/web.md) | Access websites and process their content. Can convert pages to markdown for easy reading, get the raw content, or extract links. | ## 🏎️ How can I run it? ### 🐋 Using Docker (recommended) The server runs in Docker containers to keep things safe and simple. Here's how to get started: 1. [Install Docker](https://docs.docker.com/engine/install/) if you haven't already 2. Create a file called `docker-compose.yml` with: ```yaml:docker-compose.yml services: mcp-server: environment: # Required: URL for your SearXNG instance's Search API - SEARXNG_QUERY_URL=http://searxng:8080 # Optional: Configure network mode (SSE) for LibreChat etc. 
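      # (leave SSE_HOST/SSE_PORT unset to run in stdio mode instead, e.g. for Claude Desktop)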
      - SSE_HOST=0.0.0.0
      - SSE_PORT=8080
      # Optional: Set a custom User-Agent for web requests
      - USER_AGENT=MCP-Server/1.0 (github.com/tcpipuk/mcp-server)
    image: ghcr.io/tcpipuk/mcp-server/server:latest
    ports:
      # Only needed if using SSE_HOST/SSE_PORT
      - "8080:8080" # Expose port 8080 on host
    restart: unless-stopped
    stop_grace_period: 1s

  # Example SearXNG service (optional, adapt as needed)
  # searxng:
  #   environment:
  #     - SEARXNG_BASE_URL=http://searxng:8080 # Ensure SearXNG knows its own URL
  #   image: searxng/searxng:latest
  #   restart: unless-stopped
  #   volumes:
  #     - ./searxng:/etc/searxng:rw
```

> **Important**: You *must* provide the `SEARXNG_QUERY_URL` environment variable, pointing to
> the Search API endpoint of your SearXNG instance (usually ending in `/` or `/search`).
>
> Setting `SSE_HOST` and `SSE_PORT` enables network mode (Server-Sent Events), recommended for
> multi-container setups like LibreChat. If omitted, the server uses standard I/O.

3. Run `docker compose up -d` to start the server container (and optionally SearXNG).

Most people use this with either:

- [Claude Desktop](https://modelcontextprotocol.io/quickstart/user) - connects directly via stdio
  (omit `SSE_HOST`/`SSE_PORT` in `docker-compose.yml`).
- [LibreChat](https://www.librechat.ai/docs/local) - connects over the network via SSE.

For LibreChat, add this to your `librechat.yaml` (assuming `SSE_PORT=8080`):

```yaml:librechat.yaml
mcpServers:
  mcp-server:
    iconPath: "/path/to/icon.png" # Optional: Custom icon
    label: "MCP Web/Search" # Optional: Custom label shown in UI
    type: sse
    url: http://mcp-server:8080/sse # Adjust host/port if needed
```

### 💻 Running locally

1. Install `uv` (requires Python 3.13+):

   ```bash
   curl -LsSf https://astral.sh/uv/install.sh | sh
   ```

   > **Note:** If you already have `uv` installed, update it with `uv self update`.

2. Create and activate a virtual environment:

   ```bash
   uv venv
   source .venv/bin/activate # Linux/macOS
   # or
   .venv\Scripts\activate # Windows
   ```

3. Install dependencies from the lockfile:

   ```bash
   uv sync
   ```

4. Set required environment variables:

   ```bash
   # Required: URL for your SearXNG instance's Search API
   export SEARXNG_QUERY_URL="http://your-searxng-instance.local:8080"

   # Optional: Custom User-Agent
   export USER_AGENT="CustomAgent/1.0"
   ```

5. Run the server:

   ```bash
   # For network (SSE) mode (e.g., for LibreChat)
   mcp-server --sse-host 0.0.0.0 --sse-port 3001

   # For direct stdio mode (e.g., for Claude Desktop)
   mcp-server
   ```

Available arguments:

- `--sse-host`: SSE listening address (e.g., `0.0.0.0`). Enables SSE mode.
- `--sse-port`: SSE listening port (e.g., `3001`). Enables SSE mode.
- `--user-agent`: Custom User-Agent string (overrides `USER_AGENT` env var).

> **Note**: If neither `--sse-host` nor `--sse-port` is provided (and `SSE_HOST`/`SSE_PORT` env
> vars are not set), the server defaults to `stdio` mode. The `SEARXNG_QUERY_URL` environment
> variable is *always* required.

## 🔌 How to connect

You can connect to the server in two ways:

| Method                    | What it means                                                | When to use it                                 |
| ------------------------- | ------------------------------------------------------------ | ---------------------------------------------- |
| Network connection (SSE)  | The server listens on a network port for connections.        | Best for LibreChat or other networked clients. |
| Direct connection (stdio) | The server communicates directly via standard input/output.  | Useful for local testing or Claude Desktop.    |
## 📚 Learn more about MCP

Here are a few resources to get you started:

- [MCP Specification](https://spec.modelcontextprotocol.io/)
- [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
- [MCP Example Servers](https://github.com/modelcontextprotocol/servers)

## 📄 License

This project is licensed under the GPLv3. See the [LICENSE](LICENSE) file for full details.
```

--------------------------------------------------------------------------------
/server/tests/__init__.py:
--------------------------------------------------------------------------------

```python
"""Test suite for MCP Server."""
```

--------------------------------------------------------------------------------
/server/entrypoint.sh:
--------------------------------------------------------------------------------

```bash
#!/bin/bash
set -e

# Run the test suite in dev builds, otherwise start the server
if [ "$BUILD_ENV" = "dev" ]; then
    pytest -v --log-cli-level=INFO tests/
else
    exec mcp-server
fi
```

--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------

```yaml
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/server"
    schedule:
      interval: "weekly"
  - package-ecosystem: "docker"
    directory: "/server"
    schedule:
      interval: "weekly"
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
```

--------------------------------------------------------------------------------
/server/mcp_server/__init__.py:
--------------------------------------------------------------------------------

```python
"""MCP Server module providing web retrieval and search tools.

This module provides HTTP fetching and SearXNG search capabilities for the
Model Context Protocol (MCP) framework, allowing models to retrieve and
process web content in a controlled manner.
"""

from __future__ import annotations

from .__main__ import main
from .server import MCPServer

__all__ = ["MCPServer", "main"]
```

--------------------------------------------------------------------------------
/server/mcp_server/tools/__init__.py:
--------------------------------------------------------------------------------

```python
"""Tools submodule package for mcp_server.

Provides tools that let AI assistants safely interact with external systems:

- search: Use SearXNG's search API to find information on the web
- web: Access and process web content with support for markdown conversion
  and link extraction

Each tool is designed to handle errors gracefully and provide clear feedback
to help AI assistants solve problems independently.
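Illustrative usage (from an async context; the query and URL here are
placeholders):

    from mcp_server.tools import tool_search, tool_web

    results = await tool_search("latest python release", time_range="month")
    links = await tool_web("https://docs.python.org", mode="links")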
""" from __future__ import annotations from .search import tool_search from .web import tool_web __all__ = ["tool_search", "tool_web"] ``` -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- ```yaml services: mcp-server: build: context: ./server dockerfile: Dockerfile environment: - SSE_HOST=0.0.0.0 - SSE_PORT=8080 - SANDBOX_SOCKET=/run/sandbox/shell.sock - USER_AGENT=CustomAgent/1.0 volumes: - sandbox_sockets:/run/sandbox image: ghcr.io/tcpipuk/mcp-server:latest networks: - mcp_net restart: unless-stopped stop_grace_period: 1s sandbox: build: context: ./sandbox dockerfile: Dockerfile environment: - SANDBOX_SOCKET=/run/sandbox/shell.sock image: ghcr.io/tcpipuk/mcp-sandbox:latest volumes: - sandbox_home:/home/sandbox - sandbox_sockets:/run/sandbox networks: - mcp_net restart: unless-stopped volumes: sandbox_home: ``` -------------------------------------------------------------------------------- /server/tests/conftest.py: -------------------------------------------------------------------------------- ```python """Configure pytest for the test suite.""" from __future__ import annotations from asyncio import create_subprocess_exec, sleep as asyncio_sleep from os import setsid as os_setsid from typing import TYPE_CHECKING import pytest import pytest_asyncio if TYPE_CHECKING: from collections.abc import AsyncGenerator @pytest.fixture(autouse=True) def _setup_test_env() -> None: """Set up test environment variables and cleanup.""" @pytest_asyncio.fixture async def sandbox_server(unused_tcp_port: int) -> AsyncGenerator[tuple[str, int]]: """Create a socat-based TCP server for sandbox testing. Yields: Tuple of (host, port) for the test server """ # Start socat in the background, echoing input back process = await create_subprocess_exec( "/usr/bin/socat", f"TCP-LISTEN:{unused_tcp_port},reuseaddr,fork", "EXEC:'bash -i',pty,stderr,setsid,sigint,sane", preexec_fn=os_setsid, ) # Give socat a moment to start up await asyncio_sleep(0.2) try: yield "127.0.0.1", unused_tcp_port finally: process.terminate() await process.wait() ``` -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- ```dockerfile # Build stage using uv with a frozen lockfile and dependency caching FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS uv WORKDIR /app # Enable bytecode compilation and copy mode ENV UV_COMPILE_BYTECODE=1 \ UV_LINK_MODE=copy # Install dependencies using the lockfile and settings COPY pyproject.toml uv.lock ./ RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --frozen --no-install-project ${BUILD_ENV:+"--dev"} --no-editable # Add the source code and install main project dependencies COPY . . RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --frozen ${BUILD_ENV:+"--dev"} --no-editable # Prepare runtime image FROM python:3.13-slim-bookworm AS runtime WORKDIR /app # Set default build environment ARG BUILD_ENV=prod # Install minimal system dependencies and create runtime user RUN apt-get update \ && apt-get install -y --no-install-recommends socat \ && rm -rf /var/lib/apt/lists/* \ && groupadd -g 1000 appuser \ && useradd -u 1000 -g 1000 -m appuser # Copy only necessary files from build stage COPY --from=uv --chown=appuser:appuser /app/ . 
# Switch to non-root user # Set environment variables for runtime USER appuser ENV PATH="/app/.venv/bin:$PATH" \ PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 # Use wrapper script to handle startup ENTRYPOINT ["/app/entrypoint.sh"] ``` -------------------------------------------------------------------------------- /server/pyproject.toml: -------------------------------------------------------------------------------- ```toml [project] name = "mcp-server" version = "0.1.0" description = "Provides tools to clients over the Model Context Protocol, supporting both stdio and SSE" requires-python = ">=3.13" authors = [{ name = "Tom Foster" }] maintainers = [{ name = "Tom Foster", email = "[email protected]" }] keywords = ["http", "mcp", "llm", "automation"] license = { text = "GPLv3" } classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.13", ] dependencies = [ "aiohttp>=3.11.12", "beautifulsoup4>=4.13.3", "mcp>=1.2.1", "pyyaml>=6.0.2", "trafilatura>=2.0.0", "uvicorn>=0.34.0", ] [project.scripts] mcp-server = "mcp_server:main" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.pytest.ini_options] addopts = "-ra -v" asyncio_mode = "strict" asyncio_default_fixture_loop_scope = "session" cache_dir = "/tmp/.pytest_cache" filterwarnings = [ "ignore:assertions not in test modules or plugins will be ignored:pytest.PytestConfigWarning", ] testpaths = "tests" [tool.ruff] cache-dir = "/tmp/.cache/ruff" fix = true line-length = 110 target-version = "py313" unsafe-fixes = true [tool.ruff.format] skip-magic-trailing-comma = true [tool.ruff.lint] select = ["ALL"] ignore = ["COM812", "CPY", "D203", "D213", "FBT", "RUF029"] [tool.ruff.lint.isort] combine-as-imports = true required-imports = ["from __future__ import annotations"] split-on-trailing-comma = false [tool.ruff.lint.per-file-ignores] "tests/*" = ["ARG001"] [tool.ruff.lint.pydocstyle] convention = "google" [tool.uv] dev-dependencies = [ "psutil>=7.0.0", "pytest>=8.3.4", "pytest-asyncio>=0.25.3", "ruff>=0.9.6", ] ``` -------------------------------------------------------------------------------- /server/mcp_server/__main__.py: -------------------------------------------------------------------------------- ```python """Command-line entry point for the MCP fetch server. Provides configuration options for running the fetch server, including customisation of the User-Agent string for HTTP requests. The server runs asynchronously to handle concurrent requests efficiently. """ from __future__ import annotations from argparse import ArgumentParser from asyncio import CancelledError, run as asyncio_run from contextlib import suppress as contextlib_suppress from os import environ as os_environ from pathlib import Path from yaml import safe_load as yaml_safe_load from .server import MCPServer from .tools import tool_search, tool_web def main() -> None: """Provide command-line entrypoint for the MCP fetch server.""" parser = ArgumentParser(description="Give your LLM access to external tools") parser.add_argument("--sandbox", type=str, help="TCP host:port pair (e.g. mcp-sandbox:8080)") parser.add_argument("--sse-host", type=str, help="SSE listening address (e.g. 0.0.0.0)") parser.add_argument("--sse-port", type=int, help="SSE listening port (e.g. 
3001)") parser.add_argument("--user-agent", type=str, help="Custom User-Agent string") parser.add_argument("--searxng-query-url", type=str, help="URL for SearXNG search endpoint") args = parser.parse_args() if args.sandbox: os_environ["SANDBOX"] = args.sandbox if args.sse_host: os_environ["SSE_HOST"] = args.sse_host if args.sse_port: os_environ["SSE_PORT"] = str(args.sse_port) if args.user_agent: os_environ["USER_AGENT"] = args.user_agent if args.searxng_query_url: os_environ["SEARXNG_QUERY_URL"] = args.searxng_query_url config = yaml_safe_load(Path("tools.yaml").read_text(encoding="utf-8")) config["tools"]["search"]["method"] = tool_search config["tools"]["web"]["method"] = tool_web # Remove the sandbox tool if there's no sandbox if not os_environ.get("SANDBOX") and "sandbox" in config["tools"]: del config["tools"]["sandbox"] server = MCPServer(config) with contextlib_suppress(KeyboardInterrupt, CancelledError): asyncio_run(server.serve()) if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /docs/search.md: -------------------------------------------------------------------------------- ```markdown # Search Tool - [Capabilities](#capabilities) - [Refining the Search](#refining-the-search) - [The Query (`q`)](#the-query-q) - [Filtering by Time (`time_range`)](#filtering-by-time-time_range) - [Content Safety (`safesearch`)](#content-safety-safesearch) - [Technical Details](#technical-details) Provides the AI assistant with web search capabilities via a SearXNG instance. It allows the AI to fetch current information, look up specific resources, and perform other search-related tasks. ## Capabilities The tool enables the AI assistant to perform tasks requiring external information lookup, such as: - Finding details on current events or recent developments. - Retrieving specific technical documentation or code examples. - Searching for various online content types (e.g., images, news). - Accessing specialised resources like scientific papers, package repositories (PyPI, npm), or Q&A sites (Stack Exchange). - Using WolframAlpha for calculations or fetching random data (UUIDs, numbers). - Calculating text hashes. ## Refining the Search The AI can tailor searches using the available parameters: ### The Query (`q`) The primary search input. Supports standard queries and SearXNG's specific syntax: - **Bang Prefixes (`!`):** Focuses the search on categories or engines (e.g. `!news`, `!images`, `!it`, `!repos`, `!pypi`, `!wa`, `!re`). Prefixes can be chained (e.g., `!it !q&a python async`). - **Keywords (No `!`):** Executes specific actions like calculations (`avg 1 2 3`), random data generation (`random uuid`), or hashing (`sha512 text`). ### Filtering by Time (`time_range`) Restricts results to a specific period (`day`, `month`, `year`), where supported by the underlying SearXNG engines. ### Content Safety (`safesearch`) Adjusts the filtering level for potentially explicit content: `0` (Off), `1` (Moderate - default), or `2` (Strict), engine permitting. ## Technical Details Key aspects of the tool's operation: - **Backend:** Relies on the SearXNG instance specified by the server's `SEARXNG_QUERY_URL` environment variable. - **Output Format:** Returns results exclusively in JSON format for straightforward parsing by the AI. - **Request Handling:** Uses the common `get_request` helper function (shared with the `web` tool) for managing HTTP requests, including redirects, timeouts, and connection errors. 
  Errors are reported back to the AI.
- **Parameter Exposure:** Only the parameters defined in `tools.yaml` (`q`, `time_range`, `safesearch`) are available to the AI.

This tool gives the AI assistant a mechanism to query a SearXNG instance, enabling access to real-time web information and specialised search functions.
```

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------

```yaml
name: Build MCP Server

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  pull_request:
  push:
    paths:
      - "server/**"
      - ".github/workflows/build.yml"
  workflow_dispatch:

permissions:
  contents: read
  packages: write
  pull-requests: write
  actions: write

jobs:
  test:
    name: Pytest
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        uses: docker/build-push-action@v6
        env:
          DOCKER_BUILD_SUMMARY: false
          DOCKER_BUILD_RECORD_UPLOAD: false
        with:
          context: server
          load: true
          build-args: |
            BUILD_ENV=dev
          tags: mcp-server:test
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run tests and output results
        run: |
          set -o pipefail
          docker run --rm -e BUILD_ENV=dev mcp-server:test | tee pytest_output.txt
          exit_code=${PIPESTATUS[0]}
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          cat pytest_output.txt >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          exit $exit_code

  build:
    name: Docker build
    needs: test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Generate Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          context: workflow
          images: |
            name=ghcr.io/${{ github.repository }}/server
          tags: |
            type=raw,value=latest,enable={{is_default_branch}}
            type=ref,event=branch
            type=ref,event=pr
            type=ref,event=tag
            type=sha,enable={{is_default_branch}},prefix=${{ github.event_name == 'pull_request' && 'pr-' || github.ref_name }}-

      - name: Log in to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push production image
        uses: docker/build-push-action@v6
        env:
          DOCKER_BUILD_RECORD_UPLOAD: false
        with:
          context: server
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            BUILD_ENV=prod
          cache-from: type=gha
          cache-to: type=gha,mode=max
```

--------------------------------------------------------------------------------
/server/mcp_server/tools/helpers.py:
--------------------------------------------------------------------------------

```python
"""Helper functions for the MCP server tools.

Provides shared utilities for error handling and web content retrieval:

- Error formatting: Consistent XML-style error messages for AI parsing
- HTTP client: Robust web content fetching with configurable User-Agent
- Error handling: Detailed error messages for common network issues

All functions handle errors gracefully and provide clear feedback to help
AI assistants understand and resolve issues independently.
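Illustrative usage (from an async context; the URL is a placeholder):

    from mcp_server.tools.helpers import add_error, get_request

    html = await get_request("https://example.com")  # raises McpError on failure
    marked = add_error(html, "Content truncated", append=True)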
""" from __future__ import annotations from os import getenv as os_getenv from aiohttp import ( ClientConnectionError, ClientError, ClientResponseError, ClientSession as AiohttpClientSession, ServerTimeoutError, TooManyRedirects, ) from mcp.shared.exceptions import McpError from mcp.types import INTERNAL_ERROR, ErrorData def add_error(text: str, error: str, append: bool = True) -> str: """Append an error message to the string. Args: text: The string to append the error to. error: The error message to append. append: Whether to append or prepend the error. Returns: The string with the error message appended. """ return f"{text}\n\n<error>{error}</error>" if append else f"<error>{error}</error>\n\n{text}" async def get_request(url: str) -> str: """Fetch content from a URL asynchronously. Args: url: The URL to fetch. Returns: The fetched content as a string. Raises: McpError: If fetching or processing fails. """ errmsg: str = "" try: async with AiohttpClientSession( headers={ "User-Agent": os_getenv("USER_AGENT") or "Mozilla/5.0 (X11; Linux i686; rv:135.0) Gecko/20100101 Firefox/135.0" } ) as session: response = await session.get(url) response_text = (await response.text()).strip() if response.ok: if response_text: return response_text errmsg = f"Failed to fetch {url}: HTTP {response.status} with empty body" else: errmsg = f"Failed to fetch {url}: HTTP {response.status} ({response.reason})" except ServerTimeoutError as err: errmsg = f"Timeout while fetching {url}: {str(err)!r}" except ClientConnectionError as err: errmsg = f"Failed to connect to {url}: {str(err)!r}" except TooManyRedirects as err: errmsg = f"Too many redirects while fetching {url}: {str(err)!r}" except ClientResponseError as err: errmsg = f"HTTP error while fetching {url}: {err.status} - {err.message}" except ClientError as err: errmsg = f"Network error while fetching {url}: {str(err)!r}" except Exception as err: # noqa: BLE001 errmsg = f"Unexpected error while fetching {url}: {str(err)!r}" raise McpError(ErrorData(code=INTERNAL_ERROR, message=errmsg)) ``` -------------------------------------------------------------------------------- /server/mcp_server/tools/search.py: -------------------------------------------------------------------------------- ```python """Provide a tool to query a SearXNG instance. Allows AI assistants to search the web using a configured SearXNG instance, leveraging its API for targeted and filtered searches. """ from __future__ import annotations from os import getenv as os_getenv from typing import Any from urllib.parse import urlencode from mcp.shared.exceptions import McpError from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ErrorData from .helpers import get_request # Allowed parameters for the SearXNG API, excluding 'q' which is handled separately. ALLOWED_PARAMS: set[str] = { "categories", "engines", "language", "pageno", "time_range", "format", "safesearch", } async def tool_search(q: str, **kwargs: Any) -> str: """Query a SearXNG instance using its Search API. Args: q: The search query string. **kwargs: Additional optional parameters for the SearXNG API (categories, engines, language, pageno, time_range, format, safesearch). Returns: The search results as a string (content depends on the 'format' parameter). Raises: McpError: If the SEARXNG_QUERY_URL environment variable is not set, if invalid parameters are provided, or if the request fails. 
""" searxng_url = os_getenv("SEARXNG_QUERY_URL") if not searxng_url: raise McpError( ErrorData(code=INTERNAL_ERROR, message="SearXNG query URL is not configured on the server.") ) # Filter out any provided kwargs that are not valid SearXNG parameters search_params = {k: v for k, v in kwargs.items() if k in ALLOWED_PARAMS and v is not None} search_params["q"] = q # Add the mandatory query # Default format to json if not specified, as it's often easiest for programmatic use if "format" not in search_params: search_params["format"] = "json" # Validate format if provided if search_params["format"] not in ("json", "csv", "rss"): raise McpError( ErrorData( code=INVALID_PARAMS, message=f"Invalid format '{search_params['format']}'. Must be 'json', 'csv', or 'rss'.", ) ) query_string = urlencode(search_params) full_url = f"{searxng_url}?{query_string}" try: # Use the existing get_request helper result = await get_request(full_url) # Simple check for empty result which might indicate no results found # depending on the format requested. SearXNG JSON format includes metadata even for no results. if not result and search_params["format"] != "json": return f"No results found for query '{q}' with specified parameters." except McpError as e: # Re-raise McpError to ensure it's handled correctly by the server raise McpError(ErrorData(code=e.data.code, message=f"SearXNG query failed: {e.data.message}")) from e except Exception as e: # Catch any other unexpected errors during the request raise McpError( ErrorData(code=INTERNAL_ERROR, message=f"Unexpected error during SearXNG query: {e!r}") ) from e else: return result ``` -------------------------------------------------------------------------------- /server/tools.yaml: -------------------------------------------------------------------------------- ```yaml tools: search: description: > Use this tool to access SearXNG to search the internet for current information or to perform calculations. Use this tool when the user asks about recent events, technical details, to find content, or your task requires calculations. If the search summary doesn't clearly answer the question, you can read one of the search results by providing the URL to the `web` tool, or use this `search` tool again to make further narrower requests to gain context to help your answer. inputSchema: type: object properties: q: type: string description: | SearXNG search query. 
Use `!` prefixes for categories/engines (chainable to search multiple sources) followed by your query: - General: `!news <query>`, `!map <place>`, `!images <keywords>` - Multimedia: `!videos` (PeerTube/Vimeo/YouTube), `!music` (Bandcamp/SoundCloud/YouTube), `!lyrics`, `!yt` (YouTube specific) - Files: `!files` (books/apps/torrents), `!1337x` or `!kc` or `!solid` or `!tpb` (Torrents), `!gpa` (Google Play), `!wcf` (Wikimedia Commons) - IT/Dev: `!it` (all tech), `!repos` (Git repos), `!dh` (Docker Hub), `!q&a` (Stack Ex.), `!mdn` (Web Docs), `!software_wikis` (Linux/dev wikis) - Packages: `!pypi` (Python), `!npm` (Node), `!crates` or `!lrs` (Rust), `!alp` (Alpine Linux) - Science/Compute: `!scientific_publications` (arXiv/PubMed/etc), `!wa` (WolframAlpha calculations/facts/definitions) - Social: `!re` (Reddit) Special keywords (no `!`): - Stats: `avg 1 2 3`, `max`, `min`, `sum`, `prod` - Random: `random color`, `random int`, `random string`, `random uuid` - Hash: `sha512 text` time_range: type: string enum: ["day", "month", "year"] description: Filter results by time range if supported safesearch: type: integer enum: [0, 1, 2] description: Safe search level (0=Off, 1=Moderate, 2=Strict) if supported default: 1 required: ["q"] web: description: > Use this tool to access live web pages using their URL. This is crucial for providing users with accurate information from up-to-date sources. You will typically want to use `markdown` to read content, or use 'links' mode to extract hyperlinks to find related pages on a site, e.g. for navigating documentation. inputSchema: type: object properties: url: type: string description: URL to access - must be a complete and valid web address. mode: type: string enum: - markdown - raw - links description: | Processing mode: - `markdown` (default) for clean readable text - `links` to list all hyperlinks - `raw` for unprocessed content (code, JSON, etc) default: markdown max_length: type: integer description: Optional character limit for the response (0 = no limit). default: 0 required: ["url"] ``` -------------------------------------------------------------------------------- /server/tests/test_web.py: -------------------------------------------------------------------------------- ```python """Test the web content retrieval and processing tools.""" from __future__ import annotations import pytest from mcp.shared.exceptions import McpError from mcp_server.tools.web import ProcessingMode, WebProcessor, tool_web @pytest.fixture def mock_html_content() -> str: """Return sample HTML content for testing. 
Returns: Sample HTML content as a string """ return """ <html> <body> <h1>Test Page</h1> <p>This is a test paragraph.</p> <a href="https://example.com">Example Link</a> <a href="/relative/path">Relative Link</a> <a href="#skip">Skip Link</a> <a href="javascript:void(0)">JavaScript Link</a> </body> </html> """ def test_processing_mode_from_str() -> None: """Test conversion of strings to ProcessingMode enum values.""" if ProcessingMode.from_str("markdown") != ProcessingMode.MARKDOWN: pytest.fail("Failed to convert 'markdown' to ProcessingMode.MARKDOWN") if ProcessingMode.from_str("raw") != ProcessingMode.RAW: pytest.fail("Failed to convert 'raw' to ProcessingMode.RAW") if ProcessingMode.from_str("links") != ProcessingMode.LINKS: pytest.fail("Failed to convert 'links' to ProcessingMode.LINKS") if ProcessingMode.from_str("invalid") != ProcessingMode.RAW: pytest.fail("Failed to convert invalid mode to ProcessingMode.RAW") @pytest.mark.asyncio async def test_web_processor_links(monkeypatch: pytest.MonkeyPatch, mock_html_content: str) -> None: """Test extraction and formatting of links from web content.""" async def mock_get_request(_url: str) -> str: return mock_html_content monkeypatch.setattr("mcp_server.tools.web.get_request", mock_get_request) processor = WebProcessor("https://test.com", mode=ProcessingMode.LINKS) result = await processor.process() if "Example Link: https://example.com" not in result: pytest.fail(f"Missing absolute link in output: {result}") if "https://test.com/relative/path" not in result: pytest.fail(f"Missing resolved relative link in output: {result}") if "#skip" in result: pytest.fail(f"Found invalid anchor link in output: {result}") if "javascript:void(0)" in result: pytest.fail(f"Found invalid JavaScript link in output: {result}") @pytest.mark.asyncio async def test_web_processor_markdown(monkeypatch: pytest.MonkeyPatch) -> None: """Test conversion of HTML content to markdown format.""" async def mock_get_request(_url: str) -> str: return """ <!DOCTYPE html> <html> <head><title>Test Page</title></head> <body> <article> <h1>Test Heading</h1> <p>This is a test paragraph with some <strong>bold text</strong>.</p> <p>And another paragraph for good measure.</p> </article> </body> </html> """ monkeypatch.setattr("mcp_server.tools.web.get_request", mock_get_request) processor = WebProcessor("https://test.com", mode=ProcessingMode.MARKDOWN) result = await processor.process() if "Test Heading" not in result: pytest.fail(f"Missing heading content in output: {result}") if "test paragraph" not in result: pytest.fail(f"Missing paragraph content in output: {result}") @pytest.mark.asyncio async def test_max_length_limit() -> None: """Test truncation of content based on max_length parameter.""" processor = WebProcessor("https://test.com", max_length=10) content = "This is a very long text that should be truncated" truncated = processor._format_links({"https://test.com": content}) # noqa: SLF001 if len(truncated) > processor.max_length + 100: # Allow for header text pytest.fail(f"Content exceeds max length: {len(truncated)} > {processor.max_length + 100}") @pytest.mark.asyncio async def test_invalid_url() -> None: """Test error handling for invalid URLs.""" try: await tool_web("not-a-url") pytest.fail("Expected McpError for invalid URL") except McpError: pass @pytest.mark.asyncio async def test_empty_links() -> None: """Test error handling when no links are found.""" processor = WebProcessor("https://test.com", mode=ProcessingMode.LINKS) try: processor._format_links({}) # noqa: SLF001 
pytest.fail("Expected McpError for empty links") except McpError: pass ``` -------------------------------------------------------------------------------- /docs/web.md: -------------------------------------------------------------------------------- ```markdown # Web Tool 1. [What can it do?](#what-can-it-do) 2. [Processing Modes](#processing-modes) 1. [Markdown Mode (default)](#markdown-mode-default) 2. [Links Mode](#links-mode) 3. [Raw Mode](#raw-mode) 3. [Features and Limits](#features-and-limits) 1. [Content Management](#content-management) 2. [Safety Features](#safety-features) A tool that lets AI assistants access and process web content safely. It can convert pages to markdown, extract links, or get raw content - helping the AI give you more accurate, up-to-date information.  ## What can it do? When you're discussing documentation, researching solutions, or need current information, the AI can access web content to help. It's particularly useful when you want to: - Get the latest documentation for a library or tool - Find code examples that match your specific needs - Navigate through complex documentation structures - Verify that advice is current and accurate The tool handles all the technical details like following redirects, handling errors, and cleaning up messy HTML. You just point the AI at a URL, and it'll bring back the information in a format that's easy to work with. ## Processing Modes The AI can process web content in three different ways, each designed for specific needs: ### Markdown Mode (default) Most of the time, you'll want clean, readable content without the clutter of web formatting. Markdown mode automatically removes adverts, navigation menus, and other distractions, focusing on the actual content you care about. It preserves important elements like headings, lists, tables, and images, converting them into clean markdown that's easy to read. If something goes wrong with the conversion, the tool automatically falls back to raw content, letting the AI still help you even if the page is unusually formatted. Example output: ```markdown Contents of https://example.com/article: # Main Heading Article content in clean markdown format... ## Subheadings preserved * Lists kept intact * With proper formatting  | Tables | Converted | |--------|-----------| | To | Markdown | ``` ### Links Mode When you're exploring documentation or need to navigate through a website, links mode helps map out the available paths. It finds all the links on a page, converts them to absolute URLs so they always work, and shows you the text used to describe each link. This is particularly helpful when you need to: - Navigate through multi-page documentation - Find related articles or resources - Locate specific sections in large documents - Build a map of available information The AI orders links by relevance, filters out noise like social media buttons, and gives you a clean list of where you can go next. Example output: ```markdown All 45 links found on https://example.com - Home: https://example.com/ - Products: https://example.com/products - About Us: https://example.com/about ... ``` ### Raw Mode Sometimes you need the original, unprocessed content - particularly when working with APIs, downloading code, or accessing structured data. Raw mode gives you exactly what the server sends, while still handling things like authentication, redirects, and error handling behind the scenes. 
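Example output (illustrative; the URL and JSON payload are placeholders):

```
Contents of https://example.com/api/status.json:

{"status": "ok", "uptime": 12345}
```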
## Features and Limits The tool includes several features to make web access both powerful and safe: ### Content Management The AI can handle content of any size, but you can control how much it processes at once. Setting a length limit helps when you're working with large documents or want to focus on specific sections. You'll always get complete sentences and properly formatted content, with clear warnings if anything gets truncated. If something goes wrong - whether it's a network issue, an authentication problem, or just an unusually formatted page - you'll get clear, actionable error messages explaining what happened and often suggesting how to fix it. ### Safety Features Behind the scenes, the tool uses industrial-strength libraries like `trafilatura` and `BeautifulSoup` to handle web content safely. It carefully processes URLs, headers, and content to prevent common issues, while giving you the flexibility to access the resources you need. The tool strikes a careful balance - giving AI assistants broad access to web content while maintaining security and providing clear feedback. This means you can confidently point the AI at documentation or resources, knowing it'll handle the technical details and bring back exactly what you need. ``` -------------------------------------------------------------------------------- /server/mcp_server/server.py: -------------------------------------------------------------------------------- ```python """Core MCPServer implementation for the MCP fetch service. Provides a generic MCPServer class for serving MCP requests. Allows drop-in tool support by mapping tool functions to configuration loaded from an external YAML file. """ from __future__ import annotations from dataclasses import dataclass, field from os import getenv as os_getenv from pathlib import Path from typing import TYPE_CHECKING, Any from mcp.server import Server as BaseMCPServer from mcp.server.sse import SseServerTransport from mcp.server.stdio import stdio_server from mcp.shared.exceptions import McpError from mcp.types import INVALID_PARAMS, ErrorData, TextContent, Tool from starlette.applications import Starlette from starlette.routing import Mount, Route from uvicorn import Config as UvicornConfig, Server as UvicornServer if TYPE_CHECKING: from starlette.requests import Request from starlette.responses import Response # Default path for tool configuration YAML file DEFAULT_TOOL_CONFIG_PATH = Path(__file__).parent / "tools.yaml" @dataclass(slots=True) class MCPServer: """Define a generic MCP server class with drop-in tool support.""" config: dict[str, Any] server: BaseMCPServer = field(init=False) server_name: str = field(default="mcp-server") tools: list[Tool] = field(default_factory=list) def __post_init__(self) -> None: """Initialise the MCPServer.""" if self.config.get("server", {}).get("name"): self.server_name = self.config["server"]["name"] # Create MCP server instance self.server = BaseMCPServer(self.server_name) # Build the tool registry and tool list self.tools = [ Tool(name=name, **{k: v for k, v in tool.items() if k != "method"}) for name, tool in self.config["tools"].items() ] # Register the tool listing/calling methods self.server.list_tools()(self.list_tools) self.server.call_tool()(self.call_tool) async def list_tools(self) -> list[Tool]: """Return a list of available tools. Returns: A list of Tool objects representing the available tools. 
""" return self.tools async def call_tool(self, name: str, arguments: dict) -> list[TextContent]: """Call the tool specified by name with provided arguments. Returns: A list of TextContent objects containing the tool's result Raises: McpError: If the tool is unknown or fails to execute """ if name not in self.config["tools"]: raise McpError( ErrorData( code=INVALID_PARAMS, message=f"Tool '{name}' isn't available on this server anymore" ) ) if "method" not in self.config["tools"][name]: raise McpError( ErrorData( code=INVALID_PARAMS, message=( f"Tool '{name}' has no registered method: inform the user that their MCP " "server requires configuration to provide a function for this tool." ), ) ) try: result = await self.config["tools"][name]["method"](**arguments) return [TextContent(type="text", text=result)] except McpError as err: raise McpError(ErrorData(code=INVALID_PARAMS, message=str(err))) from err async def serve(self) -> None: """Run the MCP server, using either SSE or stdio mode.""" options = self.server.create_initialization_options() sse_host, sse_port = os_getenv("SSE_HOST"), os_getenv("SSE_PORT") if sse_host and sse_port: sse = SseServerTransport("/messages/") async def _handle_sse(request: Request) -> Response | None: """Handle incoming SSE connection.""" async with sse.connect_sse( request.scope, request.receive, request._send, # noqa: SLF001 ) as streams: await self.server.run(streams[0], streams[1], options, raise_exceptions=True) starlette_app = Starlette( debug=True, routes=[ Route("/sse", endpoint=_handle_sse), Mount("/messages/", app=sse.handle_post_message), ], ) config = UvicornConfig(app=starlette_app, host=sse_host, port=int(sse_port), log_level="info") server_instance = UvicornServer(config) await server_instance.serve() else: async with stdio_server() as (read_stream, write_stream): await self.server.run(read_stream, write_stream, options, raise_exceptions=True) ``` -------------------------------------------------------------------------------- /server/mcp_server/tools/web.py: -------------------------------------------------------------------------------- ```python """Provide tools to retrieve and process web content. Helps AI assistants access and understand web content through three processing modes: - markdown: Converts HTML to clean, readable markdown (default) - links: Extracts and formats hyperlinks with their anchor text - raw: Returns unprocessed content for APIs or non-HTML resources Features include: - Smart content extraction focusing on main text - Link processing with relative URL resolution - Configurable length limits - Detailed error messages for common issues """ from __future__ import annotations from collections import Counter from dataclasses import dataclass, field from enum import Enum from typing import Final from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag from bs4.filter import SoupStrainer from mcp.shared.exceptions import McpError from mcp.types import INTERNAL_ERROR, ErrorData from trafilatura import extract as trafilatura_extract from .helpers import add_error, get_request class ProcessingMode(Enum): """Define valid content processing modes.""" MARKDOWN = "markdown" RAW = "raw" LINKS = "links" @classmethod def from_str(cls, mode: str) -> ProcessingMode: """Create ProcessingMode from string, defaulting to RAW if invalid. 
Args: mode: String representation of the processing mode Returns: ProcessingMode enum value """ try: return cls(mode.lower()) except ValueError: return cls.RAW SKIP_HREF_PREFIXES: Final = ("#", "javascript:") @dataclass(slots=True) class WebProcessor: """Handle web content retrieval and processing.""" url: str mode: ProcessingMode | str = field(default=ProcessingMode.MARKDOWN) max_length: int = field(default=0) def __post_init__(self) -> None: """Validate and correct inputs as needed.""" if isinstance(self.mode, str): self.mode = ProcessingMode.from_str(self.mode) self.max_length = max(self.max_length, 0) async def process(self) -> str: """Fetch and process the content according to the specified mode. Returns: Processed content as a string """ content = await get_request(self.url) match self.mode: case ProcessingMode.LINKS: return self._format_links(self._extract_links(content)) case ProcessingMode.MARKDOWN: extracted = trafilatura_extract( content, favor_recall=True, include_formatting=True, include_images=True, include_links=True, include_tables=True, output_format="markdown", with_metadata=True, ) or add_error(content, "Extraction to markdown failed; returning raw content", append=False) case ProcessingMode.RAW: extracted = content if self.max_length > 0 and len(extracted) > self.max_length: extracted = add_error( extracted[: self.max_length], f"Content truncated to {self.max_length} characters", append=True, ) return f"Contents of {self.url}:\n\n{extracted}" def _get_absolute_url(self, href: str) -> str | None: """Get the absolute URL from a relative or absolute href. Returns: Absolute URL or None if invalid """ stripped = href.strip() if not stripped or any(stripped.startswith(prefix) for prefix in SKIP_HREF_PREFIXES): return None return stripped if stripped.startswith(("http://", "https://")) else urljoin(self.url, stripped) def _extract_links(self, content: str) -> dict[str, str]: """Extract all valid links with their anchor text. Returns: Dictionary mapping each unique absolute URL to its first-found anchor text """ soup = BeautifulSoup(content, "html.parser", parse_only=SoupStrainer("a", href=True)) anchors = [a for a in soup.find_all("a", href=True) if isinstance(a, Tag)] valid_anchors = [ (a, url) for a in anchors if (href := a.get("href")) and isinstance(href, str) and (url := self._get_absolute_url(href)) ] url_counts = Counter(url for _, url in valid_anchors) return dict( sorted( { url: next(a.get_text(strip=True) for a, anchor_url in valid_anchors if anchor_url == url) for url in url_counts }.items(), key=lambda x: (-url_counts[x[0]], x[0]), ) ) def _format_links(self, links: dict[str, str]) -> str: """Format extracted links into a readable string. 
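        The output begins with a header such as ``All 3 links found on
        https://example.com`` (or ``2 of 3 links found ...`` when ``max_length``
        truncates the list), followed by one ``- title: url`` line per link.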
Args: links: Dictionary of URLs and their titles Returns: Formatted string of links Raises: McpError: If no links are found """ if not links: raise McpError( ErrorData( code=INTERNAL_ERROR, message=f"No links found on {self.url} - it may require JavaScript or auth.", ) ) total_links = len(links) formatted_links = [] length = 0 for url, title in links.items(): link_text = f"- {title}: {url}" if title else f"- {url}" new_length = length + len(link_text) + 1 if self.max_length > 0 and new_length > self.max_length: break formatted_links.append(link_text) length = new_length added_count = len(formatted_links) header = ( f"{added_count} of {total_links} links found on {self.url}" if added_count < total_links else f"All {total_links} links found on {self.url}" ) return f"{header}\n" + "\n".join(formatted_links) async def tool_web(url: str, mode: str = "markdown", max_length: int = 0) -> str: """Access and process web content from a given URL. Returns: Processed content as a string """ processor = WebProcessor(url=url, mode=mode, max_length=max_length) return await processor.process() ``` -------------------------------------------------------------------------------- /server/tests/test_server.py: -------------------------------------------------------------------------------- ```python """Test the MCP server initialization and configuration.""" from __future__ import annotations from os import environ as os_environ from pathlib import Path from typing import TYPE_CHECKING import pytest import pytest_asyncio from yaml import dump as yaml_dump, safe_load as yaml_safe_load from mcp_server.server import MCPServer from mcp_server.tools import tool_search, tool_web if TYPE_CHECKING: from collections.abc import Generator # Constants for testing MAX_DESCRIPTION_LENGTH = 1024 @pytest.fixture def mock_yaml_file(tmp_path: Path) -> Path: """Create a temporary tools.yaml file for testing. Args: tmp_path: Pytest fixture providing temporary directory Returns: Path to the temporary YAML file """ yaml_content = { "tools": { "search": { "description": "Test Search tool", "inputSchema": {"type": "object", "properties": {"query": {"type": "string"}}}, }, "web": { "description": "Test Web tool", "inputSchema": {"type": "object", "properties": {"url": {"type": "string"}}}, }, } } yaml_path = tmp_path / "tools.yaml" yaml_path.write_text(yaml_dump(yaml_content), encoding="utf-8") return yaml_path @pytest.fixture def server_env() -> Generator[None]: """Set up server environment variables for testing.""" os_environ["SANDBOX"] = "127.0.0.1:8080" os_environ["SSE_HOST"] = "127.0.0.1" os_environ["SSE_PORT"] = "3001" os_environ["USER_AGENT"] = "TestAgent/1.0" yield for key in ["SANDBOX", "SSE_HOST", "SSE_PORT", "USER_AGENT"]: if key in os_environ: del os_environ[key] @pytest_asyncio.fixture async def server(mock_yaml_file: Path) -> MCPServer: """Create a test server instance. 
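    Mirrors the wiring in ``mcp_server.__main__.main()``: each tool's callable
    is attached to the loaded YAML config under a ``method`` key before the
    server is constructed.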
Args: mock_yaml_file: Path to test YAML configuration Returns: Configured MCPServer instance """ config = yaml_safe_load(mock_yaml_file.read_text(encoding="utf-8")) config["tools"]["search"]["method"] = tool_search config["tools"]["web"]["method"] = tool_web return MCPServer(config) def test_yaml_loading(mock_yaml_file: Path) -> None: """Test that the YAML configuration can be loaded correctly.""" config = yaml_safe_load(mock_yaml_file.read_text(encoding="utf-8")) if "tools" not in config: pytest.fail("Missing 'tools' section in config") if "search" not in config["tools"]: pytest.fail("Missing 'search' tool in config") if "web" not in config["tools"]: pytest.fail("Missing 'web' tool in config") for tool_name in ("search", "web"): if "description" not in config["tools"][tool_name]: pytest.fail(f"Missing 'description' in {tool_name} tool config") description_length = len(config["tools"][tool_name]["description"]) if description_length > MAX_DESCRIPTION_LENGTH: pytest.fail( f"Description for tool '{tool_name}' is too long: " f"{description_length} characters (max {MAX_DESCRIPTION_LENGTH})" ) def test_server_initialisation(server: MCPServer) -> None: """Test that the server initializes with the correct tools.""" if not hasattr(server, "tools"): pytest.fail("Server missing tools attribute") tool_names = {tool.name for tool in server.tools} if "search" not in tool_names: pytest.fail("Server missing search tool") if "web" not in tool_names: pytest.fail("Server missing web tool") search_tool_config = server.config["tools"]["search"] web_tool_config = server.config["tools"]["web"] if search_tool_config.get("method") != tool_search: pytest.fail("Search tool has incorrect method") if web_tool_config.get("method") != tool_web: pytest.fail("Web tool has incorrect method") @pytest.mark.asyncio @pytest.mark.usefixtures("server_env") async def test_server_environment() -> None: """Test that environment variables are correctly set.""" if os_environ["SANDBOX"] != "127.0.0.1:8080": pytest.fail(f"Incorrect SANDBOX: {os_environ['SANDBOX']}") if os_environ["SSE_HOST"] != "127.0.0.1": pytest.fail(f"Incorrect SSE_HOST: {os_environ['SSE_HOST']}") if os_environ["SSE_PORT"] != "3001": pytest.fail(f"Incorrect SSE_PORT: {os_environ['SSE_PORT']}") if os_environ["USER_AGENT"] != "TestAgent/1.0": pytest.fail(f"Incorrect USER_AGENT: {os_environ['USER_AGENT']}") def test_live_tools_yaml_file() -> None: """Test that the live tools.yaml file is readable and contains required keys.""" # Determine the project root (assumed one level above the tests directory) project_root = Path(__file__).parent.parent tools_yaml_path = project_root / "tools.yaml" if not tools_yaml_path.exists(): pytest.fail(f"tools.yaml file not found at {tools_yaml_path}") config = yaml_safe_load(tools_yaml_path.read_text(encoding="utf-8")) if "tools" not in config: pytest.fail("Missing 'tools' section in live tools.yaml") for tool in ("search", "web"): if tool not in config["tools"]: pytest.fail(f"Missing '{tool}' configuration in live tools.yaml") if "inputSchema" not in config["tools"][tool]: pytest.fail(f"Missing 'inputSchema' for tool '{tool}' in live tools.yaml") def test_tool_description_length() -> None: """Test that tool descriptions don't exceed the OpenAI API limit of 1024 characters.""" # Determine the project root (assumed one level above the tests directory) project_root = Path(__file__).parent.parent tools_yaml_path = project_root / "tools.yaml" if not tools_yaml_path.exists(): pytest.fail(f"tools.yaml file not found at {tools_yaml_path}") 
config = yaml_safe_load(tools_yaml_path.read_text(encoding="utf-8")) if "tools" not in config: pytest.fail("Missing 'tools' section in tools.yaml") for tool_name, tool_config in config["tools"].items(): if "description" not in tool_config: pytest.fail(f"Missing 'description' for tool '{tool_name}' in tools.yaml") description_length = len(tool_config["description"]) if description_length > MAX_DESCRIPTION_LENGTH: pytest.fail( f"Description for tool '{tool_name}' is too long: " f"{description_length} characters (max {MAX_DESCRIPTION_LENGTH})" ) ```