# Directory Structure ``` ├── .gitignore ├── .python-version ├── Dockerfile ├── LICENSE ├── pyproject.toml ├── README.md ├── smithery.yaml ├── src │ └── markitdown_mcp_server │ ├── __init__.py │ └── server.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 1 | 3.13 2 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | # Virtual environments 10 | .venv 11 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # MarkItDown MCP Server 2 | 3 | [](https://smithery.ai/server/@KorigamiK/markitdown_mcp_server) 4 | 5 | A Model Context Protocol (MCP) server that converts various file formats to Markdown using the MarkItDown utility. 6 | 7 | <a href="https://glama.ai/mcp/servers/sbc6bljjg5"><img width="380" height="200" src="https://glama.ai/mcp/servers/sbc6bljjg5/badge" alt="MarkItDown Server MCP server" /></a> 8 | 9 | ## Supported Formats 10 | 11 | - PDF 12 | - PowerPoint 13 | - Word 14 | - Excel 15 | - Images (EXIF metadata and OCR) 16 | - Audio (EXIF metadata and speech transcription) 17 | - HTML 18 | - Text-based formats (CSV, JSON, XML) 19 | - ZIP files (iterates over contents) 20 | 21 | ## Installation 22 | 23 | ### Installing via Smithery 24 | 25 | To install MarkItDown MCP Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@KorigamiK/markitdown_mcp_server): 26 | 27 | ```bash 28 | npx -y @smithery/cli install @KorigamiK/markitdown_mcp_server --client claude 29 | ``` 30 | 31 | ### Manual Installation 32 | 33 | 1. Clone this repository 34 | 2. Install dependencies: 35 | ```bash 36 | uv install 37 | ``` 38 | 39 | ## Usage 40 | 41 | ### As MCP Server 42 | 43 | The server can be integrated with any MCP client. Here are some examples: 44 | 45 | #### Zed Editor 46 | 47 | Add the following to your `settings.json`: 48 | 49 | ```json 50 | "context_servers": { 51 | "markitdown_mcp": { 52 | "settings": {}, 53 | "command": { 54 | "path": "uv", 55 | "args": [ 56 | "--directory", 57 | "/path/to/markitdown_mcp_server", 58 | "run", 59 | "markitdown" 60 | ] 61 | } 62 | } 63 | } 64 | ``` 65 | 66 | ### Commands 67 | 68 | The server responds to the following MCP commands: 69 | 70 | - `/md <file>` - Convert the specified file to Markdown 71 | 72 | Example: 73 | ```bash 74 | /md document.pdf 75 | ``` 76 | 77 | ## Supported MCP Clients 78 | 79 | Works with any MCP-compliant client listed at [modelcontextprotocol.io/clients](https://modelcontextprotocol.io/clients), including: 80 | 81 | - Zed Editor 82 | - Any other MCP-compatible editors and tools 83 | 84 | ## License 85 | 86 | MIT License. See [LICENSE](LICENSE) for details. 87 | 88 | ## Acknowledgements 89 | 90 | https://github.com/microsoft/markitdown#readme 91 | ``` -------------------------------------------------------------------------------- /src/markitdown_mcp_server/__init__.py: -------------------------------------------------------------------------------- ```python 1 | from .server import run 2 | import os 3 | import asyncio 4 | 5 | 6 | def main() -> None: 7 | os.system("notify-send 'Parseer server started'") 8 | asyncio.run(run()) 9 | ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | properties: {} 9 | commandFunction: 10 | # A function that produces the CLI command to start the MCP on stdio. 11 | |- 12 | () => ({command:'uv',args:['--directory', '/app', 'run', 'markitdown']}) 13 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "markitdown-mcp-server" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | authors = [ 7 | { name = "korigamik-hypr", email = "[email protected]" } 8 | ] 9 | requires-python = ">=3.12" 10 | dependencies = [ 11 | "markitdown>=0.0.1a3", 12 | "mcp>=1.2.1", 13 | ] 14 | 15 | [project.scripts] 16 | markitdown = "markitdown_mcp_server:main" 17 | 18 | [build-system] 19 | requires = ["hatchling"] 20 | build-backend = "hatchling.build" 21 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use a Python image with uv pre-installed 3 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv 4 | 5 | # Install the project into /app 6 | WORKDIR /app 7 | 8 | # Enable bytecode compilation 9 | ENV UV_COMPILE_BYTECODE=1 10 | 11 | # Copy from the cache instead of linking since it's a mounted volume 12 | ENV UV_LINK_MODE=copy 13 | 14 | 15 | # Install the project's dependencies using the lockfile and settings 16 | RUN --mount=type=cache,target=/root/.cache/uv \ 17 | --mount=type=bind,source=uv.lock,target=uv.lock \ 18 | --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ 19 | uv sync --frozen --no-install-project --no-dev --no-editable 20 | 21 | # Then, add the rest of the project source code and install it 22 | # Installing separately from its dependencies allows optimal layer caching 23 | ADD . /app 24 | RUN --mount=type=cache,target=/root/.cache/uv \ 25 | uv sync --frozen --no-dev --no-editable 26 | 27 | FROM python:3.12-slim-bookworm 28 | 29 | WORKDIR /app 30 | 31 | COPY --from=uv /root/.local /root/.local 32 | COPY --from=uv --chown=app:app /app/.venv /app/.venv 33 | 34 | # Place executables in the environment at the front of the path 35 | ENV PATH="/app/.venv/bin:$PATH" 36 | 37 | ENTRYPOINT ["markitdown_mcp_server"] 38 | ``` -------------------------------------------------------------------------------- /src/markitdown_mcp_server/server.py: -------------------------------------------------------------------------------- ```python 1 | from mcp.server import Server, stdio, models, NotificationOptions 2 | import mcp.types as types 3 | from markitdown import MarkItDown 4 | from typing import Tuple 5 | 6 | PROMPTS = { 7 | "md": types.Prompt( 8 | name="md", 9 | description="Convert document to markdown format using MarkItDown", 10 | arguments=[ 11 | types.PromptArgument( 12 | name="file_path", 13 | description="A URI to any document or file", 14 | required=True, 15 | ) 16 | ], 17 | ) 18 | } 19 | 20 | 21 | def convert_to_markdown(file_path: str) -> Tuple[str | None, str]: 22 | try: 23 | md = MarkItDown() 24 | result = md.convert(file_path) 25 | return result.title, result.text_content 26 | 27 | except Exception as e: 28 | return None, f"Error converting document: {str(e)}" 29 | 30 | 31 | # Initialize server 32 | app = Server("document-conversion-server") 33 | 34 | 35 | @app.list_prompts() 36 | async def list_prompts() -> list[types.Prompt]: 37 | return list(PROMPTS.values()) 38 | 39 | 40 | @app.get_prompt() 41 | async def get_prompt( 42 | name: str, arguments: dict[str, str] | None = None 43 | ) -> types.GetPromptResult: 44 | if name not in PROMPTS: 45 | raise ValueError(f"Prompt not found: {name}") 46 | 47 | if name == "md": 48 | if not arguments: 49 | raise ValueError("Arguments required") 50 | 51 | file_path = arguments.get("file_path") 52 | 53 | if not file_path: 54 | raise ValueError("file_path is required") 55 | 56 | try: 57 | markdown_title, markdown_content = convert_to_markdown(file_path) 58 | 59 | return types.GetPromptResult( 60 | messages=[ 61 | types.PromptMessage( 62 | role="user", 63 | content=types.TextContent( 64 | type="text", 65 | text=f"Here is the converted document in markdown format:\n{'' if not markdown_title else markdown_title}\n{markdown_content}", 66 | ), 67 | ) 68 | ] 69 | ) 70 | 71 | except Exception as e: 72 | raise ValueError(f"Error processing document: {str(e)}") 73 | 74 | raise ValueError("Prompt implementation not found") 75 | 76 | 77 | async def run(): 78 | async with stdio.stdio_server() as (read_stream, write_stream): 79 | await app.run( 80 | read_stream, 81 | write_stream, 82 | models.InitializationOptions( 83 | server_name="example", 84 | server_version="0.1.0", 85 | capabilities=app.get_capabilities( 86 | notification_options=NotificationOptions(), 87 | experimental_capabilities={}, 88 | ), 89 | ), 90 | ) 91 | ```