# Directory Structure
```
├── .github
│ ├── actions
│ │ └── uv_setup
│ │ └── action.yml
│ └── workflows
│ ├── _lint.yml
│ ├── _test.yml
│ ├── ci.yml
│ └── release.yml
├── .gitignore
├── LICENSE
├── Makefile
├── mcpdoc
│ ├── __init__.py
│ ├── _version.py
│ ├── cli.py
│ ├── langgraph.py
│ ├── main.py
│ └── splash.py
├── pyproject.toml
├── README.md
├── sample_config.json
├── sample_config.yaml
├── tests
│ └── unit_tests
│ ├── __init__.py
│ ├── test_imports.py
│ └── test_main.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | .vs/
2 | .vscode/
3 | .idea/
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 | docs/docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 | notebooks/
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | .python-version
91 |
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 |
99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 |
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 |
106 | # SageMath parsed files
107 | *.sage.py
108 |
109 | # Environments
110 | .env
111 | .envrc
112 | .venv
113 | .venvs
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | # macOS display setting files
139 | .DS_Store
140 |
141 | # Wandb directory
142 | wandb/
143 |
144 | # asdf tool versions
145 | .tool-versions
146 | /.ruff_cache/
147 |
148 | *.pkl
149 | *.bin
150 |
151 | # integration test artifacts
152 | data_map*
153 | \[('_type', 'fake'), ('stop', None)]
154 |
155 | # Replit files
156 | *replit*
157 |
158 | node_modules
159 | docs/.yarn/
160 | docs/node_modules/
161 | docs/.docusaurus/
162 | docs/.cache-loader/
163 | docs/_dist
164 | docs/api_reference/api_reference.rst
165 | docs/api_reference/experimental_api_reference.rst
166 | docs/api_reference/_build
167 | docs/api_reference/*/
168 | !docs/api_reference/_static/
169 | !docs/api_reference/templates/
170 | !docs/api_reference/themes/
171 | docs/docs_skeleton/build
172 | docs/docs_skeleton/node_modules
173 | docs/docs_skeleton/yarn.lock
174 |
175 | # Any new jupyter notebooks
176 | # not intended for the repo
177 | Untitled*.ipynb
178 |
179 | Chinook.db
180 |
181 | .vercel
182 | .turbo
183 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP LLMS-TXT Documentation Server
2 |
3 | ## Overview
4 |
5 | [llms.txt](https://llmstxt.org/) is a website index for LLMs, providing background information, guidance, and links to detailed markdown files. IDEs like Cursor and Windsurf or apps like Claude Code/Desktop can use `llms.txt` to retrieve context for tasks. However, these apps use different built-in tools to read and process files like `llms.txt`. The retrieval process can be opaque, and there is not always a way to audit the tool calls or the context returned.
6 |
7 | [MCP](https://github.com/modelcontextprotocol) offers a way for developers to have *full control* over tools used by these applications. Here, we create [an open source MCP server](https://github.com/modelcontextprotocol) to provide MCP host applications (e.g., Cursor, Windsurf, Claude Code/Desktop) with (1) a user-defined list of `llms.txt` files and (2) a simple `fetch_docs` tool to read URLs within any of the provided `llms.txt` files. This allows the user to audit each tool call as well as the context returned.
8 |
9 | <img src="https://github.com/user-attachments/assets/736f8f55-833d-4200-b833-5fca01a09e1b" width="60%">
10 |
11 | ## llms-txt
12 |
13 | You can find `llms.txt` files for LangGraph and LangChain here:
14 |
15 | | Library | llms.txt |
16 | |------------------|------------------------------------------------------------------------------------------------------------|
17 | | LangGraph Python | [https://langchain-ai.github.io/langgraph/llms.txt](https://langchain-ai.github.io/langgraph/llms.txt) |
18 | | LangGraph JS | [https://langchain-ai.github.io/langgraphjs/llms.txt](https://langchain-ai.github.io/langgraphjs/llms.txt) |
19 | | LangChain Python | [https://python.langchain.com/llms.txt](https://python.langchain.com/llms.txt) |
20 | | LangChain JS | [https://js.langchain.com/llms.txt](https://js.langchain.com/llms.txt) |
21 |
22 | ## Quickstart
23 |
24 | #### Install uv
25 | * Please see [official uv docs](https://docs.astral.sh/uv/getting-started/installation/#installation-methods) for other ways to install `uv`.
26 |
27 | ```bash
28 | curl -LsSf https://astral.sh/uv/install.sh | sh
29 | ```
30 |
31 | #### Choose an `llms.txt` file to use.
32 | * For example, [here's](https://langchain-ai.github.io/langgraph/llms.txt) the LangGraph `llms.txt` file.
33 |
34 | > **Note: Security and Domain Access Control**
35 | >
36 | > For security reasons, mcpdoc implements strict domain access controls:
37 | >
38 | > 1. **Remote llms.txt files**: When you specify a remote llms.txt URL (e.g., `https://langchain-ai.github.io/langgraph/llms.txt`), mcpdoc automatically adds only that specific domain (`langchain-ai.github.io`) to the allowed domains list. This means the tool can only fetch documentation from URLs on that domain.
39 | >
40 | > 2. **Local llms.txt files**: When using a local file, NO domains are automatically added to the allowed list. You MUST explicitly specify which domains to allow using the `--allowed-domains` parameter.
41 | >
42 | > 3. **Adding additional domains**: To allow fetching from domains beyond those automatically included:
43 | > - Use `--allowed-domains domain1.com domain2.com` to add specific domains
44 | > - Use `--allowed-domains '*'` to allow all domains (use with caution)
45 | >
46 | > This security measure prevents unauthorized access to domains not explicitly approved by the user, ensuring that documentation can only be retrieved from trusted sources.
47 |
48 | #### (Optional) Test the MCP server locally with your `llms.txt` file(s) of choice:
49 | ```bash
50 | uvx --from mcpdoc mcpdoc \
51 | --urls "LangGraph:https://langchain-ai.github.io/langgraph/llms.txt" "LangChain:https://python.langchain.com/llms.txt" \
52 | --transport sse \
53 | --port 8082 \
54 | --host localhost
55 | ```
56 |
57 | * This should run at: http://localhost:8082
58 |
59 | 
60 |
61 | * Run [MCP inspector](https://modelcontextprotocol.io/docs/tools/inspector) and connect to the running server:
62 | ```bash
63 | npx @modelcontextprotocol/inspector
64 | ```
65 |
66 | 
67 |
68 | * Here, you can test the `tool` calls.
69 |
70 | ### Connect to Cursor
71 |
72 | * Open `Cursor Settings` and `MCP` tab.
73 | * This will open the `~/.cursor/mcp.json` file.
74 |
75 | 
76 |
77 | * Paste the following into the file (we use the `langgraph-docs-mcp` name and link to the LangGraph `llms.txt`).
78 |
79 | ```
80 | {
81 | "mcpServers": {
82 | "langgraph-docs-mcp": {
83 | "command": "uvx",
84 | "args": [
85 | "--from",
86 | "mcpdoc",
87 | "mcpdoc",
88 | "--urls",
89 |         "LangGraph:https://langchain-ai.github.io/langgraph/llms.txt", "LangChain:https://python.langchain.com/llms.txt",
90 | "--transport",
91 | "stdio"
92 | ]
93 | }
94 | }
95 | }
96 | ```
97 |
98 | * Confirm that the server is running in your `Cursor Settings/MCP` tab.
99 | * Best practice is to then update Cursor Global (User) rules.
100 | * Open Cursor `Settings/Rules` and update `User Rules` with the following (or similar):
101 |
102 | ```
103 | for ANY question about LangGraph, use the langgraph-docs-mcp server to help answer --
104 | + call list_doc_sources tool to get the available llms.txt file
105 | + call fetch_docs tool to read it
106 | + reflect on the urls in llms.txt
107 | + reflect on the input question
108 | + call fetch_docs on any urls relevant to the question
109 | + use this to answer the question
110 | ```
111 |
112 | * `CMD+L` (on Mac) to open chat.
113 | * Ensure `agent` is selected.
114 |
115 | 
116 |
117 | Then, try an example prompt, such as:
118 | ```
119 | what are types of memory in LangGraph?
120 | ```
121 |
122 | 
123 |
124 | ### Connect to Windsurf
125 |
126 | * Open Cascade with `CMD+L` (on Mac).
127 | * Click `Configure MCP` to open the config file, `~/.codeium/windsurf/mcp_config.json`.
128 | * Update with `langgraph-docs-mcp` as noted above.
129 |
130 | 
131 |
132 | * Update `Windsurf Rules/Global rules` with the following (or similar):
133 |
134 | ```
135 | for ANY question about LangGraph, use the langgraph-docs-mcp server to help answer --
136 | + call list_doc_sources tool to get the available llms.txt file
137 | + call fetch_docs tool to read it
138 | + reflect on the urls in llms.txt
139 | + reflect on the input question
140 | + call fetch_docs on any urls relevant to the question
141 | ```
142 |
143 | 
144 |
145 | Then, try the example prompt:
146 | * It will perform your tool calls.
147 |
148 | 
149 |
150 | ### Connect to Claude Desktop
151 |
152 | * Open `Settings/Developer` to update `~/Library/Application\ Support/Claude/claude_desktop_config.json`.
153 | * Update with `langgraph-docs-mcp` as noted above.
154 | * Restart Claude Desktop app.
155 |
156 | > [!Note]
157 | > If you run into issues with Python version incompatibility when trying to add MCPDoc tools to Claude Desktop, you can explicitly specify the filepath to the `python` executable in the `uvx` command.
158 | >
159 | > <details>
160 | > <summary>Example configuration</summary>
161 | >
162 | > ```
163 | > {
164 | > "mcpServers": {
165 | > "langgraph-docs-mcp": {
166 | > "command": "uvx",
167 | > "args": [
168 | > "--python",
169 | > "/path/to/python",
170 | > "--from",
171 | > "mcpdoc",
172 | > "mcpdoc",
173 | > "--urls",
174 | > "LangGraph:https://langchain-ai.github.io/langgraph/llms.txt",
175 | > "--transport",
176 | > "stdio"
177 | > ]
178 | > }
179 | > }
180 | > }
181 | > ```
182 | > </details>
183 |
184 | > [!Note]
185 | > Currently (3/21/25) it appears that Claude Desktop does not support `rules` for global rules, so append the following to your prompt.
186 |
187 | ```
188 | <rules>
189 | for ANY question about LangGraph, use the langgraph-docs-mcp server to help answer --
190 | + call list_doc_sources tool to get the available llms.txt file
191 | + call fetch_docs tool to read it
192 | + reflect on the urls in llms.txt
193 | + reflect on the input question
194 | + call fetch_docs on any urls relevant to the question
195 | </rules>
196 | ```
197 |
198 | 
199 |
200 | * You will see your tools visible in the bottom right of your chat input.
201 |
202 | 
203 |
204 | Then, try the example prompt:
205 |
206 | * It will ask to approve tool calls as it processes your request.
207 |
208 | 
209 |
210 | ### Connect to Claude Code
211 |
212 | * In a terminal after installing [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview), run this command to add the MCP server to your project:
213 | ```
214 | claude mcp add-json langgraph-docs '{"type":"stdio","command":"uvx","args":["--from", "mcpdoc", "mcpdoc", "--urls", "LangGraph:https://langchain-ai.github.io/langgraph/llms.txt", "LangChain:https://python.langchain.com/llms.txt"]}' -s local
215 | ```
216 | * You will see `~/.claude.json` updated.
217 | * Test by launching Claude Code and running the following to view your tools:
218 | ```
219 | $ claude
220 | $ /mcp
221 | ```
222 |
223 | 
224 |
225 | > [!Note]
226 | > Currently (3/21/25) it appears that Claude Code does not support `rules` for global rules, so append the following to your prompt.
227 |
228 | ```
229 | <rules>
230 | for ANY question about LangGraph, use the langgraph-docs-mcp server to help answer --
231 | + call list_doc_sources tool to get the available llms.txt file
232 | + call fetch_docs tool to read it
233 | + reflect on the urls in llms.txt
234 | + reflect on the input question
235 | + call fetch_docs on any urls relevant to the question
236 | </rules>
237 | ```
238 |
239 | Then, try the example prompt:
240 |
241 | * It will ask to approve tool calls.
242 |
243 | 
244 |
245 | ## Command-line Interface
246 |
247 | The `mcpdoc` command provides a simple CLI for launching the documentation server.
248 |
249 | You can specify documentation sources in three ways, and these can be combined:
250 |
251 | 1. Using a YAML config file:
252 |
253 | * This will load the LangGraph Python documentation from the `sample_config.yaml` file in this repo.
254 |
255 | ```bash
256 | mcpdoc --yaml sample_config.yaml
257 | ```
258 |
259 | 2. Using a JSON config file:
260 |
261 | * This will load the LangGraph Python documentation from the `sample_config.json` file in this repo.
262 |
263 | ```bash
264 | mcpdoc --json sample_config.json
265 | ```
266 |
267 | 3. Directly specifying llms.txt URLs with optional names:
268 |
269 | * URLs can be specified either as plain URLs or with optional names using the format `name:url`.
270 | * You can specify multiple URLs by using the `--urls` parameter multiple times.
271 | * This is how we loaded `llms.txt` for the MCP server above.
272 |
273 | ```bash
274 | mcpdoc --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt --urls LangChain:https://python.langchain.com/llms.txt
275 | ```
276 |
277 | You can also combine these methods to merge documentation sources:
278 |
279 | ```bash
280 | mcpdoc --yaml sample_config.yaml --json sample_config.json --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt --urls LangChain:https://python.langchain.com/llms.txt
281 | ```
282 |
283 | ## Additional Options
284 |
285 | - `--follow-redirects`: Follow HTTP redirects (defaults to False)
286 | - `--timeout SECONDS`: HTTP request timeout in seconds (defaults to 10.0)
287 |
288 | Example with additional options:
289 |
290 | ```bash
291 | mcpdoc --yaml sample_config.yaml --follow-redirects --timeout 15
292 | ```
293 |
294 | This will load the LangGraph Python documentation with a 15-second timeout and follow any HTTP redirects if necessary.
295 |
296 | ## Configuration Format
297 |
298 | Both YAML and JSON configuration files should contain a list of documentation sources.
299 |
300 | Each source must include an `llms_txt` URL and can optionally include a `name`:
301 |
302 | ### YAML Configuration Example (sample_config.yaml)
303 |
304 | ```yaml
305 | # Sample configuration for mcp-llms-txt server
306 | # Each entry must have a llms_txt URL and optionally a name
307 | - name: LangGraph Python
308 | llms_txt: https://langchain-ai.github.io/langgraph/llms.txt
309 | ```
310 |
311 | ### JSON Configuration Example (sample_config.json)
312 |
313 | ```json
314 | [
315 | {
316 | "name": "LangGraph Python",
317 | "llms_txt": "https://langchain-ai.github.io/langgraph/llms.txt"
318 | }
319 | ]
320 | ```
321 |
322 | ## Programmatic Usage
323 |
324 | ```python
325 | from mcpdoc.main import create_server
326 |
327 | # Create a server with documentation sources
328 | server = create_server(
329 | [
330 | {
331 | "name": "LangGraph Python",
332 | "llms_txt": "https://langchain-ai.github.io/langgraph/llms.txt",
333 | },
334 | # You can add multiple documentation sources
335 | # {
336 | # "name": "Another Documentation",
337 | # "llms_txt": "https://example.com/llms.txt",
338 | # },
339 | ],
340 | follow_redirects=True,
341 | timeout=15.0,
342 | )
343 |
344 | # Run the server
345 | server.run(transport="stdio")
346 | ```
347 |
```
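As a complement to the MCP Inspector step in the Quickstart above, the running SSE server can also be checked programmatically. A minimal sketch, assuming the server from the Quickstart is running on `http://localhost:8082` and that the `mcp` Python SDK's SSE client is available (`/sse` is FastMCP's default endpoint path):

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main() -> None:
    # Connect to the SSE endpoint exposed by the server started above.
    async with sse_client("http://localhost:8082/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Expect the two tools registered by mcpdoc.
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])  # ['list_doc_sources', 'fetch_docs']
            # List the configured llms.txt sources.
            result = await session.call_tool("list_doc_sources", {})
            print(result.content)


asyncio.run(main())
```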
--------------------------------------------------------------------------------
/tests/unit_tests/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/mcpdoc/__init__.py:
--------------------------------------------------------------------------------
```python
1 | from mcpdoc._version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
```
--------------------------------------------------------------------------------
/sample_config.json:
--------------------------------------------------------------------------------
```json
1 | [
2 | {
3 | "name": "LangGraph Python",
4 | "llms_txt": "https://langchain-ai.github.io/langgraph/llms.txt"
5 | }
6 | ]
7 |
```
--------------------------------------------------------------------------------
/sample_config.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Sample configuration for mcp-llms-txt server
2 | # Each entry must have a llms_txt URL and optionally a name
3 | - name: LangGraph Python
4 | llms_txt: https://langchain-ai.github.io/langgraph/llms.txt
5 |
```
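As a sketch of what `mcpdoc --yaml sample_config.yaml` does with this file: the CLI parses the YAML into a list of doc-source dicts and hands it to `create_server` (assuming the script runs from the repo root so the relative path resolves):

```python
import yaml

from mcpdoc.main import create_server

# Parse the YAML config into a list like [{"name": ..., "llms_txt": ...}].
with open("sample_config.yaml", "r", encoding="utf-8") as f:
    doc_sources = yaml.safe_load(f)

# Same entry point the CLI uses; stdio is the default transport.
server = create_server(doc_sources)
server.run(transport="stdio")
```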
--------------------------------------------------------------------------------
/mcpdoc/_version.py:
--------------------------------------------------------------------------------
```python
1 | from importlib import metadata
2 |
3 | try:
4 | __version__ = metadata.version(__package__)
5 | except metadata.PackageNotFoundError:
6 | # Case where package metadata is not available.
7 | __version__ = ""
8 |
```
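The fallback above means `__version__` is an empty string when package metadata is unavailable (e.g., a source checkout that was never installed). A quick check, assuming an installed distribution:

```python
import mcpdoc

# Resolves via importlib.metadata; e.g. "0.0.10" for the version pinned
# in pyproject.toml, or "" if the package metadata is missing.
print(mcpdoc.__version__)
```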
--------------------------------------------------------------------------------
/tests/unit_tests/test_imports.py:
--------------------------------------------------------------------------------
```python
1 | def test_imports():
2 | """Test that main modules can be imported."""
3 | from mcpdoc import main # noqa
4 | from mcpdoc import cli # noqa
5 | from mcpdoc import langgraph # noqa
6 |
7 | assert True
8 |
```
--------------------------------------------------------------------------------
/.github/actions/uv_setup/action.yml:
--------------------------------------------------------------------------------
```yaml
1 | # TODO: https://docs.astral.sh/uv/guides/integration/github/#caching
2 |
3 | name: uv-install
4 | description: Set up Python and uv
5 |
6 | inputs:
7 | python-version:
8 | description: Python version, supporting MAJOR.MINOR only
9 | required: true
10 |
11 | env:
12 | UV_VERSION: "0.5.25"
13 |
14 | runs:
15 | using: composite
16 | steps:
17 | - name: Install uv and set the python version
18 | uses: astral-sh/setup-uv@v5
19 | with:
20 | version: ${{ env.UV_VERSION }}
21 | python-version: ${{ inputs.python-version }}
22 |
```
--------------------------------------------------------------------------------
/.github/workflows/_test.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: test
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | working-directory:
7 | required: true
8 | type: string
9 | description: "From which folder this pipeline executes"
10 | python-version:
11 | required: true
12 | type: string
13 | description: "Python version to use"
14 |
15 | env:
16 | UV_FROZEN: "true"
17 | UV_NO_SYNC: "true"
18 |
19 | jobs:
20 | build:
21 | defaults:
22 | run:
23 | working-directory: ${{ inputs.working-directory }}
24 | runs-on: ubuntu-latest
25 | timeout-minutes: 20
26 | name: "make test #${{ inputs.python-version }}"
27 | steps:
28 | - uses: actions/checkout@v4
29 |
30 | - name: Set up Python ${{ inputs.python-version }} + uv
31 | uses: "./.github/actions/uv_setup"
32 | id: setup-python
33 | with:
34 | python-version: ${{ inputs.python-version }}
35 | - name: Install dependencies
36 | shell: bash
37 | run: uv sync --group test
38 |
39 | - name: Run core tests
40 | shell: bash
41 | run: |
42 | make test
43 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [project]
2 | name = "mcpdoc"
3 | version = "0.0.10"
4 | description = "Serve llms-txt documentation over MCP"
5 | readme = "README.md"
6 | license = "MIT"
7 | requires-python = ">=3.10"
8 | dependencies = [
9 | "httpx>=0.28.1",
10 | "markdownify>=1.1.0",
11 | "mcp[cli]>=1.4.1",
12 | "pyyaml>=6.0.1",
13 | ]
14 |
15 | [project.scripts]
16 | mcpdoc = "mcpdoc.cli:main"
17 |
18 | [dependency-groups]
19 | test = [
20 | "pytest>=8.3.4",
21 | "pytest-asyncio>=0.25.3",
22 | "pytest-cov>=6.0.0",
23 | "pytest-mock>=3.14.0",
24 | "pytest-socket>=0.7.0",
25 | "pytest-timeout>=2.3.1",
26 | "ruff>=0.9.7",
27 | ]
28 |
29 |
30 |
31 | [build-system]
32 | requires = ["hatchling"]
33 | build-backend = "hatchling.build"
34 |
35 | [tool.pytest.ini_options]
36 | minversion = "8.0"
37 | # -ra: Report all extra test outcomes (passed, skipped, failed, etc.)
38 | # -q: Enable quiet mode for less cluttered output
39 | # -v: Enable verbose output to display detailed test names and statuses
40 | # --durations=5: Show the 5 slowest tests after the run (useful for performance tuning)
41 | addopts = "-ra -q -v --durations=5"
42 | testpaths = [
43 | "tests",
44 | ]
45 | python_files = ["test_*.py"]
46 | python_functions = ["test_*"]
47 | asyncio_mode = "auto"
48 | asyncio_default_fixture_loop_scope = "function"
49 |
50 |
```
--------------------------------------------------------------------------------
/.github/workflows/_lint.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: lint
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | working-directory:
7 | required: true
8 | type: string
9 | description: "From which folder this pipeline executes"
10 | python-version:
11 | required: true
12 | type: string
13 | description: "Python version to use"
14 |
15 | env:
16 | WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
17 |
18 | # This env var allows us to get inline annotations when ruff has complaints.
19 | RUFF_OUTPUT_FORMAT: github
20 |
21 | UV_FROZEN: "true"
22 |
23 | jobs:
24 | build:
25 | name: "make lint #${{ inputs.python-version }}"
26 | runs-on: ubuntu-latest
27 | timeout-minutes: 20
28 | steps:
29 | - uses: actions/checkout@v4
30 |
31 | - name: Set up Python ${{ inputs.python-version }} + uv
32 | uses: "./.github/actions/uv_setup"
33 | with:
34 | python-version: ${{ inputs.python-version }}
35 |
36 | - name: Install dependencies
37 | working-directory: ${{ inputs.working-directory }}
38 | run: |
39 | uv sync --group test
40 |
41 | - name: Analysing the code with our lint
42 | working-directory: ${{ inputs.working-directory }}
43 | run: |
44 | make lint
45 |
```
--------------------------------------------------------------------------------
/mcpdoc/langgraph.py:
--------------------------------------------------------------------------------
```python
1 | """A server for just langgraph docs from langchain-ai.github.io.
2 |
3 | This is used as a way to test the doc functionality via MCP.
4 | """
5 |
6 | # /usr/bin/env python3
7 | import httpx
8 | from markdownify import markdownify
9 | from mcp.server.fastmcp import FastMCP
10 |
11 | server = FastMCP(name="llms-txt")
12 |
13 | ALLOWED_PREFIX = "https://langchain-ai.github.io/"
14 |
15 | HTTPX_CLIENT = httpx.AsyncClient(follow_redirects=False)
16 |
17 |
18 | @server.tool()
19 | async def get_docs(url: str = "overview") -> str:
20 | """Get langgraph docs.
21 |
22 | Always fetch the `overview` prior to fetching any other URLs as it will provide a
23 | list of available URLs.
24 |
25 | Args:
26 | url: The URL to fetch. Must start with https://langchain-ai.github.io/
27 | or be "overview".
28 | """
29 | if url == "overview":
30 | url = "https://langchain-ai.github.io/langgraph/llms.txt"
31 |
32 | if not url.startswith(ALLOWED_PREFIX):
33 | return (
34 | "Error: Invalid url. Must start with https://langchain-ai.github.io/ "
35 | 'or be "overview"'
36 | )
37 |
38 | response = await HTTPX_CLIENT.get(url)
39 |     # raise_for_status() raises httpx.HTTPStatusError on any non-2xx
40 |     # response, so no separate status-code check is needed afterwards.
41 |     response.raise_for_status()
42 |
43 |     # Convert HTML to markdown
44 |     markdown_content = markdownify(response.text)
45 |     return markdown_content
46 |
47 |
48 | if __name__ == "__main__":
49 | server.run(transport="stdio")
50 |
```
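For a quick smoke test without an MCP client, `get_docs` can be awaited directly. This sketch assumes FastMCP's `@server.tool()` decorator returns the wrapped function unchanged, which holds for current versions of the `mcp` SDK:

```python
import asyncio

from mcpdoc.langgraph import get_docs


async def main() -> None:
    # "overview" resolves to the langgraph llms.txt index.
    overview = await get_docs("overview")
    print(overview[:500])

    # URLs outside the allowed prefix come back as an error string.
    print(await get_docs("https://example.com/"))


asyncio.run(main())
```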
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
```yaml
1 | ---
2 | name: Run CI Tests
3 |
4 | on:
5 | push:
6 | branches: [ main ]
7 | pull_request:
8 | workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
9 |
10 | # If another push to the same PR or branch happens while this workflow is still running,
11 | # cancel the earlier run in favor of the next run.
12 | #
13 | # There's no point in testing an outdated version of the code. GitHub only allows
14 | # a limited number of job runners to be active at the same time, so it's better to cancel
15 | # pointless jobs early so that more useful jobs can run sooner.
16 | concurrency:
17 | group: ${{ github.workflow }}-${{ github.ref }}
18 | cancel-in-progress: true
19 |
20 | jobs:
21 | lint:
22 | strategy:
23 | matrix:
24 | # Only lint on the min and max supported Python versions.
25 | # It's extremely unlikely that there's a lint issue on any version in between
26 | # that doesn't show up on the min or max versions.
27 | #
28 | # GitHub rate-limits how many jobs can be running at any one time.
29 | # Starting new jobs is also relatively slow,
30 | # so linting on fewer versions makes CI faster.
31 | python-version:
32 | - "3.12"
33 | uses:
34 | ./.github/workflows/_lint.yml
35 | with:
36 | working-directory: .
37 | python-version: ${{ matrix.python-version }}
38 | secrets: inherit
39 | test:
40 | strategy:
41 | matrix:
42 |       # Only test on the min and max supported Python versions.
43 |       # It's extremely unlikely that there's a test failure on any version in between
44 |       # that doesn't show up on the min or max versions.
45 |       #
46 |       # GitHub rate-limits how many jobs can be running at any one time.
47 |       # Starting new jobs is also relatively slow,
48 |       # so testing on fewer versions makes CI faster.
49 | python-version:
50 | - "3.10"
51 | - "3.12"
52 | uses:
53 | ./.github/workflows/_test.yml
54 | with:
55 | working-directory: .
56 | python-version: ${{ matrix.python-version }}
57 | secrets: inherit
58 |
59 |
```
--------------------------------------------------------------------------------
/tests/unit_tests/test_main.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for mcpdoc.main module."""
2 |
3 | import pytest
4 |
5 | from mcpdoc.main import (
6 | _get_fetch_description,
7 | _is_http_or_https,
8 | extract_domain,
9 | )
10 |
11 |
12 | def test_extract_domain() -> None:
13 | """Test extract_domain function."""
14 | # Test with https URL
15 | assert extract_domain("https://example.com/page") == "https://example.com/"
16 |
17 | # Test with http URL
18 | assert extract_domain("http://test.org/docs/index.html") == "http://test.org/"
19 |
20 | # Test with URL that has port
21 | assert extract_domain("https://localhost:8080/api") == "https://localhost:8080/"
22 |
23 | # Check trailing slash
24 | assert extract_domain("https://localhost:8080") == "https://localhost:8080/"
25 |
26 | # Test with URL that has subdomain
27 | assert extract_domain("https://docs.python.org/3/") == "https://docs.python.org/"
28 |
29 |
30 | @pytest.mark.parametrize(
31 | "url,expected",
32 | [
33 | ("http://example.com", True),
34 | ("https://example.com", True),
35 | ("/path/to/file.txt", False),
36 | ("file:///path/to/file.txt", False),
37 | (
38 | "ftp://example.com",
39 | False,
40 | ), # Not HTTP or HTTPS, even though it's not a local file
41 | ],
42 | )
43 | def test_is_http_or_https(url, expected):
44 | """Test _is_http_or_https function."""
45 | assert _is_http_or_https(url) is expected
46 |
47 |
48 | @pytest.mark.parametrize(
49 | "has_local_sources,expected_substrings",
50 | [
51 | (True, ["local file path", "file://"]),
52 | (False, ["URL to fetch"]),
53 | ],
54 | )
55 | def test_get_fetch_description(has_local_sources, expected_substrings):
56 | """Test _get_fetch_description function."""
57 | description = _get_fetch_description(has_local_sources)
58 |
59 | # Common assertions for both cases
60 | assert "Fetch and parse documentation" in description
61 | assert "Returns:" in description
62 |
63 | # Specific assertions based on has_local_sources
64 | for substring in expected_substrings:
65 | if has_local_sources:
66 | assert substring in description
67 | else:
68 | # For the False case, we only check that "local file path"
69 | # and "file://" are NOT present
70 | if substring in ["local file path", "file://"]:
71 | assert substring not in description
72 |
```
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: release
2 | run-name: Release ${{ inputs.working-directory }} by @${{ github.actor }}
3 | on:
4 | workflow_call:
5 | inputs:
6 | working-directory:
7 | required: true
8 | type: string
9 | description: "From which folder this pipeline executes"
10 | workflow_dispatch:
11 | inputs:
12 | working-directory:
13 | description: "From which folder this pipeline executes"
14 | default: "."
15 | dangerous-nonmain-release:
16 | required: false
17 | type: boolean
18 | default: false
19 | description: "Release from a non-main branch (danger!)"
20 |
21 | env:
22 | PYTHON_VERSION: "3.11"
23 | UV_FROZEN: "true"
24 | UV_NO_SYNC: "true"
25 |
26 | jobs:
27 | build:
28 | if: github.ref == 'refs/heads/main' || inputs.dangerous-nonmain-release
29 | environment: Scheduled testing
30 | runs-on: ubuntu-latest
31 |
32 | outputs:
33 | pkg-name: ${{ steps.check-version.outputs.pkg-name }}
34 | version: ${{ steps.check-version.outputs.version }}
35 |
36 | steps:
37 | - uses: actions/checkout@v4
38 |
39 | - name: Set up Python + uv
40 | uses: "./.github/actions/uv_setup"
41 | with:
42 | python-version: ${{ env.PYTHON_VERSION }}
43 |
44 | # We want to keep this build stage *separate* from the release stage,
45 | # so that there's no sharing of permissions between them.
46 | # The release stage has trusted publishing and GitHub repo contents write access,
47 | # and we want to keep the scope of that access limited just to the release job.
48 | # Otherwise, a malicious `build` step (e.g. via a compromised dependency)
49 | # could get access to our GitHub or PyPI credentials.
50 | #
51 | # Per the trusted publishing GitHub Action:
52 | # > It is strongly advised to separate jobs for building [...]
53 | # > from the publish job.
54 | # https://github.com/pypa/gh-action-pypi-publish#non-goals
55 | - name: Build project for distribution
56 | run: uv build
57 | - name: Upload build
58 | uses: actions/upload-artifact@v4
59 | with:
60 | name: dist
61 | path: ${{ inputs.working-directory }}/dist/
62 |
63 | - name: Check Version
64 | id: check-version
65 | shell: python
66 | working-directory: ${{ inputs.working-directory }}
67 | run: |
68 | import os
69 | import tomllib
70 | with open("pyproject.toml", "rb") as f:
71 | data = tomllib.load(f)
72 | pkg_name = data["project"]["name"]
73 | version = data["project"]["version"]
74 | with open(os.environ["GITHUB_OUTPUT"], "a") as f:
75 | f.write(f"pkg-name={pkg_name}\n")
76 | f.write(f"version={version}\n")
77 |
78 | publish:
79 | needs:
80 | - build
81 | runs-on: ubuntu-latest
82 | permissions:
83 | # This permission is used for trusted publishing:
84 | # https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
85 | #
86 | # Trusted publishing has to also be configured on PyPI for each package:
87 | # https://docs.pypi.org/trusted-publishers/adding-a-publisher/
88 | id-token: write
89 |
90 | defaults:
91 | run:
92 | working-directory: ${{ inputs.working-directory }}
93 |
94 | steps:
95 | - uses: actions/checkout@v4
96 |
97 | - name: Set up Python + uv
98 | uses: "./.github/actions/uv_setup"
99 | with:
100 | python-version: ${{ env.PYTHON_VERSION }}
101 |
102 | - uses: actions/download-artifact@v4
103 | with:
104 | name: dist
105 | path: ${{ inputs.working-directory }}/dist/
106 |
107 | - name: Publish package distributions to PyPI
108 | uses: pypa/gh-action-pypi-publish@release/v1
109 | with:
110 | packages-dir: ${{ inputs.working-directory }}/dist/
111 | verbose: true
112 | print-hash: true
113 | # Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
114 | attestations: false
115 |
116 | mark-release:
117 | needs:
118 | - build
119 | - publish
120 | runs-on: ubuntu-latest
121 | permissions:
122 | # This permission is needed by `ncipollo/release-action` to
123 | # create the GitHub release.
124 | contents: write
125 |
126 | defaults:
127 | run:
128 | working-directory: ${{ inputs.working-directory }}
129 |
130 | steps:
131 | - uses: actions/checkout@v4
132 |
133 | - name: Set up Python + uv
134 | uses: "./.github/actions/uv_setup"
135 | with:
136 | python-version: ${{ env.PYTHON_VERSION }}
137 |
138 | - uses: actions/download-artifact@v4
139 | with:
140 | name: dist
141 | path: ${{ inputs.working-directory }}/dist/
142 |
143 | - name: Create Tag
144 | uses: ncipollo/release-action@v1
145 | with:
146 | artifacts: "dist/*"
147 | token: ${{ secrets.GITHUB_TOKEN }}
148 | generateReleaseNotes: true
149 | tag: ${{needs.build.outputs.pkg-name}}==${{ needs.build.outputs.version }}
150 |           # body omitted: there is no release-notes job in this workflow;
151 |           # notes come from generateReleaseNotes above.
151 | commit: main
152 | makeLatest: true
```
--------------------------------------------------------------------------------
/mcpdoc/cli.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """Command-line interface for mcp-llms-txt server."""
3 |
4 | import argparse
5 | import json
6 | import sys
7 | from typing import List, Dict
8 |
9 | import yaml
10 |
11 | from mcpdoc._version import __version__
12 | from mcpdoc.main import create_server, DocSource
13 | from mcpdoc.splash import SPLASH
14 |
15 |
16 | class CustomFormatter(
17 | argparse.RawDescriptionHelpFormatter, argparse.ArgumentDefaultsHelpFormatter
18 | ):
19 | # Custom formatter to preserve epilog formatting while showing default values
20 | pass
21 |
22 |
23 | EPILOG = """
24 | Examples:
25 | # Directly specifying llms.txt URLs with optional names
26 | mcpdoc --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt
27 |
28 | # Using a local file (absolute or relative path)
29 | mcpdoc --urls LocalDocs:/path/to/llms.txt --allowed-domains '*'
30 |
31 | # Using a YAML config file
32 | mcpdoc --yaml sample_config.yaml
33 |
34 | # Using a JSON config file
35 | mcpdoc --json sample_config.json
36 |
37 | # Combining multiple documentation sources
38 | mcpdoc --yaml sample_config.yaml --json sample_config.json --urls LangGraph:https://langchain-ai.github.io/langgraph/llms.txt
39 |
40 | # Using SSE transport with default host (127.0.0.1) and port (8000)
41 | mcpdoc --yaml sample_config.yaml --transport sse
42 |
43 | # Using SSE transport with custom host and port
44 | mcpdoc --yaml sample_config.yaml --transport sse --host 0.0.0.0 --port 9000
45 |
46 | # Using SSE transport with additional HTTP options
47 | mcpdoc --yaml sample_config.yaml --follow-redirects --timeout 15 --transport sse --host localhost --port 8080
48 |
49 | # Allow fetching from additional domains. The domains hosting the llms.txt files are always allowed.
50 | mcpdoc --yaml sample_config.yaml --allowed-domains https://example.com/ https://another-example.com/
51 |
52 | # Allow fetching from any domain
53 | mcpdoc --yaml sample_config.yaml --allowed-domains '*'
54 | """
55 |
56 |
57 | def parse_args() -> argparse.Namespace:
58 | """Parse command-line arguments."""
59 | # Custom formatter to preserve epilog formatting
60 | parser = argparse.ArgumentParser(
61 | description="MCP LLMS-TXT Documentation Server",
62 | formatter_class=CustomFormatter,
63 | epilog=EPILOG,
64 | )
65 |
66 | # Allow combining multiple doc source methods
67 | parser.add_argument(
68 | "--yaml", "-y", type=str, help="Path to YAML config file with doc sources"
69 | )
70 | parser.add_argument(
71 | "--json", "-j", type=str, help="Path to JSON config file with doc sources"
72 | )
73 | parser.add_argument(
74 | "--urls",
75 | "-u",
76 | type=str,
77 | nargs="+",
78 | help="List of llms.txt URLs or file paths with optional names (format: 'url_or_path' or 'name:url_or_path')",
79 | )
80 |
81 | parser.add_argument(
82 | "--follow-redirects",
83 | action="store_true",
84 | help="Whether to follow HTTP redirects",
85 | )
86 | parser.add_argument(
87 | "--allowed-domains",
88 | type=str,
89 | nargs="*",
90 | help="Additional allowed domains to fetch documentation from. Use '*' to allow all domains.",
91 | )
92 | parser.add_argument(
93 | "--timeout", type=float, default=10.0, help="HTTP request timeout in seconds"
94 | )
95 | parser.add_argument(
96 | "--transport",
97 | type=str,
98 | default="stdio",
99 | choices=["stdio", "sse"],
100 | help="Transport protocol for MCP server",
101 | )
102 |
103 | parser.add_argument(
104 | "--log-level",
105 | type=str,
106 | default="INFO",
107 | help=(
108 | "Log level for the server. Use one on the following: DEBUG, INFO, "
109 | "WARNING, ERROR."
110 | " (only used with --transport sse)"
111 | ),
112 | )
113 |
114 | # SSE-specific options
115 | parser.add_argument(
116 | "--host",
117 | type=str,
118 | default="127.0.0.1",
119 | help="Host to bind the server to (only used with --transport sse)",
120 | )
121 | parser.add_argument(
122 | "--port",
123 | type=int,
124 | default=8000,
125 | help="Port to bind the server to (only used with --transport sse)",
126 | )
127 |
128 | # Version information
129 | parser.add_argument(
130 | "--version",
131 | "-V",
132 | action="version",
133 | version=f"mcpdoc {__version__}",
134 | help="Show version information and exit",
135 | )
136 |
137 | return parser.parse_args()
138 |
139 |
140 | def load_config_file(file_path: str, file_format: str) -> List[Dict[str, str]]:
141 | """Load configuration from a file.
142 |
143 | Args:
144 | file_path: Path to the config file
145 | file_format: Format of the config file ("yaml" or "json")
146 |
147 | Returns:
148 | List of doc source configurations
149 | """
150 | try:
151 | with open(file_path, "r", encoding="utf-8") as file:
152 | if file_format.lower() == "yaml":
153 | config = yaml.safe_load(file)
154 | elif file_format.lower() == "json":
155 | config = json.load(file)
156 | else:
157 | raise ValueError(f"Unsupported file format: {file_format}")
158 |
159 | if not isinstance(config, list):
160 | raise ValueError("Config file must contain a list of doc sources")
161 |
162 | return config
163 | except (FileNotFoundError, yaml.YAMLError, json.JSONDecodeError) as e:
164 | print(f"Error loading config file: {e}", file=sys.stderr)
165 | sys.exit(1)
166 |
167 |
168 | def create_doc_sources_from_urls(urls: List[str]) -> List[DocSource]:
169 | """Create doc sources from a list of URLs or file paths with optional names.
170 |
171 | Args:
172 | urls: List of llms.txt URLs or file paths with optional names
173 | (format: 'url_or_path' or 'name:url_or_path')
174 |
175 | Returns:
176 | List of DocSource objects
177 | """
178 | doc_sources = []
179 | for entry in urls:
180 | if not entry.strip():
181 | continue
182 | if ":" in entry and not entry.startswith(("http:", "https:")):
183 | # Format is name:url
184 | name, url = entry.split(":", 1)
185 | doc_sources.append({"name": name, "llms_txt": url})
186 | else:
187 | # Format is just url
188 | doc_sources.append({"llms_txt": entry})
189 | return doc_sources
190 |
191 |
192 | def main() -> None:
193 | """Main entry point for the CLI."""
194 | # Check if any arguments were provided
195 | if len(sys.argv) == 1:
196 | # No arguments, print help
197 | # Use the same custom formatter as parse_args()
198 | help_parser = argparse.ArgumentParser(
199 | description="MCP LLMS-TXT Documentation Server",
200 | formatter_class=CustomFormatter,
201 | epilog=EPILOG,
202 | )
203 | # Add version to help parser too
204 | help_parser.add_argument(
205 | "--version",
206 | "-V",
207 | action="version",
208 | version=f"mcpdoc {__version__}",
209 | help="Show version information and exit",
210 | )
211 | help_parser.print_help()
212 | sys.exit(0)
213 |
214 | args = parse_args()
215 |
216 | # Load doc sources based on command-line arguments
217 | doc_sources: List[DocSource] = []
218 |
219 | # Check if any source options were provided
220 | if not (args.yaml or args.json or args.urls):
221 | print(
222 | "Error: At least one source option (--yaml, --json, or --urls) is required",
223 | file=sys.stderr,
224 | )
225 | sys.exit(1)
226 |
227 | # Merge doc sources from all provided methods
228 | if args.yaml:
229 | doc_sources.extend(load_config_file(args.yaml, "yaml"))
230 | if args.json:
231 | doc_sources.extend(load_config_file(args.json, "json"))
232 | if args.urls:
233 | doc_sources.extend(create_doc_sources_from_urls(args.urls))
234 |
235 | # Only used with SSE transport
236 | settings = {
237 | "host": args.host,
238 | "port": args.port,
239 | "log_level": "INFO",
240 | }
241 |
242 | # Create and run the server
243 | server = create_server(
244 | doc_sources,
245 | follow_redirects=args.follow_redirects,
246 | timeout=args.timeout,
247 | settings=settings,
248 | allowed_domains=args.allowed_domains,
249 | )
250 |
251 | if args.transport == "sse":
252 | print()
253 | print(SPLASH)
254 | print()
255 |
256 | print(
257 | f"Launching MCPDOC server with {len(doc_sources)} doc sources",
258 | )
259 |
260 | # Pass transport-specific options
261 | server.run(transport=args.transport)
262 |
263 |
264 | if __name__ == "__main__":
265 | main()
266 |
```
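To illustrate the `name:url` parsing in `create_doc_sources_from_urls` above: the split happens on the first colon only, and entries that already start with `http:`/`https:` are kept as bare URLs. A small sketch:

```python
from mcpdoc.cli import create_doc_sources_from_urls

sources = create_doc_sources_from_urls(
    [
        "LangGraph:https://langchain-ai.github.io/langgraph/llms.txt",
        "https://python.langchain.com/llms.txt",
    ]
)
print(sources)
# [{'name': 'LangGraph', 'llms_txt': 'https://langchain-ai.github.io/langgraph/llms.txt'},
#  {'llms_txt': 'https://python.langchain.com/llms.txt'}]
```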
--------------------------------------------------------------------------------
/mcpdoc/main.py:
--------------------------------------------------------------------------------
```python
1 | """MCP Llms-txt server for docs."""
2 |
3 | import os
4 | import re
5 | from urllib.parse import urlparse, urljoin
6 |
7 | import httpx
8 | from markdownify import markdownify
9 | from mcp.server.fastmcp import FastMCP
10 | from typing_extensions import NotRequired, TypedDict
11 |
12 |
13 | class DocSource(TypedDict):
14 | """A source of documentation for a library or a package."""
15 |
16 | name: NotRequired[str]
17 | """Name of the documentation source (optional)."""
18 |
19 | llms_txt: str
20 | """URL to the llms.txt file or documentation source."""
21 |
22 | description: NotRequired[str]
23 | """Description of the documentation source (optional)."""
24 |
25 |
26 | def extract_domain(url: str) -> str:
27 | """Extract domain from URL.
28 |
29 | Args:
30 | url: Full URL
31 |
32 | Returns:
33 | Domain with scheme and trailing slash (e.g., https://example.com/)
34 | """
35 | parsed = urlparse(url)
36 | return f"{parsed.scheme}://{parsed.netloc}/"
37 |
38 |
39 | def _is_http_or_https(url: str) -> bool:
40 | """Check if the URL is an HTTP or HTTPS URL."""
41 | return url.startswith(("http:", "https:"))
42 |
43 |
44 | def _get_fetch_description(has_local_sources: bool) -> str:
45 | """Get fetch docs tool description."""
46 | description = [
47 | "Fetch and parse documentation from a given URL or local file.",
48 | "",
49 | "Use this tool after list_doc_sources to:",
50 | "1. First fetch the llms.txt file from a documentation source",
51 | "2. Analyze the URLs listed in the llms.txt file",
52 | "3. Then fetch specific documentation pages relevant to the user's question",
53 | "",
54 | ]
55 |
56 | if has_local_sources:
57 | description.extend(
58 | [
59 | "Args:",
60 | " url: The URL or file path to fetch documentation from. Can be:",
61 | " - URL from an allowed domain",
62 | " - A local file path (absolute or relative)",
63 | " - A file:// URL (e.g., file:///path/to/llms.txt)",
64 | ]
65 | )
66 | else:
67 | description.extend(
68 | [
69 | "Args:",
70 | " url: The URL to fetch documentation from.",
71 | ]
72 | )
73 |
74 | description.extend(
75 | [
76 | "",
77 | "Returns:",
78 | " The fetched documentation content converted to markdown, or an error message", # noqa: E501
79 | " if the request fails or the URL is not from an allowed domain.",
80 | ]
81 | )
82 |
83 | return "\n".join(description)
84 |
85 |
86 | def _normalize_path(path: str) -> str:
87 | """Accept paths in file:/// or relative format and map to absolute paths."""
88 | return (
89 | os.path.abspath(path[7:])
90 | if path.startswith("file://")
91 | else os.path.abspath(path)
92 | )
93 |
94 |
95 | def _get_server_instructions(doc_sources: list[DocSource]) -> str:
96 | """Generate server instructions with available documentation source names."""
97 | # Extract source names from doc_sources
98 | source_names = []
99 | for entry in doc_sources:
100 | if "name" in entry:
101 | source_names.append(entry["name"])
102 | elif _is_http_or_https(entry["llms_txt"]):
103 | # Use domain name as fallback for HTTP sources
104 | domain = extract_domain(entry["llms_txt"])
105 | source_names.append(domain.rstrip("/").split("//")[-1])
106 | else:
107 | # Use filename as fallback for local sources
108 | source_names.append(os.path.basename(entry["llms_txt"]))
109 |
110 | instructions = [
111 | "Use the list_doc_sources tool to see available documentation sources.",
112 | "This tool will return a URL for each documentation source.",
113 | ]
114 |
115 | if source_names:
116 | if len(source_names) == 1:
117 | instructions.append(
118 | f"Documentation URLs are available from this tool "
119 | f"for {source_names[0]}."
120 | )
121 | else:
122 | names_str = ", ".join(source_names[:-1]) + f", and {source_names[-1]}"
123 | instructions.append(
124 | f"Documentation URLs are available from this tool for {names_str}."
125 | )
126 |
127 | instructions.extend(
128 | [
129 | "",
130 | "Once you have a source documentation URL, use the fetch_docs tool "
131 | "to get the documentation contents. ",
132 | "If the documentation contents contains a URL for additional documentation "
133 | "that is relevant to your task, you can use the fetch_docs tool to "
134 | "fetch documentation from that URL next.",
135 | ]
136 | )
137 |
138 | return "\n".join(instructions)
139 |
140 |
141 | def create_server(
142 | doc_sources: list[DocSource],
143 | *,
144 | follow_redirects: bool = False,
145 | timeout: float = 10,
146 | settings: dict | None = None,
147 | allowed_domains: list[str] | None = None,
148 | ) -> FastMCP:
149 | """Create the server and generate documentation retrieval tools.
150 |
151 | Args:
152 | doc_sources: List of documentation sources to make available
153 | follow_redirects: Whether to follow HTTP redirects when fetching docs
154 | timeout: HTTP request timeout in seconds
155 | settings: Additional settings to pass to FastMCP
156 | allowed_domains: Additional domains to allow fetching from.
157 | Use ['*'] to allow all domains
158 | The domain hosting the llms.txt file is always appended to the list
159 | of allowed domains.
160 |
161 | Returns:
162 | A FastMCP server instance configured with documentation tools
163 | """
164 | settings = settings or {}
165 | server = FastMCP(
166 | name="llms-txt",
167 | instructions=_get_server_instructions(doc_sources),
168 | **settings,
169 | )
170 | httpx_client = httpx.AsyncClient(follow_redirects=follow_redirects, timeout=timeout)
171 |
172 | local_sources = []
173 | remote_sources = []
174 |
175 | for entry in doc_sources:
176 | url = entry["llms_txt"]
177 | if _is_http_or_https(url):
178 | remote_sources.append(entry)
179 | else:
180 | local_sources.append(entry)
181 |
182 | # Let's verify that all local sources exist
183 | for entry in local_sources:
184 | path = entry["llms_txt"]
185 | abs_path = _normalize_path(path)
186 | if not os.path.exists(abs_path):
187 | raise FileNotFoundError(f"Local file not found: {abs_path}")
188 |
189 | # Parse the domain names in the llms.txt URLs and identify local file paths
190 | domains = set(extract_domain(entry["llms_txt"]) for entry in remote_sources)
191 |
192 | # Add additional allowed domains if specified, or set to '*' if we have local files
193 | if allowed_domains:
194 | if "*" in allowed_domains:
195 | domains = {"*"} # Special marker for allowing all domains
196 | else:
197 | domains.update(allowed_domains)
198 |
199 | allowed_local_files = set(
200 | _normalize_path(entry["llms_txt"]) for entry in local_sources
201 | )
202 |
203 | @server.tool()
204 | def list_doc_sources() -> str:
205 | """List all available documentation sources.
206 |
207 | This is the first tool you should call in the documentation workflow.
208 | It provides URLs to llms.txt files or local file paths that the user has made available.
209 |
210 | Returns:
211 | A string containing a formatted list of documentation sources with their URLs or file paths
212 | """
213 | content = ""
214 | for entry_ in doc_sources:
215 | url_or_path = entry_["llms_txt"]
216 |
217 | if _is_http_or_https(url_or_path):
218 | name = entry_.get("name", extract_domain(url_or_path))
219 | content += f"{name}\nURL: {url_or_path}\n\n"
220 | else:
221 | path = _normalize_path(url_or_path)
222 | name = entry_.get("name", path)
223 | content += f"{name}\nPath: {path}\n\n"
224 | return content
225 |
226 | fetch_docs_description = _get_fetch_description(
227 | has_local_sources=bool(local_sources)
228 | )
229 |
230 | @server.tool(description=fetch_docs_description)
231 | async def fetch_docs(url: str) -> str:
232 | nonlocal domains, follow_redirects
233 | url = url.strip()
234 | # Handle local file paths (either as file:// URLs or direct filesystem paths)
235 | if not _is_http_or_https(url):
236 | abs_path = _normalize_path(url)
237 | if abs_path not in allowed_local_files:
238 | raise ValueError(
239 | f"Local file not allowed: {abs_path}. Allowed files: {allowed_local_files}"
240 | )
241 | try:
242 | with open(abs_path, "r", encoding="utf-8") as f:
243 | content = f.read()
244 | return markdownify(content)
245 | except Exception as e:
246 | return f"Error reading local file: {str(e)}"
247 | else:
248 | # Otherwise treat as URL
249 | if "*" not in domains and not any(
250 | url.startswith(domain) for domain in domains
251 | ):
252 | return (
253 | "Error: URL not allowed. Must start with one of the following domains: "
254 | + ", ".join(domains)
255 | )
256 |
257 | try:
258 | response = await httpx_client.get(url, timeout=timeout)
259 | response.raise_for_status()
260 | content = response.text
261 |
262 | if follow_redirects:
263 | # Check for meta refresh tag which indicates a client-side redirect
264 | match = re.search(
265 | r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
266 | content,
267 | re.IGNORECASE,
268 | )
269 |
270 | if match:
271 | redirect_url = match.group(1)
272 | new_url = urljoin(str(response.url), redirect_url)
273 |
274 | if "*" not in domains and not any(
275 | new_url.startswith(domain) for domain in domains
276 | ):
277 | return (
278 | "Error: Redirect URL not allowed. Must start with one of the following domains: "
279 | + ", ".join(domains)
280 | )
281 |
282 | response = await httpx_client.get(new_url, timeout=timeout)
283 | response.raise_for_status()
284 | content = response.text
285 |
286 | return markdownify(content)
287 | except (httpx.HTTPStatusError, httpx.RequestError) as e:
288 | return f"Encountered an HTTP error: {str(e)}"
289 |
290 | return server
291 |
```