# Directory Structure
```
├── .gitignore
├── .python-version
├── Dockerfile
├── LICENSE
├── pyproject.toml
├── README.md
├── smithery.yaml
├── src
│ └── mcp_paperswithcode
│ ├── __init__.py
│ └── server.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.10
2 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Python-generated files
2 | __pycache__/
3 | *.py[oc]
4 | build/
5 | dist/
6 | wheels/
7 | *.egg-info
8 |
9 | # Virtual environments
10 | .venv
11 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # mcp-paperswithcode
2 |
3 | [![smithery badge](https://smithery.ai/badge/@hbg/mcp-paperswithcode)](https://smithery.ai/server/@hbg/mcp-paperswithcode)
4 |
5 | # 🦾 Features
6 |
7 | > Allows AI assistants to find and read papers, as well as view related code repositories for further context.
8 |
9 | This Model Context Protocol (MCP) server exposes the PapersWithCode API as a set of tools that an MCP client can call.
10 |
11 | It includes tools for searching, retrieving, and parsing information on research papers, authors, datasets, conferences, and more.
12 |
13 | # 🚀 Getting Started
14 |
15 | ### Installing via Smithery
16 |
17 | To install mcp-paperswithcode for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@hbg/mcp-paperswithcode):
18 |
19 | ```bash
20 | npx -y @smithery/cli install @hbg/mcp-paperswithcode --client claude
21 | ```
22 |
23 | # 🛠️ Tools
24 |
25 | ## 📚 Paper Tools
26 |
27 | ### `search_papers`
28 | Search for papers using optional filters.
29 |
30 | - `abstract` (str, optional): Filter by abstract text.
31 | - `title` (str, optional): Filter by title text.
32 | - `arxiv_id` (str, optional): Filter by ArXiv ID.
33 |
34 | ### `get_paper`
35 | Get a paper's metadata by its ID.
36 |
37 | - `paper_id` (str): The paper ID.
38 |
39 | ### `read_paper_from_url`
40 | Extract readable text from a paper given its URL.
41 |
42 | - `paper_url` (str): The direct PDF or HTML URL to a paper.
43 |
44 | ### `list_paper_results`
45 | List benchmark results associated with a paper.
46 |
47 | - `paper_id` (str): The paper ID.
48 |
49 | ### `list_paper_tasks`
50 | List tasks associated with a paper.
51 |
52 | - `paper_id` (str): The paper ID.
53 |
54 | ### `list_paper_methods`
55 | List methods discussed in a paper.
56 |
57 | - `paper_id` (str): The paper ID.
58 |
59 | ### `list_paper_repositories`
60 | List code repositories linked to a paper.
61 |
62 | - `paper_id` (str): The paper ID.
63 |
64 | ### `list_paper_datasets`
65 | List datasets mentioned or used in a paper.
66 |
67 | - `paper_id` (str): The paper ID.
68 |
69 | ## 🧠 Research Area Tools
70 |
71 | ### `search_research_areas`
72 | Search research areas by name.
73 |
74 | - `name` (str): Partial or full name of the research area.
75 |
76 | ### `get_research_area`
77 | Get metadata for a specific research area.
78 |
79 | - `area_id` (str): The area ID.
80 |
81 | ### `list_research_area_tasks`
82 | List tasks associated with a research area.
83 |
84 | - `area_id` (str): The area ID.
85 |
86 | ## 👨‍🔬 Author Tools
87 |
88 | ### `search_authors`
89 | Search authors by full name.
90 |
91 | - `full_name` (str): Full name of the author.
92 |
93 | ### `get_paper_author`
94 | Get metadata for an author by ID.
95 |
96 | - `author_id` (str): The author ID.
97 |
98 | ### `list_papers_by_author_id`
99 | List all papers written by an author via ID.
100 |
101 | - `author_id` (str): The author ID.
102 |
103 | ### `list_papers_by_author_name`
104 | Search by name and return papers for the first matching author.
105 |
106 | - `author_name` (str): Full name of the author.
107 |
108 | ## 🎓 Conference Tools
109 |
110 | ### `list_conferences`
111 | List conferences, optionally filtered by name.
112 |
113 | - `conference_name` (str, optional): Full or partial name.
114 |
115 | ### `get_conference`
116 | Get metadata for a specific conference.
117 |
118 | - `conference_id` (str): The conference ID.
119 |
120 | ### `list_conference_proceedings`
121 | List all proceedings under a conference.
122 |
123 | - `conference_id` (str): The conference ID.
124 |
125 | ### `get_conference_proceeding`
126 | Get details for a specific conference proceeding.
127 |
128 | - `conference_id` (str): The conference ID.
129 | - `proceeding_id` (str): The proceeding ID.
130 |
131 | ### `list_conference_papers`
132 | List all papers for a specific conference proceeding.
133 |
134 | - `conference_id` (str): The conference ID.
135 | - `proceeding_id` (str): The proceeding ID.
136 |
```
--------------------------------------------------------------------------------
/src/mcp_paperswithcode/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [project]
2 | name = "mcp-paperswithcode"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | dependencies = [
8 | "mcp[cli]>=1.4.1",
9 | "pypdf2>=3.0.1",
10 | "requests>=2.32.3",
11 | ]
12 |
```
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
2 |
3 | startCommand:
4 | type: stdio
5 | configSchema:
6 | # JSON Schema defining the configuration options for the MCP.
7 | type: object
8 | properties: {}
9 | commandFunction:
10 | # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
11 | |-
12 | (config) => ({ command: 'python', args: ['src/mcp_paperswithcode/server.py'] })
13 | exampleConfig: {}
14 |
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
2 | FROM python:3.10-alpine
3 |
4 | # Install build dependencies
5 | RUN apk add --no-cache build-base libffi-dev openssl-dev
6 |
7 | WORKDIR /app
8 |
9 | # Copy project files
10 | COPY . .
11 |
12 | # Install dependencies
13 | RUN pip install --no-cache-dir "mcp[cli]>=1.4.1" "pypdf2>=3.0.1" "requests>=2.32.3" "httpx>=0.23.0"
14 |
15 | # Expose port if needed (optional)
16 | # EXPOSE 8000
17 |
18 | CMD ["python", "src/mcp_paperswithcode/server.py"]
19 |
```
--------------------------------------------------------------------------------
/src/mcp_paperswithcode/server.py:
--------------------------------------------------------------------------------
```python
1 | """Main MCP server for PapersWithCode"""
2 | import io
3 | from urllib.parse import urlencode
4 | import httpx
5 | import requests
6 | from mcp.server.fastmcp import FastMCP
7 | from PyPDF2 import PdfReader
8 |
9 | mcp = FastMCP("Papers With Code MCP Interface")
10 | BASE_URL = "https://paperswithcode.com/api/v1"
11 |
def encode_non_null_params(params: dict) -> str:
    """Build a URL query string from ``params``, leaving out ``None`` values.

    An empty (or otherwise falsy) ``params`` yields the empty string.
    """
    if not params:
        return ""
    # Keep only the entries that actually carry a value.
    present = {}
    for key, value in params.items():
        if value is not None:
            present[key] = value
    return urlencode(present)
18 |
async def get_all_results(url: str) -> list:
    """Fetch every page of a paginated PapersWithCode listing.

    Starting at ``url``, each response is parsed as a JSON object; its
    ``results`` entries are collected and its ``next`` link is followed until
    that link is missing or null.

    Args:
        url: Address of the first page.

    Returns:
        The ``results`` entries of all visited pages, in order.

    Raises:
        httpx.HTTPStatusError: If a page does not answer with a success
            status.
    """
    all_results = []
    # One client for the whole walk, so the connection is reused between
    # pages instead of being re-established for every request.
    async with httpx.AsyncClient() as client:
        while url:
            response = await client.get(url)
            # Surface API errors rather than treating an error payload as an
            # empty page.
            response.raise_for_status()
            data = response.json()
            all_results.extend(data.get("results", []))
            url = data.get("next")
    return all_results
29 |
@mcp.tool()
async def search_research_areas(name: str) -> dict:
    """Search for research areas that exist in PapersWithCode"""
    # Filter the areas listing by name and gather every page of matches.
    query = encode_non_null_params({"name": name})
    areas = await get_all_results(f"{BASE_URL}/areas/?{query}")
    return {"results": areas}
37 |
@mcp.tool()
async def get_research_area(area_id: str) -> dict:
    """Get a research area by ID in PapersWithCode"""
    # Single-resource lookup: one GET, decoded JSON body returned unchanged.
    endpoint = f"{BASE_URL}/areas/{area_id}/"
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint)
    return reply.json()
45 |
@mcp.tool()
async def list_research_area_tasks(area_id: str) -> dict:
    """List the tasks for a given research area ID in PapersWithCode"""
    # The tasks listing is filtered through its "area" query parameter.
    query = encode_non_null_params({"area": area_id})
    tasks = await get_all_results(f"{BASE_URL}/tasks/?{query}")
    return {"results": tasks}
53 |
@mcp.tool()
async def search_authors(full_name: str) -> dict:
    """Search for authors by name in PapersWithCode"""
    # Filter the authors listing by full name and gather every page.
    query = encode_non_null_params({"full_name": full_name})
    authors = await get_all_results(f"{BASE_URL}/authors/?{query}")
    return {"results": authors}
61 |
@mcp.tool()
async def get_paper_author(author_id: str) -> dict:
    """Get a paper author by ID in PapersWithCode"""
    # Single-resource lookup: one GET, decoded JSON body returned unchanged.
    endpoint = f"{BASE_URL}/authors/{author_id}/"
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint)
    return reply.json()
69 |
@mcp.tool()
async def list_papers_by_author_id(author_id: str) -> dict:
    """List the papers for a given author ID in PapersWithCode"""
    # Every page of the author's paper listing is collected before returning.
    papers = await get_all_results(f"{BASE_URL}/authors/{author_id}/papers/")
    return {"results": papers}
76 |
@mcp.tool()
async def list_papers_by_author_name(author_name: str) -> dict:
    """List the papers written by a given author name in PapersWithCode"""
    # Resolve the name to candidate authors first; only the first hit is used.
    lookup = await search_authors(author_name)
    candidates = lookup.get("results")
    if candidates:
        first_author_id = candidates[0]["id"]
        return await list_papers_by_author_id(first_author_id)
    # No author matched the name, so there are no papers to report.
    return {"results": []}
85 |
@mcp.tool()
async def list_conferences(conference_name: str | None = None) -> dict:
    """List the conferences in PapersWithCode"""
    # With no name given the filter is dropped and the query string is empty.
    query = encode_non_null_params({"name": conference_name})
    conferences = await get_all_results(f"{BASE_URL}/conferences/?{query}")
    return {"results": conferences}
93 |
@mcp.tool()
async def get_conference(conference_id: str) -> dict:
    """Get a conference by ID in PapersWithCode"""
    # Single-resource lookup: one GET, decoded JSON body returned unchanged.
    endpoint = f"{BASE_URL}/conferences/{conference_id}/"
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint)
    return reply.json()
101 |
@mcp.tool()
async def list_conference_proceedings(conference_id: str) -> dict:
    """List the proceedings for a given conference ID in PapersWithCode"""
    # Proceedings are a paginated sub-listing of the conference resource.
    proceedings = await get_all_results(
        f"{BASE_URL}/conferences/{conference_id}/proceedings/"
    )
    return {"results": proceedings}
108 |
@mcp.tool()
async def get_conference_proceeding(conference_id: str, proceeding_id: str) -> dict:
    """Get a proceeding by ID in PapersWithCode"""
    # The proceeding is addressed beneath its parent conference.
    endpoint = f"{BASE_URL}/conferences/{conference_id}/proceedings/{proceeding_id}/"
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint)
    return reply.json()
116 |
@mcp.tool()
async def list_conference_papers(conference_id: str, proceeding_id: str) -> dict:
    """List the papers for a given conference ID and proceeding ID in PapersWithCode"""
    # Papers hang off a specific proceeding of a specific conference.
    papers = await get_all_results(
        f"{BASE_URL}/conferences/{conference_id}/proceedings/{proceeding_id}/papers/"
    )
    return {"results": papers}
123 |
@mcp.tool()
async def search_papers(abstract: str | None = None, title: str | None = None, arxiv_id: str | None = None) -> dict:
    """Search for a paper in PapersWithCode"""
    # Filters left as None are omitted from the query string entirely.
    filters = {"abstract": abstract, "title": title, "arxiv_id": arxiv_id}
    query = encode_non_null_params(filters)
    papers = await get_all_results(f"{BASE_URL}/papers/?{query}")
    return {"results": papers}
131 |
@mcp.tool()
async def get_paper(paper_id: str) -> dict:
    """Get a paper by ID in PapersWithCode"""
    # Single-resource lookup: one GET, decoded JSON body returned unchanged.
    endpoint = f"{BASE_URL}/papers/{paper_id}/"
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint)
    return reply.json()
139 |
@mcp.tool()
async def list_paper_repositories(paper_id: str) -> dict:
    """List the repositories for a given paper ID in PapersWithCode"""
    # Collect every page of the paper's repository listing.
    repositories = await get_all_results(f"{BASE_URL}/papers/{paper_id}/repositories/")
    return {"results": repositories}
146 |
@mcp.tool()
async def list_paper_datasets(paper_id: str) -> dict:
    """List the datasets for a given paper ID in PapersWithCode"""
    # Collect every page of the paper's dataset listing.
    datasets = await get_all_results(f"{BASE_URL}/papers/{paper_id}/datasets/")
    return {"results": datasets}
153 |
@mcp.tool()
async def list_paper_methods(paper_id: str) -> dict:
    """List the methods for a given paper ID in PapersWithCode"""
    # Collect every page of the paper's method listing.
    methods = await get_all_results(f"{BASE_URL}/papers/{paper_id}/methods/")
    return {"results": methods}
160 |
@mcp.tool()
async def list_paper_results(paper_id: str) -> dict:
    """List the results for a given paper ID in PapersWithCode"""
    # Collect every page of the paper's results listing.
    entries = await get_all_results(f"{BASE_URL}/papers/{paper_id}/results/")
    return {"results": entries}
167 |
@mcp.tool()
async def list_paper_tasks(paper_id: str) -> dict:
    """List the tasks for a given paper ID in PapersWithCode"""
    # Collect every page of the paper's task listing.
    tasks = await get_all_results(f"{BASE_URL}/papers/{paper_id}/tasks/")
    return {"results": tasks}
174 |
def _fetch_paper_text(paper_url: str) -> dict:
    """Download ``paper_url`` and pull out its text (blocking helper)."""
    # A timeout keeps an unresponsive host from hanging the tool forever.
    response = requests.get(paper_url, timeout=30)
    # Fail on HTTP errors instead of handing back an error page as paper text.
    response.raise_for_status()
    # Compare only the media type: the header may carry parameters
    # (e.g. "application/pdf; charset=binary") or differ in letter case.
    media_type = response.headers.get("content-type", "").split(";")[0].strip().lower()
    if media_type == "application/pdf":
        reader = PdfReader(io.BytesIO(response.content))
        # `or ""` guards against pages that yield no extractable text.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        return {"text": text, "type": "pdf"}
    # Anything that is not a PDF is returned as the decoded response body.
    return {"text": response.text, "type": "html"}


@mcp.tool()
async def read_paper_from_url(paper_url: str) -> dict:
    """Extract readable text from a paper given its URL.

    Args:
        paper_url: Direct URL of the paper (PDF or HTML).

    Returns:
        ``{"text": ..., "type": "pdf"}`` when the response is a PDF,
        ``{"text": ..., "type": "html"}`` for any other response, or
        ``{"error": ..., "type": "error"}`` if downloading or parsing fails.
    """
    import asyncio

    try:
        # The download and PDF parsing are blocking calls; run them in a
        # worker thread so the event loop stays responsive.
        return await asyncio.to_thread(_fetch_paper_text, paper_url)
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"error": str(e), "type": "error"}
191 |
192 | if __name__ == "__main__":
193 | mcp.run()
194 |
```