This is page 1 of 2. Use http://codebase.md/yuzongmin/semantic-scholar-fastmcp-mcp-server?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── REFACTORING.md ├── requirements.txt ├── run.py ├── semantic_scholar │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── authors.py │ │ ├── papers.py │ │ └── recommendations.py │ ├── config.py │ ├── mcp.py │ ├── server.py │ └── utils │ ├── __init__.py │ ├── errors.py │ └── http.py ├── semantic_scholar_server.py ├── smithery.yaml ├── test │ ├── __init__.py │ ├── test_author.py │ ├── test_paper.py │ ├── test_recommend.py │ └── test_utils.py └── TOOLS.md ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | # Data files 177 | *.npy 178 | *.npz 179 | *.mat 180 | *.pkl 181 | 182 | # Checkpoint files 183 | _METADATA 184 | _CHECKPOINT_METADATA 185 | 186 | # Experimental results 187 | experimental_results/ 188 | saved_models/ 189 | 190 | # VS Code 191 | .vscode/ 192 | 193 | # macOS 194 | .DS_Store 195 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # Semantic Scholar MCP Server 2 | 3 | [](https://smithery.ai/server/semantic-scholar-fastmcp-mcp-server) 4 | 5 | A FastMCP server implementation for the Semantic Scholar API, providing comprehensive access to academic paper data, author information, and citation networks. 6 | 7 | ## Project Structure 8 | 9 | The project has been refactored into a modular structure for better maintainability: 10 | 11 | ``` 12 | semantic-scholar-server/ 13 | ├── semantic_scholar/ # Main package 14 | │ ├── __init__.py # Package initialization 15 | │ ├── server.py # Server setup and main functionality 16 | │ ├── mcp.py # Centralized FastMCP instance definition 17 | │ ├── config.py # Configuration classes 18 | │ ├── utils/ # Utility modules 19 | │ │ ├── __init__.py 20 | │ │ ├── errors.py # Error handling 21 | │ │ └── http.py # HTTP client and rate limiting 22 | │ ├── api/ # API endpoints 23 | │ ├── __init__.py 24 | │ ├── papers.py # Paper-related endpoints 25 | │ ├── authors.py # Author-related endpoints 26 | │ └── recommendations.py # Recommendation endpoints 27 | ├── run.py # Entry point script 28 | ``` 29 | 30 | This structure: 31 | 32 | - Separates concerns into logical modules 33 | - Makes the codebase easier to understand and maintain 34 | - Allows for better testing and future extensions 35 | - Keeps related functionality grouped together 36 | - Centralizes the FastMCP instance 
to avoid circular imports 37 | 38 | ## Features 39 | 40 | - **Paper Search & Discovery** 41 | 42 | - Full-text search with advanced filtering 43 | - Title-based paper matching 44 | - Paper recommendations (single and multi-paper) 45 | - Batch paper details retrieval 46 | - Advanced search with ranking strategies 47 | 48 | - **Citation Analysis** 49 | 50 | - Citation network exploration 51 | - Reference tracking 52 | - Citation context and influence analysis 53 | 54 | - **Author Information** 55 | 56 | - Author search and profile details 57 | - Publication history 58 | - Batch author details retrieval 59 | 60 | - **Advanced Features** 61 | - Complex search with multiple ranking strategies 62 | - Customizable field selection 63 | - Efficient batch operations 64 | - Rate limiting compliance 65 | - Support for both authenticated and unauthenticated access 66 | - Graceful shutdown and error handling 67 | - Connection pooling and resource management 68 | 69 | ## System Requirements 70 | 71 | - Python 3.8+ 72 | - FastMCP framework 73 | - Environment variable for API key (optional) 74 | 75 | ## Installation 76 | 77 | ### Installing via Smithery 78 | 79 | To install Semantic Scholar MCP Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/semantic-scholar-fastmcp-mcp-server): 80 | 81 | ```bash 82 | npx -y @smithery/cli install semantic-scholar-fastmcp-mcp-server --client claude 83 | ``` 84 | 85 | ### Manual Installation 86 | 87 | 1. Clone the repository: 88 | 89 | ```bash 90 | git clone https://github.com/YUZongmin/semantic-scholar-fastmcp-mcp-server.git 91 | cd semantic-scholar-server 92 | ``` 93 | 94 | 2. Install FastMCP and other dependencies following: https://github.com/jlowin/fastmcp 95 | 96 | 3. Configure FastMCP: 97 | 98 | For Claude Desktop users, you'll need to configure the server in your FastMCP configuration file. 
Add the following to your configuration (typically in `~/.config/claude-desktop/config.json`): 99 | 100 | ```json 101 | { 102 | "mcps": { 103 | "Semantic Scholar Server": { 104 | "command": "/path/to/your/venv/bin/fastmcp", 105 | "args": [ 106 | "run", 107 | "/path/to/your/semantic-scholar-server/run.py" 108 | ], 109 | "env": { 110 | "SEMANTIC_SCHOLAR_API_KEY": "your-api-key-here" 111 | } 112 | } 113 | } 114 | } 115 | ``` 116 | 117 | Make sure to: 118 | 119 | - Replace `/path/to/your/venv/bin/fastmcp` with the actual path to your FastMCP installation 120 | - Replace `/path/to/your/semantic-scholar-server/run.py` with the actual path to run.py on your machine 121 | - If you have a Semantic Scholar API key, add it to the `env` section (it is optional). If not, you can remove the `env` section entirely 122 | 123 | 4. Start using the server: 124 | 125 | The server will now be available to your Claude Desktop instance. No need to manually run any commands - Claude will automatically start and manage the server process when needed. 126 | 127 | ### API Key (Optional) 128 | 129 | To get higher rate limits and better performance: 130 | 131 | 1. Get an API key from [Semantic Scholar API](https://www.semanticscholar.org/product/api) 132 | 2. Add it to your FastMCP configuration as shown above in the `env` section 133 | 134 | If no API key is provided, the server will use unauthenticated access with lower rate limits.
135 | 136 | ## Configuration 137 | 138 | ### Environment Variables 139 | 140 | - `SEMANTIC_SCHOLAR_API_KEY`: Your Semantic Scholar API key (optional) 141 | - Get your key from [Semantic Scholar API](https://www.semanticscholar.org/product/api) 142 | - If not provided, the server will use unauthenticated access 143 | 144 | ### Rate Limits 145 | 146 | The server automatically adjusts to the appropriate rate limits: 147 | 148 | **With API Key**: 149 | 150 | - Search, batch and recommendation endpoints: 1 request per second 151 | - Other endpoints: 10 requests per second 152 | 153 | **Without API Key**: 154 | 155 | - All endpoints: 100 requests per 5 minutes 156 | - Longer timeouts for requests 157 | 158 | ## Available MCP Tools 159 | 160 | > Note: All tools are aligned with the official [Semantic Scholar API documentation](https://api.semanticscholar.org/api-docs/). Please refer to the official documentation for detailed field specifications and the latest updates. 161 | 162 | ### Paper Search Tools 163 | 164 | - `paper_relevance_search`: Search for papers using relevance ranking 165 | 166 | - Supports comprehensive query parameters including year range and citation count filters 167 | - Returns paginated results with customizable fields 168 | 169 | - `paper_bulk_search`: Bulk paper search with sorting options 170 | 171 | - Similar to relevance search but optimized for larger result sets 172 | - Supports sorting by citation count, publication date, etc. 173 | 174 | - `paper_title_search`: Find papers by exact title match 175 | 176 | - Useful for finding specific papers when you know the title 177 | - Returns detailed paper information with customizable fields 178 | 179 | - `paper_details`: Get comprehensive details about a specific paper 180 | 181 | - Accepts various paper ID formats (S2 ID, DOI, ArXiv, etc.) 
182 | - Returns detailed paper metadata with nested field support 183 | 184 | - `paper_batch_details`: Efficiently retrieve details for multiple papers 185 | - Accepts up to 1000 paper IDs per request 186 | - Supports the same ID formats and fields as single paper details 187 | 188 | ### Citation Tools 189 | 190 | - `paper_citations`: Get papers that cite a specific paper 191 | 192 | - Returns paginated list of citing papers 193 | - Includes citation context when available 194 | - Supports field customization and sorting 195 | 196 | - `paper_references`: Get papers referenced by a specific paper 197 | - Returns paginated list of referenced papers 198 | - Includes reference context when available 199 | - Supports field customization and sorting 200 | 201 | ### Author Tools 202 | 203 | - `author_search`: Search for authors by name 204 | 205 | - Returns paginated results with customizable fields 206 | - Includes affiliations and publication counts 207 | 208 | - `author_details`: Get detailed information about an author 209 | 210 | - Returns comprehensive author metadata 211 | - Includes metrics like h-index and citation counts 212 | 213 | - `author_papers`: Get papers written by an author 214 | 215 | - Returns paginated list of author's publications 216 | - Supports field customization and sorting 217 | 218 | - `author_batch_details`: Get details for multiple authors 219 | - Efficiently retrieve information for up to 1000 authors 220 | - Returns the same fields as single author details 221 | 222 | ### Recommendation Tools 223 | 224 | - `paper_recommendations_single`: Get recommendations based on a single paper 225 | 226 | - Returns similar papers based on content and citation patterns 227 | - Supports field customization for recommended papers 228 | 229 | - `paper_recommendations_multi`: Get recommendations based on multiple papers 230 | - Accepts positive and negative example papers 231 | - Returns papers similar to positive examples and dissimilar to negative ones 
232 | 233 | ## Usage Examples 234 | 235 | ### Basic Paper Search 236 | 237 | ```python 238 | results = await paper_relevance_search( 239 | context, 240 | query="machine learning", 241 | year="2020-2024", 242 | min_citation_count=50, 243 | fields=["title", "abstract", "authors"] 244 | ) 245 | ``` 246 | 247 | ### Paper Recommendations 248 | 249 | ```python 250 | # Single paper recommendation 251 | recommendations = await paper_recommendations_single( 252 | context, 253 | paper_id="649def34f8be52c8b66281af98ae884c09aef38b", 254 | fields="title,authors,year" 255 | ) 256 | 257 | # Multi-paper recommendation 258 | recommendations = await paper_recommendations_multi( 259 | context, 260 | positive_paper_ids=["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], 261 | negative_paper_ids=["ArXiv:1805.02262"], 262 | fields="title,abstract,authors" 263 | ) 264 | ``` 265 | 266 | ### Batch Operations 267 | 268 | ```python 269 | # Get details for multiple papers 270 | papers = await paper_batch_details( 271 | context, 272 | paper_ids=["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], 273 | fields="title,authors,year,citations" 274 | ) 275 | 276 | # Get details for multiple authors 277 | authors = await author_batch_details( 278 | context, 279 | author_ids=["1741101", "1780531"], 280 | fields="name,hIndex,citationCount,paperCount" 281 | ) 282 | ``` 283 | 284 | ## Error Handling 285 | 286 | The server provides standardized error responses: 287 | 288 | ```python 289 | { 290 | "error": { 291 | "type": "error_type", # rate_limit, api_error, validation, timeout 292 | "message": "Error description", 293 | "details": { 294 | # Additional context 295 | "authenticated": true/false # Indicates if request was authenticated 296 | } 297 | } 298 | } 299 | ``` 300 | ``` -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """Test 
package for semantic-scholar-server""" ``` -------------------------------------------------------------------------------- /semantic_scholar/utils/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Utility modules for the Semantic Scholar API Server. 3 | """ ``` -------------------------------------------------------------------------------- /semantic_scholar/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Semantic Scholar API Server Package 3 | 4 | A FastMCP-based server for accessing the Semantic Scholar Academic Graph API. 5 | """ 6 | 7 | __version__ = "0.1.0" ``` -------------------------------------------------------------------------------- /semantic_scholar/mcp.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Central definition of the FastMCP instance. 3 | """ 4 | 5 | from fastmcp import FastMCP 6 | 7 | # Create FastMCP instance 8 | mcp = FastMCP("Semantic Scholar Server") ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- ``` 1 | # HTTP client 2 | httpx>=0.24.0 3 | 4 | # Testing 5 | pytest>=7.3.1 6 | pytest-asyncio>=0.21.0 7 | 8 | # Environment 9 | python-dotenv>=1.0.0 10 | 11 | # Server dependencies 12 | uvicorn>=0.27.1 13 | fastmcp>=0.1.0 ``` -------------------------------------------------------------------------------- /semantic_scholar/api/__init__.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | API endpoints for the Semantic Scholar API Server. 
3 | """ 4 | 5 | # Import all endpoints to make them available when importing the package 6 | from .papers import ( 7 | paper_relevance_search, 8 | paper_bulk_search, 9 | paper_title_search, 10 | paper_details, 11 | paper_batch_details, 12 | paper_authors, 13 | paper_citations, 14 | paper_references 15 | ) 16 | 17 | from .authors import ( 18 | author_search, 19 | author_details, 20 | author_papers, 21 | author_batch_details 22 | ) 23 | 24 | from .recommendations import ( 25 | get_paper_recommendations_single, 26 | get_paper_recommendations_multi 27 | ) ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: [] 9 | properties: 10 | semanticScholarApiKey: 11 | type: string 12 | description: The API key for the Semantic Scholar server. Optional for 13 | authenticated access. 14 | commandFunction: 15 | # A function that produces the CLI command to start the MCP on stdio. 16 | |- 17 | (config) => ({command:'python',args:['semantic_scholar_server.py'],env:{SEMANTIC_SCHOLAR_API_KEY:config.semanticScholarApiKey || ''}}) ``` -------------------------------------------------------------------------------- /semantic_scholar/utils/errors.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Error handling utilities for the Semantic Scholar API Server. 3 | """ 4 | 5 | from typing import Dict, Optional 6 | from ..config import ErrorType 7 | 8 | def create_error_response( 9 | error_type: ErrorType, 10 | message: str, 11 | details: Optional[Dict] = None 12 | ) -> Dict: 13 | """ 14 | Create a standardized error response. 
15 | 16 | Args: 17 | error_type: The type of error that occurred. 18 | message: A human-readable message describing the error. 19 | details: Optional additional details about the error. 20 | 21 | Returns: 22 | A dictionary with the error information. 23 | """ 24 | return { 25 | "error": { 26 | "type": error_type.value, 27 | "message": message, 28 | "details": details or {} 29 | } 30 | } ``` -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | Entry point script for the Semantic Scholar API Server. 4 | 5 | Available tools: 6 | - paper_relevance_search 7 | - paper_bulk_search 8 | - paper_title_search 9 | - paper_details 10 | - paper_batch_details 11 | - paper_authors 12 | - paper_citations 13 | - paper_references 14 | - author_search 15 | - author_details 16 | - author_papers 17 | - author_batch_details 18 | - get_paper_recommendations_single 19 | - get_paper_recommendations_multi 20 | """ 21 | 22 | # Import the mcp instance from centralized location 23 | from semantic_scholar.mcp import mcp 24 | # Import the main function from server 25 | from semantic_scholar.server import main 26 | 27 | # Import all API modules to ensure tools are registered 28 | from semantic_scholar.api import papers, authors, recommendations 29 | 30 | if __name__ == "__main__": 31 | main() ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # Start from a base Python image 2 | FROM python:3.8-slim 3 | 4 | # Set the working directory 5 | WORKDIR /app 6 | 7 | # Copy the requirements file first to leverage Docker cache 8 | COPY requirements.txt /app/requirements.txt 9 | RUN pip install --no-cache-dir -r requirements.txt 10 | 11 | # Copy the rest of the application code 12 | # This includes 
the 'semantic_scholar' package and 'run.py' 13 | COPY . /app 14 | # Alternatively, be more specific: 15 | # COPY semantic_scholar /app/semantic_scholar 16 | # COPY run.py /app/run.py 17 | 18 | # Expose the port that the MCP server will run on 19 | EXPOSE 8000 20 | 21 | # Set the environment variable for the API key (placeholder) 22 | # Glama or the user should provide the actual key at runtime 23 | ENV SEMANTIC_SCHOLAR_API_KEY="" 24 | 25 | # Command to run the server using the refactored entry point 26 | CMD ["python", "run.py"] ``` -------------------------------------------------------------------------------- /test/test_recommend.py: -------------------------------------------------------------------------------- ```python 1 | import unittest 2 | import asyncio 3 | import os 4 | from typing import Optional, List, Dict 5 | 6 | from .test_utils import make_request, create_error_response, ErrorType, Config 7 | 8 | class TestRecommendationTools(unittest.TestCase): 9 | def setUp(self): 10 | """Set up test environment""" 11 | # API key is required for recommendations 12 | api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY") 13 | if not api_key: 14 | raise ValueError("SEMANTIC_SCHOLAR_API_KEY environment variable is required for recommendation tests") 15 | 16 | # Create event loop for async tests 17 | self.loop = asyncio.new_event_loop() 18 | asyncio.set_event_loop(self.loop) 19 | 20 | # Sample paper IDs for testing (using full IDs) 21 | self.sample_paper_id = "204e3073870fae3d05bcbc2f6a8e263d9b72e776" # "Attention is All You Need" 22 | self.positive_paper_ids = [ 23 | self.sample_paper_id, 24 | "df2b0e26d0599ce3e70df8a9da02e51594e0e992" # BERT 25 | ] 26 | self.negative_paper_ids = [ 27 | "649def34f8be52c8b66281af98ae884c09aef38b" # Different topic 28 | ] 29 | 30 | def tearDown(self): 31 | """Clean up after tests""" 32 | self.loop.close() 33 | 34 | def run_async(self, coro): 35 | """Helper to run async functions in tests""" 36 | return self.loop.run_until_complete(coro) 37 
| 38 | async def async_test_with_delay(self, coro): 39 | """Helper to run async tests with delay to handle rate limiting""" 40 | await asyncio.sleep(1) # Add 1 second delay between tests 41 | return await coro 42 | 43 | def test_paper_recommendations_single(self): 44 | """Test single paper recommendations functionality""" 45 | result = self.run_async(self.async_test_with_delay(make_request( 46 | f"papers/forpaper/{self.sample_paper_id}", # Using full paper ID 47 | params={ 48 | "fields": "title,year" # Minimal fields 49 | } 50 | ))) 51 | self.assertIn("recommendedPapers", result) 52 | self.assertTrue(isinstance(result["recommendedPapers"], list)) 53 | 54 | def test_paper_recommendations_multi(self): 55 | """Test multi-paper recommendations functionality""" 56 | result = self.run_async(self.async_test_with_delay(make_request( 57 | "papers", # No leading slash 58 | method="POST", 59 | params={"fields": "title,year"}, # Minimal fields 60 | json={ 61 | "positivePaperIds": self.positive_paper_ids, # Changed key name to match API 62 | "negativePaperIds": self.negative_paper_ids 63 | } 64 | ))) 65 | self.assertIn("recommendedPapers", result) 66 | self.assertTrue(isinstance(result["recommendedPapers"], list)) 67 | 68 | if __name__ == '__main__': 69 | unittest.main() 70 | ``` -------------------------------------------------------------------------------- /semantic_scholar/server.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Main server module for the Semantic Scholar API Server. 
3 | """ 4 | 5 | import logging 6 | import asyncio 7 | import signal 8 | 9 | # Import mcp from centralized location 10 | from .mcp import mcp 11 | from .utils.http import initialize_client, cleanup_client 12 | 13 | # Configure logging 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | # Import API modules to register tools 18 | # Note: This must come AFTER mcp is initialized 19 | from .api import papers, authors, recommendations 20 | 21 | async def handle_exception(loop, context): 22 | """Global exception handler for the event loop.""" 23 | msg = context.get("exception", context["message"]) 24 | logger.error(f"Caught exception: {msg}") 25 | asyncio.create_task(shutdown()) 26 | 27 | async def shutdown(): 28 | """Gracefully shut down the server.""" 29 | logger.info("Initiating graceful shutdown...") 30 | 31 | # Cancel all tasks 32 | tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] 33 | for task in tasks: 34 | task.cancel() 35 | try: 36 | await task 37 | except asyncio.CancelledError: 38 | pass 39 | 40 | # Cleanup resources 41 | await cleanup_client() 42 | await mcp.cleanup() 43 | 44 | logger.info(f"Cancelled {len(tasks)} tasks") 45 | logger.info("Shutdown complete") 46 | 47 | def init_signal_handlers(loop): 48 | """Initialize signal handlers for graceful shutdown.""" 49 | for sig in (signal.SIGTERM, signal.SIGINT): 50 | loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown())) 51 | logger.info("Signal handlers initialized") 52 | 53 | async def run_server(): 54 | """Run the server with proper async context management.""" 55 | async with mcp: 56 | try: 57 | # Initialize HTTP client 58 | await initialize_client() 59 | 60 | # Start the server 61 | logger.info("Starting Semantic Scholar Server") 62 | await mcp.run_async() 63 | except Exception as e: 64 | logger.error(f"Server error: {e}") 65 | raise 66 | finally: 67 | await shutdown() 68 | 69 | def main(): 70 | """Main entry point for the 
server.""" 71 | try: 72 | # Set up event loop with exception handler 73 | loop = asyncio.new_event_loop() 74 | asyncio.set_event_loop(loop) 75 | loop.set_exception_handler(handle_exception) 76 | 77 | # Initialize signal handlers 78 | init_signal_handlers(loop) 79 | 80 | # Run the server 81 | loop.run_until_complete(run_server()) 82 | except KeyboardInterrupt: 83 | logger.info("Received keyboard interrupt, shutting down...") 84 | except Exception as e: 85 | logger.error(f"Fatal error: {str(e)}") 86 | finally: 87 | try: 88 | loop.run_until_complete(asyncio.sleep(0)) # Let pending tasks complete 89 | loop.close() 90 | except Exception as e: 91 | logger.error(f"Error during final cleanup: {str(e)}") 92 | logger.info("Server stopped") 93 | 94 | if __name__ == "__main__": 95 | main() ``` -------------------------------------------------------------------------------- /test/test_author.py: -------------------------------------------------------------------------------- ```python 1 | import unittest 2 | import asyncio 3 | import os 4 | from typing import Optional, List, Dict 5 | 6 | from .test_utils import make_request, create_error_response, ErrorType, Config 7 | 8 | class TestAuthorTools(unittest.TestCase): 9 | def setUp(self): 10 | """Set up test environment""" 11 | # You can set your API key here for testing 12 | os.environ["SEMANTIC_SCHOLAR_API_KEY"] = "" # Optional 13 | 14 | # Create event loop for async tests 15 | self.loop = asyncio.new_event_loop() 16 | asyncio.set_event_loop(self.loop) 17 | 18 | # Sample author IDs for testing 19 | self.sample_author_id = "1741101" # Andrew Ng 20 | self.sample_author_ids = [ 21 | self.sample_author_id, 22 | "2061296" # Yann LeCun 23 | ] 24 | 25 | def tearDown(self): 26 | """Clean up after tests""" 27 | self.loop.close() 28 | 29 | def run_async(self, coro): 30 | """Helper to run async functions in tests""" 31 | return self.loop.run_until_complete(coro) 32 | 33 | async def async_test_with_delay(self, coro): 34 | """Helper to run 
async tests with delay to handle rate limiting""" 35 | await asyncio.sleep(1) # Add 1 second delay between tests 36 | return await coro 37 | 38 | def test_author_search(self): 39 | """Test author search functionality""" 40 | result = self.run_async(self.async_test_with_delay(make_request( 41 | "/author/search", 42 | params={ 43 | "query": "Andrew Ng", 44 | "fields": "name,affiliations,paperCount" 45 | } 46 | ))) 47 | self.assertIn("data", result) 48 | self.assertIn("total", result) 49 | 50 | def test_author_details(self): 51 | """Test author details functionality""" 52 | result = self.run_async(self.async_test_with_delay(make_request( 53 | f"/author/{self.sample_author_id}", 54 | params={ 55 | "fields": "name,affiliations,paperCount,citationCount,hIndex" 56 | } 57 | ))) 58 | self.assertIn("authorId", result) 59 | self.assertIn("name", result) 60 | 61 | def test_author_papers(self): 62 | """Test author papers functionality""" 63 | result = self.run_async(self.async_test_with_delay(make_request( 64 | f"/author/{self.sample_author_id}/papers", 65 | params={ 66 | "fields": "title,year,citationCount", 67 | "limit": 10 68 | } 69 | ))) 70 | self.assertIn("data", result) 71 | self.assertIn("next", result) 72 | self.assertIn("offset", result) 73 | self.assertTrue(isinstance(result["data"], list)) 74 | 75 | def test_author_batch_details(self): 76 | """Test batch author details functionality""" 77 | result = self.run_async(self.async_test_with_delay(make_request( 78 | "/author/batch", 79 | method="POST", 80 | params={"fields": "name,affiliations,paperCount"}, 81 | json={"ids": self.sample_author_ids} 82 | ))) 83 | self.assertTrue(isinstance(result, list)) 84 | self.assertEqual(len(result), len(self.sample_author_ids)) 85 | 86 | if __name__ == '__main__': 87 | unittest.main() 88 | ``` -------------------------------------------------------------------------------- /TOOLS.md: -------------------------------------------------------------------------------- ```markdown 1 | # 
Semantic Scholar Server Tools 2 | 3 | This document lists all the tools available in the Semantic Scholar API Server. 4 | 5 | ## Paper-related Tools 6 | 7 | ### `paper_relevance_search` 8 | 9 | Search for papers on Semantic Scholar using relevance-based ranking. 10 | 11 | ```json 12 | { 13 | "query": "quantum computing", 14 | "fields": ["title", "abstract", "year", "authors"], 15 | "limit": 10 16 | } 17 | ``` 18 | 19 | ### `paper_bulk_search` 20 | 21 | Bulk search for papers with advanced filtering and sorting options. 22 | 23 | ```json 24 | { 25 | "query": "machine learning", 26 | "fields": ["title", "abstract", "authors"], 27 | "sort": "citationCount:desc" 28 | } 29 | ``` 30 | 31 | ### `paper_title_search` 32 | 33 | Find a specific paper by matching its title. 34 | 35 | ```json 36 | { 37 | "query": "Attention Is All You Need", 38 | "fields": ["title", "abstract", "authors", "year"] 39 | } 40 | ``` 41 | 42 | ### `paper_details` 43 | 44 | Get detailed information about a specific paper by ID. 45 | 46 | ```json 47 | { 48 | "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", 49 | "fields": ["title", "abstract", "authors", "citations"] 50 | } 51 | ``` 52 | 53 | ### `paper_batch_details` 54 | 55 | Get details for multiple papers in one request. 56 | 57 | ```json 58 | { 59 | "paper_ids": ["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], 60 | "fields": "title,abstract,authors" 61 | } 62 | ``` 63 | 64 | ### `paper_authors` 65 | 66 | Get the authors of a specific paper. 67 | 68 | ```json 69 | { 70 | "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", 71 | "fields": ["name", "affiliations"] 72 | } 73 | ``` 74 | 75 | ### `paper_citations` 76 | 77 | Get papers that cite a specific paper. 78 | 79 | ```json 80 | { 81 | "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", 82 | "fields": ["title", "year", "authors"], 83 | "limit": 50 84 | } 85 | ``` 86 | 87 | ### `paper_references` 88 | 89 | Get papers referenced by a specific paper. 
90 | 91 | ```json 92 | { 93 | "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", 94 | "fields": ["title", "year", "authors"], 95 | "limit": 50 96 | } 97 | ``` 98 | 99 | ## Author-related Tools 100 | 101 | ### `author_search` 102 | 103 | Search for authors by name. 104 | 105 | ```json 106 | { 107 | "query": "Albert Einstein", 108 | "fields": ["name", "affiliations", "paperCount"] 109 | } 110 | ``` 111 | 112 | ### `author_details` 113 | 114 | Get detailed information about a specific author. 115 | 116 | ```json 117 | { 118 | "author_id": "1741101", 119 | "fields": ["name", "affiliations", "papers", "citationCount"] 120 | } 121 | ``` 122 | 123 | ### `author_papers` 124 | 125 | Get papers written by a specific author. 126 | 127 | ```json 128 | { 129 | "author_id": "1741101", 130 | "fields": ["title", "year", "venue"], 131 | "limit": 50 132 | } 133 | ``` 134 | 135 | ### `author_batch_details` 136 | 137 | Get details for multiple authors at once. 138 | 139 | ```json 140 | { 141 | "author_ids": ["1741101", "1741102"], 142 | "fields": "name,affiliations,paperCount,citationCount" 143 | } 144 | ``` 145 | 146 | ## Recommendation Tools 147 | 148 | ### `get_paper_recommendations_single` 149 | 150 | Get paper recommendations based on a single paper. 151 | 152 | ```json 153 | { 154 | "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", 155 | "fields": "title,authors,year,abstract", 156 | "limit": 20 157 | } 158 | ``` 159 | 160 | ### `get_paper_recommendations_multi` 161 | 162 | Get paper recommendations based on multiple papers. 
163 | 164 | ```json 165 | { 166 | "positive_paper_ids": [ 167 | "649def34f8be52c8b66281af98ae884c09aef38b", 168 | "ARXIV:2106.15928" 169 | ], 170 | "negative_paper_ids": ["ARXIV:1805.02262"], 171 | "fields": "title,authors,year", 172 | "limit": 20 173 | } 174 | ``` 175 | 176 | ## Note 177 | 178 | - The tool name in the error message (`read_paper`) does not exist in this server 179 | - Use one of the tools listed above instead 180 | - Always include the required parameters for each tool 181 | ``` -------------------------------------------------------------------------------- /REFACTORING.md: -------------------------------------------------------------------------------- ```markdown 1 | # Semantic Scholar Server Refactoring 2 | 3 | This document describes the refactoring of the Semantic Scholar server from a single monolithic file to a modular package structure. 4 | 5 | ## Motivation 6 | 7 | The original implementation consisted of a single 2,200+ line Python file (`semantic_scholar_server.py`), which made it difficult to: 8 | 9 | - Understand the overall structure 10 | - Locate specific functionality 11 | - Debug issues 12 | - Make focused changes 13 | - Test individual components 14 | 15 | ## Refactoring Approach 16 | 17 | We used a modular package approach, separating concerns into logical components: 18 | 19 | ``` 20 | semantic-scholar-server/ 21 | ├── semantic_scholar/ # Main package 22 | │ ├── __init__.py # Package initialization 23 | │ ├── server.py # Server setup and main functionality 24 | │ ├── mcp.py # Centralized FastMCP instance definition 25 | │ ├── config.py # Configuration classes 26 | │ ├── utils/ # Utility modules 27 | │ │ ├── __init__.py 28 | │ │ ├── errors.py # Error handling 29 | │ │ └── http.py # HTTP client and rate limiting 30 | │ ├── api/ # API endpoints 31 | │ ├── __init__.py 32 | │ ├── papers.py # Paper-related endpoints 33 | │ ├── authors.py # Author-related endpoints 34 | │ └── recommendations.py # Recommendation endpoints 35 | ├── 
run.py # Entry point script 36 | ``` 37 | 38 | ## Key Improvements 39 | 40 | 1. **Separation of Concerns** 41 | 42 | - Config classes in their own module 43 | - Utilities separated from business logic 44 | - API endpoints grouped by domain (papers, authors, recommendations) 45 | - Server infrastructure code isolated 46 | - FastMCP instance centralized in its own module 47 | 48 | 2. **Improved Maintainability** 49 | 50 | - Each file has a single responsibility 51 | - Files are much smaller and easier to understand 52 | - Clear imports show dependencies between modules 53 | - Better docstrings and code organization 54 | - No circular dependencies between modules 55 | 56 | 3. **Enhanced Extensibility** 57 | 58 | - Adding new endpoints only requires changes to the relevant module 59 | - Utilities can be reused across the codebase 60 | - Configuration is centralized 61 | - Testing individual components is much easier 62 | - Each module imports the FastMCP instance from a central location 63 | 64 | 4. **Clearer Entry Point** 65 | - `run.py` provides a simple way to start the server 66 | - Server initialization is separated from the API logic 67 | - All modules consistently import the FastMCP instance from mcp.py 68 | 69 | ## Migration Guide 70 | 71 | The refactored code maintains the same functionality and API as the original implementation. To migrate: 72 | 73 | 1. Replace the original `semantic_scholar_server.py` with the new package structure 74 | 2. Update any import statements that referenced the original file 75 | 3. Use `run.py` as the new entry point 76 | 77 | No changes to API usage are required - all tool functions maintain the same signatures and behavior. 78 | 79 | ## Future Improvements 80 | 81 | The modular structure enables several future improvements: 82 | 83 | 1. **Testing**: Add unit tests for individual components 84 | 2. **Caching**: Implement caching layer for improved performance 85 | 3. 
**Logging**: Enhanced logging throughout the application 86 | 4. **Metrics**: Add performance monitoring 87 | 5. **Documentation**: Generate API documentation from docstrings 88 | ``` -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- ```python 1 | """Test utilities and core functionality without MCP dependencies""" 2 | 3 | import httpx 4 | import logging 5 | import os 6 | from typing import Dict, Optional 7 | import asyncio 8 | from enum import Enum 9 | from dotenv import load_dotenv 10 | 11 | # Load environment variables from .env file 12 | load_dotenv() 13 | 14 | # Basic setup 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger(__name__) 17 | 18 | class ErrorType(Enum): 19 | RATE_LIMIT = "rate_limit" 20 | API_ERROR = "api_error" 21 | VALIDATION = "validation" 22 | TIMEOUT = "timeout" 23 | 24 | class Config: 25 | API_VERSION = "v1" 26 | GRAPH_BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}" 27 | RECOMMENDATIONS_BASE_URL = "https://api.semanticscholar.org/recommendations/v1" 28 | TIMEOUT = 30 # seconds 29 | 30 | def create_error_response( 31 | error_type: ErrorType, 32 | message: str, 33 | details: Optional[Dict] = None 34 | ) -> Dict: 35 | return { 36 | "error": { 37 | "type": error_type.value, 38 | "message": message, 39 | "details": details or {} 40 | } 41 | } 42 | 43 | def get_api_key() -> Optional[str]: 44 | """Get the Semantic Scholar API key from environment variables.""" 45 | api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY") 46 | logger.info(f"API Key found: {'Yes' if api_key else 'No'}") 47 | return api_key 48 | 49 | async def make_request(endpoint: str, params: Dict = None, method: str = "GET", json: Dict = None) -> Dict: 50 | """Make a request to the Semantic Scholar API.""" 51 | try: 52 | api_key = get_api_key() 53 | headers = {"x-api-key": api_key} if api_key else {} 54 | params = 
params or {} 55 | 56 | # Choose base URL based on endpoint 57 | is_recommendations = endpoint.startswith("recommendations") or endpoint.startswith("papers/forpaper") 58 | base_url = Config.RECOMMENDATIONS_BASE_URL if is_recommendations else Config.GRAPH_BASE_URL 59 | 60 | # Clean up endpoint 61 | if endpoint.startswith("/"): 62 | endpoint = endpoint[1:] 63 | if is_recommendations and endpoint.startswith("recommendations/"): 64 | endpoint = endpoint[15:] # Remove "recommendations/" prefix 65 | 66 | url = f"{base_url}/{endpoint}" 67 | logger.info(f"Making {method} request to {url}") 68 | logger.info(f"Headers: {headers}") 69 | logger.info(f"Params: {params}") 70 | if json: 71 | logger.info(f"JSON body: {json}") 72 | 73 | async with httpx.AsyncClient(timeout=Config.TIMEOUT, follow_redirects=True) as client: 74 | if method == "GET": 75 | response = await client.get(url, params=params, headers=headers) 76 | else: # POST 77 | response = await client.post(url, params=params, json=json, headers=headers) 78 | 79 | logger.info(f"Response status: {response.status_code}") 80 | logger.info(f"Response body: {response.text}") 81 | 82 | response.raise_for_status() 83 | return response.json() 84 | 85 | except httpx.HTTPStatusError as e: 86 | if e.response.status_code == 429: 87 | return create_error_response( 88 | ErrorType.RATE_LIMIT, 89 | "Rate limit exceeded", 90 | {"retry_after": e.response.headers.get("retry-after")} 91 | ) 92 | return create_error_response( 93 | ErrorType.API_ERROR, 94 | f"HTTP error: {e.response.status_code}", 95 | {"response": e.response.text} 96 | ) 97 | except httpx.TimeoutException: 98 | return create_error_response( 99 | ErrorType.TIMEOUT, 100 | f"Request timed out after {Config.TIMEOUT} seconds" 101 | ) 102 | except Exception as e: 103 | return create_error_response( 104 | ErrorType.API_ERROR, 105 | str(e) 106 | ) ``` -------------------------------------------------------------------------------- /test/test_paper.py: 
-------------------------------------------------------------------------------- ```python 1 | import unittest 2 | import asyncio 3 | import os 4 | from typing import Optional, List, Dict 5 | import random 6 | 7 | from .test_utils import make_request, create_error_response, ErrorType, Config 8 | 9 | class TestPaperTools(unittest.TestCase): 10 | def setUp(self): 11 | """Set up test environment""" 12 | # You can set your API key here for testing 13 | os.environ["SEMANTIC_SCHOLAR_API_KEY"] = "" # Optional 14 | 15 | # Create event loop for async tests 16 | self.loop = asyncio.new_event_loop() 17 | asyncio.set_event_loop(self.loop) 18 | 19 | # Sample paper IDs for testing 20 | self.sample_paper_id = "649def34f8be52c8b66281af98ae884c09aef38b" 21 | self.sample_paper_ids = [ 22 | self.sample_paper_id, 23 | "ARXIV:2106.15928" 24 | ] 25 | 26 | def tearDown(self): 27 | """Clean up after tests""" 28 | self.loop.close() 29 | 30 | def run_async(self, coro): 31 | """Helper to run async functions in tests""" 32 | return self.loop.run_until_complete(coro) 33 | 34 | async def async_test_with_delay(self, endpoint: str, **kwargs): 35 | """Helper to run async tests with delay to handle rate limiting""" 36 | await asyncio.sleep(random.uniform(5, 8)) # Random initial delay 37 | 38 | max_retries = 5 39 | base_delay = 8 40 | 41 | for attempt in range(max_retries): 42 | result = await make_request(endpoint, **kwargs) 43 | if not isinstance(result, dict) or "error" not in result: 44 | return result 45 | 46 | if result["error"]["type"] == "rate_limit": 47 | delay = base_delay * (2 ** attempt) + random.uniform(0, 2) # Add jitter 48 | await asyncio.sleep(delay) 49 | continue 50 | else: 51 | return result 52 | 53 | return result # Return last result if all retries failed 54 | 55 | @classmethod 56 | def setUpClass(cls): 57 | """Set up class-level test environment""" 58 | # Add initial delay before any tests run 59 | asyncio.get_event_loop().run_until_complete(asyncio.sleep(10)) 60 | 61 | def 
test_paper_relevance_search(self): 62 | """Test paper relevance search functionality""" 63 | # Test basic search 64 | result = self.run_async(self.async_test_with_delay( 65 | "paper/search", # Remove leading slash 66 | params={ 67 | "query": "quantum computing", 68 | "fields": "title,abstract,year" 69 | } 70 | )) 71 | self.assertNotIn("error", result) 72 | self.assertIn("data", result) 73 | self.assertIn("total", result) 74 | 75 | # Test with filters 76 | result = self.run_async(self.async_test_with_delay( 77 | "paper/search", 78 | params={ 79 | "query": "machine learning", 80 | "fields": "title,year", 81 | "minCitationCount": 100, 82 | "year": "2020-2023" 83 | } 84 | )) 85 | self.assertNotIn("error", result) 86 | self.assertIn("data", result) 87 | 88 | def test_paper_bulk_search(self): 89 | """Test paper bulk search functionality""" 90 | result = self.run_async(self.async_test_with_delay( 91 | "paper/search/bulk", # Remove leading slash 92 | params={ 93 | "query": "neural networks", 94 | "fields": "title,year,authors", 95 | "sort": "citationCount:desc" 96 | } 97 | )) 98 | self.assertNotIn("error", result) 99 | self.assertIn("data", result) 100 | 101 | def test_paper_details(self): 102 | """Test paper details functionality""" 103 | result = self.run_async(self.async_test_with_delay( 104 | f"paper/{self.sample_paper_id}", # Remove leading slash 105 | params={ 106 | "fields": "title,abstract,year,authors" 107 | } 108 | )) 109 | self.assertNotIn("error", result) 110 | self.assertIn("paperId", result) 111 | self.assertIn("title", result) 112 | 113 | def test_paper_batch_details(self): 114 | """Test batch paper details functionality""" 115 | result = self.run_async(self.async_test_with_delay( 116 | "paper/batch", # Remove leading slash 117 | method="POST", 118 | params={"fields": "title,year,authors"}, 119 | json={"ids": self.sample_paper_ids} 120 | )) 121 | self.assertNotIn("error", result) 122 | self.assertTrue(isinstance(result, list)) 123 | 
self.assertEqual(len(result), len(self.sample_paper_ids)) 124 | 125 | if __name__ == '__main__': 126 | unittest.main() 127 | ``` -------------------------------------------------------------------------------- /semantic_scholar/utils/http.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | HTTP client utilities for the Semantic Scholar API Server. 3 | """ 4 | 5 | import os 6 | import logging 7 | import httpx 8 | import asyncio 9 | import time 10 | from typing import Dict, Optional, Tuple, Any 11 | 12 | from ..config import Config, ErrorType, RateLimitConfig 13 | from .errors import create_error_response 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # Global HTTP client for connection pooling 18 | http_client = None 19 | 20 | class RateLimiter: 21 | """ 22 | Rate limiter for API requests to prevent exceeding API limits. 23 | """ 24 | def __init__(self): 25 | self._last_call_time = {} 26 | self._locks = {} 27 | 28 | def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]: 29 | """Get the appropriate rate limit for an endpoint.""" 30 | if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS): 31 | if "batch" in endpoint: 32 | return RateLimitConfig.BATCH_LIMIT 33 | if "search" in endpoint: 34 | return RateLimitConfig.SEARCH_LIMIT 35 | return RateLimitConfig.DEFAULT_LIMIT 36 | return RateLimitConfig.DEFAULT_LIMIT 37 | 38 | async def acquire(self, endpoint: str): 39 | """ 40 | Acquire permission to make a request, waiting if necessary to respect rate limits. 41 | 42 | Args: 43 | endpoint: The API endpoint being accessed. 
44 | """ 45 | if endpoint not in self._locks: 46 | self._locks[endpoint] = asyncio.Lock() 47 | self._last_call_time[endpoint] = 0 48 | 49 | async with self._locks[endpoint]: 50 | rate_limit = self._get_rate_limit(endpoint) 51 | current_time = time.time() 52 | time_since_last_call = current_time - self._last_call_time[endpoint] 53 | 54 | if time_since_last_call < rate_limit[1]: 55 | delay = rate_limit[1] - time_since_last_call 56 | await asyncio.sleep(delay) 57 | 58 | self._last_call_time[endpoint] = time.time() 59 | 60 | # Create global rate limiter instance 61 | rate_limiter = RateLimiter() 62 | 63 | def get_api_key() -> Optional[str]: 64 | """ 65 | Get the Semantic Scholar API key from environment variables. 66 | Returns None if no API key is set, enabling unauthenticated access. 67 | """ 68 | api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY") 69 | if not api_key: 70 | logger.warning("No SEMANTIC_SCHOLAR_API_KEY set. Using unauthenticated access with lower rate limits.") 71 | return api_key 72 | 73 | async def initialize_client(): 74 | """Initialize the global HTTP client.""" 75 | global http_client 76 | if http_client is None: 77 | http_client = httpx.AsyncClient( 78 | timeout=Config.TIMEOUT, 79 | limits=httpx.Limits(max_keepalive_connections=10) 80 | ) 81 | return http_client 82 | 83 | async def cleanup_client(): 84 | """Clean up the global HTTP client.""" 85 | global http_client 86 | if http_client is not None: 87 | await http_client.aclose() 88 | http_client = None 89 | 90 | async def make_request(endpoint: str, params: Dict = None) -> Dict: 91 | """ 92 | Make a rate-limited request to the Semantic Scholar API. 93 | 94 | Args: 95 | endpoint: The API endpoint to call. 96 | params: Optional query parameters. 97 | 98 | Returns: 99 | The JSON response or an error response dictionary. 
100 | """ 101 | try: 102 | # Apply rate limiting 103 | await rate_limiter.acquire(endpoint) 104 | 105 | # Get API key if available 106 | api_key = get_api_key() 107 | headers = {"x-api-key": api_key} if api_key else {} 108 | url = f"{Config.BASE_URL}{endpoint}" 109 | 110 | # Use global client 111 | client = await initialize_client() 112 | response = await client.get(url, params=params, headers=headers) 113 | response.raise_for_status() 114 | return response.json() 115 | except httpx.HTTPStatusError as e: 116 | logger.error(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") 117 | if e.response.status_code == 429: 118 | return create_error_response( 119 | ErrorType.RATE_LIMIT, 120 | "Rate limit exceeded. Consider using an API key for higher limits.", 121 | { 122 | "retry_after": e.response.headers.get("retry-after"), 123 | "authenticated": bool(get_api_key()) 124 | } 125 | ) 126 | return create_error_response( 127 | ErrorType.API_ERROR, 128 | f"HTTP error: {e.response.status_code}", 129 | {"response": e.response.text} 130 | ) 131 | except httpx.TimeoutException as e: 132 | logger.error(f"Request timeout for {endpoint}: {str(e)}") 133 | return create_error_response( 134 | ErrorType.TIMEOUT, 135 | f"Request timed out after {Config.TIMEOUT} seconds" 136 | ) 137 | except Exception as e: 138 | logger.error(f"Unexpected error for {endpoint}: {str(e)}") 139 | return create_error_response( 140 | ErrorType.API_ERROR, 141 | str(e) 142 | ) ``` -------------------------------------------------------------------------------- /semantic_scholar/config.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Configuration for the Semantic Scholar API Server. 
3 | """ 4 | 5 | from dataclasses import dataclass 6 | from enum import Enum 7 | from typing import Dict, List, Tuple, Any 8 | 9 | # Rate Limiting Configuration 10 | @dataclass 11 | class RateLimitConfig: 12 | # Define rate limits (requests, seconds) 13 | SEARCH_LIMIT = (1, 1) # 1 request per 1 second 14 | BATCH_LIMIT = (1, 1) # 1 request per 1 second 15 | DEFAULT_LIMIT = (10, 1) # 10 requests per 1 second 16 | 17 | # Endpoints categorization 18 | # These endpoints have stricter rate limits due to their computational intensity 19 | # and to prevent abuse of the recommendation system 20 | RESTRICTED_ENDPOINTS = [ 21 | "/paper/batch", # Batch operations are expensive 22 | "/paper/search", # Search operations are computationally intensive 23 | "/recommendations" # Recommendation generation is resource-intensive 24 | ] 25 | 26 | # Error Types 27 | class ErrorType(Enum): 28 | RATE_LIMIT = "rate_limit" 29 | API_ERROR = "api_error" 30 | VALIDATION = "validation" 31 | TIMEOUT = "timeout" 32 | 33 | # Field Constants 34 | class PaperFields: 35 | DEFAULT = ["title", "abstract", "year", "citationCount", "authors", "url"] 36 | DETAILED = DEFAULT + ["references", "citations", "venue", "influentialCitationCount"] 37 | MINIMAL = ["title", "year", "authors"] 38 | SEARCH = ["paperId", "title", "year", "citationCount"] 39 | 40 | # Valid fields from API documentation 41 | VALID_FIELDS = { 42 | "abstract", 43 | "authors", 44 | "citationCount", 45 | "citations", 46 | "corpusId", 47 | "embedding", 48 | "externalIds", 49 | "fieldsOfStudy", 50 | "influentialCitationCount", 51 | "isOpenAccess", 52 | "openAccessPdf", 53 | "paperId", 54 | "publicationDate", 55 | "publicationTypes", 56 | "publicationVenue", 57 | "references", 58 | "s2FieldsOfStudy", 59 | "title", 60 | "tldr", 61 | "url", 62 | "venue", 63 | "year" 64 | } 65 | 66 | class AuthorDetailFields: 67 | """Common field combinations for author details""" 68 | 69 | # Basic author information 70 | BASIC = ["name", "url", "affiliations"] 71 
| 72 | # Author's papers information 73 | PAPERS_BASIC = ["papers"] # Returns paperId and title 74 | PAPERS_DETAILED = [ 75 | "papers.year", 76 | "papers.authors", 77 | "papers.abstract", 78 | "papers.venue", 79 | "papers.url" 80 | ] 81 | 82 | # Complete author profile 83 | COMPLETE = BASIC + ["papers", "papers.year", "papers.authors", "papers.venue"] 84 | 85 | # Citation metrics 86 | METRICS = ["citationCount", "hIndex", "paperCount"] 87 | 88 | # Valid fields for author details 89 | VALID_FIELDS = { 90 | "authorId", 91 | "name", 92 | "url", 93 | "affiliations", 94 | "papers", 95 | "papers.year", 96 | "papers.authors", 97 | "papers.abstract", 98 | "papers.venue", 99 | "papers.url", 100 | "citationCount", 101 | "hIndex", 102 | "paperCount" 103 | } 104 | 105 | class PaperDetailFields: 106 | """Common field combinations for paper details""" 107 | 108 | # Basic paper information 109 | BASIC = ["title", "abstract", "year", "venue"] 110 | 111 | # Author information 112 | AUTHOR_BASIC = ["authors"] 113 | AUTHOR_DETAILED = ["authors.url", "authors.paperCount", "authors.citationCount"] 114 | 115 | # Citation information 116 | CITATION_BASIC = ["citations", "references"] 117 | CITATION_DETAILED = ["citations.title", "citations.abstract", "citations.year", 118 | "references.title", "references.abstract", "references.year"] 119 | 120 | # Full paper details 121 | COMPLETE = BASIC + AUTHOR_BASIC + CITATION_BASIC + ["url", "fieldsOfStudy", 122 | "publicationVenue", "publicationTypes"] 123 | 124 | class CitationReferenceFields: 125 | """Common field combinations for citation and reference queries""" 126 | 127 | # Basic information 128 | BASIC = ["title"] 129 | 130 | # Citation/Reference context 131 | CONTEXT = ["contexts", "intents", "isInfluential"] 132 | 133 | # Paper details 134 | DETAILED = ["title", "abstract", "authors", "year", "venue"] 135 | 136 | # Full information 137 | COMPLETE = CONTEXT + DETAILED 138 | 139 | # Valid fields for citation/reference queries 140 | 
VALID_FIELDS = { 141 | "contexts", 142 | "intents", 143 | "isInfluential", 144 | "title", 145 | "abstract", 146 | "authors", 147 | "year", 148 | "venue", 149 | "paperId", 150 | "url", 151 | "citationCount", 152 | "influentialCitationCount" 153 | } 154 | 155 | # Configuration 156 | class Config: 157 | # API Configuration 158 | API_VERSION = "v1" 159 | BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}" 160 | TIMEOUT = 30 # seconds 161 | 162 | # Request Limits 163 | MAX_BATCH_SIZE = 100 164 | MAX_RESULTS_PER_PAGE = 100 165 | DEFAULT_PAGE_SIZE = 10 166 | MAX_BATCHES = 5 167 | 168 | # Fields Configuration 169 | DEFAULT_FIELDS = PaperFields.DEFAULT 170 | 171 | # Feature Flags 172 | ENABLE_CACHING = False 173 | DEBUG_MODE = False 174 | 175 | # Search Configuration 176 | SEARCH_TYPES = { 177 | "comprehensive": { 178 | "description": "Balanced search considering relevance and impact", 179 | "min_citations": None, 180 | "ranking_strategy": "balanced" 181 | }, 182 | "influential": { 183 | "description": "Focus on highly-cited and influential papers", 184 | "min_citations": 50, 185 | "ranking_strategy": "citations" 186 | }, 187 | "latest": { 188 | "description": "Focus on recent papers with impact", 189 | "min_citations": None, 190 | "ranking_strategy": "recency" 191 | } 192 | } ``` -------------------------------------------------------------------------------- /semantic_scholar/api/recommendations.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Recommendation-related API endpoints for the Semantic Scholar API. 
3 | """ 4 | 5 | from typing import Dict, List, Optional 6 | from fastmcp import Context 7 | import httpx 8 | 9 | # Import mcp from centralized location instead of server 10 | from ..mcp import mcp 11 | from ..config import Config, ErrorType 12 | from ..utils.http import rate_limiter, get_api_key 13 | from ..utils.errors import create_error_response 14 | 15 | @mcp.tool() 16 | async def get_paper_recommendations_single( 17 | context: Context, 18 | paper_id: str, 19 | fields: Optional[str] = None, 20 | limit: int = 100, 21 | from_pool: str = "recent" 22 | ) -> Dict: 23 | """ 24 | Get paper recommendations based on a single seed paper. 25 | This endpoint is optimized for finding papers similar to a specific paper. 26 | 27 | Args: 28 | paper_id (str): Paper identifier in one of the following formats: 29 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 30 | - CorpusId:<id> (e.g., "CorpusId:215416146") 31 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 32 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 33 | - MAG:<id> (e.g., "MAG:112218234") 34 | - ACL:<id> (e.g., "ACL:W12-3903") 35 | - PMID:<id> (e.g., "PMID:19872477") 36 | - PMCID:<id> (e.g., "PMCID:2323736") 37 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 38 | 39 | fields (Optional[str]): Comma-separated list of fields to return for each paper. 40 | paperId is always returned. 41 | 42 | limit (int): Maximum number of recommendations to return. 43 | Default: 100 44 | Maximum: 500 45 | 46 | from_pool (str): Which pool of papers to recommend from. 
47 | Options: 48 | - "recent": Recent papers (default) 49 | - "all-cs": All computer science papers 50 | Default: "recent" 51 | 52 | Returns: 53 | Dict: { 54 | "recommendedPapers": List[Dict] # List of recommended papers with requested fields 55 | } 56 | """ 57 | try: 58 | # Apply rate limiting 59 | endpoint = "/recommendations" 60 | await rate_limiter.acquire(endpoint) 61 | 62 | # Validate limit 63 | if limit > 500: 64 | return create_error_response( 65 | ErrorType.VALIDATION, 66 | "Cannot request more than 500 recommendations", 67 | {"max_limit": 500, "requested": limit} 68 | ) 69 | 70 | # Validate pool 71 | if from_pool not in ["recent", "all-cs"]: 72 | return create_error_response( 73 | ErrorType.VALIDATION, 74 | "Invalid paper pool specified", 75 | {"valid_pools": ["recent", "all-cs"]} 76 | ) 77 | 78 | # Build request parameters 79 | params = { 80 | "limit": limit, 81 | "from": from_pool 82 | } 83 | if fields: 84 | params["fields"] = fields 85 | 86 | # Make the API request 87 | async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: 88 | api_key = get_api_key() 89 | headers = {"x-api-key": api_key} if api_key else {} 90 | 91 | url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}" 92 | response = await client.get(url, params=params, headers=headers) 93 | 94 | # Handle specific error cases 95 | if response.status_code == 404: 96 | return create_error_response( 97 | ErrorType.VALIDATION, 98 | "Paper not found", 99 | {"paper_id": paper_id} 100 | ) 101 | 102 | response.raise_for_status() 103 | return response.json() 104 | 105 | except httpx.HTTPStatusError as e: 106 | if e.response.status_code == 429: 107 | return create_error_response( 108 | ErrorType.RATE_LIMIT, 109 | "Rate limit exceeded. 
Consider using an API key for higher limits.", 110 | { 111 | "retry_after": e.response.headers.get("retry-after"), 112 | "authenticated": bool(get_api_key()) 113 | } 114 | ) 115 | return create_error_response( 116 | ErrorType.API_ERROR, 117 | f"HTTP error {e.response.status_code}", 118 | {"response": e.response.text} 119 | ) 120 | except httpx.TimeoutException: 121 | return create_error_response( 122 | ErrorType.TIMEOUT, 123 | f"Request timed out after {Config.TIMEOUT} seconds" 124 | ) 125 | except Exception as e: 126 | import logging 127 | logger = logging.getLogger(__name__) 128 | logger.error(f"Unexpected error in recommendations: {str(e)}") 129 | return create_error_response( 130 | ErrorType.API_ERROR, 131 | "Failed to get recommendations", 132 | {"error": str(e)} 133 | ) 134 | 135 | @mcp.tool() 136 | async def get_paper_recommendations_multi( 137 | context: Context, 138 | positive_paper_ids: List[str], 139 | negative_paper_ids: Optional[List[str]] = None, 140 | fields: Optional[str] = None, 141 | limit: int = 100 142 | ) -> Dict: 143 | """ 144 | Get paper recommendations based on multiple positive and optional negative examples. 145 | This endpoint is optimized for finding papers similar to a set of papers while 146 | avoiding papers similar to the negative examples. 147 | 148 | Args: 149 | positive_paper_ids (List[str]): List of paper IDs to use as positive examples. 150 | Papers similar to these will be recommended. 
151 | Each ID can be in any of these formats: 152 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 153 | - CorpusId:<id> (e.g., "CorpusId:215416146") 154 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 155 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 156 | - MAG:<id> (e.g., "MAG:112218234") 157 | - ACL:<id> (e.g., "ACL:W12-3903") 158 | - PMID:<id> (e.g., "PMID:19872477") 159 | - PMCID:<id> (e.g., "PMCID:2323736") 160 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 161 | 162 | negative_paper_ids (Optional[List[str]]): List of paper IDs to use as negative examples. 163 | Papers similar to these will be avoided in recommendations. 164 | Uses same ID formats as positive_paper_ids. 165 | 166 | fields (Optional[str]): Comma-separated list of fields to return for each paper. 167 | paperId is always returned. 168 | 169 | limit (int): Maximum number of recommendations to return. 170 | Default: 100 171 | Maximum: 500 172 | 173 | Returns: 174 | Dict: { 175 | "recommendedPapers": List[Dict] # List of recommended papers with requested fields 176 | } 177 | """ 178 | try: 179 | # Apply rate limiting 180 | endpoint = "/recommendations" 181 | await rate_limiter.acquire(endpoint) 182 | 183 | # Validate inputs 184 | if not positive_paper_ids: 185 | return create_error_response( 186 | ErrorType.VALIDATION, 187 | "Must provide at least one positive paper ID" 188 | ) 189 | 190 | if limit > 500: 191 | return create_error_response( 192 | ErrorType.VALIDATION, 193 | "Cannot request more than 500 recommendations", 194 | {"max_limit": 500, "requested": limit} 195 | ) 196 | 197 | # Build request parameters 198 | params = {"limit": limit} 199 | if fields: 200 | params["fields"] = fields 201 | 202 | request_body = { 203 | "positivePaperIds": positive_paper_ids, 204 | "negativePaperIds": negative_paper_ids or [] 205 | } 206 | 207 | # Make the API request 208 | async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: 209 | api_key = get_api_key() 210 
| headers = {"x-api-key": api_key} if api_key else {} 211 | 212 | url = "https://api.semanticscholar.org/recommendations/v1/papers" 213 | response = await client.post(url, params=params, json=request_body, headers=headers) 214 | 215 | # Handle specific error cases 216 | if response.status_code == 404: 217 | return create_error_response( 218 | ErrorType.VALIDATION, 219 | "One or more input papers not found", 220 | { 221 | "positive_ids": positive_paper_ids, 222 | "negative_ids": negative_paper_ids 223 | } 224 | ) 225 | 226 | response.raise_for_status() 227 | return response.json() 228 | 229 | except httpx.HTTPStatusError as e: 230 | if e.response.status_code == 429: 231 | return create_error_response( 232 | ErrorType.RATE_LIMIT, 233 | "Rate limit exceeded. Consider using an API key for higher limits.", 234 | { 235 | "retry_after": e.response.headers.get("retry-after"), 236 | "authenticated": bool(get_api_key()) 237 | } 238 | ) 239 | return create_error_response( 240 | ErrorType.API_ERROR, 241 | f"HTTP error {e.response.status_code}", 242 | {"response": e.response.text} 243 | ) 244 | except httpx.TimeoutException: 245 | return create_error_response( 246 | ErrorType.TIMEOUT, 247 | f"Request timed out after {Config.TIMEOUT} seconds" 248 | ) 249 | except Exception as e: 250 | import logging 251 | logger = logging.getLogger(__name__) 252 | logger.error(f"Unexpected error in recommendations: {str(e)}") 253 | return create_error_response( 254 | ErrorType.API_ERROR, 255 | "Failed to get recommendations", 256 | {"error": str(e)} 257 | ) ``` -------------------------------------------------------------------------------- /semantic_scholar/api/authors.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Author-related API endpoints for the Semantic Scholar API. 
3 | """ 4 | 5 | from typing import Dict, List, Optional 6 | from fastmcp import Context 7 | 8 | # Import mcp from centralized location instead of server 9 | from ..mcp import mcp 10 | from ..config import AuthorDetailFields, ErrorType 11 | from ..utils.http import make_request 12 | from ..utils.errors import create_error_response 13 | 14 | @mcp.tool() 15 | async def author_search( 16 | context: Context, 17 | query: str, 18 | fields: Optional[List[str]] = None, 19 | offset: int = 0, 20 | limit: int = 100 21 | ) -> Dict: 22 | """ 23 | Search for authors by name on Semantic Scholar. 24 | This endpoint is optimized for finding authors based on their name. 25 | Results are sorted by relevance to the query. 26 | 27 | Args: 28 | query (str): The name text to search for. The query will be matched against author names 29 | and their known aliases. 30 | 31 | fields (Optional[List[str]]): List of fields to return for each author. 32 | authorId is always returned. 33 | 34 | offset (int): Number of authors to skip for pagination. 35 | Default: 0 36 | 37 | limit (int): Maximum number of authors to return. 
38 | Default: 100 39 | Maximum: 1000 40 | 41 | Returns: 42 | Dict: { 43 | "total": int, # Total number of authors matching the query 44 | "offset": int, # Current offset in the results 45 | "next": int, # Next offset (if more results available) 46 | "data": List[Dict] # List of authors with requested fields 47 | } 48 | """ 49 | if not query.strip(): 50 | return create_error_response( 51 | ErrorType.VALIDATION, 52 | "Query string cannot be empty" 53 | ) 54 | 55 | # Validate limit 56 | if limit > 1000: 57 | return create_error_response( 58 | ErrorType.VALIDATION, 59 | "Limit cannot exceed 1000", 60 | {"max_limit": 1000} 61 | ) 62 | 63 | # Validate fields 64 | if fields: 65 | invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS 66 | if invalid_fields: 67 | return create_error_response( 68 | ErrorType.VALIDATION, 69 | f"Invalid fields: {', '.join(invalid_fields)}", 70 | {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} 71 | ) 72 | 73 | # Build request parameters 74 | params = { 75 | "query": query, 76 | "offset": offset, 77 | "limit": limit 78 | } 79 | if fields: 80 | params["fields"] = ",".join(fields) 81 | 82 | # Make the API request 83 | return await make_request("/author/search", params) 84 | 85 | @mcp.tool() 86 | async def author_details( 87 | context: Context, 88 | author_id: str, 89 | fields: Optional[List[str]] = None 90 | ) -> Dict: 91 | """ 92 | Get detailed information about an author by their ID. 93 | This endpoint provides comprehensive metadata about an author. 94 | 95 | Args: 96 | author_id (str): Semantic Scholar author ID. 97 | This is a unique identifier assigned by Semantic Scholar. 98 | Example: "1741101" (Albert Einstein) 99 | 100 | fields (Optional[List[str]]): List of fields to return. 101 | authorId is always returned. 102 | Available fields include name, papers, citationCount, etc. 103 | 104 | Returns: 105 | Dict: Author details with requested fields. 106 | Always includes authorId. 
107 | Returns error response if author not found. 108 | """ 109 | if not author_id.strip(): 110 | return create_error_response( 111 | ErrorType.VALIDATION, 112 | "Author ID cannot be empty" 113 | ) 114 | 115 | # Validate fields 116 | if fields: 117 | invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS 118 | if invalid_fields: 119 | return create_error_response( 120 | ErrorType.VALIDATION, 121 | f"Invalid fields: {', '.join(invalid_fields)}", 122 | {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} 123 | ) 124 | 125 | # Build request parameters 126 | params = {} 127 | if fields: 128 | params["fields"] = ",".join(fields) 129 | 130 | # Make the API request 131 | result = await make_request(f"/author/{author_id}", params) 132 | 133 | if isinstance(result, Dict) and "error" in result: 134 | error_msg = result["error"].get("message", "") 135 | if "404" in error_msg: 136 | return create_error_response( 137 | ErrorType.VALIDATION, 138 | "Author not found", 139 | {"author_id": author_id} 140 | ) 141 | return result 142 | 143 | return result 144 | 145 | @mcp.tool() 146 | async def author_papers( 147 | context: Context, 148 | author_id: str, 149 | fields: Optional[List[str]] = None, 150 | offset: int = 0, 151 | limit: int = 100 152 | ) -> Dict: 153 | """ 154 | Get papers written by an author with pagination support. 155 | This endpoint provides detailed information about an author's publications. 156 | 157 | Args: 158 | author_id (str): Semantic Scholar author ID. 159 | This is a unique identifier assigned by Semantic Scholar. 160 | Example: "1741101" (Albert Einstein) 161 | 162 | fields (Optional[List[str]]): List of fields to return for each paper. 163 | paperId is always returned. 164 | 165 | offset (int): Number of papers to skip for pagination. 166 | Default: 0 167 | 168 | limit (int): Maximum number of papers to return. 
169 | Default: 100 170 | Maximum: 1000 171 | 172 | Returns: 173 | Dict: { 174 | "offset": int, # Current offset in the results 175 | "next": int, # Next offset (if more results available) 176 | "data": List[Dict] # List of papers with requested fields 177 | } 178 | """ 179 | if not author_id.strip(): 180 | return create_error_response( 181 | ErrorType.VALIDATION, 182 | "Author ID cannot be empty" 183 | ) 184 | 185 | # Validate limit 186 | if limit > 1000: 187 | return create_error_response( 188 | ErrorType.VALIDATION, 189 | "Limit cannot exceed 1000", 190 | {"max_limit": 1000} 191 | ) 192 | 193 | # Build request parameters 194 | params = { 195 | "offset": offset, 196 | "limit": limit 197 | } 198 | if fields: 199 | params["fields"] = ",".join(fields) 200 | 201 | # Make the API request 202 | result = await make_request(f"/author/{author_id}/papers", params) 203 | 204 | if isinstance(result, Dict) and "error" in result: 205 | error_msg = result["error"].get("message", "") 206 | if "404" in error_msg: 207 | return create_error_response( 208 | ErrorType.VALIDATION, 209 | "Author not found", 210 | {"author_id": author_id} 211 | ) 212 | return result 213 | 214 | return result 215 | 216 | @mcp.tool() 217 | async def author_batch_details( 218 | context: Context, 219 | author_ids: List[str], 220 | fields: Optional[str] = None 221 | ) -> Dict: 222 | """ 223 | Get details for multiple authors in a single batch request. 224 | This endpoint is optimized for efficiently retrieving details about known authors. 225 | 226 | Args: 227 | author_ids (List[str]): List of Semantic Scholar author IDs. 228 | These are unique identifiers assigned by Semantic Scholar. 229 | Example: ["1741101", "1741102"] 230 | Maximum: 1000 IDs per request 231 | 232 | fields (Optional[str]): Comma-separated list of fields to return for each author. 233 | authorId is always returned. 234 | 235 | Returns: 236 | List[Dict]: List of author details with requested fields. 
237 | - Results maintain the same order as input author_ids 238 | - Invalid or not found author IDs return null in the results 239 | - Each author object contains the requested fields 240 | - authorId is always included in each author object 241 | """ 242 | # Validate inputs 243 | if not author_ids: 244 | return create_error_response( 245 | ErrorType.VALIDATION, 246 | "Author IDs list cannot be empty" 247 | ) 248 | 249 | if len(author_ids) > 1000: 250 | return create_error_response( 251 | ErrorType.VALIDATION, 252 | "Cannot process more than 1000 author IDs at once", 253 | {"max_authors": 1000, "received": len(author_ids)} 254 | ) 255 | 256 | # Validate fields if provided 257 | if fields: 258 | field_list = fields.split(",") 259 | invalid_fields = set(field_list) - AuthorDetailFields.VALID_FIELDS 260 | if invalid_fields: 261 | return create_error_response( 262 | ErrorType.VALIDATION, 263 | f"Invalid fields: {', '.join(invalid_fields)}", 264 | {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} 265 | ) 266 | 267 | # Build request parameters 268 | params = {} 269 | if fields: 270 | params["fields"] = fields 271 | 272 | # Make POST request with proper structure 273 | try: 274 | import httpx 275 | from ..config import Config 276 | 277 | async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: 278 | from ..utils.http import get_api_key 279 | api_key = get_api_key() 280 | headers = {"x-api-key": api_key} if api_key else {} 281 | 282 | response = await client.post( 283 | f"{Config.BASE_URL}/author/batch", 284 | params=params, 285 | json={"ids": author_ids}, 286 | headers=headers 287 | ) 288 | response.raise_for_status() 289 | return response.json() 290 | 291 | except httpx.HTTPStatusError as e: 292 | if e.response.status_code == 429: 293 | return create_error_response( 294 | ErrorType.RATE_LIMIT, 295 | "Rate limit exceeded", 296 | {"retry_after": e.response.headers.get("retry-after")} 297 | ) 298 | return create_error_response( 299 | ErrorType.API_ERROR, 300 | 
f"HTTP error: {e.response.status_code}", 301 | {"response": e.response.text} 302 | ) 303 | except httpx.TimeoutException: 304 | return create_error_response( 305 | ErrorType.TIMEOUT, 306 | f"Request timed out after {Config.TIMEOUT} seconds" 307 | ) 308 | except Exception as e: 309 | return create_error_response( 310 | ErrorType.API_ERROR, 311 | str(e) 312 | ) ``` -------------------------------------------------------------------------------- /semantic_scholar/api/papers.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Paper-related API endpoints for the Semantic Scholar API. 3 | """ 4 | 5 | from typing import Dict, List, Optional 6 | from fastmcp import Context 7 | import httpx 8 | 9 | # Import mcp from centralized location instead of server 10 | from ..mcp import mcp 11 | from ..config import PaperFields, CitationReferenceFields, AuthorDetailFields, Config, ErrorType 12 | from ..utils.http import make_request, get_api_key 13 | from ..utils.errors import create_error_response 14 | 15 | @mcp.tool() 16 | async def paper_relevance_search( 17 | context: Context, 18 | query: str, 19 | fields: Optional[List[str]] = None, 20 | publication_types: Optional[List[str]] = None, 21 | open_access_pdf: bool = False, 22 | min_citation_count: Optional[int] = None, 23 | year: Optional[str] = None, 24 | venue: Optional[List[str]] = None, 25 | fields_of_study: Optional[List[str]] = None, 26 | offset: int = 0, 27 | limit: int = 10 28 | ) -> Dict: 29 | """ 30 | Search for papers on Semantic Scholar using relevance-based ranking. 31 | This endpoint is optimized for finding the most relevant papers matching a text query. 32 | Results are sorted by relevance score. 33 | 34 | Args: 35 | query (str): A text query to search for. The query will be matched against paper titles, 36 | abstracts, venue names, and author names. 37 | 38 | fields (Optional[List[str]]): List of fields to return for each paper. 
39 | paperId and title are always returned. 40 | 41 | publication_types (Optional[List[str]]): Filter by publication types. 42 | 43 | open_access_pdf (bool): If True, only include papers with a public PDF. 44 | Default: False 45 | 46 | min_citation_count (Optional[int]): Minimum number of citations required. 47 | 48 | year (Optional[str]): Filter by publication year. Supports several formats: 49 | - Single year: "2019" 50 | - Year range: "2016-2020" 51 | - Since year: "2010-" 52 | - Until year: "-2015" 53 | 54 | venue (Optional[List[str]]): Filter by publication venues. 55 | Accepts full venue names or ISO4 abbreviations. 56 | 57 | fields_of_study (Optional[List[str]]): Filter by fields of study. 58 | 59 | offset (int): Number of results to skip for pagination. 60 | Default: 0 61 | 62 | limit (int): Maximum number of results to return. 63 | Default: 10 64 | Maximum: 100 65 | 66 | Returns: 67 | Dict: { 68 | "total": int, # Total number of papers matching the query 69 | "offset": int, # Current offset in the results 70 | "next": int, # Offset for the next page of results (if available) 71 | "data": List[Dict] # List of papers with requested fields 72 | } 73 | """ 74 | if not query.strip(): 75 | return create_error_response( 76 | ErrorType.VALIDATION, 77 | "Query string cannot be empty" 78 | ) 79 | 80 | # Validate and prepare fields 81 | if fields is None: 82 | fields = PaperFields.DEFAULT 83 | else: 84 | invalid_fields = set(fields) - PaperFields.VALID_FIELDS 85 | if invalid_fields: 86 | return create_error_response( 87 | ErrorType.VALIDATION, 88 | f"Invalid fields: {', '.join(invalid_fields)}", 89 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 90 | ) 91 | 92 | # Validate and prepare parameters 93 | limit = min(limit, 100) 94 | params = { 95 | "query": query, 96 | "offset": offset, 97 | "limit": limit, 98 | "fields": ",".join(fields) 99 | } 100 | 101 | # Add optional filters 102 | if publication_types: 103 | params["publicationTypes"] = 
",".join(publication_types) 104 | if open_access_pdf: 105 | params["openAccessPdf"] = "true" 106 | if min_citation_count is not None: 107 | params["minCitationCount"] = min_citation_count 108 | if year: 109 | params["year"] = year 110 | if venue: 111 | params["venue"] = ",".join(venue) 112 | if fields_of_study: 113 | params["fieldsOfStudy"] = ",".join(fields_of_study) 114 | 115 | return await make_request("/paper/search", params) 116 | 117 | @mcp.tool() 118 | async def paper_bulk_search( 119 | context: Context, 120 | query: Optional[str] = None, 121 | token: Optional[str] = None, 122 | fields: Optional[List[str]] = None, 123 | sort: Optional[str] = None, 124 | publication_types: Optional[List[str]] = None, 125 | open_access_pdf: bool = False, 126 | min_citation_count: Optional[int] = None, 127 | publication_date_or_year: Optional[str] = None, 128 | year: Optional[str] = None, 129 | venue: Optional[List[str]] = None, 130 | fields_of_study: Optional[List[str]] = None 131 | ) -> Dict: 132 | """ 133 | Bulk search for papers with advanced filtering and sorting options. 134 | Intended for retrieving large sets of papers efficiently. 135 | 136 | Args: 137 | query (Optional[str]): Text query to match against paper title and abstract. 138 | Supports boolean logic with +, |, -, ", *, (), and ~N. 
139 | 140 | token (Optional[str]): Continuation token for pagination 141 | 142 | fields (Optional[List[str]]): Fields to return for each paper 143 | paperId is always returned 144 | Default: paperId and title only 145 | 146 | sort (Optional[str]): Sort order in format 'field:order' 147 | Fields: paperId, publicationDate, citationCount 148 | Order: asc (default), desc 149 | Default: 'paperId:asc' 150 | 151 | publication_types (Optional[List[str]]): Filter by publication types 152 | 153 | open_access_pdf (bool): Only include papers with public PDF 154 | 155 | min_citation_count (Optional[int]): Minimum citation threshold 156 | 157 | publication_date_or_year (Optional[str]): Date/year range filter 158 | Format: <startDate>:<endDate> in YYYY-MM-DD 159 | 160 | year (Optional[str]): Publication year filter 161 | Examples: '2019', '2016-2020', '2010-', '-2015' 162 | 163 | venue (Optional[List[str]]): Filter by publication venues 164 | 165 | fields_of_study (Optional[List[str]]): Filter by fields of study 166 | 167 | Returns: 168 | Dict: { 169 | 'total': int, # Total matching papers 170 | 'token': str, # Continuation token for next batch 171 | 'data': List[Dict] # Papers with requested fields 172 | } 173 | """ 174 | # Build request parameters 175 | params = {} 176 | 177 | # Add query if provided 178 | if query: 179 | params["query"] = query.strip() 180 | 181 | # Add continuation token if provided 182 | if token: 183 | params["token"] = token 184 | 185 | # Add fields if provided 186 | if fields: 187 | # Validate fields 188 | invalid_fields = set(fields) - PaperFields.VALID_FIELDS 189 | if invalid_fields: 190 | return create_error_response( 191 | ErrorType.VALIDATION, 192 | f"Invalid fields: {', '.join(invalid_fields)}", 193 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 194 | ) 195 | params["fields"] = ",".join(fields) 196 | 197 | # Add sort if provided 198 | if sort: 199 | # Validate sort format 200 | valid_sort_fields = ["paperId", "publicationDate", "citationCount"] 
201 | valid_sort_orders = ["asc", "desc"] 202 | 203 | try: 204 | field, order = sort.split(":") 205 | if field not in valid_sort_fields: 206 | return create_error_response( 207 | ErrorType.VALIDATION, 208 | f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}" 209 | ) 210 | if order not in valid_sort_orders: 211 | return create_error_response( 212 | ErrorType.VALIDATION, 213 | f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}" 214 | ) 215 | params["sort"] = sort 216 | except ValueError: 217 | return create_error_response( 218 | ErrorType.VALIDATION, 219 | "Sort must be in format 'field:order'" 220 | ) 221 | 222 | # Add publication types if provided 223 | if publication_types: 224 | valid_types = { 225 | "Review", "JournalArticle", "CaseReport", "ClinicalTrial", 226 | "Conference", "Dataset", "Editorial", "LettersAndComments", 227 | "MetaAnalysis", "News", "Study", "Book", "BookSection" 228 | } 229 | invalid_types = set(publication_types) - valid_types 230 | if invalid_types: 231 | return create_error_response( 232 | ErrorType.VALIDATION, 233 | f"Invalid publication types: {', '.join(invalid_types)}", 234 | {"valid_types": list(valid_types)} 235 | ) 236 | params["publicationTypes"] = ",".join(publication_types) 237 | 238 | # Add open access PDF filter 239 | if open_access_pdf: 240 | params["openAccessPdf"] = "true" 241 | 242 | # Add minimum citation count if provided 243 | if min_citation_count is not None: 244 | if min_citation_count < 0: 245 | return create_error_response( 246 | ErrorType.VALIDATION, 247 | "Minimum citation count cannot be negative" 248 | ) 249 | params["minCitationCount"] = str(min_citation_count) 250 | 251 | # Add publication date/year if provided 252 | if publication_date_or_year: 253 | params["publicationDateOrYear"] = publication_date_or_year 254 | elif year: 255 | params["year"] = year 256 | 257 | # Add venue filter if provided 258 | if venue: 259 | params["venue"] = ",".join(venue) 260 | 261 | # Add fields 
of study filter if provided 262 | if fields_of_study: 263 | valid_fields = { 264 | "Computer Science", "Medicine", "Chemistry", "Biology", 265 | "Materials Science", "Physics", "Geology", "Psychology", 266 | "Art", "History", "Geography", "Sociology", "Business", 267 | "Political Science", "Economics", "Philosophy", "Mathematics", 268 | "Engineering", "Environmental Science", "Agricultural and Food Sciences", 269 | "Education", "Law", "Linguistics" 270 | } 271 | invalid_fields = set(fields_of_study) - valid_fields 272 | if invalid_fields: 273 | return create_error_response( 274 | ErrorType.VALIDATION, 275 | f"Invalid fields of study: {', '.join(invalid_fields)}", 276 | {"valid_fields": list(valid_fields)} 277 | ) 278 | params["fieldsOfStudy"] = ",".join(fields_of_study) 279 | 280 | # Make the API request 281 | result = await make_request("/paper/search/bulk", params) 282 | 283 | # Handle potential errors 284 | if isinstance(result, Dict) and "error" in result: 285 | return result 286 | 287 | return result 288 | 289 | @mcp.tool() 290 | async def paper_title_search( 291 | context: Context, 292 | query: str, 293 | fields: Optional[List[str]] = None, 294 | publication_types: Optional[List[str]] = None, 295 | open_access_pdf: bool = False, 296 | min_citation_count: Optional[int] = None, 297 | year: Optional[str] = None, 298 | venue: Optional[List[str]] = None, 299 | fields_of_study: Optional[List[str]] = None 300 | ) -> Dict: 301 | """ 302 | Find a single paper by title match. This endpoint is optimized for finding a specific paper 303 | by its title and returns the best matching paper based on title similarity. 304 | 305 | Args: 306 | query (str): The title text to search for. The query will be matched against paper titles 307 | to find the closest match. 308 | 309 | fields (Optional[List[str]]): List of fields to return for the paper. 310 | paperId and title are always returned. 311 | 312 | publication_types (Optional[List[str]]): Filter by publication types. 
313 | 314 | open_access_pdf (bool): If True, only include papers with a public PDF. 315 | Default: False 316 | 317 | min_citation_count (Optional[int]): Minimum number of citations required. 318 | 319 | year (Optional[str]): Filter by publication year. Supports several formats: 320 | - Single year: "2019" 321 | - Year range: "2016-2020" 322 | - Since year: "2010-" 323 | - Until year: "-2015" 324 | 325 | venue (Optional[List[str]]): Filter by publication venues. 326 | Accepts full venue names or ISO4 abbreviations. 327 | 328 | fields_of_study (Optional[List[str]]): Filter by fields of study. 329 | 330 | Returns: 331 | Dict: { 332 | "paperId": str, # Semantic Scholar Paper ID 333 | "title": str, # Paper title 334 | "matchScore": float, # Similarity score between query and matched title 335 | ... # Additional requested fields 336 | } 337 | 338 | Returns error response if no matching paper is found. 339 | """ 340 | if not query.strip(): 341 | return create_error_response( 342 | ErrorType.VALIDATION, 343 | "Query string cannot be empty" 344 | ) 345 | 346 | # Validate and prepare fields 347 | if fields is None: 348 | fields = PaperFields.DEFAULT 349 | else: 350 | invalid_fields = set(fields) - PaperFields.VALID_FIELDS 351 | if invalid_fields: 352 | return create_error_response( 353 | ErrorType.VALIDATION, 354 | f"Invalid fields: {', '.join(invalid_fields)}", 355 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 356 | ) 357 | 358 | # Build base parameters 359 | params = {"query": query} 360 | 361 | # Add optional parameters 362 | if fields: 363 | params["fields"] = ",".join(fields) 364 | if publication_types: 365 | params["publicationTypes"] = ",".join(publication_types) 366 | if open_access_pdf: 367 | params["openAccessPdf"] = "true" 368 | if min_citation_count is not None: 369 | params["minCitationCount"] = str(min_citation_count) 370 | if year: 371 | params["year"] = year 372 | if venue: 373 | params["venue"] = ",".join(venue) 374 | if fields_of_study: 375 | 
params["fieldsOfStudy"] = ",".join(fields_of_study) 376 | 377 | result = await make_request("/paper/search/match", params) 378 | 379 | # Handle specific error cases 380 | if isinstance(result, Dict): 381 | if "error" in result: 382 | error_msg = result["error"].get("message", "") 383 | if "404" in error_msg: 384 | return create_error_response( 385 | ErrorType.VALIDATION, 386 | "No matching paper found", 387 | {"original_query": query} 388 | ) 389 | return result 390 | 391 | return result 392 | 393 | @mcp.tool() 394 | async def paper_details( 395 | context: Context, 396 | paper_id: str, 397 | fields: Optional[List[str]] = None 398 | ) -> Dict: 399 | """ 400 | Get details about a paper using various types of identifiers. 401 | This endpoint provides comprehensive metadata about a paper. 402 | 403 | Args: 404 | paper_id (str): Paper identifier in one of the following formats: 405 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 406 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 407 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 408 | - MAG:<id> (e.g., "MAG:112218234") 409 | - ACL:<id> (e.g., "ACL:W12-3903") 410 | - PMID:<id> (e.g., "PMID:19872477") 411 | - PMCID:<id> (e.g., "PMCID:2323736") 412 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 413 | 414 | fields (Optional[List[str]]): List of fields to return. 415 | paperId is always returned. 416 | 417 | Returns: 418 | Dict: Paper details with requested fields. 419 | Always includes paperId. 420 | Returns error response if paper not found. 
421 | """ 422 | if not paper_id.strip(): 423 | return create_error_response( 424 | ErrorType.VALIDATION, 425 | "Paper ID cannot be empty" 426 | ) 427 | 428 | # Build request parameters 429 | params = {} 430 | if fields: 431 | params["fields"] = ",".join(fields) 432 | 433 | # Make the API request 434 | result = await make_request(f"/paper/{paper_id}", params) 435 | 436 | # Handle potential errors 437 | if isinstance(result, Dict) and "error" in result: 438 | error_msg = result["error"].get("message", "") 439 | if "404" in error_msg: 440 | return create_error_response( 441 | ErrorType.VALIDATION, 442 | "Paper not found", 443 | {"paper_id": paper_id} 444 | ) 445 | return result 446 | 447 | return result 448 | 449 | @mcp.tool() 450 | async def paper_batch_details( 451 | context: Context, 452 | paper_ids: List[str], 453 | fields: Optional[str] = None 454 | ) -> Dict: 455 | """ 456 | Get details for multiple papers in a single batch request. 457 | This endpoint is optimized for efficiently retrieving details about known papers. 458 | 459 | Args: 460 | paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats: 461 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 462 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 463 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 464 | - MAG:<id> (e.g., "MAG:112218234") 465 | - ACL:<id> (e.g., "ACL:W12-3903") 466 | - PMID:<id> (e.g., "PMID:19872477") 467 | - PMCID:<id> (e.g., "PMCID:2323736") 468 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 469 | Maximum: 500 IDs per request 470 | 471 | fields (Optional[str]): Comma-separated list of fields to return for each paper. 472 | paperId is always returned. 473 | 474 | Returns: 475 | List[Dict]: List of paper details with requested fields. 
476 | - Results maintain the same order as input paper_ids 477 | - Invalid or not found paper IDs return null in the results 478 | - Each paper object contains the requested fields 479 | - paperId is always included in each paper object 480 | """ 481 | # Validate inputs 482 | if not paper_ids: 483 | return create_error_response( 484 | ErrorType.VALIDATION, 485 | "Paper IDs list cannot be empty" 486 | ) 487 | 488 | if len(paper_ids) > 500: 489 | return create_error_response( 490 | ErrorType.VALIDATION, 491 | "Cannot process more than 500 paper IDs at once", 492 | {"max_papers": 500, "received": len(paper_ids)} 493 | ) 494 | 495 | # Validate fields if provided 496 | if fields: 497 | field_list = fields.split(",") 498 | invalid_fields = set(field_list) - PaperFields.VALID_FIELDS 499 | if invalid_fields: 500 | return create_error_response( 501 | ErrorType.VALIDATION, 502 | f"Invalid fields: {', '.join(invalid_fields)}", 503 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 504 | ) 505 | 506 | # Build request parameters 507 | params = {} 508 | if fields: 509 | params["fields"] = fields 510 | 511 | # Make POST request with proper structure 512 | try: 513 | async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: 514 | api_key = get_api_key() 515 | headers = {"x-api-key": api_key} if api_key else {} 516 | 517 | response = await client.post( 518 | f"{Config.BASE_URL}/paper/batch", 519 | params=params, 520 | json={"ids": paper_ids}, 521 | headers=headers 522 | ) 523 | response.raise_for_status() 524 | return response.json() 525 | 526 | except httpx.HTTPStatusError as e: 527 | if e.response.status_code == 429: 528 | return create_error_response( 529 | ErrorType.RATE_LIMIT, 530 | "Rate limit exceeded", 531 | {"retry_after": e.response.headers.get("retry-after")} 532 | ) 533 | return create_error_response( 534 | ErrorType.API_ERROR, 535 | f"HTTP error: {e.response.status_code}", 536 | {"response": e.response.text} 537 | ) 538 | except httpx.TimeoutException: 539 | 
return create_error_response( 540 | ErrorType.TIMEOUT, 541 | f"Request timed out after {Config.TIMEOUT} seconds" 542 | ) 543 | except Exception as e: 544 | return create_error_response( 545 | ErrorType.API_ERROR, 546 | str(e) 547 | ) 548 | 549 | @mcp.tool() 550 | async def paper_authors( 551 | context: Context, 552 | paper_id: str, 553 | fields: Optional[List[str]] = None, 554 | offset: int = 0, 555 | limit: int = 100 556 | ) -> Dict: 557 | """ 558 | Get details about the authors of a paper with pagination support. 559 | This endpoint provides author information and their contributions. 560 | 561 | Args: 562 | paper_id (str): Paper identifier in one of the following formats: 563 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 564 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 565 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 566 | - MAG:<id> (e.g., "MAG:112218234") 567 | - ACL:<id> (e.g., "ACL:W12-3903") 568 | - PMID:<id> (e.g., "PMID:19872477") 569 | - PMCID:<id> (e.g., "PMCID:2323736") 570 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 571 | 572 | fields (Optional[List[str]]): List of fields to return for each author. 573 | authorId is always returned. 574 | 575 | offset (int): Number of authors to skip for pagination. 576 | Default: 0 577 | 578 | limit (int): Maximum number of authors to return. 
579 | Default: 100 580 | Maximum: 1000 581 | 582 | Returns: 583 | Dict: { 584 | "offset": int, # Current offset in the results 585 | "next": int, # Next offset (if more results available) 586 | "data": List[Dict] # List of authors with requested fields 587 | } 588 | """ 589 | if not paper_id.strip(): 590 | return create_error_response( 591 | ErrorType.VALIDATION, 592 | "Paper ID cannot be empty" 593 | ) 594 | 595 | # Validate limit 596 | if limit > 1000: 597 | return create_error_response( 598 | ErrorType.VALIDATION, 599 | "Limit cannot exceed 1000", 600 | {"max_limit": 1000} 601 | ) 602 | 603 | # Validate fields 604 | if fields: 605 | invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS 606 | if invalid_fields: 607 | return create_error_response( 608 | ErrorType.VALIDATION, 609 | f"Invalid fields: {', '.join(invalid_fields)}", 610 | {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} 611 | ) 612 | 613 | # Build request parameters 614 | params = { 615 | "offset": offset, 616 | "limit": limit 617 | } 618 | if fields: 619 | params["fields"] = ",".join(fields) 620 | 621 | # Make the API request 622 | result = await make_request(f"/paper/{paper_id}/authors", params) 623 | 624 | # Handle potential errors 625 | if isinstance(result, Dict) and "error" in result: 626 | error_msg = result["error"].get("message", "") 627 | if "404" in error_msg: 628 | return create_error_response( 629 | ErrorType.VALIDATION, 630 | "Paper not found", 631 | {"paper_id": paper_id} 632 | ) 633 | return result 634 | 635 | return result 636 | 637 | @mcp.tool() 638 | async def paper_citations( 639 | context: Context, 640 | paper_id: str, 641 | fields: Optional[List[str]] = None, 642 | offset: int = 0, 643 | limit: int = 100 644 | ) -> Dict: 645 | """ 646 | Get papers that cite the specified paper (papers where this paper appears in their bibliography). 647 | This endpoint provides detailed citation information including citation contexts. 
648 | 649 | Args: 650 | paper_id (str): Paper identifier in one of the following formats: 651 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 652 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 653 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 654 | - MAG:<id> (e.g., "MAG:112218234") 655 | - ACL:<id> (e.g., "ACL:W12-3903") 656 | - PMID:<id> (e.g., "PMID:19872477") 657 | - PMCID:<id> (e.g., "PMCID:2323736") 658 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 659 | 660 | fields (Optional[List[str]]): List of fields to return for each citing paper. 661 | paperId is always returned. 662 | 663 | offset (int): Number of citations to skip for pagination. 664 | Default: 0 665 | 666 | limit (int): Maximum number of citations to return. 667 | Default: 100 668 | Maximum: 1000 669 | 670 | Returns: 671 | Dict: { 672 | "offset": int, # Current offset in the results 673 | "next": int, # Next offset (if more results available) 674 | "data": List[Dict] # List of citing papers with requested fields 675 | } 676 | """ 677 | if not paper_id.strip(): 678 | return create_error_response( 679 | ErrorType.VALIDATION, 680 | "Paper ID cannot be empty" 681 | ) 682 | 683 | # Validate limit 684 | if limit > 1000: 685 | return create_error_response( 686 | ErrorType.VALIDATION, 687 | "Limit cannot exceed 1000", 688 | {"max_limit": 1000} 689 | ) 690 | 691 | # Validate fields 692 | if fields: 693 | invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS 694 | if invalid_fields: 695 | return create_error_response( 696 | ErrorType.VALIDATION, 697 | f"Invalid fields: {', '.join(invalid_fields)}", 698 | {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)} 699 | ) 700 | 701 | # Build request parameters 702 | params = { 703 | "offset": offset, 704 | "limit": limit 705 | } 706 | if fields: 707 | params["fields"] = ",".join(fields) 708 | 709 | # Make the API request 710 | result = await make_request(f"/paper/{paper_id}/citations", params) 711 | 712 
| # Handle potential errors 713 | if isinstance(result, Dict) and "error" in result: 714 | error_msg = result["error"].get("message", "") 715 | if "404" in error_msg: 716 | return create_error_response( 717 | ErrorType.VALIDATION, 718 | "Paper not found", 719 | {"paper_id": paper_id} 720 | ) 721 | return result 722 | 723 | return result 724 | 725 | @mcp.tool() 726 | async def paper_references( 727 | context: Context, 728 | paper_id: str, 729 | fields: Optional[List[str]] = None, 730 | offset: int = 0, 731 | limit: int = 100 732 | ) -> Dict: 733 | """ 734 | Get papers cited by the specified paper (papers appearing in this paper's bibliography). 735 | This endpoint provides detailed reference information including citation contexts. 736 | 737 | Args: 738 | paper_id (str): Paper identifier in one of the following formats: 739 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 740 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 741 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 742 | - MAG:<id> (e.g., "MAG:112218234") 743 | - ACL:<id> (e.g., "ACL:W12-3903") 744 | - PMID:<id> (e.g., "PMID:19872477") 745 | - PMCID:<id> (e.g., "PMCID:2323736") 746 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 747 | 748 | fields (Optional[List[str]]): List of fields to return for each referenced paper. 749 | paperId is always returned. 750 | 751 | offset (int): Number of references to skip for pagination. 752 | Default: 0 753 | 754 | limit (int): Maximum number of references to return. 
755 | Default: 100 756 | Maximum: 1000 757 | 758 | Returns: 759 | Dict: { 760 | "offset": int, # Current offset in the results 761 | "next": int, # Next offset (if more results available) 762 | "data": List[Dict] # List of referenced papers with requested fields 763 | } 764 | """ 765 | if not paper_id.strip(): 766 | return create_error_response( 767 | ErrorType.VALIDATION, 768 | "Paper ID cannot be empty" 769 | ) 770 | 771 | # Validate limit 772 | if limit > 1000: 773 | return create_error_response( 774 | ErrorType.VALIDATION, 775 | "Limit cannot exceed 1000", 776 | {"max_limit": 1000} 777 | ) 778 | 779 | # Validate fields 780 | if fields: 781 | invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS 782 | if invalid_fields: 783 | return create_error_response( 784 | ErrorType.VALIDATION, 785 | f"Invalid fields: {', '.join(invalid_fields)}", 786 | {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)} 787 | ) 788 | 789 | # Build request parameters 790 | params = { 791 | "offset": offset, 792 | "limit": limit 793 | } 794 | if fields: 795 | params["fields"] = ",".join(fields) 796 | 797 | # Make the API request 798 | result = await make_request(f"/paper/{paper_id}/references", params) 799 | 800 | # Handle potential errors 801 | if isinstance(result, Dict) and "error" in result: 802 | error_msg = result["error"].get("message", "") 803 | if "404" in error_msg: 804 | return create_error_response( 805 | ErrorType.VALIDATION, 806 | "Paper not found", 807 | {"paper_id": paper_id} 808 | ) 809 | return result 810 | 811 | return result ```