# Directory Structure ``` ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── REFACTORING.md ├── requirements.txt ├── run.py ├── semantic_scholar │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── authors.py │ │ ├── papers.py │ │ └── recommendations.py │ ├── config.py │ ├── mcp.py │ ├── server.py │ └── utils │ ├── __init__.py │ ├── errors.py │ └── http.py ├── semantic_scholar_server.py ├── smithery.yaml ├── test │ ├── __init__.py │ ├── test_author.py │ ├── test_paper.py │ ├── test_recommend.py │ └── test_utils.py └── TOOLS.md ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. 
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # Data files *.npy *.npz *.mat *.pkl # Checkpoint files _METADATA _CHECKPOINT_METADATA # Experimental results experimental_results/ saved_models/ # VS Code .vscode/ # macOS .DS_Store ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # Semantic Scholar MCP Server [](https://smithery.ai/server/semantic-scholar-fastmcp-mcp-server) A FastMCP server implementation for the Semantic Scholar API, providing comprehensive access to academic paper data, author information, and citation networks. 
## Project Structure The project has been refactored into a modular structure for better maintainability: ``` semantic-scholar-server/ ├── semantic_scholar/ # Main package │ ├── __init__.py # Package initialization │ ├── server.py # Server setup and main functionality │ ├── mcp.py # Centralized FastMCP instance definition │ ├── config.py # Configuration classes │ ├── utils/ # Utility modules │ │ ├── __init__.py │ │ ├── errors.py # Error handling │ │ └── http.py # HTTP client and rate limiting │ ├── api/ # API endpoints │ ├── __init__.py │ ├── papers.py # Paper-related endpoints │ ├── authors.py # Author-related endpoints │ └── recommendations.py # Recommendation endpoints ├── run.py # Entry point script ``` This structure: - Separates concerns into logical modules - Makes the codebase easier to understand and maintain - Allows for better testing and future extensions - Keeps related functionality grouped together - Centralizes the FastMCP instance to avoid circular imports ## Features - **Paper Search & Discovery** - Full-text search with advanced filtering - Title-based paper matching - Paper recommendations (single and multi-paper) - Batch paper details retrieval - Advanced search with ranking strategies - **Citation Analysis** - Citation network exploration - Reference tracking - Citation context and influence analysis - **Author Information** - Author search and profile details - Publication history - Batch author details retrieval - **Advanced Features** - Complex search with multiple ranking strategies - Customizable field selection - Efficient batch operations - Rate limiting compliance - Support for both authenticated and unauthenticated access - Graceful shutdown and error handling - Connection pooling and resource management ## System Requirements - Python 3.8+ - FastMCP framework - Environment variable for API key (optional) ## Installation ### Installing via Smithery To install Semantic Scholar MCP Server for Claude Desktop automatically via 
[Smithery](https://smithery.ai/server/semantic-scholar-fastmcp-mcp-server): ```bash npx -y @smithery/cli install semantic-scholar-fastmcp-mcp-server --client claude ``` ### Manual Installation 1. Clone the repository: ```bash git clone https://github.com/YUZongmin/semantic-scholar-fastmcp-mcp-server.git cd semantic-scholar-server ``` 2. Install FastMCP and other dependencies following: https://github.com/jlowin/fastmcp 3. Configure FastMCP: For Claude Desktop users, you'll need to configure the server in your FastMCP configuration file. Add the following to your configuration (typically in `~/.config/claude-desktop/config.json`): ```json { "mcps": { "Semantic Scholar Server": { "command": "/path/to/your/venv/bin/fastmcp", "args": [ "run", "/path/to/your/semantic-scholar-server/run.py" ], "env": { "SEMANTIC_SCHOLAR_API_KEY": "your-api-key-here" # Optional } } } } ``` Make sure to: - Replace `/path/to/your/venv/bin/fastmcp` with the actual path to your FastMCP installation - Replace `/path/to/your/semantic-scholar-server/run.py` with the actual path to run.py on your machine - If you have a Semantic Scholar API key, add it to the `env` section. If not, you can remove the `env` section entirely 4. Start using the server: The server will now be available to your Claude Desktop instance. No need to manually run any commands - Claude will automatically start and manage the server process when needed. ### API Key (Optional) To get higher rate limits and better performance: 1. Get an API key from [Semantic Scholar API](https://www.semanticscholar.org/product/api) 2. Add it to your FastMCP configuration as shown above in the `env` section If no API key is provided, the server will use unauthenticated access with lower rate limits. 
## Configuration ### Environment Variables - `SEMANTIC_SCHOLAR_API_KEY`: Your Semantic Scholar API key (optional) - Get your key from [Semantic Scholar API](https://www.semanticscholar.org/product/api) - If not provided, the server will use unauthenticated access ### Rate Limits The server automatically adjusts to the appropriate rate limits: **With API Key**: - Search, batch and recommendation endpoints: 1 request per second - Other endpoints: 10 requests per second **Without API Key**: - All endpoints: 100 requests per 5 minutes - Longer timeouts for requests ## Available MCP Tools > Note: All tools are aligned with the official [Semantic Scholar API documentation](https://api.semanticscholar.org/api-docs/). Please refer to the official documentation for detailed field specifications and the latest updates. ### Paper Search Tools - `paper_relevance_search`: Search for papers using relevance ranking - Supports comprehensive query parameters including year range and citation count filters - Returns paginated results with customizable fields - `paper_bulk_search`: Bulk paper search with sorting options - Similar to relevance search but optimized for larger result sets - Supports sorting by citation count, publication date, etc. - `paper_title_search`: Find papers by exact title match - Useful for finding specific papers when you know the title - Returns detailed paper information with customizable fields - `paper_details`: Get comprehensive details about a specific paper - Accepts various paper ID formats (S2 ID, DOI, ArXiv, etc.) 
- Returns detailed paper metadata with nested field support - `paper_batch_details`: Efficiently retrieve details for multiple papers - Accepts up to 1000 paper IDs per request - Supports the same ID formats and fields as single paper details ### Citation Tools - `paper_citations`: Get papers that cite a specific paper - Returns paginated list of citing papers - Includes citation context when available - Supports field customization and sorting - `paper_references`: Get papers referenced by a specific paper - Returns paginated list of referenced papers - Includes reference context when available - Supports field customization and sorting ### Author Tools - `author_search`: Search for authors by name - Returns paginated results with customizable fields - Includes affiliations and publication counts - `author_details`: Get detailed information about an author - Returns comprehensive author metadata - Includes metrics like h-index and citation counts - `author_papers`: Get papers written by an author - Returns paginated list of author's publications - Supports field customization and sorting - `author_batch_details`: Get details for multiple authors - Efficiently retrieve information for up to 1000 authors - Returns the same fields as single author details ### Recommendation Tools - `paper_recommendations_single`: Get recommendations based on a single paper - Returns similar papers based on content and citation patterns - Supports field customization for recommended papers - `paper_recommendations_multi`: Get recommendations based on multiple papers - Accepts positive and negative example papers - Returns papers similar to positive examples and dissimilar to negative ones ## Usage Examples ### Basic Paper Search ```python results = await paper_relevance_search( context, query="machine learning", year="2020-2024", min_citation_count=50, fields=["title", "abstract", "authors"] ) ``` ### Paper Recommendations ```python # Single paper recommendation recommendations = 
await paper_recommendations_single( context, paper_id="649def34f8be52c8b66281af98ae884c09aef38b", fields="title,authors,year" ) # Multi-paper recommendation recommendations = await paper_recommendations_multi( context, positive_paper_ids=["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], negative_paper_ids=["ArXiv:1805.02262"], fields="title,abstract,authors" ) ``` ### Batch Operations ```python # Get details for multiple papers papers = await paper_batch_details( context, paper_ids=["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], fields="title,authors,year,citations" ) # Get details for multiple authors authors = await author_batch_details( context, author_ids=["1741101", "1780531"], fields="name,hIndex,citationCount,paperCount" ) ``` ## Error Handling The server provides standardized error responses: ```python { "error": { "type": "error_type", # rate_limit, api_error, validation, timeout "message": "Error description", "details": { # Additional context "authenticated": true/false # Indicates if request was authenticated } } } ``` ``` -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- ```python """Test package for semantic-scholar-server""" ``` -------------------------------------------------------------------------------- /semantic_scholar/utils/__init__.py: -------------------------------------------------------------------------------- ```python """ Utility modules for the Semantic Scholar API Server. """ ``` -------------------------------------------------------------------------------- /semantic_scholar/__init__.py: -------------------------------------------------------------------------------- ```python """ Semantic Scholar API Server Package A FastMCP-based server for accessing the Semantic Scholar Academic Graph API. 
""" __version__ = "0.1.0" ``` -------------------------------------------------------------------------------- /semantic_scholar/mcp.py: -------------------------------------------------------------------------------- ```python """ Central definition of the FastMCP instance. """ from fastmcp import FastMCP # Create FastMCP instance mcp = FastMCP("Semantic Scholar Server") ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- ``` # HTTP client httpx>=0.24.0 # Testing pytest>=7.3.1 pytest-asyncio>=0.21.0 # Environment python-dotenv>=1.0.0 # Server dependencies uvicorn>=0.27.1 fastmcp>=0.1.0 ``` -------------------------------------------------------------------------------- /semantic_scholar/api/__init__.py: -------------------------------------------------------------------------------- ```python """ API endpoints for the Semantic Scholar API Server. """ # Import all endpoints to make them available when importing the package from .papers import ( paper_relevance_search, paper_bulk_search, paper_title_search, paper_details, paper_batch_details, paper_authors, paper_citations, paper_references ) from .authors import ( author_search, author_details, author_papers, author_batch_details ) from .recommendations import ( get_paper_recommendations_single, get_paper_recommendations_multi ) ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml startCommand: type: stdio configSchema: # JSON Schema defining the configuration options for the MCP. type: object required: [] properties: semanticScholarApiKey: type: string description: The API key for the Semantic Scholar server. Optional for authenticated access. 
commandFunction: # A function that produces the CLI command to start the MCP on stdio. |- (config) => ({command:'python',args:['semantic_scholar_server.py'],env:{SEMANTIC_SCHOLAR_API_KEY:config.semanticScholarApiKey || ''}}) ``` -------------------------------------------------------------------------------- /semantic_scholar/utils/errors.py: -------------------------------------------------------------------------------- ```python """ Error handling utilities for the Semantic Scholar API Server. """ from typing import Dict, Optional from ..config import ErrorType def create_error_response( error_type: ErrorType, message: str, details: Optional[Dict] = None ) -> Dict: """ Create a standardized error response. Args: error_type: The type of error that occurred. message: A human-readable message describing the error. details: Optional additional details about the error. Returns: A dictionary with the error information. """ return { "error": { "type": error_type.value, "message": message, "details": details or {} } } ``` -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ Entry point script for the Semantic Scholar API Server. 
Available tools: - paper_relevance_search - paper_bulk_search - paper_title_search - paper_details - paper_batch_details - paper_authors - paper_citations - paper_references - author_search - author_details - author_papers - author_batch_details - get_paper_recommendations_single - get_paper_recommendations_multi """ # Import the mcp instance from centralized location from semantic_scholar.mcp import mcp # Import the main function from server from semantic_scholar.server import main # Import all API modules to ensure tools are registered from semantic_scholar.api import papers, authors, recommendations if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile # Start from a base Python image FROM python:3.8-slim # Set the working directory WORKDIR /app # Copy the requirements file first to leverage Docker cache COPY requirements.txt /app/requirements.txt RUN pip install --no-cache-dir -r requirements.txt # Copy the rest of the application code # This includes the 'semantic_scholar' package and 'run.py' COPY . 
/app # Alternatively, be more specific: # COPY semantic_scholar /app/semantic_scholar # COPY run.py /app/run.py # Expose the port that the MCP server will run on EXPOSE 8000 # Set the environment variable for the API key (placeholder) # Glama or the user should provide the actual key at runtime ENV SEMANTIC_SCHOLAR_API_KEY="" # Command to run the server using the refactored entry point CMD ["python", "run.py"] ``` -------------------------------------------------------------------------------- /test/test_recommend.py: -------------------------------------------------------------------------------- ```python import unittest import asyncio import os from typing import Optional, List, Dict from .test_utils import make_request, create_error_response, ErrorType, Config class TestRecommendationTools(unittest.TestCase): def setUp(self): """Set up test environment""" # API key is required for recommendations api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY") if not api_key: raise ValueError("SEMANTIC_SCHOLAR_API_KEY environment variable is required for recommendation tests") # Create event loop for async tests self.loop = asyncio.new_event_loop() asyncio.set_event_loop(self.loop) # Sample paper IDs for testing (using full IDs) self.sample_paper_id = "204e3073870fae3d05bcbc2f6a8e263d9b72e776" # "Attention is All You Need" self.positive_paper_ids = [ self.sample_paper_id, "df2b0e26d0599ce3e70df8a9da02e51594e0e992" # BERT ] self.negative_paper_ids = [ "649def34f8be52c8b66281af98ae884c09aef38b" # Different topic ] def tearDown(self): """Clean up after tests""" self.loop.close() def run_async(self, coro): """Helper to run async functions in tests""" return self.loop.run_until_complete(coro) async def async_test_with_delay(self, coro): """Helper to run async tests with delay to handle rate limiting""" await asyncio.sleep(1) # Add 1 second delay between tests return await coro def test_paper_recommendations_single(self): """Test single paper recommendations functionality""" result = 
self.run_async(self.async_test_with_delay(make_request( f"papers/forpaper/{self.sample_paper_id}", # Using full paper ID params={ "fields": "title,year" # Minimal fields } ))) self.assertIn("recommendedPapers", result) self.assertTrue(isinstance(result["recommendedPapers"], list)) def test_paper_recommendations_multi(self): """Test multi-paper recommendations functionality""" result = self.run_async(self.async_test_with_delay(make_request( "papers", # No leading slash method="POST", params={"fields": "title,year"}, # Minimal fields json={ "positivePaperIds": self.positive_paper_ids, # Changed key name to match API "negativePaperIds": self.negative_paper_ids } ))) self.assertIn("recommendedPapers", result) self.assertTrue(isinstance(result["recommendedPapers"], list)) if __name__ == '__main__': unittest.main() ``` -------------------------------------------------------------------------------- /semantic_scholar/server.py: -------------------------------------------------------------------------------- ```python """ Main server module for the Semantic Scholar API Server. 
""" import logging import asyncio import signal # Import mcp from centralized location from .mcp import mcp from .utils.http import initialize_client, cleanup_client # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Import API modules to register tools # Note: This must come AFTER mcp is initialized from .api import papers, authors, recommendations async def handle_exception(loop, context): """Global exception handler for the event loop.""" msg = context.get("exception", context["message"]) logger.error(f"Caught exception: {msg}") asyncio.create_task(shutdown()) async def shutdown(): """Gracefully shut down the server.""" logger.info("Initiating graceful shutdown...") # Cancel all tasks tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] for task in tasks: task.cancel() try: await task except asyncio.CancelledError: pass # Cleanup resources await cleanup_client() await mcp.cleanup() logger.info(f"Cancelled {len(tasks)} tasks") logger.info("Shutdown complete") def init_signal_handlers(loop): """Initialize signal handlers for graceful shutdown.""" for sig in (signal.SIGTERM, signal.SIGINT): loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown())) logger.info("Signal handlers initialized") async def run_server(): """Run the server with proper async context management.""" async with mcp: try: # Initialize HTTP client await initialize_client() # Start the server logger.info("Starting Semantic Scholar Server") await mcp.run_async() except Exception as e: logger.error(f"Server error: {e}") raise finally: await shutdown() def main(): """Main entry point for the server.""" try: # Set up event loop with exception handler loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) loop.set_exception_handler(handle_exception) # Initialize signal handlers init_signal_handlers(loop) # Run the server loop.run_until_complete(run_server()) except KeyboardInterrupt: logger.info("Received keyboard 
interrupt, shutting down...") except Exception as e: logger.error(f"Fatal error: {str(e)}") finally: try: loop.run_until_complete(asyncio.sleep(0)) # Let pending tasks complete loop.close() except Exception as e: logger.error(f"Error during final cleanup: {str(e)}") logger.info("Server stopped") if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /test/test_author.py: -------------------------------------------------------------------------------- ```python import unittest import asyncio import os from typing import Optional, List, Dict from .test_utils import make_request, create_error_response, ErrorType, Config class TestAuthorTools(unittest.TestCase): def setUp(self): """Set up test environment""" # You can set your API key here for testing os.environ["SEMANTIC_SCHOLAR_API_KEY"] = "" # Optional # Create event loop for async tests self.loop = asyncio.new_event_loop() asyncio.set_event_loop(self.loop) # Sample author IDs for testing self.sample_author_id = "1741101" # Andrew Ng self.sample_author_ids = [ self.sample_author_id, "2061296" # Yann LeCun ] def tearDown(self): """Clean up after tests""" self.loop.close() def run_async(self, coro): """Helper to run async functions in tests""" return self.loop.run_until_complete(coro) async def async_test_with_delay(self, coro): """Helper to run async tests with delay to handle rate limiting""" await asyncio.sleep(1) # Add 1 second delay between tests return await coro def test_author_search(self): """Test author search functionality""" result = self.run_async(self.async_test_with_delay(make_request( "/author/search", params={ "query": "Andrew Ng", "fields": "name,affiliations,paperCount" } ))) self.assertIn("data", result) self.assertIn("total", result) def test_author_details(self): """Test author details functionality""" result = self.run_async(self.async_test_with_delay(make_request( f"/author/{self.sample_author_id}", params={ "fields": 
"name,affiliations,paperCount,citationCount,hIndex" } ))) self.assertIn("authorId", result) self.assertIn("name", result) def test_author_papers(self): """Test author papers functionality""" result = self.run_async(self.async_test_with_delay(make_request( f"/author/{self.sample_author_id}/papers", params={ "fields": "title,year,citationCount", "limit": 10 } ))) self.assertIn("data", result) self.assertIn("next", result) self.assertIn("offset", result) self.assertTrue(isinstance(result["data"], list)) def test_author_batch_details(self): """Test batch author details functionality""" result = self.run_async(self.async_test_with_delay(make_request( "/author/batch", method="POST", params={"fields": "name,affiliations,paperCount"}, json={"ids": self.sample_author_ids} ))) self.assertTrue(isinstance(result, list)) self.assertEqual(len(result), len(self.sample_author_ids)) if __name__ == '__main__': unittest.main() ``` -------------------------------------------------------------------------------- /TOOLS.md: -------------------------------------------------------------------------------- ```markdown # Semantic Scholar Server Tools This document lists all the tools available in the Semantic Scholar API Server. ## Paper-related Tools ### `paper_relevance_search` Search for papers on Semantic Scholar using relevance-based ranking. ```json { "query": "quantum computing", "fields": ["title", "abstract", "year", "authors"], "limit": 10 } ``` ### `paper_bulk_search` Bulk search for papers with advanced filtering and sorting options. ```json { "query": "machine learning", "fields": ["title", "abstract", "authors"], "sort": "citationCount:desc" } ``` ### `paper_title_search` Find a specific paper by matching its title. ```json { "query": "Attention Is All You Need", "fields": ["title", "abstract", "authors", "year"] } ``` ### `paper_details` Get detailed information about a specific paper by ID. 
```json { "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", "fields": ["title", "abstract", "authors", "citations"] } ``` ### `paper_batch_details` Get details for multiple papers in one request. ```json { "paper_ids": ["649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928"], "fields": "title,abstract,authors" } ``` ### `paper_authors` Get the authors of a specific paper. ```json { "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", "fields": ["name", "affiliations"] } ``` ### `paper_citations` Get papers that cite a specific paper. ```json { "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", "fields": ["title", "year", "authors"], "limit": 50 } ``` ### `paper_references` Get papers referenced by a specific paper. ```json { "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", "fields": ["title", "year", "authors"], "limit": 50 } ``` ## Author-related Tools ### `author_search` Search for authors by name. ```json { "query": "Albert Einstein", "fields": ["name", "affiliations", "paperCount"] } ``` ### `author_details` Get detailed information about a specific author. ```json { "author_id": "1741101", "fields": ["name", "affiliations", "papers", "citationCount"] } ``` ### `author_papers` Get papers written by a specific author. ```json { "author_id": "1741101", "fields": ["title", "year", "venue"], "limit": 50 } ``` ### `author_batch_details` Get details for multiple authors at once. ```json { "author_ids": ["1741101", "1741102"], "fields": "name,affiliations,paperCount,citationCount" } ``` ## Recommendation Tools ### `get_paper_recommendations_single` Get paper recommendations based on a single paper. ```json { "paper_id": "649def34f8be52c8b66281af98ae884c09aef38b", "fields": "title,authors,year,abstract", "limit": 20 } ``` ### `get_paper_recommendations_multi` Get paper recommendations based on multiple papers. 
```json { "positive_paper_ids": [ "649def34f8be52c8b66281af98ae884c09aef38b", "ARXIV:2106.15928" ], "negative_paper_ids": ["ARXIV:1805.02262"], "fields": "title,authors,year", "limit": 20 } ``` ## Note - The tool name in the error message (`read_paper`) does not exist in this server - Use one of the tools listed above instead - Always include the required parameters for each tool ``` -------------------------------------------------------------------------------- /REFACTORING.md: -------------------------------------------------------------------------------- ```markdown # Semantic Scholar Server Refactoring This document describes the refactoring of the Semantic Scholar server from a single monolithic file to a modular package structure. ## Motivation The original implementation consisted of a single 2,200+ line Python file (`semantic_scholar_server.py`), which made it difficult to: - Understand the overall structure - Locate specific functionality - Debug issues - Make focused changes - Test individual components ## Refactoring Approach We used a modular package approach, separating concerns into logical components: ``` semantic-scholar-server/ ├── semantic_scholar/ # Main package │ ├── __init__.py # Package initialization │ ├── server.py # Server setup and main functionality │ ├── mcp.py # Centralized FastMCP instance definition │ ├── config.py # Configuration classes │ ├── utils/ # Utility modules │ │ ├── __init__.py │ │ ├── errors.py # Error handling │ │ └── http.py # HTTP client and rate limiting │ ├── api/ # API endpoints │ ├── __init__.py │ ├── papers.py # Paper-related endpoints │ ├── authors.py # Author-related endpoints │ └── recommendations.py # Recommendation endpoints ├── run.py # Entry point script ``` ## Key Improvements 1. 
**Separation of Concerns** - Config classes in their own module - Utilities separated from business logic - API endpoints grouped by domain (papers, authors, recommendations) - Server infrastructure code isolated - FastMCP instance centralized in its own module 2. **Improved Maintainability** - Each file has a single responsibility - Files are much smaller and easier to understand - Clear imports show dependencies between modules - Better docstrings and code organization - No circular dependencies between modules 3. **Enhanced Extensibility** - Adding new endpoints only requires changes to the relevant module - Utilities can be reused across the codebase - Configuration is centralized - Testing individual components is much easier - Each module imports the FastMCP instance from a central location 4. **Clearer Entry Point** - `run.py` provides a simple way to start the server - Server initialization is separated from the API logic - All modules consistently import the FastMCP instance from mcp.py ## Migration Guide The refactored code maintains the same functionality and API as the original implementation. To migrate: 1. Replace the original `semantic_scholar_server.py` with the new package structure 2. Update any import statements that referenced the original file 3. Use `run.py` as the new entry point No changes to API usage are required - all tool functions maintain the same signatures and behavior. ## Future Improvements The modular structure enables several future improvements: 1. **Testing**: Add unit tests for individual components 2. **Caching**: Implement caching layer for improved performance 3. **Logging**: Enhanced logging throughout the application 4. **Metrics**: Add performance monitoring 5. 
async def make_request(endpoint: str, params: Dict = None, method: str = "GET", json: Dict = None) -> Dict:
    """Make a request to the Semantic Scholar API.

    Args:
        endpoint: API endpoint path, with or without a leading slash.
            Paths starting with "recommendations" or "papers/forpaper"
            are routed to the recommendations API base URL.
        params: Optional query parameters.
        method: HTTP method, "GET" (default) or "POST".
        json: Optional JSON body for POST requests.

    Returns:
        The parsed JSON response on success, or an error dict built by
        create_error_response() on failure.
    """
    try:
        api_key = get_api_key()
        headers = {"x-api-key": api_key} if api_key else {}
        params = params or {}

        # Normalize the endpoint FIRST so the routing check below also
        # recognizes "/recommendations/..." (previously the check ran on the
        # un-stripped path and missed endpoints with a leading slash).
        if endpoint.startswith("/"):
            endpoint = endpoint[1:]

        # Choose base URL based on endpoint
        is_recommendations = endpoint.startswith("recommendations") or endpoint.startswith("papers/forpaper")
        base_url = Config.RECOMMENDATIONS_BASE_URL if is_recommendations else Config.GRAPH_BASE_URL

        if is_recommendations:
            # BUG FIX: the old code sliced endpoint[15:], but
            # "recommendations/" is 16 characters long, which left a leading
            # "/" behind and produced URLs containing a double slash.
            endpoint = endpoint.removeprefix("recommendations/")

        url = f"{base_url}/{endpoint}"
        logger.info(f"Making {method} request to {url}")
        logger.info(f"Headers: {headers}")
        logger.info(f"Params: {params}")
        if json:
            logger.info(f"JSON body: {json}")

        async with httpx.AsyncClient(timeout=Config.TIMEOUT, follow_redirects=True) as client:
            if method == "GET":
                response = await client.get(url, params=params, headers=headers)
            else:  # POST
                response = await client.post(url, params=params, json=json, headers=headers)

            logger.info(f"Response status: {response.status_code}")
            logger.info(f"Response body: {response.text}")
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
"649def34f8be52c8b66281af98ae884c09aef38b" self.sample_paper_ids = [ self.sample_paper_id, "ARXIV:2106.15928" ] def tearDown(self): """Clean up after tests""" self.loop.close() def run_async(self, coro): """Helper to run async functions in tests""" return self.loop.run_until_complete(coro) async def async_test_with_delay(self, endpoint: str, **kwargs): """Helper to run async tests with delay to handle rate limiting""" await asyncio.sleep(random.uniform(5, 8)) # Random initial delay max_retries = 5 base_delay = 8 for attempt in range(max_retries): result = await make_request(endpoint, **kwargs) if not isinstance(result, dict) or "error" not in result: return result if result["error"]["type"] == "rate_limit": delay = base_delay * (2 ** attempt) + random.uniform(0, 2) # Add jitter await asyncio.sleep(delay) continue else: return result return result # Return last result if all retries failed @classmethod def setUpClass(cls): """Set up class-level test environment""" # Add initial delay before any tests run asyncio.get_event_loop().run_until_complete(asyncio.sleep(10)) def test_paper_relevance_search(self): """Test paper relevance search functionality""" # Test basic search result = self.run_async(self.async_test_with_delay( "paper/search", # Remove leading slash params={ "query": "quantum computing", "fields": "title,abstract,year" } )) self.assertNotIn("error", result) self.assertIn("data", result) self.assertIn("total", result) # Test with filters result = self.run_async(self.async_test_with_delay( "paper/search", params={ "query": "machine learning", "fields": "title,year", "minCitationCount": 100, "year": "2020-2023" } )) self.assertNotIn("error", result) self.assertIn("data", result) def test_paper_bulk_search(self): """Test paper bulk search functionality""" result = self.run_async(self.async_test_with_delay( "paper/search/bulk", # Remove leading slash params={ "query": "neural networks", "fields": "title,year,authors", "sort": "citationCount:desc" } )) 
class RateLimiter:
    """
    Rate limiter for API requests to prevent exceeding API limits.

    Limits are (requests, seconds) tuples from RateLimitConfig; the limiter
    enforces a minimum spacing of seconds/requests between consecutive calls
    to the same endpoint.
    """
    def __init__(self):
        self._last_call_time = {}  # endpoint -> timestamp of the last request
        self._locks = {}           # endpoint -> asyncio.Lock serializing acquire()

    def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]:
        """Get the appropriate (requests, seconds) rate limit for an endpoint."""
        if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS):
            if "batch" in endpoint:
                return RateLimitConfig.BATCH_LIMIT
            if "search" in endpoint:
                return RateLimitConfig.SEARCH_LIMIT
            return RateLimitConfig.DEFAULT_LIMIT
        return RateLimitConfig.DEFAULT_LIMIT

    async def acquire(self, endpoint: str):
        """
        Acquire permission to make a request, waiting if necessary to
        respect rate limits.

        Args:
            endpoint: The API endpoint being accessed.
        """
        if endpoint not in self._locks:
            self._locks[endpoint] = asyncio.Lock()
            self._last_call_time[endpoint] = 0

        async with self._locks[endpoint]:
            requests, seconds = self._get_rate_limit(endpoint)
            # BUG FIX: the previous code waited the full window length
            # (rate_limit[1]) between calls, throttling every endpoint to one
            # request per window regardless of the configured request count
            # (e.g. DEFAULT_LIMIT = (10, 1) was limited to 1 req/s, not 10).
            # The correct minimum spacing is seconds / requests.
            min_interval = seconds / requests
            elapsed = time.time() - self._last_call_time[endpoint]
            if elapsed < min_interval:
                await asyncio.sleep(min_interval - elapsed)
            self._last_call_time[endpoint] = time.time()
async def make_request(endpoint: str, params: Dict = None) -> Dict:
    """
    Make a rate-limited GET request to the Semantic Scholar API.

    Args:
        endpoint: The API endpoint to call (appended to Config.BASE_URL).
        params: Optional query parameters.

    Returns:
        The parsed JSON response, or an error response dictionary.
    """
    try:
        # Respect per-endpoint rate limits before touching the network.
        await rate_limiter.acquire(endpoint)

        # Attach the API key header only when one is configured.
        api_key = get_api_key()
        headers = {"x-api-key": api_key} if api_key else {}

        # Reuse the pooled global client rather than opening one per call.
        client = await initialize_client()
        response = await client.get(f"{Config.BASE_URL}{endpoint}", params=params, headers=headers)
        response.raise_for_status()
        return response.json()

    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        logger.error(f"HTTP error {status} for {endpoint}: {exc.response.text}")
        if status == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": exc.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {status}",
            {"response": exc.response.text}
        )
    except httpx.TimeoutException as exc:
        logger.error(f"Request timeout for {endpoint}: {str(exc)}")
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as exc:
        logger.error(f"Unexpected error for {endpoint}: {str(exc)}")
        return create_error_response(
            ErrorType.API_ERROR,
            str(exc)
        )
"citationCount"] # Valid fields from API documentation VALID_FIELDS = { "abstract", "authors", "citationCount", "citations", "corpusId", "embedding", "externalIds", "fieldsOfStudy", "influentialCitationCount", "isOpenAccess", "openAccessPdf", "paperId", "publicationDate", "publicationTypes", "publicationVenue", "references", "s2FieldsOfStudy", "title", "tldr", "url", "venue", "year" } class AuthorDetailFields: """Common field combinations for author details""" # Basic author information BASIC = ["name", "url", "affiliations"] # Author's papers information PAPERS_BASIC = ["papers"] # Returns paperId and title PAPERS_DETAILED = [ "papers.year", "papers.authors", "papers.abstract", "papers.venue", "papers.url" ] # Complete author profile COMPLETE = BASIC + ["papers", "papers.year", "papers.authors", "papers.venue"] # Citation metrics METRICS = ["citationCount", "hIndex", "paperCount"] # Valid fields for author details VALID_FIELDS = { "authorId", "name", "url", "affiliations", "papers", "papers.year", "papers.authors", "papers.abstract", "papers.venue", "papers.url", "citationCount", "hIndex", "paperCount" } class PaperDetailFields: """Common field combinations for paper details""" # Basic paper information BASIC = ["title", "abstract", "year", "venue"] # Author information AUTHOR_BASIC = ["authors"] AUTHOR_DETAILED = ["authors.url", "authors.paperCount", "authors.citationCount"] # Citation information CITATION_BASIC = ["citations", "references"] CITATION_DETAILED = ["citations.title", "citations.abstract", "citations.year", "references.title", "references.abstract", "references.year"] # Full paper details COMPLETE = BASIC + AUTHOR_BASIC + CITATION_BASIC + ["url", "fieldsOfStudy", "publicationVenue", "publicationTypes"] class CitationReferenceFields: """Common field combinations for citation and reference queries""" # Basic information BASIC = ["title"] # Citation/Reference context CONTEXT = ["contexts", "intents", "isInfluential"] # Paper details DETAILED = ["title", 
"abstract", "authors", "year", "venue"] # Full information COMPLETE = CONTEXT + DETAILED # Valid fields for citation/reference queries VALID_FIELDS = { "contexts", "intents", "isInfluential", "title", "abstract", "authors", "year", "venue", "paperId", "url", "citationCount", "influentialCitationCount" } # Configuration class Config: # API Configuration API_VERSION = "v1" BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}" TIMEOUT = 30 # seconds # Request Limits MAX_BATCH_SIZE = 100 MAX_RESULTS_PER_PAGE = 100 DEFAULT_PAGE_SIZE = 10 MAX_BATCHES = 5 # Fields Configuration DEFAULT_FIELDS = PaperFields.DEFAULT # Feature Flags ENABLE_CACHING = False DEBUG_MODE = False # Search Configuration SEARCH_TYPES = { "comprehensive": { "description": "Balanced search considering relevance and impact", "min_citations": None, "ranking_strategy": "balanced" }, "influential": { "description": "Focus on highly-cited and influential papers", "min_citations": 50, "ranking_strategy": "citations" }, "latest": { "description": "Focus on recent papers with impact", "min_citations": None, "ranking_strategy": "recency" } } ``` -------------------------------------------------------------------------------- /semantic_scholar/api/recommendations.py: -------------------------------------------------------------------------------- ```python """ Recommendation-related API endpoints for the Semantic Scholar API. """ from typing import Dict, List, Optional from fastmcp import Context import httpx # Import mcp from centralized location instead of server from ..mcp import mcp from ..config import Config, ErrorType from ..utils.http import rate_limiter, get_api_key from ..utils.errors import create_error_response @mcp.tool() async def get_paper_recommendations_single( context: Context, paper_id: str, fields: Optional[str] = None, limit: int = 100, from_pool: str = "recent" ) -> Dict: """ Get paper recommendations based on a single seed paper. 
This endpoint is optimized for finding papers similar to a specific paper. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - CorpusId:<id> (e.g., "CorpusId:215416146") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[str]): Comma-separated list of fields to return for each paper. paperId is always returned. limit (int): Maximum number of recommendations to return. Default: 100 Maximum: 500 from_pool (str): Which pool of papers to recommend from. Options: - "recent": Recent papers (default) - "all-cs": All computer science papers Default: "recent" Returns: Dict: { "recommendedPapers": List[Dict] # List of recommended papers with requested fields } """ try: # Apply rate limiting endpoint = "/recommendations" await rate_limiter.acquire(endpoint) # Validate limit if limit > 500: return create_error_response( ErrorType.VALIDATION, "Cannot request more than 500 recommendations", {"max_limit": 500, "requested": limit} ) # Validate pool if from_pool not in ["recent", "all-cs"]: return create_error_response( ErrorType.VALIDATION, "Invalid paper pool specified", {"valid_pools": ["recent", "all-cs"]} ) # Build request parameters params = { "limit": limit, "from": from_pool } if fields: params["fields"] = fields # Make the API request async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: api_key = get_api_key() headers = {"x-api-key": api_key} if api_key else {} url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}" response = await client.get(url, params=params, headers=headers) # Handle specific error cases if response.status_code == 404: return create_error_response( 
@mcp.tool()
async def get_paper_recommendations_multi(
    context: Context,
    positive_paper_ids: List[str],
    negative_paper_ids: Optional[List[str]] = None,
    fields: Optional[str] = None,
    limit: int = 100
) -> Dict:
    """
    Get paper recommendations based on multiple positive and optional negative examples.

    Recommends papers similar to the positive examples while steering away
    from papers similar to the negative examples.

    Args:
        positive_paper_ids (List[str]): Paper IDs to use as positive examples.
            Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
        negative_paper_ids (Optional[List[str]]): Paper IDs to use as negative
            examples (same ID formats). Papers similar to these are avoided.
        fields (Optional[str]): Comma-separated list of fields to return for
            each paper. paperId is always returned.
        limit (int): Maximum number of recommendations. Default: 100, Maximum: 500.

    Returns:
        Dict: {"recommendedPapers": List[Dict]} with the requested fields,
            or an error response dictionary.
    """
    try:
        # Apply rate limiting before any work.
        await rate_limiter.acquire("/recommendations")

        # Validate inputs up front.
        if not positive_paper_ids:
            return create_error_response(
                ErrorType.VALIDATION,
                "Must provide at least one positive paper ID"
            )
        if limit > 500:
            return create_error_response(
                ErrorType.VALIDATION,
                "Cannot request more than 500 recommendations",
                {"max_limit": 500, "requested": limit}
            )

        # Assemble query parameters and the JSON payload.
        params = {"limit": limit}
        if fields:
            params["fields"] = fields
        payload = {
            "positivePaperIds": positive_paper_ids,
            "negativePaperIds": negative_paper_ids or []
        }

        api_key = get_api_key()
        headers = {"x-api-key": api_key} if api_key else {}
        url = "https://api.semanticscholar.org/recommendations/v1/papers"

        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            response = await client.post(url, params=params, json=payload, headers=headers)

            # A 404 here means the API rejected one of the seed papers.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "One or more input papers not found",
                    {
                        "positive_ids": positive_paper_ids,
                        "negative_ids": negative_paper_ids
                    }
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        import logging
        logger = logging.getLogger(__name__)
        logger.error(f"Unexpected error in recommendations: {str(e)}")
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
@mcp.tool()
async def author_details(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get detailed information about an author by their ID.

    This endpoint provides comprehensive metadata about an author.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Example: "1741101" (Albert Einstein)
        fields (Optional[List[str]]): List of fields to return.
            authorId is always returned.
            Available fields include name, papers, citationCount, etc.

    Returns:
        Dict: Author details with requested fields.
            Always includes authorId.
            Returns error response if author not found.
    """
    if not author_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate fields against the known-valid set before hitting the API.
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {}
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/author/{author_id}", params)

    # Translate the generic HTTP 404 error into a clearer validation error.
    # FIX: use the builtin `dict` in isinstance (typing.Dict is meant for
    # annotations, not runtime checks) and drop the redundant nested
    # `return result` that duplicated the final return.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )
    return result
@mcp.tool()
async def author_batch_details(
    context: Context,
    author_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple authors in a single batch request.

    This endpoint is optimized for efficiently retrieving details about
    known authors.

    Args:
        author_ids (List[str]): List of Semantic Scholar author IDs.
            These are unique identifiers assigned by Semantic Scholar.
            Example: ["1741101", "1741102"]
            Maximum: 1000 IDs per request
        fields (Optional[str]): Comma-separated list of fields to return
            for each author. authorId is always returned.

    Returns:
        List[Dict]: Author details in the same order as the input IDs.
            Invalid or not-found author IDs yield null entries; each
            returned object contains the requested fields and always
            includes authorId.
    """
    # Validate inputs before making any network call.
    if not author_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author IDs list cannot be empty"
        )
    if len(author_ids) > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 1000 author IDs at once",
            {"max_authors": 1000, "received": len(author_ids)}
        )

    # Reject unknown field names early.
    if fields:
        unknown = set(fields.split(",")) - AuthorDetailFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    params = {"fields": fields} if fields else {}

    # The batch endpoint requires a POST with the IDs in the JSON body,
    # so it bypasses the shared GET helper and talks to httpx directly.
    try:
        import httpx
        from ..config import Config
        from ..utils.http import get_api_key

        api_key = get_api_key()
        headers = {"x-api-key": api_key} if api_key else {}

        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            response = await client.post(
                f"{Config.BASE_URL}/author/batch",
                params=params,
                json={"ids": author_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
@mcp.tool()
async def paper_relevance_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 10
) -> Dict:
    """
    Search for papers on Semantic Scholar using relevance-based ranking.

    Finds the most relevant papers matching a text query; results are
    sorted by relevance score.

    Args:
        query (str): A text query to search for. Matched against paper
            titles, abstracts, venue names, and author names.
        fields (Optional[List[str]]): Fields to return for each paper.
            paperId and title are always returned.
        publication_types (Optional[List[str]]): Filter by publication types.
        open_access_pdf (bool): If True, only include papers with a public
            PDF. Default: False
        min_citation_count (Optional[int]): Minimum number of citations required.
        year (Optional[str]): Filter by publication year. Supports:
            single year "2019", range "2016-2020", since "2010-", until "-2015".
        venue (Optional[List[str]]): Filter by publication venues
            (full names or ISO4 abbreviations).
        fields_of_study (Optional[List[str]]): Filter by fields of study.
        offset (int): Number of results to skip for pagination. Default: 0
        limit (int): Maximum number of results. Default: 10, Maximum: 100.

    Returns:
        Dict: {
            "total": int,    # Total papers matching the query
            "offset": int,   # Current offset
            "next": int,     # Offset of the next page (if available)
            "data": List[Dict]  # Papers with requested fields
        }
    """
    if not query.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Fall back to the default field set; otherwise reject unknown names.
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    # Core parameters; limit is silently capped at the API maximum of 100.
    params = {
        "query": query,
        "offset": offset,
        "limit": min(limit, 100),
        "fields": ",".join(fields)
    }

    # Optional filters, included only when the caller supplied a value.
    optional_filters = {
        "publicationTypes": ",".join(publication_types) if publication_types else None,
        "openAccessPdf": "true" if open_access_pdf else None,
        "minCitationCount": min_citation_count,
        "year": year if year else None,
        "venue": ",".join(venue) if venue else None,
        "fieldsOfStudy": ",".join(fields_of_study) if fields_of_study else None,
    }
    params.update({key: value for key, value in optional_filters.items() if value is not None})

    return await make_request("/paper/search", params)
Args: query (Optional[str]): Text query to match against paper title and abstract. Supports boolean logic with +, |, -, ", *, (), and ~N. token (Optional[str]): Continuation token for pagination fields (Optional[List[str]]): Fields to return for each paper paperId is always returned Default: paperId and title only sort (Optional[str]): Sort order in format 'field:order' Fields: paperId, publicationDate, citationCount Order: asc (default), desc Default: 'paperId:asc' publication_types (Optional[List[str]]): Filter by publication types open_access_pdf (bool): Only include papers with public PDF min_citation_count (Optional[int]): Minimum citation threshold publication_date_or_year (Optional[str]): Date/year range filter Format: <startDate>:<endDate> in YYYY-MM-DD year (Optional[str]): Publication year filter Examples: '2019', '2016-2020', '2010-', '-2015' venue (Optional[List[str]]): Filter by publication venues fields_of_study (Optional[List[str]]): Filter by fields of study Returns: Dict: { 'total': int, # Total matching papers 'token': str, # Continuation token for next batch 'data': List[Dict] # Papers with requested fields } """ # Build request parameters params = {} # Add query if provided if query: params["query"] = query.strip() # Add continuation token if provided if token: params["token"] = token # Add fields if provided if fields: # Validate fields invalid_fields = set(fields) - PaperFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(PaperFields.VALID_FIELDS)} ) params["fields"] = ",".join(fields) # Add sort if provided if sort: # Validate sort format valid_sort_fields = ["paperId", "publicationDate", "citationCount"] valid_sort_orders = ["asc", "desc"] try: field, order = sort.split(":") if field not in valid_sort_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid sort field. 
Must be one of: {', '.join(valid_sort_fields)}" ) if order not in valid_sort_orders: return create_error_response( ErrorType.VALIDATION, f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}" ) params["sort"] = sort except ValueError: return create_error_response( ErrorType.VALIDATION, "Sort must be in format 'field:order'" ) # Add publication types if provided if publication_types: valid_types = { "Review", "JournalArticle", "CaseReport", "ClinicalTrial", "Conference", "Dataset", "Editorial", "LettersAndComments", "MetaAnalysis", "News", "Study", "Book", "BookSection" } invalid_types = set(publication_types) - valid_types if invalid_types: return create_error_response( ErrorType.VALIDATION, f"Invalid publication types: {', '.join(invalid_types)}", {"valid_types": list(valid_types)} ) params["publicationTypes"] = ",".join(publication_types) # Add open access PDF filter if open_access_pdf: params["openAccessPdf"] = "true" # Add minimum citation count if provided if min_citation_count is not None: if min_citation_count < 0: return create_error_response( ErrorType.VALIDATION, "Minimum citation count cannot be negative" ) params["minCitationCount"] = str(min_citation_count) # Add publication date/year if provided if publication_date_or_year: params["publicationDateOrYear"] = publication_date_or_year elif year: params["year"] = year # Add venue filter if provided if venue: params["venue"] = ",".join(venue) # Add fields of study filter if provided if fields_of_study: valid_fields = { "Computer Science", "Medicine", "Chemistry", "Biology", "Materials Science", "Physics", "Geology", "Psychology", "Art", "History", "Geography", "Sociology", "Business", "Political Science", "Economics", "Philosophy", "Mathematics", "Engineering", "Environmental Science", "Agricultural and Food Sciences", "Education", "Law", "Linguistics" } invalid_fields = set(fields_of_study) - valid_fields if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid 
fields of study: {', '.join(invalid_fields)}", {"valid_fields": list(valid_fields)} ) params["fieldsOfStudy"] = ",".join(fields_of_study) # Make the API request result = await make_request("/paper/search/bulk", params) # Handle potential errors if isinstance(result, Dict) and "error" in result: return result return result @mcp.tool() async def paper_title_search( context: Context, query: str, fields: Optional[List[str]] = None, publication_types: Optional[List[str]] = None, open_access_pdf: bool = False, min_citation_count: Optional[int] = None, year: Optional[str] = None, venue: Optional[List[str]] = None, fields_of_study: Optional[List[str]] = None ) -> Dict: """ Find a single paper by title match. This endpoint is optimized for finding a specific paper by its title and returns the best matching paper based on title similarity. Args: query (str): The title text to search for. The query will be matched against paper titles to find the closest match. fields (Optional[List[str]]): List of fields to return for the paper. paperId and title are always returned. publication_types (Optional[List[str]]): Filter by publication types. open_access_pdf (bool): If True, only include papers with a public PDF. Default: False min_citation_count (Optional[int]): Minimum number of citations required. year (Optional[str]): Filter by publication year. Supports several formats: - Single year: "2019" - Year range: "2016-2020" - Since year: "2010-" - Until year: "-2015" venue (Optional[List[str]]): Filter by publication venues. Accepts full venue names or ISO4 abbreviations. fields_of_study (Optional[List[str]]): Filter by fields of study. Returns: Dict: { "paperId": str, # Semantic Scholar Paper ID "title": str, # Paper title "matchScore": float, # Similarity score between query and matched title ... # Additional requested fields } Returns error response if no matching paper is found. 
""" if not query.strip(): return create_error_response( ErrorType.VALIDATION, "Query string cannot be empty" ) # Validate and prepare fields if fields is None: fields = PaperFields.DEFAULT else: invalid_fields = set(fields) - PaperFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(PaperFields.VALID_FIELDS)} ) # Build base parameters params = {"query": query} # Add optional parameters if fields: params["fields"] = ",".join(fields) if publication_types: params["publicationTypes"] = ",".join(publication_types) if open_access_pdf: params["openAccessPdf"] = "true" if min_citation_count is not None: params["minCitationCount"] = str(min_citation_count) if year: params["year"] = year if venue: params["venue"] = ",".join(venue) if fields_of_study: params["fieldsOfStudy"] = ",".join(fields_of_study) result = await make_request("/paper/search/match", params) # Handle specific error cases if isinstance(result, Dict): if "error" in result: error_msg = result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "No matching paper found", {"original_query": query} ) return result return result @mcp.tool() async def paper_details( context: Context, paper_id: str, fields: Optional[List[str]] = None ) -> Dict: """ Get details about a paper using various types of identifiers. This endpoint provides comprehensive metadata about a paper. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[List[str]]): List of fields to return. 
paperId is always returned. Returns: Dict: Paper details with requested fields. Always includes paperId. Returns error response if paper not found. """ if not paper_id.strip(): return create_error_response( ErrorType.VALIDATION, "Paper ID cannot be empty" ) # Build request parameters params = {} if fields: params["fields"] = ",".join(fields) # Make the API request result = await make_request(f"/paper/{paper_id}", params) # Handle potential errors if isinstance(result, Dict) and "error" in result: error_msg = result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "Paper not found", {"paper_id": paper_id} ) return result return result @mcp.tool() async def paper_batch_details( context: Context, paper_ids: List[str], fields: Optional[str] = None ) -> Dict: """ Get details for multiple papers in a single batch request. This endpoint is optimized for efficiently retrieving details about known papers. Args: paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") Maximum: 500 IDs per request fields (Optional[str]): Comma-separated list of fields to return for each paper. paperId is always returned. Returns: List[Dict]: List of paper details with requested fields. 
        - Results maintain the same order as input paper_ids
        - Invalid or not found paper IDs return null in the results
        - Each paper object contains the requested fields
        - paperId is always included in each paper object
    """
    # Validate inputs: an empty list and an oversized list are both
    # rejected client-side before any network traffic.
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )

    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    # Validate fields if provided (here `fields` is a comma-separated
    # string, unlike the List[str] used by the other paper tools)
    if fields:
        field_list = fields.split(",")
        invalid_fields = set(field_list) - PaperFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    # Build request parameters: fields travel in the query string, the
    # IDs go in the JSON body (the batch endpoint's contract).
    params = {}
    if fields:
        params["fields"] = fields

    # Make POST request with proper structure.
    # NOTE(review): this opens a fresh AsyncClient and bypasses
    # make_request(), so the shared rate limiter does not cover this
    # call — confirm whether batch requests should also be throttled.
    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=params,
                json={"ids": paper_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        # 429 gets its own error type so callers can back off and retry
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Catch-all boundary: convert any unexpected failure into the
        # structured error shape rather than raising to the MCP layer.
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )

@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with
pagination support. This endpoint provides author information and their contributions. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[List[str]]): List of fields to return for each author. authorId is always returned. offset (int): Number of authors to skip for pagination. Default: 0 limit (int): Maximum number of authors to return. Default: 100 Maximum: 1000 Returns: Dict: { "offset": int, # Current offset in the results "next": int, # Next offset (if more results available) "data": List[Dict] # List of authors with requested fields } """ if not paper_id.strip(): return create_error_response( ErrorType.VALIDATION, "Paper ID cannot be empty" ) # Validate limit if limit > 1000: return create_error_response( ErrorType.VALIDATION, "Limit cannot exceed 1000", {"max_limit": 1000} ) # Validate fields if fields: invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} ) # Build request parameters params = { "offset": offset, "limit": limit } if fields: params["fields"] = ",".join(fields) # Make the API request result = await make_request(f"/paper/{paper_id}/authors", params) # Handle potential errors if isinstance(result, Dict) and "error" in result: error_msg = result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "Paper not found", {"paper_id": paper_id} ) return result return result @mcp.tool() async def paper_citations( context: Context, 
paper_id: str, fields: Optional[List[str]] = None, offset: int = 0, limit: int = 100 ) -> Dict: """ Get papers that cite the specified paper (papers where this paper appears in their bibliography). This endpoint provides detailed citation information including citation contexts. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[List[str]]): List of fields to return for each citing paper. paperId is always returned. offset (int): Number of citations to skip for pagination. Default: 0 limit (int): Maximum number of citations to return. Default: 100 Maximum: 1000 Returns: Dict: { "offset": int, # Current offset in the results "next": int, # Next offset (if more results available) "data": List[Dict] # List of citing papers with requested fields } """ if not paper_id.strip(): return create_error_response( ErrorType.VALIDATION, "Paper ID cannot be empty" ) # Validate limit if limit > 1000: return create_error_response( ErrorType.VALIDATION, "Limit cannot exceed 1000", {"max_limit": 1000} ) # Validate fields if fields: invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)} ) # Build request parameters params = { "offset": offset, "limit": limit } if fields: params["fields"] = ",".join(fields) # Make the API request result = await make_request(f"/paper/{paper_id}/citations", params) # Handle potential errors if isinstance(result, Dict) and "error" in result: error_msg = 
result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "Paper not found", {"paper_id": paper_id} ) return result return result @mcp.tool() async def paper_references( context: Context, paper_id: str, fields: Optional[List[str]] = None, offset: int = 0, limit: int = 100 ) -> Dict: """ Get papers cited by the specified paper (papers appearing in this paper's bibliography). This endpoint provides detailed reference information including citation contexts. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[List[str]]): List of fields to return for each referenced paper. paperId is always returned. offset (int): Number of references to skip for pagination. Default: 0 limit (int): Maximum number of references to return. 
Default: 100 Maximum: 1000 Returns: Dict: { "offset": int, # Current offset in the results "next": int, # Next offset (if more results available) "data": List[Dict] # List of referenced papers with requested fields } """ if not paper_id.strip(): return create_error_response( ErrorType.VALIDATION, "Paper ID cannot be empty" ) # Validate limit if limit > 1000: return create_error_response( ErrorType.VALIDATION, "Limit cannot exceed 1000", {"max_limit": 1000} ) # Validate fields if fields: invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)} ) # Build request parameters params = { "offset": offset, "limit": limit } if fields: params["fields"] = ",".join(fields) # Make the API request result = await make_request(f"/paper/{paper_id}/references", params) # Handle potential errors if isinstance(result, Dict) and "error" in result: error_msg = result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "Paper not found", {"paper_id": paper_id} ) return result return result ``` -------------------------------------------------------------------------------- /semantic_scholar_server.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 from fastmcp import FastMCP, Context import httpx import logging import os from typing import Dict, List, Optional, Tuple, Any from datetime import datetime from enum import Enum import asyncio import time import signal from dataclasses import dataclass logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Global HTTP client for connection pooling http_client = None # Rate Limiting Configuration @dataclass class RateLimitConfig: # Define rate limits (requests, seconds) SEARCH_LIMIT = (1, 1) # 1 request per 1 second 
BATCH_LIMIT = (1, 1) # 1 request per 1 second DEFAULT_LIMIT = (10, 1) # 10 requests per 1 second # Endpoints categorization # These endpoints have stricter rate limits due to their computational intensity # and to prevent abuse of the recommendation system RESTRICTED_ENDPOINTS = [ "/paper/batch", # Batch operations are expensive "/paper/search", # Search operations are computationally intensive "/recommendations" # Recommendation generation is resource-intensive ] # Error Types class ErrorType(Enum): RATE_LIMIT = "rate_limit" API_ERROR = "api_error" VALIDATION = "validation" TIMEOUT = "timeout" # Field Constants class PaperFields: DEFAULT = ["title", "abstract", "year", "citationCount", "authors", "url"] DETAILED = DEFAULT + ["references", "citations", "venue", "influentialCitationCount"] MINIMAL = ["title", "year", "authors"] SEARCH = ["paperId", "title", "year", "citationCount"] # Valid fields from API documentation VALID_FIELDS = { "abstract", "authors", "citationCount", "citations", "corpusId", "embedding", "externalIds", "fieldsOfStudy", "influentialCitationCount", "isOpenAccess", "openAccessPdf", "paperId", "publicationDate", "publicationTypes", "publicationVenue", "references", "s2FieldsOfStudy", "title", "tldr", "url", "venue", "year" } class AuthorDetailFields: """Common field combinations for author details""" # Basic author information BASIC = ["name", "url", "affiliations"] # Author's papers information PAPERS_BASIC = ["papers"] # Returns paperId and title PAPERS_DETAILED = [ "papers.year", "papers.authors", "papers.abstract", "papers.venue", "papers.url" ] # Complete author profile COMPLETE = BASIC + ["papers", "papers.year", "papers.authors", "papers.venue"] # Citation metrics METRICS = ["citationCount", "hIndex", "paperCount"] # Valid fields for author details VALID_FIELDS = { "authorId", "name", "url", "affiliations", "papers", "papers.year", "papers.authors", "papers.abstract", "papers.venue", "papers.url", "citationCount", "hIndex", "paperCount" } 
class PaperDetailFields: """Common field combinations for paper details""" # Basic paper information BASIC = ["title", "abstract", "year", "venue"] # Author information AUTHOR_BASIC = ["authors"] AUTHOR_DETAILED = ["authors.url", "authors.paperCount", "authors.citationCount"] # Citation information CITATION_BASIC = ["citations", "references"] CITATION_DETAILED = ["citations.title", "citations.abstract", "citations.year", "references.title", "references.abstract", "references.year"] # Full paper details COMPLETE = BASIC + AUTHOR_BASIC + CITATION_BASIC + ["url", "fieldsOfStudy", "publicationVenue", "publicationTypes"] class CitationReferenceFields: """Common field combinations for citation and reference queries""" # Basic information BASIC = ["title"] # Citation/Reference context CONTEXT = ["contexts", "intents", "isInfluential"] # Paper details DETAILED = ["title", "abstract", "authors", "year", "venue"] # Full information COMPLETE = CONTEXT + DETAILED # Valid fields for citation/reference queries VALID_FIELDS = { "contexts", "intents", "isInfluential", "title", "abstract", "authors", "year", "venue", "paperId", "url", "citationCount", "influentialCitationCount" } # Configuration class Config: # API Configuration API_VERSION = "v1" BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}" TIMEOUT = 30 # seconds # Request Limits MAX_BATCH_SIZE = 100 MAX_RESULTS_PER_PAGE = 100 DEFAULT_PAGE_SIZE = 10 MAX_BATCHES = 5 # Fields Configuration DEFAULT_FIELDS = PaperFields.DEFAULT # Feature Flags ENABLE_CACHING = False DEBUG_MODE = False # Search Configuration SEARCH_TYPES = { "comprehensive": { "description": "Balanced search considering relevance and impact", "min_citations": None, "ranking_strategy": "balanced" }, "influential": { "description": "Focus on highly-cited and influential papers", "min_citations": 50, "ranking_strategy": "citations" }, "latest": { "description": "Focus on recent papers with impact", "min_citations": None, "ranking_strategy": "recency" 
} } # Rate Limiter class RateLimiter: def __init__(self): self._last_call_time = {} self._locks = {} def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]: if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS): return RateLimitConfig.SEARCH_LIMIT return RateLimitConfig.DEFAULT_LIMIT async def acquire(self, endpoint: str): if endpoint not in self._locks: self._locks[endpoint] = asyncio.Lock() self._last_call_time[endpoint] = 0 async with self._locks[endpoint]: rate_limit = self._get_rate_limit(endpoint) current_time = time.time() time_since_last_call = current_time - self._last_call_time[endpoint] if time_since_last_call < rate_limit[1]: delay = rate_limit[1] - time_since_last_call await asyncio.sleep(delay) self._last_call_time[endpoint] = time.time() def create_error_response( error_type: ErrorType, message: str, details: Optional[Dict] = None ) -> Dict: return { "error": { "type": error_type.value, "message": message, "details": details or {} } } mcp = FastMCP("Semantic Scholar Server") rate_limiter = RateLimiter() # Basic functions def get_api_key() -> Optional[str]: """ Get the Semantic Scholar API key from environment variables. Returns None if no API key is set, enabling unauthenticated access. """ api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY") if not api_key: logger.warning("No SEMANTIC_SCHOLAR_API_KEY set. 
Using unauthenticated access with lower rate limits.") return api_key async def handle_exception(loop, context): """Global exception handler for the event loop.""" msg = context.get("exception", context["message"]) logger.error(f"Caught exception: {msg}") asyncio.create_task(shutdown()) async def initialize_client(): """Initialize the global HTTP client.""" global http_client if http_client is None: http_client = httpx.AsyncClient( timeout=Config.TIMEOUT, limits=httpx.Limits(max_keepalive_connections=10) ) return http_client async def cleanup_client(): """Cleanup the global HTTP client.""" global http_client if http_client is not None: await http_client.aclose() http_client = None async def make_request(endpoint: str, params: Dict = None) -> Dict: """Make a rate-limited request to the Semantic Scholar API.""" try: # Apply rate limiting await rate_limiter.acquire(endpoint) # Get API key if available api_key = get_api_key() headers = {"x-api-key": api_key} if api_key else {} url = f"{Config.BASE_URL}{endpoint}" # Use global client client = await initialize_client() response = await client.get(url, params=params, headers=headers) response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: logger.error(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") if e.response.status_code == 429: return create_error_response( ErrorType.RATE_LIMIT, "Rate limit exceeded. 
Consider using an API key for higher limits.", { "retry_after": e.response.headers.get("retry-after"), "authenticated": bool(get_api_key()) } ) return create_error_response( ErrorType.API_ERROR, f"HTTP error: {e.response.status_code}", {"response": e.response.text} ) except httpx.TimeoutException as e: logger.error(f"Request timeout for {endpoint}: {str(e)}") return create_error_response( ErrorType.TIMEOUT, f"Request timed out after {Config.TIMEOUT} seconds" ) except Exception as e: logger.error(f"Unexpected error for {endpoint}: {str(e)}") return create_error_response( ErrorType.API_ERROR, str(e) ) # 1. Paper Data Tools # 1.1 Paper relevance search @mcp.tool() async def paper_relevance_search( context: Context, query: str, fields: Optional[List[str]] = None, publication_types: Optional[List[str]] = None, open_access_pdf: bool = False, min_citation_count: Optional[int] = None, year: Optional[str] = None, # supports formats like "2019", "2016-2020", "2010-", "-2015" venue: Optional[List[str]] = None, fields_of_study: Optional[List[str]] = None, offset: int = 0, limit: int = Config.DEFAULT_PAGE_SIZE ) -> Dict: """ Search for papers on Semantic Scholar using relevance-based ranking. This endpoint is optimized for finding the most relevant papers matching a text query. Results are sorted by relevance score. Args: query (str): A text query to search for. The query will be matched against paper titles, abstracts, venue names, and author names. All terms in the query must be present in the paper for it to be returned. The query is case-insensitive and matches word prefixes (e.g. "quantum" matches "quantum" and "quantumly"). fields (Optional[List[str]]): List of fields to return for each paper. paperId and title are always returned. 
Available fields: - abstract: The paper's abstract - authors: List of authors with name and authorId - citationCount: Total number of citations - citations: List of papers citing this paper - corpusId: Internal ID for the paper - embedding: Vector embedding of the paper - externalIds: External IDs (DOI, MAG, etc) - fieldsOfStudy: List of fields of study - influentialCitationCount: Number of influential citations - isOpenAccess: Whether paper is open access - openAccessPdf: Open access PDF URL if available - paperId: Semantic Scholar paper ID - publicationDate: Publication date in YYYY-MM-DD format - publicationTypes: List of publication types - publicationVenue: Venue information - references: List of papers cited by this paper - s2FieldsOfStudy: Semantic Scholar fields - title: Paper title - tldr: AI-generated TLDR summary - url: URL to Semantic Scholar paper page - venue: Publication venue name - year: Publication year publication_types (Optional[List[str]]): Filter by publication types. Available types: - Review - JournalArticle - CaseReport - ClinicalTrial - Conference - Dataset - Editorial - LettersAndComments - MetaAnalysis - News - Study - Book - BookSection open_access_pdf (bool): If True, only include papers with a public PDF. Default: False min_citation_count (Optional[int]): Minimum number of citations required. Papers with fewer citations will be filtered out. year (Optional[str]): Filter by publication year. Supports several formats: - Single year: "2019" - Year range: "2016-2020" - Since year: "2010-" - Until year: "-2015" venue (Optional[List[str]]): Filter by publication venues. Accepts full venue names or ISO4 abbreviations. Examples: ["Nature", "Science", "N. Engl. J. Med."] fields_of_study (Optional[List[str]]): Filter by fields of study. 
Available fields: - Computer Science - Medicine - Chemistry - Biology - Materials Science - Physics - Geology - Psychology - Art - History - Geography - Sociology - Business - Political Science - Economics - Philosophy - Mathematics - Engineering - Environmental Science - Agricultural and Food Sciences - Education - Law - Linguistics offset (int): Number of results to skip for pagination. Default: 0 limit (int): Maximum number of results to return. Default: 10 Maximum: 100 Returns: Dict: { "total": int, # Total number of papers matching the query "offset": int, # Current offset in the results "next": int, # Offset for the next page of results (if available) "data": List[Dict] # List of papers with requested fields } Notes: - Results are sorted by relevance to the query - All query terms must be present in the paper (AND operation) - Query matches are case-insensitive - Query matches word prefixes (e.g., "quantum" matches "quantum" and "quantumly") - Maximum of 100 results per request - Use offset parameter for pagination - Rate limits apply (see API documentation) """ if not query.strip(): return create_error_response( ErrorType.VALIDATION, "Query string cannot be empty" ) # Validate and prepare fields if fields is None: fields = PaperFields.DEFAULT else: invalid_fields = set(fields) - PaperFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(PaperFields.VALID_FIELDS)} ) # Validate and prepare parameters limit = min(limit, Config.MAX_RESULTS_PER_PAGE) params = { "query": query, "offset": offset, "limit": limit, "fields": ",".join(fields) } # Add optional filters if publication_types: params["publicationTypes"] = ",".join(publication_types) if open_access_pdf: params["openAccessPdf"] = "true" if min_citation_count is not None: params["minCitationCount"] = min_citation_count if year: params["year"] = year if venue: params["venue"] = ",".join(venue) if 
# 1.2 Paper bulk search
@mcp.tool()
async def paper_bulk_search(
    context: Context,
    query: Optional[str] = None,
    token: Optional[str] = None,
    fields: Optional[List[str]] = None,
    sort: Optional[str] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    publication_date_or_year: Optional[str] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Bulk search for papers with advanced filtering and sorting options.
    Intended for retrieving large sets of papers efficiently.

    Args:
        query: Text query matched against paper title and abstract.
            Supports boolean syntax: '+' (AND), '|' (OR), '-' (NOT),
            '"..."' (phrase), '*' (prefix), '()' (precedence),
            '~N' (edit distance, default 2).
        token: Continuation token for pagination.
        fields: Fields to return for each paper (paperId always returned;
            default is paperId and title only).
        sort: Sort order in the form 'field:order'. Valid fields:
            paperId, publicationDate, citationCount. Valid orders:
            asc (default), desc.
        publication_types: Filter by publication types (Review,
            JournalArticle, CaseReport, ClinicalTrial, Conference,
            Dataset, Editorial, LettersAndComments, MetaAnalysis, News,
            Study, Book, BookSection).
        open_access_pdf: If True, only papers with a public PDF.
        min_citation_count: Minimum citation threshold; must be >= 0.
        publication_date_or_year: '<startDate>:<endDate>' in YYYY-MM-DD;
            partial dates and open-ended ranges are supported.
        year: Publication year filter, e.g. '2019', '2016-2020', '2010-'.
        venue: Filter by venue names or ISO4 abbreviations.
        fields_of_study: Filter by fields of study (e.g. 'Computer
            Science', 'Medicine').

    Returns:
        Dict with 'total', 'token' (continuation token for the next
        batch) and 'data' (papers with requested fields), or an error
        response when validation fails.

    Notes:
        - Returns up to 1,000 papers per call; up to 10M papers total
          via continuation tokens.
        - Nested data (citations, references) is not available here.
    """
    params = {}

    if query:
        params["query"] = query.strip()
    if token:
        params["token"] = token

    if fields:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    if sort:
        valid_sort_fields = ["paperId", "publicationDate", "citationCount"]
        valid_sort_orders = ["asc", "desc"]
        # Only the split itself can raise; keep the try body minimal.
        try:
            field, order = sort.split(":")
        except ValueError:
            return create_error_response(
                ErrorType.VALIDATION,
                "Sort must be in format 'field:order'"
            )
        if field not in valid_sort_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}"
            )
        if order not in valid_sort_orders:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}"
            )
        params["sort"] = sort

    if publication_types:
        valid_types = {
            "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
            "Conference", "Dataset", "Editorial", "LettersAndComments",
            "MetaAnalysis", "News", "Study", "Book", "BookSection"
        }
        invalid_types = set(publication_types) - valid_types
        if invalid_types:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid publication types: {', '.join(invalid_types)}",
                {"valid_types": list(valid_types)}
            )
        params["publicationTypes"] = ",".join(publication_types)

    if open_access_pdf:
        params["openAccessPdf"] = "true"

    if min_citation_count is not None:
        if min_citation_count < 0:
            return create_error_response(
                ErrorType.VALIDATION,
                "Minimum citation count cannot be negative"
            )
        params["minCitationCount"] = str(min_citation_count)

    # publication_date_or_year takes precedence over the coarser year filter.
    if publication_date_or_year:
        params["publicationDateOrYear"] = publication_date_or_year
    elif year:
        params["year"] = year

    if venue:
        params["venue"] = ",".join(venue)

    if fields_of_study:
        valid_fields = {
            "Computer Science", "Medicine", "Chemistry", "Biology",
            "Materials Science", "Physics", "Geology", "Psychology",
            "Art", "History", "Geography", "Sociology", "Business",
            "Political Science", "Economics", "Philosophy", "Mathematics",
            "Engineering", "Environmental Science",
            "Agricultural and Food Sciences", "Education", "Law",
            "Linguistics"
        }
        invalid_fields = set(fields_of_study) - valid_fields
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields of study: {', '.join(invalid_fields)}",
                {"valid_fields": list(valid_fields)}
            )
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # The previous post-hoc error re-check was a no-op (both branches
    # returned the same value), so return the API result directly.
    return await make_request("/paper/search/bulk", params)
# 1.3 Paper title search
@mcp.tool()
async def paper_title_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Find the single paper that best matches a title query.

    Matching is case-insensitive and ignores punctuation; the response
    includes a matchScore indicating title similarity. Filters
    (publication types, open access, citations, year, venue, fields of
    study) are applied after the best title match is found.

    Args:
        query: Title text to match against paper titles.
        fields: Paper fields to return (paperId and title are always
            returned).
        publication_types: Restrict results to these publication types.
        open_access_pdf: If True, only papers with a public PDF.
        min_citation_count: Drop papers with fewer citations.
        year: Year filter, e.g. '2019', '2016-2020', '2010-', '-2015'.
        venue: Venue names or ISO4 abbreviations.
        fields_of_study: Fields-of-study filter.

    Returns:
        Dict with paperId, title, matchScore and any requested fields,
        or an error response when validation fails or no paper matches.
    """
    if not query.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Fall back to the default field set; otherwise reject unknown names.
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        unknown = set(fields) - PaperFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    request_params = {"query": query}
    if fields:
        request_params["fields"] = ",".join(fields)
    if publication_types:
        request_params["publicationTypes"] = ",".join(publication_types)
    if open_access_pdf:
        request_params["openAccessPdf"] = "true"
    if min_citation_count is not None:
        request_params["minCitationCount"] = str(min_citation_count)
    if year:
        request_params["year"] = year
    if venue:
        request_params["venue"] = ",".join(venue)
    if fields_of_study:
        request_params["fieldsOfStudy"] = ",".join(fields_of_study)

    result = await make_request("/paper/search/match", request_params)

    # A 404 from the match endpoint means no paper resembled the query.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "No matching paper found",
                {"original_query": query}
            )
    return result
# 1.4 Details about a paper
@mcp.tool()
async def paper_details(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Fetch metadata for a single paper by any supported identifier.

    Args:
        paper_id: One of: a raw Semantic Scholar ID, or a prefixed ID
            such as CorpusId:<id>, DOI:<doi>, ARXIV:<id>, MAG:<id>,
            ACL:<id>, PMID:<id>, PMCID:<id>, or URL:<url> (supported
            domains: semanticscholar.org, arxiv.org, aclweb.org,
            acm.org, biorxiv.org).
        fields: Fields to return (paperId always included). Nested
            selectors such as citations.title, authors.name and
            embedding.specter_v2 are passed through unvalidated.

    Returns:
        Dict of paper details, or an error response when the ID is
        empty or the paper is not found.
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    request_params = {"fields": ",".join(fields)} if fields else {}

    result = await make_request(f"/paper/{paper_id}", request_params)

    # Translate the API's 404 into a friendlier validation error.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
# 1.5 Get details for multiple papers at once
@mcp.tool()
async def paper_batch_details(
    context: Context,
    paper_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Fetch details for up to 500 papers in a single batch request.

    Args:
        paper_ids: Paper identifiers in any supported format (S2 ID,
            CorpusId:, DOI:, ARXIV:, MAG:, ACL:, PMID:, PMCID:, URL:).
            Maximum 500 IDs per call.
        fields: Comma-separated list of fields to return for each paper
            (paperId is always included).
            NOTE(review): the API documents nested selectors like
            citations.title, but the VALID_FIELDS check below would
            reject them — confirm this is intended.

    Returns:
        The API's JSON response: results keep the input order and
        unknown IDs come back as null. Error responses are returned for
        validation failures, rate limiting, HTTP errors and timeouts.
    """
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )
    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    if fields:
        unknown = set(fields.split(",")) - PaperFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    query_params = {"fields": fields} if fields else {}

    # The batch endpoint needs a POST with the IDs in the JSON body,
    # so it bypasses the shared make_request helper.
    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=query_params,
                json={"ids": paper_ids},
                headers={"x-api-key": api_key} if api_key else {}
            )
            response.raise_for_status()
            return response.json()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
# 1.6 Details about a paper's authors
@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    List a paper's authors with pagination support.

    Args:
        paper_id: Paper identifier (S2 ID or prefixed form such as
            CorpusId:, DOI:, ARXIV:, MAG:, ACL:, PMID:, PMCID:, URL:).
        fields: Author fields to return (authorId always included),
            e.g. name, aliases, affiliations, hIndex, papers,
            papers.year.
        offset: Number of authors to skip. Default 0.
        limit: Maximum authors to return (max 1000). Default 100.

    Returns:
        Dict with 'offset', optional 'next' and 'data' (authors in the
        order they appear on the paper), or an error response.
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if fields:
        unknown = set(fields) - AuthorDetailFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    request_params = {"offset": offset, "limit": limit}
    if fields:
        request_params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/authors", request_params)

    # Translate the API's 404 into a friendlier validation error.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
# 1.7 Details about a paper's citations
@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    List papers that cite the given paper, with pagination support.

    Args:
        paper_id: Paper identifier (S2 ID or prefixed form such as
            CorpusId:, DOI:, ARXIV:, MAG:, ACL:, PMID:, PMCID:, URL:).
        fields: Fields to return for each citing paper (paperId always
            included). Citation-specific fields: contexts, intents,
            isInfluential.
        offset: Number of citations to skip. Default 0.
        limit: Maximum citations to return (max 1000). Default 100.

    Returns:
        Dict with 'offset', optional 'next' and 'data' (citing papers,
        sorted by citation date, newest first), or an error response.
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if fields:
        unknown = set(fields) - CitationReferenceFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    request_params = {"offset": offset, "limit": limit}
    if fields:
        request_params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/citations", request_params)

    # Translate the API's 404 into a friendlier validation error.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
# 1.8 Details about a paper's references
@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    List papers cited by the given paper (its bibliography), with
    pagination support.

    Args:
        paper_id: Paper identifier (S2 ID or prefixed form such as
            CorpusId:, DOI:, ARXIV:, MAG:, ACL:, PMID:, PMCID:, URL:).
        fields: Fields to return for each referenced paper (paperId
            always included). Reference-specific fields: contexts,
            intents, isInfluential.
        offset: Number of references to skip. Default 0.
        limit: Maximum references to return (max 1000). Default 100.

    Returns:
        Dict with 'offset', optional 'next' and 'data' (references in
        bibliography order), or an error response.
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if fields:
        unknown = set(fields) - CitationReferenceFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    request_params = {"offset": offset, "limit": limit}
    if fields:
        request_params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/references", request_params)

    # Translate the API's 404 into a friendlier validation error.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )
    return result
# 2. Author Data Tools
# 2.1 Search for authors by name
@mcp.tool()
async def author_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Search Semantic Scholar authors by name.

    Matching is case-insensitive, covers known aliases, and matches
    name prefixes; results are ordered by relevance to the query.

    Args:
        query: Name text, e.g. "Albert Einstein", "Einstein, Albert",
            "A Einstein".
        fields: Author fields to return (authorId always included),
            e.g. name, aliases, url, hIndex, papers, papers.year.
        offset: Number of authors to skip. Default 0.
        limit: Maximum authors to return (max 1000). Default 100.

    Returns:
        Dict with 'total', 'offset', optional 'next' and 'data', or an
        error response for invalid arguments.
    """
    if not query.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if fields:
        unknown = set(fields) - AuthorDetailFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    request_params = {"query": query, "offset": offset, "limit": limit}
    if fields:
        request_params["fields"] = ",".join(fields)

    return await make_request("/author/search", request_params)
# 2.2 Details about an author
@mcp.tool()
async def author_details(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Fetch metadata for a single author by Semantic Scholar author ID.

    Args:
        author_id: Semantic Scholar author ID, e.g. "1741101".
        fields: Author fields to return (authorId always included),
            e.g. name, aliases, affiliations, hIndex, papers,
            papers.year. The papers list covers recent papers only;
            use author_papers for the full list.

    Returns:
        Dict of author details, or an error response when the ID is
        empty or the author is not found.
    """
    if not author_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )
    if fields:
        unknown = set(fields) - AuthorDetailFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    request_params = {"fields": ",".join(fields)} if fields else {}

    result = await make_request(f"/author/{author_id}", request_params)

    # Translate the API's 404 into a friendlier validation error.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )
    return result
fields: Optional[List[str]] = None, offset: int = 0, limit: int = 100 ) -> Dict: """ Get papers written by an author with pagination support. This endpoint provides detailed information about an author's publications. Args: author_id (str): Semantic Scholar author ID. This is a unique identifier assigned by Semantic Scholar. Example: "1741101" (Albert Einstein) fields (Optional[List[str]]): List of fields to return for each paper. paperId is always returned. Available fields: - title: Paper title - abstract: Paper abstract - year: Publication year - venue: Publication venue - authors: List of authors - url: URL to paper page - citationCount: Number of citations received - influentialCitationCount: Number of influential citations - isOpenAccess: Whether paper is open access - openAccessPdf: Open access PDF URL if available - fieldsOfStudy: List of fields of study - s2FieldsOfStudy: Semantic Scholar fields - publicationTypes: List of publication types - publicationDate: Publication date in YYYY-MM-DD format - journal: Journal information - externalIds: External IDs (DOI, MAG, etc) If omitted, returns only paperId and title. offset (int): Number of papers to skip for pagination. Default: 0 limit (int): Maximum number of papers to return. 
Default: 100 Maximum: 1000 Returns: Dict: { "offset": int, # Current offset in the results "next": int, # Next offset (if more results available) "data": List[Dict] # List of papers with requested fields } Notes: - Papers are sorted by publication date (newest first) - Supports pagination for authors with many papers - Some fields may be null if data is not available - Rate limits apply (see API documentation) """ if not author_id.strip(): return create_error_response( ErrorType.VALIDATION, "Author ID cannot be empty" ) # Validate limit if limit > 1000: return create_error_response( ErrorType.VALIDATION, "Limit cannot exceed 1000", {"max_limit": 1000} ) # Validate fields if fields: invalid_fields = set(fields) - PaperFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(PaperFields.VALID_FIELDS)} ) # Build request parameters params = { "offset": offset, "limit": limit } if fields: params["fields"] = ",".join(fields) # Make the API request result = await make_request(f"/author/{author_id}/papers", params) if isinstance(result, Dict) and "error" in result: error_msg = result["error"].get("message", "") if "404" in error_msg: return create_error_response( ErrorType.VALIDATION, "Author not found", {"author_id": author_id} ) return result return result # 2.4 Get details for multiple authors at once @mcp.tool() async def author_batch_details( context: Context, author_ids: List[str], fields: Optional[str] = None ) -> Dict: """ Get details for multiple authors in a single batch request. This endpoint is optimized for efficiently retrieving details about known authors. Args: author_ids (List[str]): List of Semantic Scholar author IDs. These are unique identifiers assigned by Semantic Scholar. Example: ["1741101", "1741102"] Maximum: 1000 IDs per request fields (Optional[str]): Comma-separated list of fields to return for each author. authorId is always returned. 
Available fields: - name: Author's name - aliases: Alternative names for the author - url: URL to author's S2 profile - affiliations: List of author's affiliations - homepage: Author's homepage URL - paperCount: Total number of papers by this author - citationCount: Total citations received by this author - hIndex: Author's h-index - papers: List of papers by this author (returns paperId and title) Special syntax for paper fields: - papers.year: Include year for each paper - papers.authors: Include authors for each paper - papers.abstract: Include abstract for each paper - papers.venue: Include venue for each paper - papers.citations: Include citation count for each paper If omitted, returns only authorId and name. Returns: List[Dict]: List of author details with requested fields. - Results maintain the same order as input author_ids - Invalid or not found author IDs return null in the results - Each author object contains the requested fields - authorId is always included in each author object Notes: - More efficient than making multiple single-author requests - Maximum of 1000 author IDs per request - Rate limits apply (see API documentation) - Some fields may be null if data is not available - Invalid author IDs return null instead of causing an error - Order of results matches order of input IDs for easy mapping """ # Validate inputs if not author_ids: return create_error_response( ErrorType.VALIDATION, "Author IDs list cannot be empty" ) if len(author_ids) > 1000: return create_error_response( ErrorType.VALIDATION, "Cannot process more than 1000 author IDs at once", {"max_authors": 1000, "received": len(author_ids)} ) # Validate fields if provided if fields: field_list = fields.split(",") invalid_fields = set(field_list) - AuthorDetailFields.VALID_FIELDS if invalid_fields: return create_error_response( ErrorType.VALIDATION, f"Invalid fields: {', '.join(invalid_fields)}", {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)} ) # Build request parameters 
params = {} if fields: params["fields"] = fields # Make POST request with proper structure try: async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: api_key = get_api_key() headers = {"x-api-key": api_key} if api_key else {} response = await client.post( f"{Config.BASE_URL}/author/batch", params=params, json={"ids": author_ids}, headers=headers ) response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: if e.response.status_code == 429: return create_error_response( ErrorType.RATE_LIMIT, "Rate limit exceeded", {"retry_after": e.response.headers.get("retry-after")} ) return create_error_response( ErrorType.API_ERROR, f"HTTP error: {e.response.status_code}", {"response": e.response.text} ) except httpx.TimeoutException: return create_error_response( ErrorType.TIMEOUT, f"Request timed out after {Config.TIMEOUT} seconds" ) except Exception as e: return create_error_response( ErrorType.API_ERROR, str(e) ) # 3. Paper Recommendation Tools # 3.1 Get recommendations based on a single paper @mcp.tool() async def get_paper_recommendations_single( context: Context, paper_id: str, fields: Optional[str] = None, limit: int = 100, from_pool: str = "recent" ) -> Dict: """ Get paper recommendations based on a single seed paper. This endpoint is optimized for finding papers similar to a specific paper. Args: paper_id (str): Paper identifier in one of the following formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - CorpusId:<id> (e.g., "CorpusId:215416146") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") fields (Optional[str]): Comma-separated list of fields to return for each paper. paperId is always returned. 
Available fields: - title: Paper title - abstract: Paper abstract - year: Publication year - venue: Publication venue - authors: List of authors - url: URL to paper page - citationCount: Number of citations received - influentialCitationCount: Number of influential citations - isOpenAccess: Whether paper is open access - openAccessPdf: Open access PDF URL if available - fieldsOfStudy: List of fields of study - publicationTypes: List of publication types - publicationDate: Publication date in YYYY-MM-DD format - journal: Journal information - externalIds: External IDs (DOI, MAG, etc) If omitted, returns only paperId and title. limit (int): Maximum number of recommendations to return. Default: 100 Maximum: 500 from_pool (str): Which pool of papers to recommend from. Options: - "recent": Recent papers (default) - "all-cs": All computer science papers Default: "recent" Returns: Dict: { "recommendedPapers": List[Dict] # List of recommended papers with requested fields } Notes: - Recommendations are based on content similarity and citation patterns - Results are sorted by relevance to the seed paper - "recent" pool focuses on papers from the last few years - "all-cs" pool includes older computer science papers - Rate limits apply (see API documentation) - Some fields may be null if data is not available """ try: # Apply rate limiting endpoint = "/recommendations" await rate_limiter.acquire(endpoint) # Validate limit if limit > 500: return create_error_response( ErrorType.VALIDATION, "Cannot request more than 500 recommendations", {"max_limit": 500, "requested": limit} ) # Validate pool if from_pool not in ["recent", "all-cs"]: return create_error_response( ErrorType.VALIDATION, "Invalid paper pool specified", {"valid_pools": ["recent", "all-cs"]} ) # Build request parameters params = { "limit": limit, "from": from_pool } if fields: params["fields"] = fields # Make the API request async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: api_key = get_api_key() 
headers = {"x-api-key": api_key} if api_key else {} url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}" response = await client.get(url, params=params, headers=headers) # Handle specific error cases if response.status_code == 404: return create_error_response( ErrorType.VALIDATION, "Paper not found", {"paper_id": paper_id} ) response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: if e.response.status_code == 429: return create_error_response( ErrorType.RATE_LIMIT, "Rate limit exceeded. Consider using an API key for higher limits.", { "retry_after": e.response.headers.get("retry-after"), "authenticated": bool(get_api_key()) } ) return create_error_response( ErrorType.API_ERROR, f"HTTP error {e.response.status_code}", {"response": e.response.text} ) except httpx.TimeoutException: return create_error_response( ErrorType.TIMEOUT, f"Request timed out after {Config.TIMEOUT} seconds" ) except Exception as e: logger.error(f"Unexpected error in recommendations: {str(e)}") return create_error_response( ErrorType.API_ERROR, "Failed to get recommendations", {"error": str(e)} ) # 3.2 Get recommendations based on multiple papers @mcp.tool() async def get_paper_recommendations_multi( context: Context, positive_paper_ids: List[str], negative_paper_ids: Optional[List[str]] = None, fields: Optional[str] = None, limit: int = 100 ) -> Dict: """ Get paper recommendations based on multiple positive and optional negative examples. This endpoint is optimized for finding papers similar to a set of papers while avoiding papers similar to the negative examples. Args: positive_paper_ids (List[str]): List of paper IDs to use as positive examples. Papers similar to these will be recommended. 
Each ID can be in any of these formats: - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") - CorpusId:<id> (e.g., "CorpusId:215416146") - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") - ARXIV:<id> (e.g., "ARXIV:2106.15928") - MAG:<id> (e.g., "MAG:112218234") - ACL:<id> (e.g., "ACL:W12-3903") - PMID:<id> (e.g., "PMID:19872477") - PMCID:<id> (e.g., "PMCID:2323736") - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") negative_paper_ids (Optional[List[str]]): List of paper IDs to use as negative examples. Papers similar to these will be avoided in recommendations. Uses same ID formats as positive_paper_ids. fields (Optional[str]): Comma-separated list of fields to return for each paper. paperId is always returned. Available fields: - title: Paper title - abstract: Paper abstract - year: Publication year - venue: Publication venue - authors: List of authors - url: URL to paper page - citationCount: Number of citations received - influentialCitationCount: Number of influential citations - isOpenAccess: Whether paper is open access - openAccessPdf: Open access PDF URL if available - fieldsOfStudy: List of fields of study - publicationTypes: List of publication types - publicationDate: Publication date in YYYY-MM-DD format - journal: Journal information - externalIds: External IDs (DOI, MAG, etc) If omitted, returns only paperId and title. limit (int): Maximum number of recommendations to return. 
Default: 100 Maximum: 500 Returns: Dict: { "recommendedPapers": List[Dict] # List of recommended papers with requested fields } Notes: - Recommendations balance similarity to positive examples and dissimilarity to negative examples - Results are sorted by relevance score - More positive examples can help focus recommendations - Negative examples help filter out unwanted topics/approaches - Rate limits apply (see API documentation) - Some fields may be null if data is not available """ try: # Apply rate limiting endpoint = "/recommendations" await rate_limiter.acquire(endpoint) # Validate inputs if not positive_paper_ids: return create_error_response( ErrorType.VALIDATION, "Must provide at least one positive paper ID" ) if limit > 500: return create_error_response( ErrorType.VALIDATION, "Cannot request more than 500 recommendations", {"max_limit": 500, "requested": limit} ) # Build request parameters params = {"limit": limit} if fields: params["fields"] = fields request_body = { "positivePaperIds": positive_paper_ids, "negativePaperIds": negative_paper_ids or [] } # Make the API request async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client: api_key = get_api_key() headers = {"x-api-key": api_key} if api_key else {} url = "https://api.semanticscholar.org/recommendations/v1/papers" response = await client.post(url, params=params, json=request_body, headers=headers) # Handle specific error cases if response.status_code == 404: return create_error_response( ErrorType.VALIDATION, "One or more input papers not found", { "positive_ids": positive_paper_ids, "negative_ids": negative_paper_ids } ) response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: if e.response.status_code == 429: return create_error_response( ErrorType.RATE_LIMIT, "Rate limit exceeded. 
Consider using an API key for higher limits.", { "retry_after": e.response.headers.get("retry-after"), "authenticated": bool(get_api_key()) } ) return create_error_response( ErrorType.API_ERROR, f"HTTP error {e.response.status_code}", {"response": e.response.text} ) except httpx.TimeoutException: return create_error_response( ErrorType.TIMEOUT, f"Request timed out after {Config.TIMEOUT} seconds" ) except Exception as e: logger.error(f"Unexpected error in recommendations: {str(e)}") return create_error_response( ErrorType.API_ERROR, "Failed to get recommendations", {"error": str(e)} ) async def shutdown(): """Gracefully shut down the server.""" logger.info("Initiating graceful shutdown...") # Cancel all tasks tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] for task in tasks: task.cancel() try: await task except asyncio.CancelledError: pass # Cleanup resources await cleanup_client() await mcp.cleanup() logger.info(f"Cancelled {len(tasks)} tasks") logger.info("Shutdown complete") def init_signal_handlers(loop): """Initialize signal handlers for graceful shutdown.""" for sig in (signal.SIGTERM, signal.SIGINT): loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown())) logger.info("Signal handlers initialized") async def run_server(): """Run the server with proper async context management.""" async with mcp: try: # Initialize HTTP client await initialize_client() # Start the server logger.info("Starting Semantic Scholar Server") await mcp.run_async() except Exception as e: logger.error(f"Server error: {e}") raise finally: await shutdown() if __name__ == "__main__": try: # Set up event loop with exception handler loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) loop.set_exception_handler(handle_exception) # Initialize signal handlers init_signal_handlers(loop) # Run the server loop.run_until_complete(run_server()) except KeyboardInterrupt: logger.info("Received keyboard interrupt, shutting down...") except Exception as 
e: logger.error(f"Fatal error: {str(e)}") finally: try: loop.run_until_complete(asyncio.sleep(0)) # Let pending tasks complete loop.close() except Exception as e: logger.error(f"Error during final cleanup: {str(e)}") logger.info("Server stopped") ```