#
tokens: 24869/50000 1/25 files (page 2/2)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 2 of 2. Use http://codebase.md/yuzongmin/semantic-scholar-fastmcp-mcp-server?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── REFACTORING.md
├── requirements.txt
├── run.py
├── semantic_scholar
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── authors.py
│   │   ├── papers.py
│   │   └── recommendations.py
│   ├── config.py
│   ├── mcp.py
│   ├── server.py
│   └── utils
│       ├── __init__.py
│       ├── errors.py
│       └── http.py
├── semantic_scholar_server.py
├── smithery.yaml
├── test
│   ├── __init__.py
│   ├── test_author.py
│   ├── test_paper.py
│   ├── test_recommend.py
│   └── test_utils.py
└── TOOLS.md
```

# Files

--------------------------------------------------------------------------------
/semantic_scholar_server.py:
--------------------------------------------------------------------------------

```python
   1 | #!/usr/bin/env python3
   2 | from fastmcp import FastMCP, Context
   3 | import httpx
   4 | import logging
   5 | import os
   6 | from typing import Dict, List, Optional, Tuple, Any
   7 | from datetime import datetime
   8 | from enum import Enum
   9 | import asyncio
  10 | import time
  11 | import signal
  12 | from dataclasses import dataclass
  13 | 
# Configure root logging once at import time; INFO and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every helper and tool in this file.
logger = logging.getLogger(__name__)

# Global HTTP client for connection pooling.
# Starts as None; initialize_client() creates it on first use and
# cleanup_client() closes it and resets it to None.
http_client = None
  19 | 
  20 | # Rate Limiting Configuration
  21 | @dataclass
  22 | class RateLimitConfig:
  23 |     # Define rate limits (requests, seconds)
  24 |     SEARCH_LIMIT = (1, 1)  # 1 request per 1 second
  25 |     BATCH_LIMIT = (1, 1)   # 1 request per 1 second
  26 |     DEFAULT_LIMIT = (10, 1)  # 10 requests per 1 second
  27 |     
  28 |     # Endpoints categorization
  29 |     # These endpoints have stricter rate limits due to their computational intensity
  30 |     # and to prevent abuse of the recommendation system
  31 |     RESTRICTED_ENDPOINTS = [
  32 |         "/paper/batch",     # Batch operations are expensive
  33 |         "/paper/search",    # Search operations are computationally intensive
  34 |         "/recommendations"  # Recommendation generation is resource-intensive
  35 |     ]
  36 | 
  37 | # Error Types
  38 | class ErrorType(Enum):
  39 |     RATE_LIMIT = "rate_limit"
  40 |     API_ERROR = "api_error"
  41 |     VALIDATION = "validation"
  42 |     TIMEOUT = "timeout"
  43 | 
  44 | # Field Constants
  45 | class PaperFields:
  46 |     DEFAULT = ["title", "abstract", "year", "citationCount", "authors", "url"]
  47 |     DETAILED = DEFAULT + ["references", "citations", "venue", "influentialCitationCount"]
  48 |     MINIMAL = ["title", "year", "authors"]
  49 |     SEARCH = ["paperId", "title", "year", "citationCount"]
  50 |     
  51 |     # Valid fields from API documentation
  52 |     VALID_FIELDS = {
  53 |         "abstract",
  54 |         "authors",
  55 |         "citationCount",
  56 |         "citations",
  57 |         "corpusId",
  58 |         "embedding",
  59 |         "externalIds",
  60 |         "fieldsOfStudy",
  61 |         "influentialCitationCount",
  62 |         "isOpenAccess",
  63 |         "openAccessPdf",
  64 |         "paperId",
  65 |         "publicationDate",
  66 |         "publicationTypes",
  67 |         "publicationVenue",
  68 |         "references",
  69 |         "s2FieldsOfStudy",
  70 |         "title",
  71 |         "tldr",
  72 |         "url",
  73 |         "venue",
  74 |         "year"
  75 |     }
  76 | 
  77 | class AuthorDetailFields:
  78 |     """Common field combinations for author details"""
  79 |     
  80 |     # Basic author information
  81 |     BASIC = ["name", "url", "affiliations"]
  82 |     
  83 |     # Author's papers information
  84 |     PAPERS_BASIC = ["papers"]  # Returns paperId and title
  85 |     PAPERS_DETAILED = [
  86 |         "papers.year",
  87 |         "papers.authors",
  88 |         "papers.abstract",
  89 |         "papers.venue",
  90 |         "papers.url"
  91 |     ]
  92 |     
  93 |     # Complete author profile
  94 |     COMPLETE = BASIC + ["papers", "papers.year", "papers.authors", "papers.venue"]
  95 |     
  96 |     # Citation metrics
  97 |     METRICS = ["citationCount", "hIndex", "paperCount"]
  98 | 
  99 |     # Valid fields for author details
 100 |     VALID_FIELDS = {
 101 |         "authorId",
 102 |         "name",
 103 |         "url",
 104 |         "affiliations",
 105 |         "papers",
 106 |         "papers.year",
 107 |         "papers.authors",
 108 |         "papers.abstract",
 109 |         "papers.venue",
 110 |         "papers.url",
 111 |         "citationCount",
 112 |         "hIndex",
 113 |         "paperCount"
 114 |     }
 115 | 
 116 | class PaperDetailFields:
 117 |     """Common field combinations for paper details"""
 118 |     
 119 |     # Basic paper information
 120 |     BASIC = ["title", "abstract", "year", "venue"]
 121 |     
 122 |     # Author information
 123 |     AUTHOR_BASIC = ["authors"]
 124 |     AUTHOR_DETAILED = ["authors.url", "authors.paperCount", "authors.citationCount"]
 125 |     
 126 |     # Citation information
 127 |     CITATION_BASIC = ["citations", "references"]
 128 |     CITATION_DETAILED = ["citations.title", "citations.abstract", "citations.year",
 129 |                         "references.title", "references.abstract", "references.year"]
 130 |     
 131 |     # Full paper details
 132 |     COMPLETE = BASIC + AUTHOR_BASIC + CITATION_BASIC + ["url", "fieldsOfStudy", 
 133 |                                                        "publicationVenue", "publicationTypes"]
 134 | 
 135 | class CitationReferenceFields:
 136 |     """Common field combinations for citation and reference queries"""
 137 |     
 138 |     # Basic information
 139 |     BASIC = ["title"]
 140 |     
 141 |     # Citation/Reference context
 142 |     CONTEXT = ["contexts", "intents", "isInfluential"]
 143 |     
 144 |     # Paper details
 145 |     DETAILED = ["title", "abstract", "authors", "year", "venue"]
 146 |     
 147 |     # Full information
 148 |     COMPLETE = CONTEXT + DETAILED
 149 | 
 150 |     # Valid fields for citation/reference queries
 151 |     VALID_FIELDS = {
 152 |         "contexts",
 153 |         "intents",
 154 |         "isInfluential",
 155 |         "title",
 156 |         "abstract",
 157 |         "authors",
 158 |         "year",
 159 |         "venue",
 160 |         "paperId",
 161 |         "url",
 162 |         "citationCount",
 163 |         "influentialCitationCount"
 164 |     }
 165 | 
 166 | # Configuration
 167 | class Config:
 168 |     # API Configuration
 169 |     API_VERSION = "v1"
 170 |     BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}"
 171 |     TIMEOUT = 30  # seconds
 172 |     
 173 |     # Request Limits
 174 |     MAX_BATCH_SIZE = 100
 175 |     MAX_RESULTS_PER_PAGE = 100
 176 |     DEFAULT_PAGE_SIZE = 10
 177 |     MAX_BATCHES = 5
 178 |     
 179 |     # Fields Configuration
 180 |     DEFAULT_FIELDS = PaperFields.DEFAULT
 181 |     
 182 |     # Feature Flags
 183 |     ENABLE_CACHING = False
 184 |     DEBUG_MODE = False
 185 |     
 186 |     # Search Configuration
 187 |     SEARCH_TYPES = {
 188 |         "comprehensive": {
 189 |             "description": "Balanced search considering relevance and impact",
 190 |             "min_citations": None,
 191 |             "ranking_strategy": "balanced"
 192 |         },
 193 |         "influential": {
 194 |             "description": "Focus on highly-cited and influential papers",
 195 |             "min_citations": 50,
 196 |             "ranking_strategy": "citations"
 197 |         },
 198 |         "latest": {
 199 |             "description": "Focus on recent papers with impact",
 200 |             "min_citations": None,
 201 |             "ranking_strategy": "recency"
 202 |         }
 203 |     }
 204 | 
 205 | # Rate Limiter
 206 | class RateLimiter:
 207 |     def __init__(self):
 208 |         self._last_call_time = {}
 209 |         self._locks = {}
 210 | 
 211 |     def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]:
 212 |         if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS):
 213 |             return RateLimitConfig.SEARCH_LIMIT
 214 |         return RateLimitConfig.DEFAULT_LIMIT
 215 | 
 216 |     async def acquire(self, endpoint: str):
 217 |         if endpoint not in self._locks:
 218 |             self._locks[endpoint] = asyncio.Lock()
 219 |             self._last_call_time[endpoint] = 0
 220 | 
 221 |         async with self._locks[endpoint]:
 222 |             rate_limit = self._get_rate_limit(endpoint)
 223 |             current_time = time.time()
 224 |             time_since_last_call = current_time - self._last_call_time[endpoint]
 225 |             
 226 |             if time_since_last_call < rate_limit[1]:
 227 |                 delay = rate_limit[1] - time_since_last_call
 228 |                 await asyncio.sleep(delay)
 229 |             
 230 |             self._last_call_time[endpoint] = time.time()
 231 | 
 232 | def create_error_response(
 233 |     error_type: ErrorType,
 234 |     message: str,
 235 |     details: Optional[Dict] = None
 236 | ) -> Dict:
 237 |     return {
 238 |         "error": {
 239 |             "type": error_type.value,
 240 |             "message": message,
 241 |             "details": details or {}
 242 |         }
 243 |     }
 244 | 
# FastMCP application object; tool functions register on it via @mcp.tool().
mcp = FastMCP("Semantic Scholar Server")
# Single shared limiter instance consulted by make_request() before each call.
rate_limiter = RateLimiter()
 247 | 
 248 | 
 249 | # Basic functions
 250 | 
 251 | def get_api_key() -> Optional[str]:
 252 |     """
 253 |     Get the Semantic Scholar API key from environment variables.
 254 |     Returns None if no API key is set, enabling unauthenticated access.
 255 |     """
 256 |     api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY")
 257 |     if not api_key:
 258 |         logger.warning("No SEMANTIC_SCHOLAR_API_KEY set. Using unauthenticated access with lower rate limits.")
 259 |     return api_key
 260 | 
 261 | async def handle_exception(loop, context):
 262 |     """Global exception handler for the event loop."""
 263 |     msg = context.get("exception", context["message"])
 264 |     logger.error(f"Caught exception: {msg}")
 265 |     asyncio.create_task(shutdown())
 266 | 
 267 | async def initialize_client():
 268 |     """Initialize the global HTTP client."""
 269 |     global http_client
 270 |     if http_client is None:
 271 |         http_client = httpx.AsyncClient(
 272 |             timeout=Config.TIMEOUT,
 273 |             limits=httpx.Limits(max_keepalive_connections=10)
 274 |         )
 275 |     return http_client
 276 | 
 277 | async def cleanup_client():
 278 |     """Cleanup the global HTTP client."""
 279 |     global http_client
 280 |     if http_client is not None:
 281 |         await http_client.aclose()
 282 |         http_client = None
 283 | 
 284 | async def make_request(endpoint: str, params: Dict = None) -> Dict:
 285 |     """Make a rate-limited request to the Semantic Scholar API."""
 286 |     try:
 287 |         # Apply rate limiting
 288 |         await rate_limiter.acquire(endpoint)
 289 | 
 290 |         # Get API key if available
 291 |         api_key = get_api_key()
 292 |         headers = {"x-api-key": api_key} if api_key else {}
 293 |         url = f"{Config.BASE_URL}{endpoint}"
 294 | 
 295 |         # Use global client
 296 |         client = await initialize_client()
 297 |         response = await client.get(url, params=params, headers=headers)
 298 |         response.raise_for_status()
 299 |         return response.json()
 300 |     except httpx.HTTPStatusError as e:
 301 |         logger.error(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}")
 302 |         if e.response.status_code == 429:
 303 |             return create_error_response(
 304 |                 ErrorType.RATE_LIMIT,
 305 |                 "Rate limit exceeded. Consider using an API key for higher limits.",
 306 |                 {
 307 |                     "retry_after": e.response.headers.get("retry-after"),
 308 |                     "authenticated": bool(get_api_key())
 309 |                 }
 310 |             )
 311 |         return create_error_response(
 312 |             ErrorType.API_ERROR,
 313 |             f"HTTP error: {e.response.status_code}",
 314 |             {"response": e.response.text}
 315 |         )
 316 |     except httpx.TimeoutException as e:
 317 |         logger.error(f"Request timeout for {endpoint}: {str(e)}")
 318 |         return create_error_response(
 319 |             ErrorType.TIMEOUT,
 320 |             f"Request timed out after {Config.TIMEOUT} seconds"
 321 |         )
 322 |     except Exception as e:
 323 |         logger.error(f"Unexpected error for {endpoint}: {str(e)}")
 324 |         return create_error_response(
 325 |             ErrorType.API_ERROR,
 326 |             str(e)
 327 |         )
 328 | 
 329 | 
 330 | 
 331 | 
 332 | # 1. Paper Data Tools
 333 | 
 334 | # 1.1 Paper relevance search
 335 | @mcp.tool()
 336 | async def paper_relevance_search(
 337 |     context: Context,
 338 |     query: str,
 339 |     fields: Optional[List[str]] = None,
 340 |     publication_types: Optional[List[str]] = None,
 341 |     open_access_pdf: bool = False,
 342 |     min_citation_count: Optional[int] = None,
 343 |     year: Optional[str] = None,  # supports formats like "2019", "2016-2020", "2010-", "-2015"
 344 |     venue: Optional[List[str]] = None,
 345 |     fields_of_study: Optional[List[str]] = None,
 346 |     offset: int = 0,
 347 |     limit: int = Config.DEFAULT_PAGE_SIZE
 348 | ) -> Dict:
 349 |     """
 350 |     Search for papers on Semantic Scholar using relevance-based ranking.
 351 |     This endpoint is optimized for finding the most relevant papers matching a text query.
 352 |     Results are sorted by relevance score.
 353 | 
 354 |     Args:
 355 |         query (str): A text query to search for. The query will be matched against paper titles,
 356 |             abstracts, venue names, and author names. All terms in the query must be present
 357 |             in the paper for it to be returned. The query is case-insensitive and matches word
 358 |             prefixes (e.g. "quantum" matches "quantum" and "quantumly").
 359 | 
 360 |         fields (Optional[List[str]]): List of fields to return for each paper.
 361 |             paperId and title are always returned.
 362 |             Available fields:
 363 |             - abstract: The paper's abstract
 364 |             - authors: List of authors with name and authorId
 365 |             - citationCount: Total number of citations
 366 |             - citations: List of papers citing this paper
 367 |             - corpusId: Internal ID for the paper
 368 |             - embedding: Vector embedding of the paper
 369 |             - externalIds: External IDs (DOI, MAG, etc)
 370 |             - fieldsOfStudy: List of fields of study
 371 |             - influentialCitationCount: Number of influential citations
 372 |             - isOpenAccess: Whether paper is open access
 373 |             - openAccessPdf: Open access PDF URL if available
 374 |             - paperId: Semantic Scholar paper ID
 375 |             - publicationDate: Publication date in YYYY-MM-DD format
 376 |             - publicationTypes: List of publication types
 377 |             - publicationVenue: Venue information
 378 |             - references: List of papers cited by this paper
 379 |             - s2FieldsOfStudy: Semantic Scholar fields
 380 |             - title: Paper title
 381 |             - tldr: AI-generated TLDR summary
 382 |             - url: URL to Semantic Scholar paper page
 383 |             - venue: Publication venue name
 384 |             - year: Publication year
 385 | 
 386 |         publication_types (Optional[List[str]]): Filter by publication types.
 387 |             Available types:
 388 |             - Review
 389 |             - JournalArticle
 390 |             - CaseReport
 391 |             - ClinicalTrial
 392 |             - Conference
 393 |             - Dataset
 394 |             - Editorial
 395 |             - LettersAndComments
 396 |             - MetaAnalysis
 397 |             - News
 398 |             - Study
 399 |             - Book
 400 |             - BookSection
 401 | 
 402 |         open_access_pdf (bool): If True, only include papers with a public PDF.
 403 |             Default: False
 404 | 
 405 |         min_citation_count (Optional[int]): Minimum number of citations required.
 406 |             Papers with fewer citations will be filtered out.
 407 | 
 408 |         year (Optional[str]): Filter by publication year. Supports several formats:
 409 |             - Single year: "2019"
 410 |             - Year range: "2016-2020"
 411 |             - Since year: "2010-"
 412 |             - Until year: "-2015"
 413 | 
 414 |         venue (Optional[List[str]]): Filter by publication venues.
 415 |             Accepts full venue names or ISO4 abbreviations.
 416 |             Examples: ["Nature", "Science", "N. Engl. J. Med."]
 417 | 
 418 |         fields_of_study (Optional[List[str]]): Filter by fields of study.
 419 |             Available fields:
 420 |             - Computer Science
 421 |             - Medicine
 422 |             - Chemistry
 423 |             - Biology
 424 |             - Materials Science
 425 |             - Physics
 426 |             - Geology
 427 |             - Psychology
 428 |             - Art
 429 |             - History
 430 |             - Geography
 431 |             - Sociology
 432 |             - Business
 433 |             - Political Science
 434 |             - Economics
 435 |             - Philosophy
 436 |             - Mathematics
 437 |             - Engineering
 438 |             - Environmental Science
 439 |             - Agricultural and Food Sciences
 440 |             - Education
 441 |             - Law
 442 |             - Linguistics
 443 | 
 444 |         offset (int): Number of results to skip for pagination.
 445 |             Default: 0
 446 | 
 447 |         limit (int): Maximum number of results to return.
 448 |             Default: 10
 449 |             Maximum: 100
 450 | 
 451 |     Returns:
 452 |         Dict: {
 453 |             "total": int,      # Total number of papers matching the query
 454 |             "offset": int,     # Current offset in the results
 455 |             "next": int,       # Offset for the next page of results (if available)
 456 |             "data": List[Dict] # List of papers with requested fields
 457 |         }
 458 | 
 459 |     Notes:
 460 |         - Results are sorted by relevance to the query
 461 |         - All query terms must be present in the paper (AND operation)
 462 |         - Query matches are case-insensitive
 463 |         - Query matches word prefixes (e.g., "quantum" matches "quantum" and "quantumly")
 464 |         - Maximum of 100 results per request
 465 |         - Use offset parameter for pagination
 466 |         - Rate limits apply (see API documentation)
 467 |     """
 468 |     if not query.strip():
 469 |         return create_error_response(
 470 |             ErrorType.VALIDATION,
 471 |             "Query string cannot be empty"
 472 |         )
 473 | 
 474 |     # Validate and prepare fields
 475 |     if fields is None:
 476 |         fields = PaperFields.DEFAULT
 477 |     else:
 478 |         invalid_fields = set(fields) - PaperFields.VALID_FIELDS
 479 |         if invalid_fields:
 480 |             return create_error_response(
 481 |                 ErrorType.VALIDATION,
 482 |                 f"Invalid fields: {', '.join(invalid_fields)}",
 483 |                 {"valid_fields": list(PaperFields.VALID_FIELDS)}
 484 |             )
 485 | 
 486 |     # Validate and prepare parameters
 487 |     limit = min(limit, Config.MAX_RESULTS_PER_PAGE)
 488 |     params = {
 489 |         "query": query,
 490 |         "offset": offset,
 491 |         "limit": limit,
 492 |         "fields": ",".join(fields)
 493 |     }
 494 | 
 495 |     # Add optional filters
 496 |     if publication_types:
 497 |         params["publicationTypes"] = ",".join(publication_types)
 498 |     if open_access_pdf:
 499 |         params["openAccessPdf"] = "true"
 500 |     if min_citation_count is not None:
 501 |         params["minCitationCount"] = min_citation_count
 502 |     if year:
 503 |         params["year"] = year
 504 |     if venue:
 505 |         params["venue"] = ",".join(venue)
 506 |     if fields_of_study:
 507 |         params["fieldsOfStudy"] = ",".join(fields_of_study)
 508 | 
 509 |     return await make_request("/paper/search", params)
 510 | 
 511 | # 1.2 Paper bulk search
 512 | @mcp.tool()
 513 | async def paper_bulk_search(
 514 |     context: Context,
 515 |     query: Optional[str] = None,
 516 |     token: Optional[str] = None,
 517 |     fields: Optional[List[str]] = None,
 518 |     sort: Optional[str] = None,
 519 |     publication_types: Optional[List[str]] = None,
 520 |     open_access_pdf: bool = False,
 521 |     min_citation_count: Optional[int] = None,
 522 |     publication_date_or_year: Optional[str] = None,
 523 |     year: Optional[str] = None,
 524 |     venue: Optional[List[str]] = None,
 525 |     fields_of_study: Optional[List[str]] = None
 526 | ) -> Dict:
 527 |     """
 528 |     Bulk search for papers with advanced filtering and sorting options.
 529 |     Intended for retrieving large sets of papers efficiently.
 530 |     
 531 |     Args:
 532 |         query (Optional[str]): Text query to match against paper title and abstract.
 533 |             Supports boolean logic:
 534 |             - '+' for AND operation
 535 |             - '|' for OR operation
 536 |             - '-' to negate a term
 537 |             - '"' for phrase matching
 538 |             - '*' for prefix matching
 539 |             - '()' for precedence
 540 |             - '~N' for edit distance (default 2)
 541 |             Examples:
 542 |             - 'fish ladder' (contains both terms)
 543 |             - 'fish -ladder' (has fish, no ladder)
 544 |             - 'fish | ladder' (either term)
 545 |             - '"fish ladder"' (exact phrase)
 546 |             - '(fish ladder) | outflow'
 547 |             - 'fish~' (fuzzy match)
 548 |             - '"fish ladder"~3' (terms within 3 words)
 549 |             
 550 |         token (Optional[str]): Continuation token for pagination
 551 |         
 552 |         fields (Optional[List[str]]): Fields to return for each paper
 553 |             paperId is always returned
 554 |             Default: paperId and title only
 555 |             
 556 |         sort (Optional[str]): Sort order in format 'field:order'
 557 |             Fields: paperId, publicationDate, citationCount
 558 |             Order: asc (default), desc
 559 |             Default: 'paperId:asc'
 560 |             Examples:
 561 |             - 'publicationDate:asc' (oldest first)
 562 |             - 'citationCount:desc' (most cited first)
 563 |             
 564 |         publication_types (Optional[List[str]]): Filter by publication types:
 565 |             Review, JournalArticle, CaseReport, ClinicalTrial,
 566 |             Conference, Dataset, Editorial, LettersAndComments,
 567 |             MetaAnalysis, News, Study, Book, BookSection
 568 |             
 569 |         open_access_pdf (bool): Only include papers with public PDF
 570 |         
 571 |         min_citation_count (Optional[int]): Minimum citation threshold
 572 |         
 573 |         publication_date_or_year (Optional[str]): Date/year range filter
 574 |             Format: <startDate>:<endDate> in YYYY-MM-DD
 575 |             Supports partial dates and open ranges
 576 |             Examples:
 577 |             - '2019-03-05' (specific date)
 578 |             - '2019-03' (month)
 579 |             - '2019' (year)
 580 |             - '2016-03-05:2020-06-06' (range)
 581 |             - '1981-08-25:' (since date)
 582 |             - ':2015-01' (until date)
 583 |             
 584 |         year (Optional[str]): Publication year filter
 585 |             Examples: '2019', '2016-2020', '2010-', '-2015'
 586 |             
 587 |         venue (Optional[List[str]]): Filter by publication venues
 588 |             Accepts full names or ISO4 abbreviations
 589 |             Examples: ['Nature', 'N. Engl. J. Med.']
 590 |             
 591 |         fields_of_study (Optional[List[str]]): Filter by fields of study
 592 |             Available fields include: Computer Science, Medicine,
 593 |             Physics, Mathematics, etc.
 594 |     
 595 |     Returns:
 596 |         Dict: {
 597 |             'total': int,      # Total matching papers
 598 |             'token': str,      # Continuation token for next batch
 599 |             'data': List[Dict] # Papers with requested fields
 600 |         }
 601 |         
 602 |     Notes:
 603 |         - Returns up to 1,000 papers per call
 604 |         - Can fetch up to 10M papers total
 605 |         - Nested data (citations, references) not available
 606 |         - For larger datasets, use the Datasets API
 607 |     """
 608 |     # Build request parameters
 609 |     params = {}
 610 |     
 611 |     # Add query if provided
 612 |     if query:
 613 |         params["query"] = query.strip()
 614 |         
 615 |     # Add continuation token if provided
 616 |     if token:
 617 |         params["token"] = token
 618 |         
 619 |     # Add fields if provided
 620 |     if fields:
 621 |         # Validate fields
 622 |         invalid_fields = set(fields) - PaperFields.VALID_FIELDS
 623 |         if invalid_fields:
 624 |             return create_error_response(
 625 |                 ErrorType.VALIDATION,
 626 |                 f"Invalid fields: {', '.join(invalid_fields)}",
 627 |                 {"valid_fields": list(PaperFields.VALID_FIELDS)}
 628 |             )
 629 |         params["fields"] = ",".join(fields)
 630 |         
 631 |     # Add sort if provided
 632 |     if sort:
 633 |         # Validate sort format
 634 |         valid_sort_fields = ["paperId", "publicationDate", "citationCount"]
 635 |         valid_sort_orders = ["asc", "desc"]
 636 |         
 637 |         try:
 638 |             field, order = sort.split(":")
 639 |             if field not in valid_sort_fields:
 640 |                 return create_error_response(
 641 |                     ErrorType.VALIDATION,
 642 |                     f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}"
 643 |                 )
 644 |             if order not in valid_sort_orders:
 645 |                 return create_error_response(
 646 |                     ErrorType.VALIDATION,
 647 |                     f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}"
 648 |                 )
 649 |             params["sort"] = sort
 650 |         except ValueError:
 651 |             return create_error_response(
 652 |                 ErrorType.VALIDATION,
 653 |                 "Sort must be in format 'field:order'"
 654 |             )
 655 |             
 656 |     # Add publication types if provided
 657 |     if publication_types:
 658 |         valid_types = {
 659 |             "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
 660 |             "Conference", "Dataset", "Editorial", "LettersAndComments",
 661 |             "MetaAnalysis", "News", "Study", "Book", "BookSection"
 662 |         }
 663 |         invalid_types = set(publication_types) - valid_types
 664 |         if invalid_types:
 665 |             return create_error_response(
 666 |                 ErrorType.VALIDATION,
 667 |                 f"Invalid publication types: {', '.join(invalid_types)}",
 668 |                 {"valid_types": list(valid_types)}
 669 |             )
 670 |         params["publicationTypes"] = ",".join(publication_types)
 671 |         
 672 |     # Add open access PDF filter
 673 |     if open_access_pdf:
 674 |         params["openAccessPdf"] = "true"
 675 |         
 676 |     # Add minimum citation count if provided
 677 |     if min_citation_count is not None:
 678 |         if min_citation_count < 0:
 679 |             return create_error_response(
 680 |                 ErrorType.VALIDATION,
 681 |                 "Minimum citation count cannot be negative"
 682 |             )
 683 |         params["minCitationCount"] = str(min_citation_count)
 684 |         
 685 |     # Add publication date/year if provided
 686 |     if publication_date_or_year:
 687 |         params["publicationDateOrYear"] = publication_date_or_year
 688 |     elif year:
 689 |         params["year"] = year
 690 |         
 691 |     # Add venue filter if provided
 692 |     if venue:
 693 |         params["venue"] = ",".join(venue)
 694 |         
 695 |     # Add fields of study filter if provided
 696 |     if fields_of_study:
 697 |         valid_fields = {
 698 |             "Computer Science", "Medicine", "Chemistry", "Biology",
 699 |             "Materials Science", "Physics", "Geology", "Psychology",
 700 |             "Art", "History", "Geography", "Sociology", "Business",
 701 |             "Political Science", "Economics", "Philosophy", "Mathematics",
 702 |             "Engineering", "Environmental Science", "Agricultural and Food Sciences",
 703 |             "Education", "Law", "Linguistics"
 704 |         }
 705 |         invalid_fields = set(fields_of_study) - valid_fields
 706 |         if invalid_fields:
 707 |             return create_error_response(
 708 |                 ErrorType.VALIDATION,
 709 |                 f"Invalid fields of study: {', '.join(invalid_fields)}",
 710 |                 {"valid_fields": list(valid_fields)}
 711 |             )
 712 |         params["fieldsOfStudy"] = ",".join(fields_of_study)
 713 |     
 714 |     # Make the API request
 715 |     result = await make_request("/paper/search/bulk", params)
 716 |     
 717 |     # Handle potential errors
 718 |     if isinstance(result, Dict) and "error" in result:
 719 |         return result
 720 |         
 721 |     return result
 722 | 
 723 | # 1.3 Paper title search
 724 | @mcp.tool()
 725 | async def paper_title_search(
 726 |     context: Context,
 727 |     query: str,
 728 |     fields: Optional[List[str]] = None,
 729 |     publication_types: Optional[List[str]] = None,
 730 |     open_access_pdf: bool = False,
 731 |     min_citation_count: Optional[int] = None,
 732 |     year: Optional[str] = None,
 733 |     venue: Optional[List[str]] = None,
 734 |     fields_of_study: Optional[List[str]] = None
 735 | ) -> Dict:
 736 |     """
 737 |     Find a single paper by title match. This endpoint is optimized for finding a specific paper
 738 |     by its title and returns the best matching paper based on title similarity.
 739 | 
 740 |     Args:
 741 |         query (str): The title text to search for. The query will be matched against paper titles
 742 |             to find the closest match. The match is case-insensitive and ignores punctuation.
 743 | 
 744 |         fields (Optional[List[str]]): List of fields to return for the paper.
 745 |             paperId and title are always returned.
 746 |             Available fields:
 747 |             - abstract: The paper's abstract
 748 |             - authors: List of authors with name and authorId
 749 |             - citationCount: Total number of citations
 750 |             - citations: List of papers citing this paper
 751 |             - corpusId: Internal ID for the paper
 752 |             - embedding: Vector embedding of the paper
 753 |             - externalIds: External IDs (DOI, MAG, etc)
 754 |             - fieldsOfStudy: List of fields of study
 755 |             - influentialCitationCount: Number of influential citations
 756 |             - isOpenAccess: Whether paper is open access
 757 |             - openAccessPdf: Open access PDF URL if available
 758 |             - paperId: Semantic Scholar paper ID
 759 |             - publicationDate: Publication date in YYYY-MM-DD format
 760 |             - publicationTypes: List of publication types
 761 |             - publicationVenue: Venue information
 762 |             - references: List of papers cited by this paper
 763 |             - s2FieldsOfStudy: Semantic Scholar fields
 764 |             - title: Paper title
 765 |             - tldr: AI-generated TLDR summary
 766 |             - url: URL to Semantic Scholar paper page
 767 |             - venue: Publication venue name
 768 |             - year: Publication year
 769 | 
 770 |         publication_types (Optional[List[str]]): Filter by publication types.
 771 |             Available types:
 772 |             - Review
 773 |             - JournalArticle
 774 |             - CaseReport
 775 |             - ClinicalTrial
 776 |             - Conference
 777 |             - Dataset
 778 |             - Editorial
 779 |             - LettersAndComments
 780 |             - MetaAnalysis
 781 |             - News
 782 |             - Study
 783 |             - Book
 784 |             - BookSection
 785 | 
 786 |         open_access_pdf (bool): If True, only include papers with a public PDF.
 787 |             Default: False
 788 | 
 789 |         min_citation_count (Optional[int]): Minimum number of citations required.
 790 |             Papers with fewer citations will be filtered out.
 791 | 
 792 |         year (Optional[str]): Filter by publication year. Supports several formats:
 793 |             - Single year: "2019"
 794 |             - Year range: "2016-2020"
 795 |             - Since year: "2010-"
 796 |             - Until year: "-2015"
 797 | 
 798 |         venue (Optional[List[str]]): Filter by publication venues.
 799 |             Accepts full venue names or ISO4 abbreviations.
 800 |             Examples: ["Nature", "Science", "N. Engl. J. Med."]
 801 | 
 802 |         fields_of_study (Optional[List[str]]): Filter by fields of study.
 803 |             Available fields:
 804 |             - Computer Science
 805 |             - Medicine
 806 |             - Chemistry
 807 |             - Biology
 808 |             - Materials Science
 809 |             - Physics
 810 |             - Geology
 811 |             - Psychology
 812 |             - Art
 813 |             - History
 814 |             - Geography
 815 |             - Sociology
 816 |             - Business
 817 |             - Political Science
 818 |             - Economics
 819 |             - Philosophy
 820 |             - Mathematics
 821 |             - Engineering
 822 |             - Environmental Science
 823 |             - Agricultural and Food Sciences
 824 |             - Education
 825 |             - Law
 826 |             - Linguistics
 827 | 
 828 |     Returns:
 829 |         Dict: {
 830 |             "paperId": str,      # Semantic Scholar Paper ID
 831 |             "title": str,        # Paper title
 832 |             "matchScore": float, # Similarity score between query and matched title
 833 |             ...                  # Additional requested fields
 834 |         }
 835 |         
 836 |         Returns error response if no matching paper is found.
 837 | 
 838 |     Notes:
 839 |         - Returns the single best matching paper based on title similarity
 840 |         - Match score indicates how well the title matches the query
 841 |         - Case-insensitive matching
 842 |         - Ignores punctuation in matching
 843 |         - Filters are applied after finding the best title match
 844 |     """
 845 |     if not query.strip():
 846 |         return create_error_response(
 847 |             ErrorType.VALIDATION,
 848 |             "Query string cannot be empty"
 849 |         )
 850 | 
 851 |     # Validate and prepare fields
 852 |     if fields is None:
 853 |         fields = PaperFields.DEFAULT
 854 |     else:
 855 |         invalid_fields = set(fields) - PaperFields.VALID_FIELDS
 856 |         if invalid_fields:
 857 |             return create_error_response(
 858 |                 ErrorType.VALIDATION,
 859 |                 f"Invalid fields: {', '.join(invalid_fields)}",
 860 |                 {"valid_fields": list(PaperFields.VALID_FIELDS)}
 861 |             )
 862 | 
 863 |     # Build base parameters
 864 |     params = {"query": query}
 865 | 
 866 |     # Add optional parameters
 867 |     if fields:
 868 |         params["fields"] = ",".join(fields)
 869 |     if publication_types:
 870 |         params["publicationTypes"] = ",".join(publication_types)
 871 |     if open_access_pdf:
 872 |         params["openAccessPdf"] = "true"
 873 |     if min_citation_count is not None:
 874 |         params["minCitationCount"] = str(min_citation_count)
 875 |     if year:
 876 |         params["year"] = year
 877 |     if venue:
 878 |         params["venue"] = ",".join(venue)
 879 |     if fields_of_study:
 880 |         params["fieldsOfStudy"] = ",".join(fields_of_study)
 881 | 
 882 |     result = await make_request("/paper/search/match", params)
 883 |     
 884 |     # Handle specific error cases
 885 |     if isinstance(result, Dict):
 886 |         if "error" in result:
 887 |             error_msg = result["error"].get("message", "")
 888 |             if "404" in error_msg:
 889 |                 return create_error_response(
 890 |                     ErrorType.VALIDATION,
 891 |                     "No matching paper found",
 892 |                     {"original_query": query}
 893 |                 )
 894 |             return result
 895 |     
 896 |     return result
 897 | 
 898 | # 1.4 Details about a paper
 899 | @mcp.tool()
 900 | async def paper_details(
 901 |     context: Context,
 902 |     paper_id: str,
 903 |     fields: Optional[List[str]] = None
 904 | ) -> Dict:
 905 |     """
 906 |     Get details about a paper using various types of identifiers.
 907 |     This endpoint provides comprehensive metadata about a paper.
 908 | 
 909 |     Args:
 910 |         paper_id (str): Paper identifier in one of the following formats:
 911 |             - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
 912 |             - CorpusId:<id> (e.g., "CorpusId:215416146")
 913 |             - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
 914 |             - ARXIV:<id> (e.g., "ARXIV:2106.15928")
 915 |             - MAG:<id> (e.g., "MAG:112218234")
 916 |             - ACL:<id> (e.g., "ACL:W12-3903")
 917 |             - PMID:<id> (e.g., "PMID:19872477")
 918 |             - PMCID:<id> (e.g., "PMCID:2323736")
 919 |             - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
 920 |               Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org,
 921 |                                  acm.org, biorxiv.org
 922 |         
 923 |         fields (Optional[List[str]]): List of fields to return.
 924 |             paperId is always returned.
 925 |             Available fields:
 926 |             - abstract: The paper's abstract
 927 |             - authors: List of authors with name and authorId
 928 |             - citationCount: Total number of citations
 929 |             - citations: List of papers citing this paper
 930 |             - corpusId: Internal ID for the paper
 931 |             - embedding: Vector embedding of the paper
 932 |             - externalIds: External IDs (DOI, MAG, etc)
 933 |             - fieldsOfStudy: List of fields of study
 934 |             - influentialCitationCount: Number of influential citations
 935 |             - isOpenAccess: Whether paper is open access
 936 |             - openAccessPdf: Open access PDF URL if available
 937 |             - paperId: Semantic Scholar paper ID
 938 |             - publicationDate: Publication date in YYYY-MM-DD format
 939 |             - publicationTypes: List of publication types
 940 |             - publicationVenue: Venue information
 941 |             - references: List of papers cited by this paper
 942 |             - s2FieldsOfStudy: Semantic Scholar fields
 943 |             - title: Paper title
 944 |             - tldr: AI-generated TLDR summary
 945 |             - url: URL to Semantic Scholar paper page
 946 |             - venue: Publication venue name
 947 |             - year: Publication year
 948 | 
 949 |             Special syntax for nested fields:
 950 |             - For citations/references: citations.title, references.abstract, etc.
 951 |             - For authors: authors.name, authors.affiliations, etc.
 952 |             - For embeddings: embedding.specter_v2 for v2 embeddings
 953 | 
 954 |             If omitted, returns only paperId and title.
 955 | 
 956 |     Returns:
 957 |         Dict: Paper details with requested fields.
 958 |             Always includes paperId.
 959 |             Returns error response if paper not found.
 960 | 
 961 |     Notes:
 962 |         - Supports multiple identifier types for flexibility
 963 |         - Nested fields available for detailed citation/reference/author data
 964 |         - Rate limits apply (see API documentation)
 965 |         - Some fields may be null if data is not available
 966 |     """
 967 |     if not paper_id.strip():
 968 |         return create_error_response(
 969 |             ErrorType.VALIDATION,
 970 |             "Paper ID cannot be empty"
 971 |         )
 972 | 
 973 |     # Build request parameters
 974 |     params = {}
 975 |     if fields:
 976 |         params["fields"] = ",".join(fields)
 977 | 
 978 |     # Make the API request
 979 |     result = await make_request(f"/paper/{paper_id}", params)
 980 |     
 981 |     # Handle potential errors
 982 |     if isinstance(result, Dict) and "error" in result:
 983 |         error_msg = result["error"].get("message", "")
 984 |         if "404" in error_msg:
 985 |             return create_error_response(
 986 |                 ErrorType.VALIDATION,
 987 |                 "Paper not found",
 988 |                 {"paper_id": paper_id}
 989 |             )
 990 |         return result
 991 | 
 992 |     return result
 993 | 
 994 | # 1.5 Get details for multiple papers at once
 995 | @mcp.tool()
 996 | async def paper_batch_details(
 997 |     context: Context,
 998 |     paper_ids: List[str],
 999 |     fields: Optional[str] = None
1000 | ) -> Dict:
1001 |     """
1002 |     Get details for multiple papers in a single batch request.
1003 |     This endpoint is optimized for efficiently retrieving details about known papers.
1004 |     
1005 |     Args:
1006 |         paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats:
1007 |             - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
1008 |             - CorpusId:<id> (e.g., "CorpusId:215416146")
1009 |             - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
1010 |             - ARXIV:<id> (e.g., "ARXIV:2106.15928")
1011 |             - MAG:<id> (e.g., "MAG:112218234")
1012 |             - ACL:<id> (e.g., "ACL:W12-3903")
1013 |             - PMID:<id> (e.g., "PMID:19872477")
1014 |             - PMCID:<id> (e.g., "PMCID:2323736")
1015 |             - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
1016 |               Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org,
1017 |                                  acm.org, biorxiv.org
1018 |             Maximum: 500 IDs per request
1019 | 
1020 |         fields (Optional[str]): Comma-separated list of fields to return for each paper.
1021 |             paperId is always returned.
1022 |             Available fields:
1023 |             - abstract: The paper's abstract
1024 |             - authors: List of authors with name and authorId
1025 |             - citationCount: Total number of citations
1026 |             - citations: List of papers citing this paper
1027 |             - corpusId: Internal ID for the paper
1028 |             - embedding: Vector embedding of the paper
1029 |             - externalIds: External IDs (DOI, MAG, etc)
1030 |             - fieldsOfStudy: List of fields of study
1031 |             - influentialCitationCount: Number of influential citations
1032 |             - isOpenAccess: Whether paper is open access
1033 |             - openAccessPdf: Open access PDF URL if available
1034 |             - paperId: Semantic Scholar paper ID
1035 |             - publicationDate: Publication date in YYYY-MM-DD format
1036 |             - publicationTypes: List of publication types
1037 |             - publicationVenue: Venue information
1038 |             - references: List of papers cited by this paper
1039 |             - s2FieldsOfStudy: Semantic Scholar fields
1040 |             - title: Paper title
1041 |             - tldr: AI-generated TLDR summary
1042 |             - url: URL to Semantic Scholar paper page
1043 |             - venue: Publication venue name
1044 |             - year: Publication year
1045 | 
1046 |             Special syntax for nested fields:
1047 |             - For citations/references: citations.title, references.abstract, etc.
1048 |             - For authors: authors.name, authors.affiliations, etc.
1049 |             - For embeddings: embedding.specter_v2 for v2 embeddings
1050 | 
1051 |             If omitted, returns only paperId and title.
1052 |     
1053 |     Returns:
1054 |         List[Dict]: List of paper details with requested fields.
1055 |             - Results maintain the same order as input paper_ids
1056 |             - Invalid or not found paper IDs return null in the results
1057 |             - Each paper object contains the requested fields
1058 |             - paperId is always included in each paper object
1059 | 
1060 |     Notes:
1061 |         - More efficient than making multiple single-paper requests
1062 |         - Maximum of 500 paper IDs per request
1063 |         - Rate limits apply (see API documentation)
1064 |         - Some fields may be null if data is not available
1065 |         - Invalid paper IDs return null instead of causing an error
1066 |         - Order of results matches order of input IDs for easy mapping
1067 |     """
1068 |     # Validate inputs
1069 |     if not paper_ids:
1070 |         return create_error_response(
1071 |             ErrorType.VALIDATION,
1072 |             "Paper IDs list cannot be empty"
1073 |         )
1074 |         
1075 |     if len(paper_ids) > 500:
1076 |         return create_error_response(
1077 |             ErrorType.VALIDATION,
1078 |             "Cannot process more than 500 paper IDs at once",
1079 |             {"max_papers": 500, "received": len(paper_ids)}
1080 |         )
1081 | 
1082 |     # Validate fields if provided
1083 |     if fields:
1084 |         field_list = fields.split(",")
1085 |         invalid_fields = set(field_list) - PaperFields.VALID_FIELDS
1086 |         if invalid_fields:
1087 |             return create_error_response(
1088 |                 ErrorType.VALIDATION,
1089 |                 f"Invalid fields: {', '.join(invalid_fields)}",
1090 |                 {"valid_fields": list(PaperFields.VALID_FIELDS)}
1091 |             )
1092 | 
1093 |     # Build request parameters
1094 |     params = {}
1095 |     if fields:
1096 |         params["fields"] = fields
1097 | 
1098 |     # Make POST request with proper structure
1099 |     try:
1100 |         async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
1101 |             api_key = get_api_key()
1102 |             headers = {"x-api-key": api_key} if api_key else {}
1103 |             
1104 |             response = await client.post(
1105 |                 f"{Config.BASE_URL}/paper/batch",
1106 |                 params=params,
1107 |                 json={"ids": paper_ids},
1108 |                 headers=headers
1109 |             )
1110 |             response.raise_for_status()
1111 |             return response.json()
1112 |             
1113 |     except httpx.HTTPStatusError as e:
1114 |         if e.response.status_code == 429:
1115 |             return create_error_response(
1116 |                 ErrorType.RATE_LIMIT,
1117 |                 "Rate limit exceeded",
1118 |                 {"retry_after": e.response.headers.get("retry-after")}
1119 |             )
1120 |         return create_error_response(
1121 |             ErrorType.API_ERROR,
1122 |             f"HTTP error: {e.response.status_code}",
1123 |             {"response": e.response.text}
1124 |         )
1125 |     except httpx.TimeoutException:
1126 |         return create_error_response(
1127 |             ErrorType.TIMEOUT,
1128 |             f"Request timed out after {Config.TIMEOUT} seconds"
1129 |         )
1130 |     except Exception as e:
1131 |         return create_error_response(
1132 |             ErrorType.API_ERROR,
1133 |             str(e)
1134 |         )
1135 | 
1136 | # 1.6 Details about a paper's authors
@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with pagination support.
    This endpoint provides author information and their contributions.
    The request goes to /paper/{paper_id}/authors.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
            Every entry must be in AuthorDetailFields.VALID_FIELDS.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0
            Minimum: 0

        limit (int): Maximum number of authors to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }
        A validation error response is returned for a blank paper_id, an
        out-of-range offset/limit, or unknown fields; an error mentioning 404
        is reported as "Paper not found".

    Notes:
        - Authors are returned in the order they appear on the paper
        - Supports pagination for papers with many authors
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally so an obviously invalid request is
    # rejected with a clear message before any API call is made.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1, "received": limit}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0, "received": offset}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # sorted() keeps the message stable across runs
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/authors", params)

    # An error mentioning 404 becomes "Paper not found"; everything else
    # (success or other errors) is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1249 | 
1250 | # 1.7 Details about a paper's citations
@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers that cite the specified paper (papers where this paper appears in their bibliography).
    This endpoint provides detailed citation information including citation contexts.
    The request goes to /paper/{paper_id}/citations.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        fields (Optional[List[str]]): List of fields to return for each citing paper.
            paperId is always returned.
            Every entry must be in CitationReferenceFields.VALID_FIELDS.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Citation-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of citations to skip for pagination.
            Default: 0
            Minimum: 0

        limit (int): Maximum number of citations to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of citing papers with requested fields
        }
        A validation error response is returned for a blank paper_id, an
        out-of-range offset/limit, or unknown fields; an error mentioning 404
        is reported as "Paper not found".

    Notes:
        - Citations are sorted by citation date (newest first)
        - Includes citation context when available
        - Supports pagination for highly-cited papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally so an obviously invalid request is
    # rejected with a clear message before any API call is made.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1, "received": limit}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0, "received": offset}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # sorted() keeps the message stable across runs
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/citations", params)

    # An error mentioning 404 becomes "Paper not found"; everything else
    # (success or other errors) is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1362 | 
1363 | # 1.8 Details about a paper's references
@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers cited by the specified paper (papers appearing in this paper's bibliography).
    This endpoint provides detailed reference information including citation contexts.
    The request goes to /paper/{paper_id}/references.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        fields (Optional[List[str]]): List of fields to return for each referenced paper.
            paperId is always returned.
            Every entry must be in CitationReferenceFields.VALID_FIELDS.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Reference-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of references to skip for pagination.
            Default: 0
            Minimum: 0

        limit (int): Maximum number of references to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of referenced papers with requested fields
        }
        A validation error response is returned for a blank paper_id, an
        out-of-range offset/limit, or unknown fields; an error mentioning 404
        is reported as "Paper not found".

    Notes:
        - References are returned in the order they appear in the bibliography
        - Includes citation context when available
        - Supports pagination for papers with many references
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    if not paper_id.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally so an obviously invalid request is
    # rejected with a clear message before any API call is made.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1, "received": limit}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0, "received": offset}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # sorted() keeps the message stable across runs
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/references", params)

    # An error mentioning 404 becomes "Paper not found"; everything else
    # (success or other errors) is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1475 | 
1476 | 
1477 | 
1478 | # 2. Author Data Tools
1479 | 
1480 | # 2.1 Search for authors by name
@mcp.tool()
async def author_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Look up authors on Semantic Scholar by name.
    The arguments are validated locally and then forwarded to the
    /author/search endpoint. Results are sorted by relevance to the query.

    Args:
        query (str): Name text to search for. It is matched against author names
            and their known aliases; matching is case-insensitive and matches
            name prefixes. Must not be blank.
            Examples:
            - "Albert Einstein"
            - "Einstein, Albert"
            - "A Einstein"

        fields (Optional[List[str]]): Fields to return for each author.
            authorId is always returned. Every entry must be listed in
            AuthorDetailFields.VALID_FIELDS.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0

        limit (int): Maximum number of authors to return.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: On success {
            "total": int,      # Total number of authors matching the query
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }
        A validation error response is returned instead when the query is
        blank, the limit exceeds 1000, or an unknown field is requested.

    Notes:
        - Supports pagination for large result sets
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Guard clause: whitespace-only queries never reach the API.
    if query.strip() == "":
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Guard clause: enforce the documented page-size ceiling.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    request_params = {
        "query": query,
        "offset": offset,
        "limit": limit
    }

    # Field names are checked and attached in one step; an empty or missing
    # list leaves the "fields" parameter out entirely.
    if fields:
        unknown = set(fields) - AuthorDetailFields.VALID_FIELDS
        if unknown:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown)}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )
        request_params["fields"] = ",".join(fields)

    return await make_request("/author/search", request_params)
1583 | 
1584 | # 2.2 Details about an author
@mcp.tool()
async def author_details(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get detailed information about an author by their ID.
    This endpoint provides comprehensive metadata about an author.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Surrounding whitespace is ignored.
            Example: "1741101" (Albert Einstein)

        fields (Optional[List[str]]): List of fields to return.
            authorId is always returned. Every entry must be listed in
            AuthorDetailFields.VALID_FIELDS.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        Dict: Author details with requested fields.
            Always includes authorId.
            Returns a validation error response if the ID is blank, a field
            is unknown, or the author is not found; any other error response
            from the request helper is passed through unchanged.

    Notes:
        - Provides comprehensive author metadata
        - Papers list is limited to most recent papers
        - For complete paper list, use author_papers endpoint
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalise once so the value that passes validation is the same value
    # that is placed in the request path and reported in error details.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate fields and build request parameters
    params = {}
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # Sorted so the message is stable across runs (set order is not).
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/author/{author_id}", params)

    # Translate a 404 reported by the request helper into a clearer
    # "not found" validation error; everything else is returned as-is.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
1669 | 
1670 | # 2.3 Details about an author's papers
@mcp.tool()
async def author_papers(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers written by an author with pagination support.
    This endpoint provides detailed information about an author's publications.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Surrounding whitespace is ignored.
            Example: "1741101" (Albert Einstein)

        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId is always returned. Every entry must be listed in
            PaperFields.VALID_FIELDS.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - s2FieldsOfStudy: Semantic Scholar fields
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        offset (int): Number of papers to skip for pagination.
            Default: 0

        limit (int): Maximum number of papers to return.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: On success {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of papers with requested fields
        }
        Returns a validation error response if the ID is blank, the limit
        exceeds 1000, a field is unknown, or the author is not found; any
        other error response from the request helper is passed through
        unchanged.

    Notes:
        - Papers are sorted by publication date (newest first)
        - Supports pagination for authors with many papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalise once so the value that passes validation is the same value
    # that is placed in the request path and reported in error details.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate limit
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Build request parameters, validating requested fields on the way
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # Sorted so the message is stable across runs (set order is not).
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/author/{author_id}/papers", params)

    # Translate a 404 reported by the request helper into a clearer
    # "not found" validation error; everything else is returned as-is.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
1776 | 
1777 | # 2.4 Get details for multiple authors at once
@mcp.tool()
async def author_batch_details(
    context: Context,
    author_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple authors in a single batch request.
    This endpoint is optimized for efficiently retrieving details about known authors.

    Args:
        author_ids (List[str]): List of Semantic Scholar author IDs.
            These are unique identifiers assigned by Semantic Scholar.
            Example: ["1741101", "1741102"]
            Maximum: 1000 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each author.
            Whitespace around individual names is ignored ("name, url" is
            accepted) and empty entries are dropped.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        List[Dict]: List of author details with requested fields.
            - Results maintain the same order as input author_ids
            - Invalid or not found author IDs return null in the results
            - Each author object contains the requested fields
            - authorId is always included in each author object
        An error response (validation, rate limit, timeout or API error) is
        returned instead when the request cannot be completed.

    Notes:
        - More efficient than making multiple single-author requests
        - Maximum of 1000 author IDs per request
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
        - Invalid author IDs return null instead of causing an error
        - Order of results matches order of input IDs for easy mapping
    """
    # NOTE(review): the signature is annotated ``-> Dict`` while the success
    # payload is documented as a list; left unchanged to keep the interface.

    # Validate inputs
    if not author_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author IDs list cannot be empty"
        )

    if len(author_ids) > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 1000 author IDs at once",
            {"max_authors": 1000, "received": len(author_ids)}
        )

    # Build request parameters, validating requested fields on the way.
    params = {}
    if fields:
        # Trim each name so "name, url" is not rejected because of the
        # space after the comma; skip empty entries such as a trailing comma.
        field_list = [name.strip() for name in fields.split(",") if name.strip()]
        invalid_fields = set(field_list) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                # Sorted so the message is stable across runs (set order is not).
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": list(AuthorDetailFields.VALID_FIELDS)}
            )
        if field_list:
            params["fields"] = ",".join(field_list)

    # Make POST request with proper structure
    try:
        # This tool talks to the API directly, so it has to take a
        # rate-limit slot itself, as the recommendation tools in this file do.
        await rate_limiter.acquire("/author/batch")

        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/author/batch",
                params=params,
                json={"ids": author_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Log before converting to an error response so unexpected failures
        # are visible in the server log as well as to the caller.
        logger.error("Unexpected error in author batch details: %s", e)
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
1898 | 
1899 | 
1900 | # 3. Paper Recommendation Tools
1901 | 
1902 | # 3.1 Get recommendations based on a single paper
@mcp.tool()
async def get_paper_recommendations_single(
    context: Context,
    paper_id: str,
    fields: Optional[str] = None,
    limit: int = 100,
    from_pool: str = "recent"
) -> Dict:
    """
    Get paper recommendations based on a single seed paper.
    This endpoint is optimized for finding papers similar to a specific paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats
            (surrounding whitespace is ignored; must not be blank):
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Maximum: 500

        from_pool (str): Which pool of papers to recommend from.
            Options:
            - "recent": Recent papers (default)
            - "all-cs": All computer science papers
            Default: "recent"

    Returns:
        Dict: On success {
            "recommendedPapers": List[Dict] # List of recommended papers with requested fields
        }
        An error response (validation, rate limit, timeout or API error) is
        returned instead when the request cannot be completed.

    Notes:
        - Recommendations are based on content similarity and citation patterns
        - Results are sorted by relevance to the seed paper
        - "recent" pool focuses on papers from the last few years
        - "all-cs" pool includes older computer science papers
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    # Validate everything first: a request that is rejected locally should
    # not consume (or wait for) a rate-limit slot.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate limit
    if limit > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot request more than 500 recommendations",
            {"max_limit": 500, "requested": limit}
        )

    # Validate pool
    if from_pool not in ["recent", "all-cs"]:
        return create_error_response(
            ErrorType.VALIDATION,
            "Invalid paper pool specified",
            {"valid_pools": ["recent", "all-cs"]}
        )

    # Build request parameters
    params = {
        "limit": limit,
        "from": from_pool
    }
    if fields:
        params["fields"] = fields

    try:
        # Apply rate limiting only once the request is known to be sendable
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}"
            response = await client.get(url, params=params, headers=headers)

            # A 404 is reported as a validation problem with the seed paper
            # rather than as a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "Paper not found",
                    {"paper_id": paper_id}
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        logger.error("Unexpected error in recommendations: %s", e)
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
2046 | 
2047 | # 3.2 Get recommendations based on multiple papers
@mcp.tool()
async def get_paper_recommendations_multi(
    context: Context,
    positive_paper_ids: List[str],
    negative_paper_ids: Optional[List[str]] = None,
    fields: Optional[str] = None,
    limit: int = 100
) -> Dict:
    """
    Get paper recommendations based on multiple positive and optional negative examples.
    This endpoint is optimized for finding papers similar to a set of papers while
    avoiding papers similar to the negative examples.

    Args:
        positive_paper_ids (List[str]): List of paper IDs to use as positive examples.
            Papers similar to these will be recommended. Must not be empty.
            Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        negative_paper_ids (Optional[List[str]]): List of paper IDs to use as negative examples.
            Papers similar to these will be avoided in recommendations.
            Uses same ID formats as positive_paper_ids.
            When omitted, an empty list is sent.

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Maximum: 500

    Returns:
        Dict: On success {
            "recommendedPapers": List[Dict] # List of recommended papers with requested fields
        }
        An error response (validation, rate limit, timeout or API error) is
        returned instead when the request cannot be completed.

    Notes:
        - Recommendations balance similarity to positive examples and dissimilarity to negative examples
        - Results are sorted by relevance score
        - More positive examples can help focus recommendations
        - Negative examples help filter out unwanted topics/approaches
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    # Validate everything first: a request that is rejected locally should
    # not consume (or wait for) a rate-limit slot.
    if not positive_paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Must provide at least one positive paper ID"
        )

    if limit > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot request more than 500 recommendations",
            {"max_limit": 500, "requested": limit}
        )

    # Build request parameters
    params = {"limit": limit}
    if fields:
        params["fields"] = fields

    request_body = {
        "positivePaperIds": positive_paper_ids,
        "negativePaperIds": negative_paper_ids or []
    }

    try:
        # Apply rate limiting only once the request is known to be sendable
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = "https://api.semanticscholar.org/recommendations/v1/papers"
            response = await client.post(url, params=params, json=request_body, headers=headers)

            # A 404 is reported as a validation problem with the input
            # papers rather than as a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "One or more input papers not found",
                    {
                        "positive_ids": positive_paper_ids,
                        "negative_ids": negative_paper_ids
                    }
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        logger.error("Unexpected error in recommendations: %s", e)
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
2195 | 
2196 | 
2197 | 
2198 | 
2199 | 
2200 | 
# Set when the first shutdown() call starts; later calls return immediately.
_shutdown_started = False


async def shutdown():
    """Gracefully shut down the server.

    Cancels every other task on the running loop, waits for each one to
    finish, and then releases the HTTP client and MCP resources.

    The coroutine is idempotent: only the first call does any work.
    shutdown() is reachable twice in one run -- the signal handler schedules
    it as a task, and run_server() awaits it from its ``finally`` block.
    Because the first call cancels and awaits the task executing
    run_server(), an unguarded second call would in turn cancel and await
    the first one, leaving the two waiting on each other.
    """
    global _shutdown_started
    if _shutdown_started:
        logger.info("Shutdown already in progress")
        return
    _shutdown_started = True

    logger.info("Initiating graceful shutdown...")

    # Cancel all tasks except the one running this coroutine
    tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    for task in tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception as exc:
            # A task that fails while unwinding must not prevent the
            # remaining tasks and the resource cleanup below from running.
            logger.error(f"Task raised during shutdown: {exc}")

    # Cleanup resources
    await cleanup_client()
    # NOTE(review): assumes the mcp object exposes an async cleanup() -- confirm.
    await mcp.cleanup()

    logger.info(f"Cancelled {len(tasks)} tasks")
    logger.info("Shutdown complete")
2220 | 
def init_signal_handlers(loop):
    """Initialize signal handlers for graceful shutdown.

    Registers a handler for SIGTERM and SIGINT on ``loop`` that schedules
    shutdown() as a task on that loop.

    Args:
        loop: The asyncio event loop the server will run on.

    Event loops without signal support (``add_signal_handler`` raises
    NotImplementedError, e.g. on Windows) are tolerated: a warning is logged
    and the function returns without installing handlers.
    """
    # Strong references to scheduled shutdown tasks: the event loop itself
    # only keeps weak references, so an unreferenced task may be collected
    # before it finishes.
    shutdown_tasks = set()

    def _request_shutdown():
        task = loop.create_task(shutdown())
        shutdown_tasks.add(task)
        task.add_done_callback(shutdown_tasks.discard)

    try:
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, _request_shutdown)
    except NotImplementedError:
        logger.warning(
            "Signal handlers are not supported by this event loop; "
            "skipping signal-based shutdown"
        )
        return
    logger.info("Signal handlers initialized")
2226 | 
async def run_server():
    """Run the MCP server inside its async context and shut down afterwards.

    Within ``async with mcp`` the HTTP client is initialised and
    ``mcp.run_async()`` is awaited. Any exception is logged and re-raised,
    and shutdown() is awaited on every exit path.
    """
    async with mcp:
        try:
            # The HTTP client must exist before the server starts serving.
            await initialize_client()

            logger.info("Starting Semantic Scholar Server")
            await mcp.run_async()
        except Exception as exc:
            # Record the failure here, then let the caller decide what to do.
            logger.error(f"Server error: {exc}")
            raise
        finally:
            # Runs on normal return, on error and on cancellation alike.
            await shutdown()
2242 | 
if __name__ == "__main__":
    # Script entry point: build a dedicated event loop, install the exception
    # and signal handlers, run the server, then wind the loop down.
    loop = None       # stays None if loop creation itself fails
    main_task = None  # the task wrapping run_server()
    try:
        # Set up event loop with exception handler
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.set_exception_handler(handle_exception)

        # Initialize signal handlers
        init_signal_handlers(loop)

        # Run the server; keep the task so it can be cancelled below.
        main_task = loop.create_task(run_server())
        loop.run_until_complete(main_task)
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down...")
    except asyncio.CancelledError:
        # CancelledError derives from BaseException, so ``except Exception``
        # does not catch it. It is raised here when the server task is
        # cancelled (shutdown() cancels all other tasks) and is a normal
        # way for the server to stop.
        logger.info("Server task cancelled, shutting down...")
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}")
    finally:
        if loop is not None:
            try:
                # After a KeyboardInterrupt the server task is still running;
                # cancel it so its cleanup code gets a chance to execute.
                if main_task is not None and not main_task.done():
                    main_task.cancel()

                # Give unfinished tasks (for example a shutdown task started
                # by a signal) a bounded amount of time to finish.
                pending = [t for t in asyncio.all_tasks(loop) if not t.done()]
                if pending:
                    done, _ = loop.run_until_complete(
                        asyncio.wait(pending, timeout=5)
                    )
                    for finished in done:
                        # Retrieve exceptions so they are reported once here
                        # instead of as "never retrieved" warnings later.
                        if not finished.cancelled() and finished.exception() is not None:
                            logger.error(
                                f"Task failed during shutdown: {finished.exception()}"
                            )
                loop.close()
            except Exception as e:
                logger.error(f"Error during final cleanup: {str(e)}")
        logger.info("Server stopped")
2266 | 
```
Page 2/2 · First · Prev · Next · Last