This is page 2 of 2. Use http://codebase.md/yuzongmin/semantic-scholar-fastmcp-mcp-server?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── REFACTORING.md ├── requirements.txt ├── run.py ├── semantic_scholar │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── authors.py │ │ ├── papers.py │ │ └── recommendations.py │ ├── config.py │ ├── mcp.py │ ├── server.py │ └── utils │ ├── __init__.py │ ├── errors.py │ └── http.py ├── semantic_scholar_server.py ├── smithery.yaml ├── test │ ├── __init__.py │ ├── test_author.py │ ├── test_paper.py │ ├── test_recommend.py │ └── test_utils.py └── TOOLS.md ``` # Files -------------------------------------------------------------------------------- /semantic_scholar_server.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | from fastmcp import FastMCP, Context 3 | import httpx 4 | import logging 5 | import os 6 | from typing import Dict, List, Optional, Tuple, Any 7 | from datetime import datetime 8 | from enum import Enum 9 | import asyncio 10 | import time 11 | import signal 12 | from dataclasses import dataclass 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | # Global HTTP client for connection pooling 18 | http_client = None 19 | 20 | # Rate Limiting Configuration 21 | @dataclass 22 | class RateLimitConfig: 23 | # Define rate limits (requests, seconds) 24 | SEARCH_LIMIT = (1, 1) # 1 request per 1 second 25 | BATCH_LIMIT = (1, 1) # 1 request per 1 second 26 | DEFAULT_LIMIT = (10, 1) # 10 requests per 1 second 27 | 28 | # Endpoints categorization 29 | # These endpoints have stricter rate limits due to their computational intensity 30 | # and to prevent abuse of the recommendation system 31 | RESTRICTED_ENDPOINTS = [ 32 | "/paper/batch", # Batch operations are expensive 33 | "/paper/search", # Search operations are 
class AuthorDetailFields:
    """Named presets of author fields plus the set of accepted field names."""

    # Profile attributes of the author.
    BASIC = ["name", "url", "affiliations"]

    # Paper listings attached to the author.
    PAPERS_BASIC = ["papers"]  # Returns paperId and title
    PAPERS_DETAILED = [
        f"papers.{sub_field}"
        for sub_field in ("year", "authors", "abstract", "venue", "url")
    ]

    # Profile attributes together with a moderately detailed paper listing.
    COMPLETE = [
        *BASIC,
        *PAPERS_BASIC,
        "papers.year",
        "papers.authors",
        "papers.venue",
    ]

    # Aggregate citation metrics.
    METRICS = ["citationCount", "hIndex", "paperCount"]

    # Every field name accepted for author detail requests: the identifier
    # plus everything that appears in the presets above.
    VALID_FIELDS = {
        "authorId",
        *BASIC,
        *PAPERS_BASIC,
        *PAPERS_DETAILED,
        *METRICS,
    }
class RateLimiter:
    """Pace outgoing requests independently for each endpoint string.

    Every distinct ``endpoint`` value gets its own lock and its own record of
    when a call was last granted. Consecutive calls for the same endpoint are
    spaced at least ``seconds / requests`` apart, where ``(requests, seconds)``
    is the limit returned by ``_get_rate_limit``. Spacing calls evenly keeps
    the rate at or below the configured limit without tracking a window.
    """

    def __init__(self):
        # endpoint -> monotonic timestamp of the most recently granted call
        self._last_call_time = {}
        # endpoint -> lock serialising callers that target that endpoint
        self._locks = {}

    def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]:
        """Return the ``(requests, seconds)`` limit that applies to *endpoint*.

        An endpoint is restricted when any entry of
        ``RateLimitConfig.RESTRICTED_ENDPOINTS`` occurs in it as a substring.
        Restricted batch endpoints use ``BATCH_LIMIT``; other restricted
        endpoints use ``SEARCH_LIMIT``; everything else uses ``DEFAULT_LIMIT``.
        """
        if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS):
            if "batch" in endpoint:
                return RateLimitConfig.BATCH_LIMIT
            return RateLimitConfig.SEARCH_LIMIT
        return RateLimitConfig.DEFAULT_LIMIT

    async def acquire(self, endpoint: str):
        """Wait until a request to *endpoint* may be sent, then record it.

        Args:
            endpoint: API path used both as the bookkeeping key and to select
                the applicable limit.
        """
        if endpoint not in self._locks:
            self._locks[endpoint] = asyncio.Lock()
            # -inf guarantees the very first call never waits, whatever the
            # monotonic clock's starting value is.
            self._last_call_time[endpoint] = float("-inf")

        async with self._locks[endpoint]:
            max_requests, period = self._get_rate_limit(endpoint)
            # Honour the request count: N requests per period means one
            # request every period / N seconds (guard against N <= 0).
            min_interval = period / max(max_requests, 1)

            # Monotonic clock: immune to wall-clock adjustments.
            elapsed = time.monotonic() - self._last_call_time[endpoint]
            if elapsed < min_interval:
                await asyncio.sleep(min_interval - elapsed)

            self._last_call_time[endpoint] = time.monotonic()
async def make_request(endpoint: str, params: Optional[Dict] = None) -> Dict:
    """Make a rate-limited GET request to the Semantic Scholar API.

    Args:
        endpoint: Path appended to ``Config.BASE_URL`` (e.g. "/paper/search").
            It is also the key passed to the rate limiter.
        params: Optional query-string parameters forwarded to the HTTP client.

    Returns:
        The decoded JSON body on success. On failure, an error dictionary
        built by ``create_error_response``:
        - ``ErrorType.RATE_LIMIT`` for HTTP 429, with the ``retry-after``
          header and whether an API key was in use;
        - ``ErrorType.API_ERROR`` for any other HTTP error status, with the
          response text;
        - ``ErrorType.TIMEOUT`` when the request times out;
        - ``ErrorType.API_ERROR`` for any other exception.

    Exceptions are converted to error dictionaries rather than propagated.
    """
    # Resolved once per request and reused in the 429 branch, so the
    # environment is not re-read and the "no API key" warning is not repeated.
    api_key: Optional[str] = None
    try:
        # Apply rate limiting
        await rate_limiter.acquire(endpoint)

        # Send the key only when one is configured
        api_key = get_api_key()
        headers = {"x-api-key": api_key} if api_key else {}
        url = f"{Config.BASE_URL}{endpoint}"

        # Use global client
        client = await initialize_client()
        response = await client.get(url, params=params, headers=headers)
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}")
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(api_key)
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException as e:
        logger.error(f"Request timeout for {endpoint}: {str(e)}")
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Top-level boundary for tool calls: keep the traceback in the log so
        # unexpected failures stay diagnosable, and report a structured error.
        logger.error(f"Unexpected error for {endpoint}: {str(e)}", exc_info=True)
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
359 | 360 | fields (Optional[List[str]]): List of fields to return for each paper. 361 | paperId and title are always returned. 362 | Available fields: 363 | - abstract: The paper's abstract 364 | - authors: List of authors with name and authorId 365 | - citationCount: Total number of citations 366 | - citations: List of papers citing this paper 367 | - corpusId: Internal ID for the paper 368 | - embedding: Vector embedding of the paper 369 | - externalIds: External IDs (DOI, MAG, etc) 370 | - fieldsOfStudy: List of fields of study 371 | - influentialCitationCount: Number of influential citations 372 | - isOpenAccess: Whether paper is open access 373 | - openAccessPdf: Open access PDF URL if available 374 | - paperId: Semantic Scholar paper ID 375 | - publicationDate: Publication date in YYYY-MM-DD format 376 | - publicationTypes: List of publication types 377 | - publicationVenue: Venue information 378 | - references: List of papers cited by this paper 379 | - s2FieldsOfStudy: Semantic Scholar fields 380 | - title: Paper title 381 | - tldr: AI-generated TLDR summary 382 | - url: URL to Semantic Scholar paper page 383 | - venue: Publication venue name 384 | - year: Publication year 385 | 386 | publication_types (Optional[List[str]]): Filter by publication types. 387 | Available types: 388 | - Review 389 | - JournalArticle 390 | - CaseReport 391 | - ClinicalTrial 392 | - Conference 393 | - Dataset 394 | - Editorial 395 | - LettersAndComments 396 | - MetaAnalysis 397 | - News 398 | - Study 399 | - Book 400 | - BookSection 401 | 402 | open_access_pdf (bool): If True, only include papers with a public PDF. 403 | Default: False 404 | 405 | min_citation_count (Optional[int]): Minimum number of citations required. 406 | Papers with fewer citations will be filtered out. 407 | 408 | year (Optional[str]): Filter by publication year. 
Supports several formats: 409 | - Single year: "2019" 410 | - Year range: "2016-2020" 411 | - Since year: "2010-" 412 | - Until year: "-2015" 413 | 414 | venue (Optional[List[str]]): Filter by publication venues. 415 | Accepts full venue names or ISO4 abbreviations. 416 | Examples: ["Nature", "Science", "N. Engl. J. Med."] 417 | 418 | fields_of_study (Optional[List[str]]): Filter by fields of study. 419 | Available fields: 420 | - Computer Science 421 | - Medicine 422 | - Chemistry 423 | - Biology 424 | - Materials Science 425 | - Physics 426 | - Geology 427 | - Psychology 428 | - Art 429 | - History 430 | - Geography 431 | - Sociology 432 | - Business 433 | - Political Science 434 | - Economics 435 | - Philosophy 436 | - Mathematics 437 | - Engineering 438 | - Environmental Science 439 | - Agricultural and Food Sciences 440 | - Education 441 | - Law 442 | - Linguistics 443 | 444 | offset (int): Number of results to skip for pagination. 445 | Default: 0 446 | 447 | limit (int): Maximum number of results to return. 
448 | Default: 10 449 | Maximum: 100 450 | 451 | Returns: 452 | Dict: { 453 | "total": int, # Total number of papers matching the query 454 | "offset": int, # Current offset in the results 455 | "next": int, # Offset for the next page of results (if available) 456 | "data": List[Dict] # List of papers with requested fields 457 | } 458 | 459 | Notes: 460 | - Results are sorted by relevance to the query 461 | - All query terms must be present in the paper (AND operation) 462 | - Query matches are case-insensitive 463 | - Query matches word prefixes (e.g., "quantum" matches "quantum" and "quantumly") 464 | - Maximum of 100 results per request 465 | - Use offset parameter for pagination 466 | - Rate limits apply (see API documentation) 467 | """ 468 | if not query.strip(): 469 | return create_error_response( 470 | ErrorType.VALIDATION, 471 | "Query string cannot be empty" 472 | ) 473 | 474 | # Validate and prepare fields 475 | if fields is None: 476 | fields = PaperFields.DEFAULT 477 | else: 478 | invalid_fields = set(fields) - PaperFields.VALID_FIELDS 479 | if invalid_fields: 480 | return create_error_response( 481 | ErrorType.VALIDATION, 482 | f"Invalid fields: {', '.join(invalid_fields)}", 483 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 484 | ) 485 | 486 | # Validate and prepare parameters 487 | limit = min(limit, Config.MAX_RESULTS_PER_PAGE) 488 | params = { 489 | "query": query, 490 | "offset": offset, 491 | "limit": limit, 492 | "fields": ",".join(fields) 493 | } 494 | 495 | # Add optional filters 496 | if publication_types: 497 | params["publicationTypes"] = ",".join(publication_types) 498 | if open_access_pdf: 499 | params["openAccessPdf"] = "true" 500 | if min_citation_count is not None: 501 | params["minCitationCount"] = min_citation_count 502 | if year: 503 | params["year"] = year 504 | if venue: 505 | params["venue"] = ",".join(venue) 506 | if fields_of_study: 507 | params["fieldsOfStudy"] = ",".join(fields_of_study) 508 | 509 | return await 
# 1.2 Paper bulk search
@mcp.tool()
async def paper_bulk_search(
    context: Context,
    query: Optional[str] = None,
    token: Optional[str] = None,
    fields: Optional[List[str]] = None,
    sort: Optional[str] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    publication_date_or_year: Optional[str] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Bulk search for papers with advanced filtering and sorting options.
    Intended for retrieving large sets of papers efficiently.

    Args:
        query (Optional[str]): Text query to match against paper title and abstract.
            Supports boolean logic:
            - '+' for AND operation
            - '|' for OR operation
            - '-' to negate a term
            - '"' for phrase matching
            - '*' for prefix matching
            - '()' for precedence
            - '~N' for edit distance (default 2)
            Examples:
            - 'fish ladder' (contains both terms)
            - 'fish -ladder' (has fish, no ladder)
            - 'fish | ladder' (either term)
            - '"fish ladder"' (exact phrase)
            - '(fish ladder) | outflow'
            - 'fish~' (fuzzy match)
            - '"fish ladder"~3' (terms within 3 words)
            A query that is empty or only whitespace is not sent.

        token (Optional[str]): Continuation token for pagination

        fields (Optional[List[str]]): Fields to return for each paper
            paperId is always returned
            Default: paperId and title only

        sort (Optional[str]): Sort order in format 'field:order'
            Fields: paperId, publicationDate, citationCount
            Order: asc (default), desc
            Default: 'paperId:asc'
            Examples:
            - 'publicationDate:asc' (oldest first)
            - 'citationCount:desc' (most cited first)

        publication_types (Optional[List[str]]): Filter by publication types:
            Review, JournalArticle, CaseReport, ClinicalTrial,
            Conference, Dataset, Editorial, LettersAndComments,
            MetaAnalysis, News, Study, Book, BookSection

        open_access_pdf (bool): Only include papers with public PDF

        min_citation_count (Optional[int]): Minimum citation threshold

        publication_date_or_year (Optional[str]): Date/year range filter
            Format: <startDate>:<endDate> in YYYY-MM-DD
            Supports partial dates and open ranges
            Examples:
            - '2019-03-05' (specific date)
            - '2019-03' (month)
            - '2019' (year)
            - '2016-03-05:2020-06-06' (range)
            - '1981-08-25:' (since date)
            - ':2015-01' (until date)
            Takes precedence over `year` when both are given.

        year (Optional[str]): Publication year filter
            Examples: '2019', '2016-2020', '2010-', '-2015'

        venue (Optional[List[str]]): Filter by publication venues
            Accepts full names or ISO4 abbreviations
            Examples: ['Nature', 'N. Engl. J. Med.']

        fields_of_study (Optional[List[str]]): Filter by fields of study
            Available fields include: Computer Science, Medicine,
            Physics, Mathematics, etc.

    Returns:
        Dict: {
            'total': int,      # Total matching papers
            'token': str,      # Continuation token for next batch
            'data': List[Dict] # Papers with requested fields
        }
        A validation error response is returned, without contacting the API,
        for unknown fields, a malformed sort, unknown publication types,
        a negative citation threshold, or unknown fields of study.

    Notes:
        - Returns up to 1,000 papers per call
        - Can fetch up to 10M papers total
        - Nested data (citations, references) not available
        - For larger datasets, use the Datasets API
    """
    # Build request parameters
    params = {}

    # Strip before testing: a whitespace-only query has no search terms and
    # must not be forwarded as an empty `query=` parameter.
    cleaned_query = query.strip() if query else ""
    if cleaned_query:
        params["query"] = cleaned_query

    # Add continuation token if provided
    if token:
        params["token"] = token

    # Add fields if provided
    if fields:
        # Validate fields
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    # Add sort if provided
    if sort:
        # Validate sort format
        valid_sort_fields = ["paperId", "publicationDate", "citationCount"]
        valid_sort_orders = ["asc", "desc"]

        try:
            # Unpacking raises ValueError unless there is exactly one ':'
            field, order = sort.split(":")
            if field not in valid_sort_fields:
                return create_error_response(
                    ErrorType.VALIDATION,
                    f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}"
                )
            if order not in valid_sort_orders:
                return create_error_response(
                    ErrorType.VALIDATION,
                    f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}"
                )
            params["sort"] = sort
        except ValueError:
            return create_error_response(
                ErrorType.VALIDATION,
                "Sort must be in format 'field:order'"
            )

    # Add publication types if provided
    if publication_types:
        valid_types = {
            "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
            "Conference", "Dataset", "Editorial", "LettersAndComments",
            "MetaAnalysis", "News", "Study", "Book", "BookSection"
        }
        invalid_types = set(publication_types) - valid_types
        if invalid_types:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid publication types: {', '.join(invalid_types)}",
                {"valid_types": list(valid_types)}
            )
        params["publicationTypes"] = ",".join(publication_types)

    # Add open access PDF filter
    if open_access_pdf:
        params["openAccessPdf"] = "true"

    # Add minimum citation count if provided
    if min_citation_count is not None:
        if min_citation_count < 0:
            return create_error_response(
                ErrorType.VALIDATION,
                "Minimum citation count cannot be negative"
            )
        params["minCitationCount"] = str(min_citation_count)

    # Add publication date/year if provided (the date filter wins over year)
    if publication_date_or_year:
        params["publicationDateOrYear"] = publication_date_or_year
    elif year:
        params["year"] = year

    # Add venue filter if provided
    if venue:
        params["venue"] = ",".join(venue)

    # Add fields of study filter if provided
    if fields_of_study:
        valid_fields = {
            "Computer Science", "Medicine", "Chemistry", "Biology",
            "Materials Science", "Physics", "Geology", "Psychology",
            "Art", "History", "Geography", "Sociology", "Business",
            "Political Science", "Economics", "Philosophy", "Mathematics",
            "Engineering", "Environmental Science", "Agricultural and Food Sciences",
            "Education", "Law", "Linguistics"
        }
        invalid_fields = set(fields_of_study) - valid_fields
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields of study: {', '.join(invalid_fields)}",
                {"valid_fields": list(valid_fields)}
            )
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # make_request already returns either the payload or a structured error
    # dictionary, so its result is passed through unchanged.
    return await make_request("/paper/search/bulk", params)
746 | Available fields: 747 | - abstract: The paper's abstract 748 | - authors: List of authors with name and authorId 749 | - citationCount: Total number of citations 750 | - citations: List of papers citing this paper 751 | - corpusId: Internal ID for the paper 752 | - embedding: Vector embedding of the paper 753 | - externalIds: External IDs (DOI, MAG, etc) 754 | - fieldsOfStudy: List of fields of study 755 | - influentialCitationCount: Number of influential citations 756 | - isOpenAccess: Whether paper is open access 757 | - openAccessPdf: Open access PDF URL if available 758 | - paperId: Semantic Scholar paper ID 759 | - publicationDate: Publication date in YYYY-MM-DD format 760 | - publicationTypes: List of publication types 761 | - publicationVenue: Venue information 762 | - references: List of papers cited by this paper 763 | - s2FieldsOfStudy: Semantic Scholar fields 764 | - title: Paper title 765 | - tldr: AI-generated TLDR summary 766 | - url: URL to Semantic Scholar paper page 767 | - venue: Publication venue name 768 | - year: Publication year 769 | 770 | publication_types (Optional[List[str]]): Filter by publication types. 771 | Available types: 772 | - Review 773 | - JournalArticle 774 | - CaseReport 775 | - ClinicalTrial 776 | - Conference 777 | - Dataset 778 | - Editorial 779 | - LettersAndComments 780 | - MetaAnalysis 781 | - News 782 | - Study 783 | - Book 784 | - BookSection 785 | 786 | open_access_pdf (bool): If True, only include papers with a public PDF. 787 | Default: False 788 | 789 | min_citation_count (Optional[int]): Minimum number of citations required. 790 | Papers with fewer citations will be filtered out. 791 | 792 | year (Optional[str]): Filter by publication year. Supports several formats: 793 | - Single year: "2019" 794 | - Year range: "2016-2020" 795 | - Since year: "2010-" 796 | - Until year: "-2015" 797 | 798 | venue (Optional[List[str]]): Filter by publication venues. 799 | Accepts full venue names or ISO4 abbreviations. 
800 | Examples: ["Nature", "Science", "N. Engl. J. Med."] 801 | 802 | fields_of_study (Optional[List[str]]): Filter by fields of study. 803 | Available fields: 804 | - Computer Science 805 | - Medicine 806 | - Chemistry 807 | - Biology 808 | - Materials Science 809 | - Physics 810 | - Geology 811 | - Psychology 812 | - Art 813 | - History 814 | - Geography 815 | - Sociology 816 | - Business 817 | - Political Science 818 | - Economics 819 | - Philosophy 820 | - Mathematics 821 | - Engineering 822 | - Environmental Science 823 | - Agricultural and Food Sciences 824 | - Education 825 | - Law 826 | - Linguistics 827 | 828 | Returns: 829 | Dict: { 830 | "paperId": str, # Semantic Scholar Paper ID 831 | "title": str, # Paper title 832 | "matchScore": float, # Similarity score between query and matched title 833 | ... # Additional requested fields 834 | } 835 | 836 | Returns error response if no matching paper is found. 837 | 838 | Notes: 839 | - Returns the single best matching paper based on title similarity 840 | - Match score indicates how well the title matches the query 841 | - Case-insensitive matching 842 | - Ignores punctuation in matching 843 | - Filters are applied after finding the best title match 844 | """ 845 | if not query.strip(): 846 | return create_error_response( 847 | ErrorType.VALIDATION, 848 | "Query string cannot be empty" 849 | ) 850 | 851 | # Validate and prepare fields 852 | if fields is None: 853 | fields = PaperFields.DEFAULT 854 | else: 855 | invalid_fields = set(fields) - PaperFields.VALID_FIELDS 856 | if invalid_fields: 857 | return create_error_response( 858 | ErrorType.VALIDATION, 859 | f"Invalid fields: {', '.join(invalid_fields)}", 860 | {"valid_fields": list(PaperFields.VALID_FIELDS)} 861 | ) 862 | 863 | # Build base parameters 864 | params = {"query": query} 865 | 866 | # Add optional parameters 867 | if fields: 868 | params["fields"] = ",".join(fields) 869 | if publication_types: 870 | params["publicationTypes"] = 
",".join(publication_types) 871 | if open_access_pdf: 872 | params["openAccessPdf"] = "true" 873 | if min_citation_count is not None: 874 | params["minCitationCount"] = str(min_citation_count) 875 | if year: 876 | params["year"] = year 877 | if venue: 878 | params["venue"] = ",".join(venue) 879 | if fields_of_study: 880 | params["fieldsOfStudy"] = ",".join(fields_of_study) 881 | 882 | result = await make_request("/paper/search/match", params) 883 | 884 | # Handle specific error cases 885 | if isinstance(result, Dict): 886 | if "error" in result: 887 | error_msg = result["error"].get("message", "") 888 | if "404" in error_msg: 889 | return create_error_response( 890 | ErrorType.VALIDATION, 891 | "No matching paper found", 892 | {"original_query": query} 893 | ) 894 | return result 895 | 896 | return result 897 | 898 | # 1.4 Details about a paper 899 | @mcp.tool() 900 | async def paper_details( 901 | context: Context, 902 | paper_id: str, 903 | fields: Optional[List[str]] = None 904 | ) -> Dict: 905 | """ 906 | Get details about a paper using various types of identifiers. 907 | This endpoint provides comprehensive metadata about a paper. 908 | 909 | Args: 910 | paper_id (str): Paper identifier in one of the following formats: 911 | - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b") 912 | - CorpusId:<id> (e.g., "CorpusId:215416146") 913 | - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011") 914 | - ARXIV:<id> (e.g., "ARXIV:2106.15928") 915 | - MAG:<id> (e.g., "MAG:112218234") 916 | - ACL:<id> (e.g., "ACL:W12-3903") 917 | - PMID:<id> (e.g., "PMID:19872477") 918 | - PMCID:<id> (e.g., "PMCID:2323736") 919 | - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1") 920 | Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org, 921 | acm.org, biorxiv.org 922 | 923 | fields (Optional[List[str]]): List of fields to return. 924 | paperId is always returned. 
925 | Available fields: 926 | - abstract: The paper's abstract 927 | - authors: List of authors with name and authorId 928 | - citationCount: Total number of citations 929 | - citations: List of papers citing this paper 930 | - corpusId: Internal ID for the paper 931 | - embedding: Vector embedding of the paper 932 | - externalIds: External IDs (DOI, MAG, etc) 933 | - fieldsOfStudy: List of fields of study 934 | - influentialCitationCount: Number of influential citations 935 | - isOpenAccess: Whether paper is open access 936 | - openAccessPdf: Open access PDF URL if available 937 | - paperId: Semantic Scholar paper ID 938 | - publicationDate: Publication date in YYYY-MM-DD format 939 | - publicationTypes: List of publication types 940 | - publicationVenue: Venue information 941 | - references: List of papers cited by this paper 942 | - s2FieldsOfStudy: Semantic Scholar fields 943 | - title: Paper title 944 | - tldr: AI-generated TLDR summary 945 | - url: URL to Semantic Scholar paper page 946 | - venue: Publication venue name 947 | - year: Publication year 948 | 949 | Special syntax for nested fields: 950 | - For citations/references: citations.title, references.abstract, etc. 951 | - For authors: authors.name, authors.affiliations, etc. 952 | - For embeddings: embedding.specter_v2 for v2 embeddings 953 | 954 | If omitted, returns only paperId and title. 955 | 956 | Returns: 957 | Dict: Paper details with requested fields. 958 | Always includes paperId. 959 | Returns error response if paper not found. 
# 1.5 Get details for multiple papers at once
@mcp.tool()
async def paper_batch_details(
    context: Context,
    paper_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple papers in a single batch request.
    This endpoint is optimized for efficiently retrieving details about known papers.

    Args:
        paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
              Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org,
                                 acm.org, biorxiv.org
            Maximum: 500 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            Whitespace around entries and empty entries are ignored.
            paperId is always returned.
            Available fields:
            - abstract: The paper's abstract
            - authors: List of authors with name and authorId
            - citationCount: Total number of citations
            - citations: List of papers citing this paper
            - corpusId: Internal ID for the paper
            - embedding: Vector embedding of the paper
            - externalIds: External IDs (DOI, MAG, etc)
            - fieldsOfStudy: List of fields of study
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - paperId: Semantic Scholar paper ID
            - publicationDate: Publication date in YYYY-MM-DD format
            - publicationTypes: List of publication types
            - publicationVenue: Venue information
            - references: List of papers cited by this paper
            - s2FieldsOfStudy: Semantic Scholar fields
            - title: Paper title
            - tldr: AI-generated TLDR summary
            - url: URL to Semantic Scholar paper page
            - venue: Publication venue name
            - year: Publication year

            Special syntax for nested fields:
            - For citations/references: citations.title, references.abstract, etc.
            - For authors: authors.name, authors.affiliations, etc.
            - For embeddings: embedding.specter_v2 for v2 embeddings

            If omitted, returns only paperId and title.

    Returns:
        List[Dict]: List of paper details with requested fields.
            - Results maintain the same order as input paper_ids
            - Invalid or not found paper IDs return null in the results
            - Each paper object contains the requested fields
            - paperId is always included in each paper object
        On failure, an error response dict is returned instead.

    Notes:
        - More efficient than making multiple single-paper requests
        - Maximum of 500 paper IDs per request
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
        - Invalid paper IDs return null instead of causing an error
        - Order of results matches order of input IDs for easy mapping
    """
    # Validate inputs
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )

    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    # Normalize the comma-separated list so "title, year" and trailing commas
    # are accepted instead of being reported as invalid fields.
    requested_fields = []
    if fields:
        requested_fields = [
            name.strip() for name in fields.split(",") if name.strip()
        ]
        # Nested selectors such as "citations.title" are checked by their
        # top-level field, matching the syntax advertised in the docstring.
        invalid_fields = sorted(
            name for name in requested_fields
            if name.split(".", 1)[0] not in PaperFields.VALID_FIELDS
        )
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {}
    if requested_fields:
        params["fields"] = ",".join(requested_fields)

    # Make POST request with proper structure
    try:
        # This request does not go through make_request, so throttle it
        # explicitly; "/paper/batch" is one of the restricted endpoints.
        await rate_limiter.acquire("/paper/batch")

        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=params,
                json={"ids": paper_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Still reported to the caller as an API error, but no longer silent.
        logger.exception("Unexpected error in paper_batch_details")
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
# 1.6 Details about a paper's authors
@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with pagination support.
    This endpoint provides author information and their contributions.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0
            Must not be negative.

        limit (int): Maximum number of authors to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }
        A validation error response is returned for bad arguments or when
        the paper is not found.

    Notes:
        - Authors are returned in the order they appear on the paper
        - Supports pagination for papers with many authors
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated ID is the one placed in the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally instead of spending an API call on
    # a request that cannot succeed.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0}
        )

    # Validate fields
    if fields:
        invalid_fields = sorted(set(fields) - AuthorDetailFields.VALID_FIELDS)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/authors", params)

    # Translate a 404 from the API into a "not found" validation error;
    # any other error payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
# 1.7 Details about a paper's citations
@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers that cite the specified paper (papers where this paper appears in their bibliography).
    This endpoint provides detailed citation information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each citing paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Citation-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of citations to skip for pagination.
            Default: 0
            Must not be negative.

        limit (int): Maximum number of citations to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of citing papers with requested fields
        }
        A validation error response is returned for bad arguments or when
        the paper is not found.

    Notes:
        - Citations are sorted by citation date (newest first)
        - Includes citation context when available
        - Supports pagination for highly-cited papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated ID is the one placed in the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally instead of spending an API call on
    # a request that cannot succeed.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0}
        )

    # Validate fields
    if fields:
        invalid_fields = sorted(set(fields) - CitationReferenceFields.VALID_FIELDS)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/citations", params)

    # Translate a 404 from the API into a "not found" validation error;
    # any other error payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
# 1.8 Details about a paper's references
@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers cited by the specified paper (papers appearing in this paper's bibliography).
    This endpoint provides detailed reference information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading and trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each referenced paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Reference-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of references to skip for pagination.
            Default: 0
            Must not be negative.

        limit (int): Maximum number of references to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of referenced papers with requested fields
        }
        A validation error response is returned for bad arguments or when
        the paper is not found.

    Notes:
        - References are returned in the order they appear in the bibliography
        - Includes citation context when available
        - Supports pagination for papers with many references
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated ID is the one placed in the URL path.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination bounds locally instead of spending an API call on
    # a request that cannot succeed.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0}
        )

    # Validate fields
    if fields:
        invalid_fields = sorted(set(fields) - CitationReferenceFields.VALID_FIELDS)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/paper/{paper_id}/references", params)

    # Translate a 404 from the API into a "not found" validation error;
    # any other error payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
# 2. Author Data Tools

# 2.1 Search for authors by name
@mcp.tool()
async def author_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Search for authors by name on Semantic Scholar.
    This endpoint is optimized for finding authors based on their name.
    Results are sorted by relevance to the query.

    Args:
        query (str): The name text to search for. The query will be matched against author names
            and their known aliases. The match is case-insensitive and matches name prefixes.
            Leading and trailing whitespace is ignored.
            Examples:
            - "Albert Einstein"
            - "Einstein, Albert"
            - "A Einstein"

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0
            Must not be negative.

        limit (int): Maximum number of authors to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "total": int,      # Total number of authors matching the query
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }
        A validation error response is returned for bad arguments.

    Notes:
        - Results are sorted by relevance to the query
        - Matches against author names and aliases
        - Case-insensitive matching
        - Matches name prefixes
        - Supports pagination for large result sets
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated query is the one sent to the API.
    query = query.strip()
    if not query:
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Validate pagination bounds locally instead of spending an API call on
    # a request that cannot succeed.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0}
        )

    # Validate fields
    if fields:
        invalid_fields = sorted(set(fields) - AuthorDetailFields.VALID_FIELDS)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "query": query,
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request; the response (or error payload) is returned as-is.
    return await make_request("/author/search", params)
# 2.2 Details about an author
@mcp.tool()
async def author_details(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get detailed information about an author by their ID.
    This endpoint provides comprehensive metadata about an author.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Leading and trailing whitespace is ignored.
            Example: "1741101" (Albert Einstein)

        fields (Optional[List[str]]): List of fields to return.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        Dict: Author details with requested fields.
            Always includes authorId.
            Returns error response if author not found.

    Notes:
        - Provides comprehensive author metadata
        - Papers list is limited to most recent papers
        - For complete paper list, use author_papers endpoint
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated ID is the one placed in the URL path.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate fields
    if fields:
        invalid_fields = sorted(set(fields) - AuthorDetailFields.VALID_FIELDS)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {}
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/author/{author_id}", params)

    # Translate a 404 from the API into a "not found" validation error;
    # any other error payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
# 2.3 Details about an author's papers
@mcp.tool()
async def author_papers(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers written by an author with pagination support.
    This endpoint provides detailed information about an author's publications.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Leading and trailing whitespace is ignored.
            Example: "1741101" (Albert Einstein)

        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - s2FieldsOfStudy: Semantic Scholar fields
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        offset (int): Number of papers to skip for pagination.
            Default: 0
            Must not be negative.

        limit (int): Maximum number of papers to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of papers with requested fields
        }
        A validation error response is returned for bad arguments or when
        the author is not found.

    Notes:
        - Papers are sorted by publication date (newest first)
        - Supports pagination for authors with many papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the validated ID is the one placed in the URL path.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate pagination bounds locally instead of spending an API call on
    # a request that cannot succeed.
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"min_offset": 0}
        )

    # Validate fields. "journal" is advertised above but is absent from
    # PaperFields.VALID_FIELDS, so it is allowed explicitly here rather than
    # rejecting a documented field.
    if fields:
        allowed_fields = PaperFields.VALID_FIELDS | {"journal"}
        invalid_fields = sorted(set(fields) - allowed_fields)
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(allowed_fields)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Make the API request
    result = await make_request(f"/author/{author_id}/papers", params)

    # Translate a 404 from the API into a "not found" validation error;
    # any other error payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
# 2.4 Get details for multiple authors at once
@mcp.tool()
async def author_batch_details(
    context: Context,
    author_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple authors in a single batch request.
    This endpoint is optimized for efficiently retrieving details about known authors.

    Args:
        author_ids (List[str]): List of Semantic Scholar author IDs.
            These are unique identifiers assigned by Semantic Scholar.
            Example: ["1741101", "1741102"]
            Maximum: 1000 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each author.
            Whitespace around entries and empty entries are ignored.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        List[Dict]: List of author details with requested fields.
            - Results maintain the same order as input author_ids
            - Invalid or not found author IDs return null in the results
            - Each author object contains the requested fields
            - authorId is always included in each author object
        On failure, an error response dict is returned instead.

    Notes:
        - More efficient than making multiple single-author requests
        - Maximum of 1000 author IDs per request
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
        - Invalid author IDs return null instead of causing an error
        - Order of results matches order of input IDs for easy mapping
    """
    # Validate inputs
    if not author_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author IDs list cannot be empty"
        )

    if len(author_ids) > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 1000 author IDs at once",
            {"max_authors": 1000, "received": len(author_ids)}
        )

    # Normalize the comma-separated list so "name, hIndex" and trailing commas
    # are accepted instead of being reported as invalid fields.
    requested_fields = []
    if fields:
        requested_fields = [
            name.strip() for name in fields.split(",") if name.strip()
        ]
        invalid_fields = sorted(
            set(requested_fields) - AuthorDetailFields.VALID_FIELDS
        )
        if invalid_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(invalid_fields)}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {}
    if requested_fields:
        params["fields"] = ",".join(requested_fields)

    # Make POST request with proper structure
    try:
        # This request does not go through make_request, so throttle it
        # explicitly through the shared rate limiter.
        await rate_limiter.acquire("/author/batch")

        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/author/batch",
                params=params,
                json={"ids": author_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Still reported to the caller as an API error, but no longer silent.
        logger.exception("Unexpected error in author_batch_details")
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
# 3. Paper Recommendation Tools

# 3.1 Get recommendations based on a single paper
@mcp.tool()
async def get_paper_recommendations_single(
    context: Context,
    paper_id: str,
    fields: Optional[str] = None,
    limit: int = 100,
    from_pool: str = "recent"
) -> Dict:
    """
    Get paper recommendations based on a single seed paper.
    This endpoint is optimized for finding papers similar to a specific paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Must not be empty.

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Minimum: 1
            Maximum: 500

        from_pool (str): Which pool of papers to recommend from.
            Options:
            - "recent": Recent papers (default)
            - "all-cs": All computer science papers
            Default: "recent"

    Returns:
        Dict: {
            "recommendedPapers": List[Dict]  # List of recommended papers with requested fields
        }
        On invalid input, a 404/429/other HTTP error, a timeout, or any
        unexpected failure, the dict built by create_error_response is
        returned instead of raising.

    Notes:
        - Recommendations are based on content similarity and citation patterns
        - Results are sorted by relevance to the seed paper
        - "recent" pool focuses on papers from the last few years
        - "all-cs" pool includes older computer science papers
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    valid_pools = ["recent", "all-cs"]

    try:
        # Validate everything first so that a call we are going to reject
        # anyway does not wait for (or consume) a rate-limit slot.
        if not paper_id or not paper_id.strip():
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper ID cannot be empty"
            )

        if limit < 1:
            return create_error_response(
                ErrorType.VALIDATION,
                "Limit must be at least 1",
                {"min_limit": 1, "requested": limit}
            )

        if limit > 500:
            return create_error_response(
                ErrorType.VALIDATION,
                "Cannot request more than 500 recommendations",
                {"max_limit": 500, "requested": limit}
            )

        if from_pool not in valid_pools:
            return create_error_response(
                ErrorType.VALIDATION,
                "Invalid paper pool specified",
                {"valid_pools": valid_pools}
            )

        # Apply rate limiting
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Build request parameters
        params = {
            "limit": limit,
            "from": from_pool
        }
        if fields:
            params["fields"] = fields

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}"
            response = await client.get(url, params=params, headers=headers)

            # A 404 means the seed paper is unknown; report it as a
            # validation problem rather than a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "Paper not found",
                    {"paper_id": paper_id}
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # logger.exception keeps the traceback, which the plain error log lost.
        logger.exception("Unexpected error in recommendations: %s", e)
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
# 3.2 Get recommendations based on multiple papers
@mcp.tool()
async def get_paper_recommendations_multi(
    context: Context,
    positive_paper_ids: List[str],
    negative_paper_ids: Optional[List[str]] = None,
    fields: Optional[str] = None,
    limit: int = 100
) -> Dict:
    """
    Get paper recommendations based on multiple positive and optional negative examples.
    This endpoint is optimized for finding papers similar to a set of papers while
    avoiding papers similar to the negative examples.

    Args:
        positive_paper_ids (List[str]): List of paper IDs to use as positive examples.
            Papers similar to these will be recommended. Must not be empty.
            Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        negative_paper_ids (Optional[List[str]]): List of paper IDs to use as negative examples.
            Papers similar to these will be avoided in recommendations.
            Uses same ID formats as positive_paper_ids.
            None is sent to the API as an empty list.

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Minimum: 1
            Maximum: 500

    Returns:
        Dict: {
            "recommendedPapers": List[Dict]  # List of recommended papers with requested fields
        }
        On invalid input, a 404/429/other HTTP error, a timeout, or any
        unexpected failure, the dict built by create_error_response is
        returned instead of raising.

    Notes:
        - Recommendations balance similarity to positive examples and dissimilarity to negative examples
        - Results are sorted by relevance score
        - More positive examples can help focus recommendations
        - Negative examples help filter out unwanted topics/approaches
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    try:
        # Validate everything first so that a call we are going to reject
        # anyway does not wait for (or consume) a rate-limit slot.
        if not positive_paper_ids:
            return create_error_response(
                ErrorType.VALIDATION,
                "Must provide at least one positive paper ID"
            )

        if limit < 1:
            return create_error_response(
                ErrorType.VALIDATION,
                "Limit must be at least 1",
                {"min_limit": 1, "requested": limit}
            )

        if limit > 500:
            return create_error_response(
                ErrorType.VALIDATION,
                "Cannot request more than 500 recommendations",
                {"max_limit": 500, "requested": limit}
            )

        # Apply rate limiting
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Build request parameters
        params = {"limit": limit}
        if fields:
            params["fields"] = fields

        request_body = {
            "positivePaperIds": positive_paper_ids,
            "negativePaperIds": negative_paper_ids or []
        }

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = "https://api.semanticscholar.org/recommendations/v1/papers"
            response = await client.post(url, params=params, json=request_body, headers=headers)

            # A 404 means at least one seed paper is unknown; report it as a
            # validation problem rather than a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "One or more input papers not found",
                    {
                        "positive_ids": positive_paper_ids,
                        "negative_ids": negative_paper_ids
                    }
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # logger.exception keeps the traceback, which the plain error log lost.
        logger.exception("Unexpected error in recommendations: %s", e)
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
async def shutdown():
    """Gracefully shut down the server.

    Cancels every task on the running event loop except the one executing
    this coroutine, waits for each of them to finish, and then calls
    cleanup_client() and mcp.cleanup().

    A task that fails with an ordinary exception while being cancelled is
    logged and skipped, so one misbehaving task cannot prevent the cleanup
    calls from running.
    """
    logger.info("Initiating graceful shutdown...")

    # Cancel all tasks
    tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    for task in tasks:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception:
            # Previously this escaped and skipped the resource cleanup below.
            logger.exception("Task raised an error while being cancelled during shutdown")

    # Cleanup resources
    await cleanup_client()
    await mcp.cleanup()

    logger.info(f"Cancelled {len(tasks)} tasks")
    logger.info("Shutdown complete")


def init_signal_handlers(loop):
    """Initialize signal handlers for graceful shutdown.

    Registers a handler for SIGTERM and SIGINT on ``loop`` that schedules
    shutdown() as a task.

    Args:
        loop: The asyncio event loop to register the handlers on.

    Event loops that do not implement add_signal_handler (for example on
    Windows) raise NotImplementedError; that case is logged as a warning and
    the server starts without signal handlers instead of crashing.
    """
    for sig in (signal.SIGTERM, signal.SIGINT):
        try:
            loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown()))
        except NotImplementedError:
            logger.warning(
                "Signal handlers are not supported by this event loop; skipping"
            )
            return
    logger.info("Signal handlers initialized")
async def run_server():
    """Run the server with proper async context management.

    Enters the ``mcp`` async context, initializes the HTTP client via
    initialize_client(), and runs the server with mcp.run_async().
    Any exception is logged and re-raised; shutdown() is always awaited on
    the way out.
    """
    async with mcp:
        try:
            # Initialize HTTP client
            await initialize_client()

            # Start the server
            logger.info("Starting Semantic Scholar Server")
            await mcp.run_async()
        except Exception as e:
            logger.error(f"Server error: {e}")
            raise
        finally:
            await shutdown()


if __name__ == "__main__":
    # Bound up front so the finally clause can tell whether a loop exists.
    loop = None
    try:
        # Set up event loop with exception handler
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.set_exception_handler(handle_exception)

        # Initialize signal handlers
        init_signal_handlers(loop)

        # Run the server
        loop.run_until_complete(run_server())
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down...")
    except asyncio.CancelledError:
        # A signal-triggered shutdown() cancels the main task, so
        # run_until_complete raises CancelledError. It is a BaseException on
        # Python 3.8+ and would otherwise escape as a traceback.
        logger.info("Server task cancelled, shutting down...")
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}")
    finally:
        if loop is not None:
            try:
                loop.run_until_complete(asyncio.sleep(0))  # Let pending tasks complete
                loop.close()
            except Exception as e:
                logger.error(f"Error during final cleanup: {str(e)}")
        logger.info("Server stopped")