This is page 2 of 2. Use http://codebase.md/yuzongmin/semantic-scholar-fastmcp-mcp-server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── REFACTORING.md
├── requirements.txt
├── run.py
├── semantic_scholar
│ ├── __init__.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── authors.py
│ │ ├── papers.py
│ │ └── recommendations.py
│ ├── config.py
│ ├── mcp.py
│ ├── server.py
│ └── utils
│ ├── __init__.py
│ ├── errors.py
│ └── http.py
├── semantic_scholar_server.py
├── smithery.yaml
├── test
│ ├── __init__.py
│ ├── test_author.py
│ ├── test_paper.py
│ ├── test_recommend.py
│ └── test_utils.py
└── TOOLS.md
```
# Files
--------------------------------------------------------------------------------
/semantic_scholar_server.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | from fastmcp import FastMCP, Context
3 | import httpx
4 | import logging
5 | import os
6 | from typing import Dict, List, Optional, Tuple, Any
7 | from datetime import datetime
8 | from enum import Enum
9 | import asyncio
10 | import time
11 | import signal
12 | from dataclasses import dataclass
13 |
# Configure logging at import time (INFO level) and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global HTTP client for connection pooling.
# Starts out as None: initialize_client() creates the client on first use and
# cleanup_client() closes it and resets this name back to None.
http_client = None
19 |
20 | # Rate Limiting Configuration
@dataclass
class RateLimitConfig:
    """Rate-limit constants read by ``RateLimiter._get_rate_limit``.

    None of the attributes below carries a type annotation, so ``@dataclass``
    does not turn them into instance fields; they are plain class attributes
    and are accessed on the class itself (``RateLimitConfig.SEARCH_LIMIT``).
    """

    # Define rate limits (requests, seconds)
    SEARCH_LIMIT = (1, 1)  # 1 request per 1 second
    # NOTE(review): BATCH_LIMIT is not referenced anywhere in the visible part
    # of this file; restricted endpoints all receive SEARCH_LIMIT.
    BATCH_LIMIT = (1, 1)  # 1 request per 1 second
    DEFAULT_LIMIT = (10, 1)  # 10 requests per 1 second

    # Endpoints categorization
    # These endpoints have stricter rate limits due to their computational intensity
    # and to prevent abuse of the recommendation system.
    # Entries are matched as substrings of the requested endpoint path.
    RESTRICTED_ENDPOINTS = [
        "/paper/batch",  # Batch operations are expensive
        "/paper/search",  # Search operations are computationally intensive
        "/recommendations"  # Recommendation generation is resource-intensive
    ]
36 |
37 | # Error Types
class ErrorType(Enum):
    """Error categories; ``create_error_response`` emits the member's string value."""

    RATE_LIMIT = "rate_limit"  # make_request: HTTP 429 from the API
    API_ERROR = "api_error"  # make_request: other HTTP status errors and unexpected exceptions
    VALIDATION = "validation"  # tool functions: rejected arguments and remapped 404 results
    TIMEOUT = "timeout"  # make_request: httpx.TimeoutException
43 |
44 | # Field Constants
class PaperFields:
    """Preset paper field selections plus the whitelist used to validate ``fields``."""

    # Preset selections, from the everyday default to the extended variant.
    DEFAULT = ["title", "abstract", "year", "citationCount", "authors", "url"]
    DETAILED = [
        *DEFAULT,
        "references",
        "citations",
        "venue",
        "influentialCitationCount",
    ]
    MINIMAL = ["title", "year", "authors"]
    SEARCH = ["paperId", "title", "year", "citationCount"]

    # Valid fields from API documentation; tool functions subtract this set
    # from the caller's list to find unknown names.
    VALID_FIELDS = {
        "abstract", "authors", "citationCount", "citations",
        "corpusId", "embedding", "externalIds", "fieldsOfStudy",
        "influentialCitationCount", "isOpenAccess", "openAccessPdf", "paperId",
        "publicationDate", "publicationTypes", "publicationVenue", "references",
        "s2FieldsOfStudy", "title", "tldr", "url",
        "venue", "year",
    }
76 |
class AuthorDetailFields:
    """Reusable field lists for author-detail requests."""

    # Identity / profile basics
    BASIC = ["name", "url", "affiliations"]

    # Papers written by the author
    PAPERS_BASIC = ["papers"]  # Returns paperId and title
    PAPERS_DETAILED = [
        "papers.year", "papers.authors", "papers.abstract",
        "papers.venue", "papers.url",
    ]

    # Profile basics followed by a core selection of paper sub-fields
    COMPLETE = [*BASIC, "papers", "papers.year", "papers.authors", "papers.venue"]

    # Impact numbers
    METRICS = ["citationCount", "hIndex", "paperCount"]

    # Every field name accepted for author details
    VALID_FIELDS = {
        "authorId", "name", "url", "affiliations",
        "papers", "papers.year", "papers.authors", "papers.abstract",
        "papers.venue", "papers.url",
        "citationCount", "hIndex", "paperCount",
    }
115 |
class PaperDetailFields:
    """Reusable field lists for paper-detail requests."""

    # Core bibliographic data
    BASIC = ["title", "abstract", "year", "venue"]

    # Authors: the plain list, and per-author sub-fields
    AUTHOR_BASIC = ["authors"]
    AUTHOR_DETAILED = ["authors.url", "authors.paperCount", "authors.citationCount"]

    # Citations / references: the plain lists, and per-entry sub-fields
    CITATION_BASIC = ["citations", "references"]
    CITATION_DETAILED = [
        "citations.title", "citations.abstract", "citations.year",
        "references.title", "references.abstract", "references.year",
    ]

    # Basic data + authors + citation lists + publication metadata
    COMPLETE = [
        *BASIC,
        *AUTHOR_BASIC,
        *CITATION_BASIC,
        "url",
        "fieldsOfStudy",
        "publicationVenue",
        "publicationTypes",
    ]
134 |
class CitationReferenceFields:
    """Reusable field lists for citation and reference queries."""

    # Title only
    BASIC = ["title"]

    # Fields describing the citing/cited relationship itself
    CONTEXT = ["contexts", "intents", "isInfluential"]

    # Fields describing the linked paper
    DETAILED = ["title", "abstract", "authors", "year", "venue"]

    # Relationship fields first, then the paper fields
    COMPLETE = [*CONTEXT, *DETAILED]

    # Every field name accepted for citation/reference queries
    VALID_FIELDS = {
        "contexts", "intents", "isInfluential",
        "title", "abstract", "authors", "year", "venue",
        "paperId", "url", "citationCount", "influentialCitationCount",
    }
165 |
166 | # Configuration
class Config:
    """Static settings for the API client and the tool functions in this module."""

    # API Configuration
    API_VERSION = "v1"
    # API_VERSION is interpolated once, when the class body is executed.
    BASE_URL = f"https://api.semanticscholar.org/graph/{API_VERSION}"
    TIMEOUT = 30  # seconds; passed to httpx.AsyncClient in initialize_client()

    # Request Limits
    MAX_BATCH_SIZE = 100
    MAX_RESULTS_PER_PAGE = 100  # cap applied to `limit` in paper_relevance_search
    DEFAULT_PAGE_SIZE = 10  # default `limit` of paper_relevance_search
    MAX_BATCHES = 5

    # Fields Configuration
    DEFAULT_FIELDS = PaperFields.DEFAULT  # same list object as PaperFields.DEFAULT, not a copy

    # Feature Flags
    ENABLE_CACHING = False
    DEBUG_MODE = False

    # Search Configuration
    # Each entry maps a search-type name to its description, an optional
    # minimum citation count, and the name of a ranking strategy.
    SEARCH_TYPES = {
        "comprehensive": {
            "description": "Balanced search considering relevance and impact",
            "min_citations": None,
            "ranking_strategy": "balanced"
        },
        "influential": {
            "description": "Focus on highly-cited and influential papers",
            "min_citations": 50,
            "ranking_strategy": "citations"
        },
        "latest": {
            "description": "Focus on recent papers with impact",
            "min_citations": None,
            "ranking_strategy": "recency"
        }
    }
204 |
205 | # Rate Limiter
class RateLimiter:
    """Async throttle that spaces out successive calls to the same endpoint.

    State is kept per distinct endpoint string: one lock and one "last call"
    timestamp each. A limit is a ``(requests, seconds)`` tuple and is enforced
    as a minimum gap of ``seconds / requests`` between consecutive calls, so
    ``(10, 1)`` allows one call every 0.1 s and ``(1, 1)`` one call per second.
    """

    def __init__(self):
        # endpoint -> time.monotonic() reading taken when the last call was let through
        self._last_call_time = {}
        # endpoint -> asyncio.Lock serialising callers of that endpoint
        self._locks = {}

    def _get_rate_limit(self, endpoint: str) -> Tuple[int, int]:
        """Return the ``(requests, seconds)`` limit that applies to ``endpoint``.

        An endpoint is restricted when any entry of
        ``RateLimitConfig.RESTRICTED_ENDPOINTS`` occurs in it as a substring.
        """
        if any(restricted in endpoint for restricted in RateLimitConfig.RESTRICTED_ENDPOINTS):
            return RateLimitConfig.SEARCH_LIMIT
        return RateLimitConfig.DEFAULT_LIMIT

    async def acquire(self, endpoint: str):
        """Wait until a call to ``endpoint`` is allowed, then record it.

        Args:
            endpoint: Endpoint path; used both to pick the limit and as the
                key for the per-endpoint lock and timestamp.
        """
        lock = self._locks.get(endpoint)
        if lock is None:
            lock = self._locks[endpoint] = asyncio.Lock()
            # -inf guarantees the very first call never waits, whatever the
            # current reading of the monotonic clock is.
            self._last_call_time[endpoint] = float("-inf")

        async with lock:
            max_requests, period = self._get_rate_limit(endpoint)
            # Honour the request count: N requests per period means a gap of
            # period / N. max(..., 1) guards against a zero or negative count.
            min_interval = period / max(max_requests, 1)

            # Monotonic clock: immune to wall-clock adjustments.
            elapsed = time.monotonic() - self._last_call_time[endpoint]
            if elapsed < min_interval:
                await asyncio.sleep(min_interval - elapsed)

            self._last_call_time[endpoint] = time.monotonic()
231 |
def create_error_response(
    error_type: ErrorType,
    message: str,
    details: Optional[Dict] = None
) -> Dict:
    """Build the error envelope returned by the tools in this module.

    Args:
        error_type: Category of the failure; its ``.value`` is emitted.
        message: Human-readable description.
        details: Optional extra data; any falsy value becomes ``{}``.

    Returns:
        ``{"error": {"type": ..., "message": ..., "details": ...}}``
    """
    payload = {
        "type": error_type.value,
        "message": message,
        "details": details if details else {},
    }
    return {"error": payload}
244 |
# FastMCP application object; the tool functions below register on it via @mcp.tool().
mcp = FastMCP("Semantic Scholar Server")
# Shared limiter instance; make_request() awaits rate_limiter.acquire() before each call.
rate_limiter = RateLimiter()
247 |
248 |
249 | # Basic functions
250 |
def get_api_key() -> Optional[str]:
    """
    Read the Semantic Scholar API key from ``SEMANTIC_SCHOLAR_API_KEY``.

    Returns:
        The variable's value. When it is unset (``None``) or empty, a warning
        is logged and that same value is returned, so callers fall back to
        unauthenticated access.
    """
    key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
    if key:
        return key
    logger.warning("No SEMANTIC_SCHOLAR_API_KEY set. Using unauthenticated access with lower rate limits.")
    return key
260 |
async def handle_exception(loop, context):
    """Log an event-loop failure and schedule ``shutdown()`` as a task.

    Args:
        loop: Accepted but not used in the body.
        context: Mapping describing the failure. Its ``"exception"`` entry is
            logged when present, otherwise its ``"message"`` entry.
            ``"message"`` is read unconditionally, so it must exist.

    NOTE(review): this is a coroutine function. If it is installed with
    ``loop.set_exception_handler`` the loop would call it without awaiting
    it; confirm how it is registered.
    """
    fallback = context["message"]
    failure = context.get("exception", fallback)
    logger.error(f"Caught exception: {failure}")
    asyncio.create_task(shutdown())
266 |
async def initialize_client():
    """Return the shared ``httpx.AsyncClient``, creating it on first use.

    The client is stored in the module-level ``http_client`` and built with
    ``Config.TIMEOUT`` and at most 10 keep-alive connections.
    """
    global http_client
    if http_client is not None:
        return http_client

    pool_limits = httpx.Limits(max_keepalive_connections=10)
    http_client = httpx.AsyncClient(timeout=Config.TIMEOUT, limits=pool_limits)
    return http_client
276 |
async def cleanup_client():
    """Close the shared HTTP client, if one exists, and reset it to ``None``."""
    global http_client
    if http_client is None:
        return  # nothing was ever created, or it was already cleaned up

    await http_client.aclose()
    http_client = None
283 |
async def make_request(endpoint: str, params: Dict = None) -> Dict:
    """Perform a rate-limited GET against the Semantic Scholar API.

    Args:
        endpoint: Path appended to ``Config.BASE_URL`` (e.g. ``"/paper/search"``).
        params: Query-string parameters, or ``None``.

    Returns:
        The decoded JSON body on success. Failures are not raised; they are
        returned as ``create_error_response`` envelopes: ``RATE_LIMIT`` for
        HTTP 429, ``API_ERROR`` for other HTTP status errors and unexpected
        exceptions, ``TIMEOUT`` for httpx timeouts.
    """
    try:
        # Wait for our turn before doing anything else.
        await rate_limiter.acquire(endpoint)

        # Only send the key header when a key is configured.
        api_key = get_api_key()
        headers = {}
        if api_key:
            headers["x-api-key"] = api_key

        # Shared pooled client.
        client = await initialize_client()
        response = await client.get(
            f"{Config.BASE_URL}{endpoint}", params=params, headers=headers
        )
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        status = e.response.status_code
        body = e.response.text
        logger.error(f"HTTP error {status} for {endpoint}: {body}")
        if status != 429:
            return create_error_response(
                ErrorType.API_ERROR,
                f"HTTP error: {status}",
                {"response": body}
            )
        # 429: tell the caller when to retry and whether a key was in use.
        rate_limit_details = {
            "retry_after": e.response.headers.get("retry-after"),
            "authenticated": bool(get_api_key())
        }
        return create_error_response(
            ErrorType.RATE_LIMIT,
            "Rate limit exceeded. Consider using an API key for higher limits.",
            rate_limit_details
        )
    except httpx.TimeoutException as e:
        logger.error(f"Request timeout for {endpoint}: {str(e)}")
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Boundary handler: any other failure is reported as a generic API error.
        logger.error(f"Unexpected error for {endpoint}: {str(e)}")
        return create_error_response(ErrorType.API_ERROR, str(e))
328 |
329 |
330 |
331 |
332 | # 1. Paper Data Tools
333 |
334 | # 1.1 Paper relevance search
@mcp.tool()
async def paper_relevance_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,  # supports formats like "2019", "2016-2020", "2010-", "-2015"
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = Config.DEFAULT_PAGE_SIZE
) -> Dict:
    """
    Search for papers on Semantic Scholar using relevance-based ranking.
    This endpoint is optimized for finding the most relevant papers matching a text query.
    Results are sorted by relevance score.

    Args:
        query (str): A text query to search for. The query will be matched against paper titles,
            abstracts, venue names, and author names. All terms in the query must be present
            in the paper for it to be returned. The query is case-insensitive and matches word
            prefixes (e.g. "quantum" matches "quantum" and "quantumly").

        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId and title are always returned.
            Available fields:
            - abstract: The paper's abstract
            - authors: List of authors with name and authorId
            - citationCount: Total number of citations
            - citations: List of papers citing this paper
            - corpusId: Internal ID for the paper
            - embedding: Vector embedding of the paper
            - externalIds: External IDs (DOI, MAG, etc)
            - fieldsOfStudy: List of fields of study
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - paperId: Semantic Scholar paper ID
            - publicationDate: Publication date in YYYY-MM-DD format
            - publicationTypes: List of publication types
            - publicationVenue: Venue information
            - references: List of papers cited by this paper
            - s2FieldsOfStudy: Semantic Scholar fields
            - title: Paper title
            - tldr: AI-generated TLDR summary
            - url: URL to Semantic Scholar paper page
            - venue: Publication venue name
            - year: Publication year

        publication_types (Optional[List[str]]): Filter by publication types.
            Available types:
            - Review
            - JournalArticle
            - CaseReport
            - ClinicalTrial
            - Conference
            - Dataset
            - Editorial
            - LettersAndComments
            - MetaAnalysis
            - News
            - Study
            - Book
            - BookSection

        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False

        min_citation_count (Optional[int]): Minimum number of citations required.
            Papers with fewer citations will be filtered out.

        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"

        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.
            Examples: ["Nature", "Science", "N. Engl. J. Med."]

        fields_of_study (Optional[List[str]]): Filter by fields of study.
            Available fields:
            - Computer Science
            - Medicine
            - Chemistry
            - Biology
            - Materials Science
            - Physics
            - Geology
            - Psychology
            - Art
            - History
            - Geography
            - Sociology
            - Business
            - Political Science
            - Economics
            - Philosophy
            - Mathematics
            - Engineering
            - Environmental Science
            - Agricultural and Food Sciences
            - Education
            - Law
            - Linguistics

        offset (int): Number of results to skip for pagination.
            Default: 0

        limit (int): Maximum number of results to return.
            Default: 10
            Maximum: 100

    Returns:
        Dict: {
            "total": int,      # Total number of papers matching the query
            "offset": int,     # Current offset in the results
            "next": int,       # Offset for the next page of results (if available)
            "data": List[Dict] # List of papers with requested fields
        }

    Notes:
        - Results are sorted by relevance to the query
        - All query terms must be present in the paper (AND operation)
        - Query matches are case-insensitive
        - Query matches word prefixes (e.g., "quantum" matches "quantum" and "quantumly")
        - Maximum of 100 results per request
        - Use offset parameter for pagination
        - Rate limits apply (see API documentation)
    """
    # Blank or whitespace-only queries are rejected before any network call.
    if not query.strip():
        return create_error_response(ErrorType.VALIDATION, "Query string cannot be empty")

    # No explicit selection -> default preset; otherwise whitelist-check the names.
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        unknown_fields = set(fields) - PaperFields.VALID_FIELDS
        if unknown_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    # Mandatory parameters; the page size is capped at the configured maximum.
    params = {
        "query": query,
        "offset": offset,
        "limit": min(limit, Config.MAX_RESULTS_PER_PAGE),
        "fields": ",".join(fields)
    }

    # Optional filters: a value of None means "not supplied, do not send".
    optional_filters = [
        ("publicationTypes", ",".join(publication_types) if publication_types else None),
        ("openAccessPdf", "true" if open_access_pdf else None),
        ("minCitationCount", min_citation_count),
        ("year", year if year else None),
        ("venue", ",".join(venue) if venue else None),
        ("fieldsOfStudy", ",".join(fields_of_study) if fields_of_study else None),
    ]
    params.update((name, value) for name, value in optional_filters if value is not None)

    return await make_request("/paper/search", params)
510 |
511 | # 1.2 Paper bulk search
@mcp.tool()
async def paper_bulk_search(
    context: Context,
    query: Optional[str] = None,
    token: Optional[str] = None,
    fields: Optional[List[str]] = None,
    sort: Optional[str] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    publication_date_or_year: Optional[str] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Bulk search for papers with advanced filtering and sorting options.
    Intended for retrieving large sets of papers efficiently.

    Args:
        query (Optional[str]): Text query to match against paper title and abstract.
            Supports boolean logic:
            - '+' for AND operation
            - '|' for OR operation
            - '-' to negate a term
            - '"' for phrase matching
            - '*' for prefix matching
            - '()' for precedence
            - '~N' for edit distance (default 2)
            Examples:
            - 'fish ladder' (contains both terms)
            - 'fish -ladder' (has fish, no ladder)
            - 'fish | ladder' (either term)
            - '"fish ladder"' (exact phrase)
            - '(fish ladder) | outflow'
            - 'fish~' (fuzzy match)
            - '"fish ladder"~3' (terms within 3 words)

        token (Optional[str]): Continuation token for pagination

        fields (Optional[List[str]]): Fields to return for each paper
            paperId is always returned
            Default: paperId and title only

        sort (Optional[str]): Sort order in format 'field:order'
            Fields: paperId, publicationDate, citationCount
            Order: asc (default), desc
            Default: 'paperId:asc'
            Examples:
            - 'publicationDate:asc' (oldest first)
            - 'citationCount:desc' (most cited first)

        publication_types (Optional[List[str]]): Filter by publication types:
            Review, JournalArticle, CaseReport, ClinicalTrial,
            Conference, Dataset, Editorial, LettersAndComments,
            MetaAnalysis, News, Study, Book, BookSection

        open_access_pdf (bool): Only include papers with public PDF

        min_citation_count (Optional[int]): Minimum citation threshold

        publication_date_or_year (Optional[str]): Date/year range filter
            Format: <startDate>:<endDate> in YYYY-MM-DD
            Supports partial dates and open ranges
            Examples:
            - '2019-03-05' (specific date)
            - '2019-03' (month)
            - '2019' (year)
            - '2016-03-05:2020-06-06' (range)
            - '1981-08-25:' (since date)
            - ':2015-01' (until date)

        year (Optional[str]): Publication year filter
            Examples: '2019', '2016-2020', '2010-', '-2015'

        venue (Optional[List[str]]): Filter by publication venues
            Accepts full names or ISO4 abbreviations
            Examples: ['Nature', 'N. Engl. J. Med.']

        fields_of_study (Optional[List[str]]): Filter by fields of study
            Available fields include: Computer Science, Medicine,
            Physics, Mathematics, etc.

    Returns:
        Dict: {
            'total': int,      # Total matching papers
            'token': str,      # Continuation token for next batch
            'data': List[Dict] # Papers with requested fields
        }

    Notes:
        - Returns up to 1,000 papers per call
        - Can fetch up to 10M papers total
        - Nested data (citations, references) not available
        - For larger datasets, use the Datasets API
    """
    # Parameters are collected in the same order the checks run.
    params = {}

    # Free-text query (surrounding whitespace removed) and pagination token.
    if query:
        params["query"] = query.strip()
    if token:
        params["token"] = token

    # Requested fields must all be on the paper whitelist.
    if fields:
        unknown_fields = set(fields) - PaperFields.VALID_FIELDS
        if unknown_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    # Sort must be exactly "<field>:<order>" with known field and order.
    if sort:
        sortable_fields = ["paperId", "publicationDate", "citationCount"]
        sort_orders = ["asc", "desc"]

        sort_parts = sort.split(":")
        if len(sort_parts) != 2:
            return create_error_response(
                ErrorType.VALIDATION,
                "Sort must be in format 'field:order'"
            )
        sort_field, sort_order = sort_parts
        if sort_field not in sortable_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort field. Must be one of: {', '.join(sortable_fields)}"
            )
        if sort_order not in sort_orders:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort order. Must be one of: {', '.join(sort_orders)}"
            )
        params["sort"] = sort

    # Publication types are checked against the known list.
    if publication_types:
        valid_types = {
            "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
            "Conference", "Dataset", "Editorial", "LettersAndComments",
            "MetaAnalysis", "News", "Study", "Book", "BookSection"
        }
        unknown_types = set(publication_types) - valid_types
        if unknown_types:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid publication types: {', '.join(unknown_types)}",
                {"valid_types": list(valid_types)}
            )
        params["publicationTypes"] = ",".join(publication_types)

    # Restrict to papers with a public PDF only when asked.
    if open_access_pdf:
        params["openAccessPdf"] = "true"

    # Citation threshold: zero is allowed, negative values are rejected.
    if min_citation_count is not None:
        if min_citation_count < 0:
            return create_error_response(
                ErrorType.VALIDATION,
                "Minimum citation count cannot be negative"
            )
        params["minCitationCount"] = str(min_citation_count)

    # The date/year range takes precedence; `year` is sent only without it.
    if publication_date_or_year:
        params["publicationDateOrYear"] = publication_date_or_year
    elif year:
        params["year"] = year

    # Venue names are passed through unchecked.
    if venue:
        params["venue"] = ",".join(venue)

    # Fields of study are checked against the known list.
    if fields_of_study:
        valid_fields = {
            "Computer Science", "Medicine", "Chemistry", "Biology",
            "Materials Science", "Physics", "Geology", "Psychology",
            "Art", "History", "Geography", "Sociology", "Business",
            "Political Science", "Economics", "Philosophy", "Mathematics",
            "Engineering", "Environmental Science", "Agricultural and Food Sciences",
            "Education", "Law", "Linguistics"
        }
        unknown_studies = set(fields_of_study) - valid_fields
        if unknown_studies:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields of study: {', '.join(unknown_studies)}",
                {"valid_fields": list(valid_fields)}
            )
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # Successful payloads and error envelopes are both returned unchanged.
    return await make_request("/paper/search/bulk", params)
722 |
723 | # 1.3 Paper title search
@mcp.tool()
async def paper_title_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Find a single paper by title match. This endpoint is optimized for finding a specific paper
    by its title and returns the best matching paper based on title similarity.

    Args:
        query (str): The title text to search for. The query will be matched against paper titles
            to find the closest match. The match is case-insensitive and ignores punctuation.

        fields (Optional[List[str]]): List of fields to return for the paper.
            paperId and title are always returned.
            Available fields:
            - abstract: The paper's abstract
            - authors: List of authors with name and authorId
            - citationCount: Total number of citations
            - citations: List of papers citing this paper
            - corpusId: Internal ID for the paper
            - embedding: Vector embedding of the paper
            - externalIds: External IDs (DOI, MAG, etc)
            - fieldsOfStudy: List of fields of study
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - paperId: Semantic Scholar paper ID
            - publicationDate: Publication date in YYYY-MM-DD format
            - publicationTypes: List of publication types
            - publicationVenue: Venue information
            - references: List of papers cited by this paper
            - s2FieldsOfStudy: Semantic Scholar fields
            - title: Paper title
            - tldr: AI-generated TLDR summary
            - url: URL to Semantic Scholar paper page
            - venue: Publication venue name
            - year: Publication year

        publication_types (Optional[List[str]]): Filter by publication types.
            Available types:
            - Review
            - JournalArticle
            - CaseReport
            - ClinicalTrial
            - Conference
            - Dataset
            - Editorial
            - LettersAndComments
            - MetaAnalysis
            - News
            - Study
            - Book
            - BookSection

        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False

        min_citation_count (Optional[int]): Minimum number of citations required.
            Papers with fewer citations will be filtered out.

        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"

        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.
            Examples: ["Nature", "Science", "N. Engl. J. Med."]

        fields_of_study (Optional[List[str]]): Filter by fields of study.
            Available fields:
            - Computer Science
            - Medicine
            - Chemistry
            - Biology
            - Materials Science
            - Physics
            - Geology
            - Psychology
            - Art
            - History
            - Geography
            - Sociology
            - Business
            - Political Science
            - Economics
            - Philosophy
            - Mathematics
            - Engineering
            - Environmental Science
            - Agricultural and Food Sciences
            - Education
            - Law
            - Linguistics

    Returns:
        Dict: {
            "paperId": str,      # Semantic Scholar Paper ID
            "title": str,        # Paper title
            "matchScore": float, # Similarity score between query and matched title
            ...                  # Additional requested fields
        }

        Returns error response if no matching paper is found.

    Notes:
        - Returns the single best matching paper based on title similarity
        - Match score indicates how well the title matches the query
        - Case-insensitive matching
        - Ignores punctuation in matching
        - Filters are applied after finding the best title match
    """
    # Blank or whitespace-only titles are rejected before any network call.
    if not query.strip():
        return create_error_response(ErrorType.VALIDATION, "Query string cannot be empty")

    # No explicit selection -> default preset; otherwise whitelist-check the names.
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        unknown_fields = set(fields) - PaperFields.VALID_FIELDS
        if unknown_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(unknown_fields)}",
                {"valid_fields": list(PaperFields.VALID_FIELDS)}
            )

    # The title query is always sent; everything else only when supplied
    # (None marks "not supplied").
    params = {"query": query}
    optional_params = [
        ("fields", ",".join(fields) if fields else None),
        ("publicationTypes", ",".join(publication_types) if publication_types else None),
        ("openAccessPdf", "true" if open_access_pdf else None),
        ("minCitationCount", str(min_citation_count) if min_citation_count is not None else None),
        ("year", year if year else None),
        ("venue", ",".join(venue) if venue else None),
        ("fieldsOfStudy", ",".join(fields_of_study) if fields_of_study else None),
    ]
    for name, value in optional_params:
        if value is not None:
            params[name] = value

    result = await make_request("/paper/search/match", params)

    # A 404 reported by make_request means "no match": replace the generic
    # HTTP error with a clearer message. Any other result passes through.
    is_error = isinstance(result, Dict) and "error" in result
    if is_error and "404" in result["error"].get("message", ""):
        return create_error_response(
            ErrorType.VALIDATION,
            "No matching paper found",
            {"original_query": query}
        )
    return result
897 |
898 | # 1.4 Details about a paper
@mcp.tool()
async def paper_details(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get details about a paper using various types of identifiers.
    This endpoint provides comprehensive metadata about a paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org,
            acm.org, biorxiv.org

        fields (Optional[List[str]]): List of fields to return.
            paperId is always returned.
            Available fields:
            - abstract: The paper's abstract
            - authors: List of authors with name and authorId
            - citationCount: Total number of citations
            - citations: List of papers citing this paper
            - corpusId: Internal ID for the paper
            - embedding: Vector embedding of the paper
            - externalIds: External IDs (DOI, MAG, etc)
            - fieldsOfStudy: List of fields of study
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - paperId: Semantic Scholar paper ID
            - publicationDate: Publication date in YYYY-MM-DD format
            - publicationTypes: List of publication types
            - publicationVenue: Venue information
            - references: List of papers cited by this paper
            - s2FieldsOfStudy: Semantic Scholar fields
            - title: Paper title
            - tldr: AI-generated TLDR summary
            - url: URL to Semantic Scholar paper page
            - venue: Publication venue name
            - year: Publication year

            Special syntax for nested fields:
            - For citations/references: citations.title, references.abstract, etc.
            - For authors: authors.name, authors.affiliations, etc.
            - For embeddings: embedding.specter_v2 for v2 embeddings

            If omitted, returns only paperId and title.

    Returns:
        Dict: Paper details with requested fields.
            Always includes paperId.
            Returns error response if paper not found.

    Notes:
        - Supports multiple identifier types for flexibility
        - Nested fields available for detailed citation/reference/author data
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    # Blank or whitespace-only identifiers are rejected before any network call.
    if not paper_id.strip():
        return create_error_response(ErrorType.VALIDATION, "Paper ID cannot be empty")

    # Field names are forwarded without whitelist checking, which lets the
    # dotted nested names described in the docstring through.
    params = {"fields": ",".join(fields)} if fields else {}

    result = await make_request(f"/paper/{paper_id}", params)

    # A 404 reported by make_request becomes an explicit "not found" answer;
    # every other result, error or not, is returned untouched.
    is_error = isinstance(result, Dict) and "error" in result
    if is_error and "404" in result["error"].get("message", ""):
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper not found",
            {"paper_id": paper_id}
        )
    return result
993 |
994 | # 1.5 Get details for multiple papers at once
@mcp.tool()
async def paper_batch_details(
    context: Context,
    paper_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple papers in a single batch request.
    This endpoint is optimized for efficiently retrieving details about known papers.

    Args:
        paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
              Supported URLs from: semanticscholar.org, arxiv.org, aclweb.org,
              acm.org, biorxiv.org
            Maximum: 500 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            Whitespace around entries and empty entries are ignored.
            paperId is always returned.
            Available fields:
            - abstract: The paper's abstract
            - authors: List of authors with name and authorId
            - citationCount: Total number of citations
            - citations: List of papers citing this paper
            - corpusId: Internal ID for the paper
            - embedding: Vector embedding of the paper
            - externalIds: External IDs (DOI, MAG, etc)
            - fieldsOfStudy: List of fields of study
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - paperId: Semantic Scholar paper ID
            - publicationDate: Publication date in YYYY-MM-DD format
            - publicationTypes: List of publication types
            - publicationVenue: Venue information
            - references: List of papers cited by this paper
            - s2FieldsOfStudy: Semantic Scholar fields
            - title: Paper title
            - tldr: AI-generated TLDR summary
            - url: URL to Semantic Scholar paper page
            - venue: Publication venue name
            - year: Publication year

            Special syntax for nested fields:
            - For citations/references: citations.title, references.abstract, etc.
            - For authors: authors.name, authors.affiliations, etc.
            - For embeddings: embedding.specter_v2 for v2 embeddings

            If omitted, returns only paperId and title.

    Returns:
        List[Dict]: List of paper details with requested fields.
            - Results maintain the same order as input paper_ids
            - Invalid or not found paper IDs return null in the results
            - Each paper object contains the requested fields
            - paperId is always included in each paper object

    Notes:
        - More efficient than making multiple single-paper requests
        - Maximum of 500 paper IDs per request
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
        - Invalid paper IDs return null instead of causing an error
        - Order of results matches order of input IDs for easy mapping
    """
    # NOTE(review): the signature is annotated "-> Dict" while the success
    # path returns response.json(), documented above as a list. Left as-is
    # to keep the tool's interface unchanged - confirm intended schema.

    # Validate inputs
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )

    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    # Validate fields and build the query parameters in one pass.
    params = {}
    if fields:
        # Trim each entry and drop empties so inputs such as "title, year"
        # or a trailing comma are not rejected as unknown field names.
        requested = [name.strip() for name in fields.split(",") if name.strip()]
        invalid_fields = set(requested) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )
        if requested:
            params["fields"] = ",".join(requested)

    # NOTE(review): this opens its own AsyncClient instead of going through
    # make_request like the GET tools, so any rate limiting / connection
    # pooling make_request provides is presumably bypassed here - confirm.
    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            # Send the API key header only when a key is configured.
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=params,
                json={"ids": paper_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        # 429 gets its own error type so callers can honour retry-after.
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Tool boundary: report anything unexpected (network failure, bad
        # JSON, ...) as a structured error rather than raising to the client.
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
1135 |
1136 | # 1.6 Details about a paper's authors
@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with pagination support.
    This endpoint provides author information and their contributions.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading/trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0
            Must not be negative

        limit (int): Maximum number of authors to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }

    Notes:
        - Authors are returned in the order they appear on the paper
        - Supports pagination for papers with many authors
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so validation and the request path see the same ID.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination locally instead of spending a request on values
    # that cannot describe a valid page.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"offset": offset}
        )

    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )

    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/authors", params)

    # A dict carrying an "error" key is a failure payload; when its message
    # mentions 404 it is replaced by a clearer "not found" response. Any
    # other payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1249 |
1250 | # 1.7 Details about a paper's citations
@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers that cite the specified paper (papers where this paper appears in their bibliography).
    This endpoint provides detailed citation information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading/trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each citing paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Citation-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of citations to skip for pagination.
            Default: 0
            Must not be negative

        limit (int): Maximum number of citations to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of citing papers with requested fields
        }

    Notes:
        - Citations are sorted by citation date (newest first)
        - Includes citation context when available
        - Supports pagination for highly-cited papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so validation and the request path see the same ID.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination locally instead of spending a request on values
    # that cannot describe a valid page.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"offset": offset}
        )

    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )

    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/citations", params)

    # A dict carrying an "error" key is a failure payload; when its message
    # mentions 404 it is replaced by a clearer "not found" response. Any
    # other payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1362 |
1363 | # 1.8 Details about a paper's references
@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers cited by the specified paper (papers appearing in this paper's bibliography).
    This endpoint provides detailed reference information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Leading/trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each referenced paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations

            Reference-specific fields:
            - contexts: List of citation contexts (text snippets)
            - intents: List of citation intents (Background, Method, etc.)
            - isInfluential: Whether this is an influential citation

            If omitted, returns only paperId and title.

        offset (int): Number of references to skip for pagination.
            Default: 0
            Must not be negative

        limit (int): Maximum number of references to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of referenced papers with requested fields
        }

    Notes:
        - References are returned in the order they appear in the bibliography
        - Includes citation context when available
        - Supports pagination for papers with many references
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so validation and the request path see the same ID.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination locally instead of spending a request on values
    # that cannot describe a valid page.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"offset": offset}
        )

    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )

    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/references", params)

    # A dict carrying an "error" key is a failure payload; when its message
    # mentions 404 it is replaced by a clearer "not found" response. Any
    # other payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result
1475 |
1476 |
1477 |
1478 | # 2. Author Data Tools
1479 |
1480 | # 2.1 Search for authors by name
@mcp.tool()
async def author_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Search for authors by name on Semantic Scholar.
    This endpoint is optimized for finding authors based on their name.
    Results are sorted by relevance to the query.

    Args:
        query (str): The name text to search for. The query will be matched against author names
            and their known aliases. The match is case-insensitive and matches name prefixes.
            Examples:
            - "Albert Einstein"
            - "Einstein, Albert"
            - "A Einstein"

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

        offset (int): Number of authors to skip for pagination.
            Default: 0
            Must not be negative

        limit (int): Maximum number of authors to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "total": int,      # Total number of authors matching the query
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }

    Notes:
        - Results are sorted by relevance to the query
        - Matches against author names and aliases
        - Case-insensitive matching
        - Matches name prefixes
        - Supports pagination for large result sets
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Reject blank / whitespace-only queries; the query itself is forwarded
    # exactly as given.
    if not query.strip():
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Validate pagination locally instead of spending a request on values
    # that cannot describe a valid page.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"offset": offset}
        )

    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )

    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "query": query,
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    # Whatever make_request yields (result page or error payload) is
    # returned to the caller unchanged.
    return await make_request("/author/search", params)
1583 |
1584 | # 2.2 Details about an author
@mcp.tool()
async def author_details(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get detailed information about an author by their ID.
    This endpoint provides comprehensive metadata about an author.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Example: "1741101" (Albert Einstein)
            Leading/trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        Dict: Author details with requested fields.
            Always includes authorId.
            Returns error response if author not found.

    Notes:
        - Provides comprehensive author metadata
        - Papers list is limited to most recent papers
        - For complete paper list, use author_papers endpoint
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so the emptiness check and the request path agree:
    # a padded ID used to pass validation but was then sent verbatim.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )

    # Only send "fields" when the caller asked for specific ones.
    params = {}
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/author/{author_id}", params)

    # A dict carrying an "error" key is a failure payload; when its message
    # mentions 404 it is replaced by a clearer "not found" response. Any
    # other payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
1669 |
1670 | # 2.3 Details about an author's papers
@mcp.tool()
async def author_papers(
    context: Context,
    author_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers written by an author with pagination support.
    This endpoint provides detailed information about an author's publications.

    Args:
        author_id (str): Semantic Scholar author ID.
            This is a unique identifier assigned by Semantic Scholar.
            Example: "1741101" (Albert Einstein)
            Leading/trailing whitespace is ignored.

        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - s2FieldsOfStudy: Semantic Scholar fields
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        offset (int): Number of papers to skip for pagination.
            Default: 0
            Must not be negative

        limit (int): Maximum number of papers to return.
            Default: 100
            Minimum: 1
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of papers with requested fields
        }

    Notes:
        - Papers are sorted by publication date (newest first)
        - Supports pagination for authors with many papers
        - Some fields may be null if data is not available
        - Rate limits apply (see API documentation)
    """
    # Normalize once so validation and the request path see the same ID.
    author_id = author_id.strip()
    if not author_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author ID cannot be empty"
        )

    # Validate pagination locally instead of spending a request on values
    # that cannot describe a valid page.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative",
            {"offset": offset}
        )

    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1",
            {"min_limit": 1}
        )

    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    # Validate fields
    if fields:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )

    # Build request parameters
    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/author/{author_id}/papers", params)

    # A dict carrying an "error" key is a failure payload; when its message
    # mentions 404 it is replaced by a clearer "not found" response. Any
    # other payload is passed through unchanged.
    if isinstance(result, dict) and "error" in result:
        if "404" in result["error"].get("message", ""):
            return create_error_response(
                ErrorType.VALIDATION,
                "Author not found",
                {"author_id": author_id}
            )

    return result
1776 |
1777 | # 2.4 Get details for multiple authors at once
@mcp.tool()
async def author_batch_details(
    context: Context,
    author_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple authors in a single batch request.
    This endpoint is optimized for efficiently retrieving details about known authors.

    Args:
        author_ids (List[str]): List of Semantic Scholar author IDs.
            These are unique identifiers assigned by Semantic Scholar.
            Example: ["1741101", "1741102"]
            Maximum: 1000 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each author.
            Whitespace around entries and empty entries are ignored.
            authorId is always returned.
            Available fields:
            - name: Author's name
            - aliases: Alternative names for the author
            - url: URL to author's S2 profile
            - affiliations: List of author's affiliations
            - homepage: Author's homepage URL
            - paperCount: Total number of papers by this author
            - citationCount: Total citations received by this author
            - hIndex: Author's h-index
            - papers: List of papers by this author (returns paperId and title)

            Special syntax for paper fields:
            - papers.year: Include year for each paper
            - papers.authors: Include authors for each paper
            - papers.abstract: Include abstract for each paper
            - papers.venue: Include venue for each paper
            - papers.citations: Include citation count for each paper

            If omitted, returns only authorId and name.

    Returns:
        List[Dict]: List of author details with requested fields.
            - Results maintain the same order as input author_ids
            - Invalid or not found author IDs return null in the results
            - Each author object contains the requested fields
            - authorId is always included in each author object

    Notes:
        - More efficient than making multiple single-author requests
        - Maximum of 1000 author IDs per request
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
        - Invalid author IDs return null instead of causing an error
        - Order of results matches order of input IDs for easy mapping
    """
    # NOTE(review): the signature is annotated "-> Dict" while the success
    # path returns response.json(), documented above as a list. Left as-is
    # to keep the tool's interface unchanged - confirm intended schema.

    # Validate inputs
    if not author_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Author IDs list cannot be empty"
        )

    if len(author_ids) > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 1000 author IDs at once",
            {"max_authors": 1000, "received": len(author_ids)}
        )

    # Validate fields and build the query parameters in one pass.
    params = {}
    if fields:
        # Trim each entry and drop empties so inputs such as "name, hIndex"
        # or a trailing comma are not rejected as unknown field names.
        requested = [name.strip() for name in fields.split(",") if name.strip()]
        invalid_fields = set(requested) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # sorted() keeps the message stable across runs (set order is not).
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )
        if requested:
            params["fields"] = ",".join(requested)

    # NOTE(review): this opens its own AsyncClient instead of going through
    # make_request like the GET tools, so any rate limiting / connection
    # pooling make_request provides is presumably bypassed here - confirm.
    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            # Send the API key header only when a key is configured.
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/author/batch",
                params=params,
                json={"ids": author_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        # 429 gets its own error type so callers can honour retry-after.
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Tool boundary: report anything unexpected (network failure, bad
        # JSON, ...) as a structured error rather than raising to the client.
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )
1898 |
1899 |
1900 | # 3. Paper Recommendation Tools
1901 |
1902 | # 3.1 Get recommendations based on a single paper
@mcp.tool()
async def get_paper_recommendations_single(
    context: Context,
    paper_id: str,
    fields: Optional[str] = None,
    limit: int = 100,
    from_pool: str = "recent"
) -> Dict:
    """
    Get paper recommendations based on a single seed paper.
    This endpoint is optimized for finding papers similar to a specific paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Must not be empty.

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Minimum: 1
            Maximum: 500

        from_pool (str): Which pool of papers to recommend from.
            Options:
            - "recent": Recent papers (default)
            - "all-cs": All computer science papers
            Default: "recent"

    Returns:
        Dict: {
            "recommendedPapers": List[Dict]  # List of recommended papers with requested fields
        }
        On any failure (invalid arguments, 404, 429, other HTTP errors,
        timeout, unexpected exception) the value built by
        create_error_response is returned instead of raising.

    Notes:
        - Recommendations are based on content similarity and citation patterns
        - Results are sorted by relevance to the seed paper
        - "recent" pool focuses on papers from the last few years
        - "all-cs" pool includes older computer science papers
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    valid_pools = ["recent", "all-cs"]

    try:
        # Validate locally *before* calling the rate limiter, so a request
        # that is rejected here never goes through rate_limiter.acquire.
        if not paper_id or not paper_id.strip():
            # An empty ID would otherwise produce a URL ending in "forpaper/".
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper ID cannot be empty"
            )

        # Validate limit (both bounds)
        if limit < 1:
            return create_error_response(
                ErrorType.VALIDATION,
                "Limit must be at least 1",
                {"min_limit": 1, "requested": limit}
            )

        if limit > 500:
            return create_error_response(
                ErrorType.VALIDATION,
                "Cannot request more than 500 recommendations",
                {"max_limit": 500, "requested": limit}
            )

        # Validate pool
        if from_pool not in valid_pools:
            return create_error_response(
                ErrorType.VALIDATION,
                "Invalid paper pool specified",
                {"valid_pools": valid_pools}
            )

        # Apply rate limiting
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Build request parameters
        params = {
            "limit": limit,
            "from": from_pool
        }
        if fields:
            params["fields"] = fields

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}"
            response = await client.get(url, params=params, headers=headers)

            # Handle specific error cases: a 404 is reported as a validation
            # problem with the supplied ID rather than a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "Paper not found",
                    {"paper_id": paper_id}
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        logger.error(f"Unexpected error in recommendations: {str(e)}")
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
2046 |
2047 | # 3.2 Get recommendations based on multiple papers
@mcp.tool()
async def get_paper_recommendations_multi(
    context: Context,
    positive_paper_ids: List[str],
    negative_paper_ids: Optional[List[str]] = None,
    fields: Optional[str] = None,
    limit: int = 100
) -> Dict:
    """
    Get paper recommendations based on multiple positive and optional negative examples.
    This endpoint is optimized for finding papers similar to a set of papers while
    avoiding papers similar to the negative examples.

    Args:
        positive_paper_ids (List[str]): List of paper IDs to use as positive examples.
            Papers similar to these will be recommended. Must not be empty.
            Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - CorpusId:<id> (e.g., "CorpusId:215416146")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")

        negative_paper_ids (Optional[List[str]]): List of paper IDs to use as negative examples.
            Papers similar to these will be avoided in recommendations.
            Uses same ID formats as positive_paper_ids.
            None is sent to the API as an empty list.

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Available fields:
            - title: Paper title
            - abstract: Paper abstract
            - year: Publication year
            - venue: Publication venue
            - authors: List of authors
            - url: URL to paper page
            - citationCount: Number of citations received
            - influentialCitationCount: Number of influential citations
            - isOpenAccess: Whether paper is open access
            - openAccessPdf: Open access PDF URL if available
            - fieldsOfStudy: List of fields of study
            - publicationTypes: List of publication types
            - publicationDate: Publication date in YYYY-MM-DD format
            - journal: Journal information
            - externalIds: External IDs (DOI, MAG, etc)

            If omitted, returns only paperId and title.

        limit (int): Maximum number of recommendations to return.
            Default: 100
            Minimum: 1
            Maximum: 500

    Returns:
        Dict: {
            "recommendedPapers": List[Dict]  # List of recommended papers with requested fields
        }
        On any failure (invalid arguments, 404, 429, other HTTP errors,
        timeout, unexpected exception) the value built by
        create_error_response is returned instead of raising.

    Notes:
        - Recommendations balance similarity to positive examples and dissimilarity to negative examples
        - Results are sorted by relevance score
        - More positive examples can help focus recommendations
        - Negative examples help filter out unwanted topics/approaches
        - Rate limits apply (see API documentation)
        - Some fields may be null if data is not available
    """
    try:
        # Validate locally *before* calling the rate limiter, so a request
        # that is rejected here never goes through rate_limiter.acquire.
        if not positive_paper_ids:
            return create_error_response(
                ErrorType.VALIDATION,
                "Must provide at least one positive paper ID"
            )

        # Validate limit (both bounds)
        if limit < 1:
            return create_error_response(
                ErrorType.VALIDATION,
                "Limit must be at least 1",
                {"min_limit": 1, "requested": limit}
            )

        if limit > 500:
            return create_error_response(
                ErrorType.VALIDATION,
                "Cannot request more than 500 recommendations",
                {"max_limit": 500, "requested": limit}
            )

        # Apply rate limiting
        endpoint = "/recommendations"
        await rate_limiter.acquire(endpoint)

        # Build request parameters
        params = {"limit": limit}
        if fields:
            params["fields"] = fields

        request_body = {
            "positivePaperIds": positive_paper_ids,
            "negativePaperIds": negative_paper_ids or []
        }

        # Make the API request
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            url = "https://api.semanticscholar.org/recommendations/v1/papers"
            response = await client.post(url, params=params, json=request_body, headers=headers)

            # Handle specific error cases: a 404 is reported as a validation
            # problem with the supplied IDs rather than a generic HTTP error.
            if response.status_code == 404:
                return create_error_response(
                    ErrorType.VALIDATION,
                    "One or more input papers not found",
                    {
                        "positive_ids": positive_paper_ids,
                        "negative_ids": negative_paper_ids
                    }
                )

            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded. Consider using an API key for higher limits.",
                {
                    "retry_after": e.response.headers.get("retry-after"),
                    "authenticated": bool(get_api_key())
                }
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        logger.error(f"Unexpected error in recommendations: {str(e)}")
        return create_error_response(
            ErrorType.API_ERROR,
            "Failed to get recommendations",
            {"error": str(e)}
        )
2195 |
2196 |
2197 |
2198 |
2199 |
2200 |
# True while a shutdown() call is running; lets a second, overlapping call
# back off instead of cancelling the first one.
_shutdown_in_progress = False


async def shutdown():
    """Gracefully shut down the server.

    Cancels every task on the running loop except the calling one, waits for
    all of them to finish, then awaits ``cleanup_client()`` and
    ``mcp.cleanup()``.

    This coroutine is reached from two places in this file: the signal
    handler schedules it as its own task, and ``run_server`` awaits it in a
    ``finally`` block. Because each call cancels "all tasks but mine", two
    overlapping calls would cancel and then await each other. A call that
    finds a shutdown already in progress therefore returns immediately and
    leaves the work to the first caller.
    """
    global _shutdown_in_progress
    if _shutdown_in_progress:
        logger.info("Shutdown already in progress")
        return
    _shutdown_in_progress = True

    try:
        logger.info("Initiating graceful shutdown...")

        # Cancel all tasks
        tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
        for task in tasks:
            task.cancel()

        # return_exceptions=True collects each task's outcome instead of
        # re-raising it: a task that ends with an error other than
        # CancelledError must not abort the shutdown and skip the cleanup.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in outcomes:
            if isinstance(outcome, BaseException) and not isinstance(
                outcome, asyncio.CancelledError
            ):
                logger.error(f"Task raised during shutdown: {outcome!r}")

        # Cleanup resources; mcp.cleanup() still runs if cleanup_client() raises
        try:
            await cleanup_client()
        finally:
            await mcp.cleanup()

        logger.info(f"Cancelled {len(tasks)} tasks")
        logger.info("Shutdown complete")
    finally:
        # Reset so a later, non-overlapping shutdown is not silently skipped.
        _shutdown_in_progress = False
2220 |
def init_signal_handlers(loop):
    """Initialize signal handlers for graceful shutdown.

    Registers a handler for SIGTERM and SIGINT on ``loop`` that schedules
    ``shutdown()`` as a task on that same loop.

    Args:
        loop: The asyncio event loop to register the handlers on.

    Event loops without signal support raise ``NotImplementedError`` from
    ``add_signal_handler`` (it is documented as Unix-only). That case is
    logged and skipped so the server can still start; the signal then keeps
    its default behaviour.
    """
    def _schedule_shutdown():
        # Use the loop the handler was registered on rather than whichever
        # loop happens to be current when the signal arrives.
        loop.create_task(shutdown())

    registered = 0
    for sig in (signal.SIGTERM, signal.SIGINT):
        try:
            loop.add_signal_handler(sig, _schedule_shutdown)
        except NotImplementedError:
            logger.warning(
                f"Signal handler for {sig!r} is not supported by this event loop; skipping"
            )
        else:
            registered += 1

    if registered:
        logger.info("Signal handlers initialized")
2226 |
async def run_server():
    """Run the MCP server inside the ``mcp`` async context.

    Awaits ``initialize_client()``, then ``mcp.run_async()``. Any
    ``Exception`` is logged and re-raised. ``shutdown()`` is awaited on the
    way out regardless of how the body ended, before the ``mcp`` context
    exits.
    """
    async with mcp:
        try:
            # HTTP client first, then the server itself
            await initialize_client()

            logger.info("Starting Semantic Scholar Server")
            await mcp.run_async()
        except Exception as server_error:
            # Record the failure here, but let the caller decide what to do
            logger.error(f"Server error: {server_error}")
            raise
        finally:
            await shutdown()
2242 |
if __name__ == "__main__":
    # Bound before the try so the cleanup in `finally` can tell whether a
    # loop was ever created.
    loop = None
    try:
        # Set up event loop with exception handler
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.set_exception_handler(handle_exception)

        # Initialize signal handlers
        init_signal_handlers(loop)

        # Run the server
        loop.run_until_complete(run_server())
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down...")
    except asyncio.CancelledError:
        # shutdown() cancels every task except its own. When it runs as the
        # signal handler's task, that includes the run_server() task, so
        # run_until_complete() re-raises CancelledError here. On Python 3.8+
        # CancelledError derives from BaseException and would slip past
        # `except Exception`, ending a normal shutdown with a traceback.
        logger.info("Server task cancelled, shutting down...")
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}")
    finally:
        if loop is not None:
            try:
                loop.run_until_complete(asyncio.sleep(0))  # Let pending tasks complete
                loop.close()
            except Exception as e:
                logger.error(f"Error during final cleanup: {str(e)}")
        logger.info("Server stopped")
2266 |
```