danimal141/arxiv-search-mcp # codebase.md

# Directory Structure

```
├── .gitignore
├── bin
│   └── .keep
├── deno.json
├── deno.lock
├── images
│   └── sample_use.png
├── LICENSE
├── README.md
└── src
    ├── main.test.ts
    └── main.ts
```

# Files

--------------------------------------------------------------------------------
/bin/.keep:
--------------------------------------------------------------------------------

```
1 | 
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
  1 | # Logs
  2 | logs
  3 | *.log
  4 | npm-debug.log*
  5 | yarn-debug.log*
  6 | yarn-error.log*
  7 | lerna-debug.log*
  8 | .pnpm-debug.log*
  9 | 
 10 | # Diagnostic reports (https://nodejs.org/api/report.html)
 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 12 | 
 13 | # Runtime data
 14 | pids
 15 | *.pid
 16 | *.seed
 17 | *.pid.lock
 18 | 
 19 | # Directory for instrumented libs generated by jscoverage/JSCover
 20 | lib-cov
 21 | 
 22 | # Coverage directory used by tools like istanbul
 23 | coverage
 24 | *.lcov
 25 | 
 26 | # nyc test coverage
 27 | .nyc_output
 28 | 
 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 30 | .grunt
 31 | 
 32 | # Bower dependency directory (https://bower.io/)
 33 | bower_components
 34 | 
 35 | # node-waf configuration
 36 | .lock-wscript
 37 | 
 38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 39 | build/Release
 40 | 
 41 | # Dependency directories
 42 | node_modules/
 43 | jspm_packages/
 44 | 
 45 | # Snowpack dependency directory (https://snowpack.dev/)
 46 | web_modules/
 47 | 
 48 | # TypeScript cache
 49 | *.tsbuildinfo
 50 | 
 51 | # Optional npm cache directory
 52 | .npm
 53 | 
 54 | # Optional eslint cache
 55 | .eslintcache
 56 | 
 57 | # Optional stylelint cache
 58 | .stylelintcache
 59 | 
 60 | # Microbundle cache
 61 | .rpt2_cache/
 62 | .rts2_cache_cjs/
 63 | .rts2_cache_es/
 64 | .rts2_cache_umd/
 65 | 
 66 | # Optional REPL history
 67 | .node_repl_history
 68 | 
 69 | # Output of 'npm pack'
 70 | *.tgz
 71 | 
 72 | # Yarn Integrity file
 73 | .yarn-integrity
 74 | 
 75 | # dotenv environment variable files
 76 | .env
 77 | .env.development.local
 78 | .env.test.local
 79 | .env.production.local
 80 | .env.local
 81 | 
 82 | # parcel-bundler cache (https://parceljs.org/)
 83 | .cache
 84 | .parcel-cache
 85 | 
 86 | # Next.js build output
 87 | .next
 88 | out
 89 | 
 90 | # Nuxt.js build / generate output
 91 | .nuxt
 92 | dist
 93 | 
 94 | # Gatsby files
 95 | .cache/
 96 | # Uncomment the "public" line below if your project uses Gatsby and not Next.js
 97 | # https://nextjs.org/blog/next-9-1#public-directory-support
 98 | # public
 99 | 
100 | # vuepress build output
101 | .vuepress/dist
102 | 
103 | # vuepress v2.x temp and cache directory
104 | .temp
105 | .cache
106 | 
107 | # vitepress build output
108 | **/.vitepress/dist
109 | 
110 | # vitepress cache directory
111 | **/.vitepress/cache
112 | 
113 | # Docusaurus cache and generated files
114 | .docusaurus
115 | 
116 | # Serverless directories
117 | .serverless/
118 | 
119 | # FuseBox cache
120 | .fusebox/
121 | 
122 | # DynamoDB Local files
123 | .dynamodb/
124 | 
125 | # TernJS port file
126 | .tern-port
127 | 
128 | # Stores VSCode versions used for testing VSCode extensions
129 | .vscode-test
130 | 
131 | # yarn v2
132 | .yarn/cache
133 | .yarn/unplugged
134 | .yarn/build-state.yml
135 | .yarn/install-state.gz
136 | .pnp.*
137 | 
138 | # build output
139 | bin/arxiv-search-mcp
140 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/danimal141-arxiv-search-mcp-badge.png)](https://mseep.ai/app/danimal141-arxiv-search-mcp)
  2 | 
  3 | # arXiv Search MCP Server
  4 | 
  5 | An MCP server that provides tools to search and fetch papers from arXiv.org.
  6 | 
  7 | ## Features
  8 | 
  9 | - Search papers by category
 10 | - Get latest papers sorted by submission date
 11 | - Formatted output with title, authors, summary, and link
 12 | 
 13 | ## Development
 14 | 
 15 | ### Prerequisites
 16 | 
 17 | - [Deno](https://deno.land/) installed on your system
 18 | - MCP compatible environment
 19 | 
 20 | ### Setup
 21 | 
 22 | 1. Clone the repository
 23 | 2. Install dependencies:
 24 | ```bash
 25 | deno cache --reload src/main.ts
 26 | ```
 27 | 
 28 | ### Running the Server
 29 | 
 30 | Development mode with file watching:
 31 | ```bash
 32 | deno task dev
 33 | ```
 34 | 
 35 | Build executable:
 36 | ```bash
 37 | deno task compile
 38 | ```
 39 | 
 40 | ## Integration with Claude Desktop
 41 | 
 42 | Add the following configuration to your `claude_desktop_config.json`:
 43 | 
 44 | ```json
 45 | {
 46 |   "mcpServers": {
 47 |     "arxiv-search-mcp": {
 48 |       "command": "/path/to/dir/arxiv-search-mcp/bin/arxiv-search-mcp"
 49 |     }
 50 |   }
 51 | }
 52 | ```
 53 | 
 54 | Replace `/path/to/dir` with the directory that contains your `arxiv-search-mcp` checkout, so that the command points at the compiled binary.
 55 | 
 56 | ## Usage
 57 | 
 58 | Example usage screenshot:
 59 | ![Sample usage with Claude](images/sample_use.png)
 60 | 
 61 | The server provides a tool named `search_arxiv` that accepts the following parameters:
 62 | 
 63 | ```typescript
 64 | {
 65 |   "category": string,    // arXiv category (e.g., cs.AI, cs.LG, astro-ph)
 66 |   "max_results": number  // Number of papers to fetch (1-100, default: 5)
 67 | }
 68 | ```
 69 | 
 70 | ### Example
 71 | 
 72 | Request:
 73 | ```json
 74 | {
 75 |   "category": "cs.AI",
 76 |   "max_results": 5
 77 | }
 78 | ```
 79 | 
 80 | This will return the 5 most recent papers from the Artificial Intelligence category.
 81 | 
 82 | ### Available Categories
 83 | 
 84 | Some popular arXiv categories:
 85 | 
 86 | - `cs.AI`: Artificial Intelligence
 87 | - `cs.LG`: Machine Learning
 88 | - `cs.CL`: Computation and Language
 89 | - `cs.CV`: Computer Vision
 90 | - `cs.NE`: Neural and Evolutionary Computing
 91 | - `cs.RO`: Robotics
 92 | - `astro-ph`: Astrophysics
 93 | - `physics`: Physics
 94 | - `math`: Mathematics
 95 | - `q-bio`: Quantitative Biology
 96 | 
 97 | For a complete list of categories, visit [arXiv taxonomy](https://arxiv.org/category_taxonomy).
 98 | 
 99 | ## License
100 | 
101 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
102 | 
```

--------------------------------------------------------------------------------
/deno.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "tasks": {
 3 |     "dev": "deno run --watch --allow-read --allow-env --allow-net src/main.ts",
 4 |     "test": "deno test --allow-read --allow-env --allow-net",
 5 |     "compile": "deno compile --allow-read --allow-env --allow-net --output=./bin/arxiv-search-mcp src/main.ts"
 6 |   },
 7 |   "imports": {
 8 |     "@std/assert": "jsr:@std/assert@1",
 9 |     "fastmcp": "npm:fastmcp@^1.21.0",
10 |     "zod": "npm:zod@^3.24.2",
11 |     "fast-xml-parser": "npm:fast-xml-parser"
12 |   }
13 | }
14 | 
```

--------------------------------------------------------------------------------
/src/main.test.ts:
--------------------------------------------------------------------------------

```typescript
  1 | /**
  2 |  * Unit tests for arXiv Search MCP Server
  3 |  */
  4 | import { assertEquals, assertMatch } from "@std/assert";
  5 | 
  6 | import {
  7 |   parseArxivEntry,
  8 |   SearchArxivParamsZod,
  9 |   search_arxiv_execute,
 10 |   type XMLFeedEntry,
 11 | } from "./main.ts";
 12 | 
 13 | // Testing utilities
 14 | const mockXMLFeedEntry: XMLFeedEntry = {
 15 |   title: "Test Paper Title",
 16 |   author: [
 17 |     { name: "John Doe" },
 18 |     { name: "Jane Smith" },
 19 |   ],
 20 |   summary: "This is a test paper summary",
 21 |   id: "https://arxiv.org/abs/test.123",
 22 | };
 23 | 
 24 | const mockXMLFeedSingleAuthor: XMLFeedEntry = {
 25 |   title: "Single Author Paper",
 26 |   author: { name: "Solo Author" },
 27 |   summary: "Paper with single author",
 28 |   id: "https://arxiv.org/abs/test.456",
 29 | };
 30 | 
 31 | Deno.test("parseArxivEntry handles multiple authors correctly", () => {
 32 |   const result = parseArxivEntry(mockXMLFeedEntry);
 33 |   assertEquals(result, {
 34 |     title: "Test Paper Title",
 35 |     authors: "John Doe, Jane Smith",
 36 |     summary: "This is a test paper summary",
 37 |     link: "https://arxiv.org/abs/test.123",
 38 |   });
 39 | });
 40 | 
 41 | Deno.test("parseArxivEntry handles single author correctly", () => {
 42 |   const result = parseArxivEntry(mockXMLFeedSingleAuthor);
 43 |   assertEquals(result, {
 44 |     title: "Single Author Paper",
 45 |     authors: "Solo Author",
 46 |     summary: "Paper with single author",
 47 |     link: "https://arxiv.org/abs/test.456",
 48 |   });
 49 | });
 50 | 
 51 | Deno.test("SearchArxivParamsZod validates category correctly", () => {
 52 |   const validResult = SearchArxivParamsZod.safeParse({
 53 |     category: "cs.AI",
 54 |     max_results: 5,
 55 |   });
 56 |   assertEquals(validResult.success, true);
 57 | });
 58 | 
 59 | Deno.test("SearchArxivParamsZod validates max_results range", () => {
 60 |   // Test minimum value
 61 |   const tooSmall = SearchArxivParamsZod.safeParse({
 62 |     category: "cs.AI",
 63 |     max_results: 0,
 64 |   });
 65 |   assertEquals(tooSmall.success, false);
 66 | 
 67 |   // Test maximum value
 68 |   const tooLarge = SearchArxivParamsZod.safeParse({
 69 |     category: "cs.AI",
 70 |     max_results: 101,
 71 |   });
 72 |   assertEquals(tooLarge.success, false);
 73 | 
 74 |   // Test valid range
 75 |   const validRange = SearchArxivParamsZod.safeParse({
 76 |     category: "cs.AI",
 77 |     max_results: 100,
 78 |   });
 79 |   assertEquals(validRange.success, true);
 80 | });
 81 | 
 82 | Deno.test("SearchArxivParamsZod provides default max_results", () => {
 83 |   const result = SearchArxivParamsZod.parse({
 84 |     category: "cs.AI",
 85 |   });
 86 |   assertEquals(result.max_results, 5);
 87 | });
 88 | 
 89 | // Integration test with mocked fetch
 90 | Deno.test("search_arxiv tool handles API errors gracefully", async () => {
 91 |   const originalFetch = globalThis.fetch;
 92 |   globalThis.fetch = async () => {
 93 |     throw new Error("Network error");
 94 |   };
 95 | 
 96 |   try {
 97 |     const result = await search_arxiv_execute({
 98 |       category: "cs.AI",
 99 |       max_results: 5,
100 |     });
101 |     assertMatch(result as string, /Error during search: Network error/);
102 |   } finally {
103 |     globalThis.fetch = originalFetch;
104 |   }
105 | });
106 | 
```

--------------------------------------------------------------------------------
/src/main.ts:
--------------------------------------------------------------------------------

```typescript
  1 | /**
  2 |  * arXiv Search MCP Server
  3 |  */
  4 | import { FastMCP } from "fastmcp";
  5 | import { z } from "zod";
  6 | import { XMLParser } from "fast-xml-parser";
  7 | 
  8 | /**
  9 |  * One `<entry>` of the arXiv Atom feed as it comes out of the XML parser.
 10 |  * `author` is a single object for one author and an array for several; it is
 11 |  * optional here so an entry without authors cannot crash parsing.
 12 |  */
 13 | export interface XMLFeedEntry {
 14 |   title: string;
 15 |   author?: { name: string }[] | { name: string };
 16 |   summary: string;
 17 |   id: string;
 18 | }
 19 | 
 20 | /**
 21 |  * Parsed feed document. `search_arxiv_execute` accepts `entry` as an array,
 22 |  * as a single object, or missing altogether, so the type allows all three.
 23 |  */
 24 | interface XMLResponse {
 25 |   feed?: {
 26 |     entry?: XMLFeedEntry[] | XMLFeedEntry;
 27 |   };
 28 | }
 29 | 
 30 | // Constants
 31 | const ARXIV_API_BASE = "https://export.arxiv.org/api/query?";
 32 | const USER_AGENT = "arxiv-mcp/1.0";
 33 | 
 34 | /** Input schema for the `search_arxiv` tool. */
 35 | export const SearchArxivParamsZod = z.object({
 36 |   category: z.string().describe("arXiv category (e.g., cs.LG, astro-ph)"),
 37 |   // max_results is a count, so non-integers are rejected instead of being
 38 |   // forwarded to the API.
 39 |   max_results: z.number().int().min(1).max(100).default(5).describe(
 40 |     "Number of papers to fetch (1-100)",
 41 |   ),
 42 | });
 43 | 
 44 | /** Flat, display-ready view of a single paper. */
 45 | export interface ArxivEntry {
 46 |   title: string;
 47 |   authors: string;
 48 |   summary: string;
 49 |   link: string;
 50 | }
 51 | 
 52 | /**
 53 |  * Turns a parsed text node into a single-line string: coerces non-string
 54 |  * values (NOTE(review): the XML parser may hand back numbers for
 55 |  * numeric-looking text - confirm against its options), collapses whitespace
 56 |  * runs such as hard line wraps, and trims the ends.
 57 |  */
 58 | function cleanText(value: unknown): string {
 59 |   return String(value ?? "").replace(/\s+/g, " ").trim();
 60 | }
 61 | 
 62 | /**
 63 |  * Converts one feed entry into an {@link ArxivEntry}.
 64 |  *
 65 |  * @param entry Entry object taken from the parsed feed.
 66 |  * @returns The entry's title, summary and id (as `link`) with whitespace
 67 |  *   normalized, plus all author names joined by ", ". Falls back to
 68 |  *   "Unknown" when the entry carries no usable author name.
 69 |  */
 70 | export function parseArxivEntry(entry: XMLFeedEntry): ArxivEntry {
 71 |   const authorNodes = entry.author == null
 72 |     ? []
 73 |     : Array.isArray(entry.author)
 74 |     ? entry.author
 75 |     : [entry.author];
 76 |   const authors = authorNodes
 77 |     .map((author) => cleanText(author.name))
 78 |     .filter((name) => name !== "")
 79 |     .join(", ");
 80 | 
 81 |   return {
 82 |     title: cleanText(entry.title),
 83 |     authors: authors === "" ? "Unknown" : authors,
 84 |     summary: cleanText(entry.summary),
 85 |     link: cleanText(entry.id),
 86 |   };
 87 | }
 88 | 
 89 | /**
 90 |  * Tool handler: fetches the newest papers of one arXiv category.
 91 |  *
 92 |  * @param args Validated tool arguments (`category`, `max_results`).
 93 |  * @returns The formatted paper list, a "no papers" notice, or - on any
 94 |  *   failure - a string starting with "Error during search:". Errors are
 95 |  *   caught and reported as text rather than thrown.
 96 |  */
 97 | export async function search_arxiv_execute(
 98 |   args: z.infer<typeof SearchArxivParamsZod>,
 99 | ): Promise<string> {
100 |   try {
101 |     // Build API request
102 |     const query = new URLSearchParams({
103 |       search_query: `cat:${args.category}`,
104 |       sortBy: "submittedDate",
105 |       sortOrder: "descending",
106 |       max_results: args.max_results.toString(),
107 |     });
108 | 
109 |     const response = await fetch(`${ARXIV_API_BASE}${query}`, {
110 |       headers: {
111 |         "User-Agent": USER_AGENT,
112 |         "Accept": "application/xml",
113 |       },
114 |     });
115 | 
116 |     if (!response.ok) {
117 |       // Release the unread body so the connection is not left dangling.
118 |       await response.body?.cancel();
119 |       throw new Error(`API request failed: ${response.status}`);
120 |     }
121 | 
122 |     const parser = new XMLParser({
123 |       ignoreAttributes: false,
124 |       attributeNamePrefix: "@_",
125 |     });
126 |     const xmlDoc = parser.parse(await response.text()) as XMLResponse;
127 | 
128 |     if (!xmlDoc || !xmlDoc.feed) {
129 |       throw new Error("Failed to parse XML response");
130 |     }
131 | 
132 |     // Normalize `entry` (array, single object, or absent) to an array.
133 |     const rawEntries = xmlDoc.feed.entry;
134 |     const entryList = !rawEntries
135 |       ? []
136 |       : Array.isArray(rawEntries)
137 |       ? rawEntries
138 |       : [rawEntries];
139 |     const entries = entryList.map(parseArxivEntry);
140 | 
141 |     if (entries.length === 0) {
142 |       return "No papers found for the specified category.";
143 |     }
144 | 
145 |     // Format results
146 |     const formattedPapers = entries.map((paper: ArxivEntry) =>
147 |       `Title: ${paper.title}\nAuthors: ${paper.authors}\nSummary: ${paper.summary}\nLink: ${paper.link}`
148 |     );
149 | 
150 |     return formattedPapers.join("\n\n---\n\n");
151 |   } catch (error: unknown) {
152 |     console.error("Error in search_arxiv:", error);
153 |     const errorMessage = error instanceof Error
154 |       ? error.message
155 |       : "An unknown error occurred";
156 |     return `Error during search: ${errorMessage}`;
157 |   }
158 | }
159 | 
160 | const server = new FastMCP({
161 |   name: "arXiv-Search",
162 |   version: "1.0.0",
163 | });
164 | 
165 | server.addTool({
166 |   name: "search_arxiv",
167 |   description: "Search latest papers from a specific arXiv category",
168 |   parameters: SearchArxivParamsZod,
169 |   execute: search_arxiv_execute,
170 | });
171 | 
172 | // Importing this module (for example from the unit tests) must not start a
173 | // stdio server, so the server only starts when run as the entry point.
174 | if (import.meta.main) {
175 |   server.start({
176 |     transportType: "stdio",
177 |   });
178 | }
179 | 
```