This is page 4 of 4. Use http://codebase.md/omgwtfwow/mcp-crawl4ai-ts?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .env.example
├── .github
│   ├── CI.md
│   ├── copilot-instructions.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── CHANGELOG.md
├── eslint.config.mjs
├── jest.config.cjs
├── jest.setup.cjs
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   ├── __tests__
│   │   ├── crawl.test.ts
│   │   ├── crawl4ai-service.network.test.ts
│   │   ├── crawl4ai-service.test.ts
│   │   ├── handlers
│   │   │   ├── crawl-handlers.test.ts
│   │   │   ├── parameter-combinations.test.ts
│   │   │   ├── screenshot-saving.test.ts
│   │   │   ├── session-handlers.test.ts
│   │   │   └── utility-handlers.test.ts
│   │   ├── index.cli.test.ts
│   │   ├── index.npx.test.ts
│   │   ├── index.server.test.ts
│   │   ├── index.test.ts
│   │   ├── integration
│   │   │   ├── batch-crawl.integration.test.ts
│   │   │   ├── capture-screenshot.integration.test.ts
│   │   │   ├── crawl-advanced.integration.test.ts
│   │   │   ├── crawl-handlers.integration.test.ts
│   │   │   ├── crawl-recursive.integration.test.ts
│   │   │   ├── crawl.integration.test.ts
│   │   │   ├── execute-js.integration.test.ts
│   │   │   ├── extract-links.integration.test.ts
│   │   │   ├── extract-with-llm.integration.test.ts
│   │   │   ├── generate-pdf.integration.test.ts
│   │   │   ├── get-html.integration.test.ts
│   │   │   ├── get-markdown.integration.test.ts
│   │   │   ├── parse-sitemap.integration.test.ts
│   │   │   ├── session-management.integration.test.ts
│   │   │   ├── smart-crawl.integration.test.ts
│   │   │   └── test-utils.ts
│   │   ├── request-handler.test.ts
│   │   ├── schemas
│   │   │   └── validation-edge-cases.test.ts
│   │   ├── types
│   │   │   └── mocks.ts
│   │   └── utils
│   │       └── javascript-validation.test.ts
│   ├── crawl4ai-service.ts
│   ├── handlers
│   │   ├── base-handler.ts
│   │   ├── content-handlers.ts
│   │   ├── crawl-handlers.ts
│   │   ├── session-handlers.ts
│   │   └── utility-handlers.ts
│   ├── index.ts
│   ├── schemas
│   │   ├── helpers.ts
│   │   └── validation-schemas.ts
│   ├── server.ts
│   └── types.ts
├── tsconfig.build.json
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/src/__tests__/integration/crawl.integration.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import {
  createTestClient,
  cleanupTestClient,
  generateSessionId,
  expectSuccessfulCrawl,
  expectScreenshot,
  delay,
  TEST_TIMEOUTS,
} from './test-utils.js';

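// Local mirror of the MCP tool-call result content shape; kept local so these
// tests don't depend on SDK-internal types.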
interface ToolResult {
  content: Array<{
    type: string;
    text?: string;
    data?: string;
    mimeType?: string;
  }>;
}

describe('crawl Integration Tests', () => {
  let client: Client;

  beforeAll(async () => {
    client = await createTestClient();
  }, TEST_TIMEOUTS.medium);

  afterAll(async () => {
    if (client) {
      await cleanupTestClient(client);
    }
  });

  describe('Basic Crawling', () => {
    it(
      'should crawl a simple page with basic configuration',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            cache_mode: 'BYPASS',
            word_count_threshold: 50,
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle invalid URL gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'not-a-valid-url',
            cache_mode: 'BYPASS',
          },
        });

        const content = (result as ToolResult).content;
        expect(content).toHaveLength(1);
        expect(content[0].type).toBe('text');
        expect(content[0].text).toContain('Error');
        // Our Zod validation catches this before it reaches the API
        expect(content[0].text).toContain('Invalid parameters for crawl');
        expect(content[0].text).toContain('Invalid url');
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle non-existent domain gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://this-domain-definitely-does-not-exist-12345.com',
            cache_mode: 'BYPASS',
          },
        });

        const content = (result as ToolResult).content;
        expect(content).toHaveLength(1);
        expect(content[0].type).toBe('text');
        expect(content[0].text).toContain('Error');
        // Could be DNS error, connection error, or "Internal Server Error"
        expect(content[0].text).toMatch(/Failed to crawl|Internal Server Error|DNS|connection/i);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle browser configuration',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/user-agent',
            viewport_width: 1920,
            viewport_height: 1080,
            user_agent: 'MCP Integration Test Bot',
            cache_mode: 'DISABLED',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );
  });

  describe('Dynamic Content Tests', () => {
    it(
      'should execute JavaScript on page',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            js_code: ['return document.querySelectorAll("a").length', 'return document.title'],
            wait_after_js: 1000,
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
        // httpbin.org/html contains links and a title
        expect(textContent?.text?.toLowerCase()).toMatch(/herman|melville|moby/); // Content from the page
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should wait for specific elements',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/delay/2',
            wait_for: 'body',
            wait_for_timeout: 5000,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );

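    // virtual_scroll_config drives Crawl4AI's virtual-scroll capture: it scrolls
    // the matched container `scroll_count` times (here by one container height per
    // step, pausing `wait_after_scroll` seconds between steps) so lazily rendered
    // items end up in the extracted content.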
    it(
      'should handle virtual scrolling for infinite feeds',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com/trending',
            virtual_scroll_config: {
              container_selector: '.Box-row',
              scroll_count: 3,
              scroll_by: 'container_height',
              wait_after_scroll: 1.0,
            },
            cache_mode: 'BYPASS',
            wait_for: '.Box-row',
            word_count_threshold: 50,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        // Should have captured multiple trending repos after scrolling
        expect(textContent?.text).toBeTruthy();
        expect(textContent?.text?.length).toBeGreaterThan(1000);
      },
      TEST_TIMEOUTS.long,
    );
  });

  describe('Session Management Tests', () => {
    it(
      'should create and use a session',
      async () => {
        const sessionId = generateSessionId();

        // First crawl with session
        const result1 = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            session_id: sessionId,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result1);

        // Second crawl reusing session
        const result2 = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com/features',
            session_id: sessionId,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result2);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle cookies in session',
      async () => {
        const sessionId = generateSessionId();

        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            session_id: sessionId,
            cookies: [
              {
                name: 'test_cookie',
                value: 'test_value',
                domain: '.github.com',
                path: '/',
              },
            ],
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Content Extraction Tests', () => {
    it.skip(
      'should extract content using CSS selectors - SKIPPED: Not supported via REST API',
      async () => {
        // CSS extraction is not supported via the REST API due to Python class serialization limitations
        // This test is kept for documentation purposes but skipped
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://www.google.com',
            extraction_type: 'css',
            css_selectors: {
              title: 'title',
              search_button: 'input[type="submit"]',
              logo: 'img[alt*="Google"]',
            },
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should extract content using LLM via extract_with_llm tool',
      async () => {
        // Note: This test requires the Crawl4AI server to have an LLM provider configured
        try {
          const result = await client.callTool({
            name: 'extract_with_llm',
            arguments: {
              url: 'https://httpbin.org/html',
              query: 'Extract the main page title and any author names mentioned',
            },
          });

          expect(result).toBeTruthy();
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();

          // The response should be JSON with an "answer" field
          try {
            const parsed = JSON.parse(textContent?.text || '{}');
            expect(parsed).toHaveProperty('answer');
            expect(typeof parsed.answer).toBe('string');
            expect(parsed.answer.length).toBeGreaterThan(0);
          } catch {
            // If parsing fails, at least check we got text
            expect(textContent?.text?.length || 0).toBeGreaterThan(0);
          }
        } catch (error) {
          // If the server doesn't have LLM configured, it will return an error
          if (error instanceof Error && error.message?.includes('No LLM provider configured')) {
            console.log('⚠️ LLM extraction test skipped: Server needs LLM provider configured');
            return;
          }
          throw error;
        }
      },
      TEST_TIMEOUTS.long,
    );
  });

  describe('Media Handling Tests', () => {
    it(
      'should capture screenshots',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            screenshot: true,
            screenshot_wait_for: 1.0,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        await expectScreenshot(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should generate PDF',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            pdf: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        // PDF generation should return some content
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
        // Should contain some content from the page
        expect(textContent?.text?.toLowerCase()).toContain('herman');
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle image filtering',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            exclude_external_images: true,
            image_description_min_word_threshold: 20,
            image_score_threshold: 5,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Performance & Caching Tests', () => {
    it(
      'should respect cache modes',
      async () => {
        const url = 'https://httpbin.org/html'; // Use a simpler page for cache testing

        // First request - populate cache with ENABLED mode
        const result1 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'ENABLED',
            word_count_threshold: 10,
          },
        });
        await expectSuccessfulCrawl(result1);
        const content1 = (result1 as ToolResult).content.find((c) => c.type === 'text')?.text;

        // Wait a bit to ensure cache is saved
        await delay(500);

        // Second request - should use cache (ENABLED mode)
        const startTime = Date.now();
        const result2 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'ENABLED',
            word_count_threshold: 10,
          },
        });
        const cacheTime = Date.now() - startTime;
        await expectSuccessfulCrawl(result2);
        const content2 = (result2 as ToolResult).content.find((c) => c.type === 'text')?.text;

        // Content should be identical if cache was used
        expect(content2).toBe(content1);

        // Third request - bypass cache
        const bypassStartTime = Date.now();
        const result3 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });
        const bypassTime = Date.now() - bypassStartTime;
        await expectSuccessfulCrawl(result3);

        // Cache hit should typically be faster, but we'll make this test more lenient
        // Just verify all requests succeeded
        expect(cacheTime).toBeGreaterThan(0);
        expect(bypassTime).toBeGreaterThan(0);

        // Fourth request - DISABLED mode should not use cache
        const result4 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'DISABLED',
            word_count_threshold: 10,
          },
        });
        await expectSuccessfulCrawl(result4);
      },
      TEST_TIMEOUTS.long,
    );

    it(
      'should handle timeout configuration',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/delay/1',
            timeout: 20000,
            page_timeout: 15000,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );
  });

  describe('Content Filtering Tests', () => {
    it(
      'should filter content by tags',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html', // Simpler page for testing
            excluded_tags: ['script', 'style', 'nav', 'footer', 'header'],
            word_count_threshold: 10,
            cache_mode: 'BYPASS',
            only_text: true, // Force text-only output
            remove_overlay_elements: true,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();

        // Just verify we got content back - the server's filtering behavior may vary
        // With all the filters applied, content might be minimal
        expect(textContent?.text?.length).toBeGreaterThan(10);

        // Should contain some text from the page
        expect(textContent?.text).toBeTruthy();
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should filter content by selectors',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            excluded_selector: '.footer, .header-nav, [aria-label="Advertisement"]',
            remove_overlay_elements: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle link filtering',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            exclude_external_links: true,
            exclude_social_media_links: true,
            exclude_domains: ['twitter.com', 'facebook.com', 'linkedin.com'],
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        // Should not contain social media domains
        expect(textContent?.text).not.toMatch(/twitter\.com|facebook\.com/);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Bot Detection Avoidance Tests', () => {
    it(
      'should simulate user behavior',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            simulate_user: true,
            override_navigator: true,
            magic: true,
            delay_before_scroll: 1000,
            scroll_delay: 500,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should use custom headers and user agent',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/headers',
            user_agent: 'Mozilla/5.0 (compatible; MCP Test Bot)',
            headers: {
              'Accept-Language': 'en-US,en;q=0.9',
              'Accept-Encoding': 'gzip, deflate, br',
              'X-Custom-Header': 'MCP-Test',
            },
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        // httpbin returns headers in response
        expect(textContent?.text).toContain('MCP Test Bot');
        expect(textContent?.text).toContain('X-Custom-Header');
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Error Handling Tests', () => {
    it(
      'should handle invalid URLs gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'not-a-valid-url',
            cache_mode: 'BYPASS',
          },
        });

        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toContain('Error');
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle non-existent domains',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://this-domain-definitely-does-not-exist-123456789.com',
            cache_mode: 'BYPASS',
          },
        });

        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text?.toLowerCase()).toMatch(/error|failed/);
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle JavaScript errors gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            js_code: 'throw new Error("Test error")',
            cache_mode: 'BYPASS',
          },
        });

        // Should still return content even if JS fails
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.short,
    );
  });

  describe('Advanced Configurations', () => {
    it(
      'should handle complex multi-feature crawl',
      async () => {
        const sessionId = generateSessionId();

        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            // Browser config
            viewport_width: 1920,
            viewport_height: 1080,
            user_agent: 'MCP Advanced Test Bot',
            // Session
            session_id: sessionId,
            // JavaScript
            js_code: 'return document.querySelectorAll("h1").length',
            wait_after_js: 1000,
            // Content filtering
            excluded_tags: ['script', 'style'],
            word_count_threshold: 50,
            remove_overlay_elements: true,
            // Media
            screenshot: true,
            screenshot_wait_for: 1.0,
            // Performance
            cache_mode: 'DISABLED',
            timeout: 60000,
            // Bot detection
            simulate_user: true,
            override_navigator: true,
          },
        });

        await expectSuccessfulCrawl(result);
        // Screenshot might not always be returned in complex multi-feature crawls
        // especially with httpbin.org which is a simple HTML page
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.long,
    );

    it(
      'should handle proxy configuration',
      async () => {
        // Test that proxy configuration is accepted, even without a real proxy
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/ip',
            proxy_server: 'http://example-proxy.com:8080',
            proxy_username: 'testuser',
            proxy_password: 'testpass',
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        // The request should complete (even if proxy doesn't exist, the config should be accepted)
        expect(result).toBeDefined();
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should process iframes',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://www.w3schools.com/html/html_iframe.asp',
            process_iframes: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Browser Configuration Tests', () => {
    describe('Cookie handling', () => {
      it(
        'should set and send cookies correctly',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/cookies',
              cookies: [
                {
                  name: 'test_cookie',
                  value: 'test_value',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // httpbin returns cookies as JSON in the response
          expect(textContent?.text).toContain('test_cookie');
          expect(textContent?.text).toContain('test_value');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle multiple cookies',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/cookies',
              cookies: [
                {
                  name: 'session_id',
                  value: 'abc123',
                  domain: '.httpbin.org',
                  path: '/',
                },
                {
                  name: 'user_pref',
                  value: 'dark_mode',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Verify both cookies are present
          expect(textContent?.text).toContain('session_id');
          expect(textContent?.text).toContain('abc123');
          expect(textContent?.text).toContain('user_pref');
          expect(textContent?.text).toContain('dark_mode');
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Custom headers', () => {
      it(
        'should send custom headers',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/headers',
              headers: {
                'X-Custom-Header': 'test-value',
                'X-Request-ID': '12345',
                'Accept-Language': 'en-US,en;q=0.9',
              },
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // httpbin returns headers in the response
          expect(textContent?.text).toContain('X-Custom-Header');
          expect(textContent?.text).toContain('test-value');
          // Note: Some headers may be filtered by the browser
          // Just verify our custom header got through
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('User-Agent configuration', () => {
      it(
        'should set custom user agent',
        async () => {
          const customUserAgent = 'MCP-Crawl4AI-Test/1.0 (Integration Tests)';
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/user-agent',
              user_agent: customUserAgent,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // httpbin returns the user-agent in the response
          expect(textContent?.text).toContain(customUserAgent);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Viewport sizes and screenshots', () => {
      it(
        'should capture screenshot at mobile size (375x667)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 375,
              viewport_height: 667,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          await expectScreenshot(result);

          // Check screenshot was captured
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');
          expect(imageContent).toBeDefined();
          expect(imageContent?.data).toBeTruthy();

          // Verify reasonable data size for mobile screenshot
          const dataLength = imageContent?.data?.length || 0;
          expect(dataLength).toBeGreaterThan(10000); // At least 10KB
          expect(dataLength).toBeLessThan(3000000); // Less than 3MB for mobile (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should capture screenshot at tablet size (768x1024)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 768,
              viewport_height: 1024,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          await expectScreenshot(result);

          // Check screenshot was captured
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');
          expect(imageContent).toBeDefined();
          expect(imageContent?.data).toBeTruthy();

          // Verify reasonable data size for tablet screenshot
          const dataLength = imageContent?.data?.length || 0;
          expect(dataLength).toBeGreaterThan(15000); // At least 15KB
          expect(dataLength).toBeLessThan(3000000); // Less than 3MB for tablet (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should capture screenshot at HD size (1280x720)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 1280,
              viewport_height: 720,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          await expectScreenshot(result);

          // Check screenshot was captured
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');
          expect(imageContent).toBeDefined();
          expect(imageContent?.data).toBeTruthy();

          // Verify reasonable data size for HD screenshot
          const dataLength = imageContent?.data?.length || 0;
          expect(dataLength).toBeGreaterThan(20000); // At least 20KB
          expect(dataLength).toBeLessThan(3000000); // Less than 3MB for HD (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should fail gracefully for very large viewport (1920x1080)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 1920,
              viewport_height: 1080,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          // This should either timeout or return an error based on testing
          // We expect either an error or no screenshot data
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');

          // If we got text but no image, that's expected for large viewports
          if (textContent && !imageContent) {
            expect(textContent).toBeDefined();
          } else if (textContent?.text?.includes('Error') || textContent?.text?.includes('timeout')) {
            // Expected error for large viewport
            expect(textContent.text).toMatch(/Error|timeout/i);
          }
        },
        TEST_TIMEOUTS.long,
      );
    });

    describe('Combined browser configurations', () => {
      it(
        'should handle cookies, headers, and custom viewport together',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/anything',
              viewport_width: 768,
              viewport_height: 1024,
              user_agent: 'MCP-Test-Bot/2.0',
              cookies: [
                {
                  name: 'auth_token',
                  value: 'secret123',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              headers: {
                'X-Test-Header': 'combined-test',
              },
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();

          // httpbin/anything endpoint returns all request data
          // Verify all configurations were applied
          expect(textContent?.text).toContain('MCP-Test-Bot/2.0');
          expect(textContent?.text).toContain('auth_token');
          expect(textContent?.text).toContain('X-Test-Header');
          expect(textContent?.text).toContain('combined-test');
        },
        TEST_TIMEOUTS.medium,
      );
    });
  });

  describe('Crawler Configuration Advanced Tests', () => {
    describe('Content filtering parameters', () => {
      it(
        'should remove forms when remove_forms is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Forms should be removed, so no form-related text should appear
          expect(textContent?.text).not.toContain('<form');
          expect(textContent?.text).not.toContain('type="submit"');
          expect(textContent?.text).not.toContain('input type=');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should keep forms when remove_forms is false',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: false,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Forms should be present - check for form-related keywords
          const text = textContent?.text?.toLowerCase() || '';
          // httpbin forms page should have form elements
          expect(text.length).toBeGreaterThan(100);
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should preserve data attributes when keep_data_attributes is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://getbootstrap.com/docs/4.0/components/alerts/',
              keep_data_attributes: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Should contain alert content
          expect(textContent?.text).toContain('alert');
        },
        TEST_TIMEOUTS.medium,
      );
    });

    describe('JavaScript execution parameters', () => {
      it(
        'should return only JS results when js_only is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: ['return document.title', 'return document.querySelectorAll("p").length'],
              js_only: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();

          // Should contain JS execution results but not the full HTML content
          // The result should be much shorter than full page content
          expect(textContent?.text?.length).toBeLessThan(1000);
          // Should not contain the full Moby Dick text from the page
          expect(textContent?.text).not.toContain('Herman Melville');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle js_only with session_id',
        async () => {
          const sessionId = generateSessionId();
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              session_id: sessionId,
              js_code: 'return window.location.href',
              js_only: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Page visibility parameters', () => {
      it(
        'should extract content when body is hidden and ignore_body_visibility is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: 'document.body.style.visibility = "hidden"; return "body hidden"',
              ignore_body_visibility: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Should still extract content despite hidden body
          expect(textContent?.text).toContain('Herman Melville');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should respect body visibility when ignore_body_visibility is false',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: 'document.body.style.visibility = "hidden"; return "body hidden"',
              ignore_body_visibility: false,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          // Content extraction behavior may vary when body is hidden
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Debug and logging parameters', () => {
      it(
        'should capture console logs when log_console is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: [
                'console.log("Test log message 1")',
                'console.warn("Test warning")',
                'console.error("Test error")',
                'return "logs executed"',
              ],
              log_console: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          // Note: Console logs may be captured in a separate field or in verbose output
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should provide verbose output when verbose is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              verbose: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 50,
            },
          });

          await expectSuccessfulCrawl(result);
          // Verbose output may include additional debugging information
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Media filtering parameters', () => {
      it(
        'should exclude external images when exclude_external_images is true',
        async () => {
          // First, let's create a page with external images via JS
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: `
                const img1 = document.createElement('img');
                img1.src = 'https://httpbin.org/image/png';
                img1.alt = 'External PNG';
                document.body.appendChild(img1);

                const img2 = document.createElement('img');
                img2.src = '/local-image.png';
                img2.alt = 'Local image';
                document.body.appendChild(img2);

                return document.images.length;
              `,
              exclude_external_images: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // The external image references should be filtered out
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should include external images when exclude_external_images is false',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              exclude_external_images: false,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Combined crawler configuration tests', () => {
      it(
        'should handle multiple filtering options together',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: true,
              exclude_external_links: true,
              exclude_external_images: true,
              only_text: true,
              word_count_threshold: 10,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Should have filtered content
          expect(textContent?.text).not.toContain('<form');
          expect(textContent?.text).not.toContain('type="submit"');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle debug options with content extraction',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              verbose: true,
              log_console: true,
              js_code: 'console.log("Debug test"); return document.title',
              keep_data_attributes: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('New crawler parameters (0.7.3/0.7.4)', () => {
      it(
        'should accept undetected browser type',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/user-agent',
              browser_type: 'undetected',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // The undetected browser should mask automation indicators
          // but we can at least verify the request was accepted
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should filter content using css_selector',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              css_selector: 'h1',
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // css_selector returns ONLY the selected element content
          expect(textContent?.text?.toLowerCase()).toContain('example domain');
          // Should NOT contain the paragraph text that's outside the h1
          expect(textContent?.text).not.toContain('use in illustrative examples');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should include links when include_links is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              include_links: true,
            },
          });

          await expectSuccessfulCrawl(result);
          // Check if links section is included
          const hasLinksInfo = (result as ToolResult).content.some(
            (item) => item.type === 'text' && item.text?.includes('Links:'),
          );
          expect(hasLinksInfo).toBe(true);
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should respect delay_before_return_html parameter',
        async () => {
          const startTime = Date.now();

          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/delay/1', // 1 second delay from server
              delay_before_return_html: 2, // Additional 2 second delay (in seconds, not ms)
              cache_mode: 'BYPASS',
            },
          });

          const elapsed = Date.now() - startTime;

          await expectSuccessfulCrawl(result);
          // Total time should be at least 3 seconds (1s from endpoint + 2s delay)
          expect(elapsed).toBeGreaterThanOrEqual(3000);
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should convert relative URLs when resolve_absolute_urls is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              resolve_absolute_urls: true,
              include_links: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);

          // Links should be in a separate content item
          const linksContent = (result as ToolResult).content.find(
            (c) => c.type === 'text' && c.text?.includes('Links:'),
          );

          // With include_links=true, links info should be present
          expect(linksContent).toBeDefined();
          expect(linksContent?.text).toContain('External: 1');
        },
        TEST_TIMEOUTS.short,
      );
    });
  });
});
```
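
The integration suite above depends on shared helpers imported from `./test-utils.js` (listed in the directory tree as `src/__tests__/integration/test-utils.ts`, but not included on this page). For orientation only, here is a minimal sketch of what those helpers could look like, inferred purely from how they are called in the tests; the timeout values, transport wiring, and assertion details are assumptions, not the project's actual implementation:

```typescript
// Hypothetical sketch of test-utils.ts, reconstructed from call sites above.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Assumed values; the real timeouts may differ.
export const TEST_TIMEOUTS = { short: 30_000, medium: 60_000, long: 120_000 };

export async function createTestClient(): Promise<Client> {
  // Assumption: tests talk to the built MCP server over stdio.
  const transport = new StdioClientTransport({ command: 'node', args: ['dist/index.js'] });
  const client = new Client({ name: 'integration-test-client', version: '1.0.0' });
  await client.connect(transport);
  return client;
}

export async function cleanupTestClient(client: Client): Promise<void> {
  await client.close();
}

export function generateSessionId(): string {
  return `test-session-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}

export const delay = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));

export async function expectSuccessfulCrawl(result: unknown): Promise<void> {
  // Assumption: success means at least one non-empty text item without an error marker.
  const { content } = result as { content: Array<{ type: string; text?: string }> };
  expect(content.length).toBeGreaterThan(0);
  const text = content.find((c) => c.type === 'text');
  expect(text?.text).toBeTruthy();
  expect(text?.text).not.toContain('Error');
}

export async function expectScreenshot(result: unknown): Promise<void> {
  // Assumption: a screenshot arrives as an image content item with base64 data.
  const { content } = result as { content: Array<{ type: string; data?: string }> };
  expect(content.some((c) => c.type === 'image' && Boolean(c.data))).toBe(true);
}
```

Keeping the assertions in one helper module is what lets each test body stay a thin `client.callTool(...)` invocation followed by one or two intent-revealing checks.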

--------------------------------------------------------------------------------
/src/__tests__/index.server.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { jest } from '@jest/globals';
import { describe, it, expect, beforeEach } from '@jest/globals';

// Create mock functions
const mockGetMarkdown = jest.fn();
const mockCaptureScreenshot = jest.fn();
const mockGeneratePDF = jest.fn();
const mockExecuteJS = jest.fn();
const mockGetHTML = jest.fn();
const mockBatchCrawl = jest.fn();
const mockExtractWithLLM = jest.fn();
const mockCrawl = jest.fn();
const mockParseSitemap = jest.fn();

// Mock the Crawl4AIService module
jest.unstable_mockModule('../crawl4ai-service.js', () => ({
  Crawl4AIService: jest.fn().mockImplementation(() => ({
    getMarkdown: mockGetMarkdown,
    captureScreenshot: mockCaptureScreenshot,
    generatePDF: mockGeneratePDF,
    executeJS: mockExecuteJS,
    getHTML: mockGetHTML,
    batchCrawl: mockBatchCrawl,
    extractWithLLM: mockExtractWithLLM,
    crawl: mockCrawl,
    parseSitemap: mockParseSitemap,
  })),
}));

// Mock MCP SDK
const mockSetRequestHandler = jest.fn();
const mockTool = jest.fn();
const mockConnect = jest.fn();

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/index.js', () => ({
  Server: jest.fn().mockImplementation(() => ({
    setRequestHandler: mockSetRequestHandler,
    tool: mockTool,
    connect: mockConnect,
  })),
}));

// Mock the types module that exports the schemas
const CallToolRequestSchema = { method: 'tools/call' };
const ListToolsRequestSchema = { method: 'tools/list' };

jest.unstable_mockModule('@modelcontextprotocol/sdk/types.js', () => ({
  CallToolRequestSchema,
  ListToolsRequestSchema,
}));

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/stdio.js', () => ({
  StdioServerTransport: jest.fn(),
}));

// Mock axios
const mockPost = jest.fn();
const mockGet = jest.fn();
const mockHead = jest.fn();

jest.unstable_mockModule('axios', () => ({
  default: {
    create: jest.fn(() => ({
      post: mockPost,
      get: mockGet,
      head: mockHead,
    })),
    get: mockGet,
  },
}));

// Now dynamically import the modules after mocks are set up
const { Crawl4AIServer } = await import('../server.js');
const {
  GetMarkdownSchema,
  CrawlSchema,
  BatchCrawlSchema,
  CaptureScreenshotSchema: _CaptureScreenshotSchema,
  GeneratePdfSchema: _GeneratePdfSchema,
  ExecuteJsSchema: _ExecuteJsSchema,
  ExtractWithLlmSchema: _ExtractWithLlmSchema,
  SmartCrawlSchema: _SmartCrawlSchema,
  CrawlRecursiveSchema: _CrawlRecursiveSchema,
} = await import('../schemas/validation-schemas.js');
const { Crawl4AIService } = await import('../crawl4ai-service.js');

// Import types statically (these are removed at compile time)
import type {
  MarkdownEndpointResponse,
  ScreenshotEndpointResponse,
  PDFEndpointResponse,
  HTMLEndpointResponse,
  CrawlEndpointResponse,
} from '../types.js';

// Define types for test results
interface ContentItem {
  type: string;
  text?: string;
  data?: string;
  resource?: {
    uri: string;
    mimeType: string;
    blob: string;
  };
}

interface ToolResult {
  content: ContentItem[];
}

type RequestHandler = (request: { method: string; params: unknown }) => Promise<ToolResult>;

// Removed TestServerMethods interface - no longer needed since we use 'any' type

describe('Crawl4AIServer Tool Handlers', () => {
  let server: any; // eslint-disable-line @typescript-eslint/no-explicit-any
  let requestHandler: RequestHandler;

  beforeEach(async () => {
    jest.clearAllMocks();

    // Reset all mock functions
    mockGetMarkdown.mockReset();
    mockCaptureScreenshot.mockReset();
    mockGeneratePDF.mockReset();
    mockExecuteJS.mockReset();
    mockGetHTML.mockReset();
    mockBatchCrawl.mockReset();
    mockExtractWithLLM.mockReset();
    mockCrawl.mockReset();
    mockParseSitemap.mockReset();
    mockPost.mockReset();
    mockGet.mockReset();
    mockHead.mockReset();

    // Create server instance - the mock will be used automatically
    server = new Crawl4AIServer(
      process.env.CRAWL4AI_BASE_URL || 'http://test.example.com',
      process.env.CRAWL4AI_API_KEY || 'test-api-key',
      'test-server',
      '1.0.0',
    );

    // Start the server to register handlers
    await server.start();

    // Get the request handler for CallToolRequestSchema
    const handlerCalls = mockSetRequestHandler.mock.calls;

    // Find the handler for CallToolRequestSchema (tools/call)
    for (const call of handlerCalls) {
      const [schema, handler] = call;
      if (schema && schema.method === 'tools/call') {
        requestHandler = handler;
        break;
      }
    }

    // Debug: Check if we found the handler
    if (!requestHandler) {
      console.log('Handler calls:', handlerCalls.length);
      handlerCalls.forEach((call, i) => {
        console.log(`Call ${i}:`, call[0], typeof call[1]);
      });
    }
  });

  // Add a simple test to verify mocking works
  it('should use the mocked service', () => {
    const MockedService = Crawl4AIService as jest.MockedClass<typeof Crawl4AIService>;
    expect(MockedService).toHaveBeenCalledTimes(1);
    expect(MockedService).toHaveBeenCalledWith('http://localhost:11235', 'test-api-key');
  });

  describe('Constructor and setup', () => {
    it('should initialize with correct configuration', () => {
      expect(server).toBeDefined();
      expect(server.service).toBeDefined();
      expect(server.sessions).toBeDefined();
    });

    it('should set up handlers on construction', () => {
      expect(mockSetRequestHandler).toHaveBeenCalled();
      expect(mockSetRequestHandler.mock.calls.length).toBeGreaterThan(0);
    });
  });

  describe('Tool Handler Success Cases', () => {
    describe('get_markdown', () => {
      it('should handle successful markdown extraction', async () => {
        const mockResponse: MarkdownEndpointResponse = {
          url: 'https://example.com',
          filter: 'fit',
          query: null,
          cache: 'false',
          markdown: '# Example Page\n\nThis is example content.',
          success: true,
        };

        mockGetMarkdown.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.getMarkdown({
          url: 'https://example.com',
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toContain('# Example Page');
        expect(result.content[0].text).toContain('URL: https://example.com');
        expect(result.content[0].text).toContain('Filter: fit');
      });

      it('should handle markdown with query', async () => {
        const mockResponse: MarkdownEndpointResponse = {
          url: 'https://example.com',
          filter: 'bm25',
          query: 'test query',
          cache: 'false',
          markdown: 'Filtered content',
          success: true,
        };

        mockGetMarkdown.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.getMarkdown({
          url: 'https://example.com',
          filter: 'bm25',
          query: 'test query',
        });

        expect(mockGetMarkdown).toHaveBeenCalledWith({
          url: 'https://example.com',
          f: 'bm25',
          q: 'test query',
        });
        expect(result.content[0].text).toContain('Query: test query');
      });
    });

    describe('capture_screenshot', () => {
      it('should handle successful screenshot capture', async () => {
        const mockResponse: ScreenshotEndpointResponse = {
          success: true,
          screenshot: 'base64-encoded-screenshot-data',
        };

        mockCaptureScreenshot.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.captureScreenshot({
          url: 'https://example.com',
        });

        expect(result.content).toHaveLength(2);
        expect(result.content[0].type).toBe('image');
        expect(result.content[0].data).toBe('base64-encoded-screenshot-data');
        expect(result.content[1].type).toBe('text');
        expect(result.content[1].text).toBe('Screenshot captured for: https://example.com');
      });
    });

    describe('generate_pdf', () => {
      it('should handle successful PDF generation', async () => {
        const mockResponse: PDFEndpointResponse = {
          success: true,
          pdf: 'base64-encoded-pdf-data',
        };

        mockGeneratePDF.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.generatePDF({
          url: 'https://example.com',
        });

        expect(result.content).toHaveLength(2);
        expect(result.content[0].type).toBe('resource');
        expect(result.content[0].resource.blob).toBeDefined();
        expect(result.content[1].type).toBe('text');
        expect(result.content[1].text).toContain('PDF generated for: https://example.com');
      });
    });

    describe('execute_js', () => {
      it('should handle successful JS execution', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: ['Title: Example', 'Link count: 5'],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: ['return document.title', 'return document.links.length'],
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Title: Example');
        expect(result.content[0].text).toContain('Link count: 5');
      });

      it('should handle JS execution without results', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: null,
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'console.log("test")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('No results returned');
      });

      it('should handle JS execution with error status', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: [
              {
                success: false,
                error: 'Error: Test error',
                stack: 'Error: Test error\n    at eval (eval at evaluate (:291:30), <anonymous>:4:43)',
              },
            ],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'throw new Error("Test error")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Script: throw new Error("Test error")');
        expect(result.content[0].text).toContain('Returned: Error: Error: Test error');
      });

      it('should handle JS execution with no return value', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: [{ success: true }],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'console.log("hello")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Returned: Executed successfully (no return value)');
      });
    });

    describe('get_html', () => {
      it('should handle successful HTML retrieval', async () => {
        const mockResponse: HTMLEndpointResponse = {
          html: '<html><body><h1>Example</h1></body></html>',
          url: 'https://example.com',
          success: true,
        };

        mockGetHTML.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.getHTML({
          url: 'https://example.com',
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('<html><body><h1>Example</h1></body></html>');
      });
    });

    describe('batch_crawl', () => {
      it('should handle successful batch crawl', async () => {
        const mockResponse = {
          results: [
            { url: 'https://example1.com', markdown: { raw_markdown: 'Content 1' }, success: true },
            { url: 'https://example2.com', markdown: { raw_markdown: 'Content 2' }, success: true },
          ],
          success: true,
        };

        // Mock axios response since batchCrawl uses axiosClient directly
        mockPost.mockResolvedValue({ data: mockResponse });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example1.com', 'https://example2.com'],
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].text).toContain('Batch crawl completed');
        expect(result.content[0].text).toContain('Processed 2 URLs');
      });

      it('should handle batch crawl with remove_images', async () => {
        // Mock axios response since batchCrawl uses axiosClient directly
        mockPost.mockResolvedValue({ data: { results: [] } });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example.com'],
          remove_images: true,
        });

        expect(mockPost).toHaveBeenCalledWith('/crawl', {
          urls: ['https://example.com'],
          crawler_config: {
            exclude_tags: ['img', 'picture', 'svg'],
          },
        });
        expect(result.content[0].text).toContain('Batch crawl completed');
      });
    });

    describe('crawl', () => {
      it('should handle successful crawl with all options', async () => {
        const mockResponse: CrawlEndpointResponse = {
 437 |         success: true,
 438 |         results: [
 439 |           {
 440 |             url: 'https://example.com',
 441 |             html: '<html>...</html>',
 442 |             cleaned_html: '<html>clean</html>',
 443 |             fit_html: '<html>fit</html>',
 444 |             success: true,
 445 |             status_code: 200,
 446 |             response_headers: {},
 447 |             session_id: 'test-session',
 448 |             metadata: { title: 'Example' },
 449 |             links: { internal: [], external: [] },
 450 |             media: { images: [], videos: [], audios: [] },
 451 |             markdown: {
 452 |               raw_markdown: '# Example',
 453 |               markdown_with_citations: '# Example [1]',
 454 |               references_markdown: '[1]: https://example.com',
 455 |               fit_markdown: '# Example',
 456 |               fit_html: '<h1>Example</h1>',
 457 |             },
 458 |             tables: [],
 459 |             extracted_content: null,
 460 |             screenshot: 'screenshot-data',
 461 |             pdf: 'pdf-data',
 462 |             mhtml: null,
 463 |             js_execution_result: { success: true, results: ['JS result'] },
 464 |             downloaded_files: null,
 465 |             network_requests: null,
 466 |             console_messages: ['Console log'],
 467 |             ssl_certificate: null,
 468 |             dispatch_result: null,
 469 |           },
 470 |         ],
 471 |         server_processing_time_s: 1.5,
 472 |         server_memory_delta_mb: 10,
 473 |         server_peak_memory_mb: 100,
 474 |       };
 475 | 
 476 |       mockCrawl.mockResolvedValue(mockResponse);
 477 | 
 478 |       const result: ToolResult = await server.crawl({
 479 |         url: 'https://example.com',
 480 |         screenshot: true,
 481 |         pdf: true,
 482 |         js_code: 'return document.title',
 483 |         session_id: 'test-session',
 484 |       });
 485 | 
 486 |       expect(result.content.length).toBeGreaterThan(0); // Multiple content types
 487 |       // Check text content
 488 |       const textContent = result.content.find((c) => c.type === 'text' && c.text?.includes('# Example'));
 489 |       expect(textContent).toBeDefined();
 490 |       // Check screenshot
 491 |       const screenshotContent = result.content.find((c) => c.type === 'image');
 492 |       expect(screenshotContent?.data).toBe('screenshot-data');
 493 |     });
 494 | 
 495 |     it('should handle crawl with proxy configuration', async () => {
 496 |       const mockResponse: CrawlEndpointResponse = {
 497 |         success: true,
 498 |         results: [
 499 |           {
 500 |             url: 'https://example.com',
 501 |             markdown: { raw_markdown: 'Proxied content' },
 502 |             success: true,
 503 |             status_code: 200,
 504 |           },
 505 |         ],
 506 |       };
 507 | 
 508 |       mockCrawl.mockResolvedValue(mockResponse);
 509 | 
 510 |       await server.crawl({
 511 |         url: 'https://example.com',
 512 |         proxy_server: 'http://proxy.example.com:8080',
 513 |         proxy_username: 'user',
 514 |         proxy_password: 'pass',
 515 |       });
 516 | 
 517 |       expect(mockCrawl).toHaveBeenCalledWith(
 518 |         expect.objectContaining({
 519 |           browser_config: expect.objectContaining({
 520 |             proxy_config: {
 521 |               server: 'http://proxy.example.com:8080',
 522 |               username: 'user',
 523 |               password: 'pass',
 524 |             },
 525 |           }),
 526 |         }),
 527 |       );
 528 |     });
 529 | 
 530 |     it('should handle crawl with cookies and headers', async () => {
 531 |       const mockResponse: CrawlEndpointResponse = {
 532 |         success: true,
 533 |         results: [
 534 |           {
 535 |             url: 'https://example.com',
 536 |             markdown: { raw_markdown: 'Content with auth' },
 537 |             success: true,
 538 |             status_code: 200,
 539 |           },
 540 |         ],
 541 |       };
 542 | 
 543 |       mockCrawl.mockResolvedValue(mockResponse);
 544 | 
 545 |       await server.crawl({
 546 |         url: 'https://example.com',
 547 |         cookies: [{ name: 'session', value: 'abc123' }],
 548 |         headers: { Authorization: 'Bearer token123' },
 549 |       });
 550 | 
 551 |       expect(mockCrawl).toHaveBeenCalledWith(
 552 |         expect.objectContaining({
 553 |           browser_config: expect.objectContaining({
 554 |             cookies: [{ name: 'session', value: 'abc123' }],
 555 |             headers: { Authorization: 'Bearer token123' },
 556 |           }),
 557 |         }),
 558 |       );
 559 |     });
 560 | 
 561 |     it('should handle virtual scroll configuration', async () => {
 562 |       const mockResponse: CrawlEndpointResponse = {
 563 |         success: true,
 564 |         results: [
 565 |           {
 566 |             url: 'https://example.com',
 567 |             markdown: { raw_markdown: 'Scrolled content' },
 568 |             success: true,
 569 |             status_code: 200,
 570 |           },
 571 |         ],
 572 |       };
 573 | 
 574 |       mockCrawl.mockResolvedValue(mockResponse);
 575 | 
 576 |       await server.crawl({
 577 |         url: 'https://example.com',
 578 |         virtual_scroll_config: {
 579 |           enabled: true,
 580 |           scroll_step: 100,
 581 |           max_scrolls: 10,
 582 |         },
 583 |       });
 584 | 
 585 |       expect(mockCrawl).toHaveBeenCalledWith(
 586 |         expect.objectContaining({
 587 |           crawler_config: expect.objectContaining({
 588 |             virtual_scroll_config: {
 589 |               enabled: true,
 590 |               scroll_step: 100,
 591 |               max_scrolls: 10,
 592 |             },
 593 |           }),
 594 |         }),
 595 |       );
 596 |     });
 597 | 
 598 |     it('should handle js_code as null error', async () => {
 599 |       await expect(
 600 |         server.crawl({
 601 |           url: 'https://example.com',
 602 |           js_code: null,
 603 |         }),
 604 |       ).rejects.toThrow('js_code parameter is null');
 605 |     });
 606 |   });
 607 | 
 608 |   describe('extract_with_llm', () => {
 609 |     it('should handle successful LLM extraction', async () => {
 610 |       mockExtractWithLLM.mockResolvedValue({
 611 |         answer: 'The main topic is JavaScript testing.',
 612 |       });
 613 | 
 614 |       const result: ToolResult = await server.extractWithLLM({
 615 |         url: 'https://example.com',
 616 |         query: 'What is the main topic?',
 617 |       });
 618 | 
 619 |       expect(result.content).toHaveLength(1);
 620 |       expect(result.content[0].text).toBe('The main topic is JavaScript testing.');
 621 |     });
 622 |   });
 623 | 
 624 |   describe('extract_links', () => {
 625 |     it('should extract and categorize links', async () => {
 626 |       mockPost.mockResolvedValue({
 627 |         data: {
 628 |           results: [
 629 |             {
 630 |               links: {
 631 |                 internal: [
 632 |                   { href: '/page1', text: 'Page 1' },
 633 |                   { href: '/page2', text: 'Page 2' },
 634 |                 ],
 635 |                 external: [{ href: 'https://external.com', text: 'External' }],
 636 |               },
 637 |             },
 638 |           ],
 639 |         },
 640 |       });
 641 | 
 642 |       const result: ToolResult = await server.extractLinks({
 643 |         url: 'https://example.com',
 644 |         categorize: true,
 645 |       });
 646 | 
 647 |       expect(result.content[0].text).toContain('Link analysis for https://example.com:');
 648 |       expect(result.content[0].text).toContain('internal (2)');
 649 |       expect(result.content[0].text).toContain('/page1');
 650 |       expect(result.content[0].text).toContain('external (1)');
 651 |     });
 652 | 
 653 |     it('should categorize external links (social, images, scripts)', async () => {
 654 |       mockPost.mockResolvedValue({
 655 |         data: {
 656 |           results: [
 657 |             {
 658 |               links: {
 659 |                 internal: [],
 660 |                 external: [
 661 |                   'https://facebook.com/profile',
 662 |                   'https://example.com/image.jpg',
 663 |                   'https://cdn.com/script.js',
 664 |                 ],
 665 |               },
 666 |             },
 667 |           ],
 668 |         },
 669 |       });
 670 | 
 671 |       const result: ToolResult = await server.extractLinks({
 672 |         url: 'https://example.com',
 673 |         categorize: true,
 674 |       });
 675 | 
 676 |       expect(result.content[0].text).toContain('social (1)');
 677 |       expect(result.content[0].text).toContain('images (1)');
 678 |       expect(result.content[0].text).toContain('scripts (1)');
 679 |       expect(result.content[0].text).toContain('external (0)');
 680 |     });
 681 |   });
 682 | 
 683 |   describe('crawl_recursive', () => {
 684 |     it('should crawl recursively with depth limit', async () => {
 685 |       // Ensure mock is clean before setting up
 686 |       mockPost.mockReset();
 687 | 
 688 |       mockPost
 689 |         .mockResolvedValueOnce({
 690 |           data: {
 691 |             results: [
 692 |               {
 693 |                 url: 'https://example.com',
 694 |                 links: {
 695 |                   internal: [{ href: 'https://example.com/page1', text: 'Page 1' }],
 696 |                 },
 697 |                 markdown: { raw_markdown: 'Home page' },
 698 |                 success: true,
 699 |               },
 700 |             ],
 701 |           },
 702 |         })
 703 |         .mockResolvedValueOnce({
 704 |           data: {
 705 |             results: [
 706 |               {
 707 |                 url: 'https://example.com/page1',
 708 |                 links: { internal: [] },
 709 |                 markdown: { raw_markdown: 'Page 1 content' },
 710 |                 success: true,
 711 |               },
 712 |             ],
 713 |           },
 714 |         });
 715 | 
 716 |       const result: ToolResult = await server.crawlRecursive({
 717 |         url: 'https://example.com',
 718 |         max_depth: 2,
 719 |       });
 720 | 
 721 |       expect(result.content[0].text).toContain('Recursive crawl completed:');
 722 |       expect(result.content[0].text).toContain('Pages crawled: 2');
 723 |       expect(result.content[0].text).toContain('https://example.com');
 724 |       expect(result.content[0].text).toContain('https://example.com/page1');
 725 |     });
 726 |   });
 727 | 
 728 |   describe('parse_sitemap', () => {
 729 |     it('should parse sitemap successfully', async () => {
 730 |       mockGet.mockResolvedValue({
 731 |         data: `<?xml version="1.0" encoding="UTF-8"?>
 732 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 733 | <url><loc>https://example.com/</loc></url>
 734 | <url><loc>https://example.com/page1</loc></url>
 735 | <url><loc>https://example.com/page2</loc></url>
 736 | </urlset>`,
 737 |       });
 738 | 
 739 |       const result: ToolResult = await server.parseSitemap({
 740 |         url: 'https://example.com/sitemap.xml',
 741 |       });
 742 | 
 743 |       expect(result.content[0].text).toContain('Sitemap parsed successfully:');
 744 |       expect(result.content[0].text).toContain('Total URLs found: 3');
 745 |       expect(result.content[0].text).toContain('https://example.com/');
 746 |       expect(result.content[0].text).toContain('https://example.com/page1');
 747 |     });
 748 |   });
 749 | 
 750 |   describe('smart_crawl', () => {
 751 |     it('should handle smart crawl for HTML content', async () => {
 752 |       mockHead.mockResolvedValue({
 753 |         headers: { 'content-type': 'text/html' },
 754 |       });
 755 |       mockPost.mockResolvedValue({
 756 |         data: {
 757 |           results: [
 758 |             {
 759 |               markdown: { raw_markdown: 'HTML content' },
 760 |               links: { internal: [], external: [] },
 761 |             },
 762 |           ],
 763 |         },
 764 |       });
 765 | 
 766 |       const result: ToolResult = await server.smartCrawl({
 767 |         url: 'https://example.com',
 768 |       });
 769 | 
 770 |       expect(result.content[0].text).toContain('Smart crawl detected content type');
 771 |       // For text/html responses the detected type resolves to 'html'
 772 |     });
 773 | 
 774 |     it('should handle smart crawl for PDF content', async () => {
 775 |       mockHead.mockResolvedValue({
 776 |         headers: { 'content-type': 'application/pdf' },
 777 |       });
 778 | 
 779 |       // Mock the crawl response for PDF
 780 |       mockPost.mockResolvedValue({
 781 |         data: {
 782 |           results: [
 783 |             {
 784 |               markdown: { raw_markdown: 'PDF content extracted' },
 785 |               links: { internal: [], external: [] },
 786 |             },
 787 |           ],
 788 |         },
 789 |       });
 790 | 
 791 |       const result: ToolResult = await server.smartCrawl({
 792 |         url: 'https://example.com/doc.pdf',
 793 |       });
 794 | 
 795 |       expect(result.content[0].text).toContain('Smart crawl detected content type');
 796 |       expect(result.content[0].text).toContain('PDF content extracted');
 797 |     });
 798 |   });
 799 | });
 800 | 
 801 | describe('Tool Handler Error Cases', () => {
 802 |   describe('Service errors', () => {
 803 |     it('should handle service error for get_markdown', async () => {
 804 |       mockGetMarkdown.mockRejectedValue(new Error('Network error'));
 805 | 
 806 |       await expect(server.getMarkdown({ url: 'https://example.com' })).rejects.toThrow(
 807 |         'Failed to get markdown: Network error',
 808 |       );
 809 |     });
 810 | 
 811 |     it('should handle axios error with response detail', async () => {
 812 |       const axiosError = {
 813 |         response: {
 814 |           data: {
 815 |             detail: 'Invalid API key',
 816 |           },
 817 |         },
 818 |       };
 819 |       mockCaptureScreenshot.mockRejectedValue(axiosError);
 820 | 
 821 |       await expect(server.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
 822 |         'Failed to capture screenshot: Invalid API key',
 823 |       );
 824 |     });
 825 | 
 826 |     it('should handle missing screenshot data', async () => {
 827 |       mockCaptureScreenshot.mockResolvedValue({
 828 |         success: false,
 829 |         screenshot: '',
 830 |       });
 831 | 
 832 |       await expect(server.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
 833 |         'Screenshot capture failed - no screenshot data in response',
 834 |       );
 835 |     });
 836 | 
 837 |     it('should handle missing PDF data', async () => {
 838 |       mockGeneratePDF.mockResolvedValue({
 839 |         success: true,
 840 |         pdf: '',
 841 |       });
 842 | 
 843 |       await expect(server.generatePDF({ url: 'https://example.com' })).rejects.toThrow(
 844 |         'PDF generation failed - no PDF data in response',
 845 |       );
 846 |     });
 847 |   });
 848 | 
 849 |   describe('Validation errors', () => {
 850 |     it('should handle missing scripts for execute_js', async () => {
 851 |       await expect(
 852 |         server.executeJS({ url: 'https://example.com', scripts: null as unknown as string }),
 853 |       ).rejects.toThrow('scripts is required');
 854 |     });
 855 | 
 856 |     it('should handle empty crawl options', async () => {
 857 |       await expect(server.crawl(null as unknown as Parameters<typeof server.crawl>[0])).rejects.toThrow(
 858 |         'crawl requires options object with at least a url parameter',
 859 |       );
 860 |     });
 861 | 
 862 |     it('should handle crawl_recursive errors', async () => {
 863 |       // Setup the mock to fail - crawlRecursive catches the error internally
 864 |       mockPost.mockRejectedValue(new Error('API error'));
 865 | 
 866 |       const result: ToolResult = await server.crawlRecursive({ url: 'https://example.com' });
 867 | 
 868 |       // The method catches errors and returns a message about no pages crawled
 869 |       expect(result.content[0].text).toContain('Pages crawled: 0');
 870 |       expect(result.content[0].text).toContain('No pages could be crawled');
 871 |     });
 872 | 
 873 |     it('should handle parse_sitemap errors', async () => {
 874 |       mockGet.mockRejectedValue(new Error('Failed to fetch sitemap'));
 875 | 
 876 |       await expect(server.parseSitemap({ url: 'https://example.com/sitemap.xml' })).rejects.toThrow(
 877 |         'Failed to parse sitemap: Failed to fetch sitemap',
 878 |       );
 879 |     });
 880 |   });
 881 | 
 882 |   describe('Edge cases', () => {
 883 |     it('should handle batch crawl with no results', async () => {
 884 |       mockPost.mockResolvedValue({
 885 |         data: {
 886 |           results: [],
 887 |         },
 888 |       });
 889 | 
 890 |       const result: ToolResult = await server.batchCrawl({
 891 |         urls: ['https://example.com'],
 892 |       });
 893 | 
 894 |       expect(result.content[0].text).toContain('Batch crawl completed');
 895 |       expect(result.content[0].text).toContain('Processed 0 URLs');
 896 |     });
 897 | 
 898 |     it('should handle extract_links with no links', async () => {
 899 |       mockPost.mockResolvedValue({
 900 |         data: {
 901 |           results: [
 902 |             {
 903 |               links: {
 904 |                 internal: [],
 905 |                 external: [],
 906 |               },
 907 |             },
 908 |           ],
 909 |         },
 910 |       });
 911 | 
 912 |       const result: ToolResult = await server.extractLinks({
 913 |         url: 'https://example.com',
 914 |       });
 915 | 
 916 |       expect(result.content[0].text).toContain('All links from https://example.com:');
 917 |       expect(result.content[0].text).toMatch(/\n\s*$/);
 918 |     });
 919 | 
 920 |     it('should handle smart crawl with HEAD request failure', async () => {
 921 |       mockHead.mockRejectedValue(new Error('HEAD failed'));
 922 |       // Fallback to HTML crawl
 923 |       mockPost.mockResolvedValue({
 924 |         data: {
 925 |           results: [
 926 |             {
 927 |               markdown: { raw_markdown: 'Fallback content' },
 928 |               links: { internal: [], external: [] },
 929 |             },
 930 |           ],
 931 |         },
 932 |       });
 933 | 
 934 |       const result: ToolResult = await server.smartCrawl({
 935 |         url: 'https://example.com',
 936 |       });
 937 | 
 938 |       expect(result.content[0].text).toContain('Smart crawl detected content type');
 939 |     });
 940 |   });
 941 | 
 942 |   describe('ZodError validation tests', () => {
 943 |     it('should validate get_markdown parameters', () => {
 944 |       // Valid case
 945 |       expect(() => {
 946 |         GetMarkdownSchema.parse({ url: 'https://example.com' });
 947 |       }).not.toThrow();
 948 | 
 949 |       // Invalid - missing url
 950 |       expect(() => {
 951 |         GetMarkdownSchema.parse({ filter: 'fit' });
 952 |       }).toThrow();
 953 | 
 954 |       // Invalid - bm25 without query
 955 |       expect(() => {
 956 |         GetMarkdownSchema.parse({ url: 'https://example.com', filter: 'bm25' });
 957 |       }).toThrow('Query parameter is required when using bm25 or llm filter');
 958 |     });
 959 | 
 960 |     it('should validate crawl parameters', () => {
 961 |       // Valid case
 962 |       expect(() => {
 963 |         CrawlSchema.parse({ url: 'https://example.com' });
 964 |       }).not.toThrow();
 965 | 
 966 |       // Invalid - js_only without session_id
 967 |       expect(() => {
 968 |         CrawlSchema.parse({ url: 'https://example.com', js_only: true });
 969 |       }).toThrow('js_only requires session_id');
 970 | 
 971 |       // Invalid - empty js_code array
 972 |       expect(() => {
 973 |         CrawlSchema.parse({ url: 'https://example.com', js_code: [] });
 974 |       }).toThrow('js_code array cannot be empty');
 975 |     });
 976 | 
 977 |     it('should validate batch_crawl parameters', () => {
 978 |       // Valid case
 979 |       expect(() => {
 980 |         BatchCrawlSchema.parse({ urls: ['https://example.com'] });
 981 |       }).not.toThrow();
 982 | 
 983 |       // Invalid - not an array
 984 |       expect(() => {
 985 |         BatchCrawlSchema.parse({ urls: 'not-an-array' });
 986 |       }).toThrow();
 987 |     });
 988 |   });
 989 | 
 990 |   describe('Parameter validation edge cases', () => {
 991 |     // These tests require proper schema validation which happens at the handler level
 992 |     // Skipping direct method calls as they bypass validation
 993 |   });
 994 | 
 995 |   describe('Additional coverage tests', () => {
 996 |     it('should handle crawl with media extraction', async () => {
 997 |       mockCrawl.mockResolvedValue({
 998 |         success: true,
 999 |         results: [
1000 |           {
1001 |             url: 'https://example.com',
1002 |             markdown: { raw_markdown: 'Content' },
1003 |             media: {
1004 |               images: [
1005 |                 { src: 'https://example.com/img1.jpg', alt: 'Image 1' },
1006 |                 { src: 'https://example.com/img2.jpg', alt: 'Image 2' },
1007 |               ],
1008 |               videos: [{ src: 'https://example.com/video.mp4', type: 'video/mp4' }],
1009 |               audios: [],
1010 |             },
1011 |             success: true,
1012 |             status_code: 200,
1013 |           },
1014 |         ],
1015 |       });
1016 | 
1017 |       const result: ToolResult = await server.crawl({
1018 |         url: 'https://example.com',
1019 |         media_handling: { images: true, videos: true },
1020 |       });
1021 | 
1022 |       expect(result.content.length).toBeGreaterThan(0);
1023 |       expect(result.content[0].type).toBe('text');
1024 |       expect(result.content[0].text).toBe('Content');
1025 |     });
1026 | 
1027 |     it('should handle crawl with tables extraction', async () => {
1028 |       mockCrawl.mockResolvedValue({
1029 |         success: true,
1030 |         results: [
1031 |           {
1032 |             url: 'https://example.com',
1033 |             markdown: { raw_markdown: 'Content' },
1034 |             tables: [
1035 |               {
1036 |                 headers: ['Name', 'Age'],
1037 |                 rows: [
1038 |                   ['John', '30'],
1039 |                   ['Jane', '25'],
1040 |                 ],
1041 |                 markdown: '| Name | Age |\n|------|-----|\n| John | 30 |\n| Jane | 25 |',
1042 |               },
1043 |             ],
1044 |             success: true,
1045 |             status_code: 200,
1046 |           },
1047 |         ],
1048 |       });
1049 | 
1050 |       const result: ToolResult = await server.crawl({
1051 |         url: 'https://example.com',
1052 |       });
1053 | 
1054 |       expect(result.content.length).toBeGreaterThan(0);
1055 |       expect(result.content[0].type).toBe('text');
1056 |       expect(result.content[0].text).toBe('Content');
1057 |     });
1058 | 
1059 |     it('should handle crawl with network_requests', async () => {
1060 |       mockCrawl.mockResolvedValue({
1061 |         success: true,
1062 |         results: [
1063 |           {
1064 |             url: 'https://example.com',
1065 |             markdown: { raw_markdown: 'Content' },
1066 |             network_requests: [
1067 |               { url: 'https://api.example.com/data', method: 'GET', status: 200 },
1068 |               { url: 'https://api.example.com/post', method: 'POST', status: 201 },
1069 |             ],
1070 |             success: true,
1071 |             status_code: 200,
1072 |           },
1073 |         ],
1074 |       });
1075 | 
1076 |       const result: ToolResult = await server.crawl({
1077 |         url: 'https://example.com',
1078 |         network_requests: true,
1079 |       });
1080 | 
1081 |       expect(result.content.length).toBeGreaterThan(0);
1082 |       expect(result.content[0].type).toBe('text');
1083 |       expect(result.content[0].text).toBe('Content');
1084 |     });
1085 | 
1086 |     it('should handle crawl with mhtml output', async () => {
1087 |       mockCrawl.mockResolvedValue({
1088 |         success: true,
1089 |         results: [
1090 |           {
1091 |             url: 'https://example.com',
1092 |             markdown: { raw_markdown: 'Content' },
1093 |             mhtml: 'MHTML content here',
1094 |             success: true,
1095 |             status_code: 200,
1096 |           },
1097 |         ],
1098 |       });
1099 | 
1100 |       const result: ToolResult = await server.crawl({
1101 |         url: 'https://example.com',
1102 |         mhtml: true,
1103 |       });
1104 | 
1105 |       expect(result.content.length).toBeGreaterThan(0);
1106 |       expect(result.content[0].type).toBe('text');
1107 |       expect(result.content[0].text).toBe('Content');
1108 |     });
1109 | 
1110 |     it('should handle crawl with downloaded_files', async () => {
1111 |       mockCrawl.mockResolvedValue({
1112 |         success: true,
1113 |         results: [
1114 |           {
1115 |             url: 'https://example.com',
1116 |             markdown: { raw_markdown: 'Content' },
1117 |             downloaded_files: {
1118 |               'file1.pdf': 'base64content1',
1119 |               'file2.doc': 'base64content2',
1120 |             },
1121 |             success: true,
1122 |             status_code: 200,
1123 |           },
1124 |         ],
1125 |       });
1126 | 
1127 |       const result: ToolResult = await server.crawl({
1128 |         url: 'https://example.com',
1129 |         download_files: true,
1130 |       });
1131 | 
1132 |       expect(result.content.length).toBeGreaterThan(0);
1133 |       expect(result.content[0].type).toBe('text');
1134 |       expect(result.content[0].text).toBe('Content');
1135 |     });
1136 | 
1137 |     it('should handle crawl with ssl_certificate', async () => {
1138 |       mockCrawl.mockResolvedValue({
1139 |         success: true,
1140 |         results: [
1141 |           {
1142 |             url: 'https://example.com',
1143 |             markdown: { raw_markdown: 'Content' },
1144 |             ssl_certificate: {
1145 |               issuer: "Let's Encrypt",
1146 |               subject: '*.example.com',
1147 |               validFrom: '2024-01-01',
1148 |               validTo: '2024-12-31',
1149 |               protocol: 'TLSv1.3',
1150 |             },
1151 |             success: true,
1152 |             status_code: 200,
1153 |           },
1154 |         ],
1155 |       });
1156 | 
1157 |       const result: ToolResult = await server.crawl({
1158 |         url: 'https://example.com',
1159 |         ssl_certificate: true,
1160 |       });
1161 | 
1162 |       expect(result.content.length).toBeGreaterThan(0);
1163 |       expect(result.content[0].type).toBe('text');
1164 |       expect(result.content[0].text).toBe('Content');
1165 |     });
1166 | 
1167 |     it('should handle crawl with wait_for conditions', async () => {
1168 |       mockCrawl.mockResolvedValue({
1169 |         success: true,
1170 |         results: [
1171 |           {
1172 |             url: 'https://example.com',
1173 |             markdown: { raw_markdown: 'Dynamic content loaded' },
1174 |             success: true,
1175 |             status_code: 200,
1176 |           },
1177 |         ],
1178 |       });
1179 | 
1180 |       await server.crawl({
1181 |         url: 'https://example.com',
1182 |         wait_for: {
1183 |           selector: '.dynamic-content',
1184 |           timeout: 5000,
1185 |         },
1186 |       });
1187 | 
1188 |       expect(mockCrawl).toHaveBeenCalledWith(
1189 |         expect.objectContaining({
1190 |           crawler_config: expect.objectContaining({
1191 |             wait_for: {
1192 |               selector: '.dynamic-content',
1193 |               timeout: 5000,
1194 |             },
1195 |           }),
1196 |         }),
1197 |       );
1198 |     });
1199 | 
1200 |     it('should handle crawl error scenarios', async () => {
1201 |       mockCrawl.mockResolvedValue({
1202 |         success: false,
1203 |         results: [
1204 |           {
1205 |             url: 'https://example.com',
1206 |             success: false,
1207 |             error: 'Page load timeout',
1208 |             status_code: 0,
1209 |           },
1210 |         ],
1211 |       });
1212 | 
1213 |       const result: ToolResult = await server.crawl({
1214 |         url: 'https://example.com',
1215 |       });
1216 | 
1217 |       expect(result.content[0].text).toBe('No content extracted');
1218 |     });
1219 | 
1220 |     it('should handle extract_links with categorized output', async () => {
1221 |       mockPost.mockResolvedValue({
1222 |         data: {
1223 |           results: [
1224 |             {
1225 |               links: {
1226 |                 internal: [
1227 |                   { href: '/page1', text: 'Page 1' },
1228 |                   { href: '/page2', text: 'Page 2' },
1229 |                 ],
1230 |                 external: [{ href: 'https://external.com', text: 'External' }],
1231 |                 social: [{ href: 'https://twitter.com/example', text: 'Twitter' }],
1232 |                 documents: [{ href: '/file.pdf', text: 'PDF Document' }],
1233 |                 images: [{ href: '/image.jpg', text: 'Image' }],
1234 |               },
1235 |             },
1236 |           ],
1237 |         },
1238 |       });
1239 | 
1240 |       const result: ToolResult = await server.extractLinks({
1241 |         url: 'https://example.com',
1242 |         categorize: true,
1243 |       });
1244 | 
1245 |       expect(result.content[0].text).toContain('internal (2)');
1246 |       expect(result.content[0].text).toContain('external (1)');
1247 |       expect(result.content[0].text).toContain('social (0)'); // No social links in internal/external
1248 |       expect(result.content[0].text).toContain('documents (0)'); // No documents in internal/external
1249 |       expect(result.content[0].text).toContain('images (0)'); // No images in internal/external
1250 |     });
1251 | 
1252 |     it('should handle smart_crawl for sitemap', async () => {
1253 |       // Set up axios client mock for the server instance
1254 |       const axiosClientMock = {
1255 |         head: jest.fn().mockResolvedValue({
1256 |           headers: { 'content-type': 'application/xml' },
1257 |         }),
1258 |         post: jest.fn().mockResolvedValue({
1259 |           data: {
1260 |             results: [
1261 |               {
1262 |                 url: 'https://example.com/sitemap.xml',
1263 |                 markdown: { raw_markdown: 'Sitemap content' },
1264 |                 success: true,
1265 |                 status_code: 200,
1266 |               },
1267 |             ],
1268 |           },
1269 |         }),
1270 |       };
1271 |       server.axiosClientForTesting = axiosClientMock;
1272 | 
1273 |       const result: ToolResult = await server.smartCrawl({
1274 |         url: 'https://example.com/sitemap.xml',
1275 |       });
1276 | 
1277 |       expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
1278 |       expect(result.content[0].text).toContain('Sitemap content');
1279 |       expect(axiosClientMock.post).toHaveBeenCalledWith(
1280 |         '/crawl',
1281 |         expect.objectContaining({
1282 |           urls: ['https://example.com/sitemap.xml'],
1283 |           crawler_config: expect.objectContaining({
1284 |             cache_mode: 'ENABLED',
1285 |           }),
1286 |           browser_config: expect.objectContaining({
1287 |             headless: true,
1288 |             browser_type: 'chromium',
1289 |           }),
1290 |         }),
1291 |       );
1292 |     });
1293 | 
1294 |     it('should handle smart_crawl for RSS feed', async () => {
1295 |       const axiosClientMock = {
1296 |         head: jest.fn().mockResolvedValue({
1297 |           headers: { 'content-type': 'application/rss+xml' },
1298 |         }),
1299 |         post: jest.fn().mockResolvedValue({
1300 |           data: {
1301 |             results: [
1302 |               {
1303 |                 url: 'https://example.com/feed.rss',
1304 |                 markdown: { raw_markdown: 'RSS feed content' },
1305 |                 success: true,
1306 |                 status_code: 200,
1307 |               },
1308 |             ],
1309 |           },
1310 |         }),
1311 |       };
1312 |       server.axiosClientForTesting = axiosClientMock;
1313 | 
1314 |       const result: ToolResult = await server.smartCrawl({
1315 |         url: 'https://example.com/feed.rss',
1316 |       });
1317 | 
1318 |       expect(result.content[0].text).toContain('Smart crawl detected content type: rss');
1319 |       expect(result.content[0].text).toContain('RSS feed content');
1320 |       expect(axiosClientMock.post).toHaveBeenCalledWith(
1321 |         '/crawl',
1322 |         expect.objectContaining({
1323 |           urls: ['https://example.com/feed.rss'],
1324 |           crawler_config: expect.objectContaining({
1325 |             cache_mode: 'ENABLED',
1326 |           }),
1327 |           browser_config: expect.objectContaining({
1328 |             headless: true,
1329 |             browser_type: 'chromium',
1330 |           }),
1331 |         }),
1332 |       );
1333 |     });
1334 | 
1335 |     it('should handle smart_crawl for JSON content', async () => {
1336 |       const axiosClientMock = {
1337 |         head: jest.fn().mockResolvedValue({
1338 |           headers: { 'content-type': 'application/json' },
1339 |         }),
1340 |         post: jest.fn().mockResolvedValue({
1341 |           data: {
1342 |             results: [
1343 |               {
1344 |                 url: 'https://example.com/data.json',
1345 |                 markdown: { raw_markdown: 'JSON content' },
1346 |                 success: true,
1347 |                 status_code: 200,
1348 |               },
1349 |             ],
1350 |           },
1351 |         }),
1352 |       };
1353 |       server.axiosClientForTesting = axiosClientMock;
1354 | 
1355 |       const result: ToolResult = await server.smartCrawl({
1356 |         url: 'https://example.com/data.json',
1357 |       });
1358 | 
1359 |       expect(result.content[0].text).toContain('Smart crawl detected content type: json');
1360 |       expect(result.content[0].text).toContain('JSON content');
1361 |       expect(axiosClientMock.post).toHaveBeenCalledWith(
1362 |         '/crawl',
1363 |         expect.objectContaining({
1364 |           urls: ['https://example.com/data.json'],
1365 |           crawler_config: expect.objectContaining({
1366 |             cache_mode: 'ENABLED',
1367 |           }),
1368 |           browser_config: expect.objectContaining({
1369 |             headless: true,
1370 |             browser_type: 'chromium',
1371 |           }),
1372 |         }),
1373 |       );
1374 |     });
1375 | 
1376 |     it('should correctly categorize internal documents and images', async () => {
1377 |       mockPost.mockResolvedValue({
1378 |         data: {
1379 |           results: [
1380 |             {
1381 |               links: {
1382 |                 internal: [
1383 |                   { href: '/page1', text: 'Page 1' },
1384 |                   { href: '/docs/manual.pdf', text: 'Manual' },
1385 |                   { href: '/images/logo.png', text: 'Logo' },
1386 |                   { href: '/assets/style.css', text: 'Styles' },
1387 |                 ],
1388 |                 external: [{ href: 'https://example.com/report.pdf', text: 'External Report' }],
1389 |               },
1390 |             },
1391 |           ],
1392 |         },
1393 |       });
1394 | 
1395 |       const result: ToolResult = await server.extractLinks({
1396 |         url: 'https://example.com',
1397 |         categorize: true,
1398 |       });
1399 | 
1400 |       expect(result.content[0].text).toContain('internal (1)'); // Only /page1 remains as internal
1401 |       expect(result.content[0].text).toContain('external (0)'); // External PDF moved to documents
1402 |       expect(result.content[0].text).toContain('documents (2)'); // Both PDFs
1403 |       expect(result.content[0].text).toContain('images (1)'); // The PNG
1404 |       expect(result.content[0].text).toContain('scripts (1)'); // The CSS
1405 |     });
1406 | 
1407 |     it('should handle smart_crawl for plain text', async () => {
1408 |       const axiosClientMock = {
1409 |         head: jest.fn().mockResolvedValue({
1410 |           headers: { 'content-type': 'text/plain' },
1411 |         }),
1412 |         post: jest.fn().mockResolvedValue({
1413 |           data: {
1414 |             results: [
1415 |               {
1416 |                 url: 'https://example.com/file.txt',
1417 |                 markdown: { raw_markdown: 'This is plain text content' },
1418 |                 success: true,
1419 |                 status_code: 200,
1420 |               },
1421 |             ],
1422 |           },
1423 |         }),
1424 |       };
1425 |       server.axiosClientForTesting = axiosClientMock;
1426 | 
1427 |       const result: ToolResult = await server.smartCrawl({
1428 |         url: 'https://example.com/file.txt',
1429 |       });
1430 | 
1431 |       expect(result.content[0].text).toContain('Smart crawl detected content type: text');
1432 |       expect(result.content[0].text).toContain('This is plain text content');
1433 |       expect(axiosClientMock.post).toHaveBeenCalledWith(
1434 |         '/crawl',
1435 |         expect.objectContaining({
1436 |           urls: ['https://example.com/file.txt'],
1437 |           crawler_config: expect.objectContaining({
1438 |             cache_mode: 'ENABLED',
1439 |           }),
1440 |           browser_config: expect.objectContaining({
1441 |             headless: true,
1442 |             browser_type: 'chromium',
1443 |           }),
1444 |         }),
1445 |       );
1446 |     });
1447 |   });
1448 | 
1449 |   describe('Additional Method Tests', () => {
1450 |     it('should handle parse_sitemap', async () => {
1451 |       // Mock axios.get to return sitemap XML
1452 |       mockGet.mockResolvedValue({
1453 |         data: `<?xml version="1.0" encoding="UTF-8"?>
1454 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
1455 | <url><loc>https://example.com/page1</loc></url>
1456 | <url><loc>https://example.com/page2</loc></url>
1457 | <url><loc>https://example.com/page3</loc></url>
1458 | </urlset>`,
1459 |       });
1460 | 
1461 |       const result: ToolResult = await server.parseSitemap({
1462 |         url: 'https://example.com/sitemap.xml',
1463 |       });
1464 | 
1465 |       expect(result.content[0].text).toContain('Sitemap parsed successfully');
1466 |       expect(result.content[0].text).toContain('Total URLs found: 3');
1467 |     });
1468 | 
1469 |     it('should handle parse_sitemap with filter', async () => {
1470 |       // Mock axios.get to return sitemap XML with blog URLs
1471 |       mockGet.mockResolvedValue({
1472 |         data: `<?xml version="1.0" encoding="UTF-8"?>
1473 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
1474 | <url><loc>https://example.com/page1</loc></url>
1475 | <url><loc>https://example.com/blog/post1</loc></url>
1476 | <url><loc>https://example.com/blog/post2</loc></url>
1477 | <url><loc>https://example.com/page2</loc></url>
1478 | </urlset>`,
1479 |       });
1480 | 
1481 |       const result: ToolResult = await server.parseSitemap({
1482 |         url: 'https://example.com/sitemap.xml',
1483 |         filter_pattern: '.*blog.*',
1484 |       });
1485 | 
1486 |       expect(result.content[0].text).toContain('Total URLs found: 4');
1487 |       expect(result.content[0].text).toContain('Filtered URLs: 2');
1488 |     });
1489 | 
1490 |     it('should handle crawl_recursive', async () => {
1491 |       mockCrawl.mockResolvedValue({
1492 |         success: true,
1493 |         results: [
1494 |           {
1495 |             url: 'https://example.com',
1496 |             markdown: { raw_markdown: 'Content' },
1497 |             links: { internal: [], external: [] },
1498 |             success: true,
1499 |             status_code: 200,
1500 |           },
1501 |         ],
1502 |       });
1503 | 
1504 |       const result: ToolResult = await server.crawlRecursive({
1505 |         url: 'https://example.com',
1506 |       });
1507 | 
1508 |       expect(result.content[0].text).toContain('Recursive crawl completed');
1509 |     });
1510 | 
1511 |     it('should handle parse_sitemap error', async () => {
1512 |       mockParseSitemap.mockRejectedValue(new Error('Network error'));
1513 | 
1514 |       await expect(
1515 |         server.parseSitemap({
1516 |           url: 'https://example.com/sitemap.xml',
1517 |         }),
1518 |       ).rejects.toThrow('Failed to parse sitemap');
1519 |     });
1520 | 
1521 |     it('should handle crawl with error result', async () => {
1522 |       mockCrawl.mockResolvedValue({
1523 |         success: false,
1524 |         results: [],
1525 |       });
1526 | 
1527 |       await expect(
1528 |         server.crawl({
1529 |           url: 'https://example.com',
1530 |         }),
1531 |       ).rejects.toThrow('Invalid response from server');
1532 |     });
1533 | 
1534 |     it('should handle crawl with metadata and links', async () => {
1535 |       mockCrawl.mockResolvedValue({
1536 |         success: true,
1537 |         results: [
1538 |           {
1539 |             url: 'https://example.com',
1540 |             markdown: { raw_markdown: 'Content' },
1541 |             metadata: { title: 'Test Page', description: 'Test' },
1542 |             links: { internal: ['/page1'], external: ['https://external.com'] },
1543 |             js_execution_result: { results: [42, 'test'] },
1544 |             success: true,
1545 |             status_code: 200,
1546 |           },
1547 |         ],
1548 |       });
1549 | 
1550 |       const result: ToolResult = await server.crawl({
1551 |         url: 'https://example.com',
1552 |       });
1553 | 
1554 |       expect(result.content.length).toBeGreaterThan(1);
1555 |       expect(result.content.some((c) => c.text?.includes('Metadata'))).toBe(true);
1556 |       expect(result.content.some((c) => c.text?.includes('Links'))).toBe(true);
1557 |       expect(result.content.some((c) => c.text?.includes('JavaScript Execution Results'))).toBe(true);
1558 |     });
1559 | 
1560 |     it('should handle executeJS with no scripts', async () => {
1561 |       await expect(
1562 |         server.executeJS({
1563 |           url: 'https://example.com',
1564 |           scripts: null,
1565 |         }),
1566 |       ).rejects.toThrow('scripts is required');
1567 |     });
1568 | 
1569 |     it('should handle executeJS with array of scripts', async () => {
1570 |       mockExecuteJS.mockResolvedValue({
1571 |         content: [{ type: 'text', text: 'JS executed' }],
1572 |       });
1573 | 
1574 |       const result: ToolResult = await server.executeJS({
1575 |         url: 'https://example.com',
1576 |         scripts: ['return 1', 'return 2'],
1577 |       });
1578 | 
1579 |       expect(result.content[0].text).toContain('JavaScript executed on:');
1580 |     });
1581 | 
1582 |     it('should handle batchCrawl with cache bypass', async () => {
1583 |       mockPost.mockResolvedValue({
1584 |         data: {
1585 |           results: [{ success: true }, { success: false }],
1586 |         },
1587 |       });
1588 | 
1589 |       const result: ToolResult = await server.batchCrawl({
1590 |         urls: ['https://example.com/1', 'https://example.com/2'],
1591 |         bypass_cache: true,
1592 |         remove_images: true,
1593 |       });
1594 | 
1595 |       expect(result.content[0].text).toContain('Batch crawl completed');
1596 |       expect(mockPost).toHaveBeenCalledWith(
1597 |         '/crawl',
1598 |         expect.objectContaining({
1599 |           crawler_config: expect.objectContaining({
1600 |             cache_mode: 'BYPASS',
1601 |             exclude_tags: ['img', 'picture', 'svg'],
1602 |           }),
1603 |         }),
1604 |       );
1605 |     });
1606 | 
1607 |     it('should handle smart_crawl with follow_links', async () => {
1608 |       const axiosClientMock = {
1609 |         head: jest.fn().mockResolvedValue({
1610 |           headers: { 'content-type': 'application/xml' },
1611 |         }),
1612 |         post: jest.fn().mockResolvedValue({
1613 |           data: {
1614 |             results: [
1615 |               {
1616 |                 url: 'https://example.com/sitemap.xml',
1617 |                 markdown: { raw_markdown: '<url><loc>https://example.com/page1</loc></url>' },
1618 |                 success: true,
1619 |                 status_code: 200,
1620 |               },
1621 |             ],
1622 |           },
1623 |         }),
1624 |       };
1625 |       server.axiosClientForTesting = axiosClientMock;
1626 | 
1627 |       const result: ToolResult = await server.smartCrawl({
1628 |         url: 'https://example.com/sitemap.xml',
1629 |         follow_links: true,
1630 |       });
1631 | 
1632 |       expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
1633 |     });
1634 | 
1635 |     it('should handle smart_crawl with HEAD request failure', async () => {
1636 |       const axiosClientMock = {
1637 |         head: jest.fn().mockRejectedValue({ response: { status: 500 } }),
1638 |         post: jest.fn().mockResolvedValue({
1639 |           data: {
1640 |             results: [
1641 |               {
1642 |                 url: 'https://example.com',
1643 |                 markdown: { raw_markdown: 'Content from crawl' },
1644 |                 success: true,
1645 |                 status_code: 200,
1646 |               },
1647 |             ],
1648 |           },
1649 |         }),
1650 |       };
1651 |       server.axiosClientForTesting = axiosClientMock;
1652 | 
1653 |       const result: ToolResult = await server.smartCrawl({
1654 |         url: 'https://example.com',
1655 |       });
1656 | 
1657 |       // Should continue despite HEAD failure
1658 |       expect(result.content[0].text).toContain('Smart crawl detected content type: html');
1659 |       expect(result.content[0].text).toContain('Content from crawl');
1660 |     });
1661 | 
1662 |     it('should handle extractLinks with no links', async () => {
1663 |       mockPost.mockResolvedValue({
1664 |         data: {
1665 |           results: [
1666 |             {
1667 |               markdown: 'Content without links',
1668 |             },
1669 |           ],
1670 |         },
1671 |       });
1672 | 
1673 |       const result: ToolResult = await server.extractLinks({
1674 |         url: 'https://example.com',
1675 |         categorize: false,
1676 |       });
1677 | 
1678 |       expect(result.content[0].text).toContain('All links from');
1679 |     });
1680 | 
1681 |     it('should handle extractLinks with manually extracted links', async () => {
1682 |       mockPost.mockResolvedValue({
1683 |         data: {
1684 |           results: [
1685 |             {
1686 |               markdown: 'Check out <a href="/page1">Page 1</a>',
1687 |             },
1688 |           ],
1689 |         },
1690 |       });
1691 | 
1692 |       const result: ToolResult = await server.extractLinks({
1693 |         url: 'https://example.com',
1694 |       });
1695 | 
1696 |       expect(result.content[0].text).toContain('All links from');
1697 |     });
1698 | 
1699 |     it('should handle MCP request handler for all tools', async () => {
1700 |       // Request handler should be available from beforeEach
1701 |       expect(requestHandler).toBeDefined();
1702 | 
1703 |       // Test various tools through the request handler
1704 |       const tools = [
1705 |         { name: 'get_markdown', args: { url: 'https://example.com' } },
1706 |         { name: 'capture_screenshot', args: { url: 'https://example.com' } },
1707 |         { name: 'generate_pdf', args: { url: 'https://example.com' } },
1708 |         { name: 'execute_js', args: { url: 'https://example.com', scripts: 'return 1' } },
1709 |         { name: 'batch_crawl', args: { urls: ['https://example.com'] } },
1710 |         { name: 'smart_crawl', args: { url: 'https://example.com' } },
1711 |         { name: 'get_html', args: { url: 'https://example.com' } },
1712 |         { name: 'extract_links', args: { url: 'https://example.com' } },
1713 |         { name: 'crawl_recursive', args: { url: 'https://example.com' } },
1714 |         { name: 'parse_sitemap', args: { url: 'https://example.com/sitemap.xml' } },
1715 |         { name: 'crawl', args: { url: 'https://example.com' } },
1716 |         { name: 'manage_session', args: { action: 'create' } },
1717 |         { name: 'manage_session', args: { action: 'clear', session_id: 'test' } },
1718 |         { name: 'manage_session', args: { action: 'list' } },
1719 |         { name: 'extract_with_llm', args: { url: 'https://example.com', prompt: 'test' } },
1720 |       ];
1721 | 
1722 |       // Mock all service methods to return success
1723 |       mockGetMarkdown.mockResolvedValue({ content: [{ type: 'text', text: 'markdown' }] });
1724 |       mockCaptureScreenshot.mockResolvedValue({ content: [{ type: 'text', text: 'screenshot' }] });
1725 |       mockGeneratePDF.mockResolvedValue({ content: [{ type: 'text', text: 'pdf' }] });
1726 |       mockExecuteJS.mockResolvedValue({ content: [{ type: 'text', text: 'js' }] });
1727 |       mockBatchCrawl.mockResolvedValue({ content: [{ type: 'text', text: 'batch' }] });
1728 |       mockGetHTML.mockResolvedValue({ content: [{ type: 'text', text: 'html' }] });
1729 |       mockExtractWithLLM.mockResolvedValue({ content: [{ type: 'text', text: 'llm' }] });
1730 |       mockCrawl.mockResolvedValue({
1731 |         success: true,
1732 |         results: [
1733 |           {
1734 |             url: 'https://example.com',
1735 |             markdown: { raw_markdown: 'content' },
1736 |             success: true,
1737 |             status_code: 200,
1738 |           },
1739 |         ],
1740 |       });
1741 |       mockPost.mockResolvedValue({
1742 |         data: {
1743 |           results: [
1744 |             {
1745 |               links: { internal: [], external: [] },
1746 |             },
1747 |           ],
1748 |         },
1749 |       });
1750 | 
1751 |       mockParseSitemap.mockResolvedValue(['https://example.com/page1']);
1752 | 
1753 |       // Test each tool
1754 |       for (const tool of tools) {
1755 |         const result = await requestHandler({
1756 |           method: 'tools/call',
1757 |           params: {
1758 |             name: tool.name,
1759 |             arguments: tool.args,
1760 |           },
1761 |         });
1762 |         expect(result).toBeDefined();
1763 |         expect(result.content).toBeDefined();
1764 |       }
1765 | 
1766 |       // Test unknown tool
1767 |       const unknownResult = await requestHandler({
1768 |         method: 'tools/call',
1769 |         params: {
1770 |           name: 'unknown_tool',
1771 |           arguments: {},
1772 |         },
1773 |       });
1774 |       expect(unknownResult.content[0].text).toContain('Error: Unknown tool');
1775 | 
1776 |       // The handler only handles tools/call requests,
1777 |       // so we don't test other methods here
1778 |     });
1779 | 
1780 |     it('should handle MCP request handler validation errors', async () => {
1781 |       expect(requestHandler).toBeDefined();
1782 | 
1783 |       // Test validation errors for various tools
1784 |       const invalidRequests = [
1785 |         { name: 'get_markdown', args: {} }, // missing url
1786 |         { name: 'capture_screenshot', args: {} }, // missing url
1787 |         { name: 'generate_pdf', args: {} }, // missing url
1788 |         { name: 'execute_js', args: { url: 'https://example.com' } }, // missing scripts
1789 |         { name: 'batch_crawl', args: {} }, // missing urls
1790 |         { name: 'smart_crawl', args: {} }, // missing url
1791 |         { name: 'get_html', args: {} }, // missing url
1792 |         { name: 'extract_links', args: {} }, // missing url
1793 |         { name: 'crawl_recursive', args: {} }, // missing url
1794 |         { name: 'parse_sitemap', args: {} }, // missing url
1795 |         { name: 'crawl', args: {} }, // missing url
1796 |         { name: 'manage_session', args: {} }, // missing action
1797 |         { name: 'manage_session', args: { action: 'clear' } }, // missing session_id for clear
1798 |         { name: 'manage_session', args: { action: 'invalid' } }, // invalid action
1799 |         { name: 'extract_with_llm', args: { url: 'https://example.com' } }, // missing prompt
1800 |       ];
1801 | 
1802 |       for (const req of invalidRequests) {
1803 |         const result = await requestHandler({
1804 |           method: 'tools/call',
1805 |           params: {
1806 |             name: req.name,
1807 |             arguments: req.args,
1808 |           },
1809 |         });
1810 |         expect(result.content[0].text).toContain(`Error: Invalid parameters for ${req.name}`);
1811 |       }
1812 |     });
1813 | 
1814 |     it('should handle crawl with all output types', async () => {
1815 |       mockCrawl.mockResolvedValue({
1816 |         success: true,
1817 |         results: [
1818 |           {
1819 |             url: 'https://example.com',
1820 |             extracted_content: { data: 'extracted' },
1821 |             screenshot: 'base64screenshot',
1822 |             pdf: 'base64pdf',
1823 |             success: true,
1824 |             status_code: 200,
1825 |           },
1826 |         ],
1827 |       });
1828 | 
1829 |       const result: ToolResult = await server.crawl({
1830 |         url: 'https://example.com',
1831 |         screenshot: true,
1832 |         pdf: true,
1833 |       });
1834 | 
1835 |       expect(result.content.some((c) => c.type === 'text')).toBe(true);
1836 |       expect(result.content.some((c) => c.type === 'image')).toBe(true);
1837 |       expect(result.content.some((c) => c.type === 'resource' && c.resource?.mimeType === 'application/pdf')).toBe(
1838 |         true,
1839 |       );
1840 |     });
1841 |   });
1842 | 
1843 |   describe('MCP Protocol Handler Tests', () => {
1844 |     it('should handle tools/list request', async () => {
1845 |       // Find the tools/list handler
1846 |       const toolsListHandler = mockSetRequestHandler.mock.calls.find(
1847 |         (call) => (call[0] as any).method === 'tools/list',
1848 |       )?.[1];
1849 | 
1850 |       expect(toolsListHandler).toBeDefined();
1851 | 
1852 |       const result = await (toolsListHandler as any)({ method: 'tools/list', params: {} }); // eslint-disable-line @typescript-eslint/no-explicit-any
1853 |       expect(result).toBeDefined();
1854 |       expect(result.tools).toBeDefined();
1855 |       expect(result.tools.length).toBe(13); // Should have 13 tools
1856 |     });
1857 | 
1858 |     it('should handle get_markdown query functionality', async () => {
1859 |       mockGetMarkdown.mockResolvedValue({
1860 |         url: 'https://example.com',
1861 |         filter: 'fit',
1862 |         query: 'What products are listed?',
1863 |         cache: 'false',
1864 |         markdown: 'Page content about products',
1865 |         success: true,
1866 |       });
1867 | 
1868 |       const result: ToolResult = await server.getMarkdown({
1869 |         url: 'https://example.com',
1870 |         query: 'What products are listed?',
1871 |       });
1872 | 
1873 |       expect(result.content[0].text).toContain('Query: What products are listed?');
1874 |       expect(result.content[0].text).toContain('Page content about products');
1875 |     });
1876 |   });
1877 | });
1878 | ```
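The ZodError tests above assert cross-field rules ('Query parameter is required when using bm25 or llm filter', 'js_only requires session_id', 'js_code array cannot be empty'), which go beyond plain shape checks. A minimal sketch of how such rules can be expressed with zod's `refine` — the field names mirror the tests, but the real definitions live in `src/schemas/validation-schemas.ts` and may differ:

```typescript
import { z } from 'zod';

// Hypothetical reconstruction of the cross-field rules the tests pin down.
export const GetMarkdownSchemaSketch = z
  .object({
    url: z.string().url(),
    filter: z.enum(['raw', 'fit', 'bm25', 'llm']).optional(),
    query: z.string().optional(),
  })
  // Content-dependent filters need a query to rank against.
  .refine((v) => !(v.filter === 'bm25' || v.filter === 'llm') || v.query !== undefined, {
    message: 'Query parameter is required when using bm25 or llm filter',
  });

export const CrawlSchemaSketch = z
  .object({
    url: z.string().url(),
    session_id: z.string().optional(),
    js_only: z.boolean().optional(),
    // A string, or a non-empty array of scripts.
    js_code: z.union([z.string(), z.array(z.string()).min(1, 'js_code array cannot be empty')]).optional(),
  })
  // js_only re-uses an existing browser page, so a session must be named.
  .refine((v) => !v.js_only || v.session_id !== undefined, {
    message: 'js_only requires session_id',
  });
```

With these shapes, `CrawlSchemaSketch.parse({ url: 'https://example.com', js_only: true })` throws a ZodError whose message contains 'js_only requires session_id', which is what `.toThrow('js_only requires session_id')` matches on.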
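The smart_crawl tests pin down a detection order: issue a HEAD request, map the `content-type` header to sitemap/rss/json/text, and fall back to html when the header is missing or the HEAD request fails. A rough sketch of that flow, under the assumption that XML responses are disambiguated by the rss marker in the type or URL; the actual logic lives in `src/handlers/crawl-handlers.ts` and may weigh URL suffixes differently:

```typescript
import axios from 'axios';

type DetectedType = 'sitemap' | 'rss' | 'json' | 'text' | 'html';

// Hypothetical reconstruction of the detection order the tests exercise.
async function detectContentType(url: string): Promise<DetectedType> {
  let contentType = '';
  try {
    const head = await axios.head(url);
    contentType = String(head.headers['content-type'] ?? '');
  } catch {
    // A failed HEAD falls through to the html default, matching the
    // 'should handle smart_crawl with HEAD request failure' test.
  }
  if (contentType.includes('xml')) {
    // 'application/rss+xml' (or an .rss URL) reads as a feed; other XML as a sitemap.
    return contentType.includes('rss') || url.endsWith('.rss') ? 'rss' : 'sitemap';
  }
  if (contentType.includes('json')) return 'json';
  if (contentType.includes('text/plain')) return 'text';
  return 'html';
}
```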
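The extract_links tests likewise imply a re-categorization pass over the server's internal/external buckets: known social hosts win first, then file extensions (documents, images, scripts — note that `.css` is counted under scripts), and only then does the internal/external split apply. A sketch of one categorizer consistent with those assertions; the social-host list beyond facebook.com and twitter.com is an assumption:

```typescript
type LinkCategory = 'social' | 'images' | 'scripts' | 'documents' | 'internal' | 'external';

// Assumed host list; only facebook.com and twitter.com appear in the tests.
const SOCIAL_HOSTS = ['facebook.com', 'twitter.com'];

function categorizeLink(href: string): LinkCategory {
  const isAbsolute = /^https?:\/\//.test(href);
  if (isAbsolute && SOCIAL_HOSTS.some((h) => new URL(href).hostname.endsWith(h))) return 'social';
  if (/\.(jpe?g|png|gif|svg|webp)$/i.test(href)) return 'images';
  if (/\.(js|css)$/i.test(href)) return 'scripts'; // the CSS case from the tests
  if (/\.(pdf|docx?)$/i.test(href)) return 'documents';
  return isAbsolute ? 'external' : 'internal';
}
```

This reproduces the 'should correctly categorize internal documents and images' expectations: `/page1` stays internal, both PDFs land in documents, the PNG in images, and the stylesheet in scripts, leaving external at zero.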
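Finally, several tests assert the exact payload posted to the Crawl4AI `/crawl` endpoint (`urls` plus a `crawler_config` that carries `cache_mode: 'BYPASS'` and `exclude_tags: ['img', 'picture', 'svg']`). A sketch of a client call that would satisfy those assertions — the base-URL environment variable is a placeholder, and the response shape is assumed from the mocks:

```typescript
import axios from 'axios';

// Hypothetical standalone version of the batch_crawl request the mocks assert against.
async function batchCrawlSketch(urls: string[], opts: { removeImages?: boolean; bypassCache?: boolean } = {}) {
  const crawler_config: Record<string, unknown> = {};
  if (opts.bypassCache) crawler_config.cache_mode = 'BYPASS';
  if (opts.removeImages) crawler_config.exclude_tags = ['img', 'picture', 'svg'];

  // CRAWL4AI_BASE_URL is a placeholder; the server wires a preconfigured axios client instead.
  const { data } = await axios.post(`${process.env.CRAWL4AI_BASE_URL}/crawl`, { urls, crawler_config });
  return data.results as Array<{ success: boolean }>;
}
```

Because the tests call `expect(mockPost).toHaveBeenCalledWith('/crawl', {...})` with an exact object, any extra keys in `crawler_config` would fail them, which is why the sketch only sets the two options conditionally.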