#
tokens: 23918/50000 2/58 files (page 3/3)
lines: off (toggle) GitHub
raw markdown copy
This is page 3 of 3. Use http://codebase.md/omgwtfwow/mcp-crawl4ai-ts?lines=false&page={x} to view the full context.

# Directory Structure

```
├── .env.example
├── .github
│   ├── CI.md
│   ├── copilot-instructions.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── CHANGELOG.md
├── eslint.config.mjs
├── jest.config.cjs
├── jest.setup.cjs
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   ├── __tests__
│   │   ├── crawl.test.ts
│   │   ├── crawl4ai-service.network.test.ts
│   │   ├── crawl4ai-service.test.ts
│   │   ├── handlers
│   │   │   ├── crawl-handlers.test.ts
│   │   │   ├── parameter-combinations.test.ts
│   │   │   ├── screenshot-saving.test.ts
│   │   │   ├── session-handlers.test.ts
│   │   │   └── utility-handlers.test.ts
│   │   ├── index.cli.test.ts
│   │   ├── index.npx.test.ts
│   │   ├── index.server.test.ts
│   │   ├── index.test.ts
│   │   ├── integration
│   │   │   ├── batch-crawl.integration.test.ts
│   │   │   ├── capture-screenshot.integration.test.ts
│   │   │   ├── crawl-advanced.integration.test.ts
│   │   │   ├── crawl-handlers.integration.test.ts
│   │   │   ├── crawl-recursive.integration.test.ts
│   │   │   ├── crawl.integration.test.ts
│   │   │   ├── execute-js.integration.test.ts
│   │   │   ├── extract-links.integration.test.ts
│   │   │   ├── extract-with-llm.integration.test.ts
│   │   │   ├── generate-pdf.integration.test.ts
│   │   │   ├── get-html.integration.test.ts
│   │   │   ├── get-markdown.integration.test.ts
│   │   │   ├── parse-sitemap.integration.test.ts
│   │   │   ├── session-management.integration.test.ts
│   │   │   ├── smart-crawl.integration.test.ts
│   │   │   └── test-utils.ts
│   │   ├── request-handler.test.ts
│   │   ├── schemas
│   │   │   └── validation-edge-cases.test.ts
│   │   ├── types
│   │   │   └── mocks.ts
│   │   └── utils
│   │       └── javascript-validation.test.ts
│   ├── crawl4ai-service.ts
│   ├── handlers
│   │   ├── base-handler.ts
│   │   ├── content-handlers.ts
│   │   ├── crawl-handlers.ts
│   │   ├── session-handlers.ts
│   │   └── utility-handlers.ts
│   ├── index.ts
│   ├── schemas
│   │   ├── helpers.ts
│   │   └── validation-schemas.ts
│   ├── server.ts
│   └── types.ts
├── tsconfig.build.json
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/src/__tests__/integration/crawl.integration.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import {
  createTestClient,
  cleanupTestClient,
  generateSessionId,
  expectSuccessfulCrawl,
  expectScreenshot,
  delay,
  TEST_TIMEOUTS,
} from './test-utils.js';

/**
 * Minimal shape of an MCP tool-call result as consumed by these tests.
 * Each content entry carries a `type` discriminator; the tests below read
 * `text` for 'text' entries and `data`/`mimeType` for 'image' entries.
 */
interface ToolResult {
  content: {
    /** Content kind discriminator (e.g. 'text' or 'image'). */
    type: string;
    /** Textual payload; present for 'text' entries. */
    text?: string;
    /** Base64-encoded payload; present for 'image' entries. */
    data?: string;
    /** MIME type of binary payloads. */
    mimeType?: string;
  }[];
}

describe('crawl Integration Tests', () => {
  let client: Client;

  // One shared MCP client for the whole suite; creation can be slow, hence
  // the medium timeout on the hook itself.
  beforeAll(async () => {
    client = await createTestClient();
  }, TEST_TIMEOUTS.medium);

  // Tear down after all tests; guard against beforeAll having failed before
  // `client` was assigned.
  afterAll(async () => {
    if (client) {
      await cleanupTestClient(client);
    }
  });

  // Smoke tests for the `crawl` tool: a plain crawl, two failure modes
  // (invalid URL, unresolvable domain), and basic browser configuration.
  // These hit live endpoints (httpbin.org), so failures may be environmental.
  describe('Basic Crawling', () => {
    it(
      'should crawl a simple page with basic configuration',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            cache_mode: 'BYPASS',
            word_count_threshold: 50,
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle invalid URL gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'not-a-valid-url',
            cache_mode: 'BYPASS',
          },
        });

        // Validation failures come back as a single text content item rather
        // than a thrown protocol error.
        const content = (result as ToolResult).content;
        expect(content).toHaveLength(1);
        expect(content[0].type).toBe('text');
        expect(content[0].text).toContain('Error');
        // Our Zod validation catches this before it reaches the API
        expect(content[0].text).toContain('Invalid parameters for crawl');
        expect(content[0].text).toContain('Invalid url');
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle non-existent domain gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://this-domain-definitely-does-not-exist-12345.com',
            cache_mode: 'BYPASS',
          },
        });

        const content = (result as ToolResult).content;
        expect(content).toHaveLength(1);
        expect(content[0].type).toBe('text');
        expect(content[0].text).toContain('Error');
        // Could be DNS error, connection error, or "Internal Server Error"
        expect(content[0].text).toMatch(/Failed to crawl|Internal Server Error|DNS|connection/i);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle browser configuration',
      async () => {
        // Only asserts the crawl succeeds with viewport/UA options set; the
        // echoed user agent itself is verified in the Browser Configuration
        // tests further down.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/user-agent',
            viewport_width: 1920,
            viewport_height: 1080,
            user_agent: 'MCP Integration Test Bot',
            cache_mode: 'DISABLED',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );
  });

  // Dynamic-page features: JS execution, wait_for selectors, and virtual
  // scrolling. Content assertions are deliberately loose because they depend
  // on live third-party pages.
  describe('Dynamic Content Tests', () => {
    it(
      'should execute JavaScript on page',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            js_code: ['return document.querySelectorAll("a").length', 'return document.title'],
            wait_after_js: 1000,
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
        // httpbin.org/html contains links and a title
        expect(textContent?.text?.toLowerCase()).toMatch(/herman|melville|moby/); // Content from the page
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should wait for specific elements',
      async () => {
        // /delay/2 responds after ~2s; waiting for 'body' with a 5s budget
        // should still succeed.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/delay/2',
            wait_for: 'body',
            wait_for_timeout: 5000,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle virtual scrolling for infinite feeds',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com/trending',
            virtual_scroll_config: {
              container_selector: '.Box-row',
              scroll_count: 3,
              scroll_by: 'container_height',
              wait_after_scroll: 1.0,
            },
            cache_mode: 'BYPASS',
            wait_for: '.Box-row',
            word_count_threshold: 50,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        // Should have captured multiple trending repos after scrolling
        expect(textContent?.text).toBeTruthy();
        expect(textContent?.text?.length).toBeGreaterThan(1000);
      },
      TEST_TIMEOUTS.long,
    );
  });

  // Session reuse across crawls, and session-scoped cookies.
  describe('Session Management Tests', () => {
    it(
      'should create and use a session',
      async () => {
        const sessionId = generateSessionId();

        // Crawl twice against the same session id: the first call creates
        // the session, the second reuses it.
        const firstCrawl = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            session_id: sessionId,
            cache_mode: 'BYPASS',
          },
        });
        await expectSuccessfulCrawl(firstCrawl);

        const secondCrawl = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com/features',
            session_id: sessionId,
            cache_mode: 'BYPASS',
          },
        });
        await expectSuccessfulCrawl(secondCrawl);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle cookies in session',
      async () => {
        const sessionId = generateSessionId();
        const sessionCookie = {
          name: 'test_cookie',
          value: 'test_value',
          domain: '.github.com',
          path: '/',
        };

        // Only verifies the crawl succeeds with a cookie attached; cookie
        // echo-back is covered by the httpbin cookie tests below.
        const crawlResult = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            session_id: sessionId,
            cookies: [sessionCookie],
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(crawlResult);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Content Extraction Tests', () => {
    // Skipped deliberately — CSS extraction cannot be expressed over the
    // REST API (see comment in the body). Kept for documentation purposes.
    it.skip(
      'should extract content using CSS selectors - SKIPPED: Not supported via REST API',
      async () => {
        // CSS extraction is not supported via the REST API due to Python class serialization limitations
        // This test is kept for documentation purposes but skipped
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://www.google.com',
            extraction_type: 'css',
            css_selectors: {
              title: 'title',
              search_button: 'input[type="submit"]',
              logo: 'img[alt*="Google"]',
            },
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should extract content using LLM via extract_with_llm tool',
      async () => {
        // Note: This test requires the Crawl4AI server to have an LLM provider configured
        try {
          const result = await client.callTool({
            name: 'extract_with_llm',
            arguments: {
              url: 'https://httpbin.org/html',
              query: 'Extract the main page title and any author names mentioned',
            },
          });

          expect(result).toBeTruthy();
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();

          // The response should be JSON with an "answer" field
          try {
            const parsed = JSON.parse(textContent?.text || '{}');
            expect(parsed).toHaveProperty('answer');
            expect(typeof parsed.answer).toBe('string');
            expect(parsed.answer.length).toBeGreaterThan(0);
          } catch {
            // If parsing fails, at least check we got text
            expect(textContent?.text?.length || 0).toBeGreaterThan(0);
          }
        } catch (error) {
          // If the server doesn't have LLM configured, it will return an error
          // and we treat the test as a soft skip rather than a failure.
          if (error instanceof Error && error.message?.includes('No LLM provider configured')) {
            console.log('⚠️  LLM extraction test skipped: Server needs LLM provider configured');
            return;
          }
          throw error;
        }
      },
      TEST_TIMEOUTS.long,
    );
  });

  // Media outputs: screenshot capture, PDF generation, and image filtering
  // options.
  describe('Media Handling Tests', () => {
    it(
      'should capture screenshots',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            screenshot: true,
            screenshot_wait_for: 1.0,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        await expectScreenshot(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should generate PDF',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            pdf: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        // PDF generation should return some content
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();
        // Should contain some content from the page
        // (httpbin.org/html is an excerpt of Moby Dick by Herman Melville)
        expect(textContent?.text?.toLowerCase()).toContain('herman');
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle image filtering',
      async () => {
        // Only asserts the options are accepted and the crawl succeeds; the
        // filtering effect itself is not inspected here.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            exclude_external_images: true,
            image_description_min_word_threshold: 20,
            image_score_threshold: 5,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  // Cache-mode behavior. The four requests in the first test are ordered
  // deliberately: populate (ENABLED) → hit (ENABLED) → BYPASS → DISABLED.
  describe('Performance & Caching Tests', () => {
    it(
      'should respect cache modes',
      async () => {
        const url = 'https://httpbin.org/html'; // Use a simpler page for cache testing

        // First request - populate cache with ENABLED mode
        const result1 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'ENABLED',
            word_count_threshold: 10,
          },
        });
        await expectSuccessfulCrawl(result1);
        const content1 = (result1 as ToolResult).content.find((c) => c.type === 'text')?.text;

        // Wait a bit to ensure cache is saved
        await delay(500);

        // Second request - should use cache (ENABLED mode)
        const startTime = Date.now();
        const result2 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'ENABLED',
            word_count_threshold: 10,
          },
        });
        const cacheTime = Date.now() - startTime;
        await expectSuccessfulCrawl(result2);
        const content2 = (result2 as ToolResult).content.find((c) => c.type === 'text')?.text;

        // Content should be identical if cache was used
        expect(content2).toBe(content1);

        // Third request - bypass cache
        const bypassStartTime = Date.now();
        const result3 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });
        const bypassTime = Date.now() - bypassStartTime;
        await expectSuccessfulCrawl(result3);

        // Cache hit should typically be faster, but we'll make this test more lenient
        // Just verify all requests succeeded
        expect(cacheTime).toBeGreaterThan(0);
        expect(bypassTime).toBeGreaterThan(0);

        // Fourth request - DISABLED mode should not use cache
        const result4 = await client.callTool({
          name: 'crawl',
          arguments: {
            url,
            cache_mode: 'DISABLED',
            word_count_threshold: 10,
          },
        });
        await expectSuccessfulCrawl(result4);
      },
      TEST_TIMEOUTS.long,
    );

    it(
      'should handle timeout configuration',
      async () => {
        // Timeouts are generous relative to the 1s endpoint delay, so this
        // only verifies the options are accepted.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/delay/1',
            timeout: 20000,
            page_timeout: 15000,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.short,
    );
  });

  // Output filtering: excluded tags, excluded selectors, and link filtering.
  describe('Content Filtering Tests', () => {
    it(
      'should filter content by tags',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html', // Simpler page for testing
            excluded_tags: ['script', 'style', 'nav', 'footer', 'header'],
            word_count_threshold: 10,
            cache_mode: 'BYPASS',
            only_text: true, // Force text-only output
            remove_overlay_elements: true,
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toBeTruthy();

        // Just verify we got content back - the server's filtering behavior may vary
        // With all the filters applied, content might be minimal
        expect(textContent?.text?.length).toBeGreaterThan(10);

        // Should contain some text from the page
        expect(textContent?.text).toBeTruthy();
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should filter content by selectors',
      async () => {
        // Only asserts the selector options are accepted; the removal effect
        // is not inspected here.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            excluded_selector: '.footer, .header-nav, [aria-label="Advertisement"]',
            remove_overlay_elements: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should handle link filtering',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            exclude_external_links: true,
            exclude_social_media_links: true,
            exclude_domains: ['twitter.com', 'facebook.com', 'linkedin.com'],
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        // Should not contain social media domains
        expect(textContent?.text).not.toMatch(/twitter\.com|facebook\.com/);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  // Anti-bot options: user simulation flags and custom headers/user agent.
  describe('Bot Detection Avoidance Tests', () => {
    it(
      'should simulate user behavior',
      async () => {
        // Enable the full anti-bot option set and verify the crawl succeeds.
        const crawlResult = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://github.com',
            simulate_user: true,
            override_navigator: true,
            magic: true,
            delay_before_scroll: 1000,
            scroll_delay: 500,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(crawlResult);
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should use custom headers and user agent',
      async () => {
        const crawlResult = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/headers',
            user_agent: 'Mozilla/5.0 (compatible; MCP Test Bot)',
            headers: {
              'Accept-Language': 'en-US,en;q=0.9',
              'Accept-Encoding': 'gzip, deflate, br',
              'X-Custom-Header': 'MCP-Test',
            },
            cache_mode: 'BYPASS',
          },
        });
        await expectSuccessfulCrawl(crawlResult);

        // httpbin.org/headers echoes request headers back in the body, so
        // both the UA and the custom header must appear in the crawled text.
        const textItem = (crawlResult as ToolResult).content.find((item) => item.type === 'text');
        expect(textItem?.text).toContain('MCP Test Bot');
        expect(textItem?.text).toContain('X-Custom-Header');
      },
      TEST_TIMEOUTS.medium,
    );
  });

  // Failure paths: the tool reports errors as text content rather than
  // throwing, so each test inspects the returned text.
  describe('Error Handling Tests', () => {
    // NOTE(review): overlaps with the invalid-URL test in 'Basic Crawling';
    // consider consolidating.
    it(
      'should handle invalid URLs gracefully',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'not-a-valid-url',
            cache_mode: 'BYPASS',
          },
        });

        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text).toContain('Error');
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle non-existent domains',
      async () => {
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://this-domain-definitely-does-not-exist-123456789.com',
            cache_mode: 'BYPASS',
          },
        });

        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent?.text?.toLowerCase()).toMatch(/error|failed/);
      },
      TEST_TIMEOUTS.short,
    );

    it(
      'should handle JavaScript errors gracefully',
      async () => {
        // js_code is passed as a single string here (an array elsewhere);
        // presumably both forms are accepted — confirm against the schema.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            js_code: 'throw new Error("Test error")',
            cache_mode: 'BYPASS',
          },
        });

        // Should still return content even if JS fails
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.short,
    );
  });

  // Kitchen-sink combinations: many options at once, proxy config, and
  // iframe processing.
  describe('Advanced Configurations', () => {
    it(
      'should handle complex multi-feature crawl',
      async () => {
        const sessionId = generateSessionId();

        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/html',
            // Browser config
            viewport_width: 1920,
            viewport_height: 1080,
            user_agent: 'MCP Advanced Test Bot',
            // Session
            session_id: sessionId,
            // JavaScript
            js_code: 'return document.querySelectorAll("h1").length',
            wait_after_js: 1000,
            // Content filtering
            excluded_tags: ['script', 'style'],
            word_count_threshold: 50,
            remove_overlay_elements: true,
            // Media
            screenshot: true,
            screenshot_wait_for: 1.0,
            // Performance
            cache_mode: 'DISABLED',
            timeout: 60000,
            // Bot detection
            simulate_user: true,
            override_navigator: true,
          },
        });

        await expectSuccessfulCrawl(result);
        // Screenshot might not always be returned in complex multi-feature crawls
        // especially with httpbin.org which is a simple HTML page
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.long,
    );

    it(
      'should handle proxy configuration',
      async () => {
        // Test that proxy configuration is accepted, even without a real proxy
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://httpbin.org/ip',
            proxy_server: 'http://example-proxy.com:8080',
            proxy_username: 'testuser',
            proxy_password: 'testpass',
            cache_mode: 'BYPASS',
            word_count_threshold: 10,
          },
        });

        // The request should complete (even if proxy doesn't exist, the config should be accepted)
        expect(result).toBeDefined();
        const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
        expect(textContent).toBeDefined();
      },
      TEST_TIMEOUTS.medium,
    );

    it(
      'should process iframes',
      async () => {
        // Only asserts the crawl succeeds with process_iframes enabled;
        // iframe content itself is not inspected.
        const result = await client.callTool({
          name: 'crawl',
          arguments: {
            url: 'https://www.w3schools.com/html/html_iframe.asp',
            process_iframes: true,
            cache_mode: 'BYPASS',
          },
        });

        await expectSuccessfulCrawl(result);
      },
      TEST_TIMEOUTS.medium,
    );
  });

  describe('Browser Configuration Tests', () => {
    // Cookie round-trips against httpbin.org/cookies, which echoes received
    // cookies as JSON in the response body.
    describe('Cookie handling', () => {
      it(
        'should set and send cookies correctly',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/cookies',
              cookies: [
                {
                  name: 'test_cookie',
                  value: 'test_value',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // httpbin returns cookies as JSON in the response
          expect(textContent?.text).toContain('test_cookie');
          expect(textContent?.text).toContain('test_value');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle multiple cookies',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/cookies',
              cookies: [
                {
                  name: 'session_id',
                  value: 'abc123',
                  domain: '.httpbin.org',
                  path: '/',
                },
                {
                  name: 'user_pref',
                  value: 'dark_mode',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Verify both cookies are present
          expect(textContent?.text).toContain('session_id');
          expect(textContent?.text).toContain('abc123');
          expect(textContent?.text).toContain('user_pref');
          expect(textContent?.text).toContain('dark_mode');
        },
        TEST_TIMEOUTS.short,
      );
    });

    // Custom request headers, verified via httpbin.org/headers which echoes
    // them in the response body.
    describe('Custom headers', () => {
      it(
        'should send custom headers',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/headers',
              headers: {
                'X-Custom-Header': 'test-value',
                'X-Request-ID': '12345',
                'Accept-Language': 'en-US,en;q=0.9',
              },
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // httpbin returns headers in the response
          expect(textContent?.text).toContain('X-Custom-Header');
          expect(textContent?.text).toContain('test-value');
          // Note: Some headers may be filtered by the browser
          // Just verify our custom header got through
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('User-Agent configuration', () => {
      it(
        'should set custom user agent',
        async () => {
          // httpbin.org/user-agent echoes the request's User-Agent header,
          // so the crawled text must contain the value we configured.
          const customUserAgent = 'MCP-Crawl4AI-Test/1.0 (Integration Tests)';
          const crawlResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/user-agent',
              user_agent: customUserAgent,
              cache_mode: 'BYPASS',
            },
          });
          await expectSuccessfulCrawl(crawlResult);

          const textItem = (crawlResult as ToolResult).content.find((item) => item.type === 'text');
          expect(textItem?.text).toBeTruthy();
          expect(textItem?.text).toContain(customUserAgent);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Viewport sizes and screenshots', () => {
      it(
        'should capture screenshot at mobile size (375x667)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 375,
              viewport_height: 667,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          await expectScreenshot(result);

          // Check screenshot was captured
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');
          expect(imageContent).toBeDefined();
          expect(imageContent?.data).toBeTruthy();

          // Verify reasonable data size for mobile screenshot
          const dataLength = imageContent?.data?.length || 0;
          expect(dataLength).toBeGreaterThan(10000); // At least 10KB
          expect(dataLength).toBeLessThan(3000000); // Less than 3MB for mobile (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should capture screenshot at tablet size (768x1024)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 768,
              viewport_height: 1024,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          await expectScreenshot(result);

          // Check screenshot was captured
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');
          expect(imageContent).toBeDefined();
          expect(imageContent?.data).toBeTruthy();

          // Verify reasonable data size for tablet screenshot
          const dataLength = imageContent?.data?.length || 0;
          expect(dataLength).toBeGreaterThan(15000); // At least 15KB
          expect(dataLength).toBeLessThan(3000000); // Less than 3MB for tablet (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should capture screenshot at HD size (1280x720)',
        async () => {
          // Crawl with an HD viewport and request a screenshot capture.
          const crawlResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 1280,
              viewport_height: 720,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(crawlResult);
          await expectScreenshot(crawlResult);

          // The screenshot comes back as a base64-encoded image content item.
          const screenshotItem = (crawlResult as ToolResult).content.find((c) => c.type === 'image');
          expect(screenshotItem).toBeDefined();
          expect(screenshotItem?.data).toBeTruthy();

          // Sanity-check the payload size: large enough to be a real HD capture,
          // small enough to rule out runaway output.
          const encodedLength = screenshotItem?.data?.length || 0;
          expect(encodedLength).toBeGreaterThan(20000); // At least 20KB
          expect(encodedLength).toBeLessThan(3000000); // Less than 3MB for HD (base64 encoded)
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should fail gracefully for very large viewport (1920x1080)',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              viewport_width: 1920,
              viewport_height: 1080,
              screenshot: true,
              screenshot_wait_for: 1,
              cache_mode: 'BYPASS',
            },
          });

          // Large viewports have been observed to time out or return an error,
          // but an outright success is also possible. The previous version could
          // pass without executing any assertion when both text and image content
          // were present; every outcome is now checked explicitly.
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          const imageContent = (result as ToolResult).content.find((c) => c.type === 'image');

          if (textContent?.text?.includes('Error') || textContent?.text?.includes('timeout')) {
            // Expected error for large viewport
            expect(textContent.text).toMatch(/Error|timeout/i);
          } else if (textContent && !imageContent) {
            // Text-only response without a screenshot is acceptable for large viewports
            expect(textContent.text).toBeTruthy();
          } else {
            // The crawl succeeded outright, so a screenshot must be present
            expect(imageContent?.data).toBeTruthy();
          }
        },
        TEST_TIMEOUTS.long,
      );
    });

    describe('Combined browser configurations', () => {
      it(
        'should handle cookies, headers, and custom viewport together',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/anything',
              viewport_width: 768,
              viewport_height: 1024,
              user_agent: 'MCP-Test-Bot/2.0',
              cookies: [
                {
                  name: 'auth_token',
                  value: 'secret123',
                  domain: '.httpbin.org',
                  path: '/',
                },
              ],
              headers: {
                'X-Test-Header': 'combined-test',
              },
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();

          // httpbin/anything endpoint returns all request data
          // Verify all configurations were applied
          expect(textContent?.text).toContain('MCP-Test-Bot/2.0');
          expect(textContent?.text).toContain('auth_token');
          expect(textContent?.text).toContain('X-Test-Header');
          expect(textContent?.text).toContain('combined-test');
        },
        TEST_TIMEOUTS.medium,
      );
    });
  });

  describe('Crawler Configuration Advanced Tests', () => {
    describe('Content filtering parameters', () => {
      // httpbin.org/forms/post serves a page that is almost entirely a <form>,
      // which makes it a good fixture for the remove_forms flag.
      it(
        'should remove forms when remove_forms is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Forms should be removed, so no form-related text should appear
          expect(textContent?.text).not.toContain('<form');
          expect(textContent?.text).not.toContain('type="submit"');
          expect(textContent?.text).not.toContain('input type=');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should keep forms when remove_forms is false',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: false,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Forms should be present - check for form-related keywords
          const text = textContent?.text?.toLowerCase() || '';
          // httpbin forms page should have form elements
          // NOTE(review): only asserts on output length, not actual form markup;
          // markdown conversion may not preserve raw tags, hence the loose check.
          expect(text.length).toBeGreaterThan(100);
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should preserve data attributes when keep_data_attributes is true',
        async () => {
          // Bootstrap's alerts docs page uses data-* attributes heavily.
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://getbootstrap.com/docs/4.0/components/alerts/',
              keep_data_attributes: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Should contain alert content
          expect(textContent?.text).toContain('alert');
        },
        TEST_TIMEOUTS.medium,
      );
    });

    describe('JavaScript execution parameters', () => {
      it(
        'should return only JS results when js_only is true',
        async () => {
          // With js_only set, the response should omit the page content and
          // carry only the results of the injected scripts.
          const jsOnlyResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: ['return document.title', 'return document.querySelectorAll("p").length'],
              js_only: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(jsOnlyResult);
          const textItem = (jsOnlyResult as ToolResult).content.find((c) => c.type === 'text');
          expect(textItem?.text).toBeTruthy();

          // A js_only response is far smaller than the full page markdown...
          expect(textItem?.text?.length).toBeLessThan(1000);
          // ...and must not include the page body (httpbin/html serves a Moby Dick excerpt).
          expect(textItem?.text).not.toContain('Herman Melville');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle js_only with session_id',
        async () => {
          // js_only combined with a persistent session should still crawl cleanly.
          const session = generateSessionId();
          const sessionResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              session_id: session,
              js_code: 'return window.location.href',
              js_only: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(sessionResult);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Page visibility parameters', () => {
      it(
        'should extract content when body is hidden and ignore_body_visibility is true',
        async () => {
          // Hide the <body> via injected JS, then verify extraction still works
          // because ignore_body_visibility tells the crawler to skip the check.
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: 'document.body.style.visibility = "hidden"; return "body hidden"',
              ignore_body_visibility: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // Should still extract content despite hidden body
          // (httpbin.org/html serves a Moby Dick excerpt credited to Herman Melville)
          expect(textContent?.text).toContain('Herman Melville');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should respect body visibility when ignore_body_visibility is false',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: 'document.body.style.visibility = "hidden"; return "body hidden"',
              ignore_body_visibility: false,
              cache_mode: 'BYPASS',
              word_count_threshold: 10,
            },
          });

          await expectSuccessfulCrawl(result);
          // Content extraction behavior may vary when body is hidden
          // so this test only asserts that the crawl itself succeeds.
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Debug and logging parameters', () => {
      it(
        'should capture console logs when log_console is true',
        async () => {
          // Emit logs at several levels, then crawl with console capture enabled.
          const logResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: [
                'console.log("Test log message 1")',
                'console.warn("Test warning")',
                'console.error("Test error")',
                'return "logs executed"',
              ],
              log_console: true,
              cache_mode: 'BYPASS',
            },
          });

          // Note: console logs may surface in a separate field or in verbose output,
          // so only successful completion is asserted here.
          await expectSuccessfulCrawl(logResult);
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should provide verbose output when verbose is true',
        async () => {
          // Verbose mode may add extra debugging information to the response.
          const verboseResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              verbose: true,
              cache_mode: 'BYPASS',
              word_count_threshold: 50,
            },
          });

          await expectSuccessfulCrawl(verboseResult);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Media filtering parameters', () => {
      it(
        'should exclude external images when exclude_external_images is true',
        async () => {
          // First, let's create a page with external images via JS
          // (one cross-origin image and one same-origin image are injected so the
          // exclude_external_images filter has something to act on)
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              js_code: `
                const img1 = document.createElement('img');
                img1.src = 'https://httpbin.org/image/png';
                img1.alt = 'External PNG';
                document.body.appendChild(img1);
                
                const img2 = document.createElement('img');
                img2.src = '/local-image.png';
                img2.alt = 'Local image';
                document.body.appendChild(img2);
                
                return document.images.length;
              `,
              exclude_external_images: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // The external image references should be filtered out
          // NOTE(review): no negative assertion on the external URL here; the
          // markdown output format makes a robust check difficult.
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should include external images when exclude_external_images is false',
        async () => {
          // Only verifies the parameter is accepted; the fixture page itself
          // contains no images to assert on.
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              exclude_external_images: false,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('Combined crawler configuration tests', () => {
      it(
        'should handle multiple filtering options together',
        async () => {
          // Stack several filters at once against a form-heavy fixture page.
          const filteredResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/forms/post',
              remove_forms: true,
              exclude_external_links: true,
              exclude_external_images: true,
              only_text: true,
              word_count_threshold: 10,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(filteredResult);
          const textItem = (filteredResult as ToolResult).content.find((c) => c.type === 'text');
          expect(textItem?.text).toBeTruthy();

          // With remove_forms enabled, no form markup should survive filtering.
          for (const forbidden of ['<form', 'type="submit"']) {
            expect(textItem?.text).not.toContain(forbidden);
          }
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should handle debug options with content extraction',
        async () => {
          // Debug flags should coexist with JS execution and data-attribute retention.
          const debugResult = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/html',
              verbose: true,
              log_console: true,
              js_code: 'console.log("Debug test"); return document.title',
              keep_data_attributes: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(debugResult);
        },
        TEST_TIMEOUTS.short,
      );
    });

    describe('New crawler parameters (0.7.3/0.7.4)', () => {
      // Coverage for parameters introduced in crawl4ai 0.7.3/0.7.4.
      it(
        'should accept undetected browser type',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/user-agent',
              browser_type: 'undetected',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // The undetected browser should mask automation indicators
          // but we can at least verify the request was accepted
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should filter content using css_selector',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              css_selector: 'h1',
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);
          const textContent = (result as ToolResult).content.find((c) => c.type === 'text');
          expect(textContent?.text).toBeTruthy();
          // css_selector returns ONLY the selected element content
          expect(textContent?.text?.toLowerCase()).toContain('example domain');
          // Should NOT contain the paragraph text that's outside the h1
          expect(textContent?.text).not.toContain('use in illustrative examples');
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should include links when include_links is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              include_links: true,
            },
          });

          await expectSuccessfulCrawl(result);
          // Check if links section is included
          const hasLinksInfo = (result as ToolResult).content.some(
            (item) => item.type === 'text' && item.text?.includes('Links:'),
          );
          expect(hasLinksInfo).toBe(true);
        },
        TEST_TIMEOUTS.short,
      );

      it(
        'should respect delay_before_return_html parameter',
        async () => {
          // Measure wall-clock time to confirm the extra delay is honored.
          const startTime = Date.now();

          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://httpbin.org/delay/1', // 1 second delay from server
              delay_before_return_html: 2, // Additional 2 second delay (in seconds, not ms)
              cache_mode: 'BYPASS',
            },
          });

          const elapsed = Date.now() - startTime;

          await expectSuccessfulCrawl(result);
          // Total time should be at least 3 seconds (1s from endpoint + 2s delay)
          expect(elapsed).toBeGreaterThanOrEqual(3000);
        },
        TEST_TIMEOUTS.medium,
      );

      it(
        'should convert relative URLs when resolve_absolute_urls is true',
        async () => {
          const result = await client.callTool({
            name: 'crawl',
            arguments: {
              url: 'https://example.com',
              resolve_absolute_urls: true,
              include_links: true,
              cache_mode: 'BYPASS',
            },
          });

          await expectSuccessfulCrawl(result);

          // Links should be in a separate content item
          const linksContent = (result as ToolResult).content.find(
            (c) => c.type === 'text' && c.text?.includes('Links:'),
          );

          // With include_links=true, links info should be present
          expect(linksContent).toBeDefined();
          // The fixture page is expected to expose exactly one external link.
          expect(linksContent?.text).toContain('External: 1');
        },
        TEST_TIMEOUTS.short,
      );
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/index.server.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { jest } from '@jest/globals';
import { describe, it, expect, beforeEach } from '@jest/globals';

// Create mock functions
// (one per Crawl4AIService method so each test can stub/inspect calls independently)
const mockGetMarkdown = jest.fn();
const mockCaptureScreenshot = jest.fn();
const mockGeneratePDF = jest.fn();
const mockExecuteJS = jest.fn();
const mockGetHTML = jest.fn();
const mockBatchCrawl = jest.fn();
const mockExtractWithLLM = jest.fn();
const mockCrawl = jest.fn();
const mockParseSitemap = jest.fn();

// Mock the Crawl4AIService module
// NOTE: unstable_mockModule registrations must run before the dynamic import of
// the modules under test (further down) for the mocks to take effect under ESM.
jest.unstable_mockModule('../crawl4ai-service.js', () => ({
  Crawl4AIService: jest.fn().mockImplementation(() => ({
    getMarkdown: mockGetMarkdown,
    captureScreenshot: mockCaptureScreenshot,
    generatePDF: mockGeneratePDF,
    executeJS: mockExecuteJS,
    getHTML: mockGetHTML,
    batchCrawl: mockBatchCrawl,
    extractWithLLM: mockExtractWithLLM,
    crawl: mockCrawl,
    parseSitemap: mockParseSitemap,
  })),
}));

// Mock MCP SDK
const mockSetRequestHandler = jest.fn();
const mockTool = jest.fn();
const mockConnect = jest.fn();

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/index.js', () => ({
  Server: jest.fn().mockImplementation(() => ({
    setRequestHandler: mockSetRequestHandler,
    tool: mockTool,
    connect: mockConnect,
  })),
}));

// Mock the types module that exports the schemas
// (plain objects tagged with `method` are enough for the handler lookup in beforeEach)
const CallToolRequestSchema = { method: 'tools/call' };
const ListToolsRequestSchema = { method: 'tools/list' };

jest.unstable_mockModule('@modelcontextprotocol/sdk/types.js', () => ({
  CallToolRequestSchema,
  ListToolsRequestSchema,
}));

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/stdio.js', () => ({
  StdioServerTransport: jest.fn(),
}));

// Mock axios
// (both the static axios.get and the instance created via axios.create are stubbed)
const mockPost = jest.fn();
const mockGet = jest.fn();
const mockHead = jest.fn();

jest.unstable_mockModule('axios', () => ({
  default: {
    create: jest.fn(() => ({
      post: mockPost,
      get: mockGet,
      head: mockHead,
    })),
    get: mockGet,
  },
}));

// Now dynamically import the modules after mocks are set up
// (dynamic import is required so the unstable_mockModule registrations above
// are in place before the modules under test are evaluated)
const { Crawl4AIServer } = await import('../server.js');
const {
  GetMarkdownSchema,
  CrawlSchema,
  BatchCrawlSchema,
  CaptureScreenshotSchema: _CaptureScreenshotSchema,
  GeneratePdfSchema: _GeneratePdfSchema,
  ExecuteJsSchema: _ExecuteJsSchema,
  ExtractWithLlmSchema: _ExtractWithLlmSchema,
  SmartCrawlSchema: _SmartCrawlSchema,
  CrawlRecursiveSchema: _CrawlRecursiveSchema,
} = await import('../schemas/validation-schemas.js');
const { Crawl4AIService } = await import('../crawl4ai-service.js');

// Import types statically (these are removed at compile time)
import type {
  MarkdownEndpointResponse,
  ScreenshotEndpointResponse,
  PDFEndpointResponse,
  HTMLEndpointResponse,
  CrawlEndpointResponse,
} from '../types.js';

// Define types for test results
// Minimal shape of one entry in a tool result's `content` array; only the
// fields these tests actually read are declared.
interface ContentItem {
  type: string;
  text?: string;
  data?: string;
  resource?: {
    uri: string;
    mimeType: string;
    blob: string;
  };
}

// Overall shape of a tool handler's return value.
interface ToolResult {
  content: ContentItem[];
}

// Signature of the MCP request handler captured from setRequestHandler in beforeEach.
type RequestHandler = (request: { method: string; params: unknown }) => Promise<ToolResult>;

// Removed TestServerMethods interface - no longer needed since we use 'any' type

describe('Crawl4AIServer Tool Handlers', () => {
  let server: any; // eslint-disable-line @typescript-eslint/no-explicit-any
  let requestHandler: RequestHandler;

  beforeEach(async () => {
    // clearAllMocks wipes call history/instances but keeps implementations.
    jest.clearAllMocks();

    // Reset all mock functions
    // (mockReset additionally drops any implementation queued by a previous test)
    mockGetMarkdown.mockReset();
    mockCaptureScreenshot.mockReset();
    mockGeneratePDF.mockReset();
    mockExecuteJS.mockReset();
    mockGetHTML.mockReset();
    mockBatchCrawl.mockReset();
    mockExtractWithLLM.mockReset();
    mockCrawl.mockReset();
    mockParseSitemap.mockReset();
    mockPost.mockReset();
    mockGet.mockReset();
    mockHead.mockReset();

    // Create server instance - the mock will be used automatically
    // (the literals are fallbacks for when the env vars are not set)
    server = new Crawl4AIServer(
      process.env.CRAWL4AI_BASE_URL || 'http://test.example.com',
      process.env.CRAWL4AI_API_KEY || 'test-api-key',
      'test-server',
      '1.0.0',
    );

    // Start the server to register handlers
    await server.start();

    // Get the request handler for CallToolRequestSchema
    const handlerCalls = mockSetRequestHandler.mock.calls;

    // Find the handler for CallToolRequestSchema (tools/call)
    // The mocked schema objects carry a `method` tag, so matching on it
    // identifies the tools/call handler among all registered handlers.
    for (const call of handlerCalls) {
      const [schema, handler] = call;
      if (schema && schema.method === 'tools/call') {
        requestHandler = handler;
        break;
      }
    }

    // Debug: Check if we found the handler
    if (!requestHandler) {
      console.log('Handler calls:', handlerCalls.length);
      handlerCalls.forEach((call, i) => {
        console.log(`Call ${i}:`, call[0], typeof call[1]);
      });
    }
  });

  // Add a simple test to verify mocking works
  it('should use the mocked service', () => {
    const MockedService = Crawl4AIService as jest.MockedClass<typeof Crawl4AIService>;
    expect(MockedService).toHaveBeenCalledTimes(1);
    // Assert against the same env-fallback expressions used when constructing
    // the server in beforeEach, instead of hard-coding 'http://localhost:11235';
    // the previous literal only matched when jest setup happened to set that URL.
    expect(MockedService).toHaveBeenCalledWith(
      process.env.CRAWL4AI_BASE_URL || 'http://test.example.com',
      process.env.CRAWL4AI_API_KEY || 'test-api-key',
    );
  });

  describe('Constructor and setup', () => {
    it('should initialize with correct configuration', () => {
      // The server plus its service and session store must all exist after construction.
      for (const value of [server, server.service, server.sessions]) {
        expect(value).toBeDefined();
      }
    });

    it('should set up handlers on construction', () => {
      // Handler registration is observable through the mocked Server.setRequestHandler.
      expect(mockSetRequestHandler).toHaveBeenCalled();
      expect(mockSetRequestHandler.mock.calls.length).toBeGreaterThan(0);
    });
  });

  describe('Tool Handler Success Cases', () => {
    describe('get_markdown', () => {
      it('should handle successful markdown extraction', async () => {
        // Arrange: the service resolves with a fit-filtered markdown payload.
        const payload: MarkdownEndpointResponse = {
          url: 'https://example.com',
          filter: 'fit',
          query: null,
          cache: 'false',
          markdown: '# Example Page\n\nThis is example content.',
          success: true,
        };
        mockGetMarkdown.mockResolvedValue(payload);

        const response: ToolResult = await server.getMarkdown({
          url: 'https://example.com',
        });

        // Assert: a single text item containing the markdown plus metadata lines.
        expect(response.content).toHaveLength(1);
        const [item] = response.content;
        expect(item.type).toBe('text');
        expect(item.text).toContain('# Example Page');
        expect(item.text).toContain('URL: https://example.com');
        expect(item.text).toContain('Filter: fit');
      });

      it('should handle markdown with query', async () => {
        // Arrange: a bm25-filtered response driven by a search query.
        const payload: MarkdownEndpointResponse = {
          url: 'https://example.com',
          filter: 'bm25',
          query: 'test query',
          cache: 'false',
          markdown: 'Filtered content',
          success: true,
        };
        mockGetMarkdown.mockResolvedValue(payload);

        const response: ToolResult = await server.getMarkdown({
          url: 'https://example.com',
          filter: 'bm25',
          query: 'test query',
        });

        // The handler translates filter/query into the service's short f/q params.
        expect(mockGetMarkdown).toHaveBeenCalledWith({
          url: 'https://example.com',
          f: 'bm25',
          q: 'test query',
        });
        expect(response.content[0].text).toContain('Query: test query');
      });
    });

    describe('capture_screenshot', () => {
      it('should handle successful screenshot capture', async () => {
        const payload: ScreenshotEndpointResponse = {
          success: true,
          screenshot: 'base64-encoded-screenshot-data',
        };
        mockCaptureScreenshot.mockResolvedValue(payload);

        const response: ToolResult = await server.captureScreenshot({
          url: 'https://example.com',
        });

        // Expect an image item first, followed by a confirmation text item.
        expect(response.content).toHaveLength(2);
        const [image, caption] = response.content;
        expect(image.type).toBe('image');
        expect(image.data).toBe('base64-encoded-screenshot-data');
        expect(caption.type).toBe('text');
        expect(caption.text).toBe('Screenshot captured for: https://example.com');
      });
    });

    describe('generate_pdf', () => {
      it('should handle successful PDF generation', async () => {
        const payload: PDFEndpointResponse = {
          success: true,
          pdf: 'base64-encoded-pdf-data',
        };
        mockGeneratePDF.mockResolvedValue(payload);

        const response: ToolResult = await server.generatePDF({
          url: 'https://example.com',
        });

        // Expect a resource item carrying the PDF blob plus a confirmation text item.
        expect(response.content).toHaveLength(2);
        const [resourceItem, caption] = response.content;
        expect(resourceItem.type).toBe('resource');
        expect(resourceItem.resource?.blob).toBeDefined();
        expect(caption.type).toBe('text');
        expect(caption.text).toContain('PDF generated for: https://example.com');
      });
    });

    describe('execute_js', () => {
      // Happy path: every script returns a value and all results are rendered.
      it('should handle successful JS execution', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: ['Title: Example', 'Link count: 5'],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: ['return document.title', 'return document.links.length'],
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Title: Example');
        expect(result.content[0].text).toContain('Link count: 5');
      });

      // A null js_execution_result should yield a "No results returned" message.
      it('should handle JS execution without results', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: null,
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'console.log("test")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('No results returned');
      });

      // A per-script failure object (success: false + error/stack) is surfaced
      // in the rendered output rather than thrown.
      it('should handle JS execution with error status', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: [
              {
                success: false,
                error: 'Error: Test error',
                stack: 'Error: Test error\n    at eval (eval at evaluate (:291:30), <anonymous>:4:43)',
              },
            ],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'throw new Error("Test error")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Script: throw new Error("Test error")');
        expect(result.content[0].text).toContain('Returned: Error: Error: Test error');
      });

      // A result object with success but no value maps to the "no return value" message.
      it('should handle JS execution with no return value', async () => {
        const mockResponse = {
          markdown: 'Page content',
          js_execution_result: {
            success: true,
            results: [{ success: true }],
          },
        };

        mockExecuteJS.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: 'console.log("hello")',
        });

        expect(result.content[0].text).toContain('JavaScript executed on: https://example.com');
        expect(result.content[0].text).toContain('Returned: Executed successfully (no return value)');
      });
    });

    describe('get_html', () => {
      it('should handle successful HTML retrieval', async () => {
        const payload: HTMLEndpointResponse = {
          html: '<html><body><h1>Example</h1></body></html>',
          url: 'https://example.com',
          success: true,
        };
        mockGetHTML.mockResolvedValue(payload);

        const response: ToolResult = await server.getHTML({
          url: 'https://example.com',
        });

        // The handler returns the raw HTML verbatim as a single text item.
        expect(response.content).toHaveLength(1);
        const [item] = response.content;
        expect(item.type).toBe('text');
        expect(item.text).toBe('<html><body><h1>Example</h1></body></html>');
      });
    });

    describe('batch_crawl', () => {
      it('should handle successful batch crawl', async () => {
        const mockResponse = {
          results: [
            { url: 'https://example1.com', markdown: { raw_markdown: 'Content 1' }, success: true },
            { url: 'https://example2.com', markdown: { raw_markdown: 'Content 2' }, success: true },
          ],
          success: true,
        };

        // Mock axios response since batchCrawl uses axiosClient directly
        mockPost.mockResolvedValue({ data: mockResponse });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example1.com', 'https://example2.com'],
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].text).toContain('Batch crawl completed');
        expect(result.content[0].text).toContain('Processed 2 URLs');
      });

      it('should handle batch crawl with remove_images', async () => {
        // Mock axios response since batchCrawl uses axiosClient directly
        mockPost.mockResolvedValue({ data: { results: [] } });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example.com'],
          remove_images: true,
        });

        // remove_images is implemented by excluding the image-related tags
        // in the crawler_config sent to the /crawl endpoint.
        expect(mockPost).toHaveBeenCalledWith('/crawl', {
          urls: ['https://example.com'],
          crawler_config: {
            exclude_tags: ['img', 'picture', 'svg'],
          },
        });
        expect(result.content[0].text).toContain('Batch crawl completed');
      });
    });

    describe('crawl', () => {
      it('should handle successful crawl with all options', async () => {
        // Fully-populated response exercising every optional field the handler renders
        // (markdown variants, screenshot, pdf, JS results, console messages, ...).
        const mockResponse: CrawlEndpointResponse = {
          success: true,
          results: [
            {
              url: 'https://example.com',
              html: '<html>...</html>',
              cleaned_html: '<html>clean</html>',
              fit_html: '<html>fit</html>',
              success: true,
              status_code: 200,
              response_headers: {},
              session_id: 'test-session',
              metadata: { title: 'Example' },
              links: { internal: [], external: [] },
              media: { images: [], videos: [], audios: [] },
              markdown: {
                raw_markdown: '# Example',
                markdown_with_citations: '# Example [1]',
                references_markdown: '[1]: https://example.com',
                fit_markdown: '# Example',
                fit_html: '<h1>Example</h1>',
              },
              tables: [],
              extracted_content: null,
              screenshot: 'screenshot-data',
              pdf: 'pdf-data',
              mhtml: null,
              js_execution_result: { success: true, results: ['JS result'] },
              downloaded_files: null,
              network_requests: null,
              console_messages: ['Console log'],
              ssl_certificate: null,
              dispatch_result: null,
            },
          ],
          server_processing_time_s: 1.5,
          server_memory_delta_mb: 10,
          server_peak_memory_mb: 100,
        };

        mockCrawl.mockResolvedValue(mockResponse);

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          screenshot: true,
          pdf: true,
          js_code: 'return document.title',
          session_id: 'test-session',
        });

        expect(result.content.length).toBeGreaterThan(0); // Multiple content types
        // Check text content
        const textContent = result.content.find((c) => c.type === 'text' && c.text?.includes('# Example'));
        expect(textContent).toBeDefined();
        // Check screenshot
        const screenshotContent = result.content.find((c) => c.type === 'image');
        expect(screenshotContent?.data).toBe('screenshot-data');
      });

      it('should handle crawl with proxy configuration', async () => {
        const mockResponse: CrawlEndpointResponse = {
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Proxied content' },
              success: true,
              status_code: 200,
            },
          ],
        };

        mockCrawl.mockResolvedValue(mockResponse);

        await server.crawl({
          url: 'https://example.com',
          proxy_server: 'http://proxy.example.com:8080',
          proxy_username: 'user',
          proxy_password: 'pass',
        });

        expect(mockCrawl).toHaveBeenCalledWith(
          expect.objectContaining({
            browser_config: expect.objectContaining({
              proxy_config: {
                server: 'http://proxy.example.com:8080',
                username: 'user',
                password: 'pass',
              },
            }),
          }),
        );
      });

      it('should handle crawl with cookies and headers', async () => {
        const mockResponse: CrawlEndpointResponse = {
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content with auth' },
              success: true,
              status_code: 200,
            },
          ],
        };

        mockCrawl.mockResolvedValue(mockResponse);

        await server.crawl({
          url: 'https://example.com',
          cookies: [{ name: 'session', value: 'abc123' }],
          headers: { Authorization: 'Bearer token123' },
        });

        expect(mockCrawl).toHaveBeenCalledWith(
          expect.objectContaining({
            browser_config: expect.objectContaining({
              cookies: [{ name: 'session', value: 'abc123' }],
              headers: { Authorization: 'Bearer token123' },
            }),
          }),
        );
      });

      it('should handle virtual scroll configuration', async () => {
        const mockResponse: CrawlEndpointResponse = {
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Scrolled content' },
              success: true,
              status_code: 200,
            },
          ],
        };

        mockCrawl.mockResolvedValue(mockResponse);

        await server.crawl({
          url: 'https://example.com',
          virtual_scroll_config: {
            enabled: true,
            scroll_step: 100,
            max_scrolls: 10,
          },
        });

        expect(mockCrawl).toHaveBeenCalledWith(
          expect.objectContaining({
            crawler_config: expect.objectContaining({
              virtual_scroll_config: {
                enabled: true,
                scroll_step: 100,
                max_scrolls: 10,
              },
            }),
          }),
        );
      });

      it('should handle js_code as null error', async () => {
        await expect(
          server.crawl({
            url: 'https://example.com',
            js_code: null,
          }),
        ).rejects.toThrow('js_code parameter is null');
      });
    });

    describe('extract_with_llm', () => {
      it('should handle successful LLM extraction', async () => {
        // The handler should surface the LLM answer verbatim as the only content item.
        const llmAnswer = 'The main topic is JavaScript testing.';
        mockExtractWithLLM.mockResolvedValue({ answer: llmAnswer });

        const result: ToolResult = await server.extractWithLLM({
          url: 'https://example.com',
          query: 'What is the main topic?',
        });

        expect(result.content).toHaveLength(1);
        expect(result.content[0].text).toBe(llmAnswer);
      });
    });

    describe('extract_links', () => {
      it('should extract and categorize links', async () => {
        // extractLinks posts via axiosClient; links arrive as {href, text} objects here.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: {
                  internal: [
                    { href: '/page1', text: 'Page 1' },
                    { href: '/page2', text: 'Page 2' },
                  ],
                  external: [{ href: 'https://external.com', text: 'External' }],
                },
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
          categorize: true,
        });

        expect(result.content[0].text).toContain('Link analysis for https://example.com:');
        expect(result.content[0].text).toContain('internal (2)');
        expect(result.content[0].text).toContain('/page1');
        expect(result.content[0].text).toContain('external (1)');
      });

      it('should categorize external links (social, images, scripts)', async () => {
        // Links may also arrive as bare URL strings; categorization inspects the URL
        // to re-bucket social domains, image extensions, and script/style assets.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: {
                  internal: [],
                  external: [
                    'https://facebook.com/profile',
                    'https://example.com/image.jpg',
                    'https://cdn.com/script.js',
                  ],
                },
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
          categorize: true,
        });

        // Every external link was re-bucketed, leaving the external group empty.
        expect(result.content[0].text).toContain('social (1)');
        expect(result.content[0].text).toContain('images (1)');
        expect(result.content[0].text).toContain('scripts (1)');
        expect(result.content[0].text).toContain('external (0)');
      });
    });

    describe('crawl_recursive', () => {
      it('should crawl recursively with depth limit', async () => {
        // Ensure mock is clean before setting up
        mockPost.mockReset();

        // First call returns the root page linking to page1; second call returns
        // page1 with no further links, so the crawl stops at two pages.
        mockPost
          .mockResolvedValueOnce({
            data: {
              results: [
                {
                  url: 'https://example.com',
                  links: {
                    internal: [{ href: 'https://example.com/page1', text: 'Page 1' }],
                  },
                  markdown: { raw_markdown: 'Home page' },
                  success: true,
                },
              ],
            },
          })
          .mockResolvedValueOnce({
            data: {
              results: [
                {
                  url: 'https://example.com/page1',
                  links: { internal: [] },
                  markdown: { raw_markdown: 'Page 1 content' },
                  success: true,
                },
              ],
            },
          });

        const result: ToolResult = await server.crawlRecursive({
          url: 'https://example.com',
          max_depth: 2,
        });

        expect(result.content[0].text).toContain('Recursive crawl completed:');
        expect(result.content[0].text).toContain('Pages crawled: 2');
        expect(result.content[0].text).toContain('https://example.com');
        expect(result.content[0].text).toContain('https://example.com/page1');
      });
    });

    describe('parse_sitemap', () => {
      it('should parse sitemap successfully', async () => {
        // A standard three-URL sitemap served through the mocked axios GET.
        const sitemapXml = `<?xml version="1.0" encoding="UTF-8"?>
            <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
              <url><loc>https://example.com/</loc></url>
              <url><loc>https://example.com/page1</loc></url>
              <url><loc>https://example.com/page2</loc></url>
            </urlset>`;
        mockGet.mockResolvedValue({ data: sitemapXml });

        const result: ToolResult = await server.parseSitemap({
          url: 'https://example.com/sitemap.xml',
        });

        expect(result.content[0].text).toContain('Sitemap parsed successfully:');
        expect(result.content[0].text).toContain('Total URLs found: 3');
        expect(result.content[0].text).toContain('https://example.com/');
        expect(result.content[0].text).toContain('https://example.com/page1');
      });
    });

    describe('smart_crawl', () => {
      it('should handle smart crawl for HTML content', async () => {
        // smartCrawl first issues a HEAD request to sniff the content type.
        mockHead.mockResolvedValue({
          headers: { 'content-type': 'text/html' },
        });
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                markdown: { raw_markdown: 'HTML content' },
                links: { internal: [], external: [] },
              },
            ],
          },
        });

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type');
        // The detected-type banner above is the only stable part of the HTML output.
      });

      it('should handle smart crawl for PDF content', async () => {
        mockHead.mockResolvedValue({
          headers: { 'content-type': 'application/pdf' },
        });

        // Mock the crawl response for PDF
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                markdown: { raw_markdown: 'PDF content extracted' },
                links: { internal: [], external: [] },
              },
            ],
          },
        });

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/doc.pdf',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type');
        expect(result.content[0].text).toContain('PDF content extracted');
      });
    });
  });

  describe('Tool Handler Error Cases', () => {
    describe('Service errors', () => {
      it('should handle service error for get_markdown', async () => {
        // Plain service failures are wrapped with a descriptive prefix.
        mockGetMarkdown.mockRejectedValue(new Error('Network error'));

        await expect(server.getMarkdown({ url: 'https://example.com' })).rejects.toThrow(
          'Failed to get markdown: Network error',
        );
      });

      it('should handle axios error with response detail', async () => {
        // Mimics an axios error whose response body carries a server-side detail message.
        mockCaptureScreenshot.mockRejectedValue({
          response: { data: { detail: 'Invalid API key' } },
        });

        await expect(server.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
          'Failed to capture screenshot: Invalid API key',
        );
      });

      it('should handle missing screenshot data', async () => {
        // A resolved response with an empty screenshot field must still be treated as failure.
        mockCaptureScreenshot.mockResolvedValue({ success: false, screenshot: '' });

        await expect(server.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
          'Screenshot capture failed - no screenshot data in response',
        );
      });

      it('should handle missing PDF data', async () => {
        // success:true with an empty pdf payload is also rejected.
        mockGeneratePDF.mockResolvedValue({ success: true, pdf: '' });

        await expect(server.generatePDF({ url: 'https://example.com' })).rejects.toThrow(
          'PDF generation failed - no PDF data in response',
        );
      });
    });

    describe('Validation errors', () => {
      it('should handle missing scripts for execute_js', async () => {
        // Cast deliberately bypasses the compile-time type to exercise runtime validation.
        await expect(
          server.executeJS({ url: 'https://example.com', scripts: null as unknown as string }),
        ).rejects.toThrow('scripts is required');
      });

      it('should handle empty crawl options', async () => {
        // Passing null instead of an options object triggers the guard clause.
        await expect(server.crawl(null as unknown as Parameters<typeof server.crawl>[0])).rejects.toThrow(
          'crawl requires options object with at least a url parameter',
        );
      });

      it('should handle crawl_recursive errors', async () => {
        // Setup the mock to fail - crawlRecursive catches the error internally
        mockPost.mockRejectedValue(new Error('API error'));

        const result: ToolResult = await server.crawlRecursive({ url: 'https://example.com' });

        // The method catches errors and returns a message about no pages crawled
        expect(result.content[0].text).toContain('Pages crawled: 0');
        expect(result.content[0].text).toContain('No pages could be crawled');
      });

      it('should handle parse_sitemap errors', async () => {
        // Unlike crawlRecursive, parseSitemap re-throws fetch failures with a prefix.
        mockGet.mockRejectedValue(new Error('Failed to fetch sitemap'));

        await expect(server.parseSitemap({ url: 'https://example.com/sitemap.xml' })).rejects.toThrow(
          'Failed to parse sitemap: Failed to fetch sitemap',
        );
      });
    });

    describe('Edge cases', () => {
      it('should handle batch crawl with no results', async () => {
        // An empty results array should report zero processed URLs, not error out.
        mockPost.mockResolvedValue({
          data: {
            results: [],
          },
        });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example.com'],
        });

        expect(result.content[0].text).toContain('Batch crawl completed');
        expect(result.content[0].text).toContain('Processed 0 URLs');
      });

      it('should handle extract_links with no links', async () => {
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: {
                  internal: [],
                  external: [],
                },
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
        });

        // Header is still printed; the body is effectively empty (trailing whitespace only).
        expect(result.content[0].text).toContain('All links from https://example.com:');
        expect(result.content[0].text).toMatch(/\n\s*$/);
      });

      it('should handle smart crawl with HEAD request failure', async () => {
        // When content-type sniffing fails, smartCrawl falls back to an HTML crawl.
        mockHead.mockRejectedValue(new Error('HEAD failed'));
        // Fallback to HTML crawl
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                markdown: { raw_markdown: 'Fallback content' },
                links: { internal: [], external: [] },
              },
            ],
          },
        });

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type');
      });
    });

    describe('ZodError validation tests', () => {
      it('should validate get_markdown parameters', () => {
        // A bare URL is sufficient.
        expect(() => GetMarkdownSchema.parse({ url: 'https://example.com' })).not.toThrow();

        // url is mandatory.
        expect(() => GetMarkdownSchema.parse({ filter: 'fit' })).toThrow();

        // bm25 and llm filters require an accompanying query.
        expect(() => GetMarkdownSchema.parse({ url: 'https://example.com', filter: 'bm25' })).toThrow(
          'Query parameter is required when using bm25 or llm filter',
        );
      });

      it('should validate crawl parameters', () => {
        // A bare URL is sufficient.
        expect(() => CrawlSchema.parse({ url: 'https://example.com' })).not.toThrow();

        // js_only is only meaningful within an existing session.
        expect(() => CrawlSchema.parse({ url: 'https://example.com', js_only: true })).toThrow(
          'js_only requires session_id',
        );

        // An empty js_code array is rejected outright.
        expect(() => CrawlSchema.parse({ url: 'https://example.com', js_code: [] })).toThrow(
          'js_code array cannot be empty',
        );
      });

      it('should validate batch_crawl parameters', () => {
        // A single-URL array is valid.
        expect(() => BatchCrawlSchema.parse({ urls: ['https://example.com'] })).not.toThrow();

        // urls must be an array.
        expect(() => BatchCrawlSchema.parse({ urls: 'not-an-array' })).toThrow();
      });
    });

    describe('Parameter validation edge cases', () => {
      // Intentionally empty: these cases need schema validation, which runs at the
      // MCP handler level; calling server methods directly would bypass it.
    });

    describe('Additional coverage tests', () => {
      it('should handle crawl with media extraction', async () => {
        // Media arrays are returned by the service; the handler still emits markdown first.
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              media: {
                images: [
                  { src: 'https://example.com/img1.jpg', alt: 'Image 1' },
                  { src: 'https://example.com/img2.jpg', alt: 'Image 2' },
                ],
                videos: [{ src: 'https://example.com/video.mp4', type: 'video/mp4' }],
                audios: [],
              },
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          media_handling: { images: true, videos: true },
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with tables extraction', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              tables: [
                {
                  headers: ['Name', 'Age'],
                  rows: [
                    ['John', '30'],
                    ['Jane', '25'],
                  ],
                  markdown: '| Name | Age |\n|------|-----|\n| John | 30 |\n| Jane | 25 |',
                },
              ],
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with network_requests', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              network_requests: [
                { url: 'https://api.example.com/data', method: 'GET', status: 200 },
                { url: 'https://api.example.com/post', method: 'POST', status: 201 },
              ],
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          network_requests: true,
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with mhtml output', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              mhtml: 'MHTML content here',
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          mhtml: true,
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with downloaded_files', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              downloaded_files: {
                'file1.pdf': 'base64content1',
                'file2.doc': 'base64content2',
              },
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          download_files: true,
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with ssl_certificate', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              ssl_certificate: {
                issuer: "Let's Encrypt",
                subject: '*.example.com',
                validFrom: '2024-01-01',
                validTo: '2024-12-31',
                protocol: 'TLSv1.3',
              },
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          ssl_certificate: true,
        });

        expect(result.content.length).toBeGreaterThan(0);
        expect(result.content[0].type).toBe('text');
        expect(result.content[0].text).toBe('Content');
      });

      it('should handle crawl with wait_for conditions', async () => {
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Dynamic content loaded' },
              success: true,
              status_code: 200,
            },
          ],
        });

        await server.crawl({
          url: 'https://example.com',
          wait_for: {
            selector: '.dynamic-content',
            timeout: 5000,
          },
        });

        // wait_for must be forwarded inside crawler_config untouched.
        expect(mockCrawl).toHaveBeenCalledWith(
          expect.objectContaining({
            crawler_config: expect.objectContaining({
              wait_for: {
                selector: '.dynamic-content',
                timeout: 5000,
              },
            }),
          }),
        );
      });

      it('should handle crawl error scenarios', async () => {
        // A failed result with no markdown yields the placeholder message.
        mockCrawl.mockResolvedValue({
          success: false,
          results: [
            {
              url: 'https://example.com',
              success: false,
              error: 'Page load timeout',
              status_code: 0,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
        });

        expect(result.content[0].text).toBe('No content extracted');
      });

      it('should handle extract_links with categorized output', async () => {
        // Only internal/external buckets from the response are consumed; the extra
        // social/documents/images keys in the mock are ignored by the handler.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: {
                  internal: [
                    { href: '/page1', text: 'Page 1' },
                    { href: '/page2', text: 'Page 2' },
                  ],
                  external: [{ href: 'https://external.com', text: 'External' }],
                  social: [{ href: 'https://twitter.com/example', text: 'Twitter' }],
                  documents: [{ href: '/file.pdf', text: 'PDF Document' }],
                  images: [{ href: '/image.jpg', text: 'Image' }],
                },
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
          categorize: true,
        });

        expect(result.content[0].text).toContain('internal (2)');
        expect(result.content[0].text).toContain('external (1)');
        expect(result.content[0].text).toContain('social (0)'); // No social links in internal/external
        expect(result.content[0].text).toContain('documents (0)'); // No documents in internal/external
        expect(result.content[0].text).toContain('images (0)'); // No images in internal/external
      });

      it('should handle smart_crawl for sitemap', async () => {
        // Set up axios client mock for the server instance
        const axiosClientMock = {
          head: jest.fn().mockResolvedValue({
            headers: { 'content-type': 'application/xml' },
          }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com/sitemap.xml',
                  markdown: { raw_markdown: 'Sitemap content' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/sitemap.xml',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
        expect(result.content[0].text).toContain('Sitemap content');
        expect(axiosClientMock.post).toHaveBeenCalledWith(
          '/crawl',
          expect.objectContaining({
            urls: ['https://example.com/sitemap.xml'],
            crawler_config: expect.objectContaining({
              cache_mode: 'ENABLED',
            }),
            browser_config: expect.objectContaining({
              headless: true,
              browser_type: 'chromium',
            }),
          }),
        );
      });

      it('should handle smart_crawl for RSS feed', async () => {
        // Same pattern as the sitemap case, but with an RSS content-type header.
        const axiosClientMock = {
          head: jest.fn().mockResolvedValue({
            headers: { 'content-type': 'application/rss+xml' },
          }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com/feed.rss',
                  markdown: { raw_markdown: 'RSS feed content' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/feed.rss',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type: rss');
        expect(result.content[0].text).toContain('RSS feed content');
        expect(axiosClientMock.post).toHaveBeenCalledWith(
          '/crawl',
          expect.objectContaining({
            urls: ['https://example.com/feed.rss'],
            crawler_config: expect.objectContaining({
              cache_mode: 'ENABLED',
            }),
            browser_config: expect.objectContaining({
              headless: true,
              browser_type: 'chromium',
            }),
          }),
        );
      });

      it('should handle smart_crawl for JSON content', async () => {
        const axiosClientMock = {
          head: jest.fn().mockResolvedValue({
            headers: { 'content-type': 'application/json' },
          }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com/data.json',
                  markdown: { raw_markdown: 'JSON content' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/data.json',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type: json');
        expect(result.content[0].text).toContain('JSON content');
        expect(axiosClientMock.post).toHaveBeenCalledWith(
          '/crawl',
          expect.objectContaining({
            urls: ['https://example.com/data.json'],
            crawler_config: expect.objectContaining({
              cache_mode: 'ENABLED',
            }),
            browser_config: expect.objectContaining({
              headless: true,
              browser_type: 'chromium',
            }),
          }),
        );
      });

      it('should correctly categorize internal documents and images', async () => {
        // Internal/external links with file extensions are re-bucketed by type.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: {
                  internal: [
                    { href: '/page1', text: 'Page 1' },
                    { href: '/docs/manual.pdf', text: 'Manual' },
                    { href: '/images/logo.png', text: 'Logo' },
                    { href: '/assets/style.css', text: 'Styles' },
                  ],
                  external: [{ href: 'https://example.com/report.pdf', text: 'External Report' }],
                },
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
          categorize: true,
        });

        expect(result.content[0].text).toContain('internal (1)'); // Only /page1 remains as internal
        expect(result.content[0].text).toContain('external (0)'); // External PDF moved to documents
        expect(result.content[0].text).toContain('documents (2)'); // Both PDFs
        expect(result.content[0].text).toContain('images (1)'); // The PNG
        expect(result.content[0].text).toContain('scripts (1)'); // The CSS
      });

      it('should handle smart_crawl for plain text', async () => {
        const axiosClientMock = {
          head: jest.fn().mockResolvedValue({
            headers: { 'content-type': 'text/plain' },
          }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com/file.txt',
                  markdown: { raw_markdown: 'This is plain text content' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/file.txt',
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type: text');
        expect(result.content[0].text).toContain('This is plain text content');
        expect(axiosClientMock.post).toHaveBeenCalledWith(
          '/crawl',
          expect.objectContaining({
            urls: ['https://example.com/file.txt'],
            crawler_config: expect.objectContaining({
              cache_mode: 'ENABLED',
            }),
            browser_config: expect.objectContaining({
              headless: true,
              browser_type: 'chromium',
            }),
          }),
        );
      });
    });

    describe('Additional Method Tests', () => {
      // Covers the remaining server methods (sitemap parsing, recursive crawl,
      // JS execution, batch/smart crawl, link extraction) and the generic MCP
      // tools/call request handler, including its validation-error paths.
      it('should handle parse_sitemap', async () => {
        // Mock axios.get to return sitemap XML
        mockGet.mockResolvedValue({
          data: `<?xml version="1.0" encoding="UTF-8"?>
            <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
              <url><loc>https://example.com/page1</loc></url>
              <url><loc>https://example.com/page2</loc></url>
              <url><loc>https://example.com/page3</loc></url>
            </urlset>`,
        });

        const result: ToolResult = await server.parseSitemap({
          url: 'https://example.com/sitemap.xml',
        });

        expect(result.content[0].text).toContain('Sitemap parsed successfully');
        expect(result.content[0].text).toContain('Total URLs found: 3');
      });

      it('should handle parse_sitemap with filter', async () => {
        // Mock axios.get to return sitemap XML with blog URLs
        mockGet.mockResolvedValue({
          data: `<?xml version="1.0" encoding="UTF-8"?>
            <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
              <url><loc>https://example.com/page1</loc></url>
              <url><loc>https://example.com/blog/post1</loc></url>
              <url><loc>https://example.com/blog/post2</loc></url>
              <url><loc>https://example.com/page2</loc></url>
            </urlset>`,
        });

        const result: ToolResult = await server.parseSitemap({
          url: 'https://example.com/sitemap.xml',
          filter_pattern: '.*blog.*',
        });

        // Both the unfiltered total and the filtered count are reported.
        expect(result.content[0].text).toContain('Total URLs found: 4');
        expect(result.content[0].text).toContain('Filtered URLs: 2');
      });

      it('should handle crawl_recursive', async () => {
        // Single page with no internal/external links; the summary should
        // still report completion.
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              links: { internal: [], external: [] },
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawlRecursive({
          url: 'https://example.com',
        });

        expect(result.content[0].text).toContain('Recursive crawl completed');
      });

      it('should handle parse_sitemap error', async () => {
        // Network failures are wrapped into a 'Failed to parse sitemap' error.
        mockParseSitemap.mockRejectedValue(new Error('Network error'));

        await expect(
          server.parseSitemap({
            url: 'https://example.com/sitemap.xml',
          }),
        ).rejects.toThrow('Failed to parse sitemap');
      });

      it('should handle crawl with error result', async () => {
        // success: false with an empty results array is treated as an
        // invalid server response rather than a normal failure.
        mockCrawl.mockResolvedValue({
          success: false,
          results: [],
        });

        await expect(
          server.crawl({
            url: 'https://example.com',
          }),
        ).rejects.toThrow('Invalid response from server');
      });

      it('should handle crawl with metadata and links', async () => {
        // Metadata, links and JS execution results should each surface as
        // separate entries in the result content.
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'Content' },
              metadata: { title: 'Test Page', description: 'Test' },
              links: { internal: ['/page1'], external: ['https://external.com'] },
              js_execution_result: { results: [42, 'test'] },
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
        });

        expect(result.content.length).toBeGreaterThan(1);
        expect(result.content.some((c) => c.text?.includes('Metadata'))).toBe(true);
        expect(result.content.some((c) => c.text?.includes('Links'))).toBe(true);
        expect(result.content.some((c) => c.text?.includes('JavaScript Execution Results'))).toBe(true);
      });

      it('should handle executeJS with no scripts', async () => {
        // scripts is mandatory; null must be rejected with a validation error.
        await expect(
          server.executeJS({
            url: 'https://example.com',
            scripts: null,
          }),
        ).rejects.toThrow('scripts is required');
      });

      it('should handle executeJS with array of scripts', async () => {
        mockExecuteJS.mockResolvedValue({
          content: [{ type: 'text', text: 'JS executed' }],
        });

        const result: ToolResult = await server.executeJS({
          url: 'https://example.com',
          scripts: ['return 1', 'return 2'],
        });

        expect(result.content[0].text).toContain('JavaScript executed on:');
      });

      it('should handle batchCrawl with cache bypass', async () => {
        mockPost.mockResolvedValue({
          data: {
            results: [{ success: true }, { success: false }],
          },
        });

        const result: ToolResult = await server.batchCrawl({
          urls: ['https://example.com/1', 'https://example.com/2'],
          bypass_cache: true,
          remove_images: true,
        });

        expect(result.content[0].text).toContain('Batch crawl completed');
        // bypass_cache maps to cache_mode BYPASS; remove_images maps to an
        // exclude_tags list of image-bearing elements.
        expect(mockPost).toHaveBeenCalledWith(
          '/crawl',
          expect.objectContaining({
            crawler_config: expect.objectContaining({
              cache_mode: 'BYPASS',
              exclude_tags: ['img', 'picture', 'svg'],
            }),
          }),
        );
      });

      it('should handle smart_crawl with follow_links', async () => {
        // An application/xml content type should be detected as a sitemap.
        const axiosClientMock = {
          head: jest.fn().mockResolvedValue({
            headers: { 'content-type': 'application/xml' },
          }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com/sitemap.xml',
                  markdown: { raw_markdown: '<url><loc>https://example.com/page1</loc></url>' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com/sitemap.xml',
          follow_links: true,
        });

        expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
      });

      it('should handle smart_crawl with HEAD request failure', async () => {
        // HEAD probe fails with a 500; detection should fall back to a
        // normal HTML crawl via POST.
        const axiosClientMock = {
          head: jest.fn().mockRejectedValue({ response: { status: 500 } }),
          post: jest.fn().mockResolvedValue({
            data: {
              results: [
                {
                  url: 'https://example.com',
                  markdown: { raw_markdown: 'Content from crawl' },
                  success: true,
                  status_code: 200,
                },
              ],
            },
          }),
        };
        server.axiosClientForTesting = axiosClientMock;

        const result: ToolResult = await server.smartCrawl({
          url: 'https://example.com',
        });

        // Should continue despite HEAD failure
        expect(result.content[0].text).toContain('Smart crawl detected content type: html');
        expect(result.content[0].text).toContain('Content from crawl');
      });

      it('should handle extractLinks with no links', async () => {
        // A result with no links object still yields the 'All links' summary.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                markdown: 'Content without links',
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
          categorize: false,
        });

        expect(result.content[0].text).toContain('All links from');
      });

      it('should handle extractLinks with manually extracted links', async () => {
        // Anchor tags present only inside the markdown text (no links
        // object); the handler should still produce the summary.
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                markdown: 'Check out <a href="/page1">Page 1</a>',
              },
            ],
          },
        });

        const result: ToolResult = await server.extractLinks({
          url: 'https://example.com',
        });

        expect(result.content[0].text).toContain('All links from');
      });

      it('should handle MCP request handler for all tools', async () => {
        // Request handler should be available from beforeEach
        expect(requestHandler).toBeDefined();

        // Test various tools through the request handler
        const tools = [
          { name: 'get_markdown', args: { url: 'https://example.com' } },
          { name: 'capture_screenshot', args: { url: 'https://example.com' } },
          { name: 'generate_pdf', args: { url: 'https://example.com' } },
          { name: 'execute_js', args: { url: 'https://example.com', scripts: 'return 1' } },
          { name: 'batch_crawl', args: { urls: ['https://example.com'] } },
          { name: 'smart_crawl', args: { url: 'https://example.com' } },
          { name: 'get_html', args: { url: 'https://example.com' } },
          { name: 'extract_links', args: { url: 'https://example.com' } },
          { name: 'crawl_recursive', args: { url: 'https://example.com' } },
          { name: 'parse_sitemap', args: { url: 'https://example.com/sitemap.xml' } },
          { name: 'crawl', args: { url: 'https://example.com' } },
          { name: 'manage_session', args: { action: 'create' } },
          { name: 'manage_session', args: { action: 'clear', session_id: 'test' } },
          { name: 'manage_session', args: { action: 'list' } },
          { name: 'extract_with_llm', args: { url: 'https://example.com', prompt: 'test' } },
        ];

        // Mock all service methods to return success
        mockGetMarkdown.mockResolvedValue({ content: [{ type: 'text', text: 'markdown' }] });
        mockCaptureScreenshot.mockResolvedValue({ content: [{ type: 'text', text: 'screenshot' }] });
        mockGeneratePDF.mockResolvedValue({ content: [{ type: 'text', text: 'pdf' }] });
        mockExecuteJS.mockResolvedValue({ content: [{ type: 'text', text: 'js' }] });
        mockBatchCrawl.mockResolvedValue({ content: [{ type: 'text', text: 'batch' }] });
        mockGetHTML.mockResolvedValue({ content: [{ type: 'text', text: 'html' }] });
        mockExtractWithLLM.mockResolvedValue({ content: [{ type: 'text', text: 'llm' }] });
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              markdown: { raw_markdown: 'content' },
              success: true,
              status_code: 200,
            },
          ],
        });
        mockPost.mockResolvedValue({
          data: {
            results: [
              {
                links: { internal: [], external: [] },
              },
            ],
          },
        });

        mockParseSitemap.mockResolvedValue(['https://example.com/page1']);

        // Test each tool
        for (const tool of tools) {
          const result = await requestHandler({
            method: 'tools/call',
            params: {
              name: tool.name,
              arguments: tool.args,
            },
          });
          expect(result).toBeDefined();
          expect(result.content).toBeDefined();
        }

        // Test unknown tool
        const unknownResult = await requestHandler({
          method: 'tools/call',
          params: {
            name: 'unknown_tool',
            arguments: {},
          },
        });
        expect(unknownResult.content[0].text).toContain('Error: Unknown tool');

        // The handler only handles tools/call requests,
        // so we don't test other methods here
      });

      it('should handle MCP request handler validation errors', async () => {
        expect(requestHandler).toBeDefined();

        // Test validation errors for various tools
        const invalidRequests = [
          { name: 'get_markdown', args: {} }, // missing url
          { name: 'capture_screenshot', args: {} }, // missing url
          { name: 'generate_pdf', args: {} }, // missing url
          { name: 'execute_js', args: { url: 'https://example.com' } }, // missing scripts
          { name: 'batch_crawl', args: {} }, // missing urls
          { name: 'smart_crawl', args: {} }, // missing url
          { name: 'get_html', args: {} }, // missing url
          { name: 'extract_links', args: {} }, // missing url
          { name: 'crawl_recursive', args: {} }, // missing url
          { name: 'parse_sitemap', args: {} }, // missing url
          { name: 'crawl', args: {} }, // missing url
          { name: 'manage_session', args: {} }, // missing action
          { name: 'manage_session', args: { action: 'clear' } }, // missing session_id for clear
          { name: 'manage_session', args: { action: 'invalid' } }, // invalid action
          { name: 'extract_with_llm', args: { url: 'https://example.com' } }, // missing prompt
        ];

        // Each invalid request is expected to produce a per-tool
        // 'Invalid parameters' error message, not a thrown exception.
        for (const req of invalidRequests) {
          const result = await requestHandler({
            method: 'tools/call',
            params: {
              name: req.name,
              arguments: req.args,
            },
          });
          expect(result.content[0].text).toContain(`Error: Invalid parameters for ${req.name}`);
        }
      });

      it('should handle crawl with all output types', async () => {
        // screenshot/pdf flags should yield text, image and PDF resource
        // entries in the result content.
        mockCrawl.mockResolvedValue({
          success: true,
          results: [
            {
              url: 'https://example.com',
              extracted_content: { data: 'extracted' },
              screenshot: 'base64screenshot',
              pdf: 'base64pdf',
              success: true,
              status_code: 200,
            },
          ],
        });

        const result: ToolResult = await server.crawl({
          url: 'https://example.com',
          screenshot: true,
          pdf: true,
        });

        expect(result.content.some((c) => c.type === 'text')).toBe(true);
        expect(result.content.some((c) => c.type === 'image')).toBe(true);
        expect(result.content.some((c) => c.type === 'resource' && c.resource?.mimeType === 'application/pdf')).toBe(
          true,
        );
      });
    });

    describe('MCP Protocol Handler Tests', () => {
      it('should handle tools/list request', async () => {
        // Find the handler registered for the tools/list schema. Narrow the
        // schema argument to the single field we inspect instead of casting
        // to 'any', so no eslint suppression is needed.
        const toolsListHandler = mockSetRequestHandler.mock.calls.find(
          (call) => (call[0] as { method?: string }).method === 'tools/list',
        )?.[1];

        expect(toolsListHandler).toBeDefined();

        // Invoke the handler directly; cast to a minimal callable shape
        // rather than 'any' so the result's 'tools' access stays type-checked.
        const result = await (
          toolsListHandler as (req: { method: string; params: object }) => Promise<{ tools: unknown[] }>
        )({ method: 'tools/list', params: {} });
        expect(result).toBeDefined();
        expect(result.tools).toBeDefined();
        expect(result.tools.length).toBe(13); // Should have 13 tools
      });

      it('should handle get_markdown query functionality', async () => {
        // The service mock returns the raw payload; the handler is expected
        // to surface both the query and the markdown in its text output.
        mockGetMarkdown.mockResolvedValue({
          url: 'https://example.com',
          filter: 'fit',
          query: 'What products are listed?',
          cache: 'false',
          markdown: 'Page content about products',
          success: true,
        });

        const result: ToolResult = await server.getMarkdown({
          url: 'https://example.com',
          query: 'What products are listed?',
        });

        expect(result.content[0].text).toContain('Query: What products are listed?');
        expect(result.content[0].text).toContain('Page content about products');
      });
    });
  });
});

```
Page 3/3FirstPrevNextLast