omgwtfwow/mcp-crawl4ai-ts # codebase.md

This is page 2 of 3. Use http://codebase.md/omgwtfwow/mcp-crawl4ai-ts?page={x} to view the full context.

# Directory Structure

```
├── .env.example
├── .github
│   ├── CI.md
│   ├── copilot-instructions.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── CHANGELOG.md
├── eslint.config.mjs
├── jest.config.cjs
├── jest.setup.cjs
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   ├── __tests__
│   │   ├── crawl.test.ts
│   │   ├── crawl4ai-service.network.test.ts
│   │   ├── crawl4ai-service.test.ts
│   │   ├── handlers
│   │   │   ├── crawl-handlers.test.ts
│   │   │   ├── parameter-combinations.test.ts
│   │   │   ├── screenshot-saving.test.ts
│   │   │   ├── session-handlers.test.ts
│   │   │   └── utility-handlers.test.ts
│   │   ├── index.cli.test.ts
│   │   ├── index.npx.test.ts
│   │   ├── index.server.test.ts
│   │   ├── index.test.ts
│   │   ├── integration
│   │   │   ├── batch-crawl.integration.test.ts
│   │   │   ├── capture-screenshot.integration.test.ts
│   │   │   ├── crawl-advanced.integration.test.ts
│   │   │   ├── crawl-handlers.integration.test.ts
│   │   │   ├── crawl-recursive.integration.test.ts
│   │   │   ├── crawl.integration.test.ts
│   │   │   ├── execute-js.integration.test.ts
│   │   │   ├── extract-links.integration.test.ts
│   │   │   ├── extract-with-llm.integration.test.ts
│   │   │   ├── generate-pdf.integration.test.ts
│   │   │   ├── get-html.integration.test.ts
│   │   │   ├── get-markdown.integration.test.ts
│   │   │   ├── parse-sitemap.integration.test.ts
│   │   │   ├── session-management.integration.test.ts
│   │   │   ├── smart-crawl.integration.test.ts
│   │   │   └── test-utils.ts
│   │   ├── request-handler.test.ts
│   │   ├── schemas
│   │   │   └── validation-edge-cases.test.ts
│   │   ├── types
│   │   │   └── mocks.ts
│   │   └── utils
│   │       └── javascript-validation.test.ts
│   ├── crawl4ai-service.ts
│   ├── handlers
│   │   ├── base-handler.ts
│   │   ├── content-handlers.ts
│   │   ├── crawl-handlers.ts
│   │   ├── session-handlers.ts
│   │   └── utility-handlers.ts
│   ├── index.ts
│   ├── schemas
│   │   ├── helpers.ts
│   │   └── validation-schemas.ts
│   ├── server.ts
│   └── types.ts
├── tsconfig.build.json
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/src/__tests__/request-handler.test.ts:
--------------------------------------------------------------------------------

```typescript
import { jest } from '@jest/globals';

// Mock all dependencies before imports
const mockGetMarkdown = jest.fn();
const mockCaptureScreenshot = jest.fn();
const mockGeneratePDF = jest.fn();
const mockExecuteJS = jest.fn();
const mockGetHTML = jest.fn();
const mockBatchCrawl = jest.fn();
const mockExtractWithLLM = jest.fn();
const mockCrawl = jest.fn();
const mockParseSitemap = jest.fn();

jest.unstable_mockModule('../crawl4ai-service.js', () => ({
  Crawl4AIService: jest.fn().mockImplementation(() => ({
    getMarkdown: mockGetMarkdown,
    captureScreenshot: mockCaptureScreenshot,
    generatePDF: mockGeneratePDF,
    executeJS: mockExecuteJS,
    getHTML: mockGetHTML,
    batchCrawl: mockBatchCrawl,
    extractWithLLM: mockExtractWithLLM,
    crawl: mockCrawl,
    parseSitemap: mockParseSitemap,
  })),
}));

// Mock axios
const mockPost = jest.fn();
const mockAxiosCreate = jest.fn(() => ({
  post: mockPost,
}));

jest.unstable_mockModule('axios', () => ({
  default: {
    create: mockAxiosCreate,
  },
}));

// Mock MCP SDK
const mockSetRequestHandler = jest.fn();
const mockTool = jest.fn();
const mockConnect = jest.fn();

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/index.js', () => ({
  Server: jest.fn().mockImplementation(() => ({
    setRequestHandler: mockSetRequestHandler,
    tool: mockTool,
    connect: mockConnect,
  })),
}));

// Mock the types module that exports the schemas
const CallToolRequestSchema = { method: 'tools/call' };
const ListToolsRequestSchema = { method: 'tools/list' };

jest.unstable_mockModule('@modelcontextprotocol/sdk/types.js', () => ({
  CallToolRequestSchema,
  ListToolsRequestSchema,
}));

jest.unstable_mockModule('@modelcontextprotocol/sdk/server/stdio.js', () => ({
  StdioServerTransport: jest.fn(),
}));

// Now import the server after mocks are set up
const { Crawl4AIServer } = await import('../server.js');

// Removed unused type definitions - using 'any' for test mocks

describe('MCP Request Handler Direct Testing', () => {
  let server: any; // eslint-disable-line @typescript-eslint/no-explicit-any
  let requestHandler: any; // eslint-disable-line @typescript-eslint/no-explicit-any

  beforeEach(async () => {
    jest.clearAllMocks();

    // Set up mock responses
    mockGetMarkdown.mockResolvedValue({ success: true, content: 'markdown content' });
    mockCaptureScreenshot.mockResolvedValue({ success: true, screenshot: 'base64image' });
    mockGeneratePDF.mockResolvedValue({ success: true, pdf: 'base64pdf' });
    mockExecuteJS.mockResolvedValue({ js_execution_result: { results: [42] } });
    mockGetHTML.mockResolvedValue({ success: true, html: '<html></html>' });
    mockExtractWithLLM.mockResolvedValue({ answer: 'extracted answer' });
    mockCrawl.mockResolvedValue({
      success: true,
      results: [
        {
          url: 'https://example.com',
          markdown: { raw_markdown: 'content' },
          success: true,
          status_code: 200,
        },
      ],
    });
    mockParseSitemap.mockResolvedValue(['https://example.com/page1']);
    mockPost.mockResolvedValue({
      data: {
        results: [
          {
            links: { internal: [], external: [] },
            success: true,
          },
        ],
      },
    });

    // Create server
    server = new Crawl4AIServer(
      process.env.CRAWL4AI_BASE_URL || 'http://test.example.com',
      process.env.CRAWL4AI_API_KEY || 'test-api-key',
      'test-server',
      '1.0.0',
    );
    await server.start();

    // Get the request handler for CallToolRequestSchema
    const handlerCalls = mockSetRequestHandler.mock.calls;

    // Find the handler for CallToolRequestSchema (tools/call)
    for (const call of handlerCalls) {
      const [schema, handler] = call;
      if (schema && (schema as any).method === 'tools/call') {
        requestHandler = handler;
        break;
      }
    }
  });

  describe('Tool Handler Coverage', () => {
    it('should handle all valid tool requests', async () => {
      expect(requestHandler).toBeDefined();

      const validRequests = [
        { name: 'get_markdown', arguments: { url: 'https://example.com' } },
        { name: 'capture_screenshot', arguments: { url: 'https://example.com' } },
        { name: 'generate_pdf', arguments: { url: 'https://example.com' } },
        { name: 'execute_js', arguments: { url: 'https://example.com', scripts: 'return 1' } },
        { name: 'batch_crawl', arguments: { urls: ['https://example.com'] } },
        { name: 'smart_crawl', arguments: { url: 'https://example.com' } },
        { name: 'get_html', arguments: { url: 'https://example.com' } },
        { name: 'extract_links', arguments: { url: 'https://example.com' } },
        { name: 'crawl_recursive', arguments: { url: 'https://example.com' } },
        { name: 'parse_sitemap', arguments: { url: 'https://example.com/sitemap.xml' } },
        { name: 'crawl', arguments: { url: 'https://example.com' } },
        { name: 'manage_session', arguments: { action: 'create' } },
        { name: 'manage_session', arguments: { action: 'clear', session_id: 'test' } },
        { name: 'manage_session', arguments: { action: 'list' } },
        { name: 'extract_with_llm', arguments: { url: 'https://example.com', prompt: 'test' } },
      ];

      for (const req of validRequests) {
        const result = await requestHandler({
          method: 'tools/call',
          params: req,
        });
        expect(result).toBeDefined();
        expect(result.content).toBeDefined();
      }
    });

    it('should handle all validation error cases', async () => {
      const invalidRequests = [
        { name: 'get_markdown', arguments: {}, expectedError: 'Invalid parameters for get_markdown' },
        { name: 'capture_screenshot', arguments: {}, expectedError: 'Invalid parameters for capture_screenshot' },
        { name: 'generate_pdf', arguments: {}, expectedError: 'Invalid parameters for generate_pdf' },
        {
          name: 'execute_js',
          arguments: { url: 'https://example.com' },
          expectedError: 'Invalid parameters for execute_js',
        },
        { name: 'batch_crawl', arguments: {}, expectedError: 'Invalid parameters for batch_crawl' },
        { name: 'smart_crawl', arguments: {}, expectedError: 'Invalid parameters for smart_crawl' },
        { name: 'get_html', arguments: {}, expectedError: 'Invalid parameters for get_html' },
        { name: 'extract_links', arguments: {}, expectedError: 'Invalid parameters for extract_links' },
        { name: 'crawl_recursive', arguments: {}, expectedError: 'Invalid parameters for crawl_recursive' },
        { name: 'parse_sitemap', arguments: {}, expectedError: 'Invalid parameters for parse_sitemap' },
        { name: 'crawl', arguments: {}, expectedError: 'Invalid parameters for crawl' },
        { name: 'manage_session', arguments: {}, expectedError: 'Invalid parameters for manage_session' },
        {
          name: 'manage_session',
          arguments: { action: 'clear' },
          expectedError: 'Invalid parameters for manage_session',
        },
        {
          name: 'extract_with_llm',
          arguments: { url: 'https://example.com' },
          expectedError: 'Invalid parameters for extract_with_llm',
        },
      ];

      for (const req of invalidRequests) {
        const result = await requestHandler({
          method: 'tools/call',
          params: req,
        });
        expect(result.content[0].text).toContain(req.expectedError);
      }
    });

    it('should handle unknown tool', async () => {
      const result = await requestHandler({
        method: 'tools/call',
        params: {
          name: 'unknown_tool',
          arguments: {},
        },
      });
      expect(result.content[0].text).toContain('Error: Unknown tool: unknown_tool');
    });

    it('should handle non-ZodError exceptions', async () => {
      // Make the service method throw a non-Zod error
      mockGetMarkdown.mockRejectedValue(new Error('Service error'));

      const result = await requestHandler({
        method: 'tools/call',
        params: {
          name: 'get_markdown',
          arguments: { url: 'https://example.com' },
        },
      });

      expect(result.content[0].text).toContain('Error: Failed to get markdown: Service error');
    });

    it('should handle manage_session with create action', async () => {
      const result = await requestHandler({
        method: 'tools/call',
        params: {
          name: 'manage_session',
          arguments: {
            action: 'create',
            session_id: 'test-session',
            initial_url: 'https://example.com',
          },
        },
      });

      expect(result.content[0].text).toContain('Session created successfully');
      expect(result.content[0].text).toContain('test-session');
    });

    it('should handle manage_session with clear action', async () => {
      // First create a session
      await requestHandler({
        method: 'tools/call',
        params: {
          name: 'manage_session',
          arguments: {
            action: 'create',
            session_id: 'test-to-clear',
          },
        },
      });

      // Then clear it
      const result = await requestHandler({
        method: 'tools/call',
        params: {
          name: 'manage_session',
          arguments: {
            action: 'clear',
            session_id: 'test-to-clear',
          },
        },
      });

      expect(result.content[0].text).toContain('Session cleared successfully');
    });

    it('should handle manage_session with list action', async () => {
      // First create a session
      await requestHandler({
        method: 'tools/call',
        params: {
          name: 'manage_session',
          arguments: {
            action: 'create',
            session_id: 'test-list-session',
          },
        },
      });

      // List sessions
      const result = await requestHandler({
        method: 'tools/call',
        params: {
          name: 'manage_session',
          arguments: { action: 'list' },
        },
      });

      expect(result.content[0].text).toContain('Active sessions');
      expect(result.content[0].text).toContain('test-list-session');
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/handlers/screenshot-saving.test.ts:
--------------------------------------------------------------------------------

```typescript
import { jest } from '@jest/globals';

// Mock fs/promises
const mockMkdir = jest.fn();
const mockWriteFile = jest.fn();

jest.unstable_mockModule('fs/promises', () => ({
  mkdir: mockMkdir,
  writeFile: mockWriteFile,
}));

// Mock os
const mockHomedir = jest.fn();
jest.unstable_mockModule('os', () => ({
  homedir: mockHomedir,
}));

// Import after mocking
const { ContentHandlers } = await import('../../handlers/content-handlers.js');
const { CrawlHandlers } = await import('../../handlers/crawl-handlers.js');

// Mock the service
const mockService = {
  captureScreenshot: jest.fn(),
  crawl: jest.fn(),
};

// Mock axios client
const mockAxiosClient = {
  post: jest.fn(),
};

describe('Screenshot Local Saving', () => {
  let contentHandlers: InstanceType<typeof ContentHandlers>;
  let crawlHandlers: InstanceType<typeof CrawlHandlers>;
  const testScreenshotBase64 =
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=='; // 1x1 red pixel

  beforeEach(() => {
    jest.clearAllMocks();
    contentHandlers = new ContentHandlers(mockService as never, mockAxiosClient as never, new Map());
    crawlHandlers = new CrawlHandlers(mockService as never, mockAxiosClient as never, new Map());

    // Default mock implementations
    mockMkdir.mockResolvedValue(undefined);
    mockWriteFile.mockResolvedValue(undefined);
  });

  describe('ContentHandlers.captureScreenshot', () => {
    it('should save screenshot to local directory when save_to_directory is provided', async () => {
      const mockDate = new Date('2024-01-15T10:30:00Z');
      jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never);

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: testScreenshotBase64,
      });

      const result = await contentHandlers.captureScreenshot({
        url: 'https://example.com',
        save_to_directory: '/tmp/screenshots',
      });

      // Verify directory creation
      expect(mockMkdir).toHaveBeenCalledWith('/tmp/screenshots', { recursive: true });

      // Verify file write
      const expectedFilename = 'example-com-2024-01-15T10-30-00.png';
      const expectedPath = '/tmp/screenshots/' + expectedFilename;
      expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64'));

      // Verify response includes saved path
      expect(result.content[1].text).toContain(`Saved to: ${expectedPath}`);
    });

    it('should handle directory creation failure gracefully', async () => {
      const consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation();
      mockMkdir.mockRejectedValue(new Error('Permission denied'));

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: testScreenshotBase64,
      });

      const result = await contentHandlers.captureScreenshot({
        url: 'https://example.com',
        save_to_directory: '/root/screenshots',
      });

      // Should still return the screenshot
      expect(result.content[0].type).toBe('image');
      expect(result.content[0].data).toBe(testScreenshotBase64);

      // Should not include saved path in text
      expect(result.content[1].text).not.toContain('Saved to:');

      // Should log error
      expect(consoleErrorSpy).toHaveBeenCalledWith('Failed to save screenshot locally:', expect.any(Error));

      consoleErrorSpy.mockRestore();
    });

    it('should handle file path instead of directory path', async () => {
      const mockDate = new Date('2024-01-15T10:30:00Z');
      jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never);
      const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation();

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: testScreenshotBase64,
      });

      await contentHandlers.captureScreenshot({
        url: 'https://example.com',
        save_to_directory: '/tmp/screenshots/screenshot.png',
      });

      // Should warn about file path
      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Warning: save_to_directory should be a directory path, not a file path. Using parent directory.',
      );

      // Should use parent directory
      expect(mockMkdir).toHaveBeenCalledWith('/tmp/screenshots', { recursive: true });

      // Should still generate filename
      const expectedFilename = 'example-com-2024-01-15T10-30-00.png';
      const expectedPath = '/tmp/screenshots/' + expectedFilename;
      expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64'));

      consoleWarnSpy.mockRestore();
    });

    it('should resolve home directory paths', async () => {
      const mockDate = new Date('2024-01-15T10:30:00Z');
      jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never);
      mockHomedir.mockReturnValue('/Users/testuser');

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: testScreenshotBase64,
      });

      await contentHandlers.captureScreenshot({
        url: 'https://example.com',
        save_to_directory: '~/Desktop/screenshots',
      });

      // Should resolve ~ to home directory
      expect(mockMkdir).toHaveBeenCalledWith('/Users/testuser/Desktop/screenshots', { recursive: true });

      const expectedPath = '/Users/testuser/Desktop/screenshots/example-com-2024-01-15T10-30-00.png';
      expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64'));
    });

    it('should not return large screenshots when saved locally', async () => {
      // Create a large fake screenshot (>800KB when decoded)
      const largeBase64 = 'A'.repeat(1200000); // ~900KB when decoded

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: largeBase64,
      });

      const result = await contentHandlers.captureScreenshot({
        url: 'https://example.com',
        save_to_directory: '/tmp',
      });

      // Should not include image in response
      const imageContent = result.content.find((c) => c.type === 'image');
      expect(imageContent).toBeUndefined();

      // Should mention size in text
      const textContent = result.content.find((c) => c.type === 'text');
      expect(textContent?.text).toContain('not returned due to size');
      expect(textContent?.text).toContain('KB');
    });

    it('should sanitize filename for URLs with special characters', async () => {
      const mockDate = new Date('2024-01-15T10:30:00Z');
      jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never);

      mockService.captureScreenshot.mockResolvedValue({
        success: true,
        screenshot: testScreenshotBase64,
      });

      await contentHandlers.captureScreenshot({
        url: 'https://my-site.com:8080/path?query=value',
        save_to_directory: '/tmp/screenshots',
      });

      const expectedFilename = 'my-site-com-2024-01-15T10-30-00.png';
      const expectedPath = '/tmp/screenshots/' + expectedFilename;
      expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, expect.any(Buffer));
    });
  });

  describe('CrawlHandlers.crawl', () => {
    it('should save screenshot to local directory when screenshot_directory is provided', async () => {
      const mockDate = new Date('2024-01-15T10:30:00Z');
      jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never);

      mockService.crawl.mockResolvedValue({
        results: [
          {
            url: 'https://example.com',
            success: true,
            screenshot: testScreenshotBase64,
            markdown: { raw_markdown: 'Test content' },
          },
        ],
      });

      const result = await crawlHandlers.crawl({
        url: 'https://example.com',
        screenshot: true,
        screenshot_directory: '/tmp/crawl-screenshots',
      });

      // Verify directory creation
      expect(mockMkdir).toHaveBeenCalledWith('/tmp/crawl-screenshots', { recursive: true });

      // Verify file write
      const expectedFilename = 'example-com-2024-01-15T10-30-00.png';
      const expectedPath = '/tmp/crawl-screenshots/' + expectedFilename;
      expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64'));

      // Verify response includes saved path
      const textContent = result.content.find(
        (c) => c.type === 'text' && 'text' in c && c.text?.includes('Screenshot saved'),
      );
      expect(textContent?.text).toContain(`Screenshot saved to: ${expectedPath}`);
    });

    it('should handle file save failure gracefully in crawl', async () => {
      const consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation();
      mockMkdir.mockResolvedValue(undefined); // directory creation succeeds
      mockWriteFile.mockRejectedValue(new Error('Disk full')); // but file write fails

      mockService.crawl.mockResolvedValue({
        results: [
          {
            url: 'https://example.com',
            success: true,
            screenshot: testScreenshotBase64,
            markdown: { raw_markdown: 'Test content' },
          },
        ],
      });

      const result = await crawlHandlers.crawl({
        url: 'https://example.com',
        screenshot: true,
        screenshot_directory: '/tmp/crawl-screenshots',
      });

      // Should still return the screenshot as image
      const imageContent = result.content.find((c) => c.type === 'image');
      expect(imageContent?.data).toBe(testScreenshotBase64);

      // Should log error
      expect(consoleErrorSpy).toHaveBeenCalledWith('Failed to save screenshot locally:', expect.any(Error));

      consoleErrorSpy.mockRestore();
    });

    it('should not attempt to save when screenshot_directory is not provided', async () => {
      mockService.crawl.mockResolvedValue({
        results: [
          {
            url: 'https://example.com',
            success: true,
            screenshot: testScreenshotBase64,
            markdown: { raw_markdown: 'Test content' },
          },
        ],
      });

      await crawlHandlers.crawl({
        url: 'https://example.com',
        screenshot: true,
      });

      // Should not call fs methods
      expect(mockMkdir).not.toHaveBeenCalled();
      expect(mockWriteFile).not.toHaveBeenCalled();
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/crawl4ai-service.network.test.ts:
--------------------------------------------------------------------------------

```typescript
import { jest } from '@jest/globals';

// Mock axios before importing the service
const mockAxiosInstance = {
  get: jest.fn() as jest.Mock,
  post: jest.fn() as jest.Mock,
  interceptors: {
    request: { use: jest.fn() as jest.Mock },
    response: { use: jest.fn() as jest.Mock },
  },
};

jest.unstable_mockModule('axios', () => ({
  default: {
    create: jest.fn(() => mockAxiosInstance),
    isAxiosError: jest.fn((error: any) => error.isAxiosError === true), // eslint-disable-line @typescript-eslint/no-explicit-any
    get: jest.fn(),
    head: jest.fn(),
  },
  isAxiosError: jest.fn((error: any) => error.isAxiosError === true), // eslint-disable-line @typescript-eslint/no-explicit-any
}));

// Import after mocking
const { Crawl4AIService } = await import('../crawl4ai-service.js');

describe('Crawl4AI Service - Network Failures', () => {
  let service: any; // eslint-disable-line @typescript-eslint/no-explicit-any

  interface ErrorWithCode extends Error {
    code?: string;
    response?: {
      status: number;
      data?: any; // eslint-disable-line @typescript-eslint/no-explicit-any
    };
    isAxiosError?: boolean;
  }

  beforeEach(() => {
    jest.clearAllMocks();
    service = new Crawl4AIService('http://localhost:11235', 'test-api-key');
  });

  describe('Network Timeouts', () => {
    it('should handle request timeout', async () => {
      const timeoutError = new Error('timeout of 30000ms exceeded') as ErrorWithCode;
      timeoutError.code = 'ECONNABORTED';
      timeoutError.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(timeoutError);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Request timed out');
    });

    it('should handle response timeout', async () => {
      const timeoutError = new Error('timeout of 30000ms exceeded') as ErrorWithCode;
      timeoutError.code = 'ETIMEDOUT';
      timeoutError.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(timeoutError);

      await expect(service.getHTML({ url: 'https://example.com' })).rejects.toThrow('Request timeout');
    });
  });

  describe('HTTP Error Responses', () => {
    it('should handle 401 Unauthorized', async () => {
      const error = {
        response: {
          status: 401,
          data: { error: 'Invalid API key' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow(
        'Request failed with status 401: Invalid API key',
      );
    });

    it('should handle 403 Forbidden', async () => {
      const error = {
        response: {
          status: 403,
          data: { error: 'Access denied' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
        'Request failed with status 403: Access denied',
      );
    });

    it('should handle 404 Not Found', async () => {
      const error = {
        response: {
          status: 404,
          data: { error: 'Endpoint not found' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.generatePDF({ url: 'https://example.com' })).rejects.toThrow(
        'Request failed with status 404: Endpoint not found',
      );
    });

    it('should handle 429 Too Many Requests', async () => {
      const error = {
        response: {
          status: 429,
          data: { error: 'Rate limit exceeded' },
          headers: {
            'retry-after': '60',
          },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.executeJS({ url: 'https://example.com', scripts: ['return 1;'] })).rejects.toThrow(
        'Request failed with status 429: Rate limit exceeded',
      );
    });

    it('should handle 500 Internal Server Error', async () => {
      const error = {
        response: {
          status: 500,
          data: { error: 'Internal server error' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow(
        'Request failed with status 500: Internal server error',
      );
    });

    it('should handle 502 Bad Gateway', async () => {
      const error = {
        response: {
          status: 502,
          data: 'Bad Gateway',
        },
        isAxiosError: true,
        message: 'Request failed with status code 502',
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow(
        'Request failed with status 502: Request failed with status code 502',
      );
    });

    it('should handle 503 Service Unavailable', async () => {
      const error = {
        response: {
          status: 503,
          data: { error: 'Service temporarily unavailable' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.get as jest.Mock).mockRejectedValue(error);

      await expect(service.extractWithLLM({ url: 'https://example.com', query: 'test' })).rejects.toThrow(
        'Request failed with status 503: Service temporarily unavailable',
      );
    });

    it('should handle 504 Gateway Timeout', async () => {
      const error = {
        response: {
          status: 504,
          data: { error: 'Gateway timeout' },
        },
        isAxiosError: true,
      };
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.getHTML({ url: 'https://example.com' })).rejects.toThrow(
        'Request failed with status 504: Gateway timeout',
      );
    });
  });

  describe('Network Connection Failures', () => {
    it('should handle DNS resolution failure', async () => {
      const error = new Error('getaddrinfo ENOTFOUND invalid.domain') as ErrorWithCode;
      error.code = 'ENOTFOUND';
      error.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.getMarkdown({ url: 'https://invalid.domain' })).rejects.toThrow(
        'DNS resolution failed: getaddrinfo ENOTFOUND invalid.domain',
      );
    });

    it('should handle connection refused', async () => {
      const error = new Error('connect ECONNREFUSED 127.0.0.1:11235') as ErrorWithCode;
      error.code = 'ECONNREFUSED';
      error.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow(
        'Connection refused: connect ECONNREFUSED 127.0.0.1:11235',
      );
    });

    it('should handle connection reset', async () => {
      const error = new Error('socket hang up') as ErrorWithCode;
      error.code = 'ECONNRESET';
      error.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow(
        'Connection reset: socket hang up',
      );
    });

    it('should handle network unreachable', async () => {
      const error = new Error('connect ENETUNREACH') as ErrorWithCode;
      error.code = 'ENETUNREACH';
      error.isAxiosError = true;
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      await expect(service.executeJS({ url: 'https://example.com', scripts: ['return 1;'] })).rejects.toThrow(
        'Network unreachable: connect ENETUNREACH',
      );
    });
  });

  describe('Response Parsing Failures', () => {
    it('should handle invalid JSON response', async () => {
      // This test is not applicable anymore since we handle errors at axios level
      // The service will return whatever axios returns
      (mockAxiosInstance.post as jest.Mock).mockResolvedValue({
        data: '<html>Not JSON</html>',
        headers: { 'content-type': 'text/html' },
      });

      const result = await service.getHTML({ url: 'https://example.com' });
      expect(result).toBe('<html>Not JSON</html>');
    });

    it('should handle empty response', async () => {
      (mockAxiosInstance.post as jest.Mock).mockResolvedValue({
        data: null,
      });

      // The service returns null, which is valid
      const result = await service.crawl({ urls: ['https://example.com'] });
      expect(result).toBeNull();
    });

    it('should handle malformed response structure', async () => {
      (mockAxiosInstance.post as jest.Mock).mockResolvedValue({
        data: { unexpected: 'structure' },
      });

      // The service returns whatever the API returns
      const result = await service.crawl({ urls: ['https://example.com'] });
      expect(result).toEqual({ unexpected: 'structure' });
    });
  });

  describe('Request Configuration Errors', () => {
    it('should handle invalid URL format', async () => {
      await expect(service.getMarkdown({ url: 'not-a-valid-url' })).rejects.toThrow('Invalid URL format');
    });

    it('should handle missing required parameters', async () => {
      await expect(service.batchCrawl({ urls: [] })).rejects.toThrow('URLs array cannot be empty');
    });

    it('should handle oversized request payload', async () => {
      const error = new Error('Request Entity Too Large') as ErrorWithCode;
      error.response = { status: 413 };
      error.isAxiosError = true;
      error.message = 'Request Entity Too Large';
      (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error);

      const hugeScript = 'x'.repeat(10 * 1024 * 1024); // 10MB
      await expect(service.executeJS({ url: 'https://example.com', scripts: [hugeScript] })).rejects.toThrow(
        'Request failed with status 413: Request Entity Too Large',
      );
    });
  });

  describe('Partial Response Handling', () => {
    it('should handle successful response with partial data', async () => {
      (mockAxiosInstance.post as jest.Mock).mockResolvedValue({
        data: {
          results: [
            { success: true, url: 'https://example.com', markdown: 'Content' },
            { success: false, url: 'https://example.com/page2', error: 'Failed' },
          ],
        },
      });

      const result = await service.crawl({ urls: ['https://example.com', 'https://example.com/page2'] });
      expect(result.results).toHaveLength(2);
      expect(result.results[0].success).toBe(true);
      expect(result.results[1].success).toBe(false);
    });

    it('should handle response with missing optional fields', async () => {
      (mockAxiosInstance.post as jest.Mock).mockResolvedValue({
        data: {
          success: true,
          url: 'https://example.com',
          // Missing markdown field
        },
      });

      const result = await service.getMarkdown({ url: 'https://example.com' });
      expect(result.url).toBe('https://example.com');
      expect(result.markdown).toBeUndefined();
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/handlers/parameter-combinations.test.ts:
--------------------------------------------------------------------------------

```typescript
import { jest } from '@jest/globals';
import { CrawlHandlers } from '../../handlers/crawl-handlers.js';
import { ContentHandlers } from '../../handlers/content-handlers.js';

type MockService = {
  crawl: jest.Mock;
  getMarkdown: jest.Mock;
  captureScreenshot: jest.Mock;
};

type MockAxiosClient = {
  post: jest.Mock;
  get: jest.Mock;
  head: jest.Mock;
};

describe('Optional Parameter Combinations', () => {
  let crawlHandlers: CrawlHandlers;
  let _contentHandlers: ContentHandlers;
  let mockService: MockService;
  let mockAxiosClient: MockAxiosClient;

  beforeEach(() => {
    jest.clearAllMocks();

    mockService = {
      crawl: jest.fn(),
      getMarkdown: jest.fn(),
      captureScreenshot: jest.fn(),
    };

    mockAxiosClient = {
      post: jest.fn(),
      get: jest.fn(),
      head: jest.fn(),
    };

    crawlHandlers = new CrawlHandlers(mockService, mockAxiosClient, new Map());
    _contentHandlers = new ContentHandlers(mockService, mockAxiosClient, new Map());
  });

  describe('Batch Crawl Parameter Combinations', () => {
    const testCases = [
      {
        name: 'default parameters only',
        options: { urls: ['https://example.com'] },
        expectedConfig: undefined,
      },
      {
        name: 'remove_images only',
        options: { urls: ['https://example.com'], remove_images: true },
        expectedConfig: { exclude_tags: ['img', 'picture', 'svg'] },
      },
      {
        name: 'bypass_cache only',
        options: { urls: ['https://example.com'], bypass_cache: true },
        expectedConfig: { cache_mode: 'BYPASS' },
      },
      {
        name: 'both remove_images and bypass_cache',
        options: { urls: ['https://example.com'], remove_images: true, bypass_cache: true },
        expectedConfig: { exclude_tags: ['img', 'picture', 'svg'], cache_mode: 'BYPASS' },
      },
      {
        name: 'with max_concurrent',
        options: { urls: ['https://example.com'], max_concurrent: 5, remove_images: true },
        expectedConfig: { exclude_tags: ['img', 'picture', 'svg'] },
      },
    ];

    testCases.forEach(({ name, options, expectedConfig }) => {
      it(`should handle ${name}`, async () => {
        mockAxiosClient.post.mockResolvedValue({
          data: { results: [{ success: true }] },
        });

        await crawlHandlers.batchCrawl(options);

        expect(mockAxiosClient.post).toHaveBeenCalledWith('/crawl', {
          urls: options.urls,
          max_concurrent: options.max_concurrent,
          crawler_config: expectedConfig,
        });
      });
    });
  });

  describe('Smart Crawl Parameter Combinations', () => {
    const testCases = [
      {
        name: 'minimal configuration',
        options: { url: 'https://example.com' },
        expectedCacheMode: 'ENABLED',
      },
      {
        name: 'with bypass_cache',
        options: { url: 'https://example.com', bypass_cache: true },
        expectedCacheMode: 'BYPASS',
      },
      {
        name: 'with max_depth',
        options: { url: 'https://example.com', max_depth: 5 },
        expectedCacheMode: 'ENABLED',
      },
      {
        name: 'with follow_links and bypass_cache',
        options: { url: 'https://example.com', follow_links: true, bypass_cache: true },
        expectedCacheMode: 'BYPASS',
      },
    ];

    testCases.forEach(({ name, options, expectedCacheMode }) => {
      it(`should handle ${name}`, async () => {
        mockAxiosClient.head.mockResolvedValue({ headers: { 'content-type': 'text/html' } });
        mockAxiosClient.post.mockResolvedValue({
          data: { results: [{ success: true, markdown: { raw_markdown: 'Content' } }] },
        });

        await crawlHandlers.smartCrawl(options);

        expect(mockAxiosClient.post).toHaveBeenCalledWith('/crawl', {
          urls: [options.url],
          crawler_config: {
            cache_mode: expectedCacheMode,
          },
          browser_config: {
            headless: true,
            browser_type: 'chromium',
          },
        });
      });
    });
  });

  describe('Crawl Parameter Combinations', () => {
    // Table-driven tests for various parameter combinations
    const parameterSets = [
      // Browser configuration combinations
      {
        name: 'browser type with viewport',
        params: {
          url: 'https://example.com',
          browser_type: 'firefox',
          viewport_width: 1920,
          viewport_height: 1080,
        },
      },
      {
        name: 'proxy with authentication',
        params: {
          url: 'https://example.com',
          proxy_server: 'http://proxy.example.com:8080',
          proxy_username: 'user',
          proxy_password: 'pass',
        },
      },
      {
        name: 'cookies and headers',
        params: {
          url: 'https://example.com',
          cookies: [{ name: 'session', value: '123', domain: '.example.com' }],
          headers: { 'X-Custom': 'value', Authorization: 'Bearer token' },
        },
      },
      // Content filtering combinations
      {
        name: 'content filtering options',
        params: {
          url: 'https://example.com',
          word_count_threshold: 100,
          excluded_tags: ['script', 'style'],
          remove_overlay_elements: true,
        },
      },
      {
        name: 'text-only with form removal',
        params: {
          url: 'https://example.com',
          only_text: true,
          remove_forms: true,
          keep_data_attributes: false,
        },
      },
      // JavaScript execution combinations
      {
        name: 'js_code with wait conditions',
        params: {
          url: 'https://example.com',
          js_code: ['document.querySelector("button").click()'],
          wait_for: '#result',
          wait_for_timeout: 5000,
        },
      },
      {
        name: 'js_only with session',
        params: {
          url: 'https://example.com',
          js_only: true,
          session_id: 'test-session-123',
        },
      },
      // Dynamic content handling
      {
        name: 'scrolling configuration',
        params: {
          url: 'https://example.com',
          delay_before_scroll: 2000,
          scroll_delay: 500,
          scan_full_page: true,
        },
      },
      {
        name: 'virtual scroll for infinite feeds',
        params: {
          url: 'https://example.com',
          virtual_scroll_config: {
            container_selector: '.feed',
            scroll_count: 10,
            scroll_by: 500,
            wait_after_scroll: 1000,
          },
        },
      },
      // Media handling combinations
      {
        name: 'screenshot with PDF',
        params: {
          url: 'https://example.com',
          screenshot: true,
          screenshot_wait_for: 3,
          pdf: true,
          capture_mhtml: true,
        },
      },
      {
        name: 'image filtering options',
        params: {
          url: 'https://example.com',
          image_description_min_word_threshold: 10,
          image_score_threshold: 0.5,
          exclude_external_images: true,
        },
      },
      // Link filtering combinations
      {
        name: 'link exclusion options',
        params: {
          url: 'https://example.com',
          exclude_social_media_links: true,
          exclude_domains: ['facebook.com', 'twitter.com'],
          exclude_external_links: true,
        },
      },
      // Page interaction combinations
      {
        name: 'stealth mode options',
        params: {
          url: 'https://example.com',
          simulate_user: true,
          override_navigator: true,
          magic: true,
          user_agent: 'Custom Bot 1.0',
        },
      },
      // Complex combinations
      {
        name: 'kitchen sink - many options',
        params: {
          url: 'https://example.com',
          browser_type: 'chromium',
          viewport_width: 1280,
          viewport_height: 720,
          word_count_threshold: 50,
          excluded_tags: ['nav', 'footer'],
          js_code: ['window.scrollTo(0, document.body.scrollHeight)'],
          wait_for: '.loaded',
          screenshot: true,
          exclude_external_links: true,
          session_id: 'complex-session',
          cache_mode: 'BYPASS',
          verbose: true,
        },
      },
    ];

    parameterSets.forEach(({ name, params }) => {
      it(`should correctly process ${name}`, async () => {
        mockService.crawl.mockResolvedValue({
          results: [
            {
              url: params.url,
              success: true,
              markdown: { raw_markdown: 'Test content' },
            },
          ],
        });

        const result = await crawlHandlers.crawl(params);

        // Verify the service was called
        expect(mockService.crawl).toHaveBeenCalled();

        // Verify response structure
        expect(result.content).toBeDefined();
        expect(result.content[0].type).toBe('text');
      });
    });

    // Test parameter validation
    it('should handle invalid parameter combinations', async () => {
      const invalidParams = {
        url: 'https://example.com',
        js_only: true,
        // Missing required session_id when js_only is true
      };

      await expect(crawlHandlers.crawl(invalidParams)).rejects.toThrow();
    });

    // Test default values
    it('should apply correct defaults when parameters are omitted', async () => {
      mockService.crawl.mockResolvedValue({
        results: [
          {
            url: 'https://example.com',
            success: true,
            markdown: { raw_markdown: 'Content' },
          },
        ],
      });

      await crawlHandlers.crawl({ url: 'https://example.com' });

      const call = mockService.crawl.mock.calls[0][0];

      // Check browser_config defaults
      expect(call.browser_config).toBeDefined();
      expect(call.browser_config.headless).toBe(true);

      // Check that optional configs are not included when not specified
      expect(call.crawler_config.word_count_threshold).toBeUndefined();
      expect(call.crawler_config.excluded_tags).toBeUndefined();
    });
  });

  describe('Parameter Priority and Conflicts', () => {
    it('should handle conflicting cache modes correctly', async () => {
      mockService.crawl.mockResolvedValue({
        results: [{ success: true, markdown: { raw_markdown: 'Content' } }],
      });

      // Test that explicit cache_mode takes precedence
      await crawlHandlers.crawl({
        url: 'https://example.com',
        cache_mode: 'DISABLED',
        // Even with other params that might suggest caching
        session_id: 'test-session',
      });

      const call = mockService.crawl.mock.calls[0][0];
      expect(call.crawler_config.cache_mode).toBe('DISABLED');
    });

    it('should handle mutually exclusive options', async () => {
      mockService.crawl.mockResolvedValue({
        results: [{ success: true, html: '<p>HTML</p>' }],
      });

      // only_text should override other content options
      await crawlHandlers.crawl({
        url: 'https://example.com',
        only_text: true,
        keep_data_attributes: true, // Should be ignored with only_text
      });

      const call = mockService.crawl.mock.calls[0][0];
      expect(call.crawler_config.only_text).toBe(true);
      expect(call.crawler_config.keep_data_attributes).toBe(true); // Still passed through
    });
  });

  describe('Edge Cases for Optional Parameters', () => {
    it('should handle empty arrays correctly', async () => {
      mockService.crawl.mockResolvedValue({
        results: [{ success: true, markdown: { raw_markdown: 'Content' } }],
      });

      await crawlHandlers.crawl({
        url: 'https://example.com',
        excluded_tags: [], // Empty array
        exclude_domains: [], // Empty array
        cookies: [], // Empty array
      });

      const call = mockService.crawl.mock.calls[0][0];
      expect(call.crawler_config.excluded_tags).toEqual([]);
      expect(call.crawler_config.exclude_domains).toEqual([]);
      expect(call.browser_config.cookies).toEqual([]);
    });

    it('should handle null vs undefined correctly', async () => {
      mockService.crawl.mockResolvedValue({
        results: [{ success: true, markdown: { raw_markdown: 'Content' } }],
      });

      // null js_code should throw error
      await expect(
        crawlHandlers.crawl({
          url: 'https://example.com',
          js_code: null as unknown as string[],
        }),
      ).rejects.toThrow('js_code parameter is null');

      // undefined js_code should be fine
      await crawlHandlers.crawl({
        url: 'https://example.com',
        js_code: undefined,
      });

      expect(mockService.crawl).toHaveBeenCalledTimes(1);
    });

    it('should handle boolean flags in all combinations', async () => {
      const booleanFlags = [
        'remove_overlay_elements',
        'process_iframes',
        'exclude_external_links',
        'screenshot',
        'pdf',
        'verbose',
        'log_console',
        'simulate_user',
        'override_navigator',
        'magic',
      ];

      // Test all flags as true
      const allTrue = booleanFlags.reduce((acc, flag) => ({ ...acc, [flag]: true }), {
        url: 'https://example.com',
      });

      mockService.crawl.mockResolvedValue({
        results: [{ success: true, markdown: { raw_markdown: 'Content' } }],
      });

      await crawlHandlers.crawl(allTrue);

      const call = mockService.crawl.mock.calls[0][0];
      booleanFlags.forEach((flag) => {
        const config = call.crawler_config[flag] || call.browser_config[flag];
        expect(config).toBe(true);
      });
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/index.test.ts:
--------------------------------------------------------------------------------

```typescript
import { jest } from '@jest/globals';
import { z } from 'zod';

// Mock the MCP SDK
jest.mock('@modelcontextprotocol/sdk/server/index.js');
jest.mock('@modelcontextprotocol/sdk/server/stdio.js');

describe('MCP Server Validation', () => {
  describe('Stateless tool validation', () => {
    // Test the createStatelessSchema helper
    const createStatelessSchema = <T extends z.ZodTypeAny>(schema: T, toolName: string) => {
      // Tool-specific guidance for common scenarios
      const toolGuidance: Record<string, string> = {
        capture_screenshot: 'To capture screenshots with sessions, use crawl(session_id, screenshot: true)',
        generate_pdf: 'To generate PDFs with sessions, use crawl(session_id, pdf: true)',
        execute_js: 'To run JavaScript with sessions, use crawl(session_id, js_code: [...])',
        get_html: 'To get HTML with sessions, use crawl(session_id)',
        extract_with_llm: 'To extract data with sessions, first use crawl(session_id) then extract from the response',
      };

      const message = `${toolName} does not support session_id. This tool is stateless - each call creates a new browser. ${
        toolGuidance[toolName] || 'For persistent operations, use crawl with session_id.'
      }`;

      return z
        .object({
          session_id: z.never({ message }).optional(),
        })
        .passthrough()
        .and(schema)
        .transform((data) => {
          const { session_id, ...rest } = data as Record<string, unknown> & { session_id?: unknown };
          if (session_id !== undefined) {
            throw new Error(message);
          }
          return rest;
        });
    };

    it('should reject session_id for stateless tools', () => {
      const ExecuteJsSchema = createStatelessSchema(
        z.object({
          url: z.string().url(),
          js_code: z.union([z.string(), z.array(z.string())]),
        }),
        'execute_js',
      );

      // Should reject with session_id
      expect(() => {
        ExecuteJsSchema.parse({
          url: 'https://example.com',
          js_code: 'return document.title',
          session_id: 'test-session',
        });
      }).toThrow('execute_js does not support session_id');
    });

    it('should accept valid parameters without session_id', () => {
      const ExecuteJsSchema = createStatelessSchema(
        z.object({
          url: z.string().url(),
          js_code: z.union([z.string(), z.array(z.string())]),
        }),
        'execute_js',
      );

      const result = ExecuteJsSchema.parse({
        url: 'https://example.com',
        js_code: 'return document.title',
      });

      expect(result).toEqual({
        url: 'https://example.com',
        js_code: 'return document.title',
      });
    });

    it('should provide helpful error message when session_id is used', () => {
      const GetMarkdownSchema = createStatelessSchema(
        z.object({
          url: z.string().url(),
        }),
        'get_markdown',
      );

      try {
        GetMarkdownSchema.parse({
          url: 'https://example.com',
          session_id: 'my-session',
        });
      } catch (error) {
        expect(error).toBeInstanceOf(z.ZodError);
        const zodError = error as z.ZodError;
        expect(zodError.errors[0].message).toContain('get_markdown does not support session_id');
        expect(zodError.errors[0].message).toContain('For persistent operations, use crawl');
      }
    });

    it('should provide tool-specific guidance for common tools', () => {
      // Test capture_screenshot guidance
      const CaptureScreenshotSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'capture_screenshot');

      try {
        CaptureScreenshotSchema.parse({ url: 'https://example.com', session_id: 'test' });
      } catch (error) {
        const zodError = error as z.ZodError;
        expect(zodError.errors[0].message).toContain('use crawl(session_id, screenshot: true)');
      }

      // Test generate_pdf guidance
      const GeneratePdfSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'generate_pdf');

      try {
        GeneratePdfSchema.parse({ url: 'https://example.com', session_id: 'test' });
      } catch (error) {
        const zodError = error as z.ZodError;
        expect(zodError.errors[0].message).toContain('use crawl(session_id, pdf: true)');
      }

      // Test execute_js guidance
      const ExecuteJsSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'execute_js');

      try {
        ExecuteJsSchema.parse({ url: 'https://example.com', session_id: 'test' });
      } catch (error) {
        const zodError = error as z.ZodError;
        expect(zodError.errors[0].message).toContain('use crawl(session_id, js_code: [...])');
      }
    });

    it('should validate all stateless tools', () => {
      const statelessTools = [
        'get_markdown',
        'capture_screenshot',
        'generate_pdf',
        'execute_js',
        'batch_crawl',
        'smart_crawl',
        'get_html',
        'extract_links',
        'crawl_recursive',
        'parse_sitemap',
        'extract_with_llm',
      ];

      statelessTools.forEach((toolName) => {
        const schema = createStatelessSchema(
          z.object({
            url: z.string().url(),
          }),
          toolName,
        );

        // Should reject session_id
        expect(() => {
          schema.parse({
            url: 'https://example.com',
            session_id: 'test',
          });
        }).toThrow(`${toolName} does not support session_id`);

        // Should accept without session_id
        const result = schema.parse({
          url: 'https://example.com',
        });
        expect(result).toEqual({
          url: 'https://example.com',
        });
      });
    });
  });

  describe('Extract links tool', () => {
    it('should validate extract_links parameters', () => {
      const ExtractLinksSchema = z.object({
        url: z.string().url(),
        categorize: z.boolean().optional().default(true),
      });

      // Valid input with categorize true
      const result1 = ExtractLinksSchema.parse({
        url: 'https://example.com',
        categorize: true,
      });
      expect(result1.categorize).toBe(true);

      // Valid input with categorize false
      const result2 = ExtractLinksSchema.parse({
        url: 'https://example.com',
        categorize: false,
      });
      expect(result2.categorize).toBe(false);

      // Default categorize should be true
      const result3 = ExtractLinksSchema.parse({
        url: 'https://example.com',
      });
      expect(result3.categorize).toBe(true);
    });
  });

  describe('Session management tools', () => {
    it('should validate create_session parameters', () => {
      const CreateSessionSchema = z.object({
        session_id: z.string(),
        initial_url: z.string().optional(),
        browser_type: z.string().optional(),
      });

      // Valid input
      const result = CreateSessionSchema.parse({
        session_id: 'my-session',
        initial_url: 'https://example.com',
      });
      expect(result.session_id).toBe('my-session');

      // Missing required session_id
      expect(() => {
        CreateSessionSchema.parse({
          initial_url: 'https://example.com',
        });
      }).toThrow();
    });

    it('should validate clear_session parameters', () => {
      const ClearSessionSchema = z.object({
        session_id: z.string(),
      });

      // Valid input
      const result = ClearSessionSchema.parse({
        session_id: 'my-session',
      });
      expect(result.session_id).toBe('my-session');

      // Missing required session_id
      expect(() => {
        ClearSessionSchema.parse({});
      }).toThrow();
    });
  });

  describe('crawl validation', () => {
    it('should accept session_id for crawl', () => {
      const CrawlWithConfigSchema = z.object({
        url: z.string().url(),
        session_id: z.string().optional(),
        js_code: z.union([z.string(), z.array(z.string())]).optional(),
      });

      const result = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        session_id: 'my-session',
        js_code: 'document.querySelector("button").click()',
      });

      expect(result.session_id).toBe('my-session');
    });

    it('should work without session_id', () => {
      const CrawlWithConfigSchema = z.object({
        url: z.string().url(),
        session_id: z.string().optional(),
      });

      const result = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
      });

      expect(result.session_id).toBeUndefined();
    });

    it('should require js_only when using js_code with session_id WITHOUT output options', () => {
      // Create a schema that mirrors the real one's refinement
      const CrawlWithConfigSchema = z
        .object({
          url: z.string().url(),
          session_id: z.string().optional(),
          js_code: z.union([z.string(), z.array(z.string())]).optional(),
          js_only: z.boolean().optional(),
          screenshot: z.boolean().optional(),
          pdf: z.boolean().optional(),
        })
        .refine(
          (data) => {
            // Only require js_only when using js_code + session_id WITHOUT any output options
            if (data.js_code && data.session_id && !data.js_only && !data.screenshot && !data.pdf) {
              return false;
            }
            return true;
          },
          {
            message:
              'When using js_code with session_id WITHOUT screenshot or pdf, you MUST set js_only: true to prevent server errors. If you want screenshots/PDFs, you can omit js_only. Correct usage: crawl({url, session_id, js_code: [...], js_only: true})',
          },
        );

      // Should fail without js_only when no output options
      expect(() => {
        CrawlWithConfigSchema.parse({
          url: 'https://example.com',
          session_id: 'test-session',
          js_code: ['document.querySelector("button").click()'],
        });
      }).toThrow('When using js_code with session_id WITHOUT screenshot or pdf');

      // Should pass with js_only: true
      const result = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        session_id: 'test-session',
        js_code: ['document.querySelector("button").click()'],
        js_only: true,
      });
      expect(result.js_only).toBe(true);

      // Should pass with screenshot (no js_only required)
      const result2 = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        session_id: 'test-session',
        js_code: ['document.querySelector("button").click()'],
        screenshot: true,
      });
      expect(result2.screenshot).toBe(true);
      expect(result2.js_only).toBeUndefined();

      // Should pass with pdf (no js_only required)
      const result3 = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        session_id: 'test-session',
        js_code: ['document.querySelector("button").click()'],
        pdf: true,
      });
      expect(result3.pdf).toBe(true);
      expect(result3.js_only).toBeUndefined();

      // Should pass without js_code
      const result4 = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        session_id: 'test-session',
      });
      expect(result4.session_id).toBe('test-session');

      // Should pass without session_id
      const result5 = CrawlWithConfigSchema.parse({
        url: 'https://example.com',
        js_code: ['document.querySelector("button").click()'],
      });
      expect(result5.js_code).toBeDefined();
    });
  });

  describe('JavaScript code validation', () => {
    const validateJavaScriptCode = (code: string): boolean => {
      if (/&quot;|&amp;|&lt;|&gt;|&#\d+;|&\w+;/.test(code)) {
        return false;
      }
      if (/<(!DOCTYPE|html|body|head|script|style)\b/i.test(code)) {
        return false;
      }
      if (/[;})]\s*\\n|\\n\s*[{(/]/.test(code)) {
        return false;
      }
      if (/[;})]\s*\\n\s*\w/.test(code)) {
        return false;
      }
      return true;
    };

    const JsCodeSchema = z.union([
      z.string().refine(validateJavaScriptCode, {
        message:
          'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
      }),
      z.array(
        z.string().refine(validateJavaScriptCode, {
          message:
            'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
        }),
      ),
    ]);

    it('should reject JavaScript with HTML entities', () => {
      expect(() => {
        JsCodeSchema.parse('document.querySelector(&quot;button&quot;).click()');
      }).toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should reject JavaScript with literal \\n between statements', () => {
      expect(() => {
        JsCodeSchema.parse('console.log("line1");\\nconsole.log("line2")');
      }).toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should accept valid JavaScript with \\n inside strings', () => {
      const result = JsCodeSchema.parse('console.log("line1\\nline2")');
      expect(result).toBe('console.log("line1\\nline2")');
    });

    it('should accept valid multiline JavaScript', () => {
      const code = `// Comment
document.querySelector('button').click();
return true;`;
      const result = JsCodeSchema.parse(code);
      expect(result).toBe(code);
    });

    it('should validate arrays of JavaScript code', () => {
      // Invalid array
      expect(() => {
        JsCodeSchema.parse(['document.querySelector(&quot;input&quot;).value = &quot;test&quot;', 'form.submit()']);
      }).toThrow('Invalid JavaScript: Contains HTML entities');

      // Valid array
      const validArray = ['document.querySelector("input").value = "test"', 'form.submit()'];
      const result = JsCodeSchema.parse(validArray);
      expect(result).toEqual(validArray);
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/handlers/crawl-handlers.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { jest } from '@jest/globals';
import { AxiosError } from 'axios';
import type { CrawlHandlers as CrawlHandlersType } from '../../handlers/crawl-handlers.js';
import type { Crawl4AIService } from '../../crawl4ai-service.js';

// Mock the service
const mockCrawl = jest.fn();
const mockService = {
  crawl: mockCrawl,
} as unknown as Crawl4AIService;

// Mock axios client
const mockPost = jest.fn() as jest.Mock;
const mockHead = jest.fn() as jest.Mock;
const mockAxiosClient = {
  post: mockPost,
  head: mockHead,
} as any; // eslint-disable-line @typescript-eslint/no-explicit-any

// Mock axios for parseSitemap
const mockAxiosGet = jest.fn();
jest.unstable_mockModule('axios', () => ({
  default: {
    get: mockAxiosGet,
  },
  AxiosError,
}));

// Import after setting up mocks
const { CrawlHandlers: CrawlHandlersClass } = await import('../../handlers/crawl-handlers.js');

describe('CrawlHandlers', () => {
  let handler: CrawlHandlersType;
  let sessions: Map<string, any>; // eslint-disable-line @typescript-eslint/no-explicit-any

  beforeEach(() => {
    jest.clearAllMocks();
    sessions = new Map();
    handler = new CrawlHandlersClass(mockService, mockAxiosClient, sessions);
  });

  describe('batchCrawl', () => {
    it('should handle API errors gracefully', async () => {
      // Mock API error response
      (mockPost as jest.Mock).mockRejectedValue(
        new AxiosError('Request failed with status code 500', 'ERR_BAD_RESPONSE', undefined, undefined, {
          status: 500,
          statusText: 'Internal Server Error',
          data: 'Internal Server Error',
          headers: {},
          config: {} as any, // eslint-disable-line @typescript-eslint/no-explicit-any
        } as any), // eslint-disable-line @typescript-eslint/no-explicit-any
      );

      await expect(
        handler.batchCrawl({
          urls: ['not-a-valid-url', 'https://invalid-domain.com'],
          max_concurrent: 2,
        }),
      ).rejects.toThrow('Failed to batch crawl: Internal Server Error');
    });

    it('should support per-URL configs array', async () => {
      (mockPost as jest.Mock).mockResolvedValue({
        data: {
          results: [
            { url: 'https://example1.com', success: true, markdown: { raw_markdown: 'Test 1' } },
            { url: 'https://example2.com', success: true, markdown: { raw_markdown: 'Test 2' } },
          ],
        },
      });

      const result = await handler.batchCrawl({
        urls: ['https://example1.com', 'https://example2.com'],
        configs: [
          {
            url: 'https://example1.com',
            browser_config: { browser_type: 'chromium' },
            crawler_config: { screenshot: true },
          },
          {
            url: 'https://example2.com',
            browser_config: { browser_type: 'undetected' },
            crawler_config: { pdf: true },
            extraction_strategy: { provider: 'openai' },
          },
        ],
        max_concurrent: 2,
      });

      // Verify the configs array was passed through
      expect(mockPost).toHaveBeenCalledWith(
        '/crawl',
        expect.objectContaining({
          configs: expect.arrayContaining([
            expect.objectContaining({
              url: 'https://example1.com',
              browser_config: { browser_type: 'chromium' },
              crawler_config: { screenshot: true },
            }),
            expect.objectContaining({
              url: 'https://example2.com',
              browser_config: { browser_type: 'undetected' },
              crawler_config: { pdf: true },
              extraction_strategy: { provider: 'openai' },
            }),
          ]),
          max_concurrent: 2,
        }),
      );

      expect(result.content[0].text).toContain('Batch crawl completed');
    });
  });

  describe('smartCrawl', () => {
    it('should detect XML content type from HEAD request', async () => {
      // Mock HEAD response with XML content type
      (mockHead as jest.Mock).mockResolvedValue({
        headers: {
          'content-type': 'application/xml',
        },
      });

      // Mock crawl response
      (mockPost as jest.Mock).mockResolvedValue({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: '<xml>Test content</xml>',
              },
            },
          ],
        },
      });

      const result = await handler.smartCrawl({
        url: 'https://example.com/data.xml',
      });

      expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
      expect(result.content[0].text).toContain('<xml>Test content</xml>');
    });

    it('should handle HEAD request failure gracefully', async () => {
      // Mock HEAD request failure
      (mockHead as jest.Mock).mockRejectedValue(new Error('HEAD request failed'));

      // Mock successful crawl
      (mockPost as jest.Mock).mockResolvedValue({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Test content',
              },
            },
          ],
        },
      });

      const result = await handler.smartCrawl({
        url: 'https://example.com',
      });

      expect(result.content[0].text).toContain('Smart crawl detected content type: html');
    });

    it('should follow links from sitemap when follow_links is true', async () => {
      // Mock successful HEAD request
      (mockHead as jest.Mock).mockResolvedValue({
        headers: {
          'content-type': 'application/xml',
        },
      });

      // Mock initial crawl with sitemap content
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [
            {
              success: true,
              markdown: `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url>
    <loc>https://example.com/page1</loc>
  </url>
  <url>
    <loc>https://example.com/page2</loc>
  </url>
</urlset>`,
            },
          ],
        },
      });

      // Mock follow-up crawl
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [{ success: true }, { success: true }],
        },
      });

      const result = await handler.smartCrawl({
        url: 'https://example.com/sitemap.xml',
        follow_links: true,
        max_depth: 2,
      });

      expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap');
      expect(result.content[0].text).toContain('Followed 2 links:');
      expect(result.content[0].text).toContain('https://example.com/page1');
      expect(result.content[0].text).toContain('https://example.com/page2');
    });

    it('should handle smartCrawl API errors', async () => {
      (mockHead as jest.Mock).mockResolvedValue({ headers: {} });
      // Mock crawl to get empty results first, then error on follow-up
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [],
        },
      });

      const result = await handler.smartCrawl({
        url: 'https://example.com',
      });

      // With empty results, it should still return a response
      expect(result.content[0].text).toContain('Smart crawl detected content type: html');
      expect(result.content[0].text).toContain('No content extracted');
    });
  });

  describe('crawlRecursive', () => {
    it('should handle max_depth limit correctly', async () => {
      // Mock successful crawl with links
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Test content',
              },
              links: {
                internal: [{ href: 'https://example.com/page1' }, { href: 'https://example.com/page2' }],
                external: [],
              },
            },
          ],
        },
      });

      // Mock second crawl for page1
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Page 1 content',
              },
              links: {
                internal: [],
                external: [],
              },
            },
          ],
        },
      });

      // Mock third crawl for page2
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Page 2 content',
              },
              links: {
                internal: [],
                external: [],
              },
            },
          ],
        },
      });

      const result = await handler.crawlRecursive({
        url: 'https://example.com',
        max_depth: 1, // Should crawl initial URL and one level deep
      });

      expect(result.content[0].text).toContain('Pages crawled: 3'); // Initial + 2 pages at depth 1
      expect(result.content[0].text).toContain('Max depth reached: 1');
      expect(mockPost).toHaveBeenCalledTimes(3); // Initial crawl + two more
    });

    it('should handle invalid URLs in discovered links', async () => {
      // Mock crawl with invalid link
      (mockPost as jest.Mock).mockResolvedValue({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Test content',
              },
              links: {
                internal: [
                  { href: 'javascript:void(0)' }, // Invalid URL
                  { href: 'https://example.com/valid' }, // Valid URL
                ],
                external: [],
              },
            },
          ],
        },
      });

      const result = await handler.crawlRecursive({
        url: 'https://example.com',
        max_depth: 1,
      });

      // Should continue despite invalid URL
      expect(result.content[0].text).toContain('Pages crawled:');
    });

    it('should handle crawl failures during recursion', async () => {
      // First crawl succeeds
      (mockPost as jest.Mock).mockResolvedValueOnce({
        data: {
          results: [
            {
              success: true,
              markdown: {
                raw_markdown: 'Test content',
              },
              links: {
                internal: [{ href: 'https://example.com/page1' }],
                external: [],
              },
            },
          ],
        },
      });

      // Second crawl fails
      (mockPost as jest.Mock).mockRejectedValueOnce(new Error('Crawl failed'));

      const result = await handler.crawlRecursive({
        url: 'https://example.com',
        max_depth: 1,
      });

      // Should continue despite failure
      expect(result.content[0].text).toContain('Pages crawled: 1');
    });

    it('should handle crawlRecursive API errors', async () => {
      (mockPost as jest.Mock).mockRejectedValue(new Error('API Error'));

      const result = await handler.crawlRecursive({
        url: 'https://example.com',
      });

      // When the initial crawl fails, it should return a result with no pages crawled
      expect(result.content[0].text).toContain('Pages crawled: 0');
      expect(result.content[0].text).toContain('No pages could be crawled');
    });
  });

  describe('parseSitemap', () => {
    it('should handle network errors gracefully', async () => {
      // Mock ENOTFOUND error
      const error = new Error('getaddrinfo ENOTFOUND not-a-real-domain-12345.com');
      (error as { code?: string }).code = 'ENOTFOUND';
      mockAxiosGet.mockRejectedValue(error);

      await expect(
        handler.parseSitemap({
          url: 'https://not-a-real-domain-12345.com/sitemap.xml',
        }),
      ).rejects.toThrow('Failed to parse sitemap: getaddrinfo ENOTFOUND not-a-real-domain-12345.com');
    });
  });

  describe('crawl', () => {
    it('should handle word_count_threshold parameter', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            markdown: {
              raw_markdown: 'Test content',
            },
          },
        ],
      });

      const result = await handler.crawl({
        url: 'https://example.com',
        word_count_threshold: 100,
      });

      expect(mockCrawl).toHaveBeenCalledWith(
        expect.objectContaining({
          crawler_config: expect.objectContaining({
            word_count_threshold: 100,
          }),
        }),
      );
      expect(result.content[0].text).toBe('Test content');
    });

    it('should update session last_used time when using session_id', async () => {
      const sessionId = 'test-session';
      const session = {
        id: sessionId,
        created_at: new Date(),
        last_used: new Date('2025-08-01'),
      };
      sessions.set(sessionId, session);

      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            markdown: {
              raw_markdown: 'Test content',
            },
          },
        ],
      });

      await handler.crawl({
        url: 'https://example.com',
        session_id: sessionId,
      });

      const updatedSession = sessions.get(sessionId) as { last_used: Date };
      expect(updatedSession.last_used.getTime()).toBeGreaterThan(new Date('2025-08-01').getTime());
    });

    it('should handle image description parameters', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            markdown: {
              raw_markdown: 'Test content',
            },
          },
        ],
      });

      await handler.crawl({
        url: 'https://example.com',
        image_description_min_word_threshold: 10,
        image_score_threshold: 0.5,
      });

      expect(mockCrawl).toHaveBeenCalledWith(
        expect.objectContaining({
          crawler_config: expect.objectContaining({
            image_description_min_word_threshold: 10,
            image_score_threshold: 0.5,
          }),
        }),
      );
    });

    it('should handle exclude_social_media_links parameter', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            markdown: {
              raw_markdown: 'Test content',
            },
          },
        ],
      });

      await handler.crawl({
        url: 'https://example.com',
        exclude_social_media_links: true,
      });

      expect(mockCrawl).toHaveBeenCalledWith(
        expect.objectContaining({
          crawler_config: expect.objectContaining({
            exclude_social_media_links: true,
          }),
        }),
      );
    });

    it('should use extracted_content when available as string', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            extracted_content: 'Extracted text content',
          },
        ],
      });

      const result = await handler.crawl({
        url: 'https://example.com',
      });

      expect(result.content[0].text).toBe('Extracted text content');
    });

    it('should handle extracted_content as object', async () => {
      const extractedObj = { title: 'Test', body: 'Content' };
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            extracted_content: extractedObj,
          },
        ],
      });

      const result = await handler.crawl({
        url: 'https://example.com',
      });

      expect(result.content[0].text).toBe(JSON.stringify(extractedObj, null, 2));
    });

    it('should fallback to html when markdown is not available', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            html: '<html><body>HTML content</body></html>',
          },
        ],
      });

      const result = await handler.crawl({
        url: 'https://example.com',
      });

      expect(result.content[0].text).toBe('<html><body>HTML content</body></html>');
    });

    it('should fallback to fit_html when neither markdown nor html is available', async () => {
      (mockCrawl as jest.Mock).mockResolvedValue({
        results: [
          {
            success: true,
            fit_html: '<div>Fit HTML content</div>',
          },
        ],
      });

      const result = await handler.crawl({
        url: 'https://example.com',
      });

      expect(result.content[0].text).toBe('<div>Fit HTML content</div>');
    });

    it('should handle js_code as null error', async () => {
      await expect(
        handler.crawl({
          url: 'https://example.com',
          js_code: null,
        }),
      ).rejects.toThrow(
        'Failed to crawl: js_code parameter is null. Please provide JavaScript code as a string or array of strings.',
      );
    });
  });
});

```

--------------------------------------------------------------------------------
/src/__tests__/crawl.test.ts:
--------------------------------------------------------------------------------

```typescript
/* eslint-env jest */
import { jest } from '@jest/globals';
import type { AxiosResponse } from 'axios';
import type { MockAxiosInstance } from './types/mocks.js';
import type { Crawl4AIService as Crawl4AIServiceType } from '../crawl4ai-service.js';

// Manual mock for axios
const mockAxios = {
  create: jest.fn(),
};

jest.unstable_mockModule('axios', () => ({
  default: mockAxios,
}));

// Import modules after mocking
const { Crawl4AIService } = await import('../crawl4ai-service.js');

// Helper function to create a complete AxiosResponse object
function createMockAxiosResponse<T>(data: T): AxiosResponse<T> {
  return {
    data,
    status: 200,
    statusText: 'OK',
    headers: {},
    config: {
      url: '',
      method: 'post',
      headers: {},
    },
  } as AxiosResponse<T>;
}

describe('crawl parameter mapping', () => {
  let service: Crawl4AIServiceType;
  let mockAxiosInstance: MockAxiosInstance;

  beforeEach(() => {
    mockAxiosInstance = {
      post: jest.fn(),
      get: jest.fn(),
      head: jest.fn(),
    };
    mockAxios.create.mockReturnValue(mockAxiosInstance);
    service = new Crawl4AIService('http://test.com', 'test-key');
  });

  afterEach(() => {
    jest.clearAllMocks();
  });

  describe('Browser configuration mapping', () => {
    it('should map all browser config parameters correctly', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        browser_config: {
          browser_type: 'firefox',
          headless: true,
          viewport_width: 1920,
          viewport_height: 1080,
          user_agent: 'Custom User Agent',
          proxy_config: {
            server: 'http://proxy.com:8080',
            username: 'proxyuser',
            password: 'proxypass',
          },
          cookies: [{ name: 'session', value: 'abc123', domain: '.example.com', path: '/' }],
          headers: { 'X-Custom-Header': 'value' },
          extra_args: ['--disable-gpu'],
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: {
          browser_type: 'firefox',
          headless: true,
          viewport_width: 1920,
          viewport_height: 1080,
          user_agent: 'Custom User Agent',
          proxy_config: {
            server: 'http://proxy.com:8080',
            username: 'proxyuser',
            password: 'proxypass',
          },
          cookies: [{ name: 'session', value: 'abc123', domain: '.example.com', path: '/' }],
          headers: { 'X-Custom-Header': 'value' },
          extra_args: ['--disable-gpu'],
        },
        crawler_config: {},
      });
    });

    it('should support undetected browser type', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        browser_config: {
          browser_type: 'undetected',
          headless: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: {
          browser_type: 'undetected',
          headless: true,
        },
        crawler_config: {},
      });
    });

    it('should support unified proxy format (string)', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        browser_config: {
          proxy: 'http://user:[email protected]:8080',
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: {
          proxy: 'http://user:[email protected]:8080',
        },
        crawler_config: {},
      });
    });

    it('should support unified proxy format (object)', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        browser_config: {
          proxy: {
            server: 'http://proxy.example.com:8080',
            username: 'user',
            password: 'pass',
          },
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: {
          proxy: {
            server: 'http://proxy.example.com:8080',
            username: 'user',
            password: 'pass',
          },
        },
        crawler_config: {},
      });
    });
  });

  describe('Crawler configuration mapping', () => {
    it('should map content filtering parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          word_count_threshold: 150,
          excluded_tags: ['nav', 'footer', 'aside'],
          excluded_selector: '#ads, .popup',
          remove_overlay_elements: true,
          only_text: true,
          remove_forms: true,
          keep_data_attributes: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          word_count_threshold: 150,
          excluded_tags: ['nav', 'footer', 'aside'],
          excluded_selector: '#ads, .popup',
          remove_overlay_elements: true,
          only_text: true,
          remove_forms: true,
          keep_data_attributes: true,
        },
      });
    });

    it('should map JavaScript execution parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          js_code: ['document.querySelector(".load-more").click()', 'window.scrollTo(0, 1000)'],
          js_only: true,
          wait_for: '.content-loaded',
          wait_for_timeout: 10000,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          js_code: ['document.querySelector(".load-more").click()', 'window.scrollTo(0, 1000)'],
          js_only: true,
          wait_for: '.content-loaded',
          wait_for_timeout: 10000,
        },
      });
    });

    it('should map page navigation and timing parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          wait_until: 'networkidle',
          page_timeout: 45000,
          wait_for_images: true,
          ignore_body_visibility: false,
          scan_full_page: true,
          delay_before_scroll: 2000,
          scroll_delay: 1000,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          wait_until: 'networkidle',
          page_timeout: 45000,
          wait_for_images: true,
          ignore_body_visibility: false,
          scan_full_page: true,
          delay_before_scroll: 2000,
          scroll_delay: 1000,
        },
      });
    });

    it('should map media handling parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          screenshot: true,
          screenshot_wait_for: 2.5,
          pdf: true,
          capture_mhtml: true,
          image_description_min_word_threshold: 30,
          image_score_threshold: 5,
          exclude_external_images: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          screenshot: true,
          screenshot_wait_for: 2.5,
          pdf: true,
          capture_mhtml: true,
          image_description_min_word_threshold: 30,
          image_score_threshold: 5,
          exclude_external_images: true,
        },
      });
    });

    it('should map link filtering parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          exclude_external_links: true,
          exclude_social_media_links: true,
          exclude_domains: ['ads.com', 'tracker.io', 'analytics.com'],
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          exclude_external_links: true,
          exclude_social_media_links: true,
          exclude_domains: ['ads.com', 'tracker.io', 'analytics.com'],
        },
      });
    });

    it('should map page interaction parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          simulate_user: true,
          override_navigator: true,
          magic: true,
          process_iframes: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          simulate_user: true,
          override_navigator: true,
          magic: true,
          process_iframes: true,
        },
      });
    });

    it('should map virtual scroll configuration', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          virtual_scroll_config: {
            container_selector: '#timeline',
            scroll_count: 20,
            scroll_by: 'container_height',
            wait_after_scroll: 1.5,
          },
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          virtual_scroll_config: {
            container_selector: '#timeline',
            scroll_count: 20,
            scroll_by: 'container_height',
            wait_after_scroll: 1.5,
          },
        },
      });
    });

    // Note: Extraction strategies removed - not supported via REST API
    // Use extract_with_llm tool instead for structured data extraction

    it('should map session and cache parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          session_id: 'test-session-123',
          cache_mode: 'DISABLED',
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          session_id: 'test-session-123',
          cache_mode: 'DISABLED',
        },
      });
    });

    it('should map new crawler parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          delay_before_return_html: 2000,
          css_selector: '.main-content',
          include_links: true,
          resolve_absolute_urls: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          delay_before_return_html: 2000,
          css_selector: '.main-content',
          include_links: true,
          resolve_absolute_urls: true,
        },
      });
    });

    it('should map performance and debug parameters', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          timeout: 90000,
          verbose: true,
          log_console: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          timeout: 90000,
          verbose: true,
          log_console: true,
        },
      });
    });
  });

  describe('Extraction strategies', () => {
    it('should support extraction_strategy passthrough', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        extraction_strategy: {
          provider: 'openai',
          api_key: 'sk-test',
          model: 'gpt-4',
          temperature: 0.7,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {},
        extraction_strategy: {
          provider: 'openai',
          api_key: 'sk-test',
          model: 'gpt-4',
          temperature: 0.7,
        },
      });
    });

    it('should support table_extraction_strategy passthrough', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        table_extraction_strategy: {
          enable_chunking: true,
          thresholds: {
            min_rows: 5,
            max_columns: 20,
          },
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {},
        table_extraction_strategy: {
          enable_chunking: true,
          thresholds: {
            min_rows: 5,
            max_columns: 20,
          },
        },
      });
    });

    it('should support markdown_generator_options passthrough', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        markdown_generator_options: {
          include_links: true,
          preserve_formatting: true,
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {},
        markdown_generator_options: {
          include_links: true,
          preserve_formatting: true,
        },
      });
    });
  });

  describe('Combined configurations', () => {
    it('should handle both browser and crawler configs together', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        browser_config: {
          viewport_width: 1920,
          viewport_height: 1080,
          user_agent: 'Custom Bot',
        },
        crawler_config: {
          word_count_threshold: 100,
          js_code: 'document.querySelector(".accept").click()',
          wait_for: '.content',
          screenshot: true,
          session_id: 'test-session',
          cache_mode: 'BYPASS',
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: {
          viewport_width: 1920,
          viewport_height: 1080,
          user_agent: 'Custom Bot',
        },
        crawler_config: {
          word_count_threshold: 100,
          js_code: 'document.querySelector(".accept").click()',
          wait_for: '.content',
          screenshot: true,
          session_id: 'test-session',
          cache_mode: 'BYPASS',
        },
      });
    });
  });

  describe('Edge cases', () => {
    it('should handle undefined values correctly', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          word_count_threshold: 0, // Should be included (falsy but defined)
          excluded_tags: undefined, // Should not be included
          remove_overlay_elements: false, // Should be included
          only_text: undefined, // Should not be included
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          word_count_threshold: 0,
          excluded_tags: undefined,
          remove_overlay_elements: false,
          only_text: undefined,
        },
      });
    });

    it('should handle empty arrays correctly', async () => {
      const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] });
      mockAxiosInstance.post.mockResolvedValueOnce(mockResponse);

      await service.crawl({
        url: 'https://example.com',
        crawler_config: {
          excluded_tags: [],
          exclude_domains: [],
        },
      });

      expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', {
        urls: ['https://example.com'],
        browser_config: undefined,
        crawler_config: {
          excluded_tags: [],
          exclude_domains: [],
        },
      });
    });
  });
});

```

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------

```markdown
# Changelog

## Version 3.0.2 (2025-09-01)

### Bug Fixes
- Fixed manage_session tool schema compatibility with Claude/Anthropic tools
  - Removed oneOf/allOf/anyOf from top-level schema
  - Simplified to plain object schema with enum constraints
  - Maintains all functionality while improving MCP client compatibility

## Version 3.0.1 (2025-08-30)

### Documentation
- Updated README.md to accurately document all new parameters from v3.0.0
- Added documentation for batch_crawl configs array parameter
- Clarified proxy object format support
- Documented all new crawler parameters from Crawl4AI 0.7.3/0.7.4

## Version 3.0.0 (2025-08-30)

### Features
- Added full support for Crawl4AI 0.7.3/0.7.4 features:
  - **'undetected' browser type** - Stealth browser option for anti-bot detection
  - **New crawler parameters**:
    - `delay_before_return_html` - Delay before returning HTML content
    - `css_selector` - Filter content by CSS selector
    - `include_links` - Include extracted links in response
    - `resolve_absolute_urls` - Convert relative URLs to absolute
  - **Extraction strategies** - Support for LLM extraction, table extraction, and markdown generation options
  - **Multi-config batch crawling** - Per-URL configurations in batch_crawl
  - **Unified proxy format** - Support both string and object proxy configurations
  - **Memory metrics display** - Show server memory usage when available

### Improvements
- Enhanced error formatting for better debugging
- Better handling of object error responses from API
- Fixed batch_crawl to include required `urls` field when using configs array

### Testing
- Added comprehensive integration tests for all new features
- Fixed TypeScript errors in test files
- All 306 unit tests passing
- All 150 integration tests passing

### Backward Compatibility
- Fully backward compatible with older Crawl4AI servers (before 0.7.4)
- All new features are optional and gracefully degrade

## Version 2.9.0 (2025-08-29)

### Breaking Changes
- Consolidated session management into single `manage_session` tool
  - Replaces `create_session`, `clear_session`, and `list_sessions` tools
  - Uses discriminated union with `action` parameter: 'create', 'clear', or 'list'
  - Reduces tool count from 15 to 13

### Removed
- Removed `create_session` tool (use `manage_session` with `action: 'create'`)
- Removed `clear_session` tool (use `manage_session` with `action: 'clear'`)
- Removed `list_sessions` tool (use `manage_session` with `action: 'list'`)

### Improvements
- Simplified API surface for better LLM interaction
- Improved type safety with discriminated unions
- Reduced code duplication in session management

### Testing
- Updated all tests to use new `manage_session` tool
- Maintained 100% test coverage

## Version 2.7.1 (2025-08-30)

### Bug Fixes
- Fixed lint/formatting issues in test files
- Cleaned up trailing whitespace

## Version 2.7.0 (2025-08-30)

### Compatibility Updates
- Verified full compatibility with Crawl4AI version 0.7.4
  - All 15 MCP tools tested and working
  - 100% integration test pass rate (148 tests)
  - Supports new v0.7.3/0.7.4 features including:
    - Undetected browser support with stealth mode
    - Multi-URL configuration system
    - Enhanced table extraction
    - Memory optimization improvements

### Bug Fixes
- Fixed unit test timeout issues in NPX and CLI tests
  - Added proper process cleanup and timeouts
  - Fixed edge case where dotenv was loading during tests
  - Ensured all spawned child processes are properly terminated

### Testing
- Comprehensive testing against Crawl4AI v0.7.4 Docker image
- All integration tests pass with LLM features enabled
- Unit test suite: 308 tests passing
- Integration test suite: 148 tests passing

## Version 2.6.12 (2025-08-05)

### Bug Fixes
- Fixed server startup issue when running via npx
  - Removed complex module detection logic that was preventing server startup
  - Server now always starts when the script is executed (as intended for MCP servers)
  - Simplified dotenv loading to only attempt in development when env vars aren't set

## Version 2.6.11 (2025-08-05)

### Bug Fixes
- Fixed environment variable handling when running via npx
  - Only loads .env file if CRAWL4AI_BASE_URL is not already set
  - Prevents issues when env vars are passed via CLI/MCP configuration
  - Ensures package works correctly with Claude Desktop and other MCP clients

## Version 2.6.10 (2025-08-05)

### Bug Fixes
- Fixed unit tests to use correct localhost URL from jest.setup.cjs
- Fixed network error handling tests to not specify request body in nock mocks
- Unit tests always use http://localhost:11235 as configured
- Integration tests get URL from .env file

### Code Quality
- Replaced all 'any' type warnings with proper type assertions in tests
- All tests passing with zero lint warnings

## Version 2.6.9 (2025-08-05)

### Testing Improvements
- Improved crawl4ai-service.ts test coverage from 76% to 84%
- Added comprehensive network error handling tests
- Added URL validation tests for all service methods
- Added tests for optional parameter handling
- Added JavaScript validation edge case tests

### Code Quality
- All tests pass with zero lint errors
- Maintained 100% function coverage for service layer

## Version 2.6.8 (2025-08-05)

### Code Cleanup
- Removed unused mock generation system
- Cleaned up package.json scripts
- Simplified development workflow

### Chores
- Verified alignment between unit tests, integration tests, and implementation
- Confirmed all tests properly mock API interactions

## Version 2.6.7 (2025-08-05)

### Bug Fixes
- Fixed integration tests to use production Crawl4AI server from environment variables
- Fixed child process environment variable loading in test utilities
- Added support for both string and object markdown responses from Crawl4AI API
- Fixed timeout issues in MHTML capture and HTML extraction tests
- Replaced unreliable test URLs (httpbin.org) with stable alternatives
- Added 30-second timeout to session creation to prevent socket hang-ups

### Testing Improvements
- Integration tests now run sequentially (maxWorkers: 1) to avoid rate limiting
- Added proper working directory configuration for child processes
- Fixed all integration tests to pass with production API
- Maintained test coverage at 92.25% with all tests passing

## Version 2.6.6 (2025-08-05)

### Testing
- Improved test coverage from 88.8% to 93.19%
  - Added comprehensive CLI entry point tests for signal handling, environment variables, and dotenv loading
  - Added network failure tests for axios timeout and HTTP error scenarios
  - Added input validation edge case tests for JavaScript code validation
  - Added parameter combination tests for optional parameters and edge cases
  - Improved branch coverage from 80.76% to 86.12%
  - Improved function coverage from 96.41% to 98.92%

## Version 2.6.5 (2025-08-05)

### Features
- Enhanced screenshot handling for better compatibility
  - Added home directory (`~`) path resolution support
  - Large screenshots (>800KB) are now saved locally without being returned inline to avoid MCP's 1MB response limit
  - Clear indication when screenshots are too large to display inline

### Bug Fixes
- Improved screenshot directory handling
  - Better parameter descriptions clarifying that only directory paths should be provided
  - Added automatic handling when file paths are mistakenly provided instead of directories
  - Warning messages when incorrect path format is detected
  - Ensures compatibility with various LLM usage patterns

## Version 2.6.4 (2025-08-04)

### Features
- Added local screenshot storage support
  - capture_screenshot: New save_to_directory parameter saves screenshots locally while returning as MCP resource
  - crawl: New screenshot_directory parameter saves screenshots when screenshot=true
  - Automatic filename generation using URL hostname and timestamp
  - Creates directories if they don't exist
  - Graceful error handling - failures don't interrupt the crawl operation
  - Added comprehensive unit tests for file saving functionality

## Version 2.6.3 (2025-08-04)

### Enhancements
- Improved tool descriptions for better LLM understanding and workflow clarity
  - Added [STATELESS], [SUPPORTS SESSIONS], [SESSION MANAGEMENT] indicators
  - Enhanced get_html description to emphasize selector discovery for automation
  - Added inspect-first workflow patterns to crawl tool description
  - Emphasized element verification in js_code parameter description
  - Added typical workflow guidance to create_session
  - Improved cross-references between related tools
  - Removed problematic one-shot form pattern that assumed element existence

### Bug Fixes
- Fixed crawl_recursive max_depth behavior
  - max_depth: 0 now correctly crawls only the initial page
  - Previously, max_depth: 0 would crawl pages at depth 0 and depth 1

## Version 2.6.2 (2025-08-04)

### Refactoring
- Consolidated error handling in server.ts with validateAndExecute helper
  - Reduced ~90 lines of duplicate code  
  - Preserved exact error message format for LLM compatibility
  - Improved maintainability while keeping behavior identical
  - Server.ts coverage improved from ~90% to 98.66%

## Version 2.6.1 (2025-08-04)

### Testing
- Improved crawl-handlers test coverage from 87% to 97%
  - Added comprehensive unit tests for all crawl handler methods
  - Test error handling for batchCrawl, smartCrawl, crawlRecursive, parseSitemap
  - Cover edge cases including XML detection, URL validation, depth limits
  - Added integration tests for real API behavior validation
  - Test all crawl parameters including word_count_threshold, image thresholds, exclude_social_media_links
  - Properly handle MCP error formatting vs direct handler throws

## Version 2.6.0 (2025-08-04)

### Testing
- Added comprehensive test coverage for error handling paths
  - Session creation with failed initial crawl
  - JavaScript execution error handling with accurate API response formats
  - Extract links manual extraction fallback when API returns empty links
  - Improved coverage from 87.23% to 89.71% lines
- Added integration tests for crawl error handling
  - Invalid URL validation
  - Non-existent domain handling
- Added unit tests for utility handlers
  - Manual link extraction from markdown
  - Malformed URL handling
  - Empty results scenarios

### Improvements
- Better error resilience in session creation when initial crawl fails
- More accurate test mocks based on real API responses

## Version 2.5.0 (2025-08-04)

### Refactoring
- Removed backward compatibility exports from index.ts
- Updated test imports to use direct module paths
- Cleaned up index.ts to focus solely on CLI entry point

### Testing
- Updated jest.setup.cjs to load .env for integration tests
- Unit tests continue using localhost:11235
- Integration tests now use values from .env file

## Version 2.4.0 (2025-08-04)

### Features
- Replaced Codecov with GitHub Actions-based coverage badge
  - Coverage badge now uses GitHub Gist for storage
  - No external dependencies for coverage tracking
  - Badge updates automatically with each CI run
  - Coverage reports published to GitHub Pages
  - Interactive HTML coverage report available at https://omgwtfwow.github.io/mcp-crawl4ai-ts/coverage/

### Bug Fixes
- Fixed smart_crawl implementation to remove unsupported 'strategy' parameter
- Fixed coverage extraction in CI to use lcov.info format
- Added proper URL encoding for Shields.io endpoint badge

### CI/CD Improvements
- Added GitHub Pages deployment for coverage reports
- Added write permissions for GitHub Actions to create gh-pages branch
- Removed Codecov integration completely

### Maintenance
- Removed .codecov.yml configuration file
- Removed CODECOV_TOKEN from repository secrets
- Updated README.md with new coverage badge

## Version 2.3.0 (2025-08-03)

### Refactoring
- Split large 2,366-line index.ts file into modular structure
  - Created handlers/ directory with operation-specific handlers
  - Created schemas/ directory for validation schemas
  - Reduced file sizes to under 1,000 lines each (most under 300)
  - Maintained backward compatibility with all exports
  - Improved code organization and maintainability

### Testing
- Updated tests to work with new modular structure
- Maintained test coverage at 87.23% (exceeds 86% requirement)
- All 165 unit tests passing

## Version 2.2.0 (2025-08-03)

### Features
- Added comprehensive test coverage infrastructure
  - Set up Jest code coverage with Istanbul
  - Added test:coverage and test:ci npm scripts
  - Configured coverage thresholds (80% for all metrics)
  - Added coverage badge to README
  - Achieved 86.51% line coverage, 82.21% statement coverage

### Testing Improvements
- Added comprehensive unit tests for all tool handlers in index.ts
  - Tests for success cases, error handling, and edge cases
  - Tests for MCP protocol request handling
  - Tests for parameter validation with Zod schemas
- Added unit tests for JavaScript validation function
- Added tests for private methods: parseSitemap and detectContentType
- Fixed integration test reliability issues:
  - Replaced example.com with httpbin.org in execute-js tests
  - Fixed test expectations for JavaScript execution results
  - Fixed MCP request handler test setup

### Bug Fixes
- Fixed parse_sitemap implementation to use axios.get directly instead of non-existent service method
- Fixed TypeScript 'any' warnings in test files (eliminated 90+ warnings)
- Fixed linting errors and formatting issues across the test suite
- Fixed test URL in batch-crawl test (httpbingo.org → httpbin.org)

### CI/CD Improvements
- Updated GitHub Actions workflow to include coverage reporting
- Added Node.js 22.x to the test matrix
- Fixed all failing CI tests

## Version 2.1.2 (2025-08-03)

### Documentation
- Updated Node.js requirement from 16+ to 18+ to reflect actual testing and support
  - Node.js 16 reached End-of-Life in September 2023
  - CI only tests on Node.js 18.x and 20.x
  - Added `engines` field to package.json to enforce Node.js 18+ requirement

## Version 2.1.1 (2025-08-03)

### Bug Fixes
- Fixed GitHub homepage README display issue by renaming .github/README.md to CI.md
  - GitHub was showing the CI documentation instead of the main project README

## Version 2.1.0 (2025-08-03)

### Bug Fixes
- Fixed `smart_crawl` bug where markdown object was incorrectly printed as `[object Object]`
  - Now correctly accesses `result.markdown.raw_markdown` for content display
- Fixed integration test timeout issues:
  - Replaced example.com with httpbin.org/html in tests to avoid "domcontentloaded" timeout issues
  - Fixed httpbin.org URLs by adding proper path suffixes (e.g., /links/5/0)
  - Limited Jest parallelization for integration tests to prevent server overload
- Fixed parameter mapping in `get_markdown` tool - now correctly maps schema properties (`filter`, `query`, `cache`) to API parameters (`f`, `q`, `c`)
- Fixed `smart_crawl` schema to use `follow_links` parameter instead of `remove_images`
- Fixed `extract_links` schema mismatch - corrected schema to use `categorize` parameter as defined in tool
- Fixed `extract_links` implementation to properly handle link objects returned by API
- Fixed `crawl_recursive` schema mismatch - corrected schema to use `include_pattern` and `exclude_pattern` instead of `filter_pattern` and `bypass_cache`
- Fixed `crawl_recursive` implementation to use `/crawl` endpoint instead of `/md` for proper link extraction
- Fixed `crawl_recursive` type issues and improved link handling for recursive crawling
- Fixed `parse_sitemap` implementation to fetch sitemaps directly instead of through Crawl4AI server API
- Fixed `create_session` schema to make `session_id` optional as documented
- Enhanced `create_session` response to include all session parameters for programmatic access
- Implemented proper handling for non-functional server parameters:
  - `batch_crawl`: `remove_images` now uses `exclude_tags` in crawler_config to actually remove images
  - `smart_crawl`: `follow_links` now crawls URLs found in sitemaps/RSS feeds (max 10 URLs)
- Fixed `crawl` and `generate_pdf` tools PDF response to use proper MCP SDK embedded resource format with blob field

### Improvements
- Added comprehensive integration tests for `batch_crawl` tool (7 tests)
- Added comprehensive integration tests for `smart_crawl` tool (8 tests)
- Fixed all ESLint formatting issues across the codebase
- Enhanced error handling for empty URL arrays in batch_crawl
- Improved test reliability by replacing problematic test URLs
- Updated tool descriptions to accurately reflect actual behavior
- Added proper TypeScript types for getMarkdown function
- Enhanced test coverage for batch_crawl parameter handling
- Added comprehensive unit and integration tests for `extract_links` tool
- Improved JSON endpoint detection in `extract_links` tool
- Better error handling for `extract_links` with graceful error messages
- Added comprehensive integration tests for `crawl_recursive` tool
- Improved `crawl_recursive` output format to clearly show depth levels and internal link counts
- Enhanced error handling in `crawl_recursive` to continue crawling even if individual pages fail
- Added comprehensive integration tests for `parse_sitemap` tool with various test cases
- Added comprehensive integration tests for session management tools (`create_session`, `clear_session`, `list_sessions`)
- Enhanced integration tests for `extract_with_llm` tool to handle non-deterministic LLM responses
- Installed nock library for future HTTP mocking in unit tests
- Fixed TypeScript lint warnings by replacing `any` types with proper types:
  - Changed error handling to use proper type assertions
  - Updated `unknown[]` for JavaScript execution results
  - Used `Record<string, unknown>` for generic objects
  - Created `LinkItem` interface for better type safety
  - Fixed all production code `any` types
  - Removed unused legacy `CrawlResult` interface
- Consolidated unit tests to use nock for HTTP mocking:
  - Removed redundant Jest mock test file
  - Removed unused mocks directory
  - Renamed test file for clarity
  - Improved unit test performance from 92s to ~1s by removing timeout tests
  - Cleaned up test organization and removed test README
- Added GitHub Actions CI workflow:
  - Automatic testing on push to main and pull requests
  - Tests run on Node.js 18.x and 20.x
  - Includes linting, formatting checks, and build verification
- Added mock helper scripts:
  - `npm run generate-mocks`: Generate nock mock code from real API
  - `npm run view-mocks`: View and save API responses for reference
  - Both scripts help maintain accurate test mocks

## Version 2.0.1 (2025-08-02)
Update README

## Version 2.0.0 (2025-08-02)

### Breaking Changes
- Renamed `crawl_with_config` tool to `crawl`

### New Features
- Added comprehensive response types for all endpoints (PDF, screenshot, HTML, markdown)
- Enhanced parameter validation with clearer error messages
- Improved documentation for JavaScript execution patterns
- Added selector strategy guidance for form interaction
- Better distinction between `wait_for` and `wait_until` usage

### Bug Fixes
- Fixed server 500 errors by always including `crawler_config` in requests
- Updated media and links types to match actual server responses
- Corrected validation for `js_only` parameter usage

### Documentation
- Added troubleshooting section with common issues and solutions
- Included practical examples for form filling and multi-step navigation
- Enhanced tool descriptions with clear warnings and recommendations
- Added selector strategy guide for working with dynamic content

### Technical Improvements
- Updated all TypeScript types based on actual server responses
- Improved error handling and user-friendly messages
- Enhanced Zod validation schemas with helpful refinements
- Added comprehensive integration tests for new features

### Known Issues
- `js_only: true` causes server serialization errors - use `screenshot: true` as workaround
- Using `wait_for` with elements that already exist can cause timeouts - use `wait_until` instead

## Version 1.0.2
- Initial stable release with full MCP implementation
- Support for all Crawl4AI endpoints
- Basic session management
- Integration with MCP clients
```

--------------------------------------------------------------------------------
/src/handlers/crawl-handlers.ts:
--------------------------------------------------------------------------------

```typescript
import { BaseHandler } from './base-handler.js';
import {
  BatchCrawlOptions,
  CrawlResultItem,
  AdvancedCrawlConfig,
  CrawlEndpointResponse,
  ExtractionStrategy,
  TableExtractionStrategy,
  MarkdownGeneratorOptions,
} from '../types.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as os from 'os';

export class CrawlHandlers extends BaseHandler {
  async batchCrawl(options: BatchCrawlOptions) {
    try {
      let response;

      // Check if we have per-URL configs (new in 0.7.3/0.7.4)
      if (options.configs && options.configs.length > 0) {
        // Use the new configs array format
        // Extract URLs from configs for the urls field
        const urls = options.configs.map((config) => config.url);
        const requestBody = {
          urls: urls,
          configs: options.configs,
          max_concurrent: options.max_concurrent,
        };
        response = await this.axiosClient.post('/crawl', requestBody);
      } else {
        // Use the legacy format with single crawler_config
        // Build crawler config if needed
        const crawler_config: Record<string, unknown> = {};

        // Handle remove_images by using exclude_tags
        if (options.remove_images) {
          crawler_config.exclude_tags = ['img', 'picture', 'svg'];
        }

        if (options.bypass_cache) {
          crawler_config.cache_mode = 'BYPASS';
        }

        response = await this.axiosClient.post('/crawl', {
          urls: options.urls,
          max_concurrent: options.max_concurrent,
          crawler_config: Object.keys(crawler_config).length > 0 ? crawler_config : undefined,
        });
      }

      const results = response.data.results || [];

      // Add memory metrics if available
      let metricsText = '';
      const responseData = response.data as CrawlEndpointResponse;
      if (responseData.server_memory_delta_mb !== undefined || responseData.server_peak_memory_mb !== undefined) {
        const memoryInfo = [];
        if (responseData.server_processing_time_s !== undefined) {
          memoryInfo.push(`Processing time: ${responseData.server_processing_time_s.toFixed(2)}s`);
        }
        if (responseData.server_memory_delta_mb !== undefined) {
          memoryInfo.push(`Memory delta: ${responseData.server_memory_delta_mb.toFixed(1)}MB`);
        }
        if (responseData.server_peak_memory_mb !== undefined) {
          memoryInfo.push(`Peak memory: ${responseData.server_peak_memory_mb.toFixed(1)}MB`);
        }
        if (memoryInfo.length > 0) {
          metricsText = `\n\nServer metrics: ${memoryInfo.join(', ')}`;
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: `Batch crawl completed. Processed ${results.length} URLs:\n\n${results
              .map(
                (r: CrawlResultItem, i: number) => `${i + 1}. ${options.urls[i]}: ${r.success ? 'Success' : 'Failed'}`,
              )
              .join('\n')}${metricsText}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'batch crawl');
    }
  }

  async smartCrawl(options: { url: string; max_depth?: number; follow_links?: boolean; bypass_cache?: boolean }) {
    try {
      // First, try to detect the content type from URL or HEAD request
      let contentType = '';
      try {
        const headResponse = await this.axiosClient.head(options.url);
        contentType = headResponse.headers['content-type'] || '';
      } catch {
        // If HEAD request fails, continue anyway - we'll detect from the crawl response
        console.debug('HEAD request failed, will detect content type from response');
      }

      let detectedType = 'html';
      if (options.url.includes('sitemap') || options.url.endsWith('.xml')) {
        detectedType = 'sitemap';
      } else if (options.url.includes('rss') || options.url.includes('feed')) {
        detectedType = 'rss';
      } else if (contentType.includes('text/plain') || options.url.endsWith('.txt')) {
        detectedType = 'text';
      } else if (contentType.includes('application/xml') || contentType.includes('text/xml')) {
        detectedType = 'xml';
      } else if (contentType.includes('application/json')) {
        detectedType = 'json';
      }

      // Crawl without the unsupported 'strategy' parameter
      const response = await this.axiosClient.post('/crawl', {
        urls: [options.url],
        crawler_config: {
          cache_mode: options.bypass_cache ? 'BYPASS' : 'ENABLED',
        },
        browser_config: {
          headless: true,
          browser_type: 'chromium',
        },
      });

      const results = response.data.results || [];
      const result = results[0] || {};

      // Handle follow_links for sitemaps and RSS feeds
      if (options.follow_links && (detectedType === 'sitemap' || detectedType === 'rss' || detectedType === 'xml')) {
        // Extract URLs from the content
        const urlPattern = /<loc>(.*?)<\/loc>|<link[^>]*>(.*?)<\/link>|href=["']([^"']+)["']/gi;
        const content = result.markdown || result.html || '';
        const foundUrls: string[] = [];
        let match;

        while ((match = urlPattern.exec(content)) !== null) {
          const url = match[1] || match[2] || match[3];
          if (url && url.startsWith('http')) {
            foundUrls.push(url);
          }
        }

        if (foundUrls.length > 0) {
          // Limit to first 10 URLs to avoid overwhelming the system
          const urlsToFollow = foundUrls.slice(0, Math.min(10, options.max_depth || 10));

          // Crawl the found URLs
          await this.axiosClient.post('/crawl', {
            urls: urlsToFollow,
            max_concurrent: 3,
            bypass_cache: options.bypass_cache,
          });

          return {
            content: [
              {
                type: 'text',
                text: `Smart crawl detected content type: ${detectedType}\n\nMain content:\n${result.markdown?.raw_markdown || result.html || 'No content extracted'}\n\n---\nFollowed ${urlsToFollow.length} links:\n${urlsToFollow.map((url, i) => `${i + 1}. ${url}`).join('\n')}`,
              },
              ...(result.metadata
                ? [
                    {
                      type: 'text',
                      text: `\n\n---\nMetadata:\n${JSON.stringify(result.metadata, null, 2)}`,
                    },
                  ]
                : []),
            ],
          };
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: `Smart crawl detected content type: ${detectedType}\n\n${result.markdown?.raw_markdown || result.html || 'No content extracted'}`,
          },
          ...(result.metadata
            ? [
                {
                  type: 'text',
                  text: `\n\n---\nMetadata:\n${JSON.stringify(result.metadata, null, 2)}`,
                },
              ]
            : []),
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'smart crawl');
    }
  }

  async crawlRecursive(options: {
    url: string;
    max_depth?: number;
    max_pages?: number;
    include_pattern?: string;
    exclude_pattern?: string;
  }) {
    try {
      const startUrl = new URL(options.url);
      const visited = new Set<string>();
      const toVisit: Array<{ url: string; depth: number }> = [{ url: options.url, depth: 0 }];
      const results: Array<{ url: string; content: string; internal_links_found: number; depth: number }> = [];
      let maxDepthReached = 0;

      const includeRegex = options.include_pattern ? new RegExp(options.include_pattern) : null;
      const excludeRegex = options.exclude_pattern ? new RegExp(options.exclude_pattern) : null;

      const maxDepth = options.max_depth !== undefined ? options.max_depth : 3;
      const maxPages = options.max_pages || 50;

      while (toVisit.length > 0 && results.length < maxPages) {
        const current = toVisit.shift();
        if (!current || visited.has(current.url) || current.depth > maxDepth) {
          continue;
        }

        visited.add(current.url);

        try {
          // Check URL patterns
          if (excludeRegex && excludeRegex.test(current.url)) continue;
          if (includeRegex && !includeRegex.test(current.url)) continue;

          // Crawl the page using the crawl endpoint to get links
          const response = await this.axiosClient.post('/crawl', {
            urls: [current.url],
            crawler_config: {
              cache_mode: 'BYPASS',
            },
          });

          const crawlResults = response.data.results || [response.data];
          const result: CrawlResultItem = crawlResults[0];

          if (result && result.success) {
            const markdownContent = result.markdown?.fit_markdown || result.markdown?.raw_markdown || '';
            const internalLinksCount = result.links?.internal?.length || 0;
            maxDepthReached = Math.max(maxDepthReached, current.depth);
            results.push({
              url: current.url,
              content: markdownContent,
              internal_links_found: internalLinksCount,
              depth: current.depth,
            });

            // Add internal links to crawl queue
            if (current.depth < maxDepth && result.links?.internal) {
              for (const linkObj of result.links.internal) {
                const linkUrl = linkObj.href || linkObj;
                try {
                  const absoluteUrl = new URL(linkUrl, current.url).toString();
                  if (!visited.has(absoluteUrl) && new URL(absoluteUrl).hostname === startUrl.hostname) {
                    toVisit.push({ url: absoluteUrl, depth: current.depth + 1 });
                  }
                } catch (e) {
                  // Skip invalid URLs
                  console.debug('Invalid URL:', e);
                }
              }
            }
          }
        } catch (error) {
          // Log but continue crawling other pages
          console.error(`Failed to crawl ${current.url}:`, error instanceof Error ? error.message : error);
        }
      }

      // Prepare the output text
      let outputText = `Recursive crawl completed:\n\nPages crawled: ${results.length}\nStarting URL: ${options.url}\n`;

      if (results.length > 0) {
        outputText += `Max depth reached: ${maxDepthReached} (limit: ${maxDepth})\n\nNote: Only internal links (same domain) are followed during recursive crawling.\n\nPages found:\n${results.map((r) => `- [Depth ${r.depth}] ${r.url}\n  Content: ${r.content.length} chars\n  Internal links found: ${r.internal_links_found}`).join('\n')}`;
      } else {
        outputText += `\nNo pages could be crawled. This might be due to:\n- The starting URL returned an error\n- No internal links were found\n- All discovered links were filtered out by include/exclude patterns`;
      }

      return {
        content: [
          {
            type: 'text',
            text: outputText,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'crawl recursively');
    }
  }

  async parseSitemap(options: { url: string; filter_pattern?: string }) {
    try {
      // Fetch the sitemap directly (not through Crawl4AI server)
      const axios = (await import('axios')).default;
      const response = await axios.get(options.url, {
        timeout: 30000,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCP-Crawl4AI/1.0)',
        },
      });
      const sitemapContent = response.data;

      // Parse XML content - simple regex approach for basic sitemaps
      const urlMatches = sitemapContent.match(/<loc>(.*?)<\/loc>/g) || [];
      const urls = urlMatches.map((match: string) => match.replace(/<\/?loc>/g, ''));

      // Apply filter if provided
      let filteredUrls = urls;
      if (options.filter_pattern) {
        const filterRegex = new RegExp(options.filter_pattern);
        filteredUrls = urls.filter((url: string) => filterRegex.test(url));
      }

      return {
        content: [
          {
            type: 'text',
            text: `Sitemap parsed successfully:\n\nTotal URLs found: ${urls.length}\nFiltered URLs: ${filteredUrls.length}\n\nURLs:\n${filteredUrls.slice(0, 100).join('\n')}${filteredUrls.length > 100 ? '\n... and ' + (filteredUrls.length - 100) + ' more' : ''}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'parse sitemap');
    }
  }

  async crawl(options: Record<string, unknown>) {
    try {
      // Ensure options is an object
      if (!options || typeof options !== 'object') {
        throw new Error('crawl requires options object with at least a url parameter');
      }

      // Build browser_config
      const browser_config: Record<string, unknown> = {
        headless: true, // Always true as noted
      };

      if (options.browser_type) browser_config.browser_type = options.browser_type;
      if (options.viewport_width) browser_config.viewport_width = options.viewport_width;
      if (options.viewport_height) browser_config.viewport_height = options.viewport_height;
      if (options.user_agent) browser_config.user_agent = options.user_agent;
      if (options.headers) browser_config.headers = options.headers;
      if (options.cookies) browser_config.cookies = options.cookies;

      // Handle proxy configuration - support both unified and legacy formats
      if (options.proxy) {
        // New unified format (0.7.3/0.7.4)
        browser_config.proxy = options.proxy;
      } else if (options.proxy_server) {
        // Legacy format for backward compatibility
        browser_config.proxy_config = {
          server: options.proxy_server,
          username: options.proxy_username,
          password: options.proxy_password,
        };
      }

      // Build crawler_config
      const crawler_config: Record<string, unknown> = {};

      // Content filtering
      if (options.word_count_threshold !== undefined)
        crawler_config.word_count_threshold = options.word_count_threshold;
      if (options.excluded_tags) crawler_config.excluded_tags = options.excluded_tags;
      if (options.remove_overlay_elements) crawler_config.remove_overlay_elements = options.remove_overlay_elements;

      // JavaScript execution
      if (options.js_code !== undefined && options.js_code !== null) {
        // If js_code is an array, join it with newlines for the server
        crawler_config.js_code = Array.isArray(options.js_code) ? options.js_code.join('\n') : options.js_code;
      } else if (options.js_code === null) {
        // If js_code is explicitly null, throw a helpful error
        throw new Error('js_code parameter is null. Please provide JavaScript code as a string or array of strings.');
      }
      if (options.wait_for) crawler_config.wait_for = options.wait_for;
      if (options.wait_for_timeout) crawler_config.wait_for_timeout = options.wait_for_timeout;

      // Dynamic content
      if (options.delay_before_scroll) crawler_config.delay_before_scroll = options.delay_before_scroll;
      if (options.scroll_delay) crawler_config.scroll_delay = options.scroll_delay;

      // Content processing
      if (options.process_iframes) crawler_config.process_iframes = options.process_iframes;
      if (options.exclude_external_links) crawler_config.exclude_external_links = options.exclude_external_links;

      // Export options
      if (options.screenshot) crawler_config.screenshot = options.screenshot;
      if (options.pdf) crawler_config.pdf = options.pdf;

      // Session and cache
      if (options.session_id) {
        crawler_config.session_id = options.session_id;
        // Update session last_used time
        const session = this.sessions.get(String(options.session_id));
        if (session) {
          session.last_used = new Date();
        }
      }
      if (options.cache_mode) crawler_config.cache_mode = String(options.cache_mode).toLowerCase();

      // Performance
      if (options.timeout) crawler_config.timeout = options.timeout;
      if (options.verbose) crawler_config.verbose = options.verbose;

      // Additional crawler parameters
      if (options.wait_until) crawler_config.wait_until = options.wait_until;
      if (options.page_timeout) crawler_config.page_timeout = options.page_timeout;
      if (options.wait_for_images) crawler_config.wait_for_images = options.wait_for_images;
      if (options.ignore_body_visibility) crawler_config.ignore_body_visibility = options.ignore_body_visibility;
      if (options.scan_full_page) crawler_config.scan_full_page = options.scan_full_page;
      if (options.remove_forms) crawler_config.remove_forms = options.remove_forms;
      if (options.keep_data_attributes) crawler_config.keep_data_attributes = options.keep_data_attributes;
      if (options.excluded_selector) crawler_config.excluded_selector = options.excluded_selector;
      if (options.only_text) crawler_config.only_text = options.only_text;

      // Media handling
      if (options.image_description_min_word_threshold !== undefined)
        crawler_config.image_description_min_word_threshold = options.image_description_min_word_threshold;
      if (options.image_score_threshold !== undefined)
        crawler_config.image_score_threshold = options.image_score_threshold;
      if (options.exclude_external_images) crawler_config.exclude_external_images = options.exclude_external_images;
      if (options.screenshot_wait_for !== undefined) crawler_config.screenshot_wait_for = options.screenshot_wait_for;

      // Link filtering
      if (options.exclude_social_media_links)
        crawler_config.exclude_social_media_links = options.exclude_social_media_links;
      if (options.exclude_domains) crawler_config.exclude_domains = options.exclude_domains;

      // Page interaction
      if (options.js_only) crawler_config.js_only = options.js_only;
      if (options.simulate_user) crawler_config.simulate_user = options.simulate_user;
      if (options.override_navigator) crawler_config.override_navigator = options.override_navigator;
      if (options.magic) crawler_config.magic = options.magic;

      // Virtual scroll
      if (options.virtual_scroll_config) crawler_config.virtual_scroll_config = options.virtual_scroll_config;

      // Cache control
      if (options.cache_mode) crawler_config.cache_mode = options.cache_mode;

      // Other
      if (options.log_console) crawler_config.log_console = options.log_console;
      if (options.capture_mhtml) crawler_config.capture_mhtml = options.capture_mhtml;

      // New parameters from 0.7.3/0.7.4
      if (options.delay_before_return_html) crawler_config.delay_before_return_html = options.delay_before_return_html;
      if (options.css_selector) crawler_config.css_selector = options.css_selector;
      if (options.include_links !== undefined) crawler_config.include_links = options.include_links;
      if (options.resolve_absolute_urls !== undefined)
        crawler_config.resolve_absolute_urls = options.resolve_absolute_urls;

      // Call service with proper configuration
      const crawlConfig: AdvancedCrawlConfig = {
        url: options.url ? String(options.url) : undefined,
        crawler_config,
      };

      // Add extraction strategy passthrough objects if provided
      if (options.extraction_strategy)
        crawlConfig.extraction_strategy = options.extraction_strategy as ExtractionStrategy;
      if (options.table_extraction_strategy)
        crawlConfig.table_extraction_strategy = options.table_extraction_strategy as TableExtractionStrategy;
      if (options.markdown_generator_options)
        crawlConfig.markdown_generator_options = options.markdown_generator_options as MarkdownGeneratorOptions;

      // Only include browser_config if we're not using a session
      if (!options.session_id) {
        crawlConfig.browser_config = browser_config;
      }

      const response: CrawlEndpointResponse = await this.service.crawl(crawlConfig);

      // Validate response structure
      if (!response || !response.results || response.results.length === 0) {
        throw new Error('Invalid response from server: no results received');
      }

      const result: CrawlResultItem = response.results[0];

      // Build response content
      const content = [];

      // Main content - use markdown.raw_markdown as primary content
      let mainContent = 'No content extracted';

      if (result.extracted_content) {
        // Handle extraction results which might be objects or strings
        if (typeof result.extracted_content === 'string') {
          mainContent = result.extracted_content;
        } else if (typeof result.extracted_content === 'object') {
          mainContent = JSON.stringify(result.extracted_content, null, 2);
        }
      } else if (result.markdown?.raw_markdown) {
        mainContent = result.markdown.raw_markdown;
      } else if (result.html) {
        mainContent = result.html;
      } else if (result.fit_html) {
        mainContent = result.fit_html;
      }

      content.push({
        type: 'text',
        text: mainContent,
      });

      // Screenshot if available
      if (result.screenshot) {
        // Save to local directory if requested
        let savedFilePath: string | undefined;
        if (options.screenshot_directory && typeof options.screenshot_directory === 'string') {
          try {
            // Resolve home directory path
            let screenshotDir = options.screenshot_directory;
            if (screenshotDir.startsWith('~')) {
              const homedir = os.homedir();
              screenshotDir = path.join(homedir, screenshotDir.slice(1));
            }

            // Check if user provided a file path instead of directory
            if (screenshotDir.endsWith('.png') || screenshotDir.endsWith('.jpg')) {
              console.warn(
                `Warning: screenshot_directory should be a directory path, not a file path. Using parent directory.`,
              );
              screenshotDir = path.dirname(screenshotDir);
            }

            // Ensure directory exists
            await fs.mkdir(screenshotDir, { recursive: true });

            // Generate filename from URL and timestamp
            const url = new URL(String(options.url));
            const hostname = url.hostname.replace(/[^a-z0-9]/gi, '-');
            const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
            const filename = `${hostname}-${timestamp}.png`;

            savedFilePath = path.join(screenshotDir, filename);

            // Convert base64 to buffer and save
            const buffer = Buffer.from(result.screenshot, 'base64');
            await fs.writeFile(savedFilePath, buffer);
          } catch (saveError) {
            // Log error but don't fail the operation
            console.error('Failed to save screenshot locally:', saveError);
          }
        }

        // If saved locally and screenshot is large (>800KB), don't return the base64 data
        const screenshotSize = Buffer.from(result.screenshot, 'base64').length;
        const shouldReturnImage = !savedFilePath || screenshotSize < 800 * 1024; // 800KB threshold

        if (shouldReturnImage) {
          content.push({
            type: 'image',
            data: result.screenshot,
            mimeType: 'image/png',
          });
        }

        if (savedFilePath) {
          const sizeInfo = !shouldReturnImage
            ? ` (${Math.round(screenshotSize / 1024)}KB - too large to display inline)`
            : '';
          content.push({
            type: 'text',
            text: `\n---\nScreenshot saved to: ${savedFilePath}${sizeInfo}`,
          });
        }
      }

      // PDF if available
      if (result.pdf) {
        content.push({
          type: 'resource',
          resource: {
            uri: `data:application/pdf;name=${encodeURIComponent(new URL(String(options.url)).hostname)}.pdf;base64,${result.pdf}`,
            mimeType: 'application/pdf',
            blob: result.pdf,
          },
        });
      }

      // Metadata
      if (result.metadata) {
        content.push({
          type: 'text',
          text: `\n---\nMetadata: ${JSON.stringify(result.metadata, null, 2)}`,
        });
      }

      // Links
      if (result.links && (result.links.internal.length > 0 || result.links.external.length > 0)) {
        content.push({
          type: 'text',
          text: `\n---\nLinks: Internal: ${result.links.internal.length}, External: ${result.links.external.length}`,
        });
      }

      // JS execution results if available
      if (result.js_execution_result && result.js_execution_result.results.length > 0) {
        const jsResults = result.js_execution_result.results
          .map((res: unknown, idx: number) => {
            return `Result ${idx + 1}: ${JSON.stringify(res, null, 2)}`;
          })
          .join('\n');
        content.push({
          type: 'text',
          text: `\n---\nJavaScript Execution Results:\n${jsResults}`,
        });
      }

      // Add memory metrics if available
      if (response.server_memory_delta_mb !== undefined || response.server_peak_memory_mb !== undefined) {
        const memoryInfo = [];
        if (response.server_processing_time_s !== undefined) {
          memoryInfo.push(`Processing time: ${response.server_processing_time_s.toFixed(2)}s`);
        }
        if (response.server_memory_delta_mb !== undefined) {
          memoryInfo.push(`Memory delta: ${response.server_memory_delta_mb.toFixed(1)}MB`);
        }
        if (response.server_peak_memory_mb !== undefined) {
          memoryInfo.push(`Peak memory: ${response.server_peak_memory_mb.toFixed(1)}MB`);
        }
        if (memoryInfo.length > 0) {
          content.push({
            type: 'text',
            text: `\n---\nServer metrics: ${memoryInfo.join(', ')}`,
          });
        }
      }

      return { content };
    } catch (error) {
      throw this.formatError(error, 'crawl');
    }
  }
}

```

--------------------------------------------------------------------------------
/src/__tests__/crawl4ai-service.test.ts:
--------------------------------------------------------------------------------

```typescript
import nock from 'nock';
import { Crawl4AIService } from '../crawl4ai-service.js';
import type {
  MarkdownEndpointResponse,
  ScreenshotEndpointResponse,
  PDFEndpointResponse,
  HTMLEndpointResponse,
  CrawlEndpointResponse,
} from '../types.js';

/**
 * Unit tests for Crawl4AIService using nock for HTTP mocking
 *
 * Mock Maintenance:
 * - These mocks are maintained manually based on the actual API responses
 * - When the API changes, update the mock responses to match
 * - Integration tests validate against the real API
 */

describe('Crawl4AIService', () => {
  let service: Crawl4AIService;
  // Unit tests always use localhost as configured in jest.setup.cjs
  const baseURL = 'http://localhost:11235';
  const apiKey = 'test-api-key';

  beforeEach(() => {
    service = new Crawl4AIService(baseURL, apiKey);
    // Clean all nock interceptors before each test
    nock.cleanAll();
  });

  afterEach(() => {
    // Clean up any remaining interceptors
    nock.cleanAll();
  });

  describe('getMarkdown', () => {
    it('should fetch markdown with default parameters', async () => {
      const mockResponse: MarkdownEndpointResponse = {
        url: 'https://example.com',
        filter: 'fit',
        query: null,
        cache: 'false',
        markdown: '# Example Page\n\nThis is example content.',
        success: true,
      };

      // Mock the HTTP request
      nock(baseURL)
        .post('/md', {
          url: 'https://example.com',
          f: 'fit',
          q: undefined,
          c: undefined,
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.getMarkdown({
        url: 'https://example.com',
        f: 'fit',
      });

      expect(result).toEqual(mockResponse);
    });

    it('should fetch markdown with all parameters', async () => {
      const mockResponse: MarkdownEndpointResponse = {
        url: 'https://example.com',
        filter: 'bm25',
        query: 'test query',
        cache: 'true',
        markdown: '# Filtered Content\n\nMatching content for test query.',
        success: true,
      };

      nock(baseURL)
        .post('/md', {
          url: 'https://example.com',
          f: 'bm25',
          q: 'test query',
          c: 'true',
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.getMarkdown({
        url: 'https://example.com',
        f: 'bm25',
        q: 'test query',
        c: 'true',
      });

      expect(result).toEqual(mockResponse);
    });

    it('should handle API errors', async () => {
      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).reply(500, { detail: 'Internal server error' });

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow(
        'Request failed with status 500: Internal server error',
      );
    });

    it('should validate URL format', async () => {
      await expect(service.getMarkdown({ url: 'invalid-url' })).rejects.toThrow('Invalid URL format');
    });

    it('should handle network errors', async () => {
      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError('Network error');

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Network error');
    });
  });

  describe('captureScreenshot', () => {
    it('should capture screenshot successfully', async () => {
      const mockResponse: ScreenshotEndpointResponse = {
        success: true,
        screenshot: 'base64-encoded-screenshot-data',
      };

      nock(baseURL)
        .post('/screenshot', {
          url: 'https://example.com',
          screenshot_wait_for: 2,
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.captureScreenshot({
        url: 'https://example.com',
        screenshot_wait_for: 2,
      });

      expect(result).toEqual(mockResponse);
    });

    it('should validate URL format', async () => {
      await expect(service.captureScreenshot({ url: 'not-a-url' })).rejects.toThrow('Invalid URL format');
    });
  });

  describe('generatePDF', () => {
    it('should generate PDF successfully', async () => {
      const mockResponse: PDFEndpointResponse = {
        success: true,
        pdf: 'base64-encoded-pdf-data',
      };

      nock(baseURL)
        .post('/pdf', {
          url: 'https://example.com',
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.generatePDF({
        url: 'https://example.com',
      });

      expect(result).toEqual(mockResponse);
    });

    it('should validate URL format', async () => {
      await expect(service.generatePDF({ url: 'not a url' })).rejects.toThrow('Invalid URL format');
    });
  });

  describe('getHTML', () => {
    it('should fetch HTML successfully', async () => {
      const mockResponse: HTMLEndpointResponse = {
        html: '<html><body><h1>Example</h1></body></html>',
        url: 'https://example.com',
        success: true,
      };

      nock(baseURL)
        .post('/html', {
          url: 'https://example.com',
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.getHTML({
        url: 'https://example.com',
      });

      expect(result).toEqual(mockResponse);
    });

    it('should validate URL format', async () => {
      await expect(service.getHTML({ url: 'just text' })).rejects.toThrow('Invalid URL format');
    });
  });

  describe('crawl', () => {
    it('should crawl with basic configuration', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://example.com',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: '# Example',
              markdown_with_citations: '# Example [1]',
              references_markdown: '[1]: https://example.com',
              fit_markdown: '# Example',
              fit_html: '<h1>Example</h1>',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.5,
        server_memory_delta_mb: 10,
        server_peak_memory_mb: 100,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://example.com'],
          browser_config: { headless: true },
          crawler_config: { cache_mode: 'ENABLED' },
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://example.com'],
        browser_config: { headless: true },
        crawler_config: { cache_mode: 'ENABLED' },
      });

      expect(result).toEqual(mockResponse);
    });

    it('should reject invalid JavaScript in crawler_config', async () => {
      await expect(
        service.crawl({
          url: 'https://example.com',
          crawler_config: {
            js_code: 'console.log(&quot;test&quot;)',
          },
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should handle js_code as array with invalid script', async () => {
      await expect(
        service.crawl({
          url: 'https://example.com',
          crawler_config: {
            js_code: ['valid code', '<script>alert("test")</script>'],
          },
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    // Timeout testing is better suited for integration tests
    // where we can test against real API behavior
  });

  describe('batchCrawl', () => {
    it('should batch crawl multiple URLs', async () => {
      const urls = ['https://example1.com', 'https://example2.com'];
      const mockResponse = {
        success: true,
        results: urls.map((url) => ({
          url,
          success: true,
          markdown: { raw_markdown: `Content from ${url}` },
        })),
      };

      nock(baseURL)
        .post('/crawl', (body) => {
          return body.urls?.length === 2 && body.urls[0] === urls[0] && body.urls[1] === urls[1];
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.batchCrawl({ urls });

      expect(result.success).toBe(true);
      expect(result.results).toHaveLength(2);
    });

    it('should validate empty URLs array', async () => {
      await expect(service.batchCrawl({ urls: [] })).rejects.toThrow('URLs array cannot be empty');
    });
  });

  describe('executeJS', () => {
    it('should execute JavaScript successfully', async () => {
      const mockResponse = {
        success: true,
        js_execution_result: {
          success: true,
          results: ['Example Title'],
        },
        markdown: '# Example Page',
      };

      nock(baseURL)
        .post('/execute_js', {
          url: 'https://example.com',
          scripts: ['return document.title'],
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.executeJS({
        url: 'https://example.com',
        scripts: 'return document.title',
      });

      expect(result).toEqual(mockResponse);
    });

    it('should handle array of scripts', async () => {
      const scripts = ['return document.title', 'return window.location.href'];
      const mockResponse = {
        success: true,
        js_execution_result: {
          success: true,
          results: ['Example Title', 'https://example.com'],
        },
      };

      nock(baseURL)
        .post('/execute_js', {
          url: 'https://example.com',
          scripts: scripts,
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.executeJS({
        url: 'https://example.com',
        scripts,
      });

      expect(result).toEqual(mockResponse);
    });

    it('should reject scripts with HTML entities', async () => {
      await expect(
        service.executeJS({
          url: 'https://httpbin.org/html',
          scripts: 'console.log(&quot;test&quot;)',
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should reject scripts with HTML tags', async () => {
      await expect(
        service.executeJS({
          url: 'https://httpbin.org/html',
          scripts: '<script>alert("test")</script>',
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should reject scripts with literal \\n', async () => {
      await expect(
        service.executeJS({
          url: 'https://httpbin.org/html',
          scripts: 'console.log("test");\\nconsole.log("test2");',
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should reject array with invalid scripts', async () => {
      await expect(
        service.executeJS({
          url: 'https://httpbin.org/html',
          scripts: ['valid script', 'console.log(&amp;&amp; true)'],
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should validate URL format', async () => {
      await expect(service.executeJS({ url: '//no-protocol', scripts: 'return 1' })).rejects.toThrow(
        'Invalid URL format',
      );
    });

    it('should reject scripts with escaped backslash-n pattern', async () => {
      // Test the specific pattern that line 40-41 checks for: })\\nword
      const scriptWithPattern = 'function test() {}\\nconsole.log("test")';
      await expect(
        service.executeJS({
          url: 'https://example.com',
          scripts: scriptWithPattern,
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should allow valid JavaScript with actual newlines', async () => {
      const validScript = `function test() {
        console.log("This has real newlines");
        return true;
      }`;

      const mockResponse = {
        success: true,
        js_execution_result: { results: [true] },
      };

      nock(baseURL).post('/execute_js').matchHeader('x-api-key', apiKey).reply(200, mockResponse);

      const result = await service.executeJS({
        url: 'https://example.com',
        scripts: validScript,
      });

      expect(result.success).toBe(true);
    });
  });

  describe('extractWithLLM', () => {
    it('should extract content with LLM', async () => {
      const mockResponse = {
        answer: 'The main topic of this page is JavaScript testing.',
      };

      nock(baseURL)
        .get('/llm/https%3A%2F%2Fexample.com?q=What%20is%20the%20main%20topic%3F')
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.extractWithLLM({
        url: 'https://example.com',
        query: 'What is the main topic?',
      });

      expect(result).toEqual(mockResponse);
    });

    // Timeout testing moved to integration tests

    it('should handle missing LLM provider', async () => {
      nock(baseURL)
        .get(/\/llm\/.*/)
        .matchHeader('x-api-key', apiKey)
        .reply(401, { detail: 'No LLM provider configured' });

      await expect(
        service.extractWithLLM({
          url: 'https://example.com',
          query: 'test',
        }),
      ).rejects.toThrow('No LLM provider configured');
    });
  });

  describe('Browser Configuration', () => {
    it('should send cookies configuration correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/cookies',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: '{"cookies": {"test": "value"}}',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: '{"cookies": {"test": "value"}}',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.0,
        server_memory_delta_mb: 5,
        server_peak_memory_mb: 50,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/cookies'],
          browser_config: {
            headless: true,
            cookies: [
              {
                name: 'test',
                value: 'value',
                domain: '.httpbin.org',
                path: '/',
              },
            ],
          },
          crawler_config: {},
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/cookies'],
        browser_config: {
          headless: true,
          cookies: [
            {
              name: 'test',
              value: 'value',
              domain: '.httpbin.org',
              path: '/',
            },
          ],
        },
        crawler_config: {},
      });

      expect(result.success).toBe(true);
      expect(result.results[0].markdown?.raw_markdown).toContain('cookies');
    });

    it('should send headers configuration correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/headers',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: '{"headers": {"X-Custom": "test-value"}}',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: '{"headers": {"X-Custom": "test-value"}}',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.0,
        server_memory_delta_mb: 5,
        server_peak_memory_mb: 50,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/headers'],
          browser_config: {
            headless: true,
            headers: {
              'X-Custom': 'test-value',
              'X-Request-ID': '12345',
            },
          },
          crawler_config: {},
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/headers'],
        browser_config: {
          headless: true,
          headers: {
            'X-Custom': 'test-value',
            'X-Request-ID': '12345',
          },
        },
        crawler_config: {},
      });

      expect(result.success).toBe(true);
      expect(result.results[0].markdown?.raw_markdown).toContain('headers');
    });

    it('should send viewport configuration correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://example.com',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: 'Content',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: 'Content',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: 'base64-screenshot-data',
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 2.0,
        server_memory_delta_mb: 10,
        server_peak_memory_mb: 100,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://example.com'],
          browser_config: {
            headless: true,
            viewport_width: 375,
            viewport_height: 667,
          },
          crawler_config: {
            screenshot: true,
          },
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://example.com'],
        browser_config: {
          headless: true,
          viewport_width: 375,
          viewport_height: 667,
        },
        crawler_config: {
          screenshot: true,
        },
      });

      expect(result.success).toBe(true);
      expect(result.results[0].screenshot).toBeTruthy();
    });

    it('should send user agent configuration correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/user-agent',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: '{"user-agent": "Custom-Bot/1.0"}',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: '{"user-agent": "Custom-Bot/1.0"}',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.0,
        server_memory_delta_mb: 5,
        server_peak_memory_mb: 50,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/user-agent'],
          browser_config: {
            headless: true,
            user_agent: 'Custom-Bot/1.0',
          },
          crawler_config: {},
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/user-agent'],
        browser_config: {
          headless: true,
          user_agent: 'Custom-Bot/1.0',
        },
        crawler_config: {},
      });

      expect(result.success).toBe(true);
      expect(result.results[0].markdown?.raw_markdown).toContain('Custom-Bot/1.0');
    });

    it('should handle complex browser configuration', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/anything',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: 'Response with all configs',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: 'Response with all configs',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.5,
        server_memory_delta_mb: 8,
        server_peak_memory_mb: 80,
      };

      const complexConfig = {
        urls: ['https://httpbin.org/anything'],
        browser_config: {
          headless: true,
          viewport_width: 768,
          viewport_height: 1024,
          user_agent: 'Test-Bot/2.0',
          cookies: [
            {
              name: 'session',
              value: 'abc123',
              domain: '.httpbin.org',
              path: '/',
            },
          ],
          headers: {
            'X-Test': 'value',
          },
        },
        crawler_config: {
          cache_mode: 'BYPASS' as const,
        },
      };

      nock(baseURL).post('/crawl', complexConfig).matchHeader('x-api-key', apiKey).reply(200, mockResponse);

      const result = await service.crawl(complexConfig);

      expect(result.success).toBe(true);
      expect(result.results).toHaveLength(1);
    });
  });

  describe('Crawler Configuration Advanced Parameters', () => {
    it('should send content filtering parameters correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/forms/post',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: 'Form content without forms',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: 'Form content without forms',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.0,
        server_memory_delta_mb: 5,
        server_peak_memory_mb: 50,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/forms/post'],
          browser_config: {
            headless: true,
          },
          crawler_config: {
            remove_forms: true,
            keep_data_attributes: true,
            exclude_external_images: true,
          },
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/forms/post'],
        browser_config: {
          headless: true,
        },
        crawler_config: {
          remove_forms: true,
          keep_data_attributes: true,
          exclude_external_images: true,
        },
      });

      expect(result.success).toBe(true);
    });

    it('should send js_only parameter correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/html',
            html: '',
            cleaned_html: '',
            fit_html: '',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: '',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: '',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: {
              success: true,
              results: ['Page Title', '5'],
            },
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.0,
        server_memory_delta_mb: 5,
        server_peak_memory_mb: 50,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/html'],
          browser_config: {
            headless: true,
          },
          crawler_config: {
            js_code: ['return document.title', 'return document.querySelectorAll("p").length'],
            js_only: true,
          },
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/html'],
        browser_config: {
          headless: true,
        },
        crawler_config: {
          js_code: ['return document.title', 'return document.querySelectorAll("p").length'],
          js_only: true,
        },
      });

      expect(result.success).toBe(true);
      expect(result.results[0].js_execution_result).toBeDefined();
    });

    it('should send visibility and debug parameters correctly', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://httpbin.org/html',
            html: '<html>...</html>',
            cleaned_html: '<html>...</html>',
            fit_html: '<html>...</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: 'Content',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: 'Content',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: ['Test log message 1', 'Test warning', 'Test error'],
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 1.5,
        server_memory_delta_mb: 8,
        server_peak_memory_mb: 80,
      };

      nock(baseURL)
        .post('/crawl', {
          urls: ['https://httpbin.org/html'],
          browser_config: {
            headless: true,
          },
          crawler_config: {
            ignore_body_visibility: true,
            verbose: true,
            log_console: true,
          },
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, mockResponse);

      const result = await service.crawl({
        urls: ['https://httpbin.org/html'],
        browser_config: {
          headless: true,
        },
        crawler_config: {
          ignore_body_visibility: true,
          verbose: true,
          log_console: true,
        },
      });

      expect(result.success).toBe(true);
      expect(result.results[0].console_messages).toBeDefined();
    });
  });

  describe('parseSitemap', () => {
    it('should fetch and return sitemap content', async () => {
      const mockSitemapXML = `<?xml version="1.0" encoding="UTF-8"?>
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
          <url><loc>https://example.com/page1</loc></url>
          <url><loc>https://example.com/page2</loc></url>
        </urlset>`;

      // parseSitemap now uses axios directly without baseURL
      nock('https://example.com').get('/sitemap.xml').reply(200, mockSitemapXML);

      const response = await service.parseSitemap('https://example.com/sitemap.xml');
      expect(response).toBe(mockSitemapXML);
    });

    it('should handle sitemap fetch errors', async () => {
      nock('https://example.com').get('/sitemap.xml').reply(404, 'Not Found');

      await expect(service.parseSitemap('https://example.com/sitemap.xml')).rejects.toThrow();
    });
  });

  describe('detectContentType', () => {
    it('should return content type from HEAD request', async () => {
      // detectContentType now uses axios directly without baseURL
      nock('https://example.com').head('/document.pdf').reply(200, '', { 'content-type': 'application/pdf' });

      const contentType = await service.detectContentType('https://example.com/document.pdf');
      expect(contentType).toBe('application/pdf');
    });

    it('should return empty string when content-type header is missing', async () => {
      nock('https://example.com').head('/file').reply(200, '');

      const contentType = await service.detectContentType('https://example.com/file');
      expect(contentType).toBe('');
    });

    it('should return empty string on HEAD request failure', async () => {
      nock('https://example.com').head('/file').reply(404, 'Not Found');

      const contentType = await service.detectContentType('https://example.com/file');
      expect(contentType).toBe('');
    });
  });

  describe('Network Error Handling', () => {
    it('should handle ECONNABORTED error', async () => {
      const error = new Error('Connection aborted') as Error & { code?: string };
      error.code = 'ECONNABORTED';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Request timed out');
    });

    it('should handle ETIMEDOUT error', async () => {
      const error = new Error('Socket timed out') as Error & { code?: string };
      error.code = 'ETIMEDOUT';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Request timeout');
    });

    it('should handle ENOTFOUND error', async () => {
      const error = new Error('getaddrinfo ENOTFOUND') as Error & { code?: string };
      error.code = 'ENOTFOUND';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('DNS resolution failed');
    });

    it('should handle ECONNREFUSED error', async () => {
      const error = new Error('connect ECONNREFUSED') as Error & { code?: string };
      error.code = 'ECONNREFUSED';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Connection refused');
    });

    it('should handle ECONNRESET error', async () => {
      const error = new Error('socket hang up') as Error & { code?: string };
      error.code = 'ECONNRESET';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Connection reset');
    });

    it('should handle ENETUNREACH error', async () => {
      const error = new Error('Network is unreachable') as Error & { code?: string };
      error.code = 'ENETUNREACH';

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Network unreachable');
    });

    it('should handle generic axios errors', async () => {
      const error = new Error('Generic error') as Error & { isAxiosError?: boolean };
      error.isAxiosError = true;

      nock(baseURL).post('/md').matchHeader('x-api-key', apiKey).replyWithError(error);

      await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Generic error');
    });
  });

  describe('Optional Parameter Handling', () => {
    it('should handle batchCrawl with remove_images option', async () => {
      const urls = ['https://example.com'];

      nock(baseURL)
        .post('/crawl', (body) => {
          return body.crawler_config?.exclude_tags?.includes('img');
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, { success: true, results: [] });

      await service.batchCrawl({ urls, remove_images: true });
    });

    it('should handle batchCrawl with bypass_cache option', async () => {
      const urls = ['https://example.com'];

      nock(baseURL)
        .post('/crawl', (body) => {
          return body.crawler_config?.cache_mode === 'BYPASS';
        })
        .matchHeader('x-api-key', apiKey)
        .reply(200, { success: true, results: [] });

      await service.batchCrawl({ urls, bypass_cache: true });
    });

    it('should test edge case JavaScript validation pattern', async () => {
      // Test the specific pattern on line 40-41: })\\nword
      const scriptWithEdgeCase = 'if (true) {}\\nwindow.alert("test")';
      await expect(
        service.executeJS({
          url: 'https://example.com',
          scripts: scriptWithEdgeCase,
        }),
      ).rejects.toThrow('Invalid JavaScript: Contains HTML entities');
    });

    it('should include memory metrics in crawl response', async () => {
      const mockResponse: CrawlEndpointResponse = {
        success: true,
        results: [
          {
            url: 'https://example.com',
            html: '<html>Test</html>',
            cleaned_html: '<html>Test</html>',
            fit_html: '<html>Test</html>',
            success: true,
            status_code: 200,
            response_headers: {},
            session_id: null,
            metadata: {},
            links: { internal: [], external: [] },
            media: { images: [], videos: [], audios: [] },
            markdown: {
              raw_markdown: 'Test content',
              markdown_with_citations: '',
              references_markdown: '',
              fit_markdown: 'Test content',
              fit_html: '',
            },
            tables: [],
            extracted_content: null,
            screenshot: null,
            pdf: null,
            mhtml: null,
            js_execution_result: null,
            downloaded_files: null,
            network_requests: null,
            console_messages: null,
            ssl_certificate: null,
            dispatch_result: null,
          },
        ],
        server_processing_time_s: 2.5,
        server_memory_delta_mb: 15.3,
        server_peak_memory_mb: 512.7,
      };

      nock(baseURL).post('/crawl').matchHeader('x-api-key', apiKey).reply(200, mockResponse);

      const result = await service.crawl({ url: 'https://example.com' });

      expect(result.server_processing_time_s).toBe(2.5);
      expect(result.server_memory_delta_mb).toBe(15.3);
      expect(result.server_peak_memory_mb).toBe(512.7);
    });
  });
});

```

--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------

```typescript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import axios, { AxiosInstance } from 'axios';
import { z } from 'zod';
import { Crawl4AIService } from './crawl4ai-service.js';
import { SessionInfo } from './handlers/base-handler.js';
import { ContentHandlers } from './handlers/content-handlers.js';
import { SessionHandlers } from './handlers/session-handlers.js';
import { UtilityHandlers } from './handlers/utility-handlers.js';
import { CrawlHandlers } from './handlers/crawl-handlers.js';
import { BatchCrawlOptions } from './types.js';
// Define the tool call result type
type ToolCallResult = {
  content: Array<{
    type: string;
    text?: string;
    data?: string;
    mimeType?: string;
  }>;
  session_id?: string;
  browser_type?: string;
};
import {
  GetMarkdownSchema,
  CaptureScreenshotSchema,
  GeneratePdfSchema,
  ExecuteJsSchema,
  BatchCrawlSchema,
  SmartCrawlSchema,
  GetHtmlSchema,
  ExtractLinksSchema,
  CrawlRecursiveSchema,
  ParseSitemapSchema,
  CrawlSchema,
  ManageSessionSchema,
  ExtractWithLlmSchema,
} from './schemas/validation-schemas.js';

export class Crawl4AIServer {
  private server: Server;
  protected axiosClient: AxiosInstance;
  protected service: Crawl4AIService;
  private sessions: Map<string, SessionInfo> = new Map();
  private serverName: string;
  private serverVersion: string;

  // Handler instances
  private contentHandlers: ContentHandlers;
  private sessionHandlers: SessionHandlers;
  private utilityHandlers: UtilityHandlers;
  private crawlHandlers: CrawlHandlers;

  constructor(baseUrl: string, apiKey: string, serverName: string = 'crawl4ai-mcp', serverVersion: string = '1.0.0') {
    this.serverName = serverName;
    this.serverVersion = serverVersion;
    this.server = new Server(
      {
        name: serverName,
        version: serverVersion,
      },
      {
        capabilities: {
          tools: {},
        },
      },
    );

    // Initialize axios client with API key
    this.axiosClient = axios.create({
      baseURL: baseUrl,
      headers: {
        'X-API-Key': apiKey,
        'Content-Type': 'application/json',
      },
      timeout: 120000, // 2 minutes timeout
    });

    // Initialize the service
    this.service = new Crawl4AIService(baseUrl, apiKey);

    // Initialize handlers
    this.contentHandlers = new ContentHandlers(this.service, this.axiosClient, this.sessions);
    this.sessionHandlers = new SessionHandlers(this.service, this.axiosClient, this.sessions);
    this.utilityHandlers = new UtilityHandlers(this.service, this.axiosClient, this.sessions);
    this.crawlHandlers = new CrawlHandlers(this.service, this.axiosClient, this.sessions);

    this.setupHandlers();
  }

  /**
   * Helper method to validate arguments and execute handler with consistent error formatting
   * Preserves the exact error message format that LLMs rely on
   */
  private async validateAndExecute<T>(
    toolName: string,
    args: unknown,
    schema: z.ZodSchema<T>,
    handler: (validatedArgs: T) => Promise<ToolCallResult>,
  ): Promise<ToolCallResult> {
    try {
      const validatedArgs = schema.parse(args);
      return await handler(validatedArgs);
    } catch (error) {
      if (error instanceof z.ZodError) {
        // EXACT same formatting as before - critical for LLM understanding
        const details = error.errors
          .map((e) => (e.path.length > 0 ? `${e.path.join('.')}: ${e.message}` : e.message))
          .join(', ');
        throw new Error(`Invalid parameters for ${toolName}: ${details}`);
      }
      throw error;
    }
  }

  private setupHandlers() {
    // Handle list tools request
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'get_markdown',
          description:
            '[STATELESS] Extract content as markdown with filtering options. Supports: raw (full content), fit (optimized, default), bm25 (keyword search), llm (AI-powered extraction). Use bm25/llm with query for specific content. Creates new browser each time. For persistence use create_session + crawl.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract markdown from',
              },
              filter: {
                type: 'string',
                enum: ['raw', 'fit', 'bm25', 'llm'],
                description: 'Filter type: raw (full), fit (optimized), bm25 (search), llm (AI extraction)',
                default: 'fit',
              },
              query: {
                type: 'string',
                description: 'Query string for bm25/llm filters. Required when using bm25 or llm filter.',
              },
              cache: {
                type: 'string',
                description: 'Cache-bust parameter (use different values to force fresh extraction)',
                default: '0',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'capture_screenshot',
          description:
            "[STATELESS] Capture webpage screenshot. Returns base64-encoded PNG data. Creates new browser each time. Optionally saves screenshot to local directory. IMPORTANT: Chained calls (execute_js then capture_screenshot) will NOT work - the screenshot won't see JS changes! For JS changes + screenshot use create_session + crawl(session_id, js_code, screenshot:true) in ONE call.",
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to capture',
              },
              screenshot_wait_for: {
                type: 'number',
                description: 'Seconds to wait before taking screenshot (allows page loading/animations)',
                default: 2,
              },
              save_to_directory: {
                type: 'string',
                description:
                  "Directory path to save screenshot (e.g., ~/Desktop, /tmp). Do NOT include filename - it will be auto-generated. Large screenshots (>800KB) won't be returned inline when saved.",
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'generate_pdf',
          description:
            '[STATELESS] Convert webpage to PDF. Returns base64-encoded PDF data. Creates new browser each time. Cannot capture form fills or JS changes. For persistent PDFs use create_session + crawl(session_id, pdf:true).',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to convert to PDF',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'execute_js',
          description:
            '[STATELESS] Execute JavaScript and get return values + page content. Creates new browser each time. Use for: extracting data, triggering dynamic content, checking page state. Scripts with "return" statements return actual values (strings, numbers, objects, arrays). Note: null returns as {"success": true}. Returns values but page state is lost. For persistent JS execution, use crawl with session_id.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to load',
              },
              scripts: {
                type: ['string', 'array'],
                items: { type: 'string' },
                description:
                  'JavaScript to execute. Use "return" to get values back! Each string runs separately. Returns appear in results array. Examples: "return document.title", "return document.querySelectorAll(\'a\').length", "return {url: location.href, links: [...document.links].map(a => a.href)}". Use proper JS syntax: real quotes, no HTML entities.',
              },
            },
            required: ['url', 'scripts'],
          },
        },
        {
          name: 'batch_crawl',
          description:
            '[STATELESS] Crawl multiple URLs concurrently for efficiency. Use when: processing URL lists, comparing multiple pages, or bulk data extraction. Faster than sequential crawling. Max 5 concurrent by default. Each URL gets a fresh browser. Cannot maintain state between URLs. For persistent operations use create_session + crawl.',
          inputSchema: {
            type: 'object',
            properties: {
              urls: {
                type: 'array',
                items: { type: 'string' },
                description: 'List of URLs to crawl',
              },
              max_concurrent: {
                type: 'number',
                description:
                  'Parallel request limit. Higher = faster but more resource intensive. Adjust based on server capacity and rate limits',
                default: 5,
              },
              remove_images: {
                type: 'boolean',
                description: 'Remove images from output by excluding img, picture, and svg tags',
                default: false,
              },
              bypass_cache: {
                type: 'boolean',
                description: 'Bypass cache for all URLs',
                default: false,
              },
            },
            required: ['urls'],
          },
        },
        {
          name: 'smart_crawl',
          description:
            '[STATELESS] Auto-detect and handle different content types (HTML, sitemap, RSS, text). Use when: URL type is unknown, crawling feeds/sitemaps, or want automatic format handling. Adapts strategy based on content. Creates new browser each time. For persistent operations use create_session + crawl.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to crawl intelligently',
              },
              max_depth: {
                type: 'number',
                description: 'Maximum crawl depth for sitemaps',
                default: 2,
              },
              follow_links: {
                type: 'boolean',
                description: 'For sitemaps/RSS: crawl found URLs (max 10). For HTML: no effect',
                default: false,
              },
              bypass_cache: {
                type: 'boolean',
                description: 'Force fresh crawl',
                default: false,
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'get_html',
          description:
            '[STATELESS] Get sanitized/processed HTML for inspection and automation planning. Use when: finding form fields/selectors, analyzing page structure before automation, building schemas. Returns cleaned HTML showing element names, IDs, and classes - perfect for identifying selectors for subsequent crawl operations. Commonly used before crawl to find selectors for automation. Creates new browser each time.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract HTML from',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'extract_links',
          description:
            '[STATELESS] Extract and categorize all page links. Use when: building sitemaps, analyzing site structure, finding broken links, or discovering resources. Groups by internal/external/social/documents. Creates new browser each time. For persistent operations use create_session + crawl.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract links from',
              },
              categorize: {
                type: 'boolean',
                description:
                  'Group links by type: internal (same domain), external, social media, documents (PDF/DOC), images. Helpful for link analysis',
                default: true,
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'crawl_recursive',
          description:
            '[STATELESS] Deep crawl a website following internal links. Use when: mapping entire sites, finding all pages, building comprehensive indexes. Control with max_depth (default 3) and max_pages (default 50). Note: May need JS execution for dynamic sites. Each page gets a fresh browser. For persistent operations use create_session + crawl.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'Starting URL to crawl from',
              },
              max_depth: {
                type: 'number',
                description: 'Maximum depth to follow links',
                default: 3,
              },
              max_pages: {
                type: 'number',
                description: 'Maximum number of pages to crawl',
                default: 50,
              },
              include_pattern: {
                type: 'string',
                description:
                  'Regex to match URLs to crawl. Example: ".*\\/blog\\/.*" for blog posts only, ".*\\.html$" for HTML pages',
              },
              exclude_pattern: {
                type: 'string',
                description:
                  'Regex to skip URLs. Example: ".*\\/(login|admin).*" to avoid auth pages, ".*\\.pdf$" to skip PDFs',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'parse_sitemap',
          description:
            '[STATELESS] Extract URLs from XML sitemaps. Use when: discovering all site pages, planning crawl strategies, or checking sitemap validity. Supports regex filtering. Try sitemap.xml or robots.txt first. Creates new browser each time.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'URL of the sitemap (e.g., https://example.com/sitemap.xml)',
              },
              filter_pattern: {
                type: 'string',
                description: 'Optional regex pattern to filter URLs',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'crawl',
          description:
            '[SUPPORTS SESSIONS] THE ONLY TOOL WITH BROWSER PERSISTENCE\n\n' +
            'RECOMMENDED PATTERNS:\n' +
            '• Inspect first workflow:\n' +
            '  1) get_html(url) → find selectors & verify elements exist\n' +
            '  2) create_session() → "session-123"\n' +
            '  3) crawl({url, session_id: "session-123", js_code: ["action 1"]})\n' +
            '  4) crawl({url: "/page2", session_id: "session-123", js_code: ["action 2"]})\n\n' +
            '• Multi-step with state:\n' +
            '  1) create_session() → "session-123"\n' +
            '  2) crawl({url, session_id: "session-123"}) → inspect current state\n' +
            '  3) crawl({url, session_id: "session-123", js_code: ["verified actions"]})\n\n' +
            'WITH session_id: Maintains browser state (cookies, localStorage, page) across calls\n' +
            'WITHOUT session_id: Creates fresh browser each time (like other tools)\n\n' +
            'WHEN TO USE SESSIONS vs STATELESS:\n' +
            '• Need state between calls? → create_session + crawl\n' +
            '• Just extracting data? → Use stateless tools\n' +
            '• Filling forms? → Inspect first, then use sessions\n' +
            '• Taking screenshot after JS? → Must use crawl with session\n' +
            '• Unsure if elements exist? → Always use get_html first\n\n' +
            'CRITICAL FOR js_code:\n' +
            'RECOMMENDED: Always use screenshot: true when running js_code\n' +
            'This avoids server serialization errors and gives visual confirmation',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to crawl',
              },
              session_id: {
                type: 'string',
                description:
                  'ENABLES PERSISTENCE: Use SAME ID across all crawl calls to maintain browser state.\n' +
                  '• First call with ID: Creates persistent browser\n' +
                  '• Subsequent calls with SAME ID: Reuses browser with all state intact\n' +
                  '• Different/no ID: Fresh browser (stateless)\n' +
                  'WARNING: ONLY works with crawl tool - other tools ignore this parameter',
              },

              // === CORE CONFIGURATION ===
              browser_type: {
                type: 'string',
                enum: ['chromium', 'firefox', 'webkit'],
                description:
                  'Browser engine for crawling. Chromium offers best compatibility, Firefox for specific use cases, WebKit for Safari-like behavior',
                default: 'chromium',
              },
              viewport_width: {
                type: 'number',
                description: 'Browser window width in pixels. Affects responsive layouts and content visibility',
                default: 1080,
              },
              viewport_height: {
                type: 'number',
                description: 'Browser window height in pixels. Impacts content loading and screenshot dimensions',
                default: 600,
              },
              user_agent: {
                type: 'string',
                description:
                  'Custom browser identity. Use for: mobile sites (include "Mobile"), avoiding bot detection, or specific browser requirements. Example: "Mozilla/5.0 (iPhone...)"',
              },
              proxy_server: {
                type: 'string',
                description: 'Proxy server URL (e.g., "http://proxy.example.com:8080")',
              },
              proxy_username: {
                type: 'string',
                description: 'Proxy authentication username',
              },
              proxy_password: {
                type: 'string',
                description: 'Proxy authentication password',
              },
              cookies: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    name: { type: 'string', description: 'Cookie name' },
                    value: { type: 'string', description: 'Cookie value' },
                    domain: { type: 'string', description: 'Domain where cookie is valid' },
                    path: { type: 'string', description: 'URL path scope for cookie' },
                  },
                  required: ['name', 'value', 'domain'],
                },
                description: 'Pre-set cookies for authentication or personalization',
              },
              headers: {
                type: 'object',
                description: 'Custom HTTP headers for API keys, auth tokens, or specific server requirements',
              },

              // === CONTENT PROCESSING ===
              word_count_threshold: {
                type: 'number',
                description:
                  'Min words per text block. Filters out menus, footers, and short snippets. Lower = more content but more noise. Higher = only substantial paragraphs',
                default: 200,
              },
              excluded_tags: {
                type: 'array',
                items: { type: 'string' },
                description:
                  'HTML tags to remove completely. Common: ["nav", "footer", "aside", "script", "style"]. Cleans up content before extraction',
              },
              remove_overlay_elements: {
                type: 'boolean',
                description: 'Automatically remove popups, modals, and overlays that obscure content',
                default: false,
              },
              js_code: {
                type: ['string', 'array'],
                items: { type: 'string' },
                description:
                  'JavaScript to execute. Each string runs separately. Use return to get values.\n\n' +
                  'IMPORTANT: Always verify elements exist before acting on them!\n' +
                  'Use get_html first to find correct selectors, then:\n' +
                  'GOOD: ["if (document.querySelector(\'input[name=\\"email\\"]\')) { ... }"]\n' +
                  'BAD: ["document.querySelector(\'input[name=\\"email\\"]\').value = \'...\'"]\n\n' +
                  'USAGE PATTERNS:\n' +
                  '1. WITH screenshot/pdf: {js_code: [...], screenshot: true} ✓\n' +
                  '2. MULTI-STEP: First {js_code: [...], session_id: "x"}, then {js_only: true, session_id: "x"}\n' +
                  '3. AVOID: {js_code: [...], js_only: true} on first call ✗\n\n' +
                  'SELECTOR TIPS: Use get_html first to find:\n' +
                  '  • name="..." (best for forms)\n' +
                  '  • id="..." (if unique)\n' +
                  '  • class="..." (careful, may repeat)\n\n' +
                  'FORM EXAMPLE WITH VERIFICATION: [\n' +
                  '  "const emailInput = document.querySelector(\'input[name=\\"email\\"]\');",\n' +
                  '  "if (emailInput) emailInput.value = \'[email protected]\';",\n' +
                  '  "const submitBtn = document.querySelector(\'button[type=\\"submit\\"]\');",\n' +
                  '  "if (submitBtn) submitBtn.click();"\n' +
                  ']',
              },
              js_only: {
                type: 'boolean',
                description:
                  'FOR SUBSEQUENT CALLS ONLY: Reuse existing session without navigation\n' +
                  'First call: Use js_code WITHOUT js_only (or with screenshot/pdf)\n' +
                  'Later calls: Use js_only=true to run more JS in same session\n' +
                  'ERROR: Using js_only=true on first call causes server errors',
                default: false,
              },
              wait_for: {
                type: 'string',
                description:
                  'Wait for element that loads AFTER initial page load. Format: "css:.selector" or "js:() => condition"\n\n' +
                  'WHEN TO USE:\n' +
                  '  • Dynamic content that loads after page (AJAX, lazy load)\n' +
                  '  • Elements that appear after animations/transitions\n' +
                  '  • Content loaded by JavaScript frameworks\n\n' +
                  'WHEN NOT TO USE:\n' +
                  '  • Elements already in initial HTML (forms, static content)\n' +
                  '  • Standard page elements (just use wait_until: "load")\n' +
                  '  • Can cause timeouts/errors if element already exists!\n\n' +
                  'SELECTOR TIPS: Use get_html first to check if element exists\n' +
                  'Examples: "css:.ajax-content", "js:() => document.querySelector(\'.lazy-loaded\')"',
              },
              wait_for_timeout: {
                type: 'number',
                description: 'Maximum milliseconds to wait for condition',
                default: 30000,
              },
              delay_before_scroll: {
                type: 'number',
                description: 'Milliseconds to wait before scrolling. Allows initial content to render',
                default: 1000,
              },
              scroll_delay: {
                type: 'number',
                description: 'Milliseconds between scroll steps for lazy-loaded content',
                default: 500,
              },
              process_iframes: {
                type: 'boolean',
                description: 'Extract content from embedded iframes including videos and forms',
                default: false,
              },
              exclude_external_links: {
                type: 'boolean',
                description: 'Remove links pointing to different domains for cleaner content',
                default: false,
              },
              screenshot: {
                type: 'boolean',
                description: 'Capture full-page screenshot as base64 PNG',
                default: false,
              },
              screenshot_directory: {
                type: 'string',
                description:
                  "Directory path to save screenshot (e.g., ~/Desktop, /tmp). Do NOT include filename - it will be auto-generated. Large screenshots (>800KB) won't be returned inline when saved.",
              },
              pdf: {
                type: 'boolean',
                description: 'Generate PDF as base64 preserving exact layout',
                default: false,
              },
              cache_mode: {
                type: 'string',
                enum: ['ENABLED', 'BYPASS', 'DISABLED'],
                description:
                  'Cache strategy. ENABLED: Use cache if available. BYPASS: Fetch fresh (recommended). DISABLED: No cache',
                default: 'BYPASS',
              },
              timeout: {
                type: 'number',
                description: 'Overall request timeout in milliseconds',
                default: 60000,
              },
              verbose: {
                type: 'boolean',
                description:
                  'Enable server-side debug logging (not shown in output). Only for troubleshooting. Does not affect extraction results',
                default: false,
              },

              // === DYNAMIC CONTENT HANDLING ===
              wait_until: {
                type: 'string',
                enum: ['domcontentloaded', 'networkidle', 'load'],
                description:
                  'When to consider page loaded (use INSTEAD of wait_for for initial load):\n' +
                  '• "domcontentloaded" (default): Fast, DOM ready, use for forms/static content\n' +
                  '• "load": All resources loaded, use if you need images\n' +
                  '• "networkidle": Wait for network quiet, use for heavy JS apps\n' +
                  "WARNING: Don't use wait_for for elements in initial HTML!",
                default: 'domcontentloaded',
              },
              page_timeout: {
                type: 'number',
                description: 'Page navigation timeout in milliseconds',
                default: 60000,
              },
              wait_for_images: {
                type: 'boolean',
                description: 'Wait for all images to load before extraction',
                default: false,
              },
              ignore_body_visibility: {
                type: 'boolean',
                description: 'Skip checking if body element is visible',
                default: true,
              },
              scan_full_page: {
                type: 'boolean',
                description:
                  'Auto-scroll entire page to trigger lazy loading. WARNING: Can be slow on long pages. Avoid combining with wait_until:"networkidle" or CSS extraction on dynamic sites. Better to use virtual_scroll_config for infinite feeds',
                default: false,
              },
              remove_forms: {
                type: 'boolean',
                description: 'Remove all form elements from extracted content',
                default: false,
              },
              keep_data_attributes: {
                type: 'boolean',
                description: 'Preserve data-* attributes in cleaned HTML',
                default: false,
              },
              excluded_selector: {
                type: 'string',
                description:
                  'CSS selector for elements to remove. Comma-separate multiple selectors.\n\n' +
                  'SELECTOR STRATEGY: Use get_html first to inspect page structure. Look for:\n' +
                  '  • id attributes (e.g., #cookie-banner)\n' +
                  '  • CSS classes (e.g., .advertisement, .popup)\n' +
                  '  • data-* attributes (e.g., [data-type="ad"])\n' +
                  '  • Element type + attributes (e.g., div[role="banner"])\n\n' +
                  'Examples: "#cookie-banner, .advertisement, .social-share"',
              },
              only_text: {
                type: 'boolean',
                description: 'Extract only text content, no HTML structure',
                default: false,
              },

              // === OUTPUT OPTIONS ===
              image_description_min_word_threshold: {
                type: 'number',
                description: 'Minimum words for image alt text to be considered valid',
                default: 50,
              },
              image_score_threshold: {
                type: 'number',
                description: 'Minimum relevance score for images (filters low-quality images)',
                default: 3,
              },
              exclude_external_images: {
                type: 'boolean',
                description: 'Exclude images from external domains',
                default: false,
              },
              screenshot_wait_for: {
                type: 'number',
                description: 'Extra wait time in seconds before taking screenshot',
              },

              // === LINK & DOMAIN FILTERING ===
              exclude_social_media_links: {
                type: 'boolean',
                description: 'Remove links to social media platforms',
                default: false,
              },
              exclude_domains: {
                type: 'array',
                items: { type: 'string' },
                description: 'List of domains to exclude from links (e.g., ["ads.com", "tracker.io"])',
              },

              // === PERFORMANCE & ANTI-BOT ===
              simulate_user: {
                type: 'boolean',
                description:
                  'Mimic human behavior with random mouse movements and delays. Helps bypass bot detection on protected sites. Slows crawling but improves success rate',
                default: false,
              },
              override_navigator: {
                type: 'boolean',
                description: 'Override navigator properties for stealth',
                default: false,
              },
              magic: {
                type: 'boolean',
                description:
                  'EXPERIMENTAL: Auto-handles popups, cookies, overlays.\n' +
                  'Use as LAST RESORT - can conflict with wait_for & CSS extraction\n' +
                  'Try first: remove_overlay_elements, excluded_selector\n' +
                  'Avoid with: CSS extraction, precise timing needs',
                default: false,
              },

              // Virtual Scroll Configuration
              virtual_scroll_config: {
                type: 'object',
                description:
                  'For infinite scroll sites that REPLACE content (Twitter/Instagram feeds).\n' +
                  'USE when: Content disappears as you scroll (virtual scrolling)\n' +
                  "DON'T USE when: Content appends (use scan_full_page instead)\n" +
                  'Example: {container_selector: "#timeline", scroll_count: 10, wait_after_scroll: 1}',
                properties: {
                  container_selector: {
                    type: 'string',
                    description:
                      'CSS selector for the scrollable container.\n\n' +
                      'SELECTOR STRATEGY: Use get_html first to inspect page structure. Look for:\n' +
                      '  • id attributes (e.g., #timeline)\n' +
                      '  • role attributes (e.g., [role="feed"])\n' +
                      '  • CSS classes (e.g., .feed, .timeline)\n' +
                      '  • data-* attributes (e.g., [data-testid="primaryColumn"])\n\n' +
                      'Common: "#timeline" (Twitter), "[role=\'feed\']" (generic), ".feed" (Instagram)',
                  },
                  scroll_count: {
                    type: 'number',
                    description:
                      'How many times to scroll. Each scroll loads new content batch. More = more posts but slower',
                    default: 10,
                  },
                  scroll_by: {
                    type: ['string', 'number'],
                    description:
                      'Distance per scroll. "container_height": one viewport, "page_height": full page, or pixels like 500',
                    default: 'container_height',
                  },
                  wait_after_scroll: {
                    type: 'number',
                    description: 'Seconds to wait after each scroll',
                    default: 0.5,
                  },
                },
                required: ['container_selector'],
              },

              // Other
              log_console: {
                type: 'boolean',
                description: 'Capture browser console logs for debugging',
                default: false,
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'manage_session',
          description:
            '[SESSION MANAGEMENT] Unified tool for managing browser sessions. Supports three actions:\n\n' +
            '• CREATE: Start a persistent browser session that maintains state across calls\n' +
            '• CLEAR: Remove a session from local tracking\n' +
            '• LIST: Show all active sessions with age and usage info\n\n' +
            'USAGE EXAMPLES:\n' +
            '1. Create session: {action: "create", session_id: "my-session", initial_url: "https://example.com"}\n' +
            '2. Clear session: {action: "clear", session_id: "my-session"}\n' +
            '3. List sessions: {action: "list"}\n\n' +
            'Browser sessions maintain ALL state (cookies, localStorage, page) across multiple crawl calls. Essential for: forms, login flows, multi-step processes, maintaining state across operations.',
          inputSchema: {
            // Anthropic/Claude tools require top-level schemas to be a plain object without oneOf/allOf/anyOf
            type: 'object',
            properties: {
              action: {
                type: 'string',
                description: 'Action to perform: create, clear, or list',
                enum: ['create', 'clear', 'list'],
              },
              session_id: {
                type: 'string',
                description:
                  'Session identifier. Required for action="clear". Optional for create (auto-generated if omitted).',
              },
              initial_url: {
                type: 'string',
                description: 'URL to load when creating session (action="create").',
              },
              browser_type: {
                type: 'string',
                enum: ['chromium', 'firefox', 'webkit'],
                description: 'Browser engine for the session (action="create").',
                default: 'chromium',
              },
            },
            required: ['action'],
          },
        },
        {
          name: 'extract_with_llm',
          description:
            '[STATELESS] Ask questions about webpage content using AI. Returns natural language answers. ' +
            'Crawls fresh each time. For dynamic content or sessions, use crawl with session_id first.',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'The URL to extract data from',
              },
              query: {
                type: 'string',
                description:
                  'Your question about the webpage content. Examples: "What is the main topic?", ' +
                  '"List all product prices", "Summarize the key points", "What contact information is available?"',
              },
            },
            required: ['url', 'query'],
          },
        },
      ],
    }));

    // Handle tool calls
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      const { name, arguments: args } = request.params;

      try {
        switch (name) {
          case 'get_markdown':
            return await this.validateAndExecute(
              'get_markdown',
              args,
              GetMarkdownSchema as z.ZodSchema<z.infer<typeof GetMarkdownSchema>>,
              async (validatedArgs) => this.contentHandlers.getMarkdown(validatedArgs),
            );

          case 'capture_screenshot':
            return await this.validateAndExecute(
              'capture_screenshot',
              args,
              CaptureScreenshotSchema,
              async (validatedArgs) => this.contentHandlers.captureScreenshot(validatedArgs),
            );

          case 'generate_pdf':
            return await this.validateAndExecute('generate_pdf', args, GeneratePdfSchema, async (validatedArgs) =>
              this.contentHandlers.generatePDF(validatedArgs),
            );

          case 'execute_js':
            return await this.validateAndExecute('execute_js', args, ExecuteJsSchema, async (validatedArgs) =>
              this.utilityHandlers.executeJS(validatedArgs),
            );

          case 'batch_crawl':
            return await this.validateAndExecute('batch_crawl', args, BatchCrawlSchema, async (validatedArgs) =>
              this.crawlHandlers.batchCrawl(validatedArgs as BatchCrawlOptions),
            );

          case 'smart_crawl':
            return await this.validateAndExecute('smart_crawl', args, SmartCrawlSchema, async (validatedArgs) =>
              this.crawlHandlers.smartCrawl(validatedArgs),
            );

          case 'get_html':
            return await this.validateAndExecute('get_html', args, GetHtmlSchema, async (validatedArgs) =>
              this.contentHandlers.getHTML(validatedArgs),
            );

          case 'extract_links':
            return await this.validateAndExecute(
              'extract_links',
              args,
              ExtractLinksSchema as z.ZodSchema<z.infer<typeof ExtractLinksSchema>>,
              async (validatedArgs) => this.utilityHandlers.extractLinks(validatedArgs),
            );

          case 'crawl_recursive':
            return await this.validateAndExecute('crawl_recursive', args, CrawlRecursiveSchema, async (validatedArgs) =>
              this.crawlHandlers.crawlRecursive(validatedArgs),
            );

          case 'parse_sitemap':
            return await this.validateAndExecute('parse_sitemap', args, ParseSitemapSchema, async (validatedArgs) =>
              this.crawlHandlers.parseSitemap(validatedArgs),
            );

          case 'crawl':
            return await this.validateAndExecute('crawl', args, CrawlSchema, async (validatedArgs) =>
              this.crawlHandlers.crawl(validatedArgs),
            );

          case 'manage_session':
            return await this.validateAndExecute('manage_session', args, ManageSessionSchema, async (validatedArgs) =>
              this.sessionHandlers.manageSession(validatedArgs),
            );

          case 'extract_with_llm':
            return await this.validateAndExecute(
              'extract_with_llm',
              args,
              ExtractWithLlmSchema,
              async (validatedArgs) => this.contentHandlers.extractWithLLM(validatedArgs),
            );

          default:
            throw new Error(`Unknown tool: ${name}`);
        }
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error: ${error instanceof Error ? error.message : String(error)}`,
            },
          ],
        };
      }
    });
  }

  // Expose handler methods for testing
  protected async getMarkdown(options: Parameters<ContentHandlers['getMarkdown']>[0]) {
    return this.contentHandlers.getMarkdown(options);
  }

  protected async captureScreenshot(options: Parameters<ContentHandlers['captureScreenshot']>[0]) {
    return this.contentHandlers.captureScreenshot(options);
  }

  protected async generatePDF(options: Parameters<ContentHandlers['generatePDF']>[0]) {
    return this.contentHandlers.generatePDF(options);
  }

  protected async getHTML(options: Parameters<ContentHandlers['getHTML']>[0]) {
    return this.contentHandlers.getHTML(options);
  }

  protected async extractWithLLM(options: Parameters<ContentHandlers['extractWithLLM']>[0]) {
    return this.contentHandlers.extractWithLLM(options);
  }

  protected async executeJS(options: Parameters<UtilityHandlers['executeJS']>[0]) {
    return this.utilityHandlers.executeJS(options);
  }

  protected async extractLinks(options: Parameters<UtilityHandlers['extractLinks']>[0]) {
    return this.utilityHandlers.extractLinks(options);
  }

  protected async batchCrawl(options: Parameters<CrawlHandlers['batchCrawl']>[0]) {
    return this.crawlHandlers.batchCrawl(options);
  }

  protected async smartCrawl(options: Parameters<CrawlHandlers['smartCrawl']>[0]) {
    return this.crawlHandlers.smartCrawl(options);
  }

  protected async crawlRecursive(options: Parameters<CrawlHandlers['crawlRecursive']>[0]) {
    return this.crawlHandlers.crawlRecursive(options);
  }

  protected async parseSitemap(options: Parameters<CrawlHandlers['parseSitemap']>[0]) {
    return this.crawlHandlers.parseSitemap(options);
  }

  protected async crawl(options: Parameters<CrawlHandlers['crawl']>[0]) {
    return this.crawlHandlers.crawl(options);
  }

  // Setter for axiosClient to update all handlers (for testing)
  set axiosClientForTesting(client: AxiosInstance) {
    this.axiosClient = client;
    // Re-initialize handlers with new client
    this.contentHandlers = new ContentHandlers(this.service, client, this.sessions);
    this.sessionHandlers = new SessionHandlers(this.service, client, this.sessions);
    this.utilityHandlers = new UtilityHandlers(this.service, client, this.sessions);
    this.crawlHandlers = new CrawlHandlers(this.service, client, this.sessions);
  }

  /* istanbul ignore next */
  async start() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error(`${this.serverName} v${this.serverVersion} started`);
  }
}

```