This is page 2 of 4. Use http://codebase.md/omgwtfwow/mcp-crawl4ai-ts?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .env.example
├── .github
│   ├── CI.md
│   ├── copilot-instructions.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── .prettierignore
├── .prettierrc.json
├── CHANGELOG.md
├── eslint.config.mjs
├── jest.config.cjs
├── jest.setup.cjs
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   ├── __tests__
│   │   ├── crawl.test.ts
│   │   ├── crawl4ai-service.network.test.ts
│   │   ├── crawl4ai-service.test.ts
│   │   ├── handlers
│   │   │   ├── crawl-handlers.test.ts
│   │   │   ├── parameter-combinations.test.ts
│   │   │   ├── screenshot-saving.test.ts
│   │   │   ├── session-handlers.test.ts
│   │   │   └── utility-handlers.test.ts
│   │   ├── index.cli.test.ts
│   │   ├── index.npx.test.ts
│   │   ├── index.server.test.ts
│   │   ├── index.test.ts
│   │   ├── integration
│   │   │   ├── batch-crawl.integration.test.ts
│   │   │   ├── capture-screenshot.integration.test.ts
│   │   │   ├── crawl-advanced.integration.test.ts
│   │   │   ├── crawl-handlers.integration.test.ts
│   │   │   ├── crawl-recursive.integration.test.ts
│   │   │   ├── crawl.integration.test.ts
│   │   │   ├── execute-js.integration.test.ts
│   │   │   ├── extract-links.integration.test.ts
│   │   │   ├── extract-with-llm.integration.test.ts
│   │   │   ├── generate-pdf.integration.test.ts
│   │   │   ├── get-html.integration.test.ts
│   │   │   ├── get-markdown.integration.test.ts
│   │   │   ├── parse-sitemap.integration.test.ts
│   │   │   ├── session-management.integration.test.ts
│   │   │   ├── smart-crawl.integration.test.ts
│   │   │   └── test-utils.ts
│   │   ├── request-handler.test.ts
│   │   ├── schemas
│   │   │   └── validation-edge-cases.test.ts
│   │   ├── types
│   │   │   └── mocks.ts
│   │   └── utils
│   │       └── javascript-validation.test.ts
│   ├── crawl4ai-service.ts
│   ├── handlers
│   │   ├── base-handler.ts
│   │   ├── content-handlers.ts
│   │   ├── crawl-handlers.ts
│   │   ├── session-handlers.ts
│   │   └── utility-handlers.ts
│   ├── index.ts
│   ├── schemas
│   │   ├── helpers.ts
│   │   └── validation-schemas.ts
│   ├── server.ts
│   └── types.ts
├── tsconfig.build.json
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/src/__tests__/index.npx.test.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { spawn } from 'child_process';
  2 | import * as path from 'path';
  3 | import * as url from 'url';
  4 | import * as fs from 'fs/promises';
  5 |
  6 | const __dirname = url.fileURLToPath(new URL('.', import.meta.url));
  7 |
  8 | describe('NPX Execution Tests', () => {
  9 |   // These tests ensure the package works when installed and run via npx
 10 |   // This prevents issues like the one in v2.6.11 where the server wouldn't start
 11 |
 12 |   describe('Simulated NPX execution', () => {
 13 |     it('should start server when run from dist/index.js directly', async () => {
 14 |       // This simulates how npx runs the built package
 15 |       const distIndexPath = path.join(__dirname, '..', '..', 'dist', 'index.js');
 16 |
 17 |       // Check if dist/index.js exists (it should after build)
 18 |       try {
 19 |         await fs.access(distIndexPath);
 20 |       } catch {
 21 |         console.warn('Skipping test - dist/index.js not found. Run "npm run build" first.');
 22 |         return;
 23 |       }
 24 |
 25 |       const child = spawn('node', [distIndexPath], {
 26 |         env: {
 27 |           ...process.env,
 28 |           CRAWL4AI_BASE_URL: 'http://localhost:11235',
 29 |           CRAWL4AI_API_KEY: 'test-key',
 30 |           // Don't load .env file to simulate production
 31 |           NODE_ENV: 'production',
 32 |         },
 33 |         stdio: 'pipe',
 34 |       });
 35 |
 36 |       let stderr = '';
 37 |       child.stderr.on('data', (data) => {
 38 |         stderr += data.toString();
 39 |       });
 40 |
 41 |       // Wait for server to start
 42 |       await new Promise<void>((resolve) => {
 43 |         const timeout = setTimeout(() => {
 44 |           child.kill();
 45 |           resolve();
 46 |         }, 2000);
 47 |
 48 |         child.stderr.on('data', (data) => {
 49 |           const output = data.toString();
 50 |           if (output.includes('started')) {
 51 |             clearTimeout(timeout);
 52 |             child.kill();
 53 |             resolve();
 54 |           }
 55 |         });
 56 |       });
 57 |
 58 |       // Server should have started successfully
 59 |       expect(stderr).toContain('crawl4ai-mcp');
 60 |       expect(stderr).toContain('started');
 61 |     });
 62 |
 63 |     it('should start server without dotenv when env vars are provided', async () => {
 64 |       // This tests that we don't require dotenv in production
 65 |       const distIndexPath = path.join(__dirname, '..', '..', 'dist', 'index.js');
 66 |
 67 |       try {
 68 |         await fs.access(distIndexPath);
 69 |       } catch {
 70 |         console.warn('Skipping test - dist/index.js not found.');
 71 |         return;
 72 |       }
 73 |
 74 |       // Temporarily rename node_modules/dotenv to simulate it not being available
 75 |       const dotenvPath = path.join(__dirname, '..', '..', 'node_modules', 'dotenv');
 76 |       const dotenvBackupPath = path.join(__dirname, '..', '..', 'node_modules', 'dotenv.backup');
 77 |
 78 |       let dotenvRenamed = false;
 79 |       try {
 80 |         // Only rename if dotenv exists
 81 |         try {
 82 |           await fs.access(dotenvPath);
 83 |           await fs.rename(dotenvPath, dotenvBackupPath);
 84 |           dotenvRenamed = true;
 85 |         } catch {
 86 |           // dotenv doesn't exist, which is fine for this test
 87 |         }
 88 |
 89 |         const child = spawn('node', [distIndexPath], {
 90 |           env: {
 91 |             CRAWL4AI_BASE_URL: 'http://localhost:11235',
 92 |             CRAWL4AI_API_KEY: 'test-key',
 93 |             PATH: process.env.PATH,
 94 |           },
 95 |           stdio: 'pipe',
 96 |         });
 97 |
 98 |         let stderr = '';
 99 |         child.stderr.on('data', (data) => {
100 |           stderr += data.toString();
101 |         });
102 |
103 |         // Wait for server to start
104 |         await new Promise<void>((resolve) => {
105 |           setTimeout(() => {
106 |             child.kill();
107 |             resolve();
108 |           }, 2000);
109 |         });
110 |
111 |         // Server should still start even without dotenv
112 |         expect(stderr).toContain('crawl4ai-mcp');
113 |         expect(stderr).toContain('started');
114 |       } finally {
115 |         // Restore dotenv if we renamed it
116 |         if (dotenvRenamed) {
117 |           await fs.rename(dotenvBackupPath, dotenvPath);
118 |         }
119 |       }
120 |     });
121 |
122 |     it('should handle MCP protocol initialization', async () => {
123 |       // This simulates the full MCP handshake that Claude Desktop does
124 |       const distIndexPath = path.join(__dirname, '..', '..', 'dist', 'index.js');
125 |
126 |       try {
127 |         await fs.access(distIndexPath);
128 |       } catch {
129 |         console.warn('Skipping test - dist/index.js not found.');
130 |         return;
131 |       }
132 |
133 |       const child = spawn('node', [distIndexPath], {
134 |         env: {
135 |           ...process.env,
136 |           CRAWL4AI_BASE_URL: 'http://localhost:11235',
137 |           CRAWL4AI_API_KEY: 'test-key',
138 |         },
139 |         stdio: 'pipe',
140 |       });
141 |
142 |       let stdout = '';
143 |       let stderr = '';
144 |
145 |       child.stdout.on('data', (data) => {
146 |         stdout += data.toString();
147 |       });
148 |
149 |       child.stderr.on('data', (data) => {
150 |         stderr += data.toString();
151 |       });
152 |
153 |       // Wait for server to start
154 |       await new Promise((resolve) => setTimeout(resolve, 500));
155 |
156 |       // Send MCP initialization request (like Claude Desktop does)
157 |       const initRequest =
158 |         JSON.stringify({
159 |           jsonrpc: '2.0',
160 |           method: 'initialize',
161 |           params: {
162 |             protocolVersion: '2025-06-18',
163 |             capabilities: {},
164 |             clientInfo: {
165 |               name: 'test-client',
166 |               version: '1.0.0',
167 |             },
168 |           },
169 |           id: 1,
170 |         }) + '\n';
171 |
172 |       child.stdin.write(initRequest);
173 |
174 |       // Wait for response
175 |       await new Promise((resolve) => setTimeout(resolve, 1000));
176 |
177 |       // Parse the response
178 |       const response = stdout.trim().split('\n').pop();
179 |       let parsed;
180 |       try {
181 |         parsed = JSON.parse(response || '{}');
182 |       } catch {
183 |         // Response might not be valid JSON yet
184 |         parsed = {};
185 |       }
186 |
187 |       child.kill();
188 |
189 |       // Should have received an initialization response
190 |       expect(stderr).toContain('started');
191 |       expect(parsed.id).toBe(1);
192 |       expect(parsed.result).toBeDefined();
193 |     });
194 |
195 |     it('should fail gracefully when CRAWL4AI_BASE_URL is missing', async () => {
196 |       const distIndexPath = path.join(__dirname, '..', '..', 'dist', 'index.js');
197 |
198 |       try {
199 |         await fs.access(distIndexPath);
200 |       } catch {
201 |         console.warn('Skipping test - dist/index.js not found.');
202 |         return;
203 |       }
204 |
205 |       const child = spawn('node', [distIndexPath], {
206 |         env: {
207 |           // Explicitly set to empty string to prevent dotenv from loading
208 |           CRAWL4AI_BASE_URL: '',
209 |           PATH: process.env.PATH,
210 |         },
211 |         stdio: 'pipe',
212 |       });
213 |
214 |       let stderr = '';
215 |       child.stderr.on('data', (data) => {
216 |         stderr += data.toString();
217 |       });
218 |
219 |       const exitCode = await new Promise<number | null>((resolve, reject) => {
220 |         // Add timeout to prevent hanging
221 |         const timeout = setTimeout(() => {
222 |           child.kill('SIGTERM');
223 |           reject(new Error('Process timeout'));
224 |         }, 10000); // 10 second timeout
225 |
226 |         child.on('exit', (code) => {
227 |           clearTimeout(timeout);
228 |           resolve(code);
229 |         });
230 |
231 |         child.on('error', (err) => {
232 |           clearTimeout(timeout);
233 |           reject(err);
234 |         });
235 |       });
236 |
237 |       // Should exit with error code
238 |       expect(exitCode).toBe(1);
239 |       expect(stderr).toContain('CRAWL4AI_BASE_URL environment variable is required');
240 |
241 |       // Ensure cleanup
242 |       child.kill();
243 |     }, 15000); // 15 second test timeout
244 |   });
245 |
246 |   describe('NPX-specific edge cases', () => {
247 |     it('should work with different Node.js execution paths', async () => {
248 |       // NPX might use different paths for node execution
249 |       const distIndexPath = path.join(__dirname, '..', '..', 'dist', 'index.js');
250 |
251 |       try {
252 |         await fs.access(distIndexPath);
253 |       } catch {
254 |         console.warn('Skipping test - dist/index.js not found.');
255 |         return;
256 |       }
257 |
258 |       // Test with different argv[1] values that npx might use
259 |       const testPaths = [
260 |         distIndexPath,
261 |         '/tmp/npx-12345/node_modules/.bin/mcp-crawl4ai-ts',
262 |         path.join(process.env.HOME || '', '.npm/_npx/12345/node_modules/mcp-crawl4ai-ts/dist/index.js'),
263 |       ];
264 |
265 |       for (const testPath of testPaths) {
266 |         const child = spawn('node', [distIndexPath], {
267 |           env: {
268 |             ...process.env,
269 |             CRAWL4AI_BASE_URL: 'http://localhost:11235',
270 |             // Simulate different execution contexts
271 |             npm_execpath: testPath,
272 |           },
273 |           stdio: 'pipe',
274 |         });
275 |
276 |         let started = false;
277 |         child.stderr.on('data', (data) => {
278 |           if (data.toString().includes('started')) {
279 |             started = true;
280 |           }
281 |         });
282 |
283 |         // Give it time to start
284 |         await new Promise((resolve) => setTimeout(resolve, 500));
285 |         child.kill();
286 |
287 |         expect(started).toBe(true);
288 |       }
289 |     });
290 |   });
291 | });
292 |
```
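For orientation (an illustrative sketch, not a file in this repository): the handshake these tests simulate is plain newline-delimited JSON-RPC 2.0 over stdio. A minimal client, assuming the package has been built to `dist/index.js` and a Crawl4AI server URL is available:

```typescript
import { spawn } from 'child_process';

// Start the MCP server the same way npx would: plain `node dist/index.js`.
const child = spawn('node', ['dist/index.js'], {
  env: { ...process.env, CRAWL4AI_BASE_URL: 'http://localhost:11235' },
  stdio: 'pipe',
});

// MCP over stdio expects `initialize` as the first request.
child.stdin.write(
  JSON.stringify({
    jsonrpc: '2.0',
    method: 'initialize',
    params: {
      protocolVersion: '2025-06-18',
      capabilities: {},
      clientInfo: { name: 'example-client', version: '1.0.0' },
    },
    id: 1,
  }) + '\n',
);

// The response arrives on stdout as a single JSON line carrying the same id.
child.stdout.once('data', (data) => {
  console.log(JSON.parse(data.toString().trim().split('\n')[0]));
  child.kill();
});
```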
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------

```typescript
  1 | export interface CrawlOptions {
  2 |   remove_images?: boolean;
  3 |   bypass_cache?: boolean;
  4 |   filter_mode?: 'blacklist' | 'whitelist';
  5 |   filter_list?: string[];
  6 |   screenshot?: boolean;
  7 |   wait_for?: string;
  8 |   timeout?: number;
  9 | }
 10 |
 11 | export interface JSExecuteOptions {
 12 |   js_code: string | string[];
 13 |   // Only url and js_code (scripts) are supported by /execute_js endpoint
 14 | }
 15 |
 16 | export interface JSExecuteEndpointOptions {
 17 |   url: string;
 18 |   scripts: string | string[];
 19 |   // Only url and scripts are supported by /execute_js endpoint
 20 | }
 21 |
 22 | export interface JSExecuteEndpointResponse {
 23 |   success: boolean;
 24 |   js_execution_result: {
 25 |     success: boolean;
 26 |     results: unknown[];
 27 |   };
 28 |   markdown?: string | CrawlMarkdownResult;
 29 | }
 30 |
 31 | export interface ScreenshotEndpointOptions {
 32 |   url: string;
 33 |   screenshot_wait_for?: number;
 34 |   save_to_directory?: string;
 35 |   // output_path is omitted to get base64 response
 36 | }
 37 |
 38 | export interface ScreenshotEndpointResponse {
 39 |   success: boolean;
 40 |   screenshot: string; // base64 encoded image
 41 | }
 42 |
 43 | export interface PDFEndpointOptions {
 44 |   url: string;
 45 |   // Only url is supported by /pdf endpoint
 46 | }
 47 |
 48 | export interface PDFEndpointResponse {
 49 |   success: boolean;
 50 |   pdf: string; // base64 encoded PDF
 51 | }
 52 |
 53 | export interface HTMLEndpointOptions {
 54 |   url: string;
 55 |   // Only url is supported by /html endpoint
 56 | }
 57 |
 58 | export interface HTMLEndpointResponse {
 59 |   html: string;
 60 |   url: string;
 61 |   success: boolean;
 62 | }
 63 |
 64 | export type FilterType = 'raw' | 'fit' | 'bm25' | 'llm';
 65 |
 66 | export interface MarkdownEndpointOptions {
 67 |   url: string;
 68 |   f?: FilterType; // Filter type: raw, fit (default), bm25, llm
 69 |   q?: string; // Query string for bm25/llm filters
 70 |   c?: string; // Cache-bust parameter
 71 | }
 72 |
 73 | export interface MarkdownEndpointResponse {
 74 |   url: string;
 75 |   filter: string;
 76 |   query: string | null;
 77 |   cache: string;
 78 |   markdown: string;
 79 |   success: boolean;
 80 | }
 81 |
 82 | export interface LLMEndpointOptions {
 83 |   url: string;
 84 |   query: string;
 85 | }
 86 |
 87 | export interface LLMEndpointResponse {
 88 |   answer: string;
 89 | }
 90 |
 91 | export interface BatchCrawlOptions extends CrawlOptions {
 92 |   urls: string[];
 93 |   max_concurrent?: number;
 94 |   // New: Support per-URL configs array (0.7.3/0.7.4)
 95 |   configs?: Array<{
 96 |     url: string;
 97 |     browser_config?: BrowserConfig;
 98 |     crawler_config?: CrawlerConfig;
 99 |     extraction_strategy?: ExtractionStrategy;
100 |     table_extraction_strategy?: TableExtractionStrategy;
101 |     markdown_generator_options?: MarkdownGeneratorOptions;
102 |     matcher?: string | ((url: string) => boolean);
103 |   }>;
104 | }
105 |
106 | // Browser configuration options
107 | export interface BrowserConfig {
108 |   browser_type?: 'chromium' | 'firefox' | 'webkit' | 'undetected';
109 |   headless?: boolean;
110 |   viewport_width?: number;
111 |   viewport_height?: number;
112 |   user_agent?: string;
113 |   // Unified proxy config - accepts string or object format (new in 0.7.3/0.7.4)
114 |   proxy?:
115 |     | string
116 |     | {
117 |         server: string;
118 |         username?: string;
119 |         password?: string;
120 |       };
121 |   // Legacy field kept for backward compatibility
122 |   proxy_config?: {
123 |     server: string;
124 |     username?: string;
125 |     password?: string;
126 |   };
127 |   cookies?: Array<{
128 |     name: string;
129 |     value: string;
130 |     domain: string;
131 |     path?: string;
132 |   }>;
133 |   headers?: Record<string, string>;
134 |   extra_args?: string[];
135 | }
136 |
137 | // Virtual scroll configuration for sites like Twitter/Instagram
138 | export interface VirtualScrollConfig {
139 |   container_selector: string;
140 |   scroll_count?: number;
141 |   scroll_by?: string | number;
142 |   wait_after_scroll?: number;
143 | }
144 |
145 | // Crawler configuration options
146 | export interface CrawlerConfig {
147 |   // Content filtering
148 |   word_count_threshold?: number;
149 |   excluded_tags?: string[];
150 |   excluded_selector?: string;
151 |   remove_overlay_elements?: boolean;
152 |   only_text?: boolean;
153 |   remove_forms?: boolean;
154 |   keep_data_attributes?: boolean;
155 |
156 |   // JavaScript execution
157 |   js_code?: string | string[];
158 |   js_only?: boolean;
159 |   wait_for?: string;
160 |   wait_for_timeout?: number;
161 |
162 |   // Page navigation & timing
163 |   wait_until?: 'domcontentloaded' | 'networkidle' | 'load';
164 |   page_timeout?: number;
165 |   wait_for_images?: boolean;
166 |   ignore_body_visibility?: boolean;
167 |
168 |   // Dynamic content handling
169 |   delay_before_scroll?: number;
170 |   scroll_delay?: number;
171 |   scan_full_page?: boolean;
172 |   virtual_scroll_config?: VirtualScrollConfig;
173 |
174 |   // Content processing
175 |   process_iframes?: boolean;
176 |   exclude_external_links?: boolean;
177 |
178 |   // Media handling
179 |   screenshot?: boolean;
180 |   screenshot_wait_for?: number;
181 |   pdf?: boolean;
182 |   capture_mhtml?: boolean;
183 |   image_description_min_word_threshold?: number;
184 |   image_score_threshold?: number;
185 |   exclude_external_images?: boolean;
186 |
187 |   // Link filtering
188 |   exclude_social_media_links?: boolean;
189 |   exclude_domains?: string[];
190 |
191 |   // Page interaction
192 |   simulate_user?: boolean;
193 |   override_navigator?: boolean;
194 |   magic?: boolean;
195 |
196 |   // Session management
197 |   session_id?: string;
198 |
199 |   // Cache control
200 |   cache_mode?: 'ENABLED' | 'BYPASS' | 'DISABLED';
201 |
202 |   // Performance options
203 |   timeout?: number;
204 |   verbose?: boolean;
205 |
206 |   // Debug
207 |   log_console?: boolean;
208 |
209 |   // New parameters from 0.7.3/0.7.4
210 |   delay_before_return_html?: number; // Delay in ms before capturing final HTML
211 |   css_selector?: string; // CSS selector to extract specific elements
212 |   include_links?: boolean; // Whether to include links in the response
213 |   resolve_absolute_urls?: boolean; // Convert relative URLs to absolute ones
214 | }
215 |
216 | // Extraction strategy passthrough objects (new in 0.7.3/0.7.4)
217 | export interface ExtractionStrategy {
218 |   [key: string]: unknown;
219 | }
220 |
221 | export interface TableExtractionStrategy {
222 |   enable_chunking?: boolean;
223 |   thresholds?: Record<string, unknown>;
224 |   [key: string]: unknown;
225 | }
226 |
227 | export interface MarkdownGeneratorOptions {
228 |   include_links?: boolean;
229 |   [key: string]: unknown;
230 | }
231 |
232 | // Advanced crawl configuration combining browser and crawler configs
233 | export interface AdvancedCrawlConfig {
234 |   url?: string;
235 |   urls?: string[];
236 |   browser_config?: BrowserConfig;
237 |   crawler_config?: CrawlerConfig;
238 |   priority?: number;
239 |   extraction_strategy?: ExtractionStrategy;
240 |   table_extraction_strategy?: TableExtractionStrategy;
241 |   markdown_generator_options?: MarkdownGeneratorOptions;
242 | }
243 |
244 | // Session management types (used internally by MCP server)
245 | export interface SessionInfo {
246 |   id: string;
247 |   created_at: Date;
248 |   last_used: Date;
249 |   initial_url?: string;
250 |   metadata?: Record<string, unknown>;
251 | }
252 |
253 | // Crawl endpoint types
254 | export interface CrawlEndpointOptions {
255 |   urls: string[];
256 |   browser_config?: BrowserConfig;
257 |   crawler_config?: CrawlerConfig;
258 | }
259 |
260 | export interface CrawlMarkdownResult {
261 |   raw_markdown: string;
262 |   markdown_with_citations: string;
263 |   references_markdown: string;
264 |   fit_markdown: string;
265 |   fit_html: string;
266 | }
267 |
268 | export interface CrawlMediaResult {
269 |   images: Array<{
270 |     src?: string | null;
271 |     data?: string;
272 |     alt?: string | null;
273 |     desc?: string;
274 |     score?: number;
275 |     type?: string;
276 |     group_id?: number;
277 |     format?: string | null;
278 |     width?: number | null;
279 |   }>;
280 |   videos: Array<{
281 |     src?: string | null;
282 |     data?: string;
283 |     alt?: string | null;
284 |     desc?: string;
285 |     score?: number;
286 |     type?: string;
287 |     group_id?: number;
288 |     format?: string | null;
289 |     width?: number | null;
290 |   }>;
291 |   audios: Array<{
292 |     src?: string | null;
293 |     data?: string;
294 |     alt?: string | null;
295 |     desc?: string;
296 |     score?: number;
297 |     type?: string;
298 |     group_id?: number;
299 |     format?: string | null;
300 |     width?: number | null;
301 |   }>;
302 | }
303 |
304 | interface LinkItem {
305 |   href: string;
306 |   text: string;
307 |   title: string;
308 |   base_domain?: string | null;
309 |   head_data?: Record<string, unknown> | null;
310 |   head_extraction_status?: string | null;
311 |   head_extraction_error?: string | null;
312 |   intrinsic_score?: number;
313 |   contextual_score?: number | null;
314 |   total_score?: number | null;
315 | }
316 |
317 | export interface CrawlLinksResult {
318 |   internal: LinkItem[];
319 |   external: LinkItem[];
320 | }
321 |
322 | export interface CrawlResultItem {
323 |   url: string;
324 |   html: string;
325 |   cleaned_html: string;
326 |   fit_html: string;
327 |   success: boolean;
328 |   error_message?: string;
329 |   status_code: number;
330 |   response_headers: Record<string, unknown>;
331 |   redirected_url?: string;
332 |   session_id: string | null;
333 |   metadata: Record<string, unknown>;
334 |   links: CrawlLinksResult;
335 |   media: CrawlMediaResult;
336 |   markdown: CrawlMarkdownResult;
337 |   tables: unknown[];
338 |   extracted_content: unknown | null;
339 |   screenshot: string | null; // base64 PNG when screenshot: true
340 |   pdf: string | null; // base64 PDF when pdf: true
341 |   mhtml: string | null;
342 |   js_execution_result: {
343 |     success: boolean;
344 |     results: unknown[];
345 |   } | null;
346 |   downloaded_files: unknown | null;
347 |   network_requests: unknown | null;
348 |   console_messages: unknown | null;
349 |   ssl_certificate: unknown | null;
350 |   dispatch_result: unknown | null;
351 | }
352 |
353 | export interface CrawlEndpointResponse {
354 |   success: boolean;
355 |   results: CrawlResultItem[];
356 |   server_processing_time_s: number;
357 |   server_memory_delta_mb: number;
358 |   server_peak_memory_mb: number;
359 | }
360 |
```
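The request and response shapes above compose directly into `/crawl` payloads. An illustrative sketch (not a repository file) of building one from these types:

```typescript
import type { BrowserConfig, CrawlerConfig, CrawlEndpointOptions } from './types.js';

const browser_config: BrowserConfig = {
  browser_type: 'chromium',
  headless: true,
  viewport_width: 1280,
  viewport_height: 720,
};

const crawler_config: CrawlerConfig = {
  cache_mode: 'BYPASS',
  wait_until: 'networkidle',
  screenshot: true,
};

// This object is the body the service layer posts to the /crawl endpoint.
const payload: CrawlEndpointOptions = {
  urls: ['https://example.com'],
  browser_config,
  crawler_config,
};
```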
--------------------------------------------------------------------------------
/src/schemas/validation-schemas.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { z } from 'zod';
  2 | import { validateJavaScriptCode, createStatelessSchema } from './helpers.js';
  3 |
  4 | export const JsCodeSchema = z
  5 |   .union([
  6 |     z.string().refine(validateJavaScriptCode, {
  7 |       message:
  8 |         'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
  9 |     }),
 10 |     z.array(
 11 |       z.string().refine(validateJavaScriptCode, {
 12 |         message:
 13 |           'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
 14 |       }),
 15 |     ),
 16 |   ])
 17 |   .describe('JavaScript code as string or array of strings');
 18 |
 19 | export const VirtualScrollConfigSchema = z.object({
 20 |   container_selector: z.string(),
 21 |   scroll_count: z.number().optional(),
 22 |   scroll_by: z.union([z.string(), z.number()]).optional(),
 23 |   wait_after_scroll: z.number().optional(),
 24 | });
 25 |
 26 | const GetMarkdownBaseSchema = z.object({
 27 |   url: z.string().url(),
 28 |   filter: z.enum(['raw', 'fit', 'bm25', 'llm']).optional().default('fit'),
 29 |   query: z.string().optional(),
 30 |   cache: z.string().optional().default('0'),
 31 | });
 32 |
 33 | export const GetMarkdownSchema = createStatelessSchema(GetMarkdownBaseSchema, 'get_markdown').refine(
 34 |   (data) => {
 35 |     // If filter is bm25 or llm, query is required
 36 |     if ((data.filter === 'bm25' || data.filter === 'llm') && !data.query) {
 37 |       return false;
 38 |     }
 39 |     return true;
 40 |   },
 41 |   {
 42 |     message: 'Query parameter is required when using bm25 or llm filter',
 43 |     path: ['query'],
 44 |   },
 45 | );
 46 |
 47 | export const ExecuteJsSchema = createStatelessSchema(
 48 |   z.object({
 49 |     url: z.string().url(),
 50 |     scripts: JsCodeSchema,
 51 |   }),
 52 |   'execute_js',
 53 | );
 54 |
 55 | export const GetHtmlSchema = createStatelessSchema(
 56 |   z.object({
 57 |     url: z.string().url(),
 58 |   }),
 59 |   'get_html',
 60 | );
 61 |
 62 | export const CaptureScreenshotSchema = createStatelessSchema(
 63 |   z.object({
 64 |     url: z.string().url(),
 65 |     screenshot_wait_for: z.number().optional(),
 66 |     save_to_directory: z.string().optional().describe('Local directory to save screenshot file'),
 67 |     // output_path not exposed as MCP needs base64 data
 68 |   }),
 69 |   'capture_screenshot',
 70 | );
 71 |
 72 | export const GeneratePdfSchema = createStatelessSchema(
 73 |   z.object({
 74 |     url: z.string().url(),
 75 |     // Only url is supported - output_path not exposed as MCP needs base64 data
 76 |   }),
 77 |   'generate_pdf',
 78 | );
 79 |
 80 | export const ExtractWithLlmSchema = createStatelessSchema(
 81 |   z.object({
 82 |     url: z.string().url(),
 83 |     query: z.string(),
 84 |   }),
 85 |   'extract_with_llm',
 86 | );
 87 |
 88 | export const BatchCrawlSchema = createStatelessSchema(
 89 |   z.object({
 90 |     urls: z.array(z.string().url()),
 91 |     max_concurrent: z.number().optional(),
 92 |     remove_images: z.boolean().optional(),
 93 |     bypass_cache: z.boolean().optional(),
 94 |     // New: Support per-URL configs array (0.7.3/0.7.4)
 95 |     configs: z
 96 |       .array(
 97 |         z.object({
 98 |           url: z.string().url(),
 99 |           browser_config: z.record(z.unknown()).optional(),
100 |           crawler_config: z.record(z.unknown()).optional(),
101 |           extraction_strategy: z.record(z.unknown()).optional(),
102 |           table_extraction_strategy: z.record(z.unknown()).optional(),
103 |           markdown_generator_options: z.record(z.unknown()).optional(),
104 |           matcher: z.union([z.string(), z.function()]).optional(),
105 |         }),
106 |       )
107 |       .optional(),
108 |   }),
109 |   'batch_crawl',
110 | );
111 |
112 | export const SmartCrawlSchema = createStatelessSchema(
113 |   z.object({
114 |     url: z.string().url(),
115 |     max_depth: z.number().optional(),
116 |     follow_links: z.boolean().optional(),
117 |     bypass_cache: z.boolean().optional(),
118 |   }),
119 |   'smart_crawl',
120 | );
121 |
122 | export const ExtractLinksSchema = createStatelessSchema(
123 |   z.object({
124 |     url: z.string().url(),
125 |     categorize: z.boolean().optional().default(true),
126 |   }),
127 |   'extract_links',
128 | );
129 |
130 | export const CrawlRecursiveSchema = createStatelessSchema(
131 |   z.object({
132 |     url: z.string().url(),
133 |     max_depth: z.number().optional(),
134 |     max_pages: z.number().optional(),
135 |     include_pattern: z.string().optional(),
136 |     exclude_pattern: z.string().optional(),
137 |   }),
138 |   'crawl_recursive',
139 | );
140 |
141 | export const ParseSitemapSchema = createStatelessSchema(
142 |   z.object({
143 |     url: z.string().url(),
144 |     filter_pattern: z.string().optional(),
145 |   }),
146 |   'parse_sitemap',
147 | );
148 |
149 | // Unified session management schema
150 | export const ManageSessionSchema = z.discriminatedUnion('action', [
151 |   z.object({
152 |     action: z.literal('create'),
153 |     session_id: z.string().optional(),
154 |     initial_url: z.string().url().optional(),
155 |     browser_type: z.enum(['chromium', 'firefox', 'webkit']).optional(),
156 |   }),
157 |   z.object({
158 |     action: z.literal('clear'),
159 |     session_id: z.string(),
160 |   }),
161 |   z.object({
162 |     action: z.literal('list'),
163 |   }),
164 | ]);
165 |
166 | export const CrawlSchema = z
167 |   .object({
168 |     url: z.string().url(),
169 |
170 |     // Browser configuration
171 |     browser_type: z.enum(['chromium', 'firefox', 'webkit']).optional(),
172 |     viewport_width: z.number().optional(),
173 |     viewport_height: z.number().optional(),
174 |     user_agent: z.string().optional(),
175 |     proxy_server: z.string().optional(),
176 |     proxy_username: z.string().optional(),
177 |     proxy_password: z.string().optional(),
178 |     cookies: z
179 |       .array(
180 |         z.object({
181 |           name: z.string(),
182 |           value: z.string(),
183 |           domain: z.string(),
184 |           path: z.string().optional(),
185 |         }),
186 |       )
187 |       .optional(),
188 |     headers: z.record(z.string()).optional(),
189 |     extra_args: z.array(z.string()).optional(),
190 |
191 |     // Content filtering
192 |     word_count_threshold: z.number().optional(),
193 |     excluded_tags: z.array(z.string()).optional(),
194 |     excluded_selector: z.string().optional(),
195 |     remove_overlay_elements: z.boolean().optional(),
196 |     only_text: z.boolean().optional(),
197 |     remove_forms: z.boolean().optional(),
198 |     keep_data_attributes: z.boolean().optional(),
199 |
200 |     // JavaScript execution
201 |     js_code: JsCodeSchema.optional(),
202 |     js_only: z.boolean().optional(),
203 |     wait_for: z.string().optional(),
204 |     wait_for_timeout: z.number().optional(),
205 |
206 |     // Page navigation & timing
207 |     wait_until: z.enum(['domcontentloaded', 'networkidle', 'load']).optional(),
208 |     page_timeout: z.number().optional(),
209 |     wait_for_images: z.boolean().optional(),
210 |     ignore_body_visibility: z.boolean().optional(),
211 |
212 |     // Dynamic content
213 |     delay_before_scroll: z.number().optional(),
214 |     scroll_delay: z.number().optional(),
215 |     scan_full_page: z.boolean().optional(),
216 |     virtual_scroll_config: VirtualScrollConfigSchema.optional(),
217 |
218 |     // Content processing
219 |     process_iframes: z.boolean().optional(),
220 |     exclude_external_links: z.boolean().optional(),
221 |
222 |     // Media handling
223 |     screenshot: z.boolean().optional(),
224 |     screenshot_wait_for: z.number().optional(),
225 |     screenshot_directory: z
226 |       .string()
227 |       .optional()
228 |       .describe('Local directory to save screenshot file when screenshot=true'),
229 |     pdf: z.boolean().optional(),
230 |     capture_mhtml: z.boolean().optional(),
231 |     image_description_min_word_threshold: z.number().optional(),
232 |     image_score_threshold: z.number().optional(),
233 |     exclude_external_images: z.boolean().optional(),
234 |
235 |     // Link filtering
236 |     exclude_social_media_links: z.boolean().optional(),
237 |     exclude_domains: z.array(z.string()).optional(),
238 |
239 |     // Page interaction
240 |     simulate_user: z.boolean().optional(),
241 |     override_navigator: z.boolean().optional(),
242 |     magic: z.boolean().optional(),
243 |
244 |     // Session and cache
245 |     session_id: z.string().optional(),
246 |     cache_mode: z.enum(['ENABLED', 'BYPASS', 'DISABLED']).optional(),
247 |
248 |     // Performance options
249 |     timeout: z.number().optional(),
250 |     verbose: z.boolean().optional(),
251 |
252 |     // Debug
253 |     log_console: z.boolean().optional(),
254 |
255 |     // New parameters from 0.7.3/0.7.4
256 |     delay_before_return_html: z.number().optional(),
257 |     css_selector: z.string().optional(),
258 |     include_links: z.boolean().optional(),
259 |     resolve_absolute_urls: z.boolean().optional(),
260 |   })
261 |   .refine(
262 |     (data) => {
263 |       // js_only is for subsequent calls in same session, not first call
264 |       // Using it incorrectly causes server errors
265 |       if (data.js_only && !data.session_id) {
266 |         return false;
267 |       }
268 |       return true;
269 |     },
270 |     {
271 |       message:
272 |         "Error: js_only requires session_id (it's for continuing existing sessions).\n" +
273 |         'For first call with js_code, use: {js_code: [...], screenshot: true}\n' +
274 |         'For multi-step: First {js_code: [...], session_id: "x"}, then {js_only: true, session_id: "x"}',
275 |     },
276 |   )
277 |   .refine(
278 |     (data) => {
279 |       // Empty js_code array is not allowed
280 |       if (Array.isArray(data.js_code) && data.js_code.length === 0) {
281 |         return false;
282 |       }
283 |       return true;
284 |     },
285 |     {
286 |       message:
287 |         'Error: js_code array cannot be empty. Either provide JavaScript code to execute or remove the js_code parameter entirely.',
288 |     },
289 |   );
290 |
291 | // Re-export types we need
292 | export type { z };
293 |
```
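As an illustrative sketch (not a repository file), the two `.refine()` rules on `CrawlSchema` can be exercised directly with `safeParse`:

```typescript
import { CrawlSchema } from './validation-schemas.js';

// Minimal valid input.
console.log(CrawlSchema.safeParse({ url: 'https://example.com' }).success); // true

// js_only without session_id violates the first refinement.
console.log(CrawlSchema.safeParse({ url: 'https://example.com', js_only: true }).success); // false

// An empty js_code array violates the second refinement.
console.log(CrawlSchema.safeParse({ url: 'https://example.com', js_code: [] }).success); // false
```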
--------------------------------------------------------------------------------
/src/__tests__/schemas/validation-edge-cases.test.ts:
--------------------------------------------------------------------------------

```typescript
  1 | // import { jest } from '@jest/globals';
  2 | import { validateJavaScriptCode } from '../../schemas/helpers.js';
  3 | import { JsCodeSchema, CrawlSchema } from '../../schemas/validation-schemas.js';
  4 |
  5 | describe('JavaScript Validation Edge Cases', () => {
  6 |   describe('validateJavaScriptCode', () => {
  7 |     describe('Valid JavaScript that might look suspicious', () => {
  8 |       it('should accept strings containing HTML-like syntax in string literals', () => {
  9 |         const validCases = [
 10 |           `const html = '<div class="test">Hello</div>';`,
 11 |           `const template = \`<button onclick="alert('test')">Click</button>\`;`,
 12 |           `const regex = /<div[^>]*>/g;`,
 13 |           `const arrow = () => { return '<span>Arrow</span>'; }`,
 14 |           `const className = 'container';`,
 15 |         ];
 16 |
 17 |         validCases.forEach((code) => {
 18 |           expect(validateJavaScriptCode(code)).toBe(true);
 19 |         });
 20 |       });
 21 |
 22 |       it('should accept legitimate escape sequences', () => {
 23 |         const validCases = [
 24 |           `const str = "Line 1\\nLine 2";`, // Real newline escape
 25 |           `const tab = "Col1\\tCol2";`,
 26 |           `const quote = "He said \\"Hello\\"";`,
 27 |           `const unicode = "\\u0048\\u0065\\u006C\\u006C\\u006F";`,
 28 |           `const template = \`Multi
 29 | line
 30 | string\`;`, // Real newlines in template literals
 31 |         ];
 32 |
 33 |         validCases.forEach((code) => {
 34 |           expect(validateJavaScriptCode(code)).toBe(true);
 35 |         });
 36 |       });
 37 |
 38 |       it('should accept complex but valid JavaScript patterns', () => {
 39 |         const validCases = [
 40 |           // Nested template literals
 41 |           `const nested = \`Outer \${inner ? \`Inner: \${value}\` : 'None'}\`;`,
 42 |           // Regular expressions that might look like HTML
 43 |           `const htmlTag = /<([a-z]+)([^>]*)>/gi;`,
 44 |           // JSON strings without HTML entities
 45 |           `const json = '{"name": "Test", "value": "Some data"}';`,
 46 |           // Function with HTML in comments
 47 |           `function render() {
 48 |             // This creates div content
 49 |             return document.createElement('div');
 50 |           }`,
 51 |           // Complex string concatenation
 52 |           `const result = '<div' + ' class="' + className + '">' + content + '</div>';`,
 53 |         ];
 54 |
 55 |         validCases.forEach((code) => {
 56 |           expect(validateJavaScriptCode(code)).toBe(true);
 57 |         });
 58 |       });
 59 |
 60 |       it('should accept Unicode and special characters', () => {
 61 |         const validCases = [
 62 |           `const emoji = "Hello 👋 World 🌍";`,
 63 |           `const chinese = "你好世界";`,
 64 |           `const arabic = "مرحبا بالعالم";`,
 65 |           `const special = "©2024 Company™";`,
 66 |           `const math = "∑(n=1 to ∞) = π²/6";`,
 67 |         ];
 68 |
 69 |         validCases.forEach((code) => {
 70 |           expect(validateJavaScriptCode(code)).toBe(true);
 71 |         });
 72 |       });
 73 |     });
 74 |
 75 |     describe('Invalid JavaScript that should be rejected', () => {
 76 |       it('should reject HTML entities outside string literals', () => {
 77 |         const invalidCases = [
 78 |           `const value = &quot;test&quot;;`, // HTML entities as code
 79 |           `const text = &amp;&amp; true;`,
 80 |           `if (a &lt; b) { }`,
 81 |           `const escaped = `,
 82 |           `return &#39;hello&#39;;`,
 83 |         ];
 84 |
 85 |         invalidCases.forEach((code) => {
 86 |           expect(validateJavaScriptCode(code)).toBe(false);
 87 |         });
 88 |       });
 89 |
 90 |       it('should reject literal backslash-n outside strings', () => {
 91 |         const invalidCases = [
 92 |           `const text = "Hello";\\nconst world = "World";`, // Literal \n between statements
 93 |           `console.log("test");\\nconsole.log("more");`,
 94 |           `return value;\\nreturn other;`,
 95 |         ];
 96 |
 97 |         invalidCases.forEach((code) => {
 98 |           expect(validateJavaScriptCode(code)).toBe(false);
 99 |         });
100 |       });
101 |
102 |       it('should reject HTML tags outside string literals', () => {
103 |         const invalidCases = [
104 |           `<script>alert('test')</script>`,
105 |           `<!DOCTYPE html>`,
106 |           `<html><body>test</body></html>`,
107 |           `<style>body { color: red; }</style>`,
108 |         ];
109 |
110 |         invalidCases.forEach((code) => {
111 |           expect(validateJavaScriptCode(code)).toBe(false);
112 |         });
113 |       });
114 |     });
115 |
116 |     describe('Edge cases and boundaries', () => {
117 |       it('should handle empty and whitespace-only input', () => {
118 |         expect(validateJavaScriptCode('')).toBe(true);
119 |         expect(validateJavaScriptCode(' ')).toBe(true);
120 |         expect(validateJavaScriptCode('\n\n\n')).toBe(true);
121 |         expect(validateJavaScriptCode('\t')).toBe(true);
122 |       });
123 |
124 |       it('should handle very long valid strings', () => {
125 |         const longString = 'const x = "' + 'a'.repeat(10000) + '";';
126 |         expect(validateJavaScriptCode(longString)).toBe(true);
127 |       });
128 |
129 |       it('should handle nested quotes correctly', () => {
130 |         const validCases = [
131 |           `const x = "She said \\"Hello\\" to me";`,
132 |           `const y = 'It\\'s a nice day';`,
133 |           `const z = \`Template with "quotes" and 'apostrophes'\`;`,
134 |         ];
135 |
136 |         validCases.forEach((code) => {
137 |           expect(validateJavaScriptCode(code)).toBe(true);
138 |         });
139 |       });
140 |
141 |       it('should handle multiline strings correctly', () => {
142 |         const multiline = `
143 |       const longText = \`
144 |         This is a multiline
145 |         template literal with
146 |         multiple lines
147 |       \`;`;
148 |         expect(validateJavaScriptCode(multiline)).toBe(true);
149 |       });
150 |     });
151 |   });
152 |
153 |   describe('Schema Validation Edge Cases', () => {
154 |     describe('JsCodeSchema', () => {
155 |       it('should accept both string and array of strings', () => {
156 |         expect(() => JsCodeSchema.parse('return 1;')).not.toThrow();
157 |         expect(() => JsCodeSchema.parse(['return 1;', 'return 2;'])).not.toThrow();
158 |       });
159 |
160 |       it('should reject invalid JavaScript in arrays', () => {
161 |         expect(() => JsCodeSchema.parse(['valid();', '"invalid"'])).toThrow();
162 |       });
163 |
164 |       it('should handle empty arrays', () => {
165 |         expect(() => JsCodeSchema.parse([])).not.toThrow();
166 |       });
167 |     });
168 |
169 |     describe('CrawlSchema edge cases', () => {
170 |       it('should handle all optional parameters', () => {
171 |         const minimal = { url: 'https://example.com' };
172 |         expect(() => CrawlSchema.parse(minimal)).not.toThrow();
173 |       });
174 |
175 |       it('should validate js_only requires session_id', () => {
176 |         const invalid = {
177 |           url: 'https://example.com',
178 |           js_only: true,
179 |           // Missing session_id
180 |         };
181 |         expect(() => CrawlSchema.parse(invalid)).toThrow();
182 |       });
183 |
184 |       it('should reject empty js_code array', () => {
185 |         const invalid = {
186 |           url: 'https://example.com',
187 |           js_code: [],
188 |         };
189 |         expect(() => CrawlSchema.parse(invalid)).toThrow();
190 |       });
191 |
192 |       it('should accept all valid cache modes', () => {
193 |         const validModes = ['ENABLED', 'BYPASS', 'DISABLED'];
194 |         validModes.forEach((mode) => {
195 |           const config = { url: 'https://example.com', cache_mode: mode };
196 |           expect(() => CrawlSchema.parse(config)).not.toThrow();
197 |         });
198 |       });
199 |
200 |       it('should validate viewport dimensions', () => {
201 |         const validViewport = {
202 |           url: 'https://example.com',
203 |           viewport_width: 1920,
204 |           viewport_height: 1080,
205 |         };
206 |         expect(() => CrawlSchema.parse(validViewport)).not.toThrow();
207 |       });
208 |
209 |       it('should validate complex configurations', () => {
210 |         const complex = {
211 |           url: 'https://example.com',
212 |           browser_type: 'chromium',
213 |           viewport_width: 1280,
214 |           viewport_height: 720,
215 |           user_agent: 'Custom User Agent',
216 |           headers: { 'X-Custom': 'value' },
217 |           cookies: [{ name: 'session', value: '123', domain: '.example.com' }],
218 |           js_code: ['document.querySelector("button").click()'],
219 |           wait_for: '#loaded',
220 |           screenshot: true,
221 |           pdf: true,
222 |           session_id: 'test-session',
223 |           cache_mode: 'BYPASS',
224 |         };
225 |         expect(() => CrawlSchema.parse(complex)).not.toThrow();
226 |       });
227 |     });
228 |   });
229 |
230 |   describe('Property-based testing for regex patterns', () => {
231 |     // Generate random valid JavaScript-like strings
232 |     const generateValidJS = () => {
233 |       const templates = [
234 |         () => `const x = ${Math.random()};`,
235 |         () => `function test() { return "${Math.random()}"; }`,
236 |         () => `if (${Math.random() > 0.5}) { console.log("test"); }`,
237 |         () => `const arr = [${Math.random()}, ${Math.random()}];`,
238 |         () => `// Comment with ${Math.random()}`,
239 |       ];
240 |       return templates[Math.floor(Math.random() * templates.length)]();
241 |     };
242 |
243 |     it('should consistently validate generated valid JavaScript', () => {
244 |       for (let i = 0; i < 100; i++) {
245 |         const code = generateValidJS();
246 |         expect(validateJavaScriptCode(code)).toBe(true);
247 |       }
248 |     });
249 |
250 |     // Test boundary conditions with special characters
251 |     const specialChars = ['<', '>', '&', '"', "'", '\\', '\n', '\r', '\t'];
252 |
253 |     it('should handle special characters in string contexts correctly', () => {
254 |       specialChars.forEach((char) => {
255 |         const inString = `const x = "${char}";`;
256 |         const inTemplate = `const y = \`${char}\`;`;
257 |
258 |         // These should be valid (special chars inside strings)
259 |         expect(validateJavaScriptCode(inString)).toBe(true);
260 |         expect(validateJavaScriptCode(inTemplate)).toBe(true);
261 |       });
262 |     });
263 |   });
264 | });
265 |
```
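The contract these tests pin down, at a glance (an illustrative sketch, not a repository file; the import path assumes the `helpers.ts` module from the directory structure above):

```typescript
import { validateJavaScriptCode } from './src/schemas/helpers.js';

console.log(validateJavaScriptCode(`const html = '<div class="x">ok</div>';`)); // true: HTML lives inside a string
console.log(validateJavaScriptCode(`const value = &quot;test&quot;;`)); // false: HTML entity used as code
console.log(validateJavaScriptCode(`<script>alert('x')</script>`)); // false: raw HTML, not JavaScript
```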
--------------------------------------------------------------------------------
/src/handlers/utility-handlers.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { BaseHandler } from './base-handler.js';
  2 | import { JSExecuteEndpointOptions, JSExecuteEndpointResponse, CrawlResultItem } from '../types.js';
  3 |
  4 | export class UtilityHandlers extends BaseHandler {
  5 |   async executeJS(options: JSExecuteEndpointOptions) {
  6 |     try {
  7 |       // Check if scripts is provided
  8 |       if (!options.scripts || options.scripts === null) {
  9 |         throw new Error(
 10 |           'scripts is required. Please provide JavaScript code to execute. Use "return" statements to get values back.',
 11 |         );
 12 |       }
 13 |
 14 |       const result: JSExecuteEndpointResponse = await this.service.executeJS(options);
 15 |
 16 |       // Extract JavaScript execution results
 17 |       const jsResults = result.js_execution_result?.results || [];
 18 |       // Ensure scripts is always an array for mapping
 19 |       const scripts = Array.isArray(options.scripts) ? options.scripts : [options.scripts];
 20 |
 21 |       // Format results for display
 22 |       let formattedResults = '';
 23 |       if (jsResults.length > 0) {
 24 |         formattedResults = jsResults
 25 |           .map((res: unknown, idx: number) => {
 26 |             const script = scripts[idx] || 'Script ' + (idx + 1);
 27 |             // Handle the actual return value or success/error status
 28 |             let resultStr = '';
 29 |             if (res && typeof res === 'object' && 'success' in res) {
 30 |               // This is a status object (e.g., from null return or execution without return)
 31 |               const statusObj = res as { success: unknown; error?: unknown };
 32 |               resultStr = statusObj.success
 33 |                 ? 'Executed successfully (no return value)'
 34 |                 : `Error: ${statusObj.error || 'Unknown error'}`;
 35 |             } else {
 36 |               // This is an actual return value
 37 |               resultStr = JSON.stringify(res, null, 2);
 38 |             }
 39 |             return `Script: ${script}\nReturned: ${resultStr}`;
 40 |           })
 41 |           .join('\n\n');
 42 |       } else {
 43 |         formattedResults = 'No results returned';
 44 |       }
 45 |
 46 |       // Handle markdown content - can be string or object
 47 |       let markdownContent = '';
 48 |       if (result.markdown) {
 49 |         if (typeof result.markdown === 'string') {
 50 |           markdownContent = result.markdown;
 51 |         } else if (typeof result.markdown === 'object' && result.markdown.raw_markdown) {
 52 |           // Use raw_markdown from the object structure
 53 |           markdownContent = result.markdown.raw_markdown;
 54 |         }
 55 |       }
 56 |
 57 |       return {
 58 |         content: [
 59 |           {
 60 |             type: 'text',
 61 |             text: `JavaScript executed on: ${options.url}\n\nResults:\n${formattedResults}${markdownContent ? `\n\nPage Content After Execution:\n${markdownContent}` : ''}`,
 62 |           },
 63 |         ],
 64 |       };
 65 |     } catch (error) {
 66 |       throw this.formatError(error, 'execute JavaScript');
 67 |     }
 68 |   }
 69 |
 70 |   async extractLinks(options: { url: string; categorize?: boolean }) {
 71 |     try {
 72 |       // Use crawl endpoint instead of md to get full link data
 73 |       const response = await this.axiosClient.post('/crawl', {
 74 |         urls: [options.url],
 75 |         crawler_config: {
 76 |           cache_mode: 'bypass',
 77 |         },
 78 |       });
 79 |
 80 |       const results = response.data.results || [response.data];
 81 |       const result: CrawlResultItem = results[0] || {};
 82 |
 83 |       // Variables for manually extracted links
 84 |       let manuallyExtractedInternal: string[] = [];
 85 |       let manuallyExtractedExternal: string[] = [];
 86 |       let hasManuallyExtractedLinks = false;
 87 |
 88 |       // Check if the response is likely JSON or non-HTML content
 89 |       if (!result.links || (result.links.internal.length === 0 && result.links.external.length === 0)) {
 90 |         // Try to detect if this might be a JSON endpoint
 91 |         const markdownContent = result.markdown?.raw_markdown || result.markdown?.fit_markdown || '';
 92 |         const htmlContent = result.html || '';
 93 |
 94 |         // Check for JSON indicators
 95 |         if (
 96 |           // Check URL pattern
 97 |           options.url.includes('/api/') ||
 98 |           options.url.includes('/api.') ||
 99 |           // Check content type (often shown in markdown conversion)
100 |           markdownContent.includes('application/json') ||
101 |           // Check for JSON structure patterns
102 |           (markdownContent.startsWith('{') && markdownContent.endsWith('}')) ||
103 |           (markdownContent.startsWith('[') && markdownContent.endsWith(']')) ||
104 |           // Check HTML for JSON indicators
105 |           htmlContent.includes('application/json') ||
106 |           // Common JSON patterns
107 |           markdownContent.includes('"links"') ||
108 |           markdownContent.includes('"url"') ||
109 |           markdownContent.includes('"data"')
110 |         ) {
111 |           return {
112 |             content: [
113 |               {
114 |                 type: 'text',
115 |                 text: `Note: ${options.url} appears to return JSON data rather than HTML. The extract_links tool is designed for HTML pages with <a> tags. To extract URLs from JSON, you would need to parse the JSON structure directly.`,
116 |               },
117 |             ],
118 |           };
119 |         }
120 |         // If no links found but it's HTML, let's check the markdown content for href patterns
121 |         if (markdownContent && markdownContent.includes('href=')) {
122 |           // Extract links manually from markdown if server didn't provide them
123 |           const hrefPattern = /href=["']([^"']+)["']/g;
124 |           const foundLinks: string[] = [];
125 |           let match;
126 |           while ((match = hrefPattern.exec(markdownContent)) !== null) {
127 |             foundLinks.push(match[1]);
128 |           }
129 |           if (foundLinks.length > 0) {
130 |             hasManuallyExtractedLinks = true;
131 |             // Categorize found links
132 |             const currentDomain = new URL(options.url).hostname;
133 |
134 |             foundLinks.forEach((link) => {
135 |               try {
136 |                 const linkUrl = new URL(link, options.url);
137 |                 if (linkUrl.hostname === currentDomain) {
138 |                   manuallyExtractedInternal.push(linkUrl.href);
139 |                 } else {
140 |                   manuallyExtractedExternal.push(linkUrl.href);
141 |                 }
142 |               } catch {
143 |                 // Relative link
144 |                 manuallyExtractedInternal.push(link);
145 |               }
146 |             });
147 |           }
148 |         }
149 |       }
150 |
151 |       // Handle both cases: API-provided links and manually extracted links
152 |       let internalUrls: string[] = [];
153 |       let externalUrls: string[] = [];
154 |
155 |       if (result.links && (result.links.internal.length > 0 || result.links.external.length > 0)) {
156 |         // Use API-provided links
157 |         internalUrls = result.links.internal.map((link) => (typeof link === 'string' ? link : link.href));
158 |         externalUrls = result.links.external.map((link) => (typeof link === 'string' ? link : link.href));
159 |       } else if (hasManuallyExtractedLinks) {
160 |         // Use manually extracted links
161 |         internalUrls = manuallyExtractedInternal;
162 |         externalUrls = manuallyExtractedExternal;
163 |       }
164 |
165 |       const allUrls = [...internalUrls, ...externalUrls];
166 |
167 |       if (!options.categorize) {
168 |         return {
169 |           content: [
170 |             {
171 |               type: 'text',
172 |               text: `All links from ${options.url}:\n${allUrls.join('\n')}`,
173 |             },
174 |           ],
175 |         };
176 |       }
177 |
178 |       // Categorize links
179 |       const categorized: Record<string, string[]> = {
180 |         internal: [],
181 |         external: [],
182 |         social: [],
183 |         documents: [],
184 |         images: [],
185 |         scripts: [],
186 |       };
187 |
188 |       // Further categorize links
189 |       const socialDomains = ['facebook.com', 'twitter.com', 'linkedin.com', 'instagram.com', 'youtube.com'];
190 |       const docExtensions = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'];
191 |       const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'];
192 |       const scriptExtensions = ['.js', '.css'];
193 |
194 |       // Categorize internal URLs
195 |       internalUrls.forEach((href: string) => {
196 |         if (docExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
197 |           categorized.documents.push(href);
198 |         } else if (imageExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
199 |           categorized.images.push(href);
200 |         } else if (scriptExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
201 |           categorized.scripts.push(href);
202 |         } else {
203 |           categorized.internal.push(href);
204 |         }
205 |       });
206 |
207 |       // Categorize external URLs
208 |       externalUrls.forEach((href: string) => {
209 |         if (socialDomains.some((domain) => href.includes(domain))) {
210 |           categorized.social.push(href);
211 |         } else if (docExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
212 |           categorized.documents.push(href);
213 |         } else if (imageExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
214 |           categorized.images.push(href);
215 |         } else if (scriptExtensions.some((ext) => href.toLowerCase().endsWith(ext))) {
216 |           categorized.scripts.push(href);
217 |         } else {
218 |           categorized.external.push(href);
219 |         }
220 |       });
221 |
222 |       // Return based on categorize option (defaults to true)
223 |       if (options.categorize) {
224 |         return {
225 |           content: [
226 |             {
227 |               type: 'text',
228 |               text: `Link analysis for ${options.url}:\n\n${Object.entries(categorized)
229 |                 .map(
230 |                   ([category, links]: [string, string[]]) =>
231 |                     `${category} (${links.length}):\n${links.slice(0, 10).join('\n')}${links.length > 10 ? '\n...' : ''}`,
232 |                 )
233 |                 .join('\n\n')}`,
234 |             },
235 |           ],
236 |         };
237 |       } else {
238 |         // Return simple list without categorization
239 |         const allLinks = [...internalUrls, ...externalUrls];
240 |         return {
241 |           content: [
242 |             {
243 |               type: 'text',
244 |               text: `All links from ${options.url} (${allLinks.length} total):\n\n${allLinks.slice(0, 50).join('\n')}${allLinks.length > 50 ? '\n...' : ''}`,
245 |             },
246 |           ],
247 |         };
248 |       }
249 |     } catch (error) {
250 |       throw this.formatError(error, 'extract links');
251 |     }
252 |   }
253 | }
254 |
```
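The extension-based bucketing used by `extractLinks` can be distilled into a small pure helper (illustrative only; these names are hypothetical and not part of the repository):

```typescript
const docExtensions = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'];
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'];
const scriptExtensions = ['.js', '.css'];

function bucketForHref(href: string): 'documents' | 'images' | 'scripts' | 'other' {
  const lower = href.toLowerCase();
  if (docExtensions.some((ext) => lower.endsWith(ext))) return 'documents';
  if (imageExtensions.some((ext) => lower.endsWith(ext))) return 'images';
  if (scriptExtensions.some((ext) => lower.endsWith(ext))) return 'scripts';
  return 'other';
}

console.log(bucketForHref('https://example.com/report.pdf')); // documents
console.log(bucketForHref('https://example.com/logo.svg')); // images
```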
--------------------------------------------------------------------------------
/src/crawl4ai-service.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import axios, { AxiosInstance, AxiosError } from 'axios';
  2 | import {
  3 |   BatchCrawlOptions,
  4 |   AdvancedCrawlConfig,
  5 |   CrawlEndpointOptions,
  6 |   CrawlEndpointResponse,
  7 |   JSExecuteEndpointOptions,
  8 |   JSExecuteEndpointResponse,
  9 |   ScreenshotEndpointOptions,
 10 |   ScreenshotEndpointResponse,
 11 |   PDFEndpointOptions,
 12 |   PDFEndpointResponse,
 13 |   HTMLEndpointOptions,
 14 |   HTMLEndpointResponse,
 15 |   MarkdownEndpointOptions,
 16 |   MarkdownEndpointResponse,
 17 |   LLMEndpointOptions,
 18 |   LLMEndpointResponse,
 19 | } from './types.js';
 20 |
 21 | // Helper to validate JavaScript code
 22 | const validateJavaScriptCode = (code: string): boolean => {
 23 |   // Check for common HTML entities that shouldn't be in JS
 24 |   if (/&quot;|&amp;|&lt;|&gt;|&#\d+;|&\w+;/.test(code)) {
 25 |     return false;
 26 |   }
 27 |
 28 |   // Basic check to ensure it's not HTML
 29 |   if (/<(!DOCTYPE|html|body|head|script|style)\b/i.test(code)) {
 30 |     return false;
 31 |   }
 32 |
 33 |   // Check for literal \n, \t, \r outside of strings (common LLM mistake)
 34 |   // Look for patterns like: ;\n or }\n or )\n which suggest literal newlines
 35 |   if (/[;})]\s*\\n|\\n\s*[{(/]/.test(code)) {
 36 |     return false;
 37 |   }
 38 |
 39 |   // Check for obvious cases of literal \n between statements
 40 |   if (/[;})]\s*\\n\s*\w/.test(code)) {
 41 |     return false;
 42 |   }
 43 |
 44 |   return true;
 45 | };
 46 |
 47 | // Helper to validate URL format
 48 | const validateURL = (url: string): boolean => {
 49 |   try {
 50 |     new URL(url);
 51 |     return true;
 52 |   } catch {
 53 |     return false;
 54 |   }
 55 | };
 56 |
 57 | // Helper to handle axios errors consistently
 58 | const handleAxiosError = (error: unknown): never => {
 59 |   if (axios.isAxiosError(error)) {
 60 |     const axiosError = error as AxiosError;
 61 |
 62 |     // Handle timeout errors
 63 |     if (axiosError.code === 'ECONNABORTED') {
 64 |       throw new Error('Request timed out');
 65 |     }
 66 |
 67 |     if (axiosError.code === 'ETIMEDOUT') {
 68 |       throw new Error('Request timeout');
 69 |     }
 70 |
 71 |     // Handle network errors
 72 |     if (axiosError.code === 'ENOTFOUND') {
 73 |       throw new Error(`DNS resolution failed: ${axiosError.message}`);
 74 |     }
 75 |
 76 |     if (axiosError.code === 'ECONNREFUSED') {
 77 |       throw new Error(`Connection refused: ${axiosError.message}`);
 78 |     }
 79 |
 80 |     if (axiosError.code === 'ECONNRESET') {
 81 |       throw new Error(`Connection reset: ${axiosError.message}`);
 82 |     }
 83 |
 84 |     if (axiosError.code === 'ENETUNREACH') {
 85 |       throw new Error(`Network unreachable: ${axiosError.message}`);
 86 |     }
 87 |
 88 |     // Handle HTTP errors
 89 |     if (axiosError.response) {
 90 |       const status = axiosError.response.status;
 91 |       const data = axiosError.response.data as any; // eslint-disable-line @typescript-eslint/no-explicit-any
 92 |       const message = data?.error || data?.detail || data?.message || axiosError.message;
 93 |       throw new Error(`Request failed with status ${status}: ${message}`);
 94 |     }
 95 |
 96 |     // Handle request errors (e.g., invalid URL)
 97 |     if (axiosError.request) {
 98 |       throw new Error(`Request failed: ${axiosError.message}`);
 99 |     }
100 |   }
101 |
102 |   // Re-throw unknown errors
103 |   throw error;
104 | };
105 |
106 | export class Crawl4AIService {
107 |   private axiosClient: AxiosInstance;
108 |
109 |   constructor(baseURL: string, apiKey: string) {
110 |     this.axiosClient = axios.create({
111 |       baseURL,
112 |       headers: {
113 |         'X-API-Key': apiKey,
114 |         'Content-Type': 'application/json',
115 |       },
116 |       timeout: 120000,
117 |     });
118 |   }
119 |
120 |   async getMarkdown(options: MarkdownEndpointOptions): Promise<MarkdownEndpointResponse> {
121 |     // Validate URL
122 |     if (!validateURL(options.url)) {
123 |       throw new Error('Invalid URL format');
124 |     }
125 |
126 |     try {
127 |       const response = await this.axiosClient.post('/md', {
128 |         url: options.url,
129 |         f: options.f,
130 |         q: options.q,
131 |         c: options.c,
132 |       });
133 |
134 |       return response.data;
135 |     } catch (error) {
136 |       return handleAxiosError(error);
137 |     }
138 |   }
139 |
140 |   async captureScreenshot(options: ScreenshotEndpointOptions): Promise<ScreenshotEndpointResponse> {
141 |     // Validate URL
142 |     if (!validateURL(options.url)) {
143 |       throw new Error('Invalid URL format');
144 |     }
145 |
146 |     try {
147 |       const response = await this.axiosClient.post('/screenshot', {
148 |         url: options.url,
149 |         screenshot_wait_for: options.screenshot_wait_for,
150 |         // output_path is omitted to get base64 response
151 |       });
152 |
153 |       return response.data;
154 |     } catch (error) {
155 |       return handleAxiosError(error);
156 |     }
157 |   }
158 |
159 |   async generatePDF(options: PDFEndpointOptions): Promise<PDFEndpointResponse> {
160 |     // Validate URL
161 |     if (!validateURL(options.url)) {
162 |       throw new Error('Invalid URL format');
163 |     }
164 |
165 |     try {
166 |       const response = await this.axiosClient.post('/pdf', {
167 |         url: options.url,
168 |         // output_path is omitted to get base64 response
169 |       });
170 |
171 |       return response.data;
172 |     } catch (error) {
173 |       return handleAxiosError(error);
174 |     }
175 |   }
176 |
177 |   async executeJS(options: JSExecuteEndpointOptions): Promise<JSExecuteEndpointResponse> {
178 |     // Validate URL
179 |     if (!validateURL(options.url)) {
180 |       throw new Error('Invalid URL format');
181 |     }
182 |
183 |     // Ensure scripts is always an array
184 |     const scripts = Array.isArray(options.scripts) ? options.scripts : [options.scripts];
185 |
186 |     // Validate each script
187 |     for (const script of scripts) {
188 |       if (!validateJavaScriptCode(script)) {
189 |         throw new Error(
190 |           'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
191 |         );
192 |       }
193 |     }
194 |
195 |     try {
196 |       const response = await this.axiosClient.post('/execute_js', {
197 |         url: options.url,
198 |         scripts: scripts, // Always send as array
199 |         // Only url and scripts are supported by the endpoint
200 |       });
201 |
202 |       return response.data;
203 |     } catch (error) {
204 |       return handleAxiosError(error);
205 |     }
206 |   }
207 |
208 |   async batchCrawl(options: BatchCrawlOptions) {
209 |     // Validate URLs
210 |     if (!options.urls || options.urls.length === 0) {
211 |       throw new Error('URLs array cannot be empty');
212 |     }
213 |
214 |     // Build crawler config if needed
215 |     const crawler_config: Record<string, unknown> = {};
216 |
217 |     // Handle remove_images by using exclude_tags
218 |     if (options.remove_images) {
219 |       crawler_config.exclude_tags = ['img', 'picture', 'svg'];
220 |     }
221 |
222 |     if (options.bypass_cache) {
223 |       crawler_config.cache_mode = 'BYPASS';
224 |     }
225 |
226 |     try {
227 |       const response = await this.axiosClient.post('/crawl', {
228 |         urls: options.urls,
229 |         max_concurrent: options.max_concurrent,
230 |         crawler_config: Object.keys(crawler_config).length > 0 ? crawler_config : undefined,
231 |       });
232 |
233 |       return response.data;
234 |     } catch (error) {
235 |       return handleAxiosError(error);
236 |     }
237 |   }
238 |
239 |   async getHTML(options: HTMLEndpointOptions): Promise<HTMLEndpointResponse> {
240 |     // Validate URL
241 |     if (!validateURL(options.url)) {
242 |       throw new Error('Invalid URL format');
243 |     }
244 |
245 |     try {
246 |       const response = await this.axiosClient.post('/html', {
247 |         url: options.url,
248 |         // Only url is supported by the endpoint
249 |       });
250 |
251 |       return response.data;
252 |     } catch (error) {
253 |       return handleAxiosError(error);
254 |     }
255 |   }
256 |
257 |   async parseSitemap(url: string) {
258 |     try {
259 |       // Use axios directly without baseURL for fetching external URLs
260 |       const response = await axios.get(url);
261 |       return response.data;
262 |     } catch (error) {
263 |       return handleAxiosError(error);
264 |     }
265 |   }
266 |
267 |   async detectContentType(url: string): Promise<string> {
268 |     try {
269 |       // Use axios directly without baseURL for external URLs
270 |       const response = await axios.head(url);
271 |       return response.headers['content-type'] || '';
272 |     } catch {
273 |       return '';
274 |     }
275 |   }
276 |
277 |   async crawl(options: AdvancedCrawlConfig): Promise<CrawlEndpointResponse> {
278 |     // Validate JS code if present
279 |     if (options.crawler_config?.js_code) {
280 |       const scripts = Array.isArray(options.crawler_config.js_code)
281 |         ? options.crawler_config.js_code
282 |         : [options.crawler_config.js_code];
283 |
284 |       for (const script of scripts) {
285 |         if (!validateJavaScriptCode(script)) {
286 |           throw new Error(
287 |             'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
288 |           );
289 |         }
290 |       }
291 |     }
292 |
293 |     // Server only accepts urls array, not url string
294 |     const urls = options.url ? [options.url] : options.urls || [];
295 |
296 |     const requestBody: CrawlEndpointOptions & {
297 |       extraction_strategy?: unknown;
298 |       table_extraction_strategy?: unknown;
299 |       markdown_generator_options?: unknown;
300 |     } = {
301 |       urls,
302 |       browser_config: options.browser_config,
303 |       crawler_config: options.crawler_config || {}, // Always include crawler_config, even if empty
304 |     };
305 |
306 |     // Add extraction strategy passthrough fields if present
307 |     if (options.extraction_strategy) {
308 |       requestBody.extraction_strategy = options.extraction_strategy;
309 |     }
310 |     if (options.table_extraction_strategy) {
311 |       requestBody.table_extraction_strategy = options.table_extraction_strategy;
312 |     }
313 |     if (options.markdown_generator_options) {
314 |       requestBody.markdown_generator_options = options.markdown_generator_options;
315 |     }
316 |
317 |     try {
318 |       const response = await this.axiosClient.post('/crawl', requestBody);
319 |       return response.data;
320 |     } catch (error) {
321 |       return handleAxiosError(error);
322 |     }
323 |   }
324 |
325 |   async extractWithLLM(options: LLMEndpointOptions): Promise<LLMEndpointResponse> {
326 |     // Validate URL
327 |     if (!validateURL(options.url)) {
328 |       throw new Error('Invalid URL format');
329 |     }
330 |
331 |     try {
332 |       const encodedUrl = encodeURIComponent(options.url);
333 |       const encodedQuery = encodeURIComponent(options.query);
334 |       const response = await this.axiosClient.get(`/llm/${encodedUrl}?q=${encodedQuery}`);
335 |       return response.data;
336 |     } catch (error) {
337 |       // Special handling for LLM-specific errors
338 |       if (axios.isAxiosError(error)) {
339 |         const axiosError = error as AxiosError;
340 |         if (axiosError.code === 'ECONNABORTED' || axiosError.response?.status === 504) {
341 |           throw new Error('LLM extraction timed out. Try a simpler query or different URL.');
342 |         }
343 |         if (axiosError.response?.status === 401) {
344 |           throw new Error(
345 |             'LLM extraction failed: No LLM provider configured on server. Please ensure the server has an API key set.',
346 |           );
347 |         }
348 |       }
349 |       return handleAxiosError(error);
350 |     }
351 |   }
352 | }
353 |
```
Please ensure the server has an API key set.', 346 | ); 347 | } 348 | } 349 | return handleAxiosError(error); 350 | } 351 | } 352 | } 353 | ``` -------------------------------------------------------------------------------- /src/__tests__/request-handler.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { jest } from '@jest/globals'; 2 | 3 | // Mock all dependencies before imports 4 | const mockGetMarkdown = jest.fn(); 5 | const mockCaptureScreenshot = jest.fn(); 6 | const mockGeneratePDF = jest.fn(); 7 | const mockExecuteJS = jest.fn(); 8 | const mockGetHTML = jest.fn(); 9 | const mockBatchCrawl = jest.fn(); 10 | const mockExtractWithLLM = jest.fn(); 11 | const mockCrawl = jest.fn(); 12 | const mockParseSitemap = jest.fn(); 13 | 14 | jest.unstable_mockModule('../crawl4ai-service.js', () => ({ 15 | Crawl4AIService: jest.fn().mockImplementation(() => ({ 16 | getMarkdown: mockGetMarkdown, 17 | captureScreenshot: mockCaptureScreenshot, 18 | generatePDF: mockGeneratePDF, 19 | executeJS: mockExecuteJS, 20 | getHTML: mockGetHTML, 21 | batchCrawl: mockBatchCrawl, 22 | extractWithLLM: mockExtractWithLLM, 23 | crawl: mockCrawl, 24 | parseSitemap: mockParseSitemap, 25 | })), 26 | })); 27 | 28 | // Mock axios 29 | const mockPost = jest.fn(); 30 | const mockAxiosCreate = jest.fn(() => ({ 31 | post: mockPost, 32 | })); 33 | 34 | jest.unstable_mockModule('axios', () => ({ 35 | default: { 36 | create: mockAxiosCreate, 37 | }, 38 | })); 39 | 40 | // Mock MCP SDK 41 | const mockSetRequestHandler = jest.fn(); 42 | const mockTool = jest.fn(); 43 | const mockConnect = jest.fn(); 44 | 45 | jest.unstable_mockModule('@modelcontextprotocol/sdk/server/index.js', () => ({ 46 | Server: jest.fn().mockImplementation(() => ({ 47 | setRequestHandler: mockSetRequestHandler, 48 | tool: mockTool, 49 | connect: mockConnect, 50 | })), 51 | })); 52 | 53 | // Mock the types module that exports the schemas 54 | const CallToolRequestSchema = { method: 'tools/call' }; 55 | const ListToolsRequestSchema = { method: 'tools/list' }; 56 | 57 | jest.unstable_mockModule('@modelcontextprotocol/sdk/types.js', () => ({ 58 | CallToolRequestSchema, 59 | ListToolsRequestSchema, 60 | })); 61 | 62 | jest.unstable_mockModule('@modelcontextprotocol/sdk/server/stdio.js', () => ({ 63 | StdioServerTransport: jest.fn(), 64 | })); 65 | 66 | // Now import the server after mocks are set up 67 | const { Crawl4AIServer } = await import('../server.js'); 68 | 69 | // Removed unused type definitions - using 'any' for test mocks 70 | 71 | describe('MCP Request Handler Direct Testing', () => { 72 | let server: any; // eslint-disable-line @typescript-eslint/no-explicit-any 73 | let requestHandler: any; // eslint-disable-line @typescript-eslint/no-explicit-any 74 | 75 | beforeEach(async () => { 76 | jest.clearAllMocks(); 77 | 78 | // Set up mock responses 79 | mockGetMarkdown.mockResolvedValue({ success: true, content: 'markdown content' }); 80 | mockCaptureScreenshot.mockResolvedValue({ success: true, screenshot: 'base64image' }); 81 | mockGeneratePDF.mockResolvedValue({ success: true, pdf: 'base64pdf' }); 82 | mockExecuteJS.mockResolvedValue({ js_execution_result: { results: [42] } }); 83 | mockGetHTML.mockResolvedValue({ success: true, html: '<html></html>' }); 84 | mockExtractWithLLM.mockResolvedValue({ answer: 'extracted answer' }); 85 | mockCrawl.mockResolvedValue({ 86 | success: true, 87 | results: [ 88 | { 89 | url: 'https://example.com', 90 | markdown: { raw_markdown: 'content' }, 91 | 
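// minimal per-result fields the handlers read; shape mirrors CrawlEndpointResponse.results[] from crawl4ai-service.ts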
success: true, 92 | status_code: 200, 93 | }, 94 | ], 95 | }); 96 | mockParseSitemap.mockResolvedValue(['https://example.com/page1']); 97 | mockPost.mockResolvedValue({ 98 | data: { 99 | results: [ 100 | { 101 | links: { internal: [], external: [] }, 102 | success: true, 103 | }, 104 | ], 105 | }, 106 | }); 107 | 108 | // Create server 109 | server = new Crawl4AIServer( 110 | process.env.CRAWL4AI_BASE_URL || 'http://test.example.com', 111 | process.env.CRAWL4AI_API_KEY || 'test-api-key', 112 | 'test-server', 113 | '1.0.0', 114 | ); 115 | await server.start(); 116 | 117 | // Get the request handler for CallToolRequestSchema 118 | const handlerCalls = mockSetRequestHandler.mock.calls; 119 | 120 | // Find the handler for CallToolRequestSchema (tools/call) 121 | for (const call of handlerCalls) { 122 | const [schema, handler] = call; 123 | if (schema && (schema as any).method === 'tools/call') { 124 | requestHandler = handler; 125 | break; 126 | } 127 | } 128 | }); 129 | 130 | describe('Tool Handler Coverage', () => { 131 | it('should handle all valid tool requests', async () => { 132 | expect(requestHandler).toBeDefined(); 133 | 134 | const validRequests = [ 135 | { name: 'get_markdown', arguments: { url: 'https://example.com' } }, 136 | { name: 'capture_screenshot', arguments: { url: 'https://example.com' } }, 137 | { name: 'generate_pdf', arguments: { url: 'https://example.com' } }, 138 | { name: 'execute_js', arguments: { url: 'https://example.com', scripts: 'return 1' } }, 139 | { name: 'batch_crawl', arguments: { urls: ['https://example.com'] } }, 140 | { name: 'smart_crawl', arguments: { url: 'https://example.com' } }, 141 | { name: 'get_html', arguments: { url: 'https://example.com' } }, 142 | { name: 'extract_links', arguments: { url: 'https://example.com' } }, 143 | { name: 'crawl_recursive', arguments: { url: 'https://example.com' } }, 144 | { name: 'parse_sitemap', arguments: { url: 'https://example.com/sitemap.xml' } }, 145 | { name: 'crawl', arguments: { url: 'https://example.com' } }, 146 | { name: 'manage_session', arguments: { action: 'create' } }, 147 | { name: 'manage_session', arguments: { action: 'clear', session_id: 'test' } }, 148 | { name: 'manage_session', arguments: { action: 'list' } }, 149 | { name: 'extract_with_llm', arguments: { url: 'https://example.com', prompt: 'test' } }, 150 | ]; 151 | 152 | for (const req of validRequests) { 153 | const result = await requestHandler({ 154 | method: 'tools/call', 155 | params: req, 156 | }); 157 | expect(result).toBeDefined(); 158 | expect(result.content).toBeDefined(); 159 | } 160 | }); 161 | 162 | it('should handle all validation error cases', async () => { 163 | const invalidRequests = [ 164 | { name: 'get_markdown', arguments: {}, expectedError: 'Invalid parameters for get_markdown' }, 165 | { name: 'capture_screenshot', arguments: {}, expectedError: 'Invalid parameters for capture_screenshot' }, 166 | { name: 'generate_pdf', arguments: {}, expectedError: 'Invalid parameters for generate_pdf' }, 167 | { 168 | name: 'execute_js', 169 | arguments: { url: 'https://example.com' }, 170 | expectedError: 'Invalid parameters for execute_js', 171 | }, 172 | { name: 'batch_crawl', arguments: {}, expectedError: 'Invalid parameters for batch_crawl' }, 173 | { name: 'smart_crawl', arguments: {}, expectedError: 'Invalid parameters for smart_crawl' }, 174 | { name: 'get_html', arguments: {}, expectedError: 'Invalid parameters for get_html' }, 175 | { name: 'extract_links', arguments: {}, expectedError: 'Invalid parameters for 
extract_links' }, 176 | { name: 'crawl_recursive', arguments: {}, expectedError: 'Invalid parameters for crawl_recursive' }, 177 | { name: 'parse_sitemap', arguments: {}, expectedError: 'Invalid parameters for parse_sitemap' }, 178 | { name: 'crawl', arguments: {}, expectedError: 'Invalid parameters for crawl' }, 179 | { name: 'manage_session', arguments: {}, expectedError: 'Invalid parameters for manage_session' }, 180 | { 181 | name: 'manage_session', 182 | arguments: { action: 'clear' }, 183 | expectedError: 'Invalid parameters for manage_session', 184 | }, 185 | { 186 | name: 'extract_with_llm', 187 | arguments: { url: 'https://example.com' }, 188 | expectedError: 'Invalid parameters for extract_with_llm', 189 | }, 190 | ]; 191 | 192 | for (const req of invalidRequests) { 193 | const result = await requestHandler({ 194 | method: 'tools/call', 195 | params: req, 196 | }); 197 | expect(result.content[0].text).toContain(req.expectedError); 198 | } 199 | }); 200 | 201 | it('should handle unknown tool', async () => { 202 | const result = await requestHandler({ 203 | method: 'tools/call', 204 | params: { 205 | name: 'unknown_tool', 206 | arguments: {}, 207 | }, 208 | }); 209 | expect(result.content[0].text).toContain('Error: Unknown tool: unknown_tool'); 210 | }); 211 | 212 | it('should handle non-ZodError exceptions', async () => { 213 | // Make the service method throw a non-Zod error 214 | mockGetMarkdown.mockRejectedValue(new Error('Service error')); 215 | 216 | const result = await requestHandler({ 217 | method: 'tools/call', 218 | params: { 219 | name: 'get_markdown', 220 | arguments: { url: 'https://example.com' }, 221 | }, 222 | }); 223 | 224 | expect(result.content[0].text).toContain('Error: Failed to get markdown: Service error'); 225 | }); 226 | 227 | it('should handle manage_session with create action', async () => { 228 | const result = await requestHandler({ 229 | method: 'tools/call', 230 | params: { 231 | name: 'manage_session', 232 | arguments: { 233 | action: 'create', 234 | session_id: 'test-session', 235 | initial_url: 'https://example.com', 236 | }, 237 | }, 238 | }); 239 | 240 | expect(result.content[0].text).toContain('Session created successfully'); 241 | expect(result.content[0].text).toContain('test-session'); 242 | }); 243 | 244 | it('should handle manage_session with clear action', async () => { 245 | // First create a session 246 | await requestHandler({ 247 | method: 'tools/call', 248 | params: { 249 | name: 'manage_session', 250 | arguments: { 251 | action: 'create', 252 | session_id: 'test-to-clear', 253 | }, 254 | }, 255 | }); 256 | 257 | // Then clear it 258 | const result = await requestHandler({ 259 | method: 'tools/call', 260 | params: { 261 | name: 'manage_session', 262 | arguments: { 263 | action: 'clear', 264 | session_id: 'test-to-clear', 265 | }, 266 | }, 267 | }); 268 | 269 | expect(result.content[0].text).toContain('Session cleared successfully'); 270 | }); 271 | 272 | it('should handle manage_session with list action', async () => { 273 | // First create a session 274 | await requestHandler({ 275 | method: 'tools/call', 276 | params: { 277 | name: 'manage_session', 278 | arguments: { 279 | action: 'create', 280 | session_id: 'test-list-session', 281 | }, 282 | }, 283 | }); 284 | 285 | // List sessions 286 | const result = await requestHandler({ 287 | method: 'tools/call', 288 | params: { 289 | name: 'manage_session', 290 | arguments: { action: 'list' }, 291 | }, 292 | }); 293 | 294 | expect(result.content[0].text).toContain('Active sessions'); 295 
| expect(result.content[0].text).toContain('test-list-session'); 296 | }); 297 | }); 298 | }); 299 | ``` -------------------------------------------------------------------------------- /src/__tests__/handlers/screenshot-saving.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { jest } from '@jest/globals'; 2 | 3 | // Mock fs/promises 4 | const mockMkdir = jest.fn(); 5 | const mockWriteFile = jest.fn(); 6 | 7 | jest.unstable_mockModule('fs/promises', () => ({ 8 | mkdir: mockMkdir, 9 | writeFile: mockWriteFile, 10 | })); 11 | 12 | // Mock os 13 | const mockHomedir = jest.fn(); 14 | jest.unstable_mockModule('os', () => ({ 15 | homedir: mockHomedir, 16 | })); 17 | 18 | // Import after mocking 19 | const { ContentHandlers } = await import('../../handlers/content-handlers.js'); 20 | const { CrawlHandlers } = await import('../../handlers/crawl-handlers.js'); 21 | 22 | // Mock the service 23 | const mockService = { 24 | captureScreenshot: jest.fn(), 25 | crawl: jest.fn(), 26 | }; 27 | 28 | // Mock axios client 29 | const mockAxiosClient = { 30 | post: jest.fn(), 31 | }; 32 | 33 | describe('Screenshot Local Saving', () => { 34 | let contentHandlers: InstanceType<typeof ContentHandlers>; 35 | let crawlHandlers: InstanceType<typeof CrawlHandlers>; 36 | const testScreenshotBase64 = 37 | 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=='; // 1x1 red pixel 38 | 39 | beforeEach(() => { 40 | jest.clearAllMocks(); 41 | contentHandlers = new ContentHandlers(mockService as never, mockAxiosClient as never, new Map()); 42 | crawlHandlers = new CrawlHandlers(mockService as never, mockAxiosClient as never, new Map()); 43 | 44 | // Default mock implementations 45 | mockMkdir.mockResolvedValue(undefined); 46 | mockWriteFile.mockResolvedValue(undefined); 47 | }); 48 | 49 | describe('ContentHandlers.captureScreenshot', () => { 50 | it('should save screenshot to local directory when save_to_directory is provided', async () => { 51 | const mockDate = new Date('2024-01-15T10:30:00Z'); 52 | jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never); 53 | 54 | mockService.captureScreenshot.mockResolvedValue({ 55 | success: true, 56 | screenshot: testScreenshotBase64, 57 | }); 58 | 59 | const result = await contentHandlers.captureScreenshot({ 60 | url: 'https://example.com', 61 | save_to_directory: '/tmp/screenshots', 62 | }); 63 | 64 | // Verify directory creation 65 | expect(mockMkdir).toHaveBeenCalledWith('/tmp/screenshots', { recursive: true }); 66 | 67 | // Verify file write 68 | const expectedFilename = 'example-com-2024-01-15T10-30-00.png'; 69 | const expectedPath = '/tmp/screenshots/' + expectedFilename; 70 | expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64')); 71 | 72 | // Verify response includes saved path 73 | expect(result.content[1].text).toContain(`Saved to: ${expectedPath}`); 74 | }); 75 | 76 | it('should handle directory creation failure gracefully', async () => { 77 | const consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation(); 78 | mockMkdir.mockRejectedValue(new Error('Permission denied')); 79 | 80 | mockService.captureScreenshot.mockResolvedValue({ 81 | success: true, 82 | screenshot: testScreenshotBase64, 83 | }); 84 | 85 | const result = await contentHandlers.captureScreenshot({ 86 | url: 'https://example.com', 87 | save_to_directory: '/root/screenshots', 88 | }); 89 | 90 | // Should still return the 
screenshot 91 | expect(result.content[0].type).toBe('image'); 92 | expect(result.content[0].data).toBe(testScreenshotBase64); 93 | 94 | // Should not include saved path in text 95 | expect(result.content[1].text).not.toContain('Saved to:'); 96 | 97 | // Should log error 98 | expect(consoleErrorSpy).toHaveBeenCalledWith('Failed to save screenshot locally:', expect.any(Error)); 99 | 100 | consoleErrorSpy.mockRestore(); 101 | }); 102 | 103 | it('should handle file path instead of directory path', async () => { 104 | const mockDate = new Date('2024-01-15T10:30:00Z'); 105 | jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never); 106 | const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation(); 107 | 108 | mockService.captureScreenshot.mockResolvedValue({ 109 | success: true, 110 | screenshot: testScreenshotBase64, 111 | }); 112 | 113 | await contentHandlers.captureScreenshot({ 114 | url: 'https://example.com', 115 | save_to_directory: '/tmp/screenshots/screenshot.png', 116 | }); 117 | 118 | // Should warn about file path 119 | expect(consoleWarnSpy).toHaveBeenCalledWith( 120 | 'Warning: save_to_directory should be a directory path, not a file path. Using parent directory.', 121 | ); 122 | 123 | // Should use parent directory 124 | expect(mockMkdir).toHaveBeenCalledWith('/tmp/screenshots', { recursive: true }); 125 | 126 | // Should still generate filename 127 | const expectedFilename = 'example-com-2024-01-15T10-30-00.png'; 128 | const expectedPath = '/tmp/screenshots/' + expectedFilename; 129 | expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64')); 130 | 131 | consoleWarnSpy.mockRestore(); 132 | }); 133 | 134 | it('should resolve home directory paths', async () => { 135 | const mockDate = new Date('2024-01-15T10:30:00Z'); 136 | jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never); 137 | mockHomedir.mockReturnValue('/Users/testuser'); 138 | 139 | mockService.captureScreenshot.mockResolvedValue({ 140 | success: true, 141 | screenshot: testScreenshotBase64, 142 | }); 143 | 144 | await contentHandlers.captureScreenshot({ 145 | url: 'https://example.com', 146 | save_to_directory: '~/Desktop/screenshots', 147 | }); 148 | 149 | // Should resolve ~ to home directory 150 | expect(mockMkdir).toHaveBeenCalledWith('/Users/testuser/Desktop/screenshots', { recursive: true }); 151 | 152 | const expectedPath = '/Users/testuser/Desktop/screenshots/example-com-2024-01-15T10-30-00.png'; 153 | expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64')); 154 | }); 155 | 156 | it('should not return large screenshots when saved locally', async () => { 157 | // Create a large fake screenshot (>800KB when decoded) 158 | const largeBase64 = 'A'.repeat(1200000); // ~900KB when decoded 159 | 160 | mockService.captureScreenshot.mockResolvedValue({ 161 | success: true, 162 | screenshot: largeBase64, 163 | }); 164 | 165 | const result = await contentHandlers.captureScreenshot({ 166 | url: 'https://example.com', 167 | save_to_directory: '/tmp', 168 | }); 169 | 170 | // Should not include image in response 171 | const imageContent = result.content.find((c) => c.type === 'image'); 172 | expect(imageContent).toBeUndefined(); 173 | 174 | // Should mention size in text 175 | const textContent = result.content.find((c) => c.type === 'text'); 176 | expect(textContent?.text).toContain('not returned due to size'); 177 | expect(textContent?.text).toContain('KB'); 178 | }); 179 | 180 | 
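// Note on the expected paths asserted in these tests: a sketch of the filename scheme
// the assertions imply (inferred from the tests, not code copied from the handlers):
//   const host = new URL(url).hostname.replace(/[^a-zA-Z0-9]/g, '-');
//   const stamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
//   const filename = `${host}-${stamp}.png`; // e.g. example-com-2024-01-15T10-30-00.png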
it('should sanitize filename for URLs with special characters', async () => { 181 | const mockDate = new Date('2024-01-15T10:30:00Z'); 182 | jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never); 183 | 184 | mockService.captureScreenshot.mockResolvedValue({ 185 | success: true, 186 | screenshot: testScreenshotBase64, 187 | }); 188 | 189 | await contentHandlers.captureScreenshot({ 190 | url: 'https://my-site.com:8080/path?query=value', 191 | save_to_directory: '/tmp/screenshots', 192 | }); 193 | 194 | const expectedFilename = 'my-site-com-2024-01-15T10-30-00.png'; 195 | const expectedPath = '/tmp/screenshots/' + expectedFilename; 196 | expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, expect.any(Buffer)); 197 | }); 198 | }); 199 | 200 | describe('CrawlHandlers.crawl', () => { 201 | it('should save screenshot to local directory when screenshot_directory is provided', async () => { 202 | const mockDate = new Date('2024-01-15T10:30:00Z'); 203 | jest.spyOn(globalThis, 'Date').mockImplementation(() => mockDate as never); 204 | 205 | mockService.crawl.mockResolvedValue({ 206 | results: [ 207 | { 208 | url: 'https://example.com', 209 | success: true, 210 | screenshot: testScreenshotBase64, 211 | markdown: { raw_markdown: 'Test content' }, 212 | }, 213 | ], 214 | }); 215 | 216 | const result = await crawlHandlers.crawl({ 217 | url: 'https://example.com', 218 | screenshot: true, 219 | screenshot_directory: '/tmp/crawl-screenshots', 220 | }); 221 | 222 | // Verify directory creation 223 | expect(mockMkdir).toHaveBeenCalledWith('/tmp/crawl-screenshots', { recursive: true }); 224 | 225 | // Verify file write 226 | const expectedFilename = 'example-com-2024-01-15T10-30-00.png'; 227 | const expectedPath = '/tmp/crawl-screenshots/' + expectedFilename; 228 | expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, Buffer.from(testScreenshotBase64, 'base64')); 229 | 230 | // Verify response includes saved path 231 | const textContent = result.content.find( 232 | (c) => c.type === 'text' && 'text' in c && c.text?.includes('Screenshot saved'), 233 | ); 234 | expect(textContent?.text).toContain(`Screenshot saved to: ${expectedPath}`); 235 | }); 236 | 237 | it('should handle file save failure gracefully in crawl', async () => { 238 | const consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation(); 239 | mockMkdir.mockResolvedValue(undefined); // directory creation succeeds 240 | mockWriteFile.mockRejectedValue(new Error('Disk full')); // but file write fails 241 | 242 | mockService.crawl.mockResolvedValue({ 243 | results: [ 244 | { 245 | url: 'https://example.com', 246 | success: true, 247 | screenshot: testScreenshotBase64, 248 | markdown: { raw_markdown: 'Test content' }, 249 | }, 250 | ], 251 | }); 252 | 253 | const result = await crawlHandlers.crawl({ 254 | url: 'https://example.com', 255 | screenshot: true, 256 | screenshot_directory: '/tmp/crawl-screenshots', 257 | }); 258 | 259 | // Should still return the screenshot as image 260 | const imageContent = result.content.find((c) => c.type === 'image'); 261 | expect(imageContent?.data).toBe(testScreenshotBase64); 262 | 263 | // Should log error 264 | expect(consoleErrorSpy).toHaveBeenCalledWith('Failed to save screenshot locally:', expect.any(Error)); 265 | 266 | consoleErrorSpy.mockRestore(); 267 | }); 268 | 269 | it('should not attempt to save when screenshot_directory is not provided', async () => { 270 | mockService.crawl.mockResolvedValue({ 271 | results: [ 272 | { 273 | url: 'https://example.com', 274 | success: 
true, 275 | screenshot: testScreenshotBase64, 276 | markdown: { raw_markdown: 'Test content' }, 277 | }, 278 | ], 279 | }); 280 | 281 | await crawlHandlers.crawl({ 282 | url: 'https://example.com', 283 | screenshot: true, 284 | }); 285 | 286 | // Should not call fs methods 287 | expect(mockMkdir).not.toHaveBeenCalled(); 288 | expect(mockWriteFile).not.toHaveBeenCalled(); 289 | }); 290 | }); 291 | }); 292 | ``` -------------------------------------------------------------------------------- /src/__tests__/crawl4ai-service.network.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { jest } from '@jest/globals'; 2 | 3 | // Mock axios before importing the service 4 | const mockAxiosInstance = { 5 | get: jest.fn() as jest.Mock, 6 | post: jest.fn() as jest.Mock, 7 | interceptors: { 8 | request: { use: jest.fn() as jest.Mock }, 9 | response: { use: jest.fn() as jest.Mock }, 10 | }, 11 | }; 12 | 13 | jest.unstable_mockModule('axios', () => ({ 14 | default: { 15 | create: jest.fn(() => mockAxiosInstance), 16 | isAxiosError: jest.fn((error: any) => error.isAxiosError === true), // eslint-disable-line @typescript-eslint/no-explicit-any 17 | get: jest.fn(), 18 | head: jest.fn(), 19 | }, 20 | isAxiosError: jest.fn((error: any) => error.isAxiosError === true), // eslint-disable-line @typescript-eslint/no-explicit-any 21 | })); 22 | 23 | // Import after mocking 24 | const { Crawl4AIService } = await import('../crawl4ai-service.js'); 25 | 26 | describe('Crawl4AI Service - Network Failures', () => { 27 | let service: any; // eslint-disable-line @typescript-eslint/no-explicit-any 28 | 29 | interface ErrorWithCode extends Error { 30 | code?: string; 31 | response?: { 32 | status: number; 33 | data?: any; // eslint-disable-line @typescript-eslint/no-explicit-any 34 | }; 35 | isAxiosError?: boolean; 36 | } 37 | 38 | beforeEach(() => { 39 | jest.clearAllMocks(); 40 | service = new Crawl4AIService('http://localhost:11235', 'test-api-key'); 41 | }); 42 | 43 | describe('Network Timeouts', () => { 44 | it('should handle request timeout', async () => { 45 | const timeoutError = new Error('timeout of 30000ms exceeded') as ErrorWithCode; 46 | timeoutError.code = 'ECONNABORTED'; 47 | timeoutError.isAxiosError = true; 48 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(timeoutError); 49 | 50 | await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow('Request timed out'); 51 | }); 52 | 53 | it('should handle response timeout', async () => { 54 | const timeoutError = new Error('timeout of 30000ms exceeded') as ErrorWithCode; 55 | timeoutError.code = 'ETIMEDOUT'; 56 | timeoutError.isAxiosError = true; 57 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(timeoutError); 58 | 59 | await expect(service.getHTML({ url: 'https://example.com' })).rejects.toThrow('Request timeout'); 60 | }); 61 | }); 62 | 63 | describe('HTTP Error Responses', () => { 64 | it('should handle 401 Unauthorized', async () => { 65 | const error = { 66 | response: { 67 | status: 401, 68 | data: { error: 'Invalid API key' }, 69 | }, 70 | isAxiosError: true, 71 | }; 72 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 73 | 74 | await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow( 75 | 'Request failed with status 401: Invalid API key', 76 | ); 77 | }); 78 | 79 | it('should handle 403 Forbidden', async () => { 80 | const error = { 81 | response: { 82 | status: 403, 83 | data: { error: 'Access denied' }, 84 | }, 
85 | isAxiosError: true, 86 | }; 87 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 88 | 89 | await expect(service.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow( 90 | 'Request failed with status 403: Access denied', 91 | ); 92 | }); 93 | 94 | it('should handle 404 Not Found', async () => { 95 | const error = { 96 | response: { 97 | status: 404, 98 | data: { error: 'Endpoint not found' }, 99 | }, 100 | isAxiosError: true, 101 | }; 102 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 103 | 104 | await expect(service.generatePDF({ url: 'https://example.com' })).rejects.toThrow( 105 | 'Request failed with status 404: Endpoint not found', 106 | ); 107 | }); 108 | 109 | it('should handle 429 Too Many Requests', async () => { 110 | const error = { 111 | response: { 112 | status: 429, 113 | data: { error: 'Rate limit exceeded' }, 114 | headers: { 115 | 'retry-after': '60', 116 | }, 117 | }, 118 | isAxiosError: true, 119 | }; 120 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 121 | 122 | await expect(service.executeJS({ url: 'https://example.com', scripts: ['return 1;'] })).rejects.toThrow( 123 | 'Request failed with status 429: Rate limit exceeded', 124 | ); 125 | }); 126 | 127 | it('should handle 500 Internal Server Error', async () => { 128 | const error = { 129 | response: { 130 | status: 500, 131 | data: { error: 'Internal server error' }, 132 | }, 133 | isAxiosError: true, 134 | }; 135 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 136 | 137 | await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow( 138 | 'Request failed with status 500: Internal server error', 139 | ); 140 | }); 141 | 142 | it('should handle 502 Bad Gateway', async () => { 143 | const error = { 144 | response: { 145 | status: 502, 146 | data: 'Bad Gateway', 147 | }, 148 | isAxiosError: true, 149 | message: 'Request failed with status code 502', 150 | }; 151 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 152 | 153 | await expect(service.getMarkdown({ url: 'https://example.com' })).rejects.toThrow( 154 | 'Request failed with status 502: Request failed with status code 502', 155 | ); 156 | }); 157 | 158 | it('should handle 503 Service Unavailable', async () => { 159 | const error = { 160 | response: { 161 | status: 503, 162 | data: { error: 'Service temporarily unavailable' }, 163 | }, 164 | isAxiosError: true, 165 | }; 166 | (mockAxiosInstance.get as jest.Mock).mockRejectedValue(error); 167 | 168 | await expect(service.extractWithLLM({ url: 'https://example.com', query: 'test' })).rejects.toThrow( 169 | 'Request failed with status 503: Service temporarily unavailable', 170 | ); 171 | }); 172 | 173 | it('should handle 504 Gateway Timeout', async () => { 174 | const error = { 175 | response: { 176 | status: 504, 177 | data: { error: 'Gateway timeout' }, 178 | }, 179 | isAxiosError: true, 180 | }; 181 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 182 | 183 | await expect(service.getHTML({ url: 'https://example.com' })).rejects.toThrow( 184 | 'Request failed with status 504: Gateway timeout', 185 | ); 186 | }); 187 | }); 188 | 189 | describe('Network Connection Failures', () => { 190 | it('should handle DNS resolution failure', async () => { 191 | const error = new Error('getaddrinfo ENOTFOUND invalid.domain') as ErrorWithCode; 192 | error.code = 'ENOTFOUND'; 193 | error.isAxiosError = true; 194 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 195 | 196 | await 
expect(service.getMarkdown({ url: 'https://invalid.domain' })).rejects.toThrow( 197 | 'DNS resolution failed: getaddrinfo ENOTFOUND invalid.domain', 198 | ); 199 | }); 200 | 201 | it('should handle connection refused', async () => { 202 | const error = new Error('connect ECONNREFUSED 127.0.0.1:11235') as ErrorWithCode; 203 | error.code = 'ECONNREFUSED'; 204 | error.isAxiosError = true; 205 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 206 | 207 | await expect(service.crawl({ urls: ['https://example.com'] })).rejects.toThrow( 208 | 'Connection refused: connect ECONNREFUSED 127.0.0.1:11235', 209 | ); 210 | }); 211 | 212 | it('should handle connection reset', async () => { 213 | const error = new Error('socket hang up') as ErrorWithCode; 214 | error.code = 'ECONNRESET'; 215 | error.isAxiosError = true; 216 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 217 | 218 | await expect(service.captureScreenshot({ url: 'https://example.com' })).rejects.toThrow( 219 | 'Connection reset: socket hang up', 220 | ); 221 | }); 222 | 223 | it('should handle network unreachable', async () => { 224 | const error = new Error('connect ENETUNREACH') as ErrorWithCode; 225 | error.code = 'ENETUNREACH'; 226 | error.isAxiosError = true; 227 | (mockAxiosInstance.post as jest.Mock).mockRejectedValue(error); 228 | 229 | await expect(service.executeJS({ url: 'https://example.com', scripts: ['return 1;'] })).rejects.toThrow( 230 | 'Network unreachable: connect ENETUNREACH', 231 | ); 232 | }); 233 | }); 234 | 235 | describe('Response Parsing Failures', () => { 236 | it('should handle invalid JSON response', async () => { 237 | // This test is not applicable anymore since we handle errors at axios level 238 | // The service will return whatever axios returns 239 | (mockAxiosInstance.post as jest.Mock).mockResolvedValue({ 240 | data: '<html>Not JSON</html>', 241 | headers: { 'content-type': 'text/html' }, 242 | }); 243 | 244 | const result = await service.getHTML({ url: 'https://example.com' }); 245 | expect(result).toBe('<html>Not JSON</html>'); 246 | }); 247 | 248 | it('should handle empty response', async () => { 249 | (mockAxiosInstance.post as jest.Mock).mockResolvedValue({ 250 | data: null, 251 | }); 252 | 253 | // The service returns null, which is valid 254 | const result = await service.crawl({ urls: ['https://example.com'] }); 255 | expect(result).toBeNull(); 256 | }); 257 | 258 | it('should handle malformed response structure', async () => { 259 | (mockAxiosInstance.post as jest.Mock).mockResolvedValue({ 260 | data: { unexpected: 'structure' }, 261 | }); 262 | 263 | // The service returns whatever the API returns 264 | const result = await service.crawl({ urls: ['https://example.com'] }); 265 | expect(result).toEqual({ unexpected: 'structure' }); 266 | }); 267 | }); 268 | 269 | describe('Request Configuration Errors', () => { 270 | it('should handle invalid URL format', async () => { 271 | await expect(service.getMarkdown({ url: 'not-a-valid-url' })).rejects.toThrow('Invalid URL format'); 272 | }); 273 | 274 | it('should handle missing required parameters', async () => { 275 | await expect(service.batchCrawl({ urls: [] })).rejects.toThrow('URLs array cannot be empty'); 276 | }); 277 | 278 | it('should handle oversized request payload', async () => { 279 | const error = new Error('Request Entity Too Large') as ErrorWithCode; 280 | error.response = { status: 413 }; 281 | error.isAxiosError = true; 282 | error.message = 'Request Entity Too Large'; 283 | (mockAxiosInstance.post as 
jest.Mock).mockRejectedValue(error); 284 | 285 | const hugeScript = 'x'.repeat(10 * 1024 * 1024); // 10MB 286 | await expect(service.executeJS({ url: 'https://example.com', scripts: [hugeScript] })).rejects.toThrow( 287 | 'Request failed with status 413: Request Entity Too Large', 288 | ); 289 | }); 290 | }); 291 | 292 | describe('Partial Response Handling', () => { 293 | it('should handle successful response with partial data', async () => { 294 | (mockAxiosInstance.post as jest.Mock).mockResolvedValue({ 295 | data: { 296 | results: [ 297 | { success: true, url: 'https://example.com', markdown: 'Content' }, 298 | { success: false, url: 'https://example.com/page2', error: 'Failed' }, 299 | ], 300 | }, 301 | }); 302 | 303 | const result = await service.crawl({ urls: ['https://example.com', 'https://example.com/page2'] }); 304 | expect(result.results).toHaveLength(2); 305 | expect(result.results[0].success).toBe(true); 306 | expect(result.results[1].success).toBe(false); 307 | }); 308 | 309 | it('should handle response with missing optional fields', async () => { 310 | (mockAxiosInstance.post as jest.Mock).mockResolvedValue({ 311 | data: { 312 | success: true, 313 | url: 'https://example.com', 314 | // Missing markdown field 315 | }, 316 | }); 317 | 318 | const result = await service.getMarkdown({ url: 'https://example.com' }); 319 | expect(result.url).toBe('https://example.com'); 320 | expect(result.markdown).toBeUndefined(); 321 | }); 322 | }); 323 | }); 324 | ``` -------------------------------------------------------------------------------- /src/__tests__/handlers/parameter-combinations.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { jest } from '@jest/globals'; 2 | import { CrawlHandlers } from '../../handlers/crawl-handlers.js'; 3 | import { ContentHandlers } from '../../handlers/content-handlers.js'; 4 | 5 | type MockService = { 6 | crawl: jest.Mock; 7 | getMarkdown: jest.Mock; 8 | captureScreenshot: jest.Mock; 9 | }; 10 | 11 | type MockAxiosClient = { 12 | post: jest.Mock; 13 | get: jest.Mock; 14 | head: jest.Mock; 15 | }; 16 | 17 | describe('Optional Parameter Combinations', () => { 18 | let crawlHandlers: CrawlHandlers; 19 | let _contentHandlers: ContentHandlers; 20 | let mockService: MockService; 21 | let mockAxiosClient: MockAxiosClient; 22 | 23 | beforeEach(() => { 24 | jest.clearAllMocks(); 25 | 26 | mockService = { 27 | crawl: jest.fn(), 28 | getMarkdown: jest.fn(), 29 | captureScreenshot: jest.fn(), 30 | }; 31 | 32 | mockAxiosClient = { 33 | post: jest.fn(), 34 | get: jest.fn(), 35 | head: jest.fn(), 36 | }; 37 | 38 | crawlHandlers = new CrawlHandlers(mockService, mockAxiosClient, new Map()); 39 | _contentHandlers = new ContentHandlers(mockService, mockAxiosClient, new Map()); 40 | }); 41 | 42 | describe('Batch Crawl Parameter Combinations', () => { 43 | const testCases = [ 44 | { 45 | name: 'default parameters only', 46 | options: { urls: ['https://example.com'] }, 47 | expectedConfig: undefined, 48 | }, 49 | { 50 | name: 'remove_images only', 51 | options: { urls: ['https://example.com'], remove_images: true }, 52 | expectedConfig: { exclude_tags: ['img', 'picture', 'svg'] }, 53 | }, 54 | { 55 | name: 'bypass_cache only', 56 | options: { urls: ['https://example.com'], bypass_cache: true }, 57 | expectedConfig: { cache_mode: 'BYPASS' }, 58 | }, 59 | { 60 | name: 'both remove_images and bypass_cache', 61 | options: { urls: ['https://example.com'], remove_images: true, bypass_cache: true }, 62 | 
expectedConfig: { exclude_tags: ['img', 'picture', 'svg'], cache_mode: 'BYPASS' }, 63 | }, 64 | { 65 | name: 'with max_concurrent', 66 | options: { urls: ['https://example.com'], max_concurrent: 5, remove_images: true }, 67 | expectedConfig: { exclude_tags: ['img', 'picture', 'svg'] }, 68 | }, 69 | ]; 70 | 71 | testCases.forEach(({ name, options, expectedConfig }) => { 72 | it(`should handle ${name}`, async () => { 73 | mockAxiosClient.post.mockResolvedValue({ 74 | data: { results: [{ success: true }] }, 75 | }); 76 | 77 | await crawlHandlers.batchCrawl(options); 78 | 79 | expect(mockAxiosClient.post).toHaveBeenCalledWith('/crawl', { 80 | urls: options.urls, 81 | max_concurrent: options.max_concurrent, 82 | crawler_config: expectedConfig, 83 | }); 84 | }); 85 | }); 86 | }); 87 | 88 | describe('Smart Crawl Parameter Combinations', () => { 89 | const testCases = [ 90 | { 91 | name: 'minimal configuration', 92 | options: { url: 'https://example.com' }, 93 | expectedCacheMode: 'ENABLED', 94 | }, 95 | { 96 | name: 'with bypass_cache', 97 | options: { url: 'https://example.com', bypass_cache: true }, 98 | expectedCacheMode: 'BYPASS', 99 | }, 100 | { 101 | name: 'with max_depth', 102 | options: { url: 'https://example.com', max_depth: 5 }, 103 | expectedCacheMode: 'ENABLED', 104 | }, 105 | { 106 | name: 'with follow_links and bypass_cache', 107 | options: { url: 'https://example.com', follow_links: true, bypass_cache: true }, 108 | expectedCacheMode: 'BYPASS', 109 | }, 110 | ]; 111 | 112 | testCases.forEach(({ name, options, expectedCacheMode }) => { 113 | it(`should handle ${name}`, async () => { 114 | mockAxiosClient.head.mockResolvedValue({ headers: { 'content-type': 'text/html' } }); 115 | mockAxiosClient.post.mockResolvedValue({ 116 | data: { results: [{ success: true, markdown: { raw_markdown: 'Content' } }] }, 117 | }); 118 | 119 | await crawlHandlers.smartCrawl(options); 120 | 121 | expect(mockAxiosClient.post).toHaveBeenCalledWith('/crawl', { 122 | urls: [options.url], 123 | crawler_config: { 124 | cache_mode: expectedCacheMode, 125 | }, 126 | browser_config: { 127 | headless: true, 128 | browser_type: 'chromium', 129 | }, 130 | }); 131 | }); 132 | }); 133 | }); 134 | 135 | describe('Crawl Parameter Combinations', () => { 136 | // Table-driven tests for various parameter combinations 137 | const parameterSets = [ 138 | // Browser configuration combinations 139 | { 140 | name: 'browser type with viewport', 141 | params: { 142 | url: 'https://example.com', 143 | browser_type: 'firefox', 144 | viewport_width: 1920, 145 | viewport_height: 1080, 146 | }, 147 | }, 148 | { 149 | name: 'proxy with authentication', 150 | params: { 151 | url: 'https://example.com', 152 | proxy_server: 'http://proxy.example.com:8080', 153 | proxy_username: 'user', 154 | proxy_password: 'pass', 155 | }, 156 | }, 157 | { 158 | name: 'cookies and headers', 159 | params: { 160 | url: 'https://example.com', 161 | cookies: [{ name: 'session', value: '123', domain: '.example.com' }], 162 | headers: { 'X-Custom': 'value', Authorization: 'Bearer token' }, 163 | }, 164 | }, 165 | // Content filtering combinations 166 | { 167 | name: 'content filtering options', 168 | params: { 169 | url: 'https://example.com', 170 | word_count_threshold: 100, 171 | excluded_tags: ['script', 'style'], 172 | remove_overlay_elements: true, 173 | }, 174 | }, 175 | { 176 | name: 'text-only with form removal', 177 | params: { 178 | url: 'https://example.com', 179 | only_text: true, 180 | remove_forms: true, 181 | keep_data_attributes: false, 182 | 
}, 183 | }, 184 | // JavaScript execution combinations 185 | { 186 | name: 'js_code with wait conditions', 187 | params: { 188 | url: 'https://example.com', 189 | js_code: ['document.querySelector("button").click()'], 190 | wait_for: '#result', 191 | wait_for_timeout: 5000, 192 | }, 193 | }, 194 | { 195 | name: 'js_only with session', 196 | params: { 197 | url: 'https://example.com', 198 | js_only: true, 199 | session_id: 'test-session-123', 200 | }, 201 | }, 202 | // Dynamic content handling 203 | { 204 | name: 'scrolling configuration', 205 | params: { 206 | url: 'https://example.com', 207 | delay_before_scroll: 2000, 208 | scroll_delay: 500, 209 | scan_full_page: true, 210 | }, 211 | }, 212 | { 213 | name: 'virtual scroll for infinite feeds', 214 | params: { 215 | url: 'https://example.com', 216 | virtual_scroll_config: { 217 | container_selector: '.feed', 218 | scroll_count: 10, 219 | scroll_by: 500, 220 | wait_after_scroll: 1000, 221 | }, 222 | }, 223 | }, 224 | // Media handling combinations 225 | { 226 | name: 'screenshot with PDF', 227 | params: { 228 | url: 'https://example.com', 229 | screenshot: true, 230 | screenshot_wait_for: 3, 231 | pdf: true, 232 | capture_mhtml: true, 233 | }, 234 | }, 235 | { 236 | name: 'image filtering options', 237 | params: { 238 | url: 'https://example.com', 239 | image_description_min_word_threshold: 10, 240 | image_score_threshold: 0.5, 241 | exclude_external_images: true, 242 | }, 243 | }, 244 | // Link filtering combinations 245 | { 246 | name: 'link exclusion options', 247 | params: { 248 | url: 'https://example.com', 249 | exclude_social_media_links: true, 250 | exclude_domains: ['facebook.com', 'twitter.com'], 251 | exclude_external_links: true, 252 | }, 253 | }, 254 | // Page interaction combinations 255 | { 256 | name: 'stealth mode options', 257 | params: { 258 | url: 'https://example.com', 259 | simulate_user: true, 260 | override_navigator: true, 261 | magic: true, 262 | user_agent: 'Custom Bot 1.0', 263 | }, 264 | }, 265 | // Complex combinations 266 | { 267 | name: 'kitchen sink - many options', 268 | params: { 269 | url: 'https://example.com', 270 | browser_type: 'chromium', 271 | viewport_width: 1280, 272 | viewport_height: 720, 273 | word_count_threshold: 50, 274 | excluded_tags: ['nav', 'footer'], 275 | js_code: ['window.scrollTo(0, document.body.scrollHeight)'], 276 | wait_for: '.loaded', 277 | screenshot: true, 278 | exclude_external_links: true, 279 | session_id: 'complex-session', 280 | cache_mode: 'BYPASS', 281 | verbose: true, 282 | }, 283 | }, 284 | ]; 285 | 286 | parameterSets.forEach(({ name, params }) => { 287 | it(`should correctly process ${name}`, async () => { 288 | mockService.crawl.mockResolvedValue({ 289 | results: [ 290 | { 291 | url: params.url, 292 | success: true, 293 | markdown: { raw_markdown: 'Test content' }, 294 | }, 295 | ], 296 | }); 297 | 298 | const result = await crawlHandlers.crawl(params); 299 | 300 | // Verify the service was called 301 | expect(mockService.crawl).toHaveBeenCalled(); 302 | 303 | // Verify response structure 304 | expect(result.content).toBeDefined(); 305 | expect(result.content[0].type).toBe('text'); 306 | }); 307 | }); 308 | 309 | // Test parameter validation 310 | it('should handle invalid parameter combinations', async () => { 311 | const invalidParams = { 312 | url: 'https://example.com', 313 | js_only: true, 314 | // Missing required session_id when js_only is true 315 | }; 316 | 317 | await expect(crawlHandlers.crawl(invalidParams)).rejects.toThrow(); 318 | }); 319 | 320 | // 
Test default values 321 | it('should apply correct defaults when parameters are omitted', async () => { 322 | mockService.crawl.mockResolvedValue({ 323 | results: [ 324 | { 325 | url: 'https://example.com', 326 | success: true, 327 | markdown: { raw_markdown: 'Content' }, 328 | }, 329 | ], 330 | }); 331 | 332 | await crawlHandlers.crawl({ url: 'https://example.com' }); 333 | 334 | const call = mockService.crawl.mock.calls[0][0]; 335 | 336 | // Check browser_config defaults 337 | expect(call.browser_config).toBeDefined(); 338 | expect(call.browser_config.headless).toBe(true); 339 | 340 | // Check that optional configs are not included when not specified 341 | expect(call.crawler_config.word_count_threshold).toBeUndefined(); 342 | expect(call.crawler_config.excluded_tags).toBeUndefined(); 343 | }); 344 | }); 345 | 346 | describe('Parameter Priority and Conflicts', () => { 347 | it('should handle conflicting cache modes correctly', async () => { 348 | mockService.crawl.mockResolvedValue({ 349 | results: [{ success: true, markdown: { raw_markdown: 'Content' } }], 350 | }); 351 | 352 | // Test that explicit cache_mode takes precedence 353 | await crawlHandlers.crawl({ 354 | url: 'https://example.com', 355 | cache_mode: 'DISABLED', 356 | // Even with other params that might suggest caching 357 | session_id: 'test-session', 358 | }); 359 | 360 | const call = mockService.crawl.mock.calls[0][0]; 361 | expect(call.crawler_config.cache_mode).toBe('DISABLED'); 362 | }); 363 | 364 | it('should handle mutually exclusive options', async () => { 365 | mockService.crawl.mockResolvedValue({ 366 | results: [{ success: true, html: '<p>HTML</p>' }], 367 | }); 368 | 369 | // only_text should override other content options 370 | await crawlHandlers.crawl({ 371 | url: 'https://example.com', 372 | only_text: true, 373 | keep_data_attributes: true, // Should be ignored with only_text 374 | }); 375 | 376 | const call = mockService.crawl.mock.calls[0][0]; 377 | expect(call.crawler_config.only_text).toBe(true); 378 | expect(call.crawler_config.keep_data_attributes).toBe(true); // Still passed through 379 | }); 380 | }); 381 | 382 | describe('Edge Cases for Optional Parameters', () => { 383 | it('should handle empty arrays correctly', async () => { 384 | mockService.crawl.mockResolvedValue({ 385 | results: [{ success: true, markdown: { raw_markdown: 'Content' } }], 386 | }); 387 | 388 | await crawlHandlers.crawl({ 389 | url: 'https://example.com', 390 | excluded_tags: [], // Empty array 391 | exclude_domains: [], // Empty array 392 | cookies: [], // Empty array 393 | }); 394 | 395 | const call = mockService.crawl.mock.calls[0][0]; 396 | expect(call.crawler_config.excluded_tags).toEqual([]); 397 | expect(call.crawler_config.exclude_domains).toEqual([]); 398 | expect(call.browser_config.cookies).toEqual([]); 399 | }); 400 | 401 | it('should handle null vs undefined correctly', async () => { 402 | mockService.crawl.mockResolvedValue({ 403 | results: [{ success: true, markdown: { raw_markdown: 'Content' } }], 404 | }); 405 | 406 | // null js_code should throw error 407 | await expect( 408 | crawlHandlers.crawl({ 409 | url: 'https://example.com', 410 | js_code: null as unknown as string[], 411 | }), 412 | ).rejects.toThrow('js_code parameter is null'); 413 | 414 | // undefined js_code should be fine 415 | await crawlHandlers.crawl({ 416 | url: 'https://example.com', 417 | js_code: undefined, 418 | }); 419 | 420 | expect(mockService.crawl).toHaveBeenCalledTimes(1); 421 | }); 422 | 423 | it('should handle boolean flags in all 
combinations', async () => { 424 | const booleanFlags = [ 425 | 'remove_overlay_elements', 426 | 'process_iframes', 427 | 'exclude_external_links', 428 | 'screenshot', 429 | 'pdf', 430 | 'verbose', 431 | 'log_console', 432 | 'simulate_user', 433 | 'override_navigator', 434 | 'magic', 435 | ]; 436 | 437 | // Test all flags as true 438 | const allTrue = booleanFlags.reduce((acc, flag) => ({ ...acc, [flag]: true }), { 439 | url: 'https://example.com', 440 | }); 441 | 442 | mockService.crawl.mockResolvedValue({ 443 | results: [{ success: true, markdown: { raw_markdown: 'Content' } }], 444 | }); 445 | 446 | await crawlHandlers.crawl(allTrue); 447 | 448 | const call = mockService.crawl.mock.calls[0][0]; 449 | booleanFlags.forEach((flag) => { 450 | const config = call.crawler_config[flag] || call.browser_config[flag]; 451 | expect(config).toBe(true); 452 | }); 453 | }); 454 | }); 455 | }); 456 | ``` -------------------------------------------------------------------------------- /src/__tests__/index.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { jest } from '@jest/globals'; 2 | import { z } from 'zod'; 3 | 4 | // Mock the MCP SDK 5 | jest.mock('@modelcontextprotocol/sdk/server/index.js'); 6 | jest.mock('@modelcontextprotocol/sdk/server/stdio.js'); 7 | 8 | describe('MCP Server Validation', () => { 9 | describe('Stateless tool validation', () => { 10 | // Test the createStatelessSchema helper 11 | const createStatelessSchema = <T extends z.ZodTypeAny>(schema: T, toolName: string) => { 12 | // Tool-specific guidance for common scenarios 13 | const toolGuidance: Record<string, string> = { 14 | capture_screenshot: 'To capture screenshots with sessions, use crawl(session_id, screenshot: true)', 15 | generate_pdf: 'To generate PDFs with sessions, use crawl(session_id, pdf: true)', 16 | execute_js: 'To run JavaScript with sessions, use crawl(session_id, js_code: [...])', 17 | get_html: 'To get HTML with sessions, use crawl(session_id)', 18 | extract_with_llm: 'To extract data with sessions, first use crawl(session_id) then extract from the response', 19 | }; 20 | 21 | const message = `${toolName} does not support session_id. This tool is stateless - each call creates a new browser. ${ 22 | toolGuidance[toolName] || 'For persistent operations, use crawl with session_id.' 
23 | }`; 24 | 25 | return z 26 | .object({ 27 | session_id: z.never({ message }).optional(), 28 | }) 29 | .passthrough() 30 | .and(schema) 31 | .transform((data) => { 32 | const { session_id, ...rest } = data as Record<string, unknown> & { session_id?: unknown }; 33 | if (session_id !== undefined) { 34 | throw new Error(message); 35 | } 36 | return rest; 37 | }); 38 | }; 39 | 40 | it('should reject session_id for stateless tools', () => { 41 | const ExecuteJsSchema = createStatelessSchema( 42 | z.object({ 43 | url: z.string().url(), 44 | js_code: z.union([z.string(), z.array(z.string())]), 45 | }), 46 | 'execute_js', 47 | ); 48 | 49 | // Should reject with session_id 50 | expect(() => { 51 | ExecuteJsSchema.parse({ 52 | url: 'https://example.com', 53 | js_code: 'return document.title', 54 | session_id: 'test-session', 55 | }); 56 | }).toThrow('execute_js does not support session_id'); 57 | }); 58 | 59 | it('should accept valid parameters without session_id', () => { 60 | const ExecuteJsSchema = createStatelessSchema( 61 | z.object({ 62 | url: z.string().url(), 63 | js_code: z.union([z.string(), z.array(z.string())]), 64 | }), 65 | 'execute_js', 66 | ); 67 | 68 | const result = ExecuteJsSchema.parse({ 69 | url: 'https://example.com', 70 | js_code: 'return document.title', 71 | }); 72 | 73 | expect(result).toEqual({ 74 | url: 'https://example.com', 75 | js_code: 'return document.title', 76 | }); 77 | }); 78 | 79 | it('should provide helpful error message when session_id is used', () => { 80 | const GetMarkdownSchema = createStatelessSchema( 81 | z.object({ 82 | url: z.string().url(), 83 | }), 84 | 'get_markdown', 85 | ); 86 | 87 | try { 88 | GetMarkdownSchema.parse({ 89 | url: 'https://example.com', 90 | session_id: 'my-session', 91 | }); 92 | } catch (error) { 93 | expect(error).toBeInstanceOf(z.ZodError); 94 | const zodError = error as z.ZodError; 95 | expect(zodError.errors[0].message).toContain('get_markdown does not support session_id'); 96 | expect(zodError.errors[0].message).toContain('For persistent operations, use crawl'); 97 | } 98 | }); 99 | 100 | it('should provide tool-specific guidance for common tools', () => { 101 | // Test capture_screenshot guidance 102 | const CaptureScreenshotSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'capture_screenshot'); 103 | 104 | try { 105 | CaptureScreenshotSchema.parse({ url: 'https://example.com', session_id: 'test' }); 106 | } catch (error) { 107 | const zodError = error as z.ZodError; 108 | expect(zodError.errors[0].message).toContain('use crawl(session_id, screenshot: true)'); 109 | } 110 | 111 | // Test generate_pdf guidance 112 | const GeneratePdfSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'generate_pdf'); 113 | 114 | try { 115 | GeneratePdfSchema.parse({ url: 'https://example.com', session_id: 'test' }); 116 | } catch (error) { 117 | const zodError = error as z.ZodError; 118 | expect(zodError.errors[0].message).toContain('use crawl(session_id, pdf: true)'); 119 | } 120 | 121 | // Test execute_js guidance 122 | const ExecuteJsSchema = createStatelessSchema(z.object({ url: z.string().url() }), 'execute_js'); 123 | 124 | try { 125 | ExecuteJsSchema.parse({ url: 'https://example.com', session_id: 'test' }); 126 | } catch (error) { 127 | const zodError = error as z.ZodError; 128 | expect(zodError.errors[0].message).toContain('use crawl(session_id, js_code: [...])'); 129 | } 130 | }); 131 | 132 | it('should validate all stateless tools', () => { 133 | const statelessTools = [ 134 | 'get_markdown', 
135 | 'capture_screenshot', 136 | 'generate_pdf', 137 | 'execute_js', 138 | 'batch_crawl', 139 | 'smart_crawl', 140 | 'get_html', 141 | 'extract_links', 142 | 'crawl_recursive', 143 | 'parse_sitemap', 144 | 'extract_with_llm', 145 | ]; 146 | 147 | statelessTools.forEach((toolName) => { 148 | const schema = createStatelessSchema( 149 | z.object({ 150 | url: z.string().url(), 151 | }), 152 | toolName, 153 | ); 154 | 155 | // Should reject session_id 156 | expect(() => { 157 | schema.parse({ 158 | url: 'https://example.com', 159 | session_id: 'test', 160 | }); 161 | }).toThrow(`${toolName} does not support session_id`); 162 | 163 | // Should accept without session_id 164 | const result = schema.parse({ 165 | url: 'https://example.com', 166 | }); 167 | expect(result).toEqual({ 168 | url: 'https://example.com', 169 | }); 170 | }); 171 | }); 172 | }); 173 | 174 | describe('Extract links tool', () => { 175 | it('should validate extract_links parameters', () => { 176 | const ExtractLinksSchema = z.object({ 177 | url: z.string().url(), 178 | categorize: z.boolean().optional().default(true), 179 | }); 180 | 181 | // Valid input with categorize true 182 | const result1 = ExtractLinksSchema.parse({ 183 | url: 'https://example.com', 184 | categorize: true, 185 | }); 186 | expect(result1.categorize).toBe(true); 187 | 188 | // Valid input with categorize false 189 | const result2 = ExtractLinksSchema.parse({ 190 | url: 'https://example.com', 191 | categorize: false, 192 | }); 193 | expect(result2.categorize).toBe(false); 194 | 195 | // Default categorize should be true 196 | const result3 = ExtractLinksSchema.parse({ 197 | url: 'https://example.com', 198 | }); 199 | expect(result3.categorize).toBe(true); 200 | }); 201 | }); 202 | 203 | describe('Session management tools', () => { 204 | it('should validate create_session parameters', () => { 205 | const CreateSessionSchema = z.object({ 206 | session_id: z.string(), 207 | initial_url: z.string().optional(), 208 | browser_type: z.string().optional(), 209 | }); 210 | 211 | // Valid input 212 | const result = CreateSessionSchema.parse({ 213 | session_id: 'my-session', 214 | initial_url: 'https://example.com', 215 | }); 216 | expect(result.session_id).toBe('my-session'); 217 | 218 | // Missing required session_id 219 | expect(() => { 220 | CreateSessionSchema.parse({ 221 | initial_url: 'https://example.com', 222 | }); 223 | }).toThrow(); 224 | }); 225 | 226 | it('should validate clear_session parameters', () => { 227 | const ClearSessionSchema = z.object({ 228 | session_id: z.string(), 229 | }); 230 | 231 | // Valid input 232 | const result = ClearSessionSchema.parse({ 233 | session_id: 'my-session', 234 | }); 235 | expect(result.session_id).toBe('my-session'); 236 | 237 | // Missing required session_id 238 | expect(() => { 239 | ClearSessionSchema.parse({}); 240 | }).toThrow(); 241 | }); 242 | }); 243 | 244 | describe('crawl validation', () => { 245 | it('should accept session_id for crawl', () => { 246 | const CrawlWithConfigSchema = z.object({ 247 | url: z.string().url(), 248 | session_id: z.string().optional(), 249 | js_code: z.union([z.string(), z.array(z.string())]).optional(), 250 | }); 251 | 252 | const result = CrawlWithConfigSchema.parse({ 253 | url: 'https://example.com', 254 | session_id: 'my-session', 255 | js_code: 'document.querySelector("button").click()', 256 | }); 257 | 258 | expect(result.session_id).toBe('my-session'); 259 | }); 260 | 261 | it('should work without session_id', () => { 262 | const CrawlWithConfigSchema = z.object({ 263 | 
        url: z.string().url(),
264 |         session_id: z.string().optional(),
265 |       });
266 | 
267 |       const result = CrawlWithConfigSchema.parse({
268 |         url: 'https://example.com',
269 |       });
270 | 
271 |       expect(result.session_id).toBeUndefined();
272 |     });
273 | 
274 |     it('should require js_only when using js_code with session_id WITHOUT output options', () => {
275 |       // Create a schema that mirrors the real one's refinement
276 |       const CrawlWithConfigSchema = z
277 |         .object({
278 |           url: z.string().url(),
279 |           session_id: z.string().optional(),
280 |           js_code: z.union([z.string(), z.array(z.string())]).optional(),
281 |           js_only: z.boolean().optional(),
282 |           screenshot: z.boolean().optional(),
283 |           pdf: z.boolean().optional(),
284 |         })
285 |         .refine(
286 |           (data) => {
287 |             // Only require js_only when using js_code + session_id WITHOUT any output options
288 |             if (data.js_code && data.session_id && !data.js_only && !data.screenshot && !data.pdf) {
289 |               return false;
290 |             }
291 |             return true;
292 |           },
293 |           {
294 |             message:
295 |               'When using js_code with session_id WITHOUT screenshot or pdf, you MUST set js_only: true to prevent server errors. If you want screenshots/PDFs, you can omit js_only. Correct usage: crawl({url, session_id, js_code: [...], js_only: true})',
296 |           },
297 |         );
298 | 
299 |       // Should fail without js_only when no output options
300 |       expect(() => {
301 |         CrawlWithConfigSchema.parse({
302 |           url: 'https://example.com',
303 |           session_id: 'test-session',
304 |           js_code: ['document.querySelector("button").click()'],
305 |         });
306 |       }).toThrow('When using js_code with session_id WITHOUT screenshot or pdf');
307 | 
308 |       // Should pass with js_only: true
309 |       const result = CrawlWithConfigSchema.parse({
310 |         url: 'https://example.com',
311 |         session_id: 'test-session',
312 |         js_code: ['document.querySelector("button").click()'],
313 |         js_only: true,
314 |       });
315 |       expect(result.js_only).toBe(true);
316 | 
317 |       // Should pass with screenshot (no js_only required)
318 |       const result2 = CrawlWithConfigSchema.parse({
319 |         url: 'https://example.com',
320 |         session_id: 'test-session',
321 |         js_code: ['document.querySelector("button").click()'],
322 |         screenshot: true,
323 |       });
324 |       expect(result2.screenshot).toBe(true);
325 |       expect(result2.js_only).toBeUndefined();
326 | 
327 |       // Should pass with pdf (no js_only required)
328 |       const result3 = CrawlWithConfigSchema.parse({
329 |         url: 'https://example.com',
330 |         session_id: 'test-session',
331 |         js_code: ['document.querySelector("button").click()'],
332 |         pdf: true,
333 |       });
334 |       expect(result3.pdf).toBe(true);
335 |       expect(result3.js_only).toBeUndefined();
336 | 
337 |       // Should pass without js_code
338 |       const result4 = CrawlWithConfigSchema.parse({
339 |         url: 'https://example.com',
340 |         session_id: 'test-session',
341 |       });
342 |       expect(result4.session_id).toBe('test-session');
343 | 
344 |       // Should pass without session_id
345 |       const result5 = CrawlWithConfigSchema.parse({
346 |         url: 'https://example.com',
347 |         js_code: ['document.querySelector("button").click()'],
348 |       });
349 |       expect(result5.js_code).toBeDefined();
350 |     });
351 |   });
352 | 
353 |   describe('JavaScript code validation', () => {
354 |     const validateJavaScriptCode = (code: string): boolean => {
355 |       if (/&quot;|&amp;|&lt;|&gt;|&#\d+;|&\w+;/.test(code)) {
356 |         return false;
357 |       }
358 |       if (/<(!DOCTYPE|html|body|head|script|style)\b/i.test(code)) {
359 |         return false;
360 |       }
361 |       if (/[;})]\s*\\n|\\n\s*[{(/]/.test(code)) {
362 |         return false;
363 |       }
364 |       if (/[;})]\s*\\n\s*\w/.test(code)) {
365 |         return false;
366 |       }
367 |       return true;
368 |     };
369 | 
370 |     const JsCodeSchema = z.union([
371 |       z.string().refine(validateJavaScriptCode, {
372 |         message:
373 |           'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
374 |       }),
375 |       z.array(
376 |         z.string().refine(validateJavaScriptCode, {
377 |           message:
378 |             'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.',
379 |         }),
380 |       ),
381 |     ]);
382 | 
383 |     it('should reject JavaScript with HTML entities', () => {
384 |       expect(() => {
385 |         JsCodeSchema.parse('document.querySelector(&quot;button&quot;).click()');
386 |       }).toThrow('Invalid JavaScript: Contains HTML entities');
387 |     });
388 | 
389 |     it('should reject JavaScript with literal \\n between statements', () => {
390 |       expect(() => {
391 |         JsCodeSchema.parse('console.log("line1");\\nconsole.log("line2")');
392 |       }).toThrow('Invalid JavaScript: Contains HTML entities');
393 |     });
394 | 
395 |     it('should accept valid JavaScript with \\n inside strings', () => {
396 |       const result = JsCodeSchema.parse('console.log("line1\\nline2")');
397 |       expect(result).toBe('console.log("line1\\nline2")');
398 |     });
399 | 
400 |     it('should accept valid multiline JavaScript', () => {
401 |       const code = `// Comment
402 | document.querySelector('button').click();
403 | return true;`;
404 |       const result = JsCodeSchema.parse(code);
405 |       expect(result).toBe(code);
406 |     });
407 | 
408 |     it('should validate arrays of JavaScript code', () => {
409 |       // Invalid array (HTML-entity-escaped quotes)
410 |       expect(() => {
411 |         JsCodeSchema.parse(['document.querySelector(&quot;input&quot;).value = &quot;test&quot;', 'form.submit()']);
412 |       }).toThrow('Invalid JavaScript: Contains HTML entities');
413 | 
414 |       // Valid array
415 |       const validArray = ['document.querySelector("input").value = "test"', 'form.submit()'];
416 |       const result = JsCodeSchema.parse(validArray);
417 |       expect(result).toEqual(validArray);
418 |     });
419 |   });
420 | });
421 | 
```

--------------------------------------------------------------------------------
/src/__tests__/handlers/crawl-handlers.test.ts:
--------------------------------------------------------------------------------

```typescript
1 | /* eslint-env jest */
2 | import { jest } from '@jest/globals';
3 | import { AxiosError } from 'axios';
4 | import type { CrawlHandlers as CrawlHandlersType } from '../../handlers/crawl-handlers.js';
5 | import type { Crawl4AIService } from '../../crawl4ai-service.js';
6 | 
7 | // Mock the service
8 | const mockCrawl = jest.fn();
9 | const mockService = {
10 |   crawl: mockCrawl,
11 | } as unknown as Crawl4AIService;
12 | 
13 | // Mock axios client
14 | const mockPost = jest.fn() as jest.Mock;
15 | const mockHead = jest.fn() as jest.Mock;
16 | const mockAxiosClient = {
17 |   post: mockPost,
18 |   head: mockHead,
19 | } as any; // eslint-disable-line @typescript-eslint/no-explicit-any
20 | 
21 | // Mock axios for parseSitemap
22 | const mockAxiosGet = jest.fn();
23 | jest.unstable_mockModule('axios', () => ({
24 |   default: {
25 |     get: mockAxiosGet,
26 |   },
27 |   AxiosError,
28 | }));
29 | 
30 | // Import after setting up mocks
31 | const { CrawlHandlers: CrawlHandlersClass } = await import('../../handlers/crawl-handlers.js');
32 | 
33 | describe('CrawlHandlers', () => {
34 |   let handler: CrawlHandlersType;
35 |   let sessions: Map<string, any>; // eslint-disable-line @typescript-eslint/no-explicit-any
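  // Note: jest.unstable_mockModule only affects modules loaded after it is
  // registered, which is why CrawlHandlers is pulled in via the top-level
  // `await import(...)` above rather than a static import — static imports
  // would be hoisted past the mock setup and bind the real axios module.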
36 | 37 | beforeEach(() => { 38 | jest.clearAllMocks(); 39 | sessions = new Map(); 40 | handler = new CrawlHandlersClass(mockService, mockAxiosClient, sessions); 41 | }); 42 | 43 | describe('batchCrawl', () => { 44 | it('should handle API errors gracefully', async () => { 45 | // Mock API error response 46 | (mockPost as jest.Mock).mockRejectedValue( 47 | new AxiosError('Request failed with status code 500', 'ERR_BAD_RESPONSE', undefined, undefined, { 48 | status: 500, 49 | statusText: 'Internal Server Error', 50 | data: 'Internal Server Error', 51 | headers: {}, 52 | config: {} as any, // eslint-disable-line @typescript-eslint/no-explicit-any 53 | } as any), // eslint-disable-line @typescript-eslint/no-explicit-any 54 | ); 55 | 56 | await expect( 57 | handler.batchCrawl({ 58 | urls: ['not-a-valid-url', 'https://invalid-domain.com'], 59 | max_concurrent: 2, 60 | }), 61 | ).rejects.toThrow('Failed to batch crawl: Internal Server Error'); 62 | }); 63 | 64 | it('should support per-URL configs array', async () => { 65 | (mockPost as jest.Mock).mockResolvedValue({ 66 | data: { 67 | results: [ 68 | { url: 'https://example1.com', success: true, markdown: { raw_markdown: 'Test 1' } }, 69 | { url: 'https://example2.com', success: true, markdown: { raw_markdown: 'Test 2' } }, 70 | ], 71 | }, 72 | }); 73 | 74 | const result = await handler.batchCrawl({ 75 | urls: ['https://example1.com', 'https://example2.com'], 76 | configs: [ 77 | { 78 | url: 'https://example1.com', 79 | browser_config: { browser_type: 'chromium' }, 80 | crawler_config: { screenshot: true }, 81 | }, 82 | { 83 | url: 'https://example2.com', 84 | browser_config: { browser_type: 'undetected' }, 85 | crawler_config: { pdf: true }, 86 | extraction_strategy: { provider: 'openai' }, 87 | }, 88 | ], 89 | max_concurrent: 2, 90 | }); 91 | 92 | // Verify the configs array was passed through 93 | expect(mockPost).toHaveBeenCalledWith( 94 | '/crawl', 95 | expect.objectContaining({ 96 | configs: expect.arrayContaining([ 97 | expect.objectContaining({ 98 | url: 'https://example1.com', 99 | browser_config: { browser_type: 'chromium' }, 100 | crawler_config: { screenshot: true }, 101 | }), 102 | expect.objectContaining({ 103 | url: 'https://example2.com', 104 | browser_config: { browser_type: 'undetected' }, 105 | crawler_config: { pdf: true }, 106 | extraction_strategy: { provider: 'openai' }, 107 | }), 108 | ]), 109 | max_concurrent: 2, 110 | }), 111 | ); 112 | 113 | expect(result.content[0].text).toContain('Batch crawl completed'); 114 | }); 115 | }); 116 | 117 | describe('smartCrawl', () => { 118 | it('should detect XML content type from HEAD request', async () => { 119 | // Mock HEAD response with XML content type 120 | (mockHead as jest.Mock).mockResolvedValue({ 121 | headers: { 122 | 'content-type': 'application/xml', 123 | }, 124 | }); 125 | 126 | // Mock crawl response 127 | (mockPost as jest.Mock).mockResolvedValue({ 128 | data: { 129 | results: [ 130 | { 131 | success: true, 132 | markdown: { 133 | raw_markdown: '<xml>Test content</xml>', 134 | }, 135 | }, 136 | ], 137 | }, 138 | }); 139 | 140 | const result = await handler.smartCrawl({ 141 | url: 'https://example.com/data.xml', 142 | }); 143 | 144 | expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap'); 145 | expect(result.content[0].text).toContain('<xml>Test content</xml>'); 146 | }); 147 | 148 | it('should handle HEAD request failure gracefully', async () => { 149 | // Mock HEAD request failure 150 | (mockHead as jest.Mock).mockRejectedValue(new 
Error('HEAD request failed')); 151 | 152 | // Mock successful crawl 153 | (mockPost as jest.Mock).mockResolvedValue({ 154 | data: { 155 | results: [ 156 | { 157 | success: true, 158 | markdown: { 159 | raw_markdown: 'Test content', 160 | }, 161 | }, 162 | ], 163 | }, 164 | }); 165 | 166 | const result = await handler.smartCrawl({ 167 | url: 'https://example.com', 168 | }); 169 | 170 | expect(result.content[0].text).toContain('Smart crawl detected content type: html'); 171 | }); 172 | 173 | it('should follow links from sitemap when follow_links is true', async () => { 174 | // Mock successful HEAD request 175 | (mockHead as jest.Mock).mockResolvedValue({ 176 | headers: { 177 | 'content-type': 'application/xml', 178 | }, 179 | }); 180 | 181 | // Mock initial crawl with sitemap content 182 | (mockPost as jest.Mock).mockResolvedValueOnce({ 183 | data: { 184 | results: [ 185 | { 186 | success: true, 187 | markdown: `<?xml version="1.0" encoding="UTF-8"?> 188 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 189 | <url> 190 | <loc>https://example.com/page1</loc> 191 | </url> 192 | <url> 193 | <loc>https://example.com/page2</loc> 194 | </url> 195 | </urlset>`, 196 | }, 197 | ], 198 | }, 199 | }); 200 | 201 | // Mock follow-up crawl 202 | (mockPost as jest.Mock).mockResolvedValueOnce({ 203 | data: { 204 | results: [{ success: true }, { success: true }], 205 | }, 206 | }); 207 | 208 | const result = await handler.smartCrawl({ 209 | url: 'https://example.com/sitemap.xml', 210 | follow_links: true, 211 | max_depth: 2, 212 | }); 213 | 214 | expect(result.content[0].text).toContain('Smart crawl detected content type: sitemap'); 215 | expect(result.content[0].text).toContain('Followed 2 links:'); 216 | expect(result.content[0].text).toContain('https://example.com/page1'); 217 | expect(result.content[0].text).toContain('https://example.com/page2'); 218 | }); 219 | 220 | it('should handle smartCrawl API errors', async () => { 221 | (mockHead as jest.Mock).mockResolvedValue({ headers: {} }); 222 | // Mock crawl to get empty results first, then error on follow-up 223 | (mockPost as jest.Mock).mockResolvedValueOnce({ 224 | data: { 225 | results: [], 226 | }, 227 | }); 228 | 229 | const result = await handler.smartCrawl({ 230 | url: 'https://example.com', 231 | }); 232 | 233 | // With empty results, it should still return a response 234 | expect(result.content[0].text).toContain('Smart crawl detected content type: html'); 235 | expect(result.content[0].text).toContain('No content extracted'); 236 | }); 237 | }); 238 | 239 | describe('crawlRecursive', () => { 240 | it('should handle max_depth limit correctly', async () => { 241 | // Mock successful crawl with links 242 | (mockPost as jest.Mock).mockResolvedValueOnce({ 243 | data: { 244 | results: [ 245 | { 246 | success: true, 247 | markdown: { 248 | raw_markdown: 'Test content', 249 | }, 250 | links: { 251 | internal: [{ href: 'https://example.com/page1' }, { href: 'https://example.com/page2' }], 252 | external: [], 253 | }, 254 | }, 255 | ], 256 | }, 257 | }); 258 | 259 | // Mock second crawl for page1 260 | (mockPost as jest.Mock).mockResolvedValueOnce({ 261 | data: { 262 | results: [ 263 | { 264 | success: true, 265 | markdown: { 266 | raw_markdown: 'Page 1 content', 267 | }, 268 | links: { 269 | internal: [], 270 | external: [], 271 | }, 272 | }, 273 | ], 274 | }, 275 | }); 276 | 277 | // Mock third crawl for page2 278 | (mockPost as jest.Mock).mockResolvedValueOnce({ 279 | data: { 280 | results: [ 281 | { 282 | success: true, 283 | markdown: 
{ 284 | raw_markdown: 'Page 2 content', 285 | }, 286 | links: { 287 | internal: [], 288 | external: [], 289 | }, 290 | }, 291 | ], 292 | }, 293 | }); 294 | 295 | const result = await handler.crawlRecursive({ 296 | url: 'https://example.com', 297 | max_depth: 1, // Should crawl initial URL and one level deep 298 | }); 299 | 300 | expect(result.content[0].text).toContain('Pages crawled: 3'); // Initial + 2 pages at depth 1 301 | expect(result.content[0].text).toContain('Max depth reached: 1'); 302 | expect(mockPost).toHaveBeenCalledTimes(3); // Initial crawl + two more 303 | }); 304 | 305 | it('should handle invalid URLs in discovered links', async () => { 306 | // Mock crawl with invalid link 307 | (mockPost as jest.Mock).mockResolvedValue({ 308 | data: { 309 | results: [ 310 | { 311 | success: true, 312 | markdown: { 313 | raw_markdown: 'Test content', 314 | }, 315 | links: { 316 | internal: [ 317 | { href: 'javascript:void(0)' }, // Invalid URL 318 | { href: 'https://example.com/valid' }, // Valid URL 319 | ], 320 | external: [], 321 | }, 322 | }, 323 | ], 324 | }, 325 | }); 326 | 327 | const result = await handler.crawlRecursive({ 328 | url: 'https://example.com', 329 | max_depth: 1, 330 | }); 331 | 332 | // Should continue despite invalid URL 333 | expect(result.content[0].text).toContain('Pages crawled:'); 334 | }); 335 | 336 | it('should handle crawl failures during recursion', async () => { 337 | // First crawl succeeds 338 | (mockPost as jest.Mock).mockResolvedValueOnce({ 339 | data: { 340 | results: [ 341 | { 342 | success: true, 343 | markdown: { 344 | raw_markdown: 'Test content', 345 | }, 346 | links: { 347 | internal: [{ href: 'https://example.com/page1' }], 348 | external: [], 349 | }, 350 | }, 351 | ], 352 | }, 353 | }); 354 | 355 | // Second crawl fails 356 | (mockPost as jest.Mock).mockRejectedValueOnce(new Error('Crawl failed')); 357 | 358 | const result = await handler.crawlRecursive({ 359 | url: 'https://example.com', 360 | max_depth: 1, 361 | }); 362 | 363 | // Should continue despite failure 364 | expect(result.content[0].text).toContain('Pages crawled: 1'); 365 | }); 366 | 367 | it('should handle crawlRecursive API errors', async () => { 368 | (mockPost as jest.Mock).mockRejectedValue(new Error('API Error')); 369 | 370 | const result = await handler.crawlRecursive({ 371 | url: 'https://example.com', 372 | }); 373 | 374 | // When the initial crawl fails, it should return a result with no pages crawled 375 | expect(result.content[0].text).toContain('Pages crawled: 0'); 376 | expect(result.content[0].text).toContain('No pages could be crawled'); 377 | }); 378 | }); 379 | 380 | describe('parseSitemap', () => { 381 | it('should handle network errors gracefully', async () => { 382 | // Mock ENOTFOUND error 383 | const error = new Error('getaddrinfo ENOTFOUND not-a-real-domain-12345.com'); 384 | (error as { code?: string }).code = 'ENOTFOUND'; 385 | mockAxiosGet.mockRejectedValue(error); 386 | 387 | await expect( 388 | handler.parseSitemap({ 389 | url: 'https://not-a-real-domain-12345.com/sitemap.xml', 390 | }), 391 | ).rejects.toThrow('Failed to parse sitemap: getaddrinfo ENOTFOUND not-a-real-domain-12345.com'); 392 | }); 393 | }); 394 | 395 | describe('crawl', () => { 396 | it('should handle word_count_threshold parameter', async () => { 397 | (mockCrawl as jest.Mock).mockResolvedValue({ 398 | results: [ 399 | { 400 | success: true, 401 | markdown: { 402 | raw_markdown: 'Test content', 403 | }, 404 | }, 405 | ], 406 | }); 407 | 408 | const result = await handler.crawl({ 
409 | url: 'https://example.com', 410 | word_count_threshold: 100, 411 | }); 412 | 413 | expect(mockCrawl).toHaveBeenCalledWith( 414 | expect.objectContaining({ 415 | crawler_config: expect.objectContaining({ 416 | word_count_threshold: 100, 417 | }), 418 | }), 419 | ); 420 | expect(result.content[0].text).toBe('Test content'); 421 | }); 422 | 423 | it('should update session last_used time when using session_id', async () => { 424 | const sessionId = 'test-session'; 425 | const session = { 426 | id: sessionId, 427 | created_at: new Date(), 428 | last_used: new Date('2025-08-01'), 429 | }; 430 | sessions.set(sessionId, session); 431 | 432 | (mockCrawl as jest.Mock).mockResolvedValue({ 433 | results: [ 434 | { 435 | success: true, 436 | markdown: { 437 | raw_markdown: 'Test content', 438 | }, 439 | }, 440 | ], 441 | }); 442 | 443 | await handler.crawl({ 444 | url: 'https://example.com', 445 | session_id: sessionId, 446 | }); 447 | 448 | const updatedSession = sessions.get(sessionId) as { last_used: Date }; 449 | expect(updatedSession.last_used.getTime()).toBeGreaterThan(new Date('2025-08-01').getTime()); 450 | }); 451 | 452 | it('should handle image description parameters', async () => { 453 | (mockCrawl as jest.Mock).mockResolvedValue({ 454 | results: [ 455 | { 456 | success: true, 457 | markdown: { 458 | raw_markdown: 'Test content', 459 | }, 460 | }, 461 | ], 462 | }); 463 | 464 | await handler.crawl({ 465 | url: 'https://example.com', 466 | image_description_min_word_threshold: 10, 467 | image_score_threshold: 0.5, 468 | }); 469 | 470 | expect(mockCrawl).toHaveBeenCalledWith( 471 | expect.objectContaining({ 472 | crawler_config: expect.objectContaining({ 473 | image_description_min_word_threshold: 10, 474 | image_score_threshold: 0.5, 475 | }), 476 | }), 477 | ); 478 | }); 479 | 480 | it('should handle exclude_social_media_links parameter', async () => { 481 | (mockCrawl as jest.Mock).mockResolvedValue({ 482 | results: [ 483 | { 484 | success: true, 485 | markdown: { 486 | raw_markdown: 'Test content', 487 | }, 488 | }, 489 | ], 490 | }); 491 | 492 | await handler.crawl({ 493 | url: 'https://example.com', 494 | exclude_social_media_links: true, 495 | }); 496 | 497 | expect(mockCrawl).toHaveBeenCalledWith( 498 | expect.objectContaining({ 499 | crawler_config: expect.objectContaining({ 500 | exclude_social_media_links: true, 501 | }), 502 | }), 503 | ); 504 | }); 505 | 506 | it('should use extracted_content when available as string', async () => { 507 | (mockCrawl as jest.Mock).mockResolvedValue({ 508 | results: [ 509 | { 510 | success: true, 511 | extracted_content: 'Extracted text content', 512 | }, 513 | ], 514 | }); 515 | 516 | const result = await handler.crawl({ 517 | url: 'https://example.com', 518 | }); 519 | 520 | expect(result.content[0].text).toBe('Extracted text content'); 521 | }); 522 | 523 | it('should handle extracted_content as object', async () => { 524 | const extractedObj = { title: 'Test', body: 'Content' }; 525 | (mockCrawl as jest.Mock).mockResolvedValue({ 526 | results: [ 527 | { 528 | success: true, 529 | extracted_content: extractedObj, 530 | }, 531 | ], 532 | }); 533 | 534 | const result = await handler.crawl({ 535 | url: 'https://example.com', 536 | }); 537 | 538 | expect(result.content[0].text).toBe(JSON.stringify(extractedObj, null, 2)); 539 | }); 540 | 541 | it('should fallback to html when markdown is not available', async () => { 542 | (mockCrawl as jest.Mock).mockResolvedValue({ 543 | results: [ 544 | { 545 | success: true, 546 | html: '<html><body>HTML 
content</body></html>', 547 | }, 548 | ], 549 | }); 550 | 551 | const result = await handler.crawl({ 552 | url: 'https://example.com', 553 | }); 554 | 555 | expect(result.content[0].text).toBe('<html><body>HTML content</body></html>'); 556 | }); 557 | 558 | it('should fallback to fit_html when neither markdown nor html is available', async () => { 559 | (mockCrawl as jest.Mock).mockResolvedValue({ 560 | results: [ 561 | { 562 | success: true, 563 | fit_html: '<div>Fit HTML content</div>', 564 | }, 565 | ], 566 | }); 567 | 568 | const result = await handler.crawl({ 569 | url: 'https://example.com', 570 | }); 571 | 572 | expect(result.content[0].text).toBe('<div>Fit HTML content</div>'); 573 | }); 574 | 575 | it('should handle js_code as null error', async () => { 576 | await expect( 577 | handler.crawl({ 578 | url: 'https://example.com', 579 | js_code: null, 580 | }), 581 | ).rejects.toThrow( 582 | 'Failed to crawl: js_code parameter is null. Please provide JavaScript code as a string or array of strings.', 583 | ); 584 | }); 585 | }); 586 | }); 587 | ``` -------------------------------------------------------------------------------- /src/__tests__/crawl.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | /* eslint-env jest */ 2 | import { jest } from '@jest/globals'; 3 | import type { AxiosResponse } from 'axios'; 4 | import type { MockAxiosInstance } from './types/mocks.js'; 5 | import type { Crawl4AIService as Crawl4AIServiceType } from '../crawl4ai-service.js'; 6 | 7 | // Manual mock for axios 8 | const mockAxios = { 9 | create: jest.fn(), 10 | }; 11 | 12 | jest.unstable_mockModule('axios', () => ({ 13 | default: mockAxios, 14 | })); 15 | 16 | // Import modules after mocking 17 | const { Crawl4AIService } = await import('../crawl4ai-service.js'); 18 | 19 | // Helper function to create a complete AxiosResponse object 20 | function createMockAxiosResponse<T>(data: T): AxiosResponse<T> { 21 | return { 22 | data, 23 | status: 200, 24 | statusText: 'OK', 25 | headers: {}, 26 | config: { 27 | url: '', 28 | method: 'post', 29 | headers: {}, 30 | }, 31 | } as AxiosResponse<T>; 32 | } 33 | 34 | describe('crawl parameter mapping', () => { 35 | let service: Crawl4AIServiceType; 36 | let mockAxiosInstance: MockAxiosInstance; 37 | 38 | beforeEach(() => { 39 | mockAxiosInstance = { 40 | post: jest.fn(), 41 | get: jest.fn(), 42 | head: jest.fn(), 43 | }; 44 | mockAxios.create.mockReturnValue(mockAxiosInstance); 45 | service = new Crawl4AIService('http://test.com', 'test-key'); 46 | }); 47 | 48 | afterEach(() => { 49 | jest.clearAllMocks(); 50 | }); 51 | 52 | describe('Browser configuration mapping', () => { 53 | it('should map all browser config parameters correctly', async () => { 54 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 55 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 56 | 57 | await service.crawl({ 58 | url: 'https://example.com', 59 | browser_config: { 60 | browser_type: 'firefox', 61 | headless: true, 62 | viewport_width: 1920, 63 | viewport_height: 1080, 64 | user_agent: 'Custom User Agent', 65 | proxy_config: { 66 | server: 'http://proxy.com:8080', 67 | username: 'proxyuser', 68 | password: 'proxypass', 69 | }, 70 | cookies: [{ name: 'session', value: 'abc123', domain: '.example.com', path: '/' }], 71 | headers: { 'X-Custom-Header': 'value' }, 72 | extra_args: ['--disable-gpu'], 73 | }, 74 | }); 75 | 76 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 77 | 
urls: ['https://example.com'], 78 | browser_config: { 79 | browser_type: 'firefox', 80 | headless: true, 81 | viewport_width: 1920, 82 | viewport_height: 1080, 83 | user_agent: 'Custom User Agent', 84 | proxy_config: { 85 | server: 'http://proxy.com:8080', 86 | username: 'proxyuser', 87 | password: 'proxypass', 88 | }, 89 | cookies: [{ name: 'session', value: 'abc123', domain: '.example.com', path: '/' }], 90 | headers: { 'X-Custom-Header': 'value' }, 91 | extra_args: ['--disable-gpu'], 92 | }, 93 | crawler_config: {}, 94 | }); 95 | }); 96 | 97 | it('should support undetected browser type', async () => { 98 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 99 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 100 | 101 | await service.crawl({ 102 | url: 'https://example.com', 103 | browser_config: { 104 | browser_type: 'undetected', 105 | headless: true, 106 | }, 107 | }); 108 | 109 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 110 | urls: ['https://example.com'], 111 | browser_config: { 112 | browser_type: 'undetected', 113 | headless: true, 114 | }, 115 | crawler_config: {}, 116 | }); 117 | }); 118 | 119 | it('should support unified proxy format (string)', async () => { 120 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 121 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 122 | 123 | await service.crawl({ 124 | url: 'https://example.com', 125 | browser_config: { 126 | proxy: 'http://user:[email protected]:8080', 127 | }, 128 | }); 129 | 130 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 131 | urls: ['https://example.com'], 132 | browser_config: { 133 | proxy: 'http://user:[email protected]:8080', 134 | }, 135 | crawler_config: {}, 136 | }); 137 | }); 138 | 139 | it('should support unified proxy format (object)', async () => { 140 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 141 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 142 | 143 | await service.crawl({ 144 | url: 'https://example.com', 145 | browser_config: { 146 | proxy: { 147 | server: 'http://proxy.example.com:8080', 148 | username: 'user', 149 | password: 'pass', 150 | }, 151 | }, 152 | }); 153 | 154 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 155 | urls: ['https://example.com'], 156 | browser_config: { 157 | proxy: { 158 | server: 'http://proxy.example.com:8080', 159 | username: 'user', 160 | password: 'pass', 161 | }, 162 | }, 163 | crawler_config: {}, 164 | }); 165 | }); 166 | }); 167 | 168 | describe('Crawler configuration mapping', () => { 169 | it('should map content filtering parameters', async () => { 170 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 171 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 172 | 173 | await service.crawl({ 174 | url: 'https://example.com', 175 | crawler_config: { 176 | word_count_threshold: 150, 177 | excluded_tags: ['nav', 'footer', 'aside'], 178 | excluded_selector: '#ads, .popup', 179 | remove_overlay_elements: true, 180 | only_text: true, 181 | remove_forms: true, 182 | keep_data_attributes: true, 183 | }, 184 | }); 185 | 186 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 187 | urls: ['https://example.com'], 188 | browser_config: undefined, 189 | crawler_config: { 190 | word_count_threshold: 150, 191 | excluded_tags: ['nav', 'footer', 'aside'], 192 | excluded_selector: '#ads, .popup', 193 | 
remove_overlay_elements: true, 194 | only_text: true, 195 | remove_forms: true, 196 | keep_data_attributes: true, 197 | }, 198 | }); 199 | }); 200 | 201 | it('should map JavaScript execution parameters', async () => { 202 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 203 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 204 | 205 | await service.crawl({ 206 | url: 'https://example.com', 207 | crawler_config: { 208 | js_code: ['document.querySelector(".load-more").click()', 'window.scrollTo(0, 1000)'], 209 | js_only: true, 210 | wait_for: '.content-loaded', 211 | wait_for_timeout: 10000, 212 | }, 213 | }); 214 | 215 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 216 | urls: ['https://example.com'], 217 | browser_config: undefined, 218 | crawler_config: { 219 | js_code: ['document.querySelector(".load-more").click()', 'window.scrollTo(0, 1000)'], 220 | js_only: true, 221 | wait_for: '.content-loaded', 222 | wait_for_timeout: 10000, 223 | }, 224 | }); 225 | }); 226 | 227 | it('should map page navigation and timing parameters', async () => { 228 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 229 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 230 | 231 | await service.crawl({ 232 | url: 'https://example.com', 233 | crawler_config: { 234 | wait_until: 'networkidle', 235 | page_timeout: 45000, 236 | wait_for_images: true, 237 | ignore_body_visibility: false, 238 | scan_full_page: true, 239 | delay_before_scroll: 2000, 240 | scroll_delay: 1000, 241 | }, 242 | }); 243 | 244 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 245 | urls: ['https://example.com'], 246 | browser_config: undefined, 247 | crawler_config: { 248 | wait_until: 'networkidle', 249 | page_timeout: 45000, 250 | wait_for_images: true, 251 | ignore_body_visibility: false, 252 | scan_full_page: true, 253 | delay_before_scroll: 2000, 254 | scroll_delay: 1000, 255 | }, 256 | }); 257 | }); 258 | 259 | it('should map media handling parameters', async () => { 260 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 261 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 262 | 263 | await service.crawl({ 264 | url: 'https://example.com', 265 | crawler_config: { 266 | screenshot: true, 267 | screenshot_wait_for: 2.5, 268 | pdf: true, 269 | capture_mhtml: true, 270 | image_description_min_word_threshold: 30, 271 | image_score_threshold: 5, 272 | exclude_external_images: true, 273 | }, 274 | }); 275 | 276 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 277 | urls: ['https://example.com'], 278 | browser_config: undefined, 279 | crawler_config: { 280 | screenshot: true, 281 | screenshot_wait_for: 2.5, 282 | pdf: true, 283 | capture_mhtml: true, 284 | image_description_min_word_threshold: 30, 285 | image_score_threshold: 5, 286 | exclude_external_images: true, 287 | }, 288 | }); 289 | }); 290 | 291 | it('should map link filtering parameters', async () => { 292 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 293 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 294 | 295 | await service.crawl({ 296 | url: 'https://example.com', 297 | crawler_config: { 298 | exclude_external_links: true, 299 | exclude_social_media_links: true, 300 | exclude_domains: ['ads.com', 'tracker.io', 'analytics.com'], 301 | }, 302 | }); 303 | 304 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 305 | urls: 
['https://example.com'], 306 | browser_config: undefined, 307 | crawler_config: { 308 | exclude_external_links: true, 309 | exclude_social_media_links: true, 310 | exclude_domains: ['ads.com', 'tracker.io', 'analytics.com'], 311 | }, 312 | }); 313 | }); 314 | 315 | it('should map page interaction parameters', async () => { 316 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 317 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 318 | 319 | await service.crawl({ 320 | url: 'https://example.com', 321 | crawler_config: { 322 | simulate_user: true, 323 | override_navigator: true, 324 | magic: true, 325 | process_iframes: true, 326 | }, 327 | }); 328 | 329 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 330 | urls: ['https://example.com'], 331 | browser_config: undefined, 332 | crawler_config: { 333 | simulate_user: true, 334 | override_navigator: true, 335 | magic: true, 336 | process_iframes: true, 337 | }, 338 | }); 339 | }); 340 | 341 | it('should map virtual scroll configuration', async () => { 342 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 343 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 344 | 345 | await service.crawl({ 346 | url: 'https://example.com', 347 | crawler_config: { 348 | virtual_scroll_config: { 349 | container_selector: '#timeline', 350 | scroll_count: 20, 351 | scroll_by: 'container_height', 352 | wait_after_scroll: 1.5, 353 | }, 354 | }, 355 | }); 356 | 357 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 358 | urls: ['https://example.com'], 359 | browser_config: undefined, 360 | crawler_config: { 361 | virtual_scroll_config: { 362 | container_selector: '#timeline', 363 | scroll_count: 20, 364 | scroll_by: 'container_height', 365 | wait_after_scroll: 1.5, 366 | }, 367 | }, 368 | }); 369 | }); 370 | 371 | // Note: Extraction strategies removed - not supported via REST API 372 | // Use extract_with_llm tool instead for structured data extraction 373 | 374 | it('should map session and cache parameters', async () => { 375 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 376 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 377 | 378 | await service.crawl({ 379 | url: 'https://example.com', 380 | crawler_config: { 381 | session_id: 'test-session-123', 382 | cache_mode: 'DISABLED', 383 | }, 384 | }); 385 | 386 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 387 | urls: ['https://example.com'], 388 | browser_config: undefined, 389 | crawler_config: { 390 | session_id: 'test-session-123', 391 | cache_mode: 'DISABLED', 392 | }, 393 | }); 394 | }); 395 | 396 | it('should map new crawler parameters', async () => { 397 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 398 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 399 | 400 | await service.crawl({ 401 | url: 'https://example.com', 402 | crawler_config: { 403 | delay_before_return_html: 2000, 404 | css_selector: '.main-content', 405 | include_links: true, 406 | resolve_absolute_urls: true, 407 | }, 408 | }); 409 | 410 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 411 | urls: ['https://example.com'], 412 | browser_config: undefined, 413 | crawler_config: { 414 | delay_before_return_html: 2000, 415 | css_selector: '.main-content', 416 | include_links: true, 417 | resolve_absolute_urls: true, 418 | }, 419 | }); 420 | }); 421 | 422 | it('should map performance 
and debug parameters', async () => { 423 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 424 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 425 | 426 | await service.crawl({ 427 | url: 'https://example.com', 428 | crawler_config: { 429 | timeout: 90000, 430 | verbose: true, 431 | log_console: true, 432 | }, 433 | }); 434 | 435 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 436 | urls: ['https://example.com'], 437 | browser_config: undefined, 438 | crawler_config: { 439 | timeout: 90000, 440 | verbose: true, 441 | log_console: true, 442 | }, 443 | }); 444 | }); 445 | }); 446 | 447 | describe('Extraction strategies', () => { 448 | it('should support extraction_strategy passthrough', async () => { 449 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 450 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 451 | 452 | await service.crawl({ 453 | url: 'https://example.com', 454 | extraction_strategy: { 455 | provider: 'openai', 456 | api_key: 'sk-test', 457 | model: 'gpt-4', 458 | temperature: 0.7, 459 | }, 460 | }); 461 | 462 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 463 | urls: ['https://example.com'], 464 | browser_config: undefined, 465 | crawler_config: {}, 466 | extraction_strategy: { 467 | provider: 'openai', 468 | api_key: 'sk-test', 469 | model: 'gpt-4', 470 | temperature: 0.7, 471 | }, 472 | }); 473 | }); 474 | 475 | it('should support table_extraction_strategy passthrough', async () => { 476 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 477 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 478 | 479 | await service.crawl({ 480 | url: 'https://example.com', 481 | table_extraction_strategy: { 482 | enable_chunking: true, 483 | thresholds: { 484 | min_rows: 5, 485 | max_columns: 20, 486 | }, 487 | }, 488 | }); 489 | 490 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 491 | urls: ['https://example.com'], 492 | browser_config: undefined, 493 | crawler_config: {}, 494 | table_extraction_strategy: { 495 | enable_chunking: true, 496 | thresholds: { 497 | min_rows: 5, 498 | max_columns: 20, 499 | }, 500 | }, 501 | }); 502 | }); 503 | 504 | it('should support markdown_generator_options passthrough', async () => { 505 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 506 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 507 | 508 | await service.crawl({ 509 | url: 'https://example.com', 510 | markdown_generator_options: { 511 | include_links: true, 512 | preserve_formatting: true, 513 | }, 514 | }); 515 | 516 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 517 | urls: ['https://example.com'], 518 | browser_config: undefined, 519 | crawler_config: {}, 520 | markdown_generator_options: { 521 | include_links: true, 522 | preserve_formatting: true, 523 | }, 524 | }); 525 | }); 526 | }); 527 | 528 | describe('Combined configurations', () => { 529 | it('should handle both browser and crawler configs together', async () => { 530 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 531 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 532 | 533 | await service.crawl({ 534 | url: 'https://example.com', 535 | browser_config: { 536 | viewport_width: 1920, 537 | viewport_height: 1080, 538 | user_agent: 'Custom Bot', 539 | }, 540 | crawler_config: { 541 | word_count_threshold: 100, 542 | 
js_code: 'document.querySelector(".accept").click()', 543 | wait_for: '.content', 544 | screenshot: true, 545 | session_id: 'test-session', 546 | cache_mode: 'BYPASS', 547 | }, 548 | }); 549 | 550 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 551 | urls: ['https://example.com'], 552 | browser_config: { 553 | viewport_width: 1920, 554 | viewport_height: 1080, 555 | user_agent: 'Custom Bot', 556 | }, 557 | crawler_config: { 558 | word_count_threshold: 100, 559 | js_code: 'document.querySelector(".accept").click()', 560 | wait_for: '.content', 561 | screenshot: true, 562 | session_id: 'test-session', 563 | cache_mode: 'BYPASS', 564 | }, 565 | }); 566 | }); 567 | }); 568 | 569 | describe('Edge cases', () => { 570 | it('should handle undefined values correctly', async () => { 571 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 572 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 573 | 574 | await service.crawl({ 575 | url: 'https://example.com', 576 | crawler_config: { 577 | word_count_threshold: 0, // Should be included (falsy but defined) 578 | excluded_tags: undefined, // Should not be included 579 | remove_overlay_elements: false, // Should be included 580 | only_text: undefined, // Should not be included 581 | }, 582 | }); 583 | 584 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 585 | urls: ['https://example.com'], 586 | browser_config: undefined, 587 | crawler_config: { 588 | word_count_threshold: 0, 589 | excluded_tags: undefined, 590 | remove_overlay_elements: false, 591 | only_text: undefined, 592 | }, 593 | }); 594 | }); 595 | 596 | it('should handle empty arrays correctly', async () => { 597 | const mockResponse = createMockAxiosResponse({ results: [{ markdown: 'test' }] }); 598 | mockAxiosInstance.post.mockResolvedValueOnce(mockResponse); 599 | 600 | await service.crawl({ 601 | url: 'https://example.com', 602 | crawler_config: { 603 | excluded_tags: [], 604 | exclude_domains: [], 605 | }, 606 | }); 607 | 608 | expect(mockAxiosInstance.post).toHaveBeenCalledWith('/crawl', { 609 | urls: ['https://example.com'], 610 | browser_config: undefined, 611 | crawler_config: { 612 | excluded_tags: [], 613 | exclude_domains: [], 614 | }, 615 | }); 616 | }); 617 | }); 618 | }); 619 | ```
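A note on the contract these mapping tests pin down: `Crawl4AIService.crawl()` takes a single `url` plus optional `browser_config`/`crawler_config` objects (and passthrough fields such as `extraction_strategy`), and POSTs them to `/crawl` with the URL wrapped in a `urls` array, `crawler_config` defaulting to `{}`, and an absent `browser_config` left `undefined`. A minimal sketch of that request shape, using only the field names asserted above — the class name, auth header, and option typing are illustrative assumptions, not the actual implementation in `src/crawl4ai-service.ts`:

```typescript
import axios, { AxiosInstance } from 'axios';

// Illustrative option bag: the named fields asserted by the tests above, plus
// a passthrough index for extraction_strategy / table_extraction_strategy /
// markdown_generator_options. Not the repo's real types.
interface CrawlOptions {
  url: string;
  browser_config?: Record<string, unknown>;
  crawler_config?: Record<string, unknown>;
  [extra: string]: unknown;
}

class MinimalCrawlClient {
  private readonly http: AxiosInstance;

  constructor(baseURL: string, apiKey?: string) {
    // Header name is an assumption for this sketch; the real client is
    // configured from CRAWL4AI_BASE_URL and CRAWL4AI_API_KEY.
    this.http = axios.create({
      baseURL,
      headers: apiKey ? { 'X-API-Key': apiKey } : {},
    });
  }

  async crawl({ url, browser_config, crawler_config = {}, ...rest }: CrawlOptions) {
    // The /crawl endpoint takes an array of URLs even for a single page;
    // both config objects and any strategy fields pass through unmodified.
    const response = await this.http.post('/crawl', {
      urls: [url],
      browser_config,
      crawler_config,
      ...rest,
    });
    return response.data; // e.g. { results: [{ markdown, html, ... }] }
  }
}

// Usage sketch:
// const client = new MinimalCrawlClient('http://localhost:11235', 'test-key');
// await client.crawl({ url: 'https://example.com', crawler_config: { screenshot: true } });
```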