This is page 6 of 8. Use http://codebase.md/bsmi021/mcp-gemini-server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .env.example
├── .eslintignore
├── .eslintrc.json
├── .gitignore
├── .prettierrc.json
├── Dockerfile
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── review-prompt.txt
├── scripts
│ ├── gemini-review.sh
│ └── run-with-health-check.sh
├── smithery.yaml
├── src
│ ├── config
│ │ └── ConfigurationManager.ts
│ ├── createServer.ts
│ ├── index.ts
│ ├── resources
│ │ └── system-prompt.md
│ ├── server.ts
│ ├── services
│ │ ├── ExampleService.ts
│ │ ├── gemini
│ │ │ ├── GeminiCacheService.ts
│ │ │ ├── GeminiChatService.ts
│ │ │ ├── GeminiContentService.ts
│ │ │ ├── GeminiGitDiffService.ts
│ │ │ ├── GeminiPromptTemplates.ts
│ │ │ ├── GeminiTypes.ts
│ │ │ ├── GeminiUrlContextService.ts
│ │ │ ├── GeminiValidationSchemas.ts
│ │ │ ├── GitHubApiService.ts
│ │ │ ├── GitHubUrlParser.ts
│ │ │ └── ModelMigrationService.ts
│ │ ├── GeminiService.ts
│ │ ├── index.ts
│ │ ├── mcp
│ │ │ ├── index.ts
│ │ │ └── McpClientService.ts
│ │ ├── ModelSelectionService.ts
│ │ ├── session
│ │ │ ├── index.ts
│ │ │ ├── InMemorySessionStore.ts
│ │ │ ├── SessionStore.ts
│ │ │ └── SQLiteSessionStore.ts
│ │ └── SessionService.ts
│ ├── tools
│ │ ├── exampleToolParams.ts
│ │ ├── geminiCacheParams.ts
│ │ ├── geminiCacheTool.ts
│ │ ├── geminiChatParams.ts
│ │ ├── geminiChatTool.ts
│ │ ├── geminiCodeReviewParams.ts
│ │ ├── geminiCodeReviewTool.ts
│ │ ├── geminiGenerateContentConsolidatedParams.ts
│ │ ├── geminiGenerateContentConsolidatedTool.ts
│ │ ├── geminiGenerateImageParams.ts
│ │ ├── geminiGenerateImageTool.ts
│ │ ├── geminiGenericParamSchemas.ts
│ │ ├── geminiRouteMessageParams.ts
│ │ ├── geminiRouteMessageTool.ts
│ │ ├── geminiUrlAnalysisTool.ts
│ │ ├── index.ts
│ │ ├── mcpClientParams.ts
│ │ ├── mcpClientTool.ts
│ │ ├── registration
│ │ │ ├── index.ts
│ │ │ ├── registerAllTools.ts
│ │ │ ├── ToolAdapter.ts
│ │ │ └── ToolRegistry.ts
│ │ ├── schemas
│ │ │ ├── BaseToolSchema.ts
│ │ │ ├── CommonSchemas.ts
│ │ │ ├── index.ts
│ │ │ ├── ToolSchemas.ts
│ │ │ └── writeToFileParams.ts
│ │ └── writeToFileTool.ts
│ ├── types
│ │ ├── exampleServiceTypes.ts
│ │ ├── geminiServiceTypes.ts
│ │ ├── gitdiff-parser.d.ts
│ │ ├── googleGenAI.d.ts
│ │ ├── googleGenAITypes.ts
│ │ ├── index.ts
│ │ ├── micromatch.d.ts
│ │ ├── modelcontextprotocol-sdk.d.ts
│ │ ├── node-fetch.d.ts
│ │ └── serverTypes.ts
│ └── utils
│ ├── errors.ts
│ ├── filePathSecurity.ts
│ ├── FileSecurityService.ts
│ ├── geminiErrors.ts
│ ├── healthCheck.ts
│ ├── index.ts
│ ├── logger.ts
│ ├── RetryService.ts
│ ├── ToolError.ts
│ └── UrlSecurityService.ts
├── tests
│ ├── .env.test.example
│ ├── basic-router.test.vitest.ts
│ ├── e2e
│ │ ├── clients
│ │ │ └── mcp-test-client.ts
│ │ ├── README.md
│ │ └── streamableHttpTransport.test.vitest.ts
│ ├── integration
│ │ ├── dummyMcpServerSse.ts
│ │ ├── dummyMcpServerStdio.ts
│ │ ├── geminiRouterIntegration.test.vitest.ts
│ │ ├── mcpClientIntegration.test.vitest.ts
│ │ ├── multiModelIntegration.test.vitest.ts
│ │ └── urlContextIntegration.test.vitest.ts
│ ├── tsconfig.test.json
│ ├── unit
│ │ ├── config
│ │ │ └── ConfigurationManager.multimodel.test.vitest.ts
│ │ ├── server
│ │ │ └── transportLogic.test.vitest.ts
│ │ ├── services
│ │ │ ├── gemini
│ │ │ │ ├── GeminiChatService.test.vitest.ts
│ │ │ │ ├── GeminiGitDiffService.test.vitest.ts
│ │ │ │ ├── geminiImageGeneration.test.vitest.ts
│ │ │ │ ├── GeminiPromptTemplates.test.vitest.ts
│ │ │ │ ├── GeminiUrlContextService.test.vitest.ts
│ │ │ │ ├── GeminiValidationSchemas.test.vitest.ts
│ │ │ │ ├── GitHubApiService.test.vitest.ts
│ │ │ │ ├── GitHubUrlParser.test.vitest.ts
│ │ │ │ └── ThinkingBudget.test.vitest.ts
│ │ │ ├── mcp
│ │ │ │ └── McpClientService.test.vitest.ts
│ │ │ ├── ModelSelectionService.test.vitest.ts
│ │ │ └── session
│ │ │ └── SQLiteSessionStore.test.vitest.ts
│ │ ├── tools
│ │ │ ├── geminiCacheTool.test.vitest.ts
│ │ │ ├── geminiChatTool.test.vitest.ts
│ │ │ ├── geminiCodeReviewTool.test.vitest.ts
│ │ │ ├── geminiGenerateContentConsolidatedTool.test.vitest.ts
│ │ │ ├── geminiGenerateImageTool.test.vitest.ts
│ │ │ ├── geminiRouteMessageTool.test.vitest.ts
│ │ │ ├── mcpClientTool.test.vitest.ts
│ │ │ ├── mcpToolsTests.test.vitest.ts
│ │ │ └── schemas
│ │ │ ├── BaseToolSchema.test.vitest.ts
│ │ │ ├── ToolParamSchemas.test.vitest.ts
│ │ │ └── ToolSchemas.test.vitest.ts
│ │ └── utils
│ │ ├── errors.test.vitest.ts
│ │ ├── FileSecurityService.test.vitest.ts
│ │ ├── FileSecurityService.vitest.ts
│ │ ├── FileSecurityServiceBasics.test.vitest.ts
│ │ ├── healthCheck.test.vitest.ts
│ │ ├── RetryService.test.vitest.ts
│ │ └── UrlSecurityService.test.vitest.ts
│ └── utils
│ ├── assertions.ts
│ ├── debug-error.ts
│ ├── env-check.ts
│ ├── environment.ts
│ ├── error-helpers.ts
│ ├── express-mocks.ts
│ ├── integration-types.ts
│ ├── mock-types.ts
│ ├── test-fixtures.ts
│ ├── test-generators.ts
│ ├── test-setup.ts
│ └── vitest.d.ts
├── tsconfig.json
├── tsconfig.test.json
├── vitest-globals.d.ts
├── vitest.config.ts
└── vitest.setup.ts
```
# Files
--------------------------------------------------------------------------------
/tests/unit/utils/FileSecurityService.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | // Using vitest globals - see vitest.config.ts globals: true
2 | import * as path from "path";
3 | import * as fs from "fs/promises";
4 | import * as fsSync from "fs";
5 |
6 | // Import the code to test
7 | import { FileSecurityService } from "../../../src/utils/FileSecurityService.js";
8 | import { ValidationError } from "../../../src/utils/errors.js";
9 | import { logger } from "../../../src/utils/logger.js";
10 |
11 | describe("FileSecurityService", () => {
// Stand-ins for each logger level so tests run without real log output
const loggerMock = {
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
  debug: vi.fn(),
};

// Shared fixtures: file payload plus one "allowed" and one "outside" scratch directory
const TEST_CONTENT = "Test file content";
const TEST_DIR = path.resolve("./test-security-dir");
const OUTSIDE_DIR = path.resolve("./outside-security-dir");

// Runs before every test: reset mocks, reroute the logger, (re)create scratch directories
beforeEach(() => {
  vi.clearAllMocks();

  // Route every logger level to its mock counterpart
  vi.spyOn(logger, "info").mockImplementation(loggerMock.info);
  vi.spyOn(logger, "warn").mockImplementation(loggerMock.warn);
  vi.spyOn(logger, "error").mockImplementation(loggerMock.error);
  vi.spyOn(logger, "debug").mockImplementation(loggerMock.debug);

  // Both scratch directories must exist before a test touches the file system
  for (const scratchDir of [TEST_DIR, OUTSIDE_DIR]) {
    fsSync.mkdirSync(scratchDir, { recursive: true });
  }
});

// Runs after every test: put the real logger back and remove scratch directories
afterEach(() => {
  vi.restoreAllMocks();

  try {
    // Removal is best-effort; the first failure aborts the remaining removals
    for (const scratchDir of [TEST_DIR, OUTSIDE_DIR]) {
      fsSync.rmSync(scratchDir, { recursive: true, force: true });
    }
  } catch {
    // Cleanup problems are deliberately ignored
  }
});
54 |
describe("Constructor and Configuration", () => {
  // FileSecurityService reads GEMINI_SAFE_FILE_BASE_DIR while constructing, so
  // every test in this group starts with the variable cleared. The caller's
  // original value is put back afterwards, even when an assertion fails.
  const BASE_DIR_ENV = "GEMINI_SAFE_FILE_BASE_DIR";
  let savedBaseDirEnv: string | undefined;

  beforeEach(() => {
    savedBaseDirEnv = process.env[BASE_DIR_ENV];
    delete process.env[BASE_DIR_ENV];
  });

  afterEach(() => {
    // Compare against undefined (not truthiness) so an empty-string value survives
    if (savedBaseDirEnv === undefined) {
      delete process.env[BASE_DIR_ENV];
    } else {
      process.env[BASE_DIR_ENV] = savedBaseDirEnv;
    }
  });

  it("should initialize with default allowed directories", () => {
    const service = new FileSecurityService();
    const allowedDirs = service.getAllowedDirectories();

    expect(allowedDirs.length).toBeGreaterThan(0);
    expect(allowedDirs).toContain(path.resolve(process.cwd()));
  });

  it("should initialize with custom allowed directories", () => {
    const customDirs = [TEST_DIR, OUTSIDE_DIR];
    const service = new FileSecurityService(customDirs);
    const allowedDirs = service.getAllowedDirectories();

    expect(allowedDirs.length).toBe(2);
    expect(allowedDirs).toContain(path.resolve(TEST_DIR));
    expect(allowedDirs).toContain(path.resolve(OUTSIDE_DIR));
  });

  it("should initialize with a secure base path", () => {
    const service = new FileSecurityService([], TEST_DIR);
    const basePath = service.getSecureBasePath();

    expect(basePath).toBe(path.normalize(TEST_DIR));

    // With no explicit allow-list, the base path becomes the allowed directory
    const allowedDirs = service.getAllowedDirectories();
    expect(allowedDirs).toContain(path.normalize(TEST_DIR));
  });

  it("should set allowed directories", () => {
    const service = new FileSecurityService();
    const newDirs = [TEST_DIR, OUTSIDE_DIR];

    service.setAllowedDirectories(newDirs);
    const allowedDirs = service.getAllowedDirectories();

    expect(allowedDirs.length).toBe(2);
    expect(allowedDirs).toContain(path.normalize(TEST_DIR));
    expect(allowedDirs).toContain(path.normalize(OUTSIDE_DIR));
  });

  it("should throw error when setting empty allowed directories", () => {
    const service = new FileSecurityService();

    expect(() => service.setAllowedDirectories([])).toThrow(ValidationError);
    expect(() => service.setAllowedDirectories([])).toThrow(
      /At least one allowed directory/
    );
  });

  it("should throw error when setting non-absolute allowed directories", () => {
    const service = new FileSecurityService();

    expect(() => service.setAllowedDirectories(["./relative/path"])).toThrow(
      ValidationError
    );
    expect(() => service.setAllowedDirectories(["./relative/path"])).toThrow(
      /Directory path must be absolute/
    );
  });

  it("should set and get secure base path", () => {
    const service = new FileSecurityService();
    service.setSecureBasePath(TEST_DIR);

    const basePath = service.getSecureBasePath();
    expect(basePath).toBe(path.normalize(TEST_DIR));
  });

  it("should throw error when setting non-absolute secure base path", () => {
    const service = new FileSecurityService();

    expect(() => service.setSecureBasePath("./relative/path")).toThrow(
      ValidationError
    );
    expect(() => service.setSecureBasePath("./relative/path")).toThrow(
      /Base path must be absolute/
    );
  });

  it("should configure from environment", () => {
    // The group-level afterEach restores the variable, so no manual cleanup here
    process.env[BASE_DIR_ENV] = TEST_DIR;

    const service = FileSecurityService.configureFromEnvironment();
    const allowedDirs = service.getAllowedDirectories();

    expect(allowedDirs).toContain(path.normalize(TEST_DIR));
  });
});
156 |
describe("Path Validation", () => {
  let service: FileSecurityService;

  // Each test gets a service whose only allowed directory is TEST_DIR
  beforeEach(() => {
    service = new FileSecurityService([TEST_DIR]);
  });

  it("should validate path within allowed directory", () => {
    const candidate = path.join(TEST_DIR, "test-file.txt");

    expect(service.validateAndResolvePath(candidate)).toBe(
      path.normalize(candidate)
    );
  });

  it("should validate paths with relative components", () => {
    // "." and "subdir/.." cancel out, leaving TEST_DIR/test-file.txt
    const segments = [TEST_DIR, ".", "subdir", "..", "test-file.txt"];
    const result = service.validateAndResolvePath(path.join(...segments));

    const flattened = path.normalize(path.join(TEST_DIR, "test-file.txt"));
    expect(result).toBe(flattened);
  });

  it("should reject paths outside allowed directories", () => {
    const attempt = () =>
      service.validateAndResolvePath(path.join(OUTSIDE_DIR, "test-file.txt"));

    expect(attempt).toThrow(ValidationError);
    expect(attempt).toThrow(/Access denied/);
  });

  it("should reject paths with directory traversal", () => {
    const escaping = path.join(TEST_DIR, "..", "outside", "test-file.txt");
    const attempt = () => service.validateAndResolvePath(escaping);

    expect(attempt).toThrow(ValidationError);
    expect(attempt).toThrow(/Access denied/);
  });

  it("should check file existence with mustExist option", () => {
    const missing = path.join(TEST_DIR, "non-existent.txt");
    const attempt = () =>
      service.validateAndResolvePath(missing, { mustExist: true });

    expect(attempt).toThrow(ValidationError);
    expect(attempt).toThrow(/File not found/);
  });

  it("should use custom allowed directories when provided", () => {
    // Outside the service's configured directory, but inside the per-call allow-list
    const candidate = path.join(OUTSIDE_DIR, "custom-allowed.txt");
    const callOptions = { allowedDirs: [OUTSIDE_DIR] };

    const result = service.validateAndResolvePath(candidate, callOptions);

    expect(result).toBe(path.normalize(candidate));
  });
});
235 |
describe("isPathWithinAllowedDirs", () => {
  let service: FileSecurityService;

  // Each test gets a service whose only allowed directory is TEST_DIR
  beforeEach(() => {
    service = new FileSecurityService([TEST_DIR]);
  });

  it("should return true for paths within allowed directories", () => {
    const inside = path.join(TEST_DIR, "test-file.txt");

    expect(service.isPathWithinAllowedDirs(inside)).toBe(true);
  });

  it("should return true for exact match with allowed directory", () => {
    expect(service.isPathWithinAllowedDirs(TEST_DIR)).toBe(true);
  });

  it("should return false for paths outside allowed directories", () => {
    const outside = path.join(OUTSIDE_DIR, "test-file.txt");

    expect(service.isPathWithinAllowedDirs(outside)).toBe(false);
  });

  it("should return false for paths with directory traversal", () => {
    // ".." climbs out of TEST_DIR before descending again
    const escaping = path.join(TEST_DIR, "..", "outside", "test-file.txt");

    expect(service.isPathWithinAllowedDirs(escaping)).toBe(false);
  });

  it("should use custom allowed directories when provided", () => {
    const outside = path.join(OUTSIDE_DIR, "test-file.txt");

    // Instance allow-list: rejected
    const withDefaults = service.isPathWithinAllowedDirs(outside);
    expect(withDefaults).toBe(false);

    // Per-call allow-list: accepted
    const withOverride = service.isPathWithinAllowedDirs(outside, [
      OUTSIDE_DIR,
    ]);
    expect(withOverride).toBe(true);
  });

  it("should return false when no allowed directories exist", () => {
    const noDirs: string[] = [];

    expect(service.isPathWithinAllowedDirs(TEST_DIR, noDirs)).toBe(false);
  });
});
293 |
describe("fullyResolvePath", () => {
  let service: FileSecurityService;

  // Default service for this group allows both scratch directories
  beforeEach(() => {
    service = new FileSecurityService([TEST_DIR, OUTSIDE_DIR]);
  });

  it("should resolve a normal file path", async () => {
    const plainPath = path.join(TEST_DIR, "test-file.txt");

    const resolved = await service.fullyResolvePath(plainPath);

    expect(resolved).toBe(path.normalize(plainPath));
  });

  it("should handle non-existent paths", async () => {
    const missingPath = path.join(TEST_DIR, "non-existent", "test-file.txt");

    const resolved = await service.fullyResolvePath(missingPath);

    expect(resolved).toBe(path.normalize(missingPath));
  });

  it("should resolve and validate a symlink to a file", async () => {
    // Real file plus a symlink to it, both inside TEST_DIR
    const realFile = path.join(TEST_DIR, "target.txt");
    const link = path.join(TEST_DIR, "symlink.txt");
    await fs.writeFile(realFile, TEST_CONTENT, "utf8");
    await fs.symlink(realFile, link);

    const resolved = await service.fullyResolvePath(link);

    // The link resolves to the file it points at
    expect(resolved).toBe(path.normalize(realFile));
  });

  it("should reject symlinks pointing outside allowed directories", async () => {
    // Real file lives in OUTSIDE_DIR; the link to it lives in TEST_DIR
    const realFile = path.join(OUTSIDE_DIR, "target.txt");
    const link = path.join(TEST_DIR, "bad-symlink.txt");
    await fs.writeFile(realFile, TEST_CONTENT, "utf8");

    // Only TEST_DIR is allowed for this service
    const restrictedService = new FileSecurityService([TEST_DIR]);

    await fs.symlink(realFile, link);

    // A fresh call per assertion, each expected to reject
    const attempt = () => restrictedService.fullyResolvePath(link);
    await expect(attempt()).rejects.toThrow(ValidationError);
    await expect(attempt()).rejects.toThrow(/Security error/);
    await expect(attempt()).rejects.toThrow(/outside allowed directories/);
  });

  it("should detect and validate symlinked parent directories", async () => {
    // Real directory and a directory symlink to it, both inside TEST_DIR
    const realDir = path.join(TEST_DIR, "target-dir");
    const linkDir = path.join(TEST_DIR, "symlink-dir");
    await fs.mkdir(realDir, { recursive: true });
    await fs.symlink(realDir, linkDir);

    // Path that passes through the symlinked directory
    const viaLink = path.join(linkDir, "test-file.txt");

    const resolved = await service.fullyResolvePath(viaLink);

    // The result should name the file under the real directory
    const viaRealDir = path.join(realDir, "test-file.txt");
    expect(resolved).toBe(path.normalize(viaRealDir));
  });

  it("should reject symlinked parent directories pointing outside allowed directories", async () => {
    // Real directory lives in OUTSIDE_DIR; the directory symlink lives in TEST_DIR
    const realDir = path.join(OUTSIDE_DIR, "target-dir");
    const linkDir = path.join(TEST_DIR, "bad-symlink-dir");
    await fs.mkdir(realDir, { recursive: true });
    await fs.symlink(realDir, linkDir);

    // Path that passes through the symlinked directory
    const viaLink = path.join(linkDir, "test-file.txt");

    // Only TEST_DIR is allowed for this service
    const restrictedService = new FileSecurityService([TEST_DIR]);

    // A fresh call per assertion, each expected to reject
    const attempt = () => restrictedService.fullyResolvePath(viaLink);
    await expect(attempt()).rejects.toThrow(ValidationError);
    await expect(attempt()).rejects.toThrow(/Security error/);
  });
});
404 |
describe("secureWriteFile", () => {
  let service: FileSecurityService;

  // Each test gets a service whose only allowed directory is TEST_DIR
  beforeEach(() => {
    service = new FileSecurityService([TEST_DIR]);
  });

  it("should write file to an allowed directory", async () => {
    const destination = path.join(TEST_DIR, "test-file.txt");

    await service.secureWriteFile(destination, TEST_CONTENT);

    // Read back what was written
    expect(await fs.readFile(destination, "utf8")).toBe(TEST_CONTENT);
  });

  it("should create directories if they don't exist", async () => {
    const destination = path.join(TEST_DIR, "nested", "deep", "test-file.txt");

    await service.secureWriteFile(destination, TEST_CONTENT);

    // Reading succeeds only if the intermediate directories were created
    expect(await fs.readFile(destination, "utf8")).toBe(TEST_CONTENT);
  });

  it("should reject writing outside allowed directories", async () => {
    const destination = path.join(OUTSIDE_DIR, "test-file.txt");
    const attempt = () => service.secureWriteFile(destination, TEST_CONTENT);

    await expect(attempt()).rejects.toThrow(ValidationError);
    await expect(attempt()).rejects.toThrow(/Access denied/);

    // Nothing may have been created at the rejected location
    await expect(fs.access(destination)).rejects.toThrow();
  });

  it("should reject overwriting existing files by default", async () => {
    const destination = path.join(TEST_DIR, "existing-file.txt");

    // Seed the file so the write below collides with it
    await fs.writeFile(destination, "Original content", "utf8");

    // No overwrite flag: each attempt must be refused
    const attempt = () => service.secureWriteFile(destination, TEST_CONTENT);
    await expect(attempt()).rejects.toThrow(ValidationError);
    await expect(attempt()).rejects.toThrow(/File already exists/);

    // The seeded content must be untouched
    expect(await fs.readFile(destination, "utf8")).toBe("Original content");
  });

  it("should allow overwriting existing files with overwrite flag", async () => {
    const destination = path.join(TEST_DIR, "existing-file.txt");

    // Seed the file so the write below collides with it
    await fs.writeFile(destination, "Original content", "utf8");

    const writeOptions = { overwrite: true };
    await service.secureWriteFile(destination, TEST_CONTENT, writeOptions);

    // The new content replaces the seeded content
    expect(await fs.readFile(destination, "utf8")).toBe(TEST_CONTENT);
  });

  it("should support custom allowed directories", async () => {
    // Outside the service's configured directories, inside the per-call allow-list
    const destination = path.join(OUTSIDE_DIR, "custom-allowed.txt");
    const writeOptions = { allowedDirs: [OUTSIDE_DIR] };

    await service.secureWriteFile(destination, TEST_CONTENT, writeOptions);

    // Read back what was written
    expect(await fs.readFile(destination, "utf8")).toBe(TEST_CONTENT);
  });
});
500 | });
501 |
```
--------------------------------------------------------------------------------
/src/utils/FileSecurityService.ts:
--------------------------------------------------------------------------------
```typescript
1 | import * as fs from "fs/promises";
2 | import * as fsSync from "fs";
3 | import * as path from "path";
4 | import { logger } from "./logger.js";
5 | import { ValidationError } from "./errors.js";
6 |
/**
 * Type guard that reports whether a caught value is a "file not found" error,
 * i.e. a non-null object whose `code` property equals `"ENOENT"`.
 *
 * Declared as a type predicate so that callers get `err.code` narrowed in the
 * true branch, matching how `hasErrorMessage` narrows `message`.
 *
 * @param err - The caught value to inspect (may be anything, including null)
 * @returns True if `err` is an object with `code === "ENOENT"`
 */
function isENOENTError(err: unknown): err is { code: "ENOENT" } {
  return (
    typeof err === "object" &&
    err !== null &&
    "code" in err &&
    err.code === "ENOENT"
  );
}
20 |
/**
 * Type guard that narrows an unknown value to an object carrying a string
 * `message` property.
 *
 * @param err - The value to inspect
 * @returns True when `err` is a non-null object whose `message` is a string
 */
function hasErrorMessage(err: unknown): err is { message: string } {
  // Primitives and null cannot carry a message property
  if (err === null || typeof err !== "object") {
    return false;
  }

  return "message" in err && typeof err.message === "string";
}
34 |
35 | /**
36 | * Centralized service for handling file-related security operations
37 | * Provides comprehensive validation, resolution, and secure file operations
38 | */
39 | export class FileSecurityService {
40 | private allowedDirectories: string[] = [];
41 | private secureBasePath?: string;
42 |
43 | // Default safe base directory - using the project root as the default
44 | private readonly DEFAULT_SAFE_BASE_DIR: string =
45 | process.env.GEMINI_SAFE_FILE_BASE_DIR || path.resolve(process.cwd());
46 |
/**
 * Creates a new instance of the FileSecurityService.
 *
 * Secure base path: the `secureBasePath` argument wins when provided;
 * otherwise the GEMINI_SAFE_FILE_BASE_DIR environment variable is used; if
 * neither is set the base path stays undefined. Whichever source is used, the
 * value is stored normalized.
 *
 * Allowed directories: a non-empty `allowedDirectories` argument is validated
 * and stored via `setAllowedDirectories`; otherwise the secure base path (if
 * any) becomes the single allowed directory; otherwise the current working
 * directory is used.
 *
 * @param allowedDirectories Optional array of allowed directories for file operations
 * @param secureBasePath Optional single secure base path (takes precedence over env vars)
 * @throws ValidationError if any entry of a non-empty `allowedDirectories` is not absolute
 */
constructor(allowedDirectories?: string[], secureBasePath?: string) {
  // Explicit argument first, environment variable as fallback. Empty strings
  // count as "not set" in both cases.
  const configuredBasePath =
    secureBasePath || process.env.GEMINI_SAFE_FILE_BASE_DIR;
  this.secureBasePath = configuredBasePath
    ? path.normalize(configuredBasePath)
    : undefined;

  // Initialize allowed directories
  if (allowedDirectories && allowedDirectories.length > 0) {
    this.setAllowedDirectories(allowedDirectories);
  } else if (this.secureBasePath) {
    this.allowedDirectories = [this.secureBasePath];
  } else {
    this.allowedDirectories = [path.resolve(process.cwd())];
  }

  logger.info(
    `File operations restricted to: ${this.allowedDirectories.join(", ")}`
  );
}
71 |
/**
 * Sets the secure base directory for file operations.
 *
 * The path is stored normalized and, if it is not already present, appended
 * to the list of allowed directories.
 *
 * @param basePath The absolute path to restrict file operations to
 * @throws ValidationError if `basePath` is not absolute
 */
public setSecureBasePath(basePath: string): void {
  const isAbsoluteBase = path.isAbsolute(basePath);
  if (!isAbsoluteBase) {
    throw new ValidationError("Base path must be absolute");
  }

  const normalizedBase = path.normalize(basePath);
  this.secureBasePath = normalizedBase;

  // Keep the allow-list in sync without introducing a duplicate entry
  const alreadyAllowed = this.allowedDirectories.indexOf(normalizedBase) !== -1;
  if (!alreadyAllowed) {
    this.allowedDirectories.push(normalizedBase);
  }

  logger.debug(`Secure base path set to: ${this.secureBasePath}`);
}
91 |
/**
 * Returns the secure base directory currently stored on this instance.
 *
 * @returns The stored base path, or undefined when none has been set
 */
public getSecureBasePath(): string | undefined {
  const { secureBasePath } = this;
  return secureBasePath;
}
98 |
/**
 * Replaces the list of directories in which file operations are permitted.
 *
 * Every entry must be an absolute path; entries are stored normalized.
 *
 * @param directories Array of absolute paths allowed for file operations
 * @throws ValidationError if the array is missing/empty or contains a non-absolute path
 */
public setAllowedDirectories(directories: string[]): void {
  const nothingProvided = !directories || directories.length === 0;
  if (nothingProvided) {
    throw new ValidationError(
      "At least one allowed directory must be provided"
    );
  }

  // Report the first offending entry, in input order
  const firstRelative = directories.find(
    (candidate) => !path.isAbsolute(candidate)
  );
  if (firstRelative !== undefined) {
    throw new ValidationError(
      `Directory path must be absolute: ${firstRelative}`
    );
  }

  // Store normalized copies of the validated paths
  const normalizedDirs: string[] = [];
  for (const candidate of directories) {
    normalizedDirs.push(path.normalize(candidate));
  }
  this.allowedDirectories = normalizedDirs;

  logger.debug(
    `Allowed directories set to: ${this.allowedDirectories.join(", ")}`
  );
}
123 |
/**
 * Returns the directories in which file operations are currently permitted.
 *
 * @returns A new array, so changes made by the caller do not affect the
 *          list held by this instance
 */
public getAllowedDirectories(): string[] {
  return this.allowedDirectories.slice();
}
130 |
/**
 * Checks that a file path lies inside a permitted directory and returns it as
 * a normalized absolute path.
 *
 * The permitted directories are chosen in this order: `options.allowedDirs`,
 * then `options.basePath` (as a single directory), then the instance's own
 * allowed directories. Relative paths are resolved against the instance's
 * secure base path, or the current working directory when none is set.
 *
 * @param filePath The file path to validate
 * @param options Optional configuration (`mustExist`, `allowedDirs`, `basePath`)
 * @returns The validated absolute file path
 * @throws ValidationError if the path is outside the permitted directories, or
 *         if `mustExist` is set and the path cannot be accessed
 */
public validateAndResolvePath(
  filePath: string,
  options: {
    mustExist?: boolean;
    allowedDirs?: string[];
    basePath?: string;
  } = {}
): string {
  const {
    mustExist: requireExisting = false,
    allowedDirs: overrideDirs,
    basePath: overrideBase,
  } = options;

  // Explicit list > single base path > instance default
  let permittedDirs: string[];
  if (overrideDirs) {
    permittedDirs = overrideDirs;
  } else if (overrideBase) {
    permittedDirs = [path.normalize(overrideBase)];
  } else {
    permittedDirs = this.allowedDirectories;
  }

  logger.debug(`Validating file path: ${filePath}`);
  logger.debug(`Using allowed directories: ${permittedDirs.join(", ")}`);

  // Make the path absolute, then collapse any "." and ".." segments
  let candidate = filePath;
  if (!path.isAbsolute(candidate)) {
    candidate = path.resolve(this.secureBasePath || process.cwd(), candidate);
  }
  const normalizedPath = path.normalize(candidate);

  const permitted = this.isPathWithinAllowedDirs(normalizedPath, permittedDirs);
  if (!permitted) {
    logger.warn(`Access denied: Path not in allowed directories: ${filePath}`);
    throw new ValidationError(
      `Access denied: The file path must be within the allowed directories`
    );
  }

  // Existence is verified only on request
  if (requireExisting) {
    try {
      fsSync.accessSync(normalizedPath, fsSync.constants.F_OK);
    } catch {
      logger.warn(`File not found: ${normalizedPath}`);
      throw new ValidationError(`File not found: ${normalizedPath}`);
    }
  }

  logger.debug(`Validated path: ${normalizedPath}`);
  return normalizedPath;
}
191 |
/**
 * Checks if a given file path is within any of the allowed directories.
 *
 * Both the file path and each allowed directory are resolved to absolute,
 * normalized paths (relative inputs resolve against the current working
 * directory). Containment is then decided from `path.relative`: the path is
 * inside when it equals the directory, or when the relative path neither
 * climbs out through a leading ".." segment nor is absolute. This check is
 * purely lexical; symlinks are not followed here.
 *
 * Comparing whole segments, rather than a raw string prefix, keeps entries
 * whose names merely begin with ".." (e.g. "..hidden") inside the directory,
 * and also works when an allowed directory is a filesystem root.
 *
 * @param filePath The relative or absolute path to check.
 * @param allowedDirs Optional array of allowed directory paths (defaults to instance's allowed directories)
 * @returns True if the file path is within any of the allowed directories, false otherwise.
 */
public isPathWithinAllowedDirs(
  filePath: string,
  allowedDirs?: string[]
): boolean {
  // Use instance's allowed directories if none provided
  const effectiveAllowedDirs = allowedDirs || this.allowedDirectories;

  // An empty allow-list permits nothing
  if (!effectiveAllowedDirs || effectiveAllowedDirs.length === 0) {
    return false;
  }

  // path.resolve yields an absolute path that is already normalized
  const resolvedFilePath = path.resolve(filePath);
  const parentPrefix = ".." + path.sep;

  return effectiveAllowedDirs.some((allowedDir) => {
    const resolvedAllowedDir = path.resolve(allowedDir);
    const relativePath = path.relative(resolvedAllowedDir, resolvedFilePath);

    // Empty relative path: the file path is the allowed directory itself
    if (relativePath === "") {
      return true;
    }

    // Outside if the first segment is exactly "..". An absolute result
    // (e.g. a different drive on Windows) is also outside.
    const climbsOut =
      relativePath === ".." || relativePath.startsWith(parentPrefix);
    return !climbsOut && !path.isAbsolute(relativePath);
  });
}
241 |
/**
 * Fully resolves a file path, following symlinks, and verifies that the
 * location that would actually be touched lies inside the allowed directories.
 *
 * When neither the target nor any of its ancestors is a symlink, the
 * normalized absolute path is returned unchanged. As soon as a symlink is
 * involved, the path is canonicalized (every hop of every link is followed,
 * including links that sit above a not-yet-existing file) and the canonical
 * location is checked against the allowed directories before it is returned.
 *
 * @param filePath The file path to resolve
 * @returns The normalized path, or the canonical path when symlinks are involved
 * @throws ValidationError if a symlink leads outside the allowed directories,
 *                         or if the path cannot be inspected
 */
public async fullyResolvePath(filePath: string): Promise<string> {
  const normalizedPath = path.normalize(path.resolve(filePath));

  try {
    let symlinkSeen = false;
    let candidatePath = normalizedPath;

    // 1. Is the target itself a symlink?
    try {
      const stats = await fs.lstat(normalizedPath);
      if (stats.isSymbolicLink()) {
        logger.warn(`Path is a symlink: ${normalizedPath}`);
        const target = await fs.readlink(normalizedPath);
        const resolvedTarget = path.resolve(
          path.dirname(normalizedPath),
          target
        );

        // Reject early when even the first hop leaves the allowed area
        if (!this.isPathWithinAllowedDirs(resolvedTarget)) {
          throw new ValidationError(
            `Security error: Symlink target is outside allowed directories: ${resolvedTarget}`
          );
        }

        symlinkSeen = true;
        candidatePath = resolvedTarget;
      }
    } catch (err) {
      // A missing target is fine (e.g. a file that is about to be created)
      if (!isENOENTError(err)) {
        throw err;
      }
    }

    // 2. Is any ancestor directory a symlink? Walk all the way up to the
    //    root; an early exit here would leave higher ancestors unchecked.
    const firstParent = path.dirname(normalizedPath);
    const root = path.parse(firstParent).root;
    for (let dir = firstParent; dir !== root; dir = path.dirname(dir)) {
      try {
        const dirStats = await fs.lstat(dir);
        if (dirStats.isSymbolicLink()) {
          const linkTarget = await fs.readlink(dir);
          const resolvedDir = path.resolve(path.dirname(dir), linkTarget);
          logger.warn(`Parent directory is a symlink: ${dir} -> ${resolvedDir}`);
          symlinkSeen = true;
        }
      } catch (err) {
        if (!isENOENTError(err)) {
          throw err;
        }
      }
    }

    // No symlinks anywhere: the normalized path is already the real location
    if (!symlinkSeen) {
      return normalizedPath;
    }

    // 3. Symlinks are involved: validate the canonical location, not a
    //    single-hop approximation of it.
    const canonicalPath = await this.canonicalizeThroughSymlinks(candidatePath);
    if (!this.isPathWithinAllowedDirs(canonicalPath)) {
      throw new ValidationError(
        `Security error: Resolved path is outside allowed directories: ${canonicalPath}`
      );
    }

    return canonicalPath;
  } catch (err) {
    if (hasErrorMessage(err) && err.message.includes("Security error:")) {
      // Re-throw security errors untouched
      throw err;
    }
    // For other errors, provide a clearer message
    const errorMsg = hasErrorMessage(err) ? err.message : String(err);
    logger.error(`Error resolving path: ${errorMsg}`, err);
    throw new ValidationError(`Error validating path security: ${errorMsg}`);
  }
}

/**
 * Canonicalizes a path whose tail may not exist yet.
 *
 * Uses fs.realpath on the deepest prefix that resolves and re-appends the
 * missing segments. A dangling symlink found on the way is followed by hand,
 * because realpath reports ENOENT for it even though a later write would
 * still go through the link.
 *
 * @param targetPath Absolute path to canonicalize
 * @returns The canonical path with all resolvable symlinks followed
 * @throws ValidationError when too many dangling links are chained
 */
private async canonicalizeThroughSymlinks(targetPath: string): Promise<string> {
  const maxDanglingHops = 40;
  const missingSegments: string[] = [];
  let current = targetPath;
  let hops = 0;

  for (;;) {
    try {
      const real = await fs.realpath(current);
      return path.join(real, ...missingSegments);
    } catch (err) {
      if (!isENOENTError(err)) {
        throw err;
      }
    }

    // `current` does not resolve: it is either missing or a dangling link
    let danglingTarget: string | undefined;
    try {
      const stats = await fs.lstat(current);
      if (stats.isSymbolicLink()) {
        danglingTarget = path.resolve(
          path.dirname(current),
          await fs.readlink(current)
        );
      }
    } catch (err) {
      if (!isENOENTError(err)) {
        throw err;
      }
    }

    if (danglingTarget !== undefined) {
      hops += 1;
      if (hops > maxDanglingHops) {
        throw new ValidationError(
          `Security error: Too many levels of symbolic links: ${targetPath}`
        );
      }
      current = danglingTarget;
      continue;
    }

    const parent = path.dirname(current);
    if (parent === current) {
      // Reached the filesystem root without finding anything that exists
      return path.join(current, ...missingSegments);
    }
    missingSegments.unshift(path.basename(current));
    current = parent;
  }
}
387 |
/**
 * Securely writes content to a file, ensuring the path is within allowed directories.
 *
 * Steps: validate the path against the allowed directories, resolve symlinks,
 * refuse to replace an existing file unless `overwrite` is set, create missing
 * parent directories, then write the content as UTF-8.
 *
 * NOTE(review): the symlink resolution step (`fullyResolvePath`) takes no
 * allowed-directory argument, so a custom `options.allowedDirs` only applies
 * to the initial validation — confirm this is intended.
 *
 * @param filePath The relative or absolute path to the file.
 * @param content The string content to write to the file.
 * @param options Optional configuration
 * @param options.overwrite Replace the file if it already exists (default: false)
 * @param options.allowedDirs Directories to validate against instead of the instance's
 * @returns A promise that resolves when the file is written
 * @throws ValidationError if the path is invalid, outside allowed directories,
 *                         if the file exists and overwrite is false, or for any other security/file system error
 */
public async secureWriteFile(
  filePath: string,
  content: string,
  options: {
    overwrite?: boolean;
    allowedDirs?: string[];
  } = {}
): Promise<void> {
  const { overwrite = false, allowedDirs } = options;

  // Use instance's allowed directories if none provided
  const effectiveAllowedDirs = allowedDirs || this.allowedDirectories;

  // 1. Initial validation against allowed directories
  const validatedPath = this.validateAndResolvePath(filePath, {
    allowedDirs: effectiveAllowedDirs,
  });

  // 2. Fully resolve the path handling symlinks and do final security check
  const finalFilePath = await this.fullyResolvePath(validatedPath);

  const alreadyExistsMessage = `File already exists: ${filePath}. Set overwrite flag to true to replace it.`;

  // 3. Check if file exists and overwrite flag is false
  if (!overwrite) {
    let fileExists = false;
    try {
      await fs.access(finalFilePath);
      fileExists = true;
    } catch (err) {
      // ENOENT is the expected outcome for a new file; anything else is a real access problem
      if (!isENOENTError(err)) {
        logger.error(`Error checking file existence: ${finalFilePath}`, err);
        const errorMsg = hasErrorMessage(err) ? err.message : String(err);
        throw new ValidationError(`Error checking file access: ${errorMsg}`);
      }
    }

    // Thrown outside the try block so the catch above cannot re-wrap it as an access error
    if (fileExists) {
      logger.error(
        `File already exists and overwrite is false: ${finalFilePath}`
      );
      throw new ValidationError(alreadyExistsMessage);
    }
  }

  // 4. Create parent directories if they don't exist
  const dirPath = path.dirname(finalFilePath);
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (err) {
    logger.error(`Error creating directory ${dirPath}:`, err);
    const errorMsg = hasErrorMessage(err) ? err.message : String(err);
    throw new ValidationError(
      `Failed to create directory structure: ${errorMsg}`
    );
  }

  // 5. Write the file
  try {
    if (overwrite) {
      await fs.writeFile(finalFilePath, content, "utf8");
    } else {
      // "wx" fails if the path already exists, closing the window between
      // the existence check above and this write.
      await fs.writeFile(finalFilePath, content, {
        encoding: "utf8",
        flag: "wx",
      });
    }
    logger.info(`Successfully wrote file to ${finalFilePath}`);
  } catch (err) {
    const createdConcurrently =
      !overwrite &&
      typeof err === "object" &&
      err !== null &&
      (err as { code?: unknown }).code === "EEXIST";
    if (createdConcurrently) {
      logger.error(
        `File already exists and overwrite is false: ${finalFilePath}`
      );
      throw new ValidationError(alreadyExistsMessage);
    }
    logger.error(`Error writing file ${finalFilePath}:`, err);
    const errorMsg = hasErrorMessage(err) ? err.message : String(err);
    throw new ValidationError(`Failed to write file: ${errorMsg}`);
  }
}
464 |
/**
 * Builds a FileSecurityService whose allowed directory comes from the
 * GEMINI_SAFE_FILE_BASE_DIR environment variable.
 *
 * Falls back to the current working directory when the variable is unset,
 * or when using the configured directory fails (e.g. it does not exist).
 * Intended to be called during application startup.
 *
 * @returns The configured service instance
 */
public static configureFromEnvironment(): FileSecurityService {
  const service = new FileSecurityService();
  const customBaseDir = process.env.GEMINI_SAFE_FILE_BASE_DIR;

  // No override configured: restrict to the working directory
  if (!customBaseDir) {
    logger.info(
      `File operations restricted to current working directory: ${process.cwd()}`
    );
    service.setAllowedDirectories([process.cwd()]);
    return service;
  }

  try {
    // Throws when the configured directory is not visible to the process
    fsSync.accessSync(customBaseDir, fsSync.constants.F_OK);
    logger.info(`File operations restricted to: ${customBaseDir}`);
    service.setAllowedDirectories([customBaseDir]);
  } catch {
    logger.warn(
      `Configured GEMINI_SAFE_FILE_BASE_DIR does not exist: ${customBaseDir}`
    );
    logger.warn(`Falling back to default directory: ${process.cwd()}`);
    service.setAllowedDirectories([process.cwd()]);
  }

  return service;
}
495 | }
496 |
```
--------------------------------------------------------------------------------
/tests/unit/services/gemini/GeminiUrlContextService.test.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | // Using vitest globals - see vitest.config.ts globals: true
2 | import { GeminiUrlContextService } from "../../../../src/services/gemini/GeminiUrlContextService.js";
3 | import { ConfigurationManager } from "../../../../src/config/ConfigurationManager.js";
4 | import { GeminiUrlFetchError } from "../../../../src/utils/geminiErrors.js";
5 |
6 | // Mock dependencies
7 | vi.mock("../../../../src/config/ConfigurationManager.js");
8 | vi.mock("../../../../src/utils/logger.js");
9 | vi.mock("../../../../src/utils/UrlSecurityService.js");
10 |
11 | // Mock fetch globally
12 | const mockFetch = vi.fn();
13 | global.fetch = mockFetch;
14 |
/**
 * Minimal stand-in for ConfigurationManager: the tests in this file only
 * stub the URL-context configuration getter.
 */
interface MockConfigManager {
  /** Mocked getter returning the URL-context settings object. */
  getUrlContextConfig: ReturnType<typeof vi.fn>;
}
18 |
19 | describe("GeminiUrlContextService", () => {
20 | let service: GeminiUrlContextService;
21 | let mockConfig: MockConfigManager;
22 |
// Fresh mocks, configuration and service instance for every test
beforeEach(() => {
  vi.clearAllMocks();

  // URL-context settings handed out by the mocked configuration manager
  const urlContextConfig = {
    enabled: true,
    maxUrlsPerRequest: 20,
    defaultMaxContentKb: 100,
    defaultTimeoutMs: 10000,
    allowedDomains: ["*"],
    blocklistedDomains: [],
    convertToMarkdown: true,
    includeMetadata: true,
    enableCaching: true,
    cacheExpiryMinutes: 15,
    maxCacheSize: 1000,
    rateLimitPerDomainPerMinute: 10,
    userAgent: "MCP-Gemini-Server/1.0",
  };

  mockConfig = {
    getUrlContextConfig: vi.fn().mockReturnValue(urlContextConfig),
  };

  service = new GeminiUrlContextService(
    mockConfig as unknown as ConfigurationManager
  );
});

// Drop queued mock implementations so they cannot leak into the next test
afterEach(() => {
  vi.resetAllMocks();
});
55 |
describe("fetchUrlContent", () => {
  /** Queues a single mocked fetch response for the next request. */
  const queueResponse = (
    url: string,
    body: string,
    headerEntries: Array<[string, string]>,
    status = 200,
    statusText = "OK"
  ): void => {
    mockFetch.mockResolvedValueOnce({
      ok: status >= 200 && status < 300,
      status,
      statusText,
      url,
      headers: new Map(headerEntries),
      text: () => Promise.resolve(body),
    });
  };

  it("should successfully fetch and process HTML content", async () => {
    const pageUrl = "https://example.com/test";
    const mockHtmlContent = `
      <!DOCTYPE html>
      <html>
        <head>
          <title>Test Page</title>
          <meta name="description" content="A test page">
        </head>
        <body>
          <h1>Main Heading</h1>
          <p>This is a test paragraph with <strong>bold text</strong>.</p>
          <ul>
            <li>Item 1</li>
            <li>Item 2</li>
          </ul>
        </body>
      </html>
    `;

    queueResponse(pageUrl, mockHtmlContent, [
      ["content-type", "text/html; charset=utf-8"],
      ["content-length", mockHtmlContent.length.toString()],
    ]);

    const result = await service.fetchUrlContent(pageUrl);

    expect(result).toBeDefined();

    const { metadata, content } = result;
    expect(metadata.url).toBe("https://example.com/test");
    expect(metadata.statusCode).toBe(200);
    expect(metadata.title).toBe("Test Page");
    expect(metadata.description).toBe("A test page");
    expect(content).toContain("# Main Heading");
    expect(content).toContain("**bold text**");
    expect(content).toContain("- Item 1");
  });

  it("should handle fetch errors gracefully", async () => {
    mockFetch.mockRejectedValueOnce(new Error("Network error"));

    const pending = service.fetchUrlContent("https://example.com/error");

    await expect(pending).rejects.toThrow(GeminiUrlFetchError);
  });

  it("should handle HTTP error responses", async () => {
    const missingUrl = "https://example.com/notfound";
    queueResponse(missingUrl, "Page not found", [], 404, "Not Found");

    const pending = service.fetchUrlContent(missingUrl);

    await expect(pending).rejects.toThrow(GeminiUrlFetchError);
  });

  it("should respect content size limits", async () => {
    const limitBytes = 100 * 1024; // 100KB limit
    const largeContent = "x".repeat(200 * 1024); // 200KB content
    const largeUrl = "https://example.com/large";

    queueResponse(largeUrl, largeContent, [
      ["content-type", "text/html"],
      ["content-length", largeContent.length.toString()],
    ]);

    const result = await service.fetchUrlContent(largeUrl, {
      maxContentLength: limitBytes,
    });

    expect(result.metadata.truncated).toBe(true);
    expect(result.content.length).toBeLessThanOrEqual(limitBytes);
  });

  it("should handle JSON content without conversion", async () => {
    const apiUrl = "https://api.example.com/data";
    const jsonContent = JSON.stringify({
      message: "Hello World",
      data: [1, 2, 3],
    });

    queueResponse(apiUrl, jsonContent, [
      ["content-type", "application/json"],
      ["content-length", jsonContent.length.toString()],
    ]);

    const result = await service.fetchUrlContent(apiUrl, {
      convertToMarkdown: false,
    });

    expect(result.content).toBe(jsonContent);
    expect(result.metadata.contentType).toBe("application/json");
  });
});
178 |
describe("processUrlsForContext", () => {
  /** Builds a successful mocked HTML response for the given URL. */
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    status: 200,
    statusText: "OK",
    url,
    headers: new Map([["content-type", "text/html"]]),
    text: () => Promise.resolve(html),
  });

  it("should process multiple URLs successfully", async () => {
    const urls = ["https://example1.com", "https://example2.com"];
    const pages = [
      "<html><head><title>Page 1</title></head><body><p>Content 1</p></body></html>",
      "<html><head><title>Page 2</title></head><body><p>Content 2</p></body></html>",
    ];

    mockFetch
      .mockResolvedValueOnce(htmlResponse(urls[0], pages[0]))
      .mockResolvedValueOnce(htmlResponse(urls[1], pages[1]));

    const result = await service.processUrlsForContext(urls);
    const { summary } = result.batchResult;

    expect(result.contents).toHaveLength(2);
    expect(summary.totalUrls).toBe(2);
    expect(summary.successCount).toBe(2);
    expect(summary.failureCount).toBe(0);

    // Each entry must expose its text part and mention the URL it came from
    const expectedSnippets = [
      "Content from https://example1.com",
      "Content from https://example2.com",
    ];
    for (const [index, snippet] of expectedSnippets.entries()) {
      const entry = result.contents[index];
      expect(entry).toBeDefined();
      expect(entry!.parts).toBeDefined();
      expect(entry!.parts![0]).toBeDefined();
      expect(entry!.parts![0]!.text).toContain(snippet);
    }
  });

  it("should handle mixed success and failure scenarios", async () => {
    const urls = [
      "https://example1.com",
      "https://failed.com",
      "https://example3.com",
    ];

    // The second request fails at the network level
    mockFetch
      .mockResolvedValueOnce(
        htmlResponse(urls[0], "<html><body>Content 1</body></html>")
      )
      .mockRejectedValueOnce(new Error("Network error"))
      .mockResolvedValueOnce(
        htmlResponse(urls[2], "<html><body>Content 3</body></html>")
      );

    const { batchResult } = await service.processUrlsForContext(urls);

    expect(batchResult.summary.totalUrls).toBe(3);
    expect(batchResult.summary.successCount).toBe(2);
    expect(batchResult.summary.failureCount).toBe(1);
    expect(batchResult.failed).toHaveLength(1);
    expect(batchResult.failed[0].url).toBe("https://failed.com");
  });

  it("should reject if too many URLs provided", async () => {
    // 25 URLs against a configured maximum of 20
    const urls: string[] = [];
    for (let i = 0; i < 25; i++) {
      urls.push(`https://example${i}.com`);
    }

    await expect(service.processUrlsForContext(urls)).rejects.toThrow(
      "Too many URLs: 25. Maximum allowed: 20"
    );
  });

  it("should reject if no URLs provided", async () => {
    const noUrls: string[] = [];

    await expect(service.processUrlsForContext(noUrls)).rejects.toThrow(
      "No URLs provided for processing"
    );
  });
});
279 |
describe("HTML to Markdown conversion", () => {
  const pageUrl = "https://example.com";

  /** Serves `html` from the mocked fetch and returns the converted content. */
  const convert = async (html: string): Promise<string> => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      url: pageUrl,
      headers: new Map([["content-type", "text/html"]]),
      text: () => Promise.resolve(html),
    });

    const result = await service.fetchUrlContent(pageUrl);
    return result.content;
  };

  it("should convert headings correctly", async () => {
    const markdown = await convert(`
      <html>
        <body>
          <h1>Heading 1</h1>
          <h2>Heading 2</h2>
          <h3>Heading 3</h3>
        </body>
      </html>
    `);

    expect(markdown).toContain("# Heading 1");
    expect(markdown).toContain("## Heading 2");
    expect(markdown).toContain("### Heading 3");
  });

  it("should convert lists correctly", async () => {
    const markdown = await convert(`
      <html>
        <body>
          <ul>
            <li>Unordered item 1</li>
            <li>Unordered item 2</li>
          </ul>
          <ol>
            <li>Ordered item 1</li>
            <li>Ordered item 2</li>
          </ol>
        </body>
      </html>
    `);

    expect(markdown).toContain("- Unordered item 1");
    expect(markdown).toContain("- Unordered item 2");
    expect(markdown).toContain("1. Ordered item 1");
    expect(markdown).toContain("2. Ordered item 2");
  });

  it("should convert links correctly", async () => {
    const markdown = await convert(`
      <html>
        <body>
          <a href="https://example.com">Example Link</a>
          <a href="/relative/path">Relative Link</a>
        </body>
      </html>
    `);

    expect(markdown).toContain("[Example Link](https://example.com)");
    expect(markdown).toContain("[Relative Link](/relative/path)");
  });

  it("should remove script and style tags", async () => {
    const markdown = await convert(`
      <html>
        <head>
          <style>body { color: red; }</style>
        </head>
        <body>
          <p>Visible content</p>
          <script>console.log('hidden');</script>
          <p>More visible content</p>
        </body>
      </html>
    `);

    expect(markdown).toContain("Visible content");
    expect(markdown).toContain("More visible content");
    expect(markdown).not.toContain("color: red");
    expect(markdown).not.toContain("console.log");
  });
});
393 |
describe("Content metadata extraction", () => {
  /** Queues one successful HTML response for https://example.com. */
  const queueHtml = (html: string): void => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      url: "https://example.com",
      headers: new Map([["content-type", "text/html"]]),
      text: () => Promise.resolve(html),
    });
  };

  it("should extract title and description from meta tags", async () => {
    queueHtml(`
      <html>
        <head>
          <title>Test Page Title</title>
          <meta name="description" content="Test page description">
          <meta property="og:image" content="https://example.com/image.jpg">
          <link rel="canonical" href="https://example.com/canonical">
        </head>
        <body>
          <p>Content</p>
        </body>
      </html>
    `);

    const result = await service.fetchUrlContent("https://example.com");

    expect(result.metadata.title).toBe("Test Page Title");
    expect(result.metadata.description).toBe("Test page description");
    expect(result.metadata.ogImage).toBe("https://example.com/image.jpg");
    expect(result.metadata.canonicalUrl).toBe(
      "https://example.com/canonical"
    );
  });

  it("should handle HTML entities in metadata", async () => {
    // The fixture must contain *encoded* entities: raw `<`, `&` and `"`
    // would give the service nothing to decode, and an unescaped quote
    // would terminate the content attribute early.
    queueHtml(`
      <html>
        <head>
          <title>Title with &amp; ampersand &lt;tags&gt;</title>
          <meta name="description" content="Description with &quot;quotes&quot; and spaces">
        </head>
        <body>
          <p>Content</p>
        </body>
      </html>
    `);

    const result = await service.fetchUrlContent("https://example.com");

    // Expectations are the decoded forms of the entities above
    expect(result.metadata.title).toBe("Title with & ampersand <tags>");
    expect(result.metadata.description).toBe(
      'Description with "quotes" and spaces'
    );
  });
});
457 |
describe("Caching functionality", () => {
  it("should cache successful results", async () => {
    const cachedUrl = "https://example.com";
    const htmlContent = "<html><body><p>Cached content</p></body></html>";

    // Only one response is queued: a second network call would not get one
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      url: cachedUrl,
      headers: new Map([["content-type", "text/html"]]),
      text: () => Promise.resolve(htmlContent),
    });

    // First call goes to the network
    const fromNetwork = await service.fetchUrlContent(cachedUrl);
    expect(mockFetch).toHaveBeenCalledTimes(1);

    // Second call must be served from the cache (no additional fetch)
    const fromCache = await service.fetchUrlContent(cachedUrl);
    expect(mockFetch).toHaveBeenCalledTimes(1);

    expect(fromNetwork.content).toBe(fromCache.content);
    expect(fromNetwork.metadata.url).toBe(fromCache.metadata.url);
  });
});
482 |
describe("Rate limiting", () => {
  it("should enforce rate limits per domain", async () => {
    const url = "https://example.com/page";

    // Queue more successful responses than the per-domain limit allows
    Array.from({ length: 15 }).forEach(() => {
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        url,
        headers: new Map([["content-type", "text/html"]]),
        text: () => Promise.resolve("<html><body>Content</body></html>"),
      });
    });

    // The first 10 requests (the configured limit) go through, one at a time
    const allowedPages = Array.from({ length: 10 }, (_, page) => page);
    for (const page of allowedPages) {
      await service.fetchUrlContent(`${url}?page=${page}`);
    }

    // The next request to the same domain must be rejected by the limiter
    const overLimit = service.fetchUrlContent(`${url}?page=11`);
    await expect(overLimit).rejects.toThrow(GeminiUrlFetchError);
  });
});
509 |
describe("Error handling", () => {
  it("should handle timeout errors", async () => {
    const timeoutError = new Error("Request timeout");
    mockFetch.mockRejectedValueOnce(timeoutError);

    const pending = service.fetchUrlContent("https://example.com/timeout");

    await expect(pending).rejects.toThrow(GeminiUrlFetchError);
  });

  it("should handle unsupported content types", async () => {
    const binaryUrl = "https://example.com/binary";

    // A successful response whose content type is not textual
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      url: binaryUrl,
      headers: new Map([["content-type", "application/octet-stream"]]),
      text: () => Promise.resolve("binary data"),
    });

    const pending = service.fetchUrlContent(binaryUrl);

    await expect(pending).rejects.toThrow(GeminiUrlFetchError);
  });
});
533 | });
534 |
```
--------------------------------------------------------------------------------
/src/utils/UrlSecurityService.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { ConfigurationManager } from "../config/ConfigurationManager.js";
2 | import { GeminiUrlValidationError } from "./geminiErrors.js";
3 | import { logger } from "./logger.js";
4 |
/** Outcome of a single URL check. */
export interface UrlValidationResult {
  /** Whether the URL passed the check. */
  valid: boolean;
  /** Explanation of the rejection; optional. */
  reason?: string;
  /** Non-fatal findings collected during the check; optional. */
  warnings?: string[];
}
10 |
/** Counters and samples collected while validating URLs. */
export interface SecurityMetrics {
  /** Number of validation calls made. */
  validationAttempts: number;
  /** Number of validation calls that ended in an error. */
  validationFailures: number;
  /** Hostnames that were blocked. */
  blockedDomains: Set<string>;
  /** Descriptions of the suspicious patterns that were detected. */
  suspiciousPatterns: string[];
  /** Number of rate-limit violations recorded. */
  rateLimitViolations: number;
}
18 |
19 | /**
20 | * Comprehensive URL Security Service for validating and securing URL access
21 | * Prevents access to malicious, private, or restricted URLs
22 | */
23 | export class UrlSecurityService {
24 | private readonly logger: typeof logger;
25 | private readonly securityMetrics: SecurityMetrics;
26 |
// Known dangerous TLDs and patterns
private readonly dangerousTlds = new Set([
  // Free domains often used for malicious purposes
  "tk",
  "ml",
  "ga",
  "cf",
  // URL shorteners that can hide destinations
  "bit",
  "link",
  "click",
  // File-like TLDs
  "download",
  "zip",
  "exe",
]);

// Suspicious URL patterns.
// NOTE(review): these are tested against the complete URL string, so the
// `$` anchor below applies to the end of the URL (not the hostname) and the
// percent-encoding rule also fires on encoded paths and query strings.
private readonly suspiciousPatterns = [
  /\.\./, // Path traversal
  /@.*@/, // Multiple @ symbols
  /javascript:/i, // JavaScript protocol
  /data:/i, // Data URLs
  /file:/i, // File protocol
  /ftp:/i, // FTP protocol
  /localhost|127\.0\.0\.1|0\.0\.0\.0/i, // Localhost
  /\.(local|internal|private|corp|lan)$/i, // Internal domains
  /%[0-9a-f]{2}/i, // URL encoding (suspicious in domain names)
  /[<>{}\\^`|"]/i, // Dangerous characters
];

// Known malicious domains and patterns (expandable list)
private readonly knownMaliciousDomains = new Set([
  "malware.com",
  "phishing.com",
  "spam.com",
  "virus.com",
  "trojan.com",
]);

// Private/internal network ranges.
// IPv6 entries accept an optional leading "[" because URL.hostname returns
// IPv6 literals in brackets, and cover the whole prefix rather than only its
// first address.
private readonly privateNetworkRanges = [
  /^10\./, // 10.0.0.0/8
  /^172\.(1[6-9]|2[0-9]|3[01])\./, // 172.16.0.0/12
  /^192\.168\./, // 192.168.0.0/16
  /^169\.254\./, // Link-local (169.254.0.0/16)
  /^(22[4-9]|23[0-9])\./, // Multicast (224.0.0.0/4)
  /^\[?f[cd][0-9a-f]{2}:/i, // IPv6 unique local (fc00::/7)
  /^\[?fe[89ab][0-9a-f]:/i, // IPv6 link-local (fe80::/10)
  /^\[?ff[0-9a-f]{2}:/i, // IPv6 multicast (ff00::/8)
];
75 |
/**
 * Creates the service with zeroed security metrics.
 *
 * @param config Configuration manager kept on the instance as `config`
 */
constructor(private readonly config: ConfigurationManager) {
  const initialMetrics: SecurityMetrics = {
    validationAttempts: 0,
    validationFailures: 0,
    blockedDomains: new Set(),
    suspiciousPatterns: [],
    rateLimitViolations: 0,
  };

  this.logger = logger;
  this.securityMetrics = initialMetrics;
}
86 |
/**
 * Runs every security check against a URL and resolves only when all pass.
 *
 * Order of checks: URL syntax, protocol, suspicious patterns, domain
 * allow/deny rules, known malicious domains, configuration limits and the
 * advanced checks. Every call increments `validationAttempts`; every
 * failure increments `validationFailures`.
 *
 * @param url The URL to validate
 * @param allowedDomains Optional domain list forwarded to the domain validation step
 * @throws GeminiUrlValidationError when any check fails; unexpected errors
 *         are wrapped in a GeminiUrlValidationError with reason "invalid_format"
 */
async validateUrl(url: string, allowedDomains?: string[]): Promise<void> {
  this.securityMetrics.validationAttempts += 1;

  try {
    // Basic URL format validation
    const parsedUrl = ((): URL => {
      try {
        return new URL(url);
      } catch (error) {
        this.logSecurityEvent("Invalid URL format", { url, error });
        throw new GeminiUrlValidationError(
          `Invalid URL format: ${url}`,
          url,
          "invalid_format"
        );
      }
    })();

    // Protocol validation
    const protocolAllowed = this.isAllowedProtocol(parsedUrl.protocol);
    if (!protocolAllowed) {
      this.logSecurityEvent("Blocked protocol", {
        url,
        protocol: parsedUrl.protocol,
      });
      throw new GeminiUrlValidationError(
        `Protocol not allowed: ${parsedUrl.protocol}`,
        url,
        "blocked_domain"
      );
    }

    // Suspicious pattern screening
    const patternResult = this.checkSuspiciousPatterns(url, parsedUrl);
    if (!patternResult.valid) {
      this.logSecurityEvent("Suspicious pattern detected", {
        url,
        reason: patternResult.reason,
      });
      throw new GeminiUrlValidationError(
        patternResult.reason || "Suspicious URL pattern detected",
        url,
        "suspicious_pattern"
      );
    }

    // Domain validation
    await this.validateDomain(parsedUrl, allowedDomains);

    // Known malicious domains
    const isMalicious = this.isKnownMaliciousDomain(parsedUrl.hostname);
    if (isMalicious) {
      this.logSecurityEvent("Known malicious domain", {
        url,
        domain: parsedUrl.hostname,
      });
      this.securityMetrics.blockedDomains.add(parsedUrl.hostname);
      throw new GeminiUrlValidationError(
        `Access to known malicious domain blocked: ${parsedUrl.hostname}`,
        url,
        "blocked_domain"
      );
    }

    // URL configuration limits
    this.validateUrlConfiguration(parsedUrl);

    // Additional security checks
    await this.performAdvancedSecurityChecks(parsedUrl);

    this.logger.debug("URL validation passed", {
      url,
      domain: parsedUrl.hostname,
    });
  } catch (error) {
    this.securityMetrics.validationFailures += 1;

    // Anything that is not already a validation error gets wrapped
    if (!(error instanceof GeminiUrlValidationError)) {
      throw new GeminiUrlValidationError(
        `URL validation failed: ${error instanceof Error ? error.message : String(error)}`,
        url,
        "invalid_format"
      );
    }
    throw error;
  }
}
173 |
/**
 * Checks whether a URL is reachable by sending a HEAD request, so no
 * response body is downloaded.
 *
 * The request is aborted after `timeoutMs` so an unresponsive host cannot
 * stall the caller. This method performs no security validation itself;
 * call `validateUrl` first for untrusted input.
 *
 * @param url The URL to probe
 * @param timeoutMs Maximum time to wait for the response (default: 5000 ms)
 * @returns true when the response status is OK; false on a non-OK status,
 *          network error or timeout
 */
async checkUrlAccessibility(url: string, timeoutMs = 5000): Promise<boolean> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(url, {
      method: "HEAD",
      headers: {
        "User-Agent": "MCP-Gemini-Server-HealthCheck/1.0",
      },
      signal: controller.signal,
    });
    return response.ok;
  } catch (error) {
    // Covers network failures as well as the abort triggered by the timeout
    this.logger.debug("URL accessibility check failed", { url, error });
    return false;
  } finally {
    clearTimeout(timer);
  }
}
191 |
/**
 * Returns a snapshot of the security metrics for monitoring.
 *
 * The domain set and pattern list are copied, so changing the returned
 * object does not affect the service's internal state.
 */
getSecurityMetrics(): SecurityMetrics {
  const snapshot: SecurityMetrics = { ...this.securityMetrics };
  snapshot.blockedDomains = new Set(this.securityMetrics.blockedDomains);
  snapshot.suspiciousPatterns = Array.from(
    this.securityMetrics.suspiciousPatterns
  );
  return snapshot;
}
202 |
/**
 * Resets all security metrics to their initial values (useful for testing).
 *
 * The existing set and array are emptied in place rather than replaced.
 */
resetSecurityMetrics(): void {
  const metrics = this.securityMetrics;

  metrics.blockedDomains.clear();
  metrics.suspiciousPatterns.splice(0, metrics.suspiciousPatterns.length);

  Object.assign(metrics, {
    validationAttempts: 0,
    validationFailures: 0,
    rateLimitViolations: 0,
  });
}
213 |
/**
 * Adds a domain to the malicious-domain blocklist.
 *
 * The domain is stored lower-cased; the log entry carries the value as given.
 *
 * @param domain Domain name to block
 */
addMaliciousDomain(domain: string): void {
  const lowerCased = domain.toLowerCase();
  this.knownMaliciousDomains.add(lowerCased);
  this.logger.info("Added domain to malicious blocklist", { domain });
}
221 |
/**
 * Tells whether a URL protocol may be fetched; only HTTP and HTTPS are.
 *
 * @param protocol Protocol including the trailing colon (e.g. "https:"), compared case-insensitively
 * @returns true for "http:" and "https:", false otherwise
 */
private isAllowedProtocol(protocol: string): boolean {
  const scheme = protocol.toLowerCase();
  return scheme === "http:" || scheme === "https:";
}
229 |
/**
 * Screen a URL for suspicious characteristics.
 *
 * Hard failures (control characters, a match against one of the configured
 * suspicious patterns, or a suspected IDN homograph) end the check
 * immediately with `{ valid: false, reason }` and are recorded in the
 * security metrics. A dangerous TLD or a known URL shortener only adds an
 * entry to `warnings`.
 *
 * @param url - The raw URL string as supplied by the caller.
 * @param parsedUrl - The same URL after parsing.
 * @returns The validation outcome, with warnings when the URL is accepted.
 */
private checkSuspiciousPatterns(
  url: string,
  parsedUrl: URL
): UrlValidationResult {
  const warnings: string[] = [];

  // Record a hard rejection in the metrics and build the failure result.
  const reject = (reason: string): UrlValidationResult => {
    this.securityMetrics.suspiciousPatterns.push(reason);
    return { valid: false, reason };
  };

  // Check for control characters
  if (this.hasControlCharacters(url)) {
    return reject("Control characters detected in URL");
  }

  // Check each suspicious pattern
  for (const pattern of this.suspiciousPatterns) {
    // `test` resumes from `lastIndex` on global/sticky expressions; reset it
    // so a pattern reused across calls cannot skip a match.
    pattern.lastIndex = 0;
    if (pattern.test(url)) {
      return reject(`Suspicious pattern detected: ${pattern.source}`);
    }
  }

  // Check for dangerous TLDs. Empty labels are dropped so a fully-qualified
  // hostname with a trailing dot still yields its real TLD.
  const labels = parsedUrl.hostname
    .split(".")
    .filter((label) => label.length > 0);
  const tld = labels[labels.length - 1]?.toLowerCase();
  if (tld && this.dangerousTlds.has(tld)) {
    warnings.push(`Potentially dangerous TLD: .${tld}`);
  }

  // Check for IDN homograph attacks
  if (this.detectIdnHomograph(parsedUrl.hostname)) {
    this.logger.warn("IDN homograph attack detected", {
      hostname: parsedUrl.hostname,
    });
    // Recorded in the metrics like the other hard failures above.
    return reject("Potential IDN homograph attack detected in domain name");
  }

  // Check for URL shorteners (could hide destination)
  if (this.isUrlShortener(parsedUrl.hostname)) {
    warnings.push("URL shortener detected - destination cannot be verified");
  }

  return { valid: true, warnings };
}
279 |
/**
 * Validate the URL's host against the blocklist, the allowlist and the
 * private/internal address rules, in that order.
 *
 * The allowlist check is skipped when the effective list is empty or
 * contains "*".
 *
 * @param parsedUrl - Parsed URL whose hostname is checked.
 * @param allowedDomains - Optional per-call allowlist; when omitted the
 *   configured `allowedDomains` is used.
 * @throws GeminiUrlValidationError with reason "blocked_domain" when the host
 *   is blocklisted, missing from the allowlist, or private/internal.
 */
private async validateDomain(
  parsedUrl: URL,
  allowedDomains?: string[]
): Promise<void> {
  // "example.com." is the fully-qualified spelling of "example.com". Strip
  // the trailing dot so it cannot be used to slip past the checks below.
  const hostname = parsedUrl.hostname.toLowerCase().replace(/\.$/, "");
  const urlConfig = this.config.getUrlContextConfig();

  // Check blocklist first
  for (const blockedPattern of urlConfig.blocklistedDomains) {
    if (this.matchesDomainPattern(hostname, blockedPattern)) {
      this.securityMetrics.blockedDomains.add(hostname);
      throw new GeminiUrlValidationError(
        `Domain is blocked: ${hostname}`,
        parsedUrl.href,
        "blocked_domain"
      );
    }
  }

  // Check allowlist if specified
  const domainsToCheck = allowedDomains ?? urlConfig.allowedDomains;
  if (domainsToCheck.length > 0 && !domainsToCheck.includes("*")) {
    const allowed = domainsToCheck.some((allowedPattern) =>
      this.matchesDomainPattern(hostname, allowedPattern)
    );

    if (!allowed) {
      throw new GeminiUrlValidationError(
        `Domain not in allowlist: ${hostname}`,
        parsedUrl.href,
        "blocked_domain"
      );
    }
  }

  // Check for private/internal networks
  if (this.isPrivateOrInternalAddress(hostname)) {
    throw new GeminiUrlValidationError(
      `Access to private/internal addresses blocked: ${hostname}`,
      parsedUrl.href,
      "blocked_domain"
    );
  }
}
333 |
/**
 * Check whether a domain matches a configured pattern.
 *
 * Supported patterns:
 * - "*" matches every domain;
 * - "*.example.com" and "example.com" both match "example.com" itself and
 *   any of its subdomains (e.g. "sub.example.com").
 *
 * Matching is case-insensitive and ignores whitespace around the pattern.
 * A pattern with no domain part (empty, or just "*.") matches nothing.
 *
 * @param domain - Hostname to test.
 * @param pattern - Pattern from the allowlist or blocklist.
 * @returns `true` when the domain is covered by the pattern.
 */
private matchesDomainPattern(domain: string, pattern: string): boolean {
  const candidate = domain.toLowerCase();
  const rule = pattern.trim().toLowerCase();

  if (rule === "*") {
    return true;
  }

  // "*.example.com" and "example.com" share the same matching rule.
  const base = rule.startsWith("*.") ? rule.slice(2) : rule;
  if (base.length === 0) {
    // Otherwise `endsWith(".")` would match every trailing-dot hostname.
    return false;
  }

  return candidate === base || candidate.endsWith("." + base);
}
355 |
/**
 * Check whether a hostname refers to a private or internal destination.
 *
 * IP literals are tested against `privateNetworkRanges`; other hostnames are
 * treated as internal when they are "localhost" or end in one of the
 * internal-looking suffixes listed below.
 *
 * NOTE(review): the suffix list includes "dev", which is also a public TLD,
 * so public sites under it are reported as internal — confirm this is
 * intended.
 *
 * @param hostname - Hostname or IP literal, with or without the square
 *   brackets used around IPv6 addresses in URLs.
 * @returns `true` when the host should be treated as private/internal.
 */
private isPrivateOrInternalAddress(hostname: string): boolean {
  // Drop the fully-qualified trailing dot ("localhost." -> "localhost").
  const normalized = hostname.toLowerCase().replace(/\.$/, "");
  // `URL.hostname` wraps IPv6 literals in brackets ("[::1]"); unwrap them so
  // the IPv6 pattern below can recognise the address.
  const bare =
    normalized.startsWith("[") && normalized.endsWith("]")
      ? normalized.slice(1, -1)
      : normalized;

  // Check if it's an IP address
  const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
  const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;

  if (ipv4Regex.test(bare) || ipv6Regex.test(bare)) {
    // Test the original spelling as well, in case a range was written
    // against the bracketed form.
    return this.privateNetworkRanges.some(
      (range) =>
        range.test(bare) || (bare !== hostname && range.test(hostname))
    );
  }

  // Check for internal domain patterns
  return (
    /\.(local|internal|private|corp|lan|test|dev|localhost)$/i.test(bare) ||
    bare === "localhost"
  );
}
375 |
/**
 * Check whether a hostname belongs to a known URL-shortening service.
 *
 * Matching is case-insensitive, ignores a trailing dot, and also covers
 * subdomains of the listed services (e.g. "www.tinyurl.com").
 *
 * @param hostname - Hostname to test.
 * @returns `true` when the host is one of the listed shorteners.
 */
private isUrlShortener(hostname: string): boolean {
  const shorteners = [
    "bit.ly",
    "tinyurl.com",
    "short.link",
    "ow.ly",
    "t.co",
    "goo.gl",
    "tiny.cc",
    "is.gd",
    "buff.ly",
    "bitly.com",
  ];
  const host = hostname.toLowerCase().replace(/\.$/, "");

  return shorteners.some(
    (shortener) => host === shortener || host.endsWith("." + shortener)
  );
}
394 |
/**
 * Heuristically detect potential IDN homograph attacks in a hostname.
 *
 * Two independent heuristics are applied:
 * 1. Punycode labels ("xn--…") on a two-label host (label + TLD) are flagged
 *    when the encoded part is at most 10 characters and the TLD is one of
 *    com/org/net/io/co, or when the encoded part has the shape
 *    "<3-8 alphanumerics>-<2-4 alphanumerics>".
 * 2. A hostname mixing more than one of the Latin, Cyrillic and Greek
 *    scripts is flagged.
 *
 * @param hostname - Hostname to inspect.
 * @returns `true` when the hostname looks like a homograph attack.
 */
private detectIdnHomograph(hostname: string): boolean {
  const labels = hostname.split(".");
  // Both Punycode heuristics only apply to "<label>.<tld>" hosts.
  const isTwoLabelHost = labels.length === 2;
  const commonTlds = ["com", "org", "net", "io", "co"];
  const homographShape = /^[a-z0-9]{3,8}-[a-z0-9]{2,4}$/;

  for (const label of labels) {
    if (!label.startsWith("xn--")) {
      continue;
    }

    const encodedPart = label.substring(4); // Remove "xn--" prefix

    // Short encoded label directly under a popular TLD: lookalikes of
    // well-known sites tend to encode to short Punycode strings.
    if (encodedPart.length <= 10 && isTwoLabelHost) {
      const tld = labels[labels.length - 1];
      if (commonTlds.includes(tld)) {
        this.logger.warn("Suspicious Punycode domain detected", {
          hostname,
          part: label,
        });
        return true;
      }
    }

    // Encoded label shaped like a mostly-ASCII name with a short suffix,
    // e.g. xn--pple-43d.com ("apple" with a Cyrillic "a").
    if (isTwoLabelHost && homographShape.test(encodedPart)) {
      return true;
    }
  }

  // Mixed scripts in one hostname could indicate a homograph attack. This
  // also covers the Cyrillic-plus-Latin case, so no separate check is needed.
  const hasLatin = /[a-zA-Z]/.test(hostname);
  const hasCyrillic = /[\u0400-\u04FF]/.test(hostname);
  const hasGreek = /[\u0370-\u03FF]/.test(hostname);
  const scriptCount = [hasLatin, hasCyrillic, hasGreek].filter(
    Boolean
  ).length;

  return scriptCount > 1;
}
461 |
/**
 * Tell whether a hostname is on the malicious-domain blocklist, either
 * exactly or as a subdomain of a listed domain.
 *
 * @param hostname - Hostname to look up; compared in lowercase.
 * @returns `true` when the hostname or one of its parent domains is listed.
 */
private isKnownMaliciousDomain(hostname: string): boolean {
  const candidate = hostname.toLowerCase();

  // Exact entry, e.g. "evil.com"
  if (this.knownMaliciousDomains.has(candidate)) {
    return true;
  }

  // Subdomain of an entry, e.g. "cdn.evil.com" for "evil.com"
  return Array.from(this.knownMaliciousDomains).some((listed) =>
    candidate.endsWith("." + listed)
  );
}
482 |
/**
 * Enforce the fixed limits on URL length and port.
 *
 * An empty port string skips the port check entirely.
 *
 * @param parsedUrl - Parsed URL to check.
 * @throws GeminiUrlValidationError with reason "invalid_format" when the URL
 *   exceeds 2048 characters, or with reason "blocked_domain" when an explicit
 *   port is not one of 80, 443, 8080 or 8443.
 */
private validateUrlConfiguration(parsedUrl: URL): void {
  const { port } = parsedUrl;

  // Length limit
  const tooLong = parsedUrl.href.length > 2048;
  if (tooLong) {
    throw new GeminiUrlValidationError(
      "URL too long (max 2048 characters)",
      parsedUrl.href,
      "invalid_format"
    );
  }

  // Port limit: nothing to check when no explicit port is present
  if (!port) {
    return;
  }

  const permittedPorts = new Set([80, 443, 8080, 8443]);
  if (!permittedPorts.has(parseInt(port))) {
    throw new GeminiUrlValidationError(
      `Port not allowed: ${port}`,
      parsedUrl.href,
      "blocked_domain"
    );
  }
}
510 |
/**
 * Run advisory heuristics on the hostname.
 *
 * Nothing here rejects the URL: each heuristic only emits a warning log.
 * The private-IP check is not repeated here; it is handled by validateDomain.
 *
 * @param parsedUrl - Parsed URL whose hostname is inspected.
 */
private async performAdvancedSecurityChecks(parsedUrl: URL): Promise<void> {
  const { hostname } = parsedUrl;
  const labels = hostname.split(".");
  const labelCount = labels.length;

  // Two-label host whose first label is under three characters
  const hasShortName = labelCount === 2 && labels[0].length < 3;
  if (hasShortName) {
    this.logger.warn("Potentially suspicious short domain", { hostname });
  }

  // More than five labels (possible DGA)
  if (labelCount > 5) {
    this.logger.warn("Excessive subdomain levels detected", {
      hostname,
      levels: labelCount,
    });
  }

  // Random-looking first label
  if (this.looksRandomlyGenerated(hostname)) {
    this.logger.warn("Potentially randomly generated domain", { hostname });
  }
}
539 |
/**
 * Heuristically decide whether a hostname looks machine-generated.
 *
 * Only the leftmost label of the hostname is examined. It is considered
 * random-looking when any of the following holds:
 * - a character repeated four or more times in a row;
 * - a lowercase-letter/digit pair immediately repeated (e.g. "a1a1");
 * - more than half of its characters are digits;
 * - it is longer than 20 characters;
 * - it has no vowels and is at least 4 characters long.
 *
 * @param hostname - Hostname to inspect.
 * @returns `true` when the leftmost label matches one of the rules.
 */
private looksRandomlyGenerated(hostname: string): boolean {
  const [label] = hostname.split(".");

  if (/(.)\1{3,}/.test(label)) {
    return true;
  }

  if (/([a-z])([0-9])\1\2/.test(label)) {
    return true;
  }

  const digitCount = label.replace(/[^0-9]/g, "").length;
  if (digitCount > label.length * 0.5) {
    return true;
  }

  if (label.length > 20) {
    return true;
  }

  // Very short labels are exempt from the no-vowel rule.
  const lacksVowels = !/[aeiou]/i.test(label);
  return lacksVowels && label.length >= 4;
}
563 |
/**
 * Emit a security event at warning level for monitoring.
 *
 * @param event - Short event name, appended to the "Security event: " prefix.
 * @param details - Structured context passed to the logger unchanged.
 */
private logSecurityEvent(
  event: string,
  details: Record<string, unknown>
): void {
  const message = "Security event: " + event;
  this.logger.warn(message, details);
}
573 |
/**
 * Report whether the text contains a control character: any code unit in
 * U+0000–U+001F or U+007F–U+009F.
 *
 * @param text - Text to scan.
 * @returns `true` as soon as one control character is found.
 */
private hasControlCharacters(text: string): boolean {
  return Array.from(text).some((character) => {
    // For characters outside the BMP this is the high surrogate, which never
    // falls inside either control range.
    const code = character.charCodeAt(0);
    const isC0 = code >= 0 && code <= 31;
    const isDelOrC1 = code >= 127 && code <= 159;
    return isC0 || isDelOrC1;
  });
}
587 | }
588 |
```
--------------------------------------------------------------------------------
/tests/unit/services/gemini/GeminiValidationSchemas.test.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | // Using vitest globals - see vitest.config.ts globals: true
2 | import { ZodError } from "zod";
3 | import {
4 | validateImageGenerationParams,
5 | validateGenerateContentParams,
6 | validateRouteMessageParams,
7 | ImageGenerationParamsSchema,
8 | GenerateContentParamsSchema,
9 | RouteMessageParamsSchema,
10 | ThinkingConfigSchema,
11 | GenerationConfigSchema,
12 | } from "../../../../src/services/gemini/GeminiValidationSchemas.js";
13 |
14 | describe("GeminiValidationSchemas", () => {
describe("Image Generation Validation", () => {
  // Asserts that the image schema rejects `input` with a ZodError whose
  // first issue is reported on `field`.
  const expectRejectedAt = (
    input: Record<string, unknown>,
    field: string
  ): void => {
    const parse = () => ImageGenerationParamsSchema.parse(input);
    expect(parse).toThrow(ZodError);

    try {
      parse();
    } catch (err) {
      expect(err).toBeInstanceOf(ZodError);
      const zodError = err as ZodError;
      expect(zodError.errors[0].path[0]).toBe(field);
    }
  };

  it("should validate valid image generation parameters", () => {
    const input = {
      prompt: "A beautiful sunset over the ocean",
      modelName: "imagen-3.1-generate-003",
      resolution: "1024x1024",
      numberOfImages: 2,
      safetySettings: [
        {
          category: "HARM_CATEGORY_HARASSMENT",
          threshold: "BLOCK_MEDIUM_AND_ABOVE",
        },
      ],
      negativePrompt: "clouds, rain",
      stylePreset: "photographic",
      seed: 12345,
      styleStrength: 0.75,
    };

    // Parsing must succeed and echo the core fields back
    const parsed = ImageGenerationParamsSchema.parse(input);
    expect(parsed.prompt).toBe(input.prompt);
    expect(parsed.modelName).toBe(input.modelName);
    expect(parsed.resolution).toBe(input.resolution);
  });

  it("should validate using the validateImageGenerationParams helper", () => {
    const parsed = validateImageGenerationParams(
      "A beautiful sunset",
      "imagen-3.1-generate-003",
      "1024x1024",
      2
    );

    expect(parsed.prompt).toBe("A beautiful sunset");
    expect(parsed.modelName).toBe("imagen-3.1-generate-003");
    expect(parsed.resolution).toBe("1024x1024");
    expect(parsed.numberOfImages).toBe(2);
  });

  it("should throw on invalid prompt", () => {
    expectRejectedAt({ prompt: "" }, "prompt");
  });

  it("should throw on invalid resolution", () => {
    expectRejectedAt(
      { prompt: "valid prompt", resolution: "invalid-resolution" },
      "resolution"
    );
  });

  it("should throw on invalid numberOfImages", () => {
    expectRejectedAt(
      {
        prompt: "valid prompt",
        numberOfImages: 20, // Max is 8
      },
      "numberOfImages"
    );
  });

  it("should throw on invalid styleStrength", () => {
    expectRejectedAt(
      {
        prompt: "valid prompt",
        styleStrength: 2.5, // Max is 1.0
      },
      "styleStrength"
    );
  });
});
129 |
describe("Thinking Budget Validation", () => {
  // Asserts that `schema` rejects `input` with a ZodError whose first issue
  // path starts with the given segments.
  const expectRejectedAt = (
    schema: { parse: (value: unknown) => unknown },
    input: unknown,
    ...path: string[]
  ): void => {
    const parse = () => schema.parse(input);
    expect(parse).toThrow(ZodError);

    try {
      parse();
    } catch (err) {
      expect(err).toBeInstanceOf(ZodError);
      const zodError = err as ZodError;
      path.forEach((segment, index) => {
        expect(zodError.errors[0].path[index]).toBe(segment);
      });
    }
  };

  it("should validate valid thinking budget", () => {
    // Parsing must succeed
    const parsed = ThinkingConfigSchema.parse({ thinkingBudget: 5000 });
    expect(parsed?.thinkingBudget).toBe(5000);
  });

  it("should validate empty thinking budget object", () => {
    // Parsing must succeed
    const parsed = ThinkingConfigSchema.parse({});
    expect(parsed?.thinkingBudget).toBeUndefined();
  });

  it("should validate valid reasoningEffort values", () => {
    ["none", "low", "medium", "high"].forEach((effort) => {
      // Parsing must succeed for every accepted level
      const parsed = ThinkingConfigSchema.parse({ reasoningEffort: effort });
      expect(parsed?.reasoningEffort).toBe(effort);
    });
  });

  it("should throw on invalid reasoningEffort values", () => {
    expectRejectedAt(
      ThinkingConfigSchema,
      { reasoningEffort: "invalid" },
      "reasoningEffort"
    );
  });

  it("should validate both thinkingBudget and reasoningEffort in same object", () => {
    // Parsing must succeed
    const parsed = ThinkingConfigSchema.parse({
      thinkingBudget: 5000,
      reasoningEffort: "medium",
    });
    expect(parsed?.thinkingBudget).toBe(5000);
    expect(parsed?.reasoningEffort).toBe("medium");
  });

  it("should validate thinking budget at boundaries", () => {
    // Min value (0)
    const parseMin = () => ThinkingConfigSchema.parse({ thinkingBudget: 0 });
    expect(parseMin).not.toThrow();

    // Max value (24576)
    const parseMax = () =>
      ThinkingConfigSchema.parse({ thinkingBudget: 24576 });
    expect(parseMax).not.toThrow();
  });

  it("should throw on invalid thinking budget values", () => {
    // Below min value
    expectRejectedAt(
      ThinkingConfigSchema,
      { thinkingBudget: -1 },
      "thinkingBudget"
    );

    // Above max value
    expectRejectedAt(
      ThinkingConfigSchema,
      { thinkingBudget: 30000 },
      "thinkingBudget"
    );

    // Non-integer value
    expectRejectedAt(
      ThinkingConfigSchema,
      { thinkingBudget: 100.5 },
      "thinkingBudget"
    );
  });

  it("should validate thinking config within generation config", () => {
    // Parsing must succeed
    const parsed = GenerationConfigSchema.parse({
      temperature: 0.7,
      thinkingConfig: {
        thinkingBudget: 5000,
      },
    });
    expect(parsed?.temperature).toBe(0.7);
    expect(parsed?.thinkingConfig?.thinkingBudget).toBe(5000);
  });

  it("should validate reasoningEffort within generation config", () => {
    // Parsing must succeed
    const parsed = GenerationConfigSchema.parse({
      temperature: 0.7,
      thinkingConfig: {
        reasoningEffort: "high",
      },
    });
    expect(parsed?.temperature).toBe(0.7);
    expect(parsed?.thinkingConfig?.reasoningEffort).toBe("high");
  });

  it("should throw on invalid thinking budget in generation config", () => {
    expectRejectedAt(
      GenerationConfigSchema,
      {
        temperature: 0.7,
        thinkingConfig: {
          thinkingBudget: 30000, // Above max
        },
      },
      "thinkingConfig",
      "thinkingBudget"
    );
  });
});
291 |
describe("Content Generation Validation", () => {
  // Asserts that the content schema rejects `input` with a ZodError whose
  // first issue path starts with the given segments.
  const expectRejectedAt = (
    input: Record<string, unknown>,
    ...path: string[]
  ): void => {
    const parse = () => GenerateContentParamsSchema.parse(input);
    expect(parse).toThrow(ZodError);

    try {
      parse();
    } catch (err) {
      expect(err).toBeInstanceOf(ZodError);
      const zodError = err as ZodError;
      path.forEach((segment, index) => {
        expect(zodError.errors[0].path[index]).toBe(segment);
      });
    }
  };

  it("should validate valid content generation parameters", () => {
    const input = {
      prompt: "Tell me about AI",
      modelName: "gemini-1.5-flash",
      generationConfig: {
        temperature: 0.7,
        topP: 0.9,
        maxOutputTokens: 1000,
        thinkingConfig: {
          thinkingBudget: 4096,
        },
      },
      safetySettings: [
        {
          category: "HARM_CATEGORY_HARASSMENT",
          threshold: "BLOCK_MEDIUM_AND_ABOVE",
        },
      ],
      systemInstruction: "You are a helpful assistant",
    };

    // Parsing must succeed and preserve the supplied values
    const parsed = GenerateContentParamsSchema.parse(input);
    expect(parsed.prompt).toBe(input.prompt);
    expect(parsed.modelName).toBe(input.modelName);
    expect(parsed.generationConfig).toEqual(input.generationConfig);
  });

  it("should validate using the validateGenerateContentParams helper", () => {
    const parsed = validateGenerateContentParams({
      prompt: "Tell me about AI",
      modelName: "gemini-1.5-flash",
    });

    expect(parsed.prompt).toBe("Tell me about AI");
    expect(parsed.modelName).toBe("gemini-1.5-flash");
  });

  it("should throw on invalid prompt", () => {
    expectRejectedAt({ prompt: "" }, "prompt");
  });

  it("should throw on invalid temperature", () => {
    expectRejectedAt(
      {
        prompt: "valid prompt",
        generationConfig: {
          temperature: 2.5, // Max is 1.0
        },
      },
      "generationConfig",
      "temperature"
    );
  });

  it("should accept string or ContentSchema for systemInstruction", () => {
    // String form
    const parseWithString = () =>
      GenerateContentParamsSchema.parse({
        prompt: "valid prompt",
        systemInstruction: "You are a helpful assistant",
      });
    expect(parseWithString).not.toThrow();

    // Object form
    const parseWithObject = () =>
      GenerateContentParamsSchema.parse({
        prompt: "valid prompt",
        systemInstruction: {
          role: "system",
          parts: [{ text: "You are a helpful assistant" }],
        },
      });
    expect(parseWithObject).not.toThrow();
  });
});
391 |
describe("Router Validation", () => {
  // Asserts that the router schema rejects `input` with a ZodError whose
  // first issue is reported on `field`.
  const expectRejectedAt = (
    input: Record<string, unknown>,
    field: string
  ): void => {
    const parse = () => RouteMessageParamsSchema.parse(input);
    expect(parse).toThrow(ZodError);

    try {
      parse();
    } catch (err) {
      expect(err).toBeInstanceOf(ZodError);
      const zodError = err as ZodError;
      expect(zodError.errors[0].path[0]).toBe(field);
    }
  };

  it("should validate valid router parameters", () => {
    const input = {
      message: "What is the capital of France?",
      models: ["gemini-1.5-pro", "gemini-1.5-flash"],
      routingPrompt: "Choose the best model for this question",
      defaultModel: "gemini-1.5-pro",
      generationConfig: {
        temperature: 0.7,
        maxOutputTokens: 1000,
      },
      safetySettings: [
        {
          category: "HARM_CATEGORY_HARASSMENT",
          threshold: "BLOCK_MEDIUM_AND_ABOVE",
        },
      ],
      systemInstruction: "You are a helpful assistant",
    };

    // Parsing must succeed and preserve the supplied values
    const parsed = RouteMessageParamsSchema.parse(input);
    expect(parsed.message).toBe(input.message);
    expect(parsed.models).toEqual(input.models);
    expect(parsed.routingPrompt).toBe(input.routingPrompt);
  });

  it("should validate using the validateRouteMessageParams helper", () => {
    const parsed = validateRouteMessageParams({
      message: "What is the capital of France?",
      models: ["gemini-1.5-pro", "gemini-1.5-flash"],
    });

    expect(parsed.message).toBe("What is the capital of France?");
    expect(parsed.models).toEqual(["gemini-1.5-pro", "gemini-1.5-flash"]);
  });

  it("should throw on empty message", () => {
    expectRejectedAt({ message: "", models: ["gemini-1.5-pro"] }, "message");
  });

  it("should throw on empty models array", () => {
    expectRejectedAt({ message: "valid message", models: [] }, "models");
  });

  it("should throw on missing required fields", () => {
    // Missing required message field
    expectRejectedAt({ models: ["gemini-1.5-pro"] }, "message");

    // Missing required models field
    expectRejectedAt({ message: "valid message" }, "models");
  });

  it("should validate optional fields when provided", () => {
    // Testing with just the required fields
    const parseRequiredOnly = () =>
      RouteMessageParamsSchema.parse({
        message: "valid message",
        models: ["gemini-1.5-pro"],
      });
    expect(parseRequiredOnly).not.toThrow();

    // Testing with optional fields
    const parseWithOptionals = () =>
      RouteMessageParamsSchema.parse({
        message: "valid message",
        models: ["gemini-1.5-pro"],
        routingPrompt: "custom prompt",
        defaultModel: "gemini-1.5-flash",
      });
    expect(parseWithOptionals).not.toThrow();

    // Testing with invalid optional field
    expectRejectedAt(
      {
        message: "valid message",
        models: ["gemini-1.5-pro"],
        defaultModel: "", // Empty string
      },
      "defaultModel"
    );
  });

  it("should accept string or ContentSchema for systemInstruction", () => {
    // String form
    const parseWithString = () =>
      RouteMessageParamsSchema.parse({
        message: "valid message",
        models: ["gemini-1.5-pro"],
        systemInstruction: "You are a helpful assistant",
      });
    expect(parseWithString).not.toThrow();

    // Object form
    const parseWithObject = () =>
      RouteMessageParamsSchema.parse({
        message: "valid message",
        models: ["gemini-1.5-pro"],
        systemInstruction: {
          parts: [{ text: "You are a helpful assistant" }],
        },
      });
    expect(parseWithObject).not.toThrow();
  });
});
570 | });
571 |
```
--------------------------------------------------------------------------------
/tests/unit/tools/geminiCodeReviewTool.test.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | // Using vitest globals - see vitest.config.ts globals: true
2 | import {
3 | geminiCodeReviewTool,
4 | geminiCodeReviewStreamTool,
5 | } from "../../../src/tools/geminiCodeReviewTool.js";
6 | import { GeminiService } from "../../../src/services/index.js";
7 |
// Mock dependencies: replace the services barrel module with vitest's mock.
vi.mock("../../../src/services/index.js");

/**
 * Shape of the GeminiService test double used in this file: each listed
 * review method is a vitest mock function.
 */
type MockGeminiService = Record<
  | "reviewGitDiff"
  | "reviewGitDiffStream"
  | "reviewGitHubRepository"
  | "reviewGitHubPullRequest",
  ReturnType<typeof vi.fn>
>;
17 |
18 | describe("geminiCodeReviewTool", () => {
19 | let mockGeminiService: MockGeminiService;
20 |
21 | beforeEach(() => {
22 | vi.clearAllMocks();
23 |
24 | // Setup mock GeminiService
25 | mockGeminiService = {
26 | reviewGitDiff: vi.fn(),
27 | reviewGitDiffStream: vi.fn(),
28 | reviewGitHubRepository: vi.fn(),
29 | reviewGitHubPullRequest: vi.fn(),
30 | };
31 |
32 | vi.mocked(GeminiService).mockImplementation(() => mockGeminiService as any);
33 | });
34 |
35 | describe("Tool Configuration", () => {
36 | it("should have correct name and description", () => {
37 | expect(geminiCodeReviewTool.name).toBe("gemini_code_review");
38 | expect(geminiCodeReviewTool.description).toContain(
39 | "Performs comprehensive code reviews"
40 | );
41 | });
42 |
43 | it("should have valid input schema", () => {
44 | expect(geminiCodeReviewTool.inputSchema).toBeDefined();
45 | expect((geminiCodeReviewTool.inputSchema as any)._def.discriminator).toBe(
46 | "source"
47 | );
48 | });
49 | });
50 |
51 | describe("Local Diff Review", () => {
52 | it("should handle local diff review", async () => {
53 | const mockReview =
54 | "Code Review:\n- Good use of types\n- Consider error handling";
55 | mockGeminiService.reviewGitDiff.mockResolvedValue(mockReview);
56 |
57 | const args = {
58 | source: "local_diff" as const,
59 | diffContent: "diff --git a/file.ts b/file.ts\n+const x = 1;",
60 | model: "gemini-2.5-pro-preview-05-06",
61 | reviewFocus: "security" as const,
62 | customPrompt: "Focus on TypeScript best practices",
63 | };
64 |
65 | const result = await geminiCodeReviewTool.execute(
66 | args,
67 | mockGeminiService as any
68 | );
69 |
70 | expect(mockGeminiService.reviewGitDiff).toHaveBeenCalledWith({
71 | diffContent: args.diffContent,
72 | modelName: args.model,
73 | reviewFocus: "security", // Should take first value from array
74 | customPrompt: args.customPrompt,
75 | diffOptions: {
76 | maxFilesToInclude: undefined,
77 | excludePatterns: undefined,
78 | prioritizeFiles: undefined,
79 | },
80 | reasoningEffort: undefined,
81 | repositoryContext: undefined,
82 | });
83 |
84 | expect(result.content[0].type).toBe("text");
85 | expect(result.content[0].text).toBe(mockReview);
86 | });
87 |
88 | it("should handle local diff with repository context", async () => {
89 | const mockReview = "Review complete";
90 | mockGeminiService.reviewGitDiff.mockResolvedValue(mockReview);
91 |
92 | const args = {
93 | source: "local_diff" as const,
94 | diffContent: "diff content",
95 | repositoryContext: {
96 | name: "my-project",
97 | description: "A TypeScript project",
98 | languages: ["TypeScript", "JavaScript"],
99 | frameworks: ["React", "Node.js"],
100 | },
101 | maxFilesToInclude: 50,
102 | excludePatterns: ["*.test.ts", "dist/**"],
103 | prioritizeFiles: ["src/**/*.ts"],
104 | };
105 |
106 | const result = await geminiCodeReviewTool.execute(
107 | args,
108 | mockGeminiService as any
109 | );
110 | expect(result).toBeDefined();
111 | expect(result.content[0].text).toBe(mockReview);
112 |
113 | expect(mockGeminiService.reviewGitDiff).toHaveBeenCalledWith(
114 | expect.objectContaining({
115 | repositoryContext: JSON.stringify(args.repositoryContext),
116 | diffOptions: {
117 | maxFilesToInclude: 50,
118 | excludePatterns: ["*.test.ts", "dist/**"],
119 | prioritizeFiles: ["src/**/*.ts"],
120 | },
121 | })
122 | );
123 | });
124 | });
125 |
126 | describe("GitHub Repository Review", () => {
127 | it("should handle GitHub repository review", async () => {
128 | const mockReview = "Repository Review:\n- Well-structured codebase";
129 | mockGeminiService.reviewGitHubRepository.mockResolvedValue(mockReview);
130 |
131 | const args = {
132 | source: "github_repo" as const,
133 | repoUrl: "https://github.com/owner/repo",
134 | branch: "main",
135 | maxFiles: 50,
136 | reasoningEffort: "high" as const,
137 | reviewFocus: "architecture" as const,
138 | };
139 |
140 | const result = await geminiCodeReviewTool.execute(
141 | args,
142 | mockGeminiService as any
143 | );
144 |
145 | expect(mockGeminiService.reviewGitHubRepository).toHaveBeenCalledWith({
146 | owner: "owner",
147 | repo: "repo",
148 | branch: args.branch,
149 | maxFilesToInclude: args.maxFiles,
150 | modelName: undefined,
151 | reasoningEffort: args.reasoningEffort,
152 | reviewFocus: "architecture", // Same single value that was supplied in args.reviewFocus
153 | excludePatterns: undefined,
154 | prioritizeFiles: undefined,
155 | customPrompt: undefined,
156 | });
157 |
158 | expect(result.content[0].text).toBe(mockReview);
159 | });
160 | });
161 |
162 | describe("GitHub Pull Request Review", () => {
163 | it("should handle GitHub PR review", async () => {
164 | const mockReview =
165 | "PR Review:\n- Changes look good\n- Tests are comprehensive";
166 | mockGeminiService.reviewGitHubPullRequest.mockResolvedValue(mockReview);
167 |
168 | const args = {
169 | source: "github_pr" as const,
170 | prUrl: "https://github.com/owner/repo/pull/123",
171 | model: "gemini-2.5-flash-preview-05-20",
172 | filesOnly: true,
173 | excludePatterns: ["*.generated.ts"],
174 | };
175 |
176 | const result = await geminiCodeReviewTool.execute(
177 | args,
178 | mockGeminiService as any
179 | );
180 |
181 | expect(mockGeminiService.reviewGitHubPullRequest).toHaveBeenCalledWith({
182 | owner: "owner",
183 | repo: "repo",
184 | prNumber: 123,
185 | modelName: args.model,
186 | reasoningEffort: undefined,
187 | reviewFocus: undefined,
188 | excludePatterns: args.excludePatterns,
189 | customPrompt: undefined,
190 | });
191 |
192 | expect(result.content[0].text).toBe(mockReview);
193 | });
194 | it("should handle GitHub PR review with all optional parameters", async () => {
195 | const mockReview =
196 | "Comprehensive PR Review:\n- Code quality is excellent\n- Security considerations addressed";
197 | mockGeminiService.reviewGitHubPullRequest.mockResolvedValue(mockReview);
198 |
199 | const args = {
200 | source: "github_pr" as const,
201 | prUrl: "https://github.com/owner/repo/pull/456",
202 | model: "gemini-2.5-pro-preview-05-06",
203 | reasoningEffort: "high" as const,
204 | reviewFocus: "security" as const,
205 | excludePatterns: ["*.test.ts", "*.spec.ts", "dist/**"],
206 | customPrompt:
207 | "Focus on security vulnerabilities and performance optimizations",
208 | filesOnly: false,
209 | };
210 |
211 | const result = await geminiCodeReviewTool.execute(
212 | args,
213 | mockGeminiService as any
214 | );
215 |
216 | expect(mockGeminiService.reviewGitHubPullRequest).toHaveBeenCalledWith({
217 | owner: "owner",
218 | repo: "repo",
219 | prNumber: 456,
220 | modelName: args.model,
221 | reasoningEffort: args.reasoningEffort,
222 | reviewFocus: "security", // Same single value that was supplied in args.reviewFocus
223 | excludePatterns: args.excludePatterns,
224 | customPrompt: args.customPrompt,
225 | });
226 |
227 | expect(result.content[0].text).toBe(mockReview);
228 | });
229 |
230 | it("should handle GitHub PR review with deprecated filesOnly parameter", async () => {
231 | const mockReview = "Files-only PR Review";
232 | mockGeminiService.reviewGitHubPullRequest.mockResolvedValue(mockReview);
233 |
234 | const args = {
235 | source: "github_pr" as const,
236 | prUrl: "https://github.com/owner/repo/pull/789",
237 | filesOnly: true,
238 | };
239 |
240 | const result = await geminiCodeReviewTool.execute(
241 | args,
242 | mockGeminiService as any
243 | );
244 |
245 | expect(mockGeminiService.reviewGitHubPullRequest).toHaveBeenCalledWith({
246 | owner: "owner",
247 | repo: "repo",
248 | prNumber: 789,
249 | modelName: undefined,
250 | reasoningEffort: undefined,
251 | reviewFocus: undefined,
252 | excludePatterns: undefined,
253 | customPrompt: undefined,
254 | });
255 |
256 | expect(result.content[0].text).toBe(mockReview);
257 | });
258 |
259 | it("should handle GitHub PR review with minimal parameters", async () => {
260 | const mockReview = "Basic PR Review";
261 | mockGeminiService.reviewGitHubPullRequest.mockResolvedValue(mockReview);
262 |
263 | const args = {
264 | source: "github_pr" as const,
265 | prUrl: "https://github.com/owner/repo/pull/101",
266 | };
267 |
268 | const result = await geminiCodeReviewTool.execute(
269 | args,
270 | mockGeminiService as any
271 | );
272 |
273 | expect(mockGeminiService.reviewGitHubPullRequest).toHaveBeenCalledWith({
274 | owner: "owner",
275 | repo: "repo",
276 | prNumber: 101,
277 | modelName: undefined,
278 | reasoningEffort: undefined,
279 | reviewFocus: undefined,
280 | excludePatterns: undefined,
281 | customPrompt: undefined,
282 | });
283 |
284 | expect(result.content[0].text).toBe(mockReview);
285 | });
286 | });
287 |
288 | describe("URL Parsing and Validation", () => {
289 | it("should handle invalid GitHub repository URL", async () => {
290 | const args = {
291 | source: "github_repo" as const,
292 | repoUrl: "https://invalid-url.com/not-github",
293 | maxFiles: 100,
294 | };
295 |
296 | await expect(
297 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
298 | ).rejects.toThrow("Invalid GitHub repository URL format");
299 | });
300 |
301 | it("should handle invalid GitHub PR URL", async () => {
302 | const args = {
303 | source: "github_pr" as const,
304 | prUrl: "https://github.com/owner/repo/issues/123", // issues instead of pull
305 | };
306 |
307 | await expect(
308 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
309 | ).rejects.toThrow("Invalid GitHub pull request URL format");
310 | });
311 |
312 | it("should handle malformed GitHub PR URL", async () => {
313 | const args = {
314 | source: "github_pr" as const,
315 | prUrl: "https://github.com/owner/repo/pull/invalid-number",
316 | };
317 |
318 | await expect(
319 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
320 | ).rejects.toThrow("Invalid GitHub pull request URL format");
321 | });
322 |
323 | it("should correctly parse GitHub repository URL", async () => {
324 | const mockReview = "Repository parsed correctly";
325 | mockGeminiService.reviewGitHubRepository.mockResolvedValue(mockReview);
326 |
327 | const args = {
328 | source: "github_repo" as const,
329 | repoUrl: "https://github.com/microsoft/typescript",
330 | branch: "main",
331 | maxFiles: 100,
332 | };
333 |
334 | await geminiCodeReviewTool.execute(args, mockGeminiService as any);
335 |
336 | expect(mockGeminiService.reviewGitHubRepository).toHaveBeenCalledWith(
337 | expect.objectContaining({
338 | owner: "microsoft",
339 | repo: "typescript",
340 | branch: "main",
341 | })
342 | );
343 | });
344 |
345 | it("should correctly parse GitHub PR URL and extract PR number", async () => {
346 | const mockReview = "PR parsed correctly";
347 | mockGeminiService.reviewGitHubPullRequest.mockResolvedValue(mockReview);
348 |
349 | const args = {
350 | source: "github_pr" as const,
351 | prUrl: "https://github.com/facebook/react/pull/12345",
352 | };
353 |
354 | await geminiCodeReviewTool.execute(args, mockGeminiService as any);
355 |
356 | expect(mockGeminiService.reviewGitHubPullRequest).toHaveBeenCalledWith(
357 | expect.objectContaining({
358 | owner: "facebook",
359 | repo: "react",
360 | prNumber: 12345,
361 | })
362 | );
363 | });
364 | });
365 |
366 | describe("Review Focus Handling", () => {
367 | it("should forward the security review focus for local diff", async () => {
368 | const mockReview = "Multi-focus review";
369 | mockGeminiService.reviewGitDiff.mockResolvedValue(mockReview);
370 |
371 | const args = {
372 | source: "local_diff" as const,
373 | diffContent: "diff content",
374 | reviewFocus: "security" as const,
375 | };
376 |
377 | await geminiCodeReviewTool.execute(args, mockGeminiService as any);
378 |
379 | expect(mockGeminiService.reviewGitDiff).toHaveBeenCalledWith(
380 | expect.objectContaining({
381 | reviewFocus: "security", // Forwarded exactly as supplied
382 | })
383 | );
384 | });
385 |
386 | it("should leave review focus undefined when it is omitted", async () => {
387 | const mockReview = "Default focus review";
388 | mockGeminiService.reviewGitDiff.mockResolvedValue(mockReview);
389 |
390 | const args = {
391 | source: "local_diff" as const,
392 | diffContent: "diff content",
393 | // reviewFocus intentionally omitted to exercise the undefined path
394 | };
395 |
396 | await geminiCodeReviewTool.execute(args, mockGeminiService as any);
397 |
398 | expect(mockGeminiService.reviewGitDiff).toHaveBeenCalledWith(
399 | expect.objectContaining({
400 | reviewFocus: undefined, // Nothing supplied, so nothing is forwarded
401 | })
402 | );
403 | });
404 |
405 | it("should handle single review focus area", async () => {
406 | const mockReview = "Single focus review";
407 | mockGeminiService.reviewGitDiff.mockResolvedValue(mockReview);
408 |
409 | const args = {
410 | source: "local_diff" as const,
411 | diffContent: "diff content",
412 | reviewFocus: "architecture" as const,
413 | };
414 |
415 | await geminiCodeReviewTool.execute(args, mockGeminiService as any);
416 |
417 | expect(mockGeminiService.reviewGitDiff).toHaveBeenCalledWith(
418 | expect.objectContaining({
419 | reviewFocus: "architecture",
420 | })
421 | );
422 | });
423 | });
424 |
425 | describe("GitHub Service Error Handling", () => {
426 | it("should handle GitHub API service errors for PR review", async () => {
427 | mockGeminiService.reviewGitHubPullRequest.mockRejectedValue(
428 | new Error("GitHub API rate limit exceeded")
429 | );
430 |
431 | const args = {
432 | source: "github_pr" as const,
433 | prUrl: "https://github.com/owner/repo/pull/123",
434 | };
435 |
436 | await expect(
437 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
438 | ).rejects.toThrow();
439 | });
440 |
441 | it("should handle GitHub API service errors for repo review", async () => {
442 | mockGeminiService.reviewGitHubRepository.mockRejectedValue(
443 | new Error("Repository not found")
444 | );
445 |
446 | const args = {
447 | source: "github_repo" as const,
448 | repoUrl: "https://github.com/owner/nonexistent-repo",
449 | maxFiles: 100,
450 | };
451 |
452 | await expect(
453 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
454 | ).rejects.toThrow();
455 | });
456 | });
457 |
458 | describe("Error Handling", () => {
459 | it("should handle service errors", async () => {
460 | mockGeminiService.reviewGitDiff.mockRejectedValue(new Error("API error"));
461 |
462 | const args = {
463 | source: "local_diff" as const,
464 | diffContent: "diff content",
465 | };
466 |
467 | await expect(
468 | geminiCodeReviewTool.execute(args, mockGeminiService as any)
469 | ).rejects.toThrow();
470 | });
471 |
472 | it("should handle unknown source type", async () => {
473 | const args = {
474 | source: "unknown" as unknown as
475 | | "local_diff"
476 | | "github_pr"
477 | | "github_repo",
478 | diffContent: "diff",
479 | };
480 |
481 | await expect(
482 | geminiCodeReviewTool.execute(args as any, mockGeminiService as any)
483 | ).rejects.toThrow("Unknown review source");
484 | });
485 | });
486 | });
487 |
488 | describe("geminiCodeReviewStreamTool", () => {
489 | let mockGeminiService: Pick<MockGeminiService, "reviewGitDiffStream">;
490 |
491 | beforeEach(() => {
492 | vi.clearAllMocks();
493 |
494 | mockGeminiService = {
495 | reviewGitDiffStream: vi.fn(),
496 | };
497 |
498 | vi.mocked(GeminiService).mockImplementation(() => mockGeminiService as any);
499 | });
500 |
501 | it("should stream local diff review", async () => {
502 | const mockChunks = ["Review chunk 1", "Review chunk 2", "Review chunk 3"];
503 |
504 | // Create an async generator mock
505 | mockGeminiService.reviewGitDiffStream.mockImplementation(
506 | async function* () {
507 | for (const chunk of mockChunks) {
508 | yield chunk;
509 | }
510 | }
511 | );
512 |
513 | const args = {
514 | source: "local_diff" as const,
515 | diffContent: "diff content",
516 | model: "gemini-2.5-pro-preview-05-06",
517 | };
518 |
519 | const results: Array<any> = [];
520 | const generator = await geminiCodeReviewStreamTool.execute(
521 | args,
522 | mockGeminiService as any
523 | );
524 | for await (const chunk of generator) {
525 | results.push(chunk);
526 | }
527 |
528 | expect(results).toHaveLength(3);
529 | expect(results[0].content[0].text).toBe("Review chunk 1");
530 | expect(results[1].content[0].text).toBe("Review chunk 2");
531 | expect(results[2].content[0].text).toBe("Review chunk 3");
532 | });
533 |
534 | it("should reject non-local_diff sources", async () => {
535 | const args = {
536 | source: "github_repo" as const,
537 | repoUrl: "https://github.com/owner/repo",
538 | maxFiles: 100,
539 | };
540 |
541 | await expect(async () => {
542 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
543 | const generator = await geminiCodeReviewStreamTool.execute(
544 | args,
545 | mockGeminiService as any
546 | );
547 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
548 | for await (const _chunk of generator) {
549 | // Should not reach here - this line should never execute
550 | break;
551 | }
552 | }).rejects.toThrow("Streaming is only supported for local_diff source");
553 | });
554 | });
555 |
```
--------------------------------------------------------------------------------
/tests/unit/services/mcp/McpClientService.test.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | /// <reference types="../../../../vitest-globals.d.ts" />
2 | // Using vitest globals - see vitest.config.ts globals: true
3 |
4 | // Fixed UUID for testing
5 | const TEST_UUID = "test-uuid-value";
6 |
7 | // Import mock types first
8 | import {
9 | EVENT_SOURCE_STATES,
10 | MockEvent,
11 | MockEventSource,
12 | } from "../../../../tests/utils/mock-types.js";
13 |
14 | // Store the mock instance for access in tests
15 | let mockEventSourceInstance: MockEventSource;
16 |
17 | // Create mock objects for child_process
18 | export const mockStdout = {
19 | on: vi.fn(),
20 | removeAllListeners: vi.fn(),
21 | };
22 |
23 | export const mockStderr = {
24 | on: vi.fn(),
25 | removeAllListeners: vi.fn(),
26 | };
27 |
28 | export const mockStdin = {
29 | write: vi.fn(),
30 | };
31 |
32 | export const mockChildProcess = {
33 | stdout: mockStdout,
34 | stderr: mockStderr,
35 | stdin: mockStdin,
36 | on: vi.fn(),
37 | kill: vi.fn(),
38 | removeAllListeners: vi.fn(),
39 | };
40 |
41 | // Store the EventSource constructor for test expectations
42 | let EventSourceConstructor: any;
43 |
44 | // Setup mocks using doMock to avoid hoisting issues
45 | vi.doMock("eventsource", () => {
46 | EventSourceConstructor = vi.fn().mockImplementation(function (
47 | url: string,
48 | _options?: any
49 | ) {
50 | // Create mock instance
51 | const instance = {
52 | onopen: null,
53 | onmessage: null,
54 | onerror: null,
55 | readyState: 0,
56 | url: url,
57 | withCredentials: false,
58 | close: vi.fn(),
59 | addEventListener: vi.fn(),
60 | removeEventListener: vi.fn(),
61 | dispatchEvent: vi.fn().mockReturnValue(true),
62 | };
63 |
64 | // Store instance for test access
65 | mockEventSourceInstance = instance as any;
66 |
67 | return instance;
68 | });
69 |
70 | return {
71 | default: EventSourceConstructor,
72 | };
73 | });
74 |
75 | vi.doMock("uuid", () => ({
76 | v4: vi.fn(() => TEST_UUID),
77 | }));
78 |
79 | const mockSpawn = vi.fn(() => mockChildProcess);
80 |
81 | vi.doMock("child_process", () => ({
82 | spawn: mockSpawn,
83 | }));
84 |
85 | vi.doMock("node-fetch", () => ({
86 | default: vi.fn().mockResolvedValue({
87 | ok: true,
88 | status: 200,
89 | statusText: "OK",
90 | json: vi.fn().mockResolvedValue({ result: {} }),
91 | }),
92 | }));
93 |
94 | // Type helper for accessing private properties in tests - will be redefined after import
95 | type McpClientServicePrivate = any;
96 |
97 | describe("McpClientService", () => {
98 | let McpClientService: typeof import("../../../../src/services/mcp/McpClientService.js").McpClientService;
99 | let SdkMcpError: any;
100 | let logger: any;
101 | let service: any;
102 | let originalSetInterval: typeof global.setInterval;
103 | let originalClearInterval: typeof global.clearInterval;
104 |
105 | beforeAll(async () => {
106 | // Dynamic imports after mocks are set up
107 | const mcpService = await import(
108 | "../../../../src/services/mcp/McpClientService.js"
109 | );
110 | McpClientService = mcpService.McpClientService;
111 |
112 | const sdkTypes = await import("@modelcontextprotocol/sdk/types.js");
113 | SdkMcpError = sdkTypes.McpError;
114 |
115 | const loggerModule = await import("../../../../src/utils/logger.js");
116 | logger = loggerModule.logger;
117 | });
118 |
119 | beforeEach(() => {
120 | // Reset all mocks
121 | vi.clearAllMocks();
122 |
123 | // Save original timing functions
124 | originalSetInterval = global.setInterval;
125 | originalClearInterval = global.clearInterval;
126 |
127 | // Mock timers
128 | vi.useFakeTimers();
129 |
130 | // Mock logger
131 | vi.spyOn(logger, "info").mockImplementation(vi.fn());
132 | vi.spyOn(logger, "warn").mockImplementation(vi.fn());
133 | vi.spyOn(logger, "error").mockImplementation(vi.fn());
134 | vi.spyOn(logger, "debug").mockImplementation(vi.fn());
135 |
136 | // Create a new instance of the service
137 | service = new McpClientService();
138 | });
139 |
140 | afterEach(() => {
141 | // Restore originals
142 | global.setInterval = originalSetInterval;
143 | global.clearInterval = originalClearInterval;
144 |
145 | // Restore all mocks
146 | vi.restoreAllMocks();
147 | vi.useRealTimers();
148 | });
149 |
150 | describe("Constructor", () => {
151 | it("should initialize with empty connection maps", () => {
152 | expect(
153 | (service as McpClientServicePrivate).activeSseConnections.size
154 | ).toBe(0);
155 | expect(
156 | (service as McpClientServicePrivate).activeStdioConnections.size
157 | ).toBe(0);
158 | expect(
159 | (service as McpClientServicePrivate).pendingStdioRequests.size
160 | ).toBe(0);
161 | });
162 |
163 | it("should set up a cleanup interval", () => {
164 | expect(vi.getTimerCount()).toBeGreaterThan(0);
165 | });
166 | });
167 |
168 | describe("connect", () => {
169 | it("should validate serverId properly", async () => {
170 | await expect(
171 | service.connect("", { type: "sse", sseUrl: "http://test-url.com" })
172 | ).rejects.toThrow(SdkMcpError);
173 | await expect(
174 | service.connect("", { type: "sse", sseUrl: "http://test-url.com" })
175 | ).rejects.toThrow(/Server ID must be a non-empty string/);
176 | });
177 |
178 | it("should validate connection details properly", async () => {
179 | await expect(service.connect("server1", null as any)).rejects.toThrow(
180 | SdkMcpError
181 | );
182 | await expect(service.connect("server1", null as any)).rejects.toThrow(
183 | /Connection details must be an object/
184 | );
185 | });
186 |
187 | it("should validate connection type properly", async () => {
188 | // Using type assertion to test invalid inputs
189 | await expect(
190 | service.connect("server1", {
191 | type: "invalid" as unknown as "sse" | "stdio",
192 | })
193 | ).rejects.toThrow(SdkMcpError);
194 | // Using type assertion to test invalid inputs
195 | await expect(
196 | service.connect("server1", {
197 | type: "invalid" as unknown as "sse" | "stdio",
198 | })
199 | ).rejects.toThrow(/Connection type must be 'sse' or 'stdio'/);
200 | });
201 |
202 | it("should validate SSE URL properly", async () => {
203 | await expect(
204 | service.connect("server1", { type: "sse", sseUrl: "" })
205 | ).rejects.toThrow(SdkMcpError);
206 | await expect(
207 | service.connect("server1", { type: "sse", sseUrl: "" })
208 | ).rejects.toThrow(/sseUrl must be a non-empty string/);
209 |
210 | await expect(
211 | service.connect("server1", { type: "sse", sseUrl: "invalid-url" })
212 | ).rejects.toThrow(SdkMcpError);
213 | await expect(
214 | service.connect("server1", { type: "sse", sseUrl: "invalid-url" })
215 | ).rejects.toThrow(/valid URL format/);
216 | });
217 |
218 | it("should validate stdio command properly", async () => {
219 | await expect(
220 | service.connect("server1", { type: "stdio", stdioCommand: "" })
221 | ).rejects.toThrow(SdkMcpError);
222 | await expect(
223 | service.connect("server1", { type: "stdio", stdioCommand: "" })
224 | ).rejects.toThrow(/stdioCommand must be a non-empty string/);
225 | });
226 |
227 | it("should establish an SSE connection successfully", async () => {
228 | const connectPromise = service.connect("server1", {
229 | type: "sse",
230 | sseUrl: "http://test-server.com/sse",
231 | });
232 |
233 | // Fake timers are installed in beforeEach, so a real setTimeout would never fire; advance the fake clock (this also flushes pending microtasks)
234 | await vi.advanceTimersByTimeAsync(10);
235 |
236 | // Simulate successful connection by calling the onopen callback
237 | if (mockEventSourceInstance && mockEventSourceInstance.onopen) {
238 | mockEventSourceInstance.onopen({} as MockEvent);
239 | }
240 |
241 | const connectionId = await connectPromise;
242 |
243 | expect(connectionId).toBe(TEST_UUID);
244 | expect(
245 | (service as McpClientServicePrivate).activeSseConnections.size
246 | ).toBe(1);
247 |
248 | // Check correct parameters were used
249 | expect(EventSourceConstructor).toHaveBeenCalledWith(
250 | "http://test-server.com/sse"
251 | );
252 | });
253 |
254 | it("should establish a stdio connection successfully", async () => {
255 | const connectionId = await service.connect("server1", {
256 | type: "stdio",
257 | stdioCommand: "test-command",
258 | stdioArgs: ["arg1", "arg2"],
259 | });
260 |
261 | expect(connectionId).toBe(TEST_UUID);
262 | expect(
263 | (service as McpClientServicePrivate).activeStdioConnections.size
264 | ).toBe(1);
265 |
266 | // Check correct parameters were used
267 | expect(mockSpawn).toHaveBeenCalledWith(
268 | "test-command",
269 | ["arg1", "arg2"],
270 | expect.anything()
271 | );
272 | });
273 | });
274 |
275 | describe("cleanupStaleConnections", () => {
276 | it("should close stale SSE connections", async () => {
277 | // Create a connection
278 | const connectPromise = (service as McpClientServicePrivate).connectSse(
279 | "http://test-server.com/sse"
280 | );
281 | mockEventSourceInstance.onopen &&
282 | mockEventSourceInstance.onopen({} as MockEvent);
283 | const connectionId = await connectPromise;
284 |
285 | // Verify connection exists
286 | expect(
287 | (service as McpClientServicePrivate).activeSseConnections.size
288 | ).toBe(1);
289 |
290 | // Set the last activity timestamp to be stale (10 minutes + 1 second ago)
291 | const staleTimestamp = Date.now() - (600000 + 1000);
292 | (service as McpClientServicePrivate).activeSseConnections.get(
293 | connectionId
294 | ).lastActivityTimestamp = staleTimestamp;
295 |
296 | // Call the cleanup method
297 | (service as McpClientServicePrivate).cleanupStaleConnections();
298 |
299 | // Verify connection was closed
300 | expect(mockEventSourceInstance.close).toHaveBeenCalled();
301 | expect(
302 | (service as McpClientServicePrivate).activeSseConnections.size
303 | ).toBe(0);
304 | });
305 |
306 | it("should close stale stdio connections", async () => {
307 | // Create a connection
308 | const connectionId = await (
309 | service as McpClientServicePrivate
310 | ).connectStdio("test-command");
311 |
312 | // Verify connection exists
313 | expect(
314 | (service as McpClientServicePrivate).activeStdioConnections.size
315 | ).toBe(1);
316 |
317 | // Set the last activity timestamp to be stale (10 minutes + 1 second ago)
318 | const staleTimestamp = Date.now() - (600000 + 1000);
319 | (service as McpClientServicePrivate).activeStdioConnections.get(
320 | connectionId
321 | ).lastActivityTimestamp = staleTimestamp;
322 |
323 | // Call the cleanup method
324 | (service as McpClientServicePrivate).cleanupStaleConnections();
325 |
326 | // Verify connection was closed
327 | expect(mockChildProcess.kill).toHaveBeenCalled();
328 | expect(
329 | (service as McpClientServicePrivate).activeStdioConnections.size
330 | ).toBe(0);
331 | });
332 |
333 | it("should not close active connections", async () => {
334 | // Create a connection
335 | const connectPromise = (service as McpClientServicePrivate).connectSse(
336 | "http://test-server.com/sse"
337 | );
338 | mockEventSourceInstance.onopen &&
339 | mockEventSourceInstance.onopen({} as MockEvent);
340 | await connectPromise;
341 |
342 | // Verify connection exists (with current timestamp)
343 | expect(
344 | (service as McpClientServicePrivate).activeSseConnections.size
345 | ).toBe(1);
346 |
347 | // Call the cleanup method
348 | (service as McpClientServicePrivate).cleanupStaleConnections();
349 |
350 | // Verify connection was not closed
351 | expect(mockEventSourceInstance.close).not.toHaveBeenCalled();
352 | expect(
353 | (service as McpClientServicePrivate).activeSseConnections.size
354 | ).toBe(1);
355 | });
356 | });
357 |
358 | describe("SSE Connections", () => {
359 | const testUrl = "http://test-server.com/sse";
360 |
361 | it("should create an EventSource and return a connection ID when successful", async () => {
362 | const connectPromise = (service as McpClientServicePrivate).connectSse(
363 | testUrl
364 | );
365 |
366 | // Trigger the onopen event to simulate successful connection
367 | mockEventSourceInstance.onopen &&
368 | mockEventSourceInstance.onopen({} as MockEvent);
369 |
370 | const connectionId = await connectPromise;
371 |
372 | // Check EventSource was constructed with the correct URL
373 | expect(EventSourceConstructor).toHaveBeenCalledWith(testUrl);
374 |
375 | // Check the connection ID is returned
376 | expect(connectionId).toBe(TEST_UUID);
377 |
378 | // Check the connection was stored with last activity timestamp
379 | expect(
380 | (service as McpClientServicePrivate).activeSseConnections.size
381 | ).toBe(1);
382 | expect(
383 | (service as McpClientServicePrivate).activeSseConnections.has(TEST_UUID)
384 | ).toBe(true);
385 |
386 | const connection = (
387 | service as McpClientServicePrivate
388 | ).activeSseConnections.get(TEST_UUID);
389 | expect(connection.lastActivityTimestamp).toBeGreaterThan(0);
390 | });
391 |
392 | it("should handle SSE messages and pass them to the messageHandler", async () => {
393 | const messageHandler = vi.fn();
394 | const testData = { foo: "bar" };
395 |
396 | const connectPromise = (service as McpClientServicePrivate).connectSse(
397 | testUrl,
398 | messageHandler
399 | );
400 | // Manually trigger the onopen callback to resolve the connection promise
401 | mockEventSourceInstance.onopen &&
402 | mockEventSourceInstance.onopen({} as MockEvent);
403 | await connectPromise;
404 |
405 | // Get the initial activity timestamp
406 | const initialTimestamp = (
407 | service as McpClientServicePrivate
408 | ).activeSseConnections.get(TEST_UUID).lastActivityTimestamp;
409 |
410 | // Spy on Date.now (instead of reassigning it) so that afterEach's
411 | // vi.restoreAllMocks() undoes the stub even if an assertion below fails
412 | const dateNowSpy = vi.spyOn(Date, "now");
413 | dateNowSpy.mockReturnValue(initialTimestamp + 1000);
414 |
415 | // Trigger the onmessage event with test data
416 | const messageEvent = { data: JSON.stringify(testData) };
417 | mockEventSourceInstance.onmessage &&
418 | mockEventSourceInstance.onmessage(messageEvent as MessageEvent);
419 |
420 | // Verify message handler was called with parsed data
421 | expect(messageHandler).toHaveBeenCalledWith(testData);
422 |
423 | // Verify last activity timestamp was updated
424 | const newTimestamp = (
425 | service as McpClientServicePrivate
426 | ).activeSseConnections.get(TEST_UUID).lastActivityTimestamp;
427 | expect(newTimestamp).toBeGreaterThan(initialTimestamp);
428 |
429 | // Restore Date.now right away on the success path
430 | dateNowSpy.mockRestore();
431 | });
432 |
433 | it("should handle SSE message parse errors and pass raw data to the messageHandler", async () => {
434 | const messageHandler = vi.fn();
435 | const invalidJson = "{ not valid json";
436 |
437 | const connectPromise = (service as McpClientServicePrivate).connectSse(
438 | testUrl,
439 | messageHandler
440 | );
441 | // Manually trigger the onopen callback to resolve the connection promise
442 | mockEventSourceInstance.onopen &&
443 | mockEventSourceInstance.onopen({} as MockEvent);
444 | await connectPromise;
445 |
446 | // Spy on Date.now (rather than reassigning it) so afterEach's vi.restoreAllMocks() can undo it on failure
447 | const laterTimestamp = Date.now() + 1000;
448 | const dateNowSpy = vi.spyOn(Date, "now").mockReturnValue(laterTimestamp);
449 |
450 | // Trigger the onmessage event with invalid JSON
451 | const messageEvent = { data: invalidJson };
452 | mockEventSourceInstance.onmessage &&
453 | mockEventSourceInstance.onmessage(messageEvent as MessageEvent);
454 |
455 | // Verify message handler was called with raw data
456 | expect(messageHandler).toHaveBeenCalledWith(invalidJson);
457 |
458 | // Restore Date.now right away on the success path
459 | dateNowSpy.mockRestore();
460 | });
461 |
462 | it("should reject the promise when an SSE error occurs before connection", async () => {
463 | const connectPromise = (service as McpClientServicePrivate).connectSse(
464 | testUrl
465 | );
466 |
467 | // Trigger the onerror event before onopen
468 | const errorEvent: MockEvent = {
469 | type: "error",
470 | message: "Connection failed",
471 | };
472 | mockEventSourceInstance.onerror &&
473 | mockEventSourceInstance.onerror(errorEvent);
474 |
475 | // Expect the promise to reject
476 | await expect(connectPromise).rejects.toThrow(SdkMcpError);
477 | await expect(connectPromise).rejects.toThrow(
478 | /Failed to establish SSE connection/
479 | );
480 |
481 | // Verify no connection was stored
482 | expect(
483 | (service as McpClientServicePrivate).activeSseConnections.size
484 | ).toBe(0);
485 | });
486 |
487 | it("should close and remove the connection when an SSE error occurs after connection", async () => {
488 | // Successfully connect first
489 | const connectPromise = (service as McpClientServicePrivate).connectSse(
490 | testUrl
491 | );
492 | mockEventSourceInstance.onopen &&
493 | mockEventSourceInstance.onopen({} as MockEvent);
494 | const connectionId = await connectPromise;
495 |
496 | // Verify connection exists before error
497 | expect(
498 | (service as McpClientServicePrivate).activeSseConnections.size
499 | ).toBe(1);
500 | expect(
501 | (service as McpClientServicePrivate).activeSseConnections.has(
502 | connectionId
503 | )
504 | ).toBe(true);
505 |
506 | // Update readyState to simulate a connected then closed state
507 | mockEventSourceInstance.readyState = EVENT_SOURCE_STATES.CLOSED;
508 |
509 | // Trigger an error after successful connection
510 | const errorEvent: MockEvent = {
511 | type: "error",
512 | message: "Connection lost",
513 | };
514 | mockEventSourceInstance.onerror &&
515 | mockEventSourceInstance.onerror(errorEvent);
516 |
517 | // Verify connection was removed
518 | expect(
519 | (service as McpClientServicePrivate).activeSseConnections.size
520 | ).toBe(0);
521 | expect(
522 | (service as McpClientServicePrivate).activeSseConnections.has(
523 | connectionId
524 | )
525 | ).toBe(false);
526 | });
527 |
528 | it("should close an SSE connection on disconnect", async () => {
529 | // Reset mocks before this test to ensure clean state
530 | vi.clearAllMocks();
531 | service = new McpClientService();
532 |
533 | // In this test we're going to directly set up the activeSseConnections map to match the test scenario
534 | // This is necessary because the implementation uses the connectionId for storage and lookup
535 | const connectionId = TEST_UUID;
536 |
537 | // Manually set up the connection in the map
538 | (service as McpClientServicePrivate).activeSseConnections.set(
539 | connectionId,
540 | {
541 | eventSource: mockEventSourceInstance,
542 | baseUrl: testUrl,
543 | lastActivityTimestamp: Date.now(),
544 | }
545 | );
546 |
547 | // Verify connection exists before disconnecting
548 | expect(
549 | (service as McpClientServicePrivate).activeSseConnections.size
550 | ).toBe(1);
551 | expect(
552 | (service as McpClientServicePrivate).activeSseConnections.has(
553 | connectionId
554 | )
555 | ).toBe(true);
556 |
557 | // Disconnect
558 | const result = service.disconnect(connectionId);
559 |
560 | // Verify connection was closed
561 | expect(result).toBe(true);
562 | expect(mockEventSourceInstance.close).toHaveBeenCalled();
563 | expect(
564 | (service as McpClientServicePrivate).activeSseConnections.size
565 | ).toBe(0);
566 | });
567 |
568 | it("should throw an error when disconnecting from a non-existent connection", () => {
569 | expect(() => service.disconnect("non-existent-server")).toThrow(
570 | SdkMcpError
571 | );
572 | expect(() => service.disconnect("non-existent-server")).toThrow(
573 | /Connection not found/
574 | );
575 | });
576 | });
577 |
578 | // Additional tests for callTool, listTools, etc. would follow the same pattern
579 | });
580 |
```
--------------------------------------------------------------------------------
/tests/unit/services/gemini/GeminiChatService.test.vitest.ts:
--------------------------------------------------------------------------------
```typescript
1 | // Using vitest globals - see vitest.config.ts globals: true
2 | import { GeminiChatService } from "../../../../src/services/gemini/GeminiChatService.js";
3 | import {
4 | GeminiApiError,
5 | ValidationError as GeminiValidationError,
6 | } from "../../../../src/utils/errors.js";
7 |
8 | // Import necessary types
9 | import type {
10 | GenerateContentResponse,
11 | GenerationConfig,
12 | Content,
13 | SafetySetting,
14 | GoogleGenAI,
15 | } from "@google/genai";
16 |
17 | // Import the ChatSession type from our service
18 | import { ChatSession } from "../../../../src/services/gemini/GeminiTypes.js";
19 | import { FinishReason } from "../../../../src/types/googleGenAITypes.js";
20 |
21 | // Helper type for accessing private properties in tests
22 | type GeminiChatServiceTestAccess = {
23 | chatSessions: Map<string, ChatSession>;
24 | };
25 |
26 | // Define a partial version of GenerateContentResponse for mocking
27 | interface PartialGenerateContentResponse
28 | extends Partial<GenerateContentResponse> {
29 | response?: {
30 | candidates?: Array<{
31 | content?: {
32 | role?: string;
33 | parts?: Array<{
34 | text?: string;
35 | functionCall?: Record<string, unknown>;
36 | }>;
37 | };
38 | finishReason?: FinishReason;
39 | }>;
40 | promptFeedback?: {
41 | blockReason?: string;
42 | };
43 | };
44 | model?: string;
45 | contents?: Array<Content>;
46 | generationConfig?: GenerationConfig;
47 | safetySettings?: Array<SafetySetting>;
48 | candidates?: Array<{
49 | content?: {
50 | role?: string;
51 | parts?: Array<{
52 | text?: string;
53 | functionCall?: Record<string, unknown>;
54 | }>;
55 | };
56 | finishReason?: FinishReason;
57 | }>;
58 | text?: string;
59 | }
60 |
61 | // Mock uuid
62 | vi.mock("uuid", () => ({
63 | v4: () => "test-session-id",
64 | }));
65 |
66 | describe("GeminiChatService", () => {
67 | let chatService: GeminiChatService;
68 | const defaultModel = "gemini-1.5-pro";
69 |
70 | // Mock the GoogleGenAI class
71 | const mockGenerateContent = vi
72 | .fn()
73 | .mockResolvedValue({} as PartialGenerateContentResponse);
74 | const mockGoogleGenAI = {
75 | models: {
76 | generateContent: mockGenerateContent,
77 | getGenerativeModel: vi.fn().mockReturnValue({
78 | generateContent: mockGenerateContent,
79 | }),
80 | // Mock the required internal methods
81 | apiClient: {} as unknown,
82 | generateContentInternal: vi.fn(),
83 | generateContentStreamInternal: vi.fn(),
84 | },
85 | // Add other required properties for GoogleGenAI
86 | apiClient: {} as unknown,
87 | vertexai: {} as unknown,
88 | live: {} as unknown,
89 | chats: {} as unknown,
90 | upload: {} as unknown,
91 | caching: {} as unknown,
92 | } as unknown as GoogleGenAI;
93 |
94 | beforeEach(() => {
95 | // Reset mocks before each test
96 | vi.clearAllMocks();
97 |
98 | // Initialize chat service with mocked dependencies
99 | chatService = new GeminiChatService(mockGoogleGenAI, defaultModel);
100 | });
101 |
102 | describe("startChatSession", () => {
103 | it("should create a new chat session with default model when no model is provided", () => {
104 | const sessionId = chatService.startChatSession({});
105 |
106 | expect(sessionId).toBe("test-session-id");
107 |
108 | // Get the session from private map using proper type assertion
109 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
110 | .chatSessions;
111 | const session = sessions.get("test-session-id") as ChatSession;
112 |
113 | expect(session.model).toBe(defaultModel);
114 | expect(session.history).toEqual([]);
115 | expect(session.config).toBeDefined();
116 | });
117 |
118 | it("should create a new chat session with provided model", () => {
119 | const customModel = "gemini-1.5-flash";
120 | const sessionId = chatService.startChatSession({
121 | modelName: customModel,
122 | });
123 |
124 | expect(sessionId).toBe("test-session-id");
125 |
126 | // Get the session from private map with proper type assertion
127 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
128 | .chatSessions;
129 | const session = sessions.get("test-session-id") as ChatSession;
130 |
131 | expect(session.model).toBe(customModel);
132 | });
133 |
134 | it("should include history if provided", () => {
135 | const history = [
136 | { role: "user", parts: [{ text: "Hello" }] },
137 | { role: "model", parts: [{ text: "Hi there" }] },
138 | ];
139 |
140 | chatService.startChatSession({ history });
141 |
142 | // Get the session from private map with proper type assertion
143 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
144 | .chatSessions;
145 | const session = sessions.get("test-session-id") as ChatSession;
146 |
147 | expect(session.history).toEqual(history);
148 | expect(session.config.history).toEqual(history);
149 | });
150 |
151 | it("should convert string systemInstruction to Content object", () => {
152 | const systemInstruction = "You are a helpful assistant";
153 |
154 | chatService.startChatSession({ systemInstruction });
155 |
156 | // Get the session from private map with proper type assertion
157 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
158 | .chatSessions;
159 | const session = sessions.get("test-session-id") as ChatSession;
160 |
161 | expect(session.config.systemInstruction).toEqual({
162 | parts: [{ text: systemInstruction }],
163 | });
164 | });
165 |
166 | it("should throw when no model name is available", () => {
167 | // Create a service with no default model
168 | const noDefaultService = new GeminiChatService(
169 | mockGoogleGenAI as GoogleGenAI
170 | );
171 |
172 | expect(() => noDefaultService.startChatSession({})).toThrow(
173 | GeminiApiError
174 | );
175 | expect(() => noDefaultService.startChatSession({})).toThrow(
176 | "Model name must be provided"
177 | );
178 | });
179 | });
180 |
181 | describe("sendMessageToSession", () => {
182 | beforeEach(() => {
183 | // Create a test session first
184 | chatService.startChatSession({});
185 | });
186 |
187 | it("should send a message to an existing session", async () => {
188 | // Mock successful response with proper typing
189 | const mockResponse: PartialGenerateContentResponse = {
190 | candidates: [
191 | {
192 | content: {
193 | role: "model",
194 | parts: [{ text: "Hello, how can I help you?" }],
195 | },
196 | },
197 | ],
198 | text: "Hello, how can I help you?",
199 | };
200 | mockGenerateContent.mockResolvedValueOnce(mockResponse);
201 |
202 | const response = await chatService.sendMessageToSession({
203 | sessionId: "test-session-id",
204 | message: "Hi there",
205 | });
206 |
207 | // Verify generateContent was called with correct params
208 | expect(mockGenerateContent).toHaveBeenCalledTimes(1);
209 | const requestConfig = (
210 | mockGenerateContent.mock.calls[0] as unknown[]
211 | )[0] as Record<string, unknown>;
212 | expect(requestConfig.model).toBe(defaultModel);
213 | expect(requestConfig.contents).toBeDefined();
214 |
215 | // Just verify the message exists somewhere in the contents
216 | const contents = requestConfig.contents as Array<Record<string, unknown>>;
217 | const userContent = contents.find(
218 | (content: Record<string, unknown>) =>
219 | content.role === "user" &&
220 | (content.parts as Array<{ text?: string }>)?.[0]?.text === "Hi there"
221 | );
222 | expect(userContent).toBeDefined();
223 |
224 | // Verify response
225 | expect(response).toEqual(mockResponse);
226 |
227 | // Check that history was updated in the session
228 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
229 | .chatSessions;
230 | const session = sessions.get("test-session-id") as ChatSession;
231 | expect(session.history.length).toBe(2); // User + model response
232 | });
233 |
234 | it("should throw when session doesn't exist", async () => {
235 | await expect(
236 | chatService.sendMessageToSession({
237 | sessionId: "non-existent-session",
238 | message: "Hi there",
239 | })
240 | ).rejects.toThrow(GeminiApiError);
241 |
242 | await expect(
243 | chatService.sendMessageToSession({
244 | sessionId: "non-existent-session",
245 | message: "Hi there",
246 | })
247 | ).rejects.toThrow("Chat session not found");
248 | });
249 |
250 | it("should apply per-message configuration options", async () => {
251 | // Mock successful response with proper typing
252 | const emptyResponse: PartialGenerateContentResponse = {};
253 | mockGenerateContent.mockResolvedValueOnce(emptyResponse);
254 |
255 | const generationConfig = { temperature: 0.7 };
256 | const safetySettings = [
257 | {
258 | category: "HARM_CATEGORY_HARASSMENT",
259 | threshold: "BLOCK_MEDIUM_AND_ABOVE",
260 | },
261 | ];
262 |
263 | await chatService.sendMessageToSession({
264 | sessionId: "test-session-id",
265 | message: "Hi there",
266 | generationConfig,
267 | safetySettings: safetySettings as SafetySetting[],
268 | });
269 |
270 | // Verify configuration was applied
271 | const requestConfig = (
272 | mockGenerateContent.mock.calls[0] as unknown[]
273 | )[0] as Record<string, unknown>;
274 | expect(requestConfig.generationConfig).toEqual(generationConfig);
275 | expect(requestConfig.safetySettings).toEqual(safetySettings);
276 | });
277 | });
278 |
279 | describe("sendFunctionResultToSession", () => {
280 | beforeEach(() => {
281 | // Create a test session first
282 | chatService.startChatSession({});
283 | });
284 |
285 | it("should send a function result to an existing session", async () => {
286 | // Mock successful response with proper typing
287 | const mockResponse: PartialGenerateContentResponse = {
288 | candidates: [
289 | {
290 | content: {
291 | role: "model",
292 | parts: [{ text: "I've processed that function result" }],
293 | },
294 | },
295 | ],
296 | };
297 | mockGenerateContent.mockResolvedValueOnce(mockResponse);
298 |
299 | const response = await chatService.sendFunctionResultToSession({
300 | sessionId: "test-session-id",
301 | functionResponse: '{"result": "success"}',
302 | functionCall: { name: "testFunction" },
303 | });
304 |
305 | // Verify generateContent was called with correct params
306 | expect(mockGenerateContent).toHaveBeenCalledTimes(1);
307 | const requestConfig = (
308 | mockGenerateContent.mock.calls[0] as unknown[]
309 | )[0] as Record<string, unknown>;
310 |
311 | // Verify content contains function response
312 | const contents = requestConfig.contents as Array<Record<string, unknown>>;
313 | const functionResponseContent = contents.find(
314 | (c: Record<string, unknown>) => c.role === "function"
315 | );
316 | expect(functionResponseContent).toBeDefined();
317 | const parts = (functionResponseContent as Record<string, unknown>)
318 | .parts as Array<Record<string, unknown>>;
319 | const functionResponse = parts[0].functionResponse as Record<
320 | string,
321 | unknown
322 | >;
323 | expect(functionResponse.name).toBe("testFunction");
324 |
325 | // Verify response
326 | expect(response).toEqual(mockResponse);
327 |
328 | // Check that history was updated in the session
329 | const sessions = (chatService as unknown as GeminiChatServiceTestAccess)
330 | .chatSessions;
331 | const session = sessions.get("test-session-id") as ChatSession;
332 | expect(session.history.length).toBe(2); // Function call + model response
333 | });
334 |
335 | it("should throw when session doesn't exist", async () => {
336 | await expect(
337 | chatService.sendFunctionResultToSession({
338 | sessionId: "non-existent-session",
339 | functionResponse: "{}",
340 | })
341 | ).rejects.toThrow(GeminiApiError);
342 |
343 | await expect(
344 | chatService.sendFunctionResultToSession({
345 | sessionId: "non-existent-session",
346 | functionResponse: "{}",
347 | })
348 | ).rejects.toThrow("Chat session not found");
349 | });
350 | });
351 |
352 | describe("routeMessage", () => {
353 | it("should validate input parameters", async () => {
354 | // Invalid parameters to trigger validation error
355 | await expect(
356 | chatService.routeMessage({
357 | message: "", // Empty message
358 | models: [], // Empty models array
359 | } as Parameters<typeof chatService.routeMessage>[0])
360 | ).rejects.toThrow(GeminiValidationError);
361 | });
362 |
363 | it("should use the first model to do routing and selected model for the message", async () => {
364 | // Mock successful routing response
365 | const routingResponse: PartialGenerateContentResponse = {
366 | text: "gemini-1.5-flash",
367 | };
368 | mockGenerateContent.mockResolvedValueOnce(routingResponse);
369 |
370 | // Mock successful content response
371 | const contentResponse: PartialGenerateContentResponse = {
372 | text: "Response from flash model",
373 | candidates: [
374 | {
375 | content: {
376 | parts: [{ text: "Response from flash model" }],
377 | },
378 | },
379 | ],
380 | };
381 | mockGenerateContent.mockResolvedValueOnce(contentResponse);
382 |
383 | const result = await chatService.routeMessage({
384 | message: "What is the capital of France?",
385 | models: ["gemini-1.5-pro", "gemini-1.5-flash"],
386 | });
387 |
388 | // Verify routing was done with the first model
389 | expect(mockGenerateContent).toHaveBeenCalledTimes(2);
390 | const routingConfig = (
391 | mockGenerateContent.mock.calls[0] as unknown[]
392 | )[0] as Record<string, unknown>;
393 | expect(routingConfig.model).toBe("gemini-1.5-pro");
394 | const routingContents = routingConfig.contents as Array<
395 | Record<string, unknown>
396 | >;
397 | const parts = routingContents[0].parts as Array<Record<string, unknown>>;
398 | expect(parts[0].text).toContain("router");
399 |
400 | // Verify final request used the chosen model
401 | const messageConfig = (
402 | mockGenerateContent.mock.calls[1] as unknown[]
403 | )[0] as Record<string, unknown>;
404 | expect(messageConfig.model).toBe("gemini-1.5-flash");
405 |
406 | // Verify result contains both response and chosen model
407 | expect(result.response).toBeDefined();
408 | expect(result.chosenModel).toBe("gemini-1.5-flash");
409 | });
410 |
411 | it("should use default model if routing fails to identify a model", async () => {
412 | // Mock routing response that doesn't match any model
413 | const unknownModelResponse: PartialGenerateContentResponse = {
414 | text: "unknown-model",
415 | };
416 | mockGenerateContent.mockResolvedValueOnce(unknownModelResponse);
417 |
418 | // Mock successful content response
419 | const defaultModelResponse: PartialGenerateContentResponse = {
420 | text: "Response from default model",
421 | candidates: [
422 | {
423 | content: {
424 | parts: [{ text: "Response from default model" }],
425 | },
426 | },
427 | ],
428 | };
429 | mockGenerateContent.mockResolvedValueOnce(defaultModelResponse);
430 |
431 | const result = await chatService.routeMessage({
432 | message: "What is the capital of France?",
433 | models: ["gemini-1.5-pro", "gemini-1.5-flash"],
434 | defaultModel: "gemini-1.5-pro",
435 | });
436 |
437 | // Verify final request used the default model
438 | const messageConfig = (
439 | mockGenerateContent.mock.calls[1] as unknown[]
440 | )[0] as Record<string, unknown>;
441 | expect(messageConfig.model).toBe("gemini-1.5-pro");
442 | expect(result.chosenModel).toBe("gemini-1.5-pro");
443 | });
444 |
445 | it("should throw if routing fails and no default model is provided", async () => {
446 | // Mock routing response that doesn't match any model
447 | const failedRoutingResponse: PartialGenerateContentResponse = {
448 | text: "unknown-model",
449 | };
450 | mockGenerateContent.mockResolvedValueOnce(failedRoutingResponse);
451 |
452 | // Route exactly once and assert on that single rejection. The queued
453 | // mockResolvedValueOnce response is consumed by the first routing call, so
454 | // a second routeMessage() invocation would no longer see "unknown-model"
455 | // and would not exercise the scenario under test.
456 | const routePromise = chatService.routeMessage({
457 | message: "What is the capital of France?",
458 | models: ["gemini-1.5-pro", "gemini-1.5-flash"],
459 | });
460 |
461 | await expect(routePromise).rejects.toThrow(GeminiApiError);
462 | await expect(routePromise).rejects.toThrow(
463 | /Routing failed|Failed to route message/
464 | );
465 | });
466 |
467 | it("should use custom routing prompt if provided", async () => {
468 | // Mock successful routing and content responses
469 | const customPromptRoutingResponse: PartialGenerateContentResponse = {
470 | text: "gemini-1.5-flash",
471 | };
472 | mockGenerateContent.mockResolvedValueOnce(customPromptRoutingResponse);
473 |
474 | const customPromptContentResponse: PartialGenerateContentResponse = {
475 | text: "Response",
476 | };
477 | mockGenerateContent.mockResolvedValueOnce(customPromptContentResponse);
478 |
479 | const customPrompt = "Choose the most performant model for this request";
480 |
481 | await chatService.routeMessage({
482 | message: "What is the capital of France?",
483 | models: ["gemini-1.5-pro", "gemini-1.5-flash"],
484 | routingPrompt: customPrompt,
485 | });
486 |
487 | // Verify routing was done with the custom prompt
488 | const routingConfig = (
489 | mockGenerateContent.mock.calls[0] as unknown[]
490 | )[0] as Record<string, unknown>;
491 | const routingContents = routingConfig.contents as Array<
492 | Record<string, unknown>
493 | >;
494 | const parts = routingContents[0].parts as Array<Record<string, unknown>>;
495 | const promptText = parts[0].text;
496 | expect(promptText).toContain(customPrompt);
497 | });
498 |
499 | it("should pass system instruction to both routing and content requests", async () => {
500 | // Mock successful routing and content responses
501 | const customPromptRoutingResponse: PartialGenerateContentResponse = {
502 | text: "gemini-1.5-flash",
503 | };
504 | mockGenerateContent.mockResolvedValueOnce(customPromptRoutingResponse);
505 |
506 | const customPromptContentResponse: PartialGenerateContentResponse = {
507 | text: "Response",
508 | };
509 | mockGenerateContent.mockResolvedValueOnce(customPromptContentResponse);
510 |
511 | const systemInstruction = "You are a helpful assistant";
512 |
513 | await chatService.routeMessage({
514 | message: "What is the capital of France?",
515 | models: ["gemini-1.5-pro", "gemini-1.5-flash"],
516 | systemInstruction,
517 | });
518 |
519 | // Verify system instruction was added to routing request
520 | const routingConfig = (
521 | mockGenerateContent.mock.calls[0] as unknown[]
522 | )[0] as Record<string, unknown>;
523 | const routingContents = routingConfig.contents as Array<
524 | Record<string, unknown>
525 | >;
526 | expect(routingContents[0].role).toBe("system");
527 | const routingParts = routingContents[0].parts as Array<
528 | Record<string, unknown>
529 | >;
530 | expect(routingParts[0].text).toBe(systemInstruction);
531 |
532 | // Verify system instruction was added to content request
533 | const messageConfig = (
534 | mockGenerateContent.mock.calls[1] as unknown[]
535 | )[0] as Record<string, unknown>;
536 | const messageContents = messageConfig.contents as Array<
537 | Record<string, unknown>
538 | >;
539 | expect(messageContents[0].role).toBe("system");
540 | const messageParts = messageContents[0].parts as Array<
541 | Record<string, unknown>
542 | >;
543 | expect(messageParts[0].text).toBe(systemInstruction);
544 | });
545 | });
546 | });
547 |
```
--------------------------------------------------------------------------------
/src/services/gemini/GeminiUrlContextService.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { ConfigurationManager } from "../../config/ConfigurationManager.js";
2 | import { logger } from "../../utils/logger.js";
3 | import {
4 | GeminiUrlFetchError,
5 | GeminiUrlValidationError,
6 | } from "../../utils/geminiErrors.js";
7 | import { UrlSecurityService } from "../../utils/UrlSecurityService.js";
8 | import { RetryService } from "../../utils/RetryService.js";
9 | import type { Content } from "@google/genai";
10 |
11 | export interface UrlFetchOptions { // Per-call overrides for URL fetching; every field is optional
12 | maxContentLength?: number; // Max bytes to fetch; falls back to the configured defaultMaxContentKb * 1024
13 | timeout?: number; // Fetch timeout in ms; falls back to the configured defaultTimeoutMs
14 | headers?: Record<string, string>; // Extra request headers, spread after the built-in defaults so they override them
15 | allowedDomains?: string[]; // Domain whitelist, passed to UrlSecurityService.validateUrl
16 | includeMetadata?: boolean; // Include URL metadata in response
17 | convertToMarkdown?: boolean; // Convert HTML to markdown; falls back to the configured convertToMarkdown
18 | followRedirects?: number; // Max redirects to follow (3 when unset or 0)
19 | userAgent?: string; // Custom user agent; replaces the default User-Agent header value
20 | }
21 |
22 | export interface UrlContentMetadata { // Metadata describing one fetched URL
23 | url: string; // URL as originally requested
24 | finalUrl?: string; // After redirects; only set when the response URL differs from the requested one
25 | title?: string; // Text of the <title> element (HTML responses only)
26 | description?: string; // <meta name="description"> content (HTML responses only)
27 | contentType: string; // Raw Content-Type response header ("" when the header is absent)
28 | contentLength?: number; // Content-Length header value, or the content string length when the header is missing or 0
29 | fetchedAt: Date; // Timestamp taken when the metadata object was built
30 | truncated: boolean; // True when the body exceeded the size limit and was cut
31 | responseTime: number; // ms, measured from the start time handed to the fetch
32 | statusCode: number; // HTTP status of the response
33 | encoding?: string; // Derived from the Content-Type header by extractEncodingFromContentType
34 | language?: string; // <html lang> value, else the content-language meta value
35 | canonicalUrl?: string; // href of <link rel="canonical">
36 | ogImage?: string; // content of <meta property="og:image">
37 | favicon?: string; // href of <link rel="icon"> or <link rel="shortcut icon">
38 | }
39 |
40 | export interface UrlContentResult { // Outcome of fetching a single URL
41 | content: string; // Processed body text (optionally converted to markdown, then cleaned)
42 | metadata: UrlContentMetadata; // Details about the request/response that produced the content
43 | }
44 |
45 | export interface UrlBatchResult { // Aggregate outcome of processUrlsForContext
46 | successful: UrlContentResult[]; // Results appended as each fetch completes, so not necessarily in input order
47 | failed: Array<{
48 | url: string; // URL whose fetch failed
49 | error: Error; // Caught error (non-Error throwables are wrapped in Error)
50 | errorCode: string; // Code derived from the error by getErrorCode
51 | }>;
52 | summary: {
53 | totalUrls: number; // Number of URLs requested
54 | successCount: number; // successful.length
55 | failureCount: number; // failed.length
56 | totalContentSize: number; // Sum of content string lengths (characters, not bytes)
57 | averageResponseTime: number; // Mean responseTime over successful results; 0 when none succeeded
58 | };
59 | }
60 |
61 | /**
62 | * Advanced URL Context Service for Gemini API integration
63 | * Handles URL fetching, content extraction, security validation, and metadata processing
64 | */
65 | export class GeminiUrlContextService {
66 | private readonly securityService: UrlSecurityService;
67 | private readonly retryService: RetryService;
68 | private readonly urlCache = new Map<
69 | string,
70 | { result: UrlContentResult; expiry: number }
71 | >();
72 | private readonly rateLimiter = new Map<
73 | string,
74 | { count: number; resetTime: number }
75 | >();
76 |
77 | constructor(private readonly config: ConfigurationManager) { // Builds the security and retry helpers; config is kept for later per-request lookups
78 | this.securityService = new UrlSecurityService(config);
79 | this.retryService = new RetryService({ // NOTE(review): presumably up to 3 attempts with delays growing 1s -> 5s; confirm against RetryService
80 | maxAttempts: 3,
81 | initialDelayMs: 1000,
82 | maxDelayMs: 5000,
83 | backoffFactor: 2,
84 | });
85 | }
86 |
87 | /**
88 | * Fetch content from a single URL with comprehensive error handling and metadata extraction. Steps, in order: validate the URL, check the rate limit, return a cached result if one exists, otherwise fetch through the retry service, cache the result and update the rate limiter. GeminiUrlFetchError and GeminiUrlValidationError are rethrown unchanged; any other failure is logged and wrapped in a GeminiUrlFetchError.
89 | */
90 | async fetchUrlContent(
91 | url: string,
92 | options: UrlFetchOptions = {}
93 | ): Promise<UrlContentResult> {
94 | const startTime = Date.now();
95 |
96 | try {
97 | // Validate URL security and format
98 | await this.securityService.validateUrl(url, options.allowedDomains);
99 |
100 | // Check rate limiting (runs before the cache lookup, so cached URLs are still subject to it)
101 | this.checkRateLimit(url);
102 |
103 | // Check cache first; a hit returns immediately and skips the fetch, cacheResult and updateRateLimit below
104 | const cached = this.getCachedResult(url);
105 | if (cached) {
106 | logger.debug("Returning cached URL content", { url });
107 | return cached;
108 | }
109 |
110 | // Fetch with retry logic (startTime is shared by every attempt, so responseTime on a retried fetch includes the earlier attempts)
111 | const result = await this.retryService.execute(() =>
112 | this.performUrlFetch(url, options, startTime)
113 | );
114 |
115 | // Cache successful result
116 | this.cacheResult(url, result);
117 |
118 | // Update rate limiter (only reached after a successful, non-cached fetch)
119 | this.updateRateLimit(url);
120 |
121 | return result;
122 | } catch (error) {
123 | const responseTime = Date.now() - startTime;
124 | logger.error("Failed to fetch URL content", {
125 | url,
126 | error: error instanceof Error ? error.message : String(error),
127 | responseTime,
128 | });
129 |
130 | if (
131 | error instanceof GeminiUrlFetchError ||
132 | error instanceof GeminiUrlValidationError
133 | ) {
134 | throw error; // already a domain error: propagate unchanged
135 | }
136 |
137 | throw new GeminiUrlFetchError(
138 | `Failed to fetch URL: ${error instanceof Error ? error.message : String(error)}`,
139 | url,
140 | undefined, // NOTE(review): appears to be the HTTP status slot (other call sites pass response.status here); none is known at this point
141 | error instanceof Error ? error : undefined
142 | );
143 | }
144 | }
145 |
146 | /**
147 | * Process multiple URLs in parallel with intelligent batching and error handling. URLs are split by createBatches using a batch size of min(5, urls.length); batches run one after another with a 200 ms pause in between, and the URLs inside a batch are fetched concurrently. Per-URL failures are collected in batchResult.failed instead of being thrown. Throws Error when `urls` is empty or longer than the configured maxUrlsPerRequest. Returns the Gemini Content built from the successful results together with the batch statistics.
148 | */
149 | async processUrlsForContext(
150 | urls: string[],
151 | options: UrlFetchOptions = {}
152 | ): Promise<{ contents: Content[]; batchResult: UrlBatchResult }> {
153 | if (urls.length === 0) {
154 | throw new Error("No URLs provided for processing");
155 | }
156 |
157 | const urlConfig = this.config.getUrlContextConfig();
158 | if (urls.length > urlConfig.maxUrlsPerRequest) {
159 | throw new Error(
160 | `Too many URLs: ${urls.length}. Maximum allowed: ${urlConfig.maxUrlsPerRequest}`
161 | );
162 | }
163 |
164 | const startTime = Date.now();
165 | const successful: UrlContentResult[] = [];
166 | const failed: Array<{ url: string; error: Error; errorCode: string }> = [];
167 |
168 | // Process URLs in controlled batches to prevent overwhelming target servers
169 | const batchSize = Math.min(5, urls.length);
170 | const batches = this.createBatches(urls, batchSize);
171 |
172 | for (const [batchIndex, batch] of batches.entries()) { // entries() supplies the position directly, avoiding an indexOf search per batch
173 | const batchPromises = batch.map(async (url) => {
174 | try {
175 | const result = await this.fetchUrlContent(url, options);
176 | successful.push(result); // appended in completion order, not input order
177 | return { success: true, url, result };
178 | } catch (error) {
179 | const errorInfo = {
180 | url,
181 | error: error instanceof Error ? error : new Error(String(error)),
182 | errorCode: this.getErrorCode(error),
183 | };
184 | failed.push(errorInfo);
185 | return { success: false, url, error: errorInfo };
186 | }
187 | });
188 |
189 | // Wait for current batch before processing next (each promise handles its own failure above)
190 | await Promise.allSettled(batchPromises);
191 |
192 | // Small delay between batches to be respectful to servers; skipped after the last batch
193 | if (batchIndex < batches.length - 1) {
194 | await this.delay(200);
195 | }
196 | }
197 |
198 | const totalTime = Date.now() - startTime;
199 | const totalContentSize = successful.reduce(
200 | (sum, result) => sum + result.content.length,
201 | 0
202 | );
203 | const averageResponseTime =
204 | successful.length > 0
205 | ? successful.reduce(
206 | (sum, result) => sum + result.metadata.responseTime,
207 | 0
208 | ) / successful.length
209 | : 0; // avoid dividing by zero when every URL failed
210 |
211 | const batchResult: UrlBatchResult = {
212 | successful,
213 | failed,
214 | summary: {
215 | totalUrls: urls.length,
216 | successCount: successful.length,
217 | failureCount: failed.length,
218 | totalContentSize,
219 | averageResponseTime,
220 | },
221 | };
222 |
223 | // Convert successful results to Gemini Content format
224 | const contents = this.convertToGeminiContent(successful, options);
225 |
226 | logger.info("URL batch processing completed", {
227 | totalUrls: urls.length,
228 | successful: successful.length,
229 | failed: failed.length,
230 | totalTime,
231 | totalContentSize,
232 | });
233 |
234 | return { contents, batchResult };
235 | }
236 |
237 | /**
238 | * Perform the actual URL fetch with comprehensive metadata extraction. Issues a GET request, throws GeminiUrlFetchError for non-OK responses and for content types that are not text-based, truncates the body to the byte limit, extracts metadata, optionally converts HTML to markdown and finally cleans the content. `startTime` is the epoch-ms timestamp from which the reported responseTime is measured.
239 | */
240 | private async performUrlFetch(
241 | url: string,
242 | options: UrlFetchOptions,
243 | startTime: number
244 | ): Promise<UrlContentResult> {
245 | const urlConfig = this.config.getUrlContextConfig();
246 | const fetchOptions = {
247 | method: "GET",
248 | timeout: options.timeout || urlConfig.defaultTimeoutMs, // NOTE(review): timeout/follow/size look like node-fetch options; confirm the fetch implementation in use honours them
249 | headers: {
250 | "User-Agent":
251 | options.userAgent ||
252 | "MCP-Gemini-Server/1.0 (+https://github.com/bsmi021/mcp-gemini-server)",
253 | Accept:
254 | "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
255 | "Accept-Language": "en-US,en;q=0.5",
256 | "Accept-Encoding": "gzip, deflate, br",
257 | "Cache-Control": "no-cache",
258 | Pragma: "no-cache",
259 | ...options.headers, // spread last so caller-supplied headers override the defaults above
260 | },
261 | redirect: "follow" as RequestRedirect,
262 | follow: options.followRedirects || 3,
263 | size: options.maxContentLength || urlConfig.defaultMaxContentKb * 1024,
264 | };
265 |
266 | const response = await fetch(url, fetchOptions);
267 | const responseTime = Date.now() - startTime;
268 |
269 | if (!response.ok) {
270 | throw new GeminiUrlFetchError(
271 | `HTTP ${response.status}: ${response.statusText}`,
272 | url,
273 | response.status
274 | );
275 | }
276 |
277 | const contentType = response.headers.get("content-type") || "text/html"; // a missing header is treated as HTML
278 | const encoding = this.extractEncodingFromContentType(contentType);
279 |
280 | // Check content type - only process text-based content
281 | if (!this.isTextBasedContent(contentType)) {
282 | throw new GeminiUrlFetchError(
283 | `Unsupported content type: ${contentType}`,
284 | url,
285 | response.status
286 | );
287 | }
288 |
289 | let rawContent = await response.text();
290 | const actualSize = Buffer.byteLength(rawContent, "utf8");
291 | const maxSize =
292 | options.maxContentLength || urlConfig.defaultMaxContentKb * 1024;
293 | let truncated = false;
294 |
295 | // Truncate if content is too large. The limit is in bytes, so cut the UTF-8 encoding rather than the UTF-16 string; otherwise multi-byte text could remain over the limit.
296 | if (actualSize > maxSize) {
297 | rawContent = new TextDecoder("utf-8", { ignoreBOM: true }).decode(Buffer.from(rawContent, "utf8").subarray(0, maxSize), { stream: true }); // stream: true holds back a trailing partial character instead of emitting U+FFFD
298 | truncated = true;
299 | }
300 |
301 | // Extract metadata from HTML (runs on the possibly truncated raw content, before any markdown conversion)
302 | const metadata = await this.extractMetadata(
303 | rawContent,
304 | url,
305 | response,
306 | responseTime,
307 | truncated,
308 | encoding
309 | );
310 |
311 | // Process content based on type and options
312 | let processedContent = rawContent;
313 | if (
314 | contentType.includes("text/html") &&
315 | (options.convertToMarkdown ?? urlConfig.convertToMarkdown)
316 | ) {
317 | processedContent = this.convertHtmlToMarkdown(rawContent);
318 | }
319 |
320 | // Clean and optimize content
321 | processedContent = this.cleanContent(processedContent);
322 |
323 | return {
324 | content: processedContent,
325 | metadata,
326 | };
327 | }
328 |
329 | /**
330 | * Extract comprehensive metadata from HTML content and HTTP response. Combines response-level fields (final URL, Content-Type, Content-Length, status) with the caller-supplied timing, truncation and encoding values, and merges in HTML-derived fields when the Content-Type contains "text/html".
331 | */
332 | private async extractMetadata(
333 | content: string,
334 | originalUrl: string,
335 | response: Response,
336 | responseTime: number,
337 | truncated: boolean,
338 | encoding?: string
339 | ): Promise<UrlContentMetadata> {
340 | const contentType = response.headers.get("content-type") || "";
341 | const contentLength = parseInt(
342 | response.headers.get("content-length") || "0",
343 | 10); // explicit radix so the header is always read as decimal
344 |
345 | const metadata: UrlContentMetadata = {
346 | url: originalUrl,
347 | finalUrl: response.url !== originalUrl ? response.url : undefined, // only recorded when it differs from the requested URL
348 | contentType,
349 | contentLength: contentLength || content.length, // falls back to the string length when the header is missing, 0 or unparsable
350 | fetchedAt: new Date(),
351 | truncated,
352 | responseTime,
353 | statusCode: response.status,
354 | encoding,
355 | };
356 |
357 | // Extract HTML metadata if content is HTML
358 | if (contentType.includes("text/html")) {
359 | const htmlMetadata = this.extractHtmlMetadata(content);
360 | Object.assign(metadata, htmlMetadata);
361 | }
362 |
363 | return metadata;
364 | }
365 |
/**
 * Extract structured metadata (title, description, language, canonical URL,
 * Open Graph image, favicon) from raw HTML using lightweight tag scanning.
 *
 * Attribute values are read up to the SAME quote character that opened them,
 * so a double-quoted value may contain an apostrophe (and vice versa), and
 * attributes are found regardless of their order inside the tag.
 * Unquoted attribute values are not recognised.
 *
 * @param html - HTML source text
 * @returns Only the fields that were found; absent fields are left unset
 */
private extractHtmlMetadata(html: string): Partial<UrlContentMetadata> {
  const metadata: Partial<UrlContentMetadata> = {};

  // Read one quoted attribute value from a single tag's source text.
  // Empty values are treated as "not present".
  const readAttr = (tag: string, attr: string): string | undefined => {
    const found = tag.match(
      new RegExp(`[\\s"']${attr}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, "i")
    );
    return found && found[2] ? found[2] : undefined;
  };

  // Return `valueAttr` of the first <tagName> tag accepted by `accepts`.
  // The tag pattern skips over quoted values so a ">" inside an attribute
  // does not end the tag early.
  const firstValue = (
    tagName: string,
    accepts: (tag: string) => boolean,
    valueAttr: string
  ): string | undefined => {
    const tags =
      html.match(
        new RegExp(`<${tagName}\\b(?:"[^"]*"|'[^']*'|[^'">])*>`, "gi")
      ) ?? [];
    for (const tag of tags) {
      if (!accepts(tag)) continue;
      const value = readAttr(tag, valueAttr);
      if (value !== undefined) return value;
    }
    return undefined;
  };

  // Predicate: attribute equals `expected` (case-insensitive, trimmed)
  const attrIs =
    (attr: string, expected: string) =>
    (tag: string): boolean =>
      (readAttr(tag, attr) ?? "").trim().toLowerCase() === expected;

  // Predicate: `rel` is a space-separated token list containing `token`
  // (covers both rel="icon" and rel="shortcut icon")
  const relHas =
    (token: string) =>
    (tag: string): boolean =>
      (readAttr(tag, "rel") ?? "")
        .toLowerCase()
        .split(/\s+/)
        .some((candidate) => candidate === token);

  const anyTag = (): boolean => true;

  // Extract title
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  if (titleMatch) {
    metadata.title = this.cleanText(titleMatch[1]);
  }

  // Extract meta description
  const description = firstValue(
    "meta",
    attrIs("name", "description"),
    "content"
  );
  if (description !== undefined) {
    metadata.description = this.cleanText(description);
  }

  // Extract language: <html lang>, then <html xml:lang>, then the
  // content-language <meta> equivalent
  const language =
    firstValue("html", anyTag, "lang") ??
    firstValue("html", anyTag, "xml:lang") ??
    firstValue("meta", attrIs("http-equiv", "content-language"), "content");
  if (language !== undefined) {
    metadata.language = language;
  }

  // Extract canonical URL
  const canonicalUrl = firstValue("link", relHas("canonical"), "href");
  if (canonicalUrl !== undefined) {
    metadata.canonicalUrl = canonicalUrl;
  }

  // Extract Open Graph image
  const ogImage = firstValue("meta", attrIs("property", "og:image"), "content");
  if (ogImage !== undefined) {
    metadata.ogImage = ogImage;
  }

  // Extract favicon
  const favicon = firstValue("link", relHas("icon"), "href");
  if (favicon !== undefined) {
    metadata.favicon = favicon;
  }

  return metadata;
}
422 |
/**
 * Convert HTML content to clean markdown using regex-based rewriting.
 *
 * Two properties hold for every element pattern below:
 *  - the tag name must be followed by whitespace or ">" so that, for example,
 *    `<b` does not match `<body>`/`<br>`, `<i` does not match `<img>`,
 *    `<p` does not match `<pre>` and `<li` does not match `<link>`;
 *  - element content is captured with `[\s\S]*?` so elements that span
 *    several lines are converted too (`.` does not match line breaks).
 *
 * Nested elements of the same kind (e.g. a list inside a list) are not
 * handled specially; the lazy match ends at the first closing tag.
 *
 * @param html - HTML source text
 * @returns Markdown text, passed through `cleanContent`
 */
private convertHtmlToMarkdown(html: string): string {
  // Remove script and style tags entirely
  html = html.replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1>/gi, "");

  // Remove comments
  html = html.replace(/<!--[\s\S]*?-->/g, "");

  // Convert headings
  html = html.replace(
    /<h([1-6])(?:\s[^>]*)?>([\s\S]*?)<\/h\1>/gi,
    (_, level: string, content: string) => {
      const hashes = "#".repeat(parseInt(level, 10));
      return `\n\n${hashes} ${this.cleanText(content)}\n\n`;
    }
  );

  // Convert paragraphs
  html = html.replace(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi, "\n\n$1\n\n");

  // Convert line breaks (with or without attributes / self-closing slash)
  html = html.replace(/<br\b[^>]*>/gi, "\n");

  // Convert lists
  const listItem = /<li(?:\s[^>]*)?>([\s\S]*?)<\/li>/gi;

  html = html.replace(
    /<ul(?:\s[^>]*)?>([\s\S]*?)<\/ul>/gi,
    (_, content: string) =>
      content.replace(
        listItem,
        (__: string, itemContent: string) => `- ${itemContent.trim()}\n`
      )
  );

  html = html.replace(
    /<ol(?:\s[^>]*)?>([\s\S]*?)<\/ol>/gi,
    (_, content: string) => {
      let counter = 1;
      return content.replace(
        listItem,
        (__: string, itemContent: string) =>
          `${counter++}. ${itemContent.trim()}\n`
      );
    }
  );

  // Convert links
  html = html.replace(
    /<a\s[^>]*?href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    "[$2]($1)"
  );

  // Convert emphasis
  html = html.replace(
    /<(strong|b)(?:\s[^>]*)?>([\s\S]*?)<\/\1>/gi,
    "**$2**"
  );
  html = html.replace(/<(em|i)(?:\s[^>]*)?>([\s\S]*?)<\/\1>/gi, "*$2*");

  // Convert code. <pre> goes first and drops any nested <code> tags so a
  // <pre><code> block becomes a plain fence rather than a fence that
  // contains inline backticks.
  html = html.replace(
    /<pre(?:\s[^>]*)?>([\s\S]*?)<\/pre>/gi,
    (_, body: string) => {
      const inner = body.replace(/<\/?code(?:\s[^>]*)?>/gi, "");
      return `\n\`\`\`\n${inner}\n\`\`\`\n`;
    }
  );
  html = html.replace(/<code(?:\s[^>]*)?>([\s\S]*?)<\/code>/gi, "`$1`");

  // Convert blockquotes: prefix every line and keep the quote on its own
  // lines, since "> " is only meaningful at the start of a line.
  html = html.replace(
    /<blockquote(?:\s[^>]*)?>([\s\S]*?)<\/blockquote>/gi,
    (_, content: string) => {
      const quoted = content
        .trim()
        .split("\n")
        .map((line: string) => `> ${line}`)
        .join("\n");
      return `\n\n${quoted}\n\n`;
    }
  );

  // Remove remaining HTML tags
  html = html.replace(/<[^>]+>/g, "");

  // Clean up the text
  return this.cleanContent(html);
}
492 |
/**
 * Clean and normalize text content: decode a small set of common HTML
 * entities, then normalize line endings and collapse redundant whitespace.
 *
 * Entities are decoded in a single pass, so text that was escaped twice
 * (e.g. `&amp;lt;`) is unescaped exactly one level per call instead of
 * collapsing straight to `<`.
 *
 * @param content - Text to clean
 * @returns The cleaned text, trimmed at both ends
 */
private cleanContent(content: string): string {
  // Decode HTML entities
  const entities: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
    apos: "'",
    nbsp: " ",
    mdash: "\u2014",
    ndash: "\u2013",
    hellip: "\u2026",
  };
  content = content.replace(
    /&(amp|lt|gt|quot|#39|apos|nbsp|mdash|ndash|hellip);/g,
    (entity: string, name: string) => entities[name] ?? entity
  );

  // Normalize whitespace
  content = content
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .replace(/\t/g, " ")
    // Literal non-breaking spaces are treated like ordinary spaces
    .replace(/\u00a0/g, " ")
    .replace(/[ ]+/g, " ")
    .replace(/\n[ ]+/g, "\n")
    .replace(/[ ]+\n/g, "\n")
    // Never keep more than one blank line in a row
    .replace(/\n{3,}/g, "\n\n");

  // Trim and return
  return content.trim();
}
522 |
/**
 * Clean a short text fragment: decode the basic HTML entities, collapse every
 * run of whitespace (including line breaks) to a single space, and trim.
 *
 * Decoding happens in one pass so a doubly-escaped sequence such as
 * `&amp;lt;` is only unescaped one level.
 *
 * @param text - Raw text, e.g. a title or heading body
 * @returns Single-line, trimmed text
 */
private cleanText(text: string): string {
  const entities: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
    apos: "'",
    nbsp: " ",
  };

  return text
    .replace(
      /&(amp|lt|gt|quot|#39|apos|nbsp);/g,
      (entity: string, name: string) => entities[name] ?? entity
    )
    .replace(/\s+/g, " ")
    .trim();
}
537 |
/**
 * Turn fetched URL results into Gemini `Content` entries, one per result.
 *
 * Each entry has role "user" and a single text part made of a
 * "## Content from <url>" header, optional title/description lines, and the
 * result's content.
 *
 * @param results - Fetched URL results to convert
 * @param options - `includeMetadata` (defaults to true) controls whether the
 *   title and description lines are emitted
 * @returns One `Content` per input result, in the same order
 */
private convertToGeminiContent(
  results: UrlContentResult[],
  options: UrlFetchOptions
): Content[] {
  const withMetadata = options.includeMetadata ?? true;

  return results.map(({ content, metadata }): Content => {
    // Title and description are skipped when metadata is disabled or the field is empty
    const titleLine =
      withMetadata && metadata.title
        ? `**Title:** ${metadata.title}\n\n`
        : "";
    const descriptionLine =
      withMetadata && metadata.description
        ? `**Description:** ${metadata.description}\n\n`
        : "";

    const text =
      `## Content from ${metadata.url}\n\n` +
      titleLine +
      descriptionLine +
      content;

    return {
      role: "user",
      parts: [{ text }],
    };
  });
}
574 |
// Utility methods for caching, rate limiting, and validation

/**
 * Look up a cached result for a URL.
 *
 * @param url - Cache key
 * @returns The cached result while its expiry lies in the future; otherwise
 *   `null`, after removing any entry stored under the key
 */
private getCachedResult(url: string): UrlContentResult | null {
  const entry = this.urlCache.get(url);

  if (!entry || !(Date.now() < entry.expiry)) {
    // Missing or stale: make sure nothing lingers under this key
    this.urlCache.delete(url);
    return null;
  }

  return entry.result;
}
586 |
/**
 * Store a result in the URL cache with a 15 minute lifetime.
 *
 * Once the cache holds more than 1000 entries, every entry whose expiry has
 * passed is removed. Entries that are still live are never evicted here.
 *
 * @param url - Cache key
 * @param result - Result to store
 */
private cacheResult(url: string, result: UrlContentResult): void {
  const ttlMs = 15 * 60 * 1000; // 15 minutes
  this.urlCache.set(url, { result, expiry: Date.now() + ttlMs });

  if (this.urlCache.size <= 1000) {
    return;
  }

  // Clean up expired cache entries
  const sweepTime = Date.now();
  for (const [cachedUrl, entry] of this.urlCache.entries()) {
    const isExpired = sweepTime >= entry.expiry;
    if (isExpired) {
      this.urlCache.delete(cachedUrl);
    }
  }
}
601 |
/**
 * Enforce the per-host request budget (10 requests per 60 second window).
 *
 * Does not count the request itself; it only rejects when the current
 * window's counter has already reached the limit, and opens a fresh window
 * when none exists or the previous one has ended.
 *
 * @param url - URL about to be fetched; its hostname is the rate-limit key
 * @throws GeminiUrlFetchError when the host's counter is at the limit
 */
private checkRateLimit(url: string): void {
  const { hostname: domain } = new URL(url);
  const now = Date.now();
  const window = this.rateLimiter.get(domain);

  if (window && now < window.resetTime) {
    // Still inside the active window: only the limit check applies
    if (window.count >= 10) {
      // Max 10 requests per minute per domain
      throw new GeminiUrlFetchError(
        `Rate limit exceeded for domain: ${domain}`,
        url
      );
    }
    return;
  }

  // No window yet, or the old one ended: start counting from zero
  this.rateLimiter.set(domain, { count: 0, resetTime: now + 60000 });
}
624 |
/**
 * Count one request against the URL's host, if a rate-limit window exists
 * for it. Hosts without a window are left untouched.
 *
 * @param url - URL that was requested
 */
private updateRateLimit(url: string): void {
  const { hostname } = new URL(url);
  const window = this.rateLimiter.get(hostname);
  if (!window) {
    return;
  }
  window.count += 1;
}
632 |
/**
 * Decide whether a failed fetch is worth retrying.
 *
 * @param error - The error thrown by the fetch attempt
 * @returns `false` for validation errors; for fetch errors `true` only when
 *   there is no status code or it is >= 500, 429 or 408; `true` for any
 *   other error
 */
private shouldRetryFetch(error: unknown): boolean {
  // Don't retry validation errors
  if (error instanceof GeminiUrlValidationError) {
    return false;
  }

  // Retry network errors (anything that is not one of our fetch errors)
  if (!(error instanceof GeminiUrlFetchError)) {
    return true;
  }

  const { statusCode } = error;
  if (!statusCode) {
    return true;
  }

  // Retry on server errors and certain client errors
  const retryableClientStatus = statusCode === 429 || statusCode === 408;
  return statusCode >= 500 || retryableClientStatus;
}
646 |
/**
 * Split an array into consecutive batches of at most `batchSize` items.
 *
 * @param items - Items to partition; the array is not modified
 * @param batchSize - Maximum batch length; must be at least 1
 * @returns The batches in original order (empty when `items` is empty)
 * @throws RangeError when `batchSize` is NaN or less than 1. A size of 0 or
 *   below would never advance the loop, and a fraction below 1 would emit
 *   empty batches.
 */
private createBatches<T>(items: T[], batchSize: number): T[][] {
  // Written as a negated comparison so that NaN is rejected as well
  if (!(batchSize >= 1)) {
    throw new RangeError(
      `batchSize must be a number >= 1, received ${batchSize}`
    );
  }

  const batches: T[][] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    batches.push(items.slice(i, i + batchSize));
  }
  return batches;
}
654 |
/**
 * Wait for a number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`
 * @returns A promise that resolves once the timer fires
 */
private delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
658 |
/**
 * Pull the charset label out of a Content-Type header value.
 *
 * Accepts bare and quoted values (`charset=utf-8`, `charset="utf-8"`),
 * tolerates whitespace around `=`, and stops at the first `;`, quote or
 * whitespace so trailing parameters and padding are not included.
 *
 * @param contentType - Raw Content-Type header value
 * @returns The lower-cased charset label, or `undefined` when none is present
 */
private extractEncodingFromContentType(
  contentType: string
): string | undefined {
  const match = contentType.match(/charset\s*=\s*["']?([^"';\s]+)/i);
  return match ? match[1].toLowerCase() : undefined;
}
665 |
/**
 * Report whether a Content-Type value names one of the supported text-based
 * media types. The comparison is a case-insensitive substring test, so
 * parameters such as `; charset=...` do not get in the way.
 *
 * @param contentType - Content-Type header value
 * @returns `true` when any supported media type occurs in the value
 */
private isTextBasedContent(contentType: string): boolean {
  const supportedTypes = [
    "text/html",
    "text/plain",
    "text/xml",
    "text/markdown",
    "application/xml",
    "application/xhtml+xml",
    "application/json",
    "application/ld+json",
  ];

  const normalized = contentType.toLowerCase();
  for (const mediaType of supportedTypes) {
    if (normalized.includes(mediaType)) {
      return true;
    }
  }
  return false;
}
680 |
/**
 * Map an error to a short machine-readable code.
 *
 * @param error - Error raised while validating or fetching a URL
 * @returns "VALIDATION_ERROR" for validation errors; "HTTP_<status>" for
 *   fetch errors that carry a status code and "FETCH_ERROR" for those that
 *   do not; "UNKNOWN_ERROR" for anything else
 */
private getErrorCode(error: unknown): string {
  if (error instanceof GeminiUrlValidationError) {
    return "VALIDATION_ERROR";
  }

  if (!(error instanceof GeminiUrlFetchError)) {
    return "UNKNOWN_ERROR";
  }

  const { statusCode } = error;
  return statusCode ? `HTTP_${statusCode}` : "FETCH_ERROR";
}
690 | }
691 |
```