# Directory Structure
```
├── .env.example
├── .gitignore
├── LICENSE
├── package.json
├── README.md
├── src
│ ├── index.ts
│ ├── server.ts
│ ├── services
│ │ └── gemini.ts
│ ├── tools
│ │ ├── audio-recognition.ts
│ │ ├── image-recognition.ts
│ │ └── video-recognition.ts
│ ├── types
│ │ └── index.ts
│ └── utils
│ └── logger.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
1 | # Google Gemini API key (required)
2 | GOOGLE_API_KEY=your_api_key_here
3 |
4 | # Transport type: 'stdio' or 'sse' (defaults to 'stdio')
5 | TRANSPORT_TYPE=stdio
6 |
7 | # Port for SSE transport (defaults to 3000)
8 | PORT=3000
9 |
10 | # Log level: 'verbose', 'debug', 'info', 'warn', 'error', 'fatal' (defaults to 'fatal')
11 | LOG_LEVEL=fatal
12 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Dependencies
2 | node_modules/
3 | package-lock.json
4 | yarn.lock
5 | pnpm-lock.yaml
6 |
7 | # Build output
8 | dist/
9 | build/
10 | *.tsbuildinfo
11 |
12 | # Environment variables
13 | .env
14 | .env.local
15 | .env.*.local
16 |
17 | # Logs
18 | logs/
19 | *.log
20 | npm-debug.log*
21 | yarn-debug.log*
22 | yarn-error.log*
23 |
24 | # Editor directories and files
25 | .idea/
26 | .vscode/
27 | *.suo
28 | *.ntvs*
29 | *.njsproj
30 | *.sln
31 | *.sw?
32 |
33 | # OS files
34 | .DS_Store
35 | Thumbs.db
36 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP Video Recognition Server
2 |
3 | An MCP (Model Context Protocol) server that provides tools for image, audio, and video recognition using Google's Gemini AI.
4 |
5 | <a href="https://glama.ai/mcp/servers/@mario-andreschak/mcp_video_recognition">
6 | <img width="380" height="200" src="https://glama.ai/mcp/servers/@mario-andreschak/mcp_video_recognition/badge" alt="Video Recognition Server MCP server" />
7 | </a>
8 |
9 | ## Features
10 |
11 | - **Image Recognition**: Analyze and describe images using Google Gemini AI
12 | - **Audio Recognition**: Analyze and transcribe audio using Google Gemini AI
13 | - **Video Recognition**: Analyze and describe videos using Google Gemini AI
14 |
15 | ## Prerequisites
16 |
17 | - Node.js 18 or higher
18 | - Google Gemini API key
19 |
20 | ## Installation
21 |
22 | ### Manual Installation
23 |
24 | 1. Clone the repository:
25 | ```bash
26 | git clone https://github.com/mario-andreschak/mcp_video_recognition.git
27 | cd mcp_video_recognition
28 | ```
29 |
30 | 2. Install dependencies:
31 | ```bash
32 | npm install
33 | ```
34 |
35 | 3. Build the project:
36 | ```bash
37 | npm run build
38 | ```
39 |
40 | ### Installing in [FLUJO](https://github.com/mario-andreschak/FLUJO/)
41 |
42 | 1. Click Add Server
43 | 2. Copy & Paste Github URL into FLUJO
44 | 3. Click Parse, Clone, Install, Build and Save.
45 |
46 | ### Installing via Configuration Files
47 |
48 | To integrate this MCP server with Cline or other MCP clients via configuration files:
49 |
50 | 1. Open your Cline settings:
51 | - In VS Code, go to File -> Preferences -> Settings
52 | - Search for "Cline MCP Settings"
53 | - Click "Edit in settings.json"
54 |
55 | 2. Add the server configuration to the `mcpServers` object:
56 | ```json
57 | {
58 | "mcpServers": {
59 | "video-recognition": {
60 | "command": "node",
61 | "args": [
62 | "/path/to/mcp-video-recognition/dist/index.js"
63 | ],
64 | "disabled": false,
65 | "autoApprove": []
66 | }
67 | }
68 | }
69 | ```
70 |
71 | 3. Replace `/path/to/mcp-video-recognition/dist/index.js` with the actual path to the `index.js` file in your project directory. Use forward slashes (/) or double backslashes (\\\\) for the path on Windows.
72 |
73 | 4. Save the settings file. Cline should automatically connect to the server.
74 |
75 | ## Configuration
76 |
77 | The server is configured using environment variables:
78 |
79 | - `GOOGLE_API_KEY` (required): Your Google Gemini API key
80 | - `TRANSPORT_TYPE`: Transport type to use (`stdio` or `sse`, defaults to `stdio`)
81 | - `PORT`: Port number for SSE transport (defaults to 3000)
82 | - `LOG_LEVEL`: Logging level (`verbose`, `debug`, `info`, `warn`, `error`, `fatal`; defaults to `fatal`)
83 |
84 | ## Usage
85 |
86 | ### Starting the Server
87 |
88 | #### With stdio Transport (Default)
89 |
90 | ```bash
91 | GOOGLE_API_KEY=your_api_key npm start
92 | ```
93 |
94 | #### With SSE Transport
95 |
96 | ```bash
97 | GOOGLE_API_KEY=your_api_key TRANSPORT_TYPE=sse PORT=3000 npm start
98 | ```
99 |
100 | ### Using the Tools
101 |
102 | The server provides three tools that can be called by MCP clients:
103 |
104 | #### Image Recognition
105 |
106 | ```json
107 | {
108 | "name": "image_recognition",
109 | "arguments": {
110 | "filepath": "/path/to/image.jpg",
111 | "prompt": "Describe this image in detail",
112 | "modelname": "gemini-2.0-flash"
113 | }
114 | }
115 | ```
116 |
117 | #### Audio Recognition
118 |
119 | ```json
120 | {
121 | "name": "audio_recognition",
122 | "arguments": {
123 | "filepath": "/path/to/audio.mp3",
124 | "prompt": "Transcribe this audio",
125 | "modelname": "gemini-2.0-flash"
126 | }
127 | }
128 | ```
129 |
130 | #### Video Recognition
131 |
132 | ```json
133 | {
134 | "name": "video_recognition",
135 | "arguments": {
136 | "filepath": "/path/to/video.mp4",
137 | "prompt": "Describe what happens in this video",
138 | "modelname": "gemini-2.0-flash"
139 | }
140 | }
141 | ```
142 |
143 | ### Tool Parameters
144 |
145 | All tools accept the following parameters:
146 |
147 | - `filepath` (required): Path to the media file to analyze
148 | - `prompt` (optional): Custom prompt for the recognition (defaults to "Describe this content")
149 | - `modelname` (optional): Gemini model to use for recognition (defaults to "gemini-2.0-flash")
150 |
151 | ## Development
152 |
153 | ### Running in Development Mode
154 |
155 | ```bash
156 | GOOGLE_API_KEY=your_api_key npm run dev
157 | ```
158 |
159 | ### Project Structure
160 |
161 | - `src/index.ts`: Entry point
162 | - `src/server.ts`: MCP server implementation
163 | - `src/tools/`: Tool implementations
164 | - `src/services/`: Service implementations (Gemini API)
165 | - `src/types/`: Type definitions
166 | - `src/utils/`: Utility functions
167 |
168 | ## License
169 |
170 | MIT
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "NodeNext",
5 | "moduleResolution": "NodeNext",
6 | "esModuleInterop": true,
7 | "strict": true,
8 | "outDir": "dist",
9 | "sourceMap": true,
10 | "declaration": true,
11 | "skipLibCheck": true,
12 | "forceConsistentCasingInFileNames": true,
13 | "resolveJsonModule": true
14 | },
15 | "include": ["src/**/*"],
16 | "exclude": ["node_modules", "dist"]
17 | }
18 |
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "mcp-video-recognition",
3 | "version": "1.0.0",
4 | "description": "MCP server for Google Gemini image, audio, and video recognition",
5 | "main": "dist/index.js",
6 | "type": "module",
7 | "scripts": {
8 | "build": "tsc",
9 | "start": "node dist/index.js",
10 | "dev": "tsc -w & node --watch dist/index.js",
11 | "debug": "tsc & npx @modelcontextprotocol/inspector node dist/index.js",
12 | "lint": "eslint src --ext .ts",
13 | "test": "echo \"Error: no test specified\" && exit 1"
14 | },
15 | "keywords": [
16 | "mcp",
17 | "gemini",
18 | "video",
19 | "audio",
20 | "image",
21 | "recognition"
22 | ],
23 | "author": "",
24 | "license": "MIT",
25 | "dependencies": {
26 | "@google/genai": "^0.9.0",
27 | "@modelcontextprotocol/sdk": "^1.10.1",
28 | "express": "^5.1.0",
29 | "zod": "^3.24.3"
30 | },
31 | "devDependencies": {
32 | "@types/express": "^5.0.1",
33 | "@types/node": "^22.14.1",
34 | "typescript": "^5.8.3"
35 | }
36 | }
37 |
```
--------------------------------------------------------------------------------
/src/utils/logger.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Logger utility for the MCP server
3 | */
4 |
5 | export enum LogLevel {
6 | VERBOSE = 'verbose',
7 | DEBUG = 'debug',
8 | INFO = 'info',
9 | WARN = 'warn',
10 | ERROR = 'error',
11 | FATAL = 'fatal'
12 | }
13 |
14 | export class Logger {
15 | private readonly name: string;
16 | private static level: LogLevel = LogLevel.FATAL;
17 |
18 | constructor(name: string) {
19 | this.name = name;
20 | }
21 |
22 | static setLogLevel(level: LogLevel): void {
23 | Logger.level = level;
24 | }
25 |
26 | private shouldLog(level: LogLevel): boolean {
27 | const levels = Object.values(LogLevel);
28 | return levels.indexOf(level) >= levels.indexOf(Logger.level);
29 | }
30 |
31 | private formatMessage(level: LogLevel, message: string): string {
32 | const timestamp = new Date().toISOString();
33 | return `[${timestamp}] [${level.toUpperCase()}] [${this.name}] ${message}`;
34 | }
35 |
36 | verbose(message: string, data?: unknown): void {
37 | if (this.shouldLog(LogLevel.VERBOSE)) {
38 | const formattedData = data ? JSON.stringify(data) : '';
39 | console.log(this.formatMessage(LogLevel.VERBOSE, message), formattedData);
40 | }
41 | }
42 |
43 | debug(message: string, data?: unknown): void {
44 | if (this.shouldLog(LogLevel.DEBUG)) {
45 | console.log(this.formatMessage(LogLevel.DEBUG, message), data || '');
46 | }
47 | }
48 |
49 | info(message: string, data?: unknown): void {
50 | if (this.shouldLog(LogLevel.INFO)) {
51 | console.log(this.formatMessage(LogLevel.INFO, message), data || '');
52 | }
53 | }
54 |
55 | warn(message: string, data?: unknown): void {
56 | if (this.shouldLog(LogLevel.WARN)) {
57 | console.warn(this.formatMessage(LogLevel.WARN, message), data || '');
58 | }
59 | }
60 |
61 | error(message: string, error?: unknown): void {
62 | if (this.shouldLog(LogLevel.ERROR)) {
63 | console.error(this.formatMessage(LogLevel.ERROR, message), error || '');
64 | }
65 | }
66 |
67 | fatal(message: string, error?: unknown): void {
68 | if (this.shouldLog(LogLevel.FATAL)) {
69 | console.error(this.formatMessage(LogLevel.FATAL, message), error || '');
70 | }
71 | }
72 | }
73 |
74 | export const createLogger = (name: string): Logger => new Logger(name);
75 |
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Entry point for the MCP video recognition server
3 | */
4 |
5 | import { Server } from './server.js';
6 | import { createLogger, LogLevel, Logger } from './utils/logger.js';
7 | import type { ServerConfig } from './server.js';
8 |
9 | const log = createLogger('Main');
10 |
11 | // Set log level from environment variable
12 | const logLevel = ( process.env.LOG_LEVEL || LogLevel.FATAL ) as LogLevel;
13 | Logger.setLogLevel(logLevel as LogLevel);
14 |
15 | /**
16 | * Load configuration from environment variables
17 | */
18 | function loadConfig(): ServerConfig {
19 | // Check for required environment variables
20 | const apiKey = process.env.GOOGLE_API_KEY;
21 | if (!apiKey) {
22 | throw new Error('GOOGLE_API_KEY environment variable is required');
23 | }
24 |
25 | // Determine transport type
26 | const transportType = process.env.TRANSPORT_TYPE === 'sse' ? 'sse' : 'stdio';
27 |
28 | // Parse port if provided
29 | const portStr = process.env.PORT;
30 | const port = portStr ? parseInt(portStr, 10) : undefined;
31 |
32 | return {
33 | gemini: {
34 | apiKey
35 | },
36 | transport: transportType,
37 | port
38 | };
39 | }
40 |
41 | /**
42 | * Main function to start the server
43 | */
44 | async function main(): Promise<void> {
45 | try {
46 | log.info('Starting MCP video recognition server');
47 |
48 | // Load configuration
49 | const config = loadConfig();
50 | log.info(`Using transport: ${config.transport}`);
51 |
52 | // Create and start server
53 | const server = new Server(config);
54 | await server.start();
55 |
56 | // Handle process termination
57 | process.on('SIGINT', async () => {
58 | log.info('Received SIGINT signal, shutting down...');
59 | await server.stop();
60 | process.exit(0);
61 | });
62 |
63 | process.on('SIGTERM', async () => {
64 | log.info('Received SIGTERM signal, shutting down...');
65 | await server.stop();
66 | process.exit(0);
67 | });
68 |
69 | log.info('Server started successfully');
70 | } catch (error) {
71 | log.error('Failed to start server', error);
72 | process.exit(1);
73 | }
74 | }
75 |
// Start the server. main() handles its own startup errors, so anything
// landing here is truly unexpected (e.g. a failure thrown from inside the
// catch path itself).
main().catch(error => {
  console.error('Unhandled error:', error);
  process.exit(1);
});
81 |
```
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Type definitions for the MCP server
3 | */
4 |
5 | import { z } from 'zod';
6 | import type { Tool, CallToolResult } from '@modelcontextprotocol/sdk/types.js';
7 |
8 | /**
9 | * Common parameters for all recognition tools
10 | */
/**
 * Common parameters accepted by every recognition tool. `prompt` and
 * `modelname` carry zod defaults, so validated input always has all
 * three fields populated.
 */
export const RecognitionParamsSchema = z.object({
  filepath: z.string().describe('Path to the media file to analyze'),
  prompt: z.string().default('Describe this content').describe('Custom prompt for the recognition'),
  modelname: z.string().default('gemini-2.0-flash').describe('Gemini model to use for recognition')
});

// Input type inferred from the schema (defaults applied).
export type RecognitionParams = z.infer<typeof RecognitionParamsSchema>;

/**
 * Video recognition specific types. Currently identical to the base
 * schema; the empty extend() is kept so video-only fields can be added
 * later without touching the other tools.
 */
export const VideoRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type VideoRecognitionParams = z.infer<typeof VideoRecognitionParamsSchema>;

/**
 * Image recognition specific types (same shape as the base schema for now).
 */
export const ImageRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type ImageRecognitionParams = z.infer<typeof ImageRecognitionParamsSchema>;

/**
 * Audio recognition specific types (same shape as the base schema for now).
 */
export const AudioRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type AudioRecognitionParams = z.infer<typeof AudioRecognitionParamsSchema>;

/**
 * Shape of a tool as registered with the MCP server.
 * NOTE(review): `any` here disables type checking for both the schema and
 * the callback arguments; a generic tied to the schema's inferred type
 * would be safer — confirm before tightening, since all three tool
 * factories share this shape.
 */
export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: z.ZodObject<any>;
  callback: (args: any) => Promise<CallToolResult>;
}

/**
 * Gemini API types.
 */
// Configuration for the Gemini service: the API key only.
export interface GeminiConfig {
  apiKey: string;
}

// A file as returned by the Gemini Files API; `name`/`state` may be
// absent until upload processing has completed.
export interface GeminiFile {
  uri: string;
  mimeType: string;
  name?: string;
  state?: string;
}

// A Gemini file once all metadata is known (no optional fields).
export interface ProcessedGeminiFile {
  uri: string;
  mimeType: string;
  name: string;
  state: string;
}

// Cache entry for a previously uploaded file. `timestamp` is epoch
// milliseconds; presumably entries are keyed by content checksum so
// identical files are not re-uploaded — verify against GeminiService.
export interface CachedFile {
  fileId: string;
  checksum: string;
  uri: string;
  mimeType: string;
  name: string;
  state: string;
  timestamp: number;
}

// File states from Gemini API
export enum FileState {
  UNSPECIFIED = 'STATE_UNSPECIFIED',
  PROCESSING = 'PROCESSING',
  ACTIVE = 'ACTIVE',
  FAILED = 'FAILED'
}

// Result of a Gemini generation call; `isError` marks failure text.
export interface GeminiResponse {
  text: string;
  isError?: boolean;
}
```
--------------------------------------------------------------------------------
/src/tools/audio-recognition.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Audio recognition tool for MCP server
3 | */
4 |
5 | import { z } from 'zod';
6 | import { createLogger } from '../utils/logger.js';
7 | import { GeminiService } from '../services/gemini.js';
8 | import { AudioRecognitionParamsSchema } from '../types/index.js';
9 | import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
10 | import type { AudioRecognitionParams } from '../types/index.js';
11 | import * as fs from 'node:fs';
12 | import * as path from 'node:path';
13 |
14 | const log = createLogger('AudioRecognitionTool');
15 |
16 | export const createAudioRecognitionTool = (geminiService: GeminiService) => {
17 | return {
18 | name: 'audio_recognition',
19 | description: 'Analyze and transcribe audio using Google Gemini AI',
20 | inputSchema: AudioRecognitionParamsSchema,
21 | callback: async (args: AudioRecognitionParams): Promise<CallToolResult> => {
22 | try {
23 | log.info(`Processing audio recognition request for file: ${args.filepath}`);
24 | log.verbose('Audio recognition request', JSON.stringify(args));
25 |
26 | // Verify file exists
27 | if (!fs.existsSync(args.filepath)) {
28 | throw new Error(`Audio file not found: ${args.filepath}`);
29 | }
30 |
31 | // Verify file is an audio
32 | const ext = path.extname(args.filepath).toLowerCase();
33 | if (!['.mp3', '.wav', '.ogg'].includes(ext)) {
34 | throw new Error(`Unsupported audio format: ${ext}. Supported formats are: .mp3, .wav, .ogg`);
35 | }
36 |
37 | // Default prompt if not provided
38 | const prompt = args.prompt || 'Describe this audio';
39 | const modelName = args.modelname || 'gemini-2.0-flash';
40 |
41 | // Upload the file
42 | log.info('Uploading audio file...');
43 | const file = await geminiService.uploadFile(args.filepath);
44 |
45 | // Process with Gemini
46 | log.info('Generating content from audio...');
47 | const result = await geminiService.processFile(file, prompt, modelName);
48 |
49 | if (result.isError) {
50 | log.error(`Error in audio recognition: ${result.text}`);
51 | return {
52 | content: [
53 | {
54 | type: 'text',
55 | text: result.text
56 | }
57 | ],
58 | isError: true
59 | };
60 | }
61 |
62 | log.info('Audio recognition completed successfully');
63 | log.verbose('Audio recognition result', JSON.stringify(result));
64 |
65 | return {
66 | content: [
67 | {
68 | type: 'text',
69 | text: result.text
70 | }
71 | ]
72 | };
73 | } catch (error) {
74 | log.error('Error in audio recognition tool', error);
75 | const errorMessage = error instanceof Error ? error.message : String(error);
76 |
77 | return {
78 | content: [
79 | {
80 | type: 'text',
81 | text: `Error processing audio: ${errorMessage}`
82 | }
83 | ],
84 | isError: true
85 | };
86 | }
87 | }
88 | };
89 | };
90 |
```
--------------------------------------------------------------------------------
/src/tools/image-recognition.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Image recognition tool for MCP server
3 | */
4 |
5 | import { z } from 'zod';
6 | import { createLogger } from '../utils/logger.js';
7 | import { GeminiService } from '../services/gemini.js';
8 | import { ImageRecognitionParamsSchema } from '../types/index.js';
9 | import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
10 | import type { ImageRecognitionParams } from '../types/index.js';
11 | import * as fs from 'node:fs';
12 | import * as path from 'node:path';
13 |
14 | const log = createLogger('ImageRecognitionTool');
15 |
16 | export const createImageRecognitionTool = (geminiService: GeminiService) => {
17 | return {
18 | name: 'image_recognition',
19 | description: 'Analyze and describe images using Google Gemini AI',
20 | inputSchema: ImageRecognitionParamsSchema,
21 | callback: async (args: ImageRecognitionParams): Promise<CallToolResult> => {
22 | try {
23 | log.info(`Processing image recognition request for file: ${args.filepath}`);
24 | log.verbose('Image recognition request', JSON.stringify(args));
25 |
26 | // Verify file exists
27 | if (!fs.existsSync(args.filepath)) {
28 | throw new Error(`Image file not found: ${args.filepath}`);
29 | }
30 |
31 | // Verify file is an image
32 | const ext = path.extname(args.filepath).toLowerCase();
33 | if (!['.jpg', '.jpeg', '.png', '.webp'].includes(ext)) {
34 | throw new Error(`Unsupported image format: ${ext}. Supported formats are: .jpg, .jpeg, .png, .webp`);
35 | }
36 |
37 | // Default prompt if not provided
38 | const prompt = args.prompt || 'Describe this image';
39 | const modelName = args.modelname || 'gemini-2.0-flash';
40 |
41 | // Upload the file
42 | log.info('Uploading image file...');
43 | const file = await geminiService.uploadFile(args.filepath);
44 |
45 | // Process with Gemini
46 | log.info('Generating content from image...');
47 | const result = await geminiService.processFile(file, prompt, modelName);
48 |
49 | if (result.isError) {
50 | log.error(`Error in image recognition: ${result.text}`);
51 | return {
52 | content: [
53 | {
54 | type: 'text',
55 | text: result.text
56 | }
57 | ],
58 | isError: true
59 | };
60 | }
61 |
62 | log.info('Image recognition completed successfully');
63 | log.verbose('Image recognition result', JSON.stringify(result));
64 |
65 | return {
66 | content: [
67 | {
68 | type: 'text',
69 | text: result.text
70 | }
71 | ]
72 | };
73 | } catch (error) {
74 | log.error('Error in image recognition tool', error);
75 | const errorMessage = error instanceof Error ? error.message : String(error);
76 |
77 | return {
78 | content: [
79 | {
80 | type: 'text',
81 | text: `Error processing image: ${errorMessage}`
82 | }
83 | ],
84 | isError: true
85 | };
86 | }
87 | }
88 | };
89 | };
90 |
```
--------------------------------------------------------------------------------
/src/tools/video-recognition.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Video recognition tool for MCP server
3 | */
4 |
5 | import { z } from 'zod';
6 | import { createLogger } from '../utils/logger.js';
7 | import { GeminiService } from '../services/gemini.js';
8 | import { VideoRecognitionParamsSchema, FileState } from '../types/index.js';
9 | import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
10 | import type { VideoRecognitionParams } from '../types/index.js';
11 | import * as fs from 'node:fs';
12 | import * as path from 'node:path';
13 |
14 | const log = createLogger('VideoRecognitionTool');
15 |
16 | export const createVideoRecognitionTool = (geminiService: GeminiService) => {
17 | return {
18 | name: 'video_recognition',
19 | description: 'Analyze and describe videos using Google Gemini AI',
20 | inputSchema: VideoRecognitionParamsSchema,
21 | callback: async (args: VideoRecognitionParams): Promise<CallToolResult> => {
22 | try {
23 | log.info(`Processing video recognition request for file: ${args.filepath}`);
24 | log.verbose('Video recognition request', JSON.stringify(args));
25 |
26 | // Verify file exists
27 | if (!fs.existsSync(args.filepath)) {
28 | throw new Error(`Video file not found: ${args.filepath}`);
29 | }
30 |
31 | // Verify file is a video
32 | const ext = path.extname(args.filepath).toLowerCase();
33 | if (ext !== '.mp4' && ext !== '.mpeg' && ext !== '.mov' && ext !== '.avi' && ext !== '.webm') {
34 | throw new Error(`Unsupported video format: ${ext}. Supported formats are: .mp4, .mpeg, .mov, .avi, .webm`);
35 | }
36 |
37 | // Default prompt if not provided
38 | const prompt = args.prompt || 'Describe this video';
39 | const modelName = args.modelname || 'gemini-2.0-flash';
40 |
41 | // Upload the file - this will handle waiting for video processing
42 | log.info('Uploading and processing video file...');
43 | const file = await geminiService.uploadFile(args.filepath);
44 |
45 | // Process with Gemini
46 | log.info('Video processing complete, generating content...');
47 | const result = await geminiService.processFile(file, prompt, modelName);
48 |
49 | if (result.isError) {
50 | log.error(`Error in video recognition: ${result.text}`);
51 | return {
52 | content: [
53 | {
54 | type: 'text',
55 | text: result.text
56 | }
57 | ],
58 | isError: true
59 | };
60 | }
61 |
62 | log.info('Video recognition completed successfully');
63 | log.verbose('Video recognition result', JSON.stringify(result));
64 |
65 | return {
66 | content: [
67 | {
68 | type: 'text',
69 | text: result.text
70 | }
71 | ]
72 | };
73 | } catch (error) {
74 | log.error('Error in video recognition tool', error);
75 | const errorMessage = error instanceof Error ? error.message : String(error);
76 |
77 | return {
78 | content: [
79 | {
80 | type: 'text',
81 | text: `Error processing video: ${errorMessage}`
82 | }
83 | ],
84 | isError: true
85 | };
86 | }
87 | }
88 | };
89 | };
90 |
```
--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * MCP server implementation
3 | */
4 |
5 | import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
7 | import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
8 | import { randomUUID } from 'crypto';
9 | import type { Request, Response } from 'express';
10 | import { createLogger } from './utils/logger.js';
11 | import { GeminiService } from './services/gemini.js';
12 | import { createImageRecognitionTool } from './tools/image-recognition.js';
13 | import { createAudioRecognitionTool } from './tools/audio-recognition.js';
14 | import { createVideoRecognitionTool } from './tools/video-recognition.js';
15 | import type { GeminiConfig } from './types/index.js';
16 |
17 | const log = createLogger('Server');
18 |
/**
 * Configuration for the Server.
 * NOTE(review): the 'sse' transport is actually served via the SDK's
 * StreamableHTTPServerTransport (see startWithSSE) — the name is legacy;
 * confirm before renaming, as the env var TRANSPORT_TYPE uses 'sse' too.
 */
export interface ServerConfig {
  gemini: GeminiConfig;
  transport: 'stdio' | 'sse';
  // Port for the HTTP transport; defaults to 3000 in startWithSSE.
  port?: number;
}

/**
 * MCP server wrapper: owns the McpServer instance, the GeminiService,
 * and the chosen transport (stdio or HTTP).
 */
export class Server {
  private readonly mcpServer: McpServer;
  private readonly geminiService: GeminiService;
  private readonly config: ServerConfig;

  /**
   * Build the server: create the Gemini service, the McpServer, and
   * register the three recognition tools. Does not start any transport —
   * call start() for that.
   */
  constructor(config: ServerConfig) {
    this.config = config;

    // Initialize Gemini service
    this.geminiService = new GeminiService(config.gemini);

    // Create MCP server
    this.mcpServer = new McpServer({
      name: 'mcp-video-recognition',
      version: '1.0.0'
    });

    // Register tools
    this.registerTools();

    log.info('MCP server initialized');
  }

  /**
   * Register all tools with the MCP server.
   * Each factory receives the shared GeminiService; registration passes
   * the zod schema's `.shape` as the SDK expects.
   */
  private registerTools(): void {
    // Create tools
    const imageRecognitionTool = createImageRecognitionTool(this.geminiService);
    const audioRecognitionTool = createAudioRecognitionTool(this.geminiService);
    const videoRecognitionTool = createVideoRecognitionTool(this.geminiService);

    // Register tools with MCP server
    this.mcpServer.tool(
      imageRecognitionTool.name,
      imageRecognitionTool.description,
      imageRecognitionTool.inputSchema.shape,
      imageRecognitionTool.callback
    );

    this.mcpServer.tool(
      audioRecognitionTool.name,
      audioRecognitionTool.description,
      audioRecognitionTool.inputSchema.shape,
      audioRecognitionTool.callback
    );

    this.mcpServer.tool(
      videoRecognitionTool.name,
      videoRecognitionTool.description,
      videoRecognitionTool.inputSchema.shape,
      videoRecognitionTool.callback
    );

    log.info('All tools registered with MCP server');
  }

  /**
   * Start the server with the configured transport.
   * @throws Error (re-thrown) when the transport fails to start or the
   *         configured transport name is unsupported.
   */
  async start(): Promise<void> {
    try {
      if (this.config.transport === 'stdio') {
        await this.startWithStdio();
      } else if (this.config.transport === 'sse') {
        await this.startWithSSE();
      } else {
        throw new Error(`Unsupported transport: ${this.config.transport}`);
      }
    } catch (error) {
      log.error('Failed to start server', error);
      throw error;
    }
  }

  /**
   * Start the server with stdio transport (single implicit session over
   * the process's stdin/stdout).
   */
  private async startWithStdio(): Promise<void> {
    log.info('Starting server with stdio transport');

    const transport = new StdioServerTransport();

    transport.onclose = () => {
      log.info('Stdio transport closed');
    };

    transport.onerror = (error) => {
      log.error('Stdio transport error', error);
    };

    await this.mcpServer.connect(transport);
    log.info('Server started with stdio transport');
  }

  /**
   * Start the server with SSE transport (implemented with the SDK's
   * Streamable HTTP transport on a single /mcp endpoint).
   *
   * Session model as written: GET /mcp creates a new transport + session;
   * POST /mcp only serves requests that already carry a valid
   * mcp-session-id header; DELETE /mcp terminates a session.
   *
   * NOTE(review): a Streamable HTTP client that opens with a POST
   * `initialize` (no session header yet) is rejected with -32000 here,
   * because sessions are only minted in the GET handler — confirm this
   * GET-first flow is the intended handshake.
   */
  private async startWithSSE(): Promise<void> {
    log.info('Starting server with SSE transport');

    // Import express dynamically to avoid loading it when using stdio
    const express = await import('express');
    const app = express.default();
    const port = this.config.port || 3000;

    app.use(express.json());

    // Map to store transports by session ID
    const transports: { [sessionId: string]: StreamableHTTPServerTransport } = {};

    // Handle POST requests for client-to-server communication
    app.post('/mcp', async (req, res) => {
      try {
        // Check for existing session ID
        const sessionId = req.headers['mcp-session-id'] as string | undefined;
        let transport: StreamableHTTPServerTransport;

        if (sessionId && transports[sessionId]) {
          // Reuse existing transport
          transport = transports[sessionId];
          log.debug(`Using existing transport for session: ${sessionId}`);
        } else {
          // No session: reject with a JSON-RPC error (see NOTE above).
          log.error('No valid session ID provided');
          res.status(400).json({
            jsonrpc: '2.0',
            error: {
              code: -32000,
              message: 'Bad Request: No valid session ID provided',
            },
            id: null,
          });
          return;
        }

        // Handle the request
        await transport.handleRequest(req, res, req.body);
      } catch (error) {
        log.error('Error handling MCP request', error);
        if (!res.headersSent) {
          res.status(500).json({
            jsonrpc: '2.0',
            error: {
              code: -32603,
              message: 'Internal server error',
            },
            id: null,
          });
        }
      }
    });

    // Reusable handler for GET and DELETE requests
    const handleSessionRequest = async (req: Request, res: Response) => {
      const sessionId = req.headers['mcp-session-id'] as string | undefined;
      if (!sessionId || !transports[sessionId]) {
        res.status(400).send('Invalid or missing session ID');
        return;
      }

      const transport = transports[sessionId];
      await transport.handleRequest(req, res);
    };

    // Handle GET requests for server-to-client notifications via SSE
    app.get('/mcp', async (req, res) => {
      try {
        // Create a new transport for this connection
        const transport = new StreamableHTTPServerTransport({
          sessionIdGenerator: () => randomUUID(),
          onsessioninitialized: (sessionId) => {
            // Store the transport by session ID
            transports[sessionId] = transport;
            log.info(`New session initialized: ${sessionId}`);
          }
        });

        // Clean up transport when closed
        transport.onclose = () => {
          if (transport.sessionId) {
            delete transports[transport.sessionId];
            log.info(`Session closed: ${transport.sessionId}`);
          }
        };

        // Connect to the MCP server
        await this.mcpServer.connect(transport);

        // Handle the initial GET request
        await transport.handleRequest(req, res);
      } catch (error) {
        log.error('Error handling SSE connection', error);
        if (!res.headersSent) {
          res.status(500).send('Internal server error');
        }
      }
    });

    // Handle DELETE requests for session termination
    app.delete('/mcp', handleSessionRequest);

    // Start the HTTP server.
    // NOTE(review): the http.Server returned by listen() is not retained,
    // so stop() below cannot close the listener — confirm whether HTTP
    // shutdown relies solely on process exit.
    app.listen(port, () => {
      log.info(`Server started with SSE transport on port ${port}`);
    });
  }

  /**
   * Stop the server by closing the McpServer (and its connected
   * transports). See NOTE in startWithSSE about the HTTP listener.
   * @throws Error (re-thrown) when close() fails.
   */
  async stop(): Promise<void> {
    try {
      await this.mcpServer.close();
      log.info('Server stopped');
    } catch (error) {
      log.error('Error stopping server', error);
      throw error;
    }
  }
}
245 |
```
--------------------------------------------------------------------------------
/src/services/gemini.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Service for interacting with Google's Gemini API
3 | */
4 |
5 | import {
6 | GoogleGenAI,
7 | createUserContent,
8 | createPartFromUri
9 | } from '@google/genai';
10 | import { createLogger } from '../utils/logger.js';
11 | import type { GeminiConfig, GeminiFile, GeminiResponse, CachedFile, ProcessedGeminiFile } from '../types/index.js';
12 | import { FileState } from '../types/index.js';
13 | import * as fs from 'node:fs';
14 | import * as path from 'node:path';
15 | import * as crypto from 'node:crypto';
16 |
17 | const log = createLogger('GeminiService');
18 |
19 | export class GeminiService {
20 | private readonly client: GoogleGenAI;
21 | private fileCache: Map<string, CachedFile> = new Map();
22 | private readonly cacheExpiration = 24 * 60 * 60 * 1000; // 24 hours in milliseconds
23 |
24 | constructor(config: GeminiConfig) {
25 | this.client = new GoogleGenAI({ apiKey: config.apiKey });
26 | log.info('Initialized Gemini service');
27 | }
28 |
29 | /**
30 | * Calculate checksum for a file
31 | */
32 | private async calculateChecksum(filePath: string): Promise<string> {
33 | return new Promise((resolve, reject) => {
34 | const hash = crypto.createHash('md5');
35 | const stream = fs.createReadStream(filePath);
36 |
37 | stream.on('error', err => reject(err));
38 | stream.on('data', chunk => hash.update(chunk));
39 | stream.on('end', () => resolve(hash.digest('hex')));
40 | });
41 | }
42 |
43 | /**
44 | * Check if a file exists in cache and is still valid
45 | */
46 | private isCacheValid(checksum: string): boolean {
47 | const cachedFile = this.fileCache.get(checksum);
48 | if (!cachedFile) return false;
49 |
50 | const now = Date.now();
51 | const isExpired = now - cachedFile.timestamp > this.cacheExpiration;
52 |
53 | return !isExpired;
54 | }
55 |
56 | /**
57 | * Get file from Gemini API by name
58 | */
59 | async getFile(name: string): Promise<GeminiFile> {
60 | try {
61 | const file = await this.client.files.get({ name });
62 | log.debug(`Retrieved file details for ${name}`);
63 | log.verbose('File details', JSON.stringify(file));
64 |
65 | if (!file.uri || !file.mimeType) {
66 | throw new Error(`Invalid file data returned for ${name}`);
67 | }
68 |
69 | return {
70 | uri: file.uri,
71 | mimeType: file.mimeType,
72 | name: file.name,
73 | state: file.state?.toString()
74 | };
75 | } catch (error) {
76 | log.error(`Error retrieving file ${name}`, error);
77 | throw error;
78 | }
79 | }
80 |
81 | /**
82 | * Wait for a video file to be processed
83 | */
84 | async waitForVideoProcessing(file: GeminiFile, maxWaitTimeMs = 300000): Promise<ProcessedGeminiFile> {
85 | if (!file.name) {
86 | throw new Error('File name is required to check processing status');
87 | }
88 |
89 | log.info(`Waiting for video processing: ${file.name}`);
90 |
91 | const startTime = Date.now();
92 | let currentFile = file;
93 |
94 | while (currentFile.state === FileState.PROCESSING) {
95 | // Check if we've exceeded the maximum wait time
96 | if (Date.now() - startTime > maxWaitTimeMs) {
97 | throw new Error(`Timeout waiting for video processing: ${file.name}`);
98 | }
99 |
100 | // Wait 2 seconds before checking again
101 | await new Promise(resolve => setTimeout(resolve, 2000));
102 |
103 | // Get updated file status
104 | currentFile = await this.getFile(file.name);
105 | log.debug(`Video processing status: ${currentFile.state}`);
106 |
107 | if (currentFile.state === FileState.FAILED) {
108 | throw new Error(`Video processing failed: ${file.name}`);
109 | }
110 | }
111 |
112 | log.info(`Video processing completed: ${file.name}`);
113 |
114 | // Ensure all required fields are present
115 | if (!currentFile.name || !currentFile.state) {
116 | throw new Error('Missing required file information after processing');
117 | }
118 |
119 | return {
120 | uri: currentFile.uri,
121 | mimeType: currentFile.mimeType,
122 | name: currentFile.name,
123 | state: currentFile.state
124 | };
125 | }
126 |
127 | /**
128 | * Upload a file to Gemini API with caching
129 | */
130 | async uploadFile(filePath: string): Promise<GeminiFile> {
131 | try {
132 | log.debug(`Processing file upload request: ${filePath}`);
133 |
134 | // Calculate checksum for caching
135 | const checksum = await this.calculateChecksum(filePath);
136 | log.debug(`File checksum: ${checksum}`);
137 |
138 | // Check if file is in cache and still valid
139 | if (this.isCacheValid(checksum)) {
140 | const cachedFile = this.fileCache.get(checksum)!;
141 | log.info(`Using cached file: ${cachedFile.name}`);
142 |
143 | // Return cached file info
144 | return {
145 | uri: cachedFile.uri,
146 | mimeType: cachedFile.mimeType,
147 | name: cachedFile.name,
148 | state: cachedFile.state
149 | };
150 | }
151 |
152 | // Determine MIME type based on file extension
153 | const ext = path.extname(filePath).toLowerCase();
154 | let mimeType: string;
155 | let isVideo = false;
156 |
157 | if (['.jpg', '.jpeg'].includes(ext)) {
158 | mimeType = 'image/jpeg';
159 | } else if (ext === '.png') {
160 | mimeType = 'image/png';
161 | } else if (ext === '.webp') {
162 | mimeType = 'image/webp';
163 | } else if (ext === '.mp4') {
164 | mimeType = 'video/mp4';
165 | isVideo = true;
166 | } else if (ext === '.mp3') {
167 | mimeType = 'audio/mp3';
168 | } else if (ext === '.wav') {
169 | mimeType = 'audio/wav';
170 | } else if (ext === '.ogg') {
171 | mimeType = 'audio/ogg';
172 | } else {
173 | throw new Error(`Unsupported file extension: ${ext}`);
174 | }
175 |
176 | // Upload file to Google's servers
177 | const uploadedFile = await this.client.files.upload({
178 | file: filePath,
179 | config: { mimeType }
180 | });
181 |
182 | log.info(`File uploaded successfully: ${filePath}`);
183 | log.verbose('Uploaded file details', JSON.stringify(uploadedFile));
184 |
185 | if (!uploadedFile.uri || !uploadedFile.name) {
186 | throw new Error('File upload failed: Missing URI or name');
187 | }
188 |
189 | // Create file object
190 | const file: GeminiFile = {
191 | uri: uploadedFile.uri,
192 | mimeType,
193 | name: uploadedFile.name,
194 | state: uploadedFile.state?.toString()
195 | };
196 |
197 | // For videos, wait for processing to complete
198 | if (isVideo && file.state === FileState.PROCESSING) {
199 | const processedFile = await this.waitForVideoProcessing(file);
200 |
201 | // Update cache with processed file
202 | this.fileCache.set(checksum, {
203 | fileId: processedFile.name!,
204 | checksum,
205 | uri: processedFile.uri,
206 | mimeType: processedFile.mimeType,
207 | name: processedFile.name!,
208 | state: processedFile.state!,
209 | timestamp: Date.now()
210 | });
211 |
212 | return processedFile;
213 | }
214 |
215 | // Add to cache
216 | if (!file.name) {
217 | throw new Error('File name is required for caching');
218 | }
219 |
220 | this.fileCache.set(checksum, {
221 | fileId: file.name,
222 | checksum,
223 | uri: file.uri,
224 | mimeType: file.mimeType,
225 | name: file.name,
226 | state: file.state || FileState.ACTIVE,
227 | timestamp: Date.now()
228 | });
229 |
230 | return file;
231 | } catch (error) {
232 | log.error('Error uploading file', error);
233 | throw error;
234 | }
235 | }
236 |
237 | /**
238 | * Process a file with Gemini API
239 | */
240 | async processFile(file: GeminiFile, prompt: string, modelName: string): Promise<GeminiResponse> {
241 | try {
242 | log.debug(`Processing file with model ${modelName}`);
243 | log.verbose('Processing with parameters', JSON.stringify({ file, prompt, modelName }));
244 |
245 | const response = await this.client.models.generateContent({
246 | model: modelName,
247 | contents: createUserContent([
248 | createPartFromUri(file.uri, file.mimeType),
249 | prompt
250 | ])
251 | });
252 |
253 | log.debug('Received response from Gemini API');
254 | log.verbose('Gemini API response', JSON.stringify(response));
255 |
256 | const responseText = response.text || '';
257 |
258 | return {
259 | text: responseText
260 | };
261 | } catch (error) {
262 | log.error('Error processing file with Gemini API', error);
263 | return {
264 | text: `Error processing file: ${error instanceof Error ? error.message : String(error)}`,
265 | isError: true
266 | };
267 | }
268 | }
269 | }
270 |
```