# Directory Structure
```
├── .env.example
├── .gitignore
├── LICENSE
├── package.json
├── README.md
├── src
│ ├── index.ts
│ ├── server.ts
│ ├── services
│ │ └── gemini.ts
│ ├── tools
│ │ ├── audio-recognition.ts
│ │ ├── image-recognition.ts
│ │ └── video-recognition.ts
│ ├── types
│ │ └── index.ts
│ └── utils
│ └── logger.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
# Google Gemini API key (required)
GOOGLE_API_KEY=your_api_key_here
# Transport type: 'stdio' or 'sse' (defaults to 'stdio')
TRANSPORT_TYPE=stdio
# Port for SSE transport (defaults to 3000)
PORT=3000
# Log level: 'verbose', 'debug', 'info', 'warn', 'error', 'fatal' (defaults to 'fatal')
LOG_LEVEL=fatal
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Dependencies
node_modules/
package-lock.json
yarn.lock
pnpm-lock.yaml
# Build output
dist/
build/
*.tsbuildinfo
# Environment variables
.env
.env.local
.env.*.local
# Logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Editor directories and files
.idea/
.vscode/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# OS files
.DS_Store
Thumbs.db
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# MCP Video Recognition Server
An MCP (Model Context Protocol) server that provides tools for image, audio, and video recognition using Google's Gemini AI.
<a href="https://glama.ai/mcp/servers/@mario-andreschak/mcp_video_recognition">
<img width="380" height="200" src="https://glama.ai/mcp/servers/@mario-andreschak/mcp_video_recognition/badge" alt="Video Recognition Server MCP server" />
</a>
## Features
- **Image Recognition**: Analyze and describe images using Google Gemini AI
- **Audio Recognition**: Analyze and transcribe audio using Google Gemini AI
- **Video Recognition**: Analyze and describe videos using Google Gemini AI
## Prerequisites
- Node.js 18 or higher
- Google Gemini API key
## Installation
### Manual Installation
1. Clone the repository:
```bash
git clone https://github.com/yourusername/mcp-video-recognition.git
cd mcp-video-recognition
```
2. Install dependencies:
```bash
npm install
```
3. Build the project:
```bash
npm run build
```
### Installing in [FLUJO](https://github.com/mario-andreschak/FLUJO/)
1. Click Add Server
2. Copy & Paste Github URL into FLUJO
3. Click Parse, Clone, Install, Build and Save.
### Installing via Configuration Files
To integrate this MCP server with Cline or other MCP clients via configuration files:
1. Open your Cline settings:
- In VS Code, go to File -> Preferences -> Settings
- Search for "Cline MCP Settings"
- Click "Edit in settings.json"
2. Add the server configuration to the `mcpServers` object:
```json
{
"mcpServers": {
"video-recognition": {
"command": "node",
"args": [
"/path/to/mcp-video-recognition/dist/index.js"
],
"disabled": false,
"autoApprove": []
}
}
}
```
3. Replace `/path/to/mcp-video-recognition/dist/index.js` with the actual path to the `index.js` file in your project directory. Use forward slashes (/) or double backslashes (\\\\) for the path on Windows.
4. Save the settings file. Cline should automatically connect to the server.
## Configuration
The server is configured using environment variables:
- `GOOGLE_API_KEY` (required): Your Google Gemini API key
- `TRANSPORT_TYPE`: Transport type to use (`stdio` or `sse`, defaults to `stdio`)
- `PORT`: Port number for SSE transport (defaults to 3000)
- `LOG_LEVEL`: Logging level (`verbose`, `debug`, `info`, `warn`, `error`, `fatal`, defaults to `fatal`)
## Usage
### Starting the Server
#### With stdio Transport (Default)
```bash
GOOGLE_API_KEY=your_api_key npm start
```
#### With SSE Transport
```bash
GOOGLE_API_KEY=your_api_key TRANSPORT_TYPE=sse PORT=3000 npm start
```
### Using the Tools
The server provides three tools that can be called by MCP clients:
#### Image Recognition
```json
{
"name": "image_recognition",
"arguments": {
"filepath": "/path/to/image.jpg",
"prompt": "Describe this image in detail",
"modelname": "gemini-2.0-flash"
}
}
```
#### Audio Recognition
```json
{
"name": "audio_recognition",
"arguments": {
"filepath": "/path/to/audio.mp3",
"prompt": "Transcribe this audio",
"modelname": "gemini-2.0-flash"
}
}
```
#### Video Recognition
```json
{
"name": "video_recognition",
"arguments": {
"filepath": "/path/to/video.mp4",
"prompt": "Describe what happens in this video",
"modelname": "gemini-2.0-flash"
}
}
```
### Tool Parameters
All tools accept the following parameters:
- `filepath` (required): Path to the media file to analyze
- `prompt` (optional): Custom prompt for the recognition (defaults to "Describe this content")
- `modelname` (optional): Gemini model to use for recognition (defaults to "gemini-2.0-flash")
## Development
### Running in Development Mode
```bash
GOOGLE_API_KEY=your_api_key npm run dev
```
### Project Structure
- `src/index.ts`: Entry point
- `src/server.ts`: MCP server implementation
- `src/tools/`: Tool implementations
- `src/services/`: Service implementations (Gemini API)
- `src/types/`: Type definitions
- `src/utils/`: Utility functions
## License
MIT
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"esModuleInterop": true,
"strict": true,
"outDir": "dist",
"sourceMap": true,
"declaration": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "mcp-video-recognition",
"version": "1.0.0",
"description": "MCP server for Google Gemini image, audio, and video recognition",
"main": "dist/index.js",
"type": "module",
"scripts": {
"build": "tsc",
"start": "node dist/index.js",
"dev": "tsc -w & node --watch dist/index.js",
"debug": "tsc & npx @modelcontextprotocol/inspector node dist/index.js",
"lint": "eslint src --ext .ts",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
"mcp",
"gemini",
"video",
"audio",
"image",
"recognition"
],
"author": "",
"license": "MIT",
"dependencies": {
"@google/genai": "^0.9.0",
"@modelcontextprotocol/sdk": "^1.10.1",
"express": "^5.1.0",
"zod": "^3.24.3"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^22.14.1",
"typescript": "^5.8.3"
}
}
```
--------------------------------------------------------------------------------
/src/utils/logger.ts:
--------------------------------------------------------------------------------
```typescript
/**
 * Logger utility for the MCP server.
 *
 * Writes to stdout/stderr via the console. The active level is a
 * process-wide (static) setting shared by every Logger instance.
 */
export enum LogLevel {
  VERBOSE = 'verbose',
  DEBUG = 'debug',
  INFO = 'info',
  WARN = 'warn',
  ERROR = 'error',
  FATAL = 'fatal'
}
export class Logger {
  private readonly name: string;
  // Default to FATAL so a server on stdio transport stays quiet unless
  // explicitly configured (stdout is the MCP protocol channel).
  private static level: LogLevel = LogLevel.FATAL;
  constructor(name: string) {
    this.name = name;
  }
  /** Set the process-wide minimum level for all Logger instances. */
  static setLogLevel(level: LogLevel): void {
    Logger.level = level;
  }
  /**
   * A message is emitted when its level ranks at or above the configured
   * level. Ranking relies on LogLevel's declaration order (verbose → fatal).
   */
  private shouldLog(level: LogLevel): boolean {
    const levels = Object.values(LogLevel);
    return levels.indexOf(level) >= levels.indexOf(Logger.level);
  }
  private formatMessage(level: LogLevel, message: string): string {
    const timestamp = new Date().toISOString();
    return `[${timestamp}] [${level.toUpperCase()}] [${this.name}] ${message}`;
  }
  verbose(message: string, data?: unknown): void {
    if (this.shouldLog(LogLevel.VERBOSE)) {
      // Explicit nullish check (not truthiness) so falsy payloads such as
      // 0, false or '' are still serialized instead of silently dropped.
      const formattedData = data != null ? JSON.stringify(data) : '';
      console.log(this.formatMessage(LogLevel.VERBOSE, message), formattedData);
    }
  }
  debug(message: string, data?: unknown): void {
    if (this.shouldLog(LogLevel.DEBUG)) {
      // `??` (not `||`) so falsy payloads (0, false, '') are preserved.
      console.log(this.formatMessage(LogLevel.DEBUG, message), data ?? '');
    }
  }
  info(message: string, data?: unknown): void {
    if (this.shouldLog(LogLevel.INFO)) {
      console.log(this.formatMessage(LogLevel.INFO, message), data ?? '');
    }
  }
  warn(message: string, data?: unknown): void {
    if (this.shouldLog(LogLevel.WARN)) {
      console.warn(this.formatMessage(LogLevel.WARN, message), data ?? '');
    }
  }
  error(message: string, error?: unknown): void {
    if (this.shouldLog(LogLevel.ERROR)) {
      console.error(this.formatMessage(LogLevel.ERROR, message), error ?? '');
    }
  }
  fatal(message: string, error?: unknown): void {
    if (this.shouldLog(LogLevel.FATAL)) {
      console.error(this.formatMessage(LogLevel.FATAL, message), error ?? '');
    }
  }
}
/** Convenience factory for a named logger. */
export const createLogger = (name: string): Logger => new Logger(name);
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Entry point for the MCP video recognition server
*/
import { Server } from './server.js';
import { createLogger, LogLevel, Logger } from './utils/logger.js';
import type { ServerConfig } from './server.js';
const log = createLogger('Main');

// Set the log level from the environment, validating the value first.
// Logger.shouldLog ranks levels by index; an unrecognized level string
// (e.g. LOG_LEVEL=trace) would rank as -1 and enable ALL log output, so
// unknown values fall back to the documented default, FATAL.
const envLevel = process.env.LOG_LEVEL;
const logLevel = (Object.values(LogLevel) as string[]).includes(envLevel ?? '')
  ? (envLevel as LogLevel)
  : LogLevel.FATAL;
Logger.setLogLevel(logLevel);
/**
 * Load configuration from environment variables.
 *
 * @returns The server configuration assembled from the environment.
 * @throws Error if GOOGLE_API_KEY is missing or PORT is not a valid
 *   TCP port number (1-65535).
 */
function loadConfig(): ServerConfig {
  // Check for required environment variables
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) {
    throw new Error('GOOGLE_API_KEY environment variable is required');
  }
  // Anything other than 'sse' falls back to stdio.
  const transportType = process.env.TRANSPORT_TYPE === 'sse' ? 'sse' : 'stdio';
  // Parse and validate the port if provided; fail fast on garbage input
  // instead of silently passing NaN downstream.
  const portStr = process.env.PORT;
  let port: number | undefined;
  if (portStr) {
    const parsed = Number.parseInt(portStr, 10);
    if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
      throw new Error(`Invalid PORT value: ${portStr}`);
    }
    port = parsed;
  }
  return {
    gemini: {
      apiKey
    },
    transport: transportType,
    port
  };
}
/**
 * Entry point: load the configuration, start the server, and install
 * signal handlers for a clean shutdown. Exits the process with a
 * non-zero status if startup fails.
 */
async function main(): Promise<void> {
  try {
    log.info('Starting MCP video recognition server');

    const config = loadConfig();
    log.info(`Using transport: ${config.transport}`);

    const server = new Server(config);
    await server.start();

    // Factory producing a handler that stops the server for a given signal.
    const shutdown = (signal: string) => async () => {
      log.info(`Received ${signal} signal, shutting down...`);
      await server.stop();
      process.exit(0);
    };
    process.on('SIGINT', shutdown('SIGINT'));
    process.on('SIGTERM', shutdown('SIGTERM'));

    log.info('Server started successfully');
  } catch (error) {
    log.error('Failed to start server', error);
    process.exit(1);
  }
}
// Start the server; any rejection that escapes main() is treated as fatal
// and terminates the process with a non-zero exit code.
main().catch(error => {
  console.error('Unhandled error:', error);
  process.exit(1);
});
```
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
```typescript
/**
 * Type definitions for the MCP server
 */
import { z } from 'zod';
import type { Tool, CallToolResult } from '@modelcontextprotocol/sdk/types.js';
/**
 * Common parameters shared by all recognition tools (image/audio/video).
 */
export const RecognitionParamsSchema = z.object({
  filepath: z.string().describe('Path to the media file to analyze'),
  prompt: z.string().default('Describe this content').describe('Custom prompt for the recognition'),
  modelname: z.string().default('gemini-2.0-flash').describe('Gemini model to use for recognition')
});
export type RecognitionParams = z.infer<typeof RecognitionParamsSchema>;
/**
 * Video recognition specific types (currently identical to the base schema;
 * kept separate so video-only parameters can be added later).
 */
export const VideoRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type VideoRecognitionParams = z.infer<typeof VideoRecognitionParamsSchema>;
/**
 * Image recognition specific types
 */
export const ImageRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type ImageRecognitionParams = z.infer<typeof ImageRecognitionParamsSchema>;
/**
 * Audio recognition specific types
 */
export const AudioRecognitionParamsSchema = RecognitionParamsSchema.extend({});
export type AudioRecognitionParams = z.infer<typeof AudioRecognitionParamsSchema>;
/**
 * Shape of a tool registration object.
 *
 * `TArgs` lets a tool pin the callback's argument type to its
 * schema-inferred parameter type; it defaults to `any` for backward
 * compatibility with existing untyped usages.
 */
export interface ToolDefinition<TArgs = any> {
  name: string;
  description: string;
  inputSchema: z.ZodObject<any>;
  callback: (args: TArgs) => Promise<CallToolResult>;
}
/**
 * Gemini API types
 */
export interface GeminiConfig {
  /** Google Gemini API key. */
  apiKey: string;
}
/** A file as returned by the Gemini Files API (fields may be absent). */
export interface GeminiFile {
  uri: string;
  mimeType: string;
  name?: string;
  state?: string;
}
/** A Gemini file that has finished processing; all fields are present. */
export interface ProcessedGeminiFile {
  uri: string;
  mimeType: string;
  name: string;
  state: string;
}
/** Cache entry for an uploaded file, keyed by content checksum. */
export interface CachedFile {
  fileId: string;
  checksum: string;
  uri: string;
  mimeType: string;
  name: string;
  state: string;
  /** Insertion time (ms since epoch) used for cache expiry. */
  timestamp: number;
}
// File states from Gemini API
export enum FileState {
  UNSPECIFIED = 'STATE_UNSPECIFIED',
  PROCESSING = 'PROCESSING',
  ACTIVE = 'ACTIVE',
  FAILED = 'FAILED'
}
/** Normalized result of a Gemini generation call. */
export interface GeminiResponse {
  text: string;
  isError?: boolean;
}
```
--------------------------------------------------------------------------------
/src/tools/audio-recognition.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Audio recognition tool for MCP server
*/
import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
import { GeminiService } from '../services/gemini.js';
import { AudioRecognitionParamsSchema } from '../types/index.js';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import type { AudioRecognitionParams } from '../types/index.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
const log = createLogger('AudioRecognitionTool');

/**
 * Build the `audio_recognition` MCP tool definition.
 *
 * The returned object carries the tool name, description, zod input schema
 * and an async callback that uploads the audio file to Gemini and returns
 * the generated transcription/description as a text content block.
 */
export const createAudioRecognitionTool = (geminiService: GeminiService) => {
  // Wrap a plain string as an MCP text result, optionally marked as error.
  const textResult = (text: string, isError?: boolean): CallToolResult =>
    isError
      ? { content: [{ type: 'text', text }], isError: true }
      : { content: [{ type: 'text', text }] };

  const handleRequest = async (args: AudioRecognitionParams): Promise<CallToolResult> => {
    try {
      log.info(`Processing audio recognition request for file: ${args.filepath}`);
      log.verbose('Audio recognition request', JSON.stringify(args));

      // The file must exist locally before attempting an upload.
      if (!fs.existsSync(args.filepath)) {
        throw new Error(`Audio file not found: ${args.filepath}`);
      }

      // Only a small allow-list of audio formats is supported.
      const extension = path.extname(args.filepath).toLowerCase();
      const supported = ['.mp3', '.wav', '.ogg'];
      if (!supported.includes(extension)) {
        throw new Error(`Unsupported audio format: ${extension}. Supported formats are: .mp3, .wav, .ogg`);
      }

      // Fallbacks in case the zod schema defaults were bypassed.
      const prompt = args.prompt || 'Describe this audio';
      const modelName = args.modelname || 'gemini-2.0-flash';

      log.info('Uploading audio file...');
      const uploaded = await geminiService.uploadFile(args.filepath);

      log.info('Generating content from audio...');
      const result = await geminiService.processFile(uploaded, prompt, modelName);

      if (result.isError) {
        log.error(`Error in audio recognition: ${result.text}`);
        return textResult(result.text, true);
      }

      log.info('Audio recognition completed successfully');
      log.verbose('Audio recognition result', JSON.stringify(result));
      return textResult(result.text);
    } catch (error) {
      log.error('Error in audio recognition tool', error);
      const errorMessage = error instanceof Error ? error.message : String(error);
      return textResult(`Error processing audio: ${errorMessage}`, true);
    }
  };

  return {
    name: 'audio_recognition',
    description: 'Analyze and transcribe audio using Google Gemini AI',
    inputSchema: AudioRecognitionParamsSchema,
    callback: handleRequest
  };
};
```
--------------------------------------------------------------------------------
/src/tools/image-recognition.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Image recognition tool for MCP server
*/
import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
import { GeminiService } from '../services/gemini.js';
import { ImageRecognitionParamsSchema } from '../types/index.js';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import type { ImageRecognitionParams } from '../types/index.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
const log = createLogger('ImageRecognitionTool');

// Extensions accepted by the image recognition tool.
const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp']);

/**
 * Build the `image_recognition` MCP tool definition.
 *
 * The callback validates the local file, uploads it to Gemini and returns
 * the model's description of the image as a text content block.
 */
export const createImageRecognitionTool = (geminiService: GeminiService) => ({
  name: 'image_recognition',
  description: 'Analyze and describe images using Google Gemini AI',
  inputSchema: ImageRecognitionParamsSchema,
  callback: async (args: ImageRecognitionParams): Promise<CallToolResult> => {
    try {
      log.info(`Processing image recognition request for file: ${args.filepath}`);
      log.verbose('Image recognition request', JSON.stringify(args));

      // The file must exist locally before attempting an upload.
      if (!fs.existsSync(args.filepath)) {
        throw new Error(`Image file not found: ${args.filepath}`);
      }

      // Reject anything outside the supported image formats.
      const fileExt = path.extname(args.filepath).toLowerCase();
      if (!IMAGE_EXTENSIONS.has(fileExt)) {
        throw new Error(`Unsupported image format: ${fileExt}. Supported formats are: .jpg, .jpeg, .png, .webp`);
      }

      // Fallbacks in case the zod schema defaults were bypassed.
      const prompt = args.prompt || 'Describe this image';
      const modelName = args.modelname || 'gemini-2.0-flash';

      log.info('Uploading image file...');
      const uploaded = await geminiService.uploadFile(args.filepath);

      log.info('Generating content from image...');
      const result = await geminiService.processFile(uploaded, prompt, modelName);

      if (result.isError) {
        log.error(`Error in image recognition: ${result.text}`);
        return {
          content: [{ type: 'text', text: result.text }],
          isError: true
        };
      }

      log.info('Image recognition completed successfully');
      log.verbose('Image recognition result', JSON.stringify(result));
      return {
        content: [{ type: 'text', text: result.text }]
      };
    } catch (error) {
      log.error('Error in image recognition tool', error);
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        content: [{ type: 'text', text: `Error processing image: ${errorMessage}` }],
        isError: true
      };
    }
  }
});
```
--------------------------------------------------------------------------------
/src/tools/video-recognition.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Video recognition tool for MCP server
*/
import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
import { GeminiService } from '../services/gemini.js';
import { VideoRecognitionParamsSchema, FileState } from '../types/index.js';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import type { VideoRecognitionParams } from '../types/index.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
const log = createLogger('VideoRecognitionTool');

// Video formats accepted by this tool. NOTE: keep in sync with
// GeminiService.uploadFile's extension-to-MIME mapping.
const VIDEO_EXTENSIONS = ['.mp4', '.mpeg', '.mov', '.avi', '.webm'];

/**
 * Build the `video_recognition` MCP tool definition.
 *
 * The callback validates the local file, uploads it to Gemini (the upload
 * path waits for server-side video processing to finish) and returns the
 * model's description of the video as a text content block.
 */
export const createVideoRecognitionTool = (geminiService: GeminiService) => {
  return {
    name: 'video_recognition',
    description: 'Analyze and describe videos using Google Gemini AI',
    inputSchema: VideoRecognitionParamsSchema,
    callback: async (args: VideoRecognitionParams): Promise<CallToolResult> => {
      try {
        log.info(`Processing video recognition request for file: ${args.filepath}`);
        log.verbose('Video recognition request', JSON.stringify(args));

        // The file must exist locally before attempting an upload.
        if (!fs.existsSync(args.filepath)) {
          throw new Error(`Video file not found: ${args.filepath}`);
        }

        // Allow-list membership test, consistent with the image/audio tools
        // (previously a chain of !== comparisons).
        const ext = path.extname(args.filepath).toLowerCase();
        if (!VIDEO_EXTENSIONS.includes(ext)) {
          throw new Error(`Unsupported video format: ${ext}. Supported formats are: ${VIDEO_EXTENSIONS.join(', ')}`);
        }

        // Fallbacks in case the zod schema defaults were bypassed.
        const prompt = args.prompt || 'Describe this video';
        const modelName = args.modelname || 'gemini-2.0-flash';

        // Upload the file - this will handle waiting for video processing
        log.info('Uploading and processing video file...');
        const file = await geminiService.uploadFile(args.filepath);

        // Process with Gemini
        log.info('Video processing complete, generating content...');
        const result = await geminiService.processFile(file, prompt, modelName);

        if (result.isError) {
          log.error(`Error in video recognition: ${result.text}`);
          return {
            content: [{ type: 'text', text: result.text }],
            isError: true
          };
        }

        log.info('Video recognition completed successfully');
        log.verbose('Video recognition result', JSON.stringify(result));
        return {
          content: [{ type: 'text', text: result.text }]
        };
      } catch (error) {
        log.error('Error in video recognition tool', error);
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
          content: [{ type: 'text', text: `Error processing video: ${errorMessage}` }],
          isError: true
        };
      }
    }
  };
};
```
--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------
```typescript
/**
* MCP server implementation
*/
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { randomUUID } from 'crypto';
import type { Request, Response } from 'express';
import { createLogger } from './utils/logger.js';
import { GeminiService } from './services/gemini.js';
import { createImageRecognitionTool } from './tools/image-recognition.js';
import { createAudioRecognitionTool } from './tools/audio-recognition.js';
import { createVideoRecognitionTool } from './tools/video-recognition.js';
import type { GeminiConfig } from './types/index.js';
const log = createLogger('Server');

/**
 * Configuration for the MCP server.
 */
export interface ServerConfig {
  /** Settings passed through to the Gemini service (API key). */
  gemini: GeminiConfig;
  /** Transport to expose: 'stdio' (default) or 'sse' (streamable HTTP). */
  transport: 'stdio' | 'sse';
  /** TCP port for the SSE transport; defaults to 3000 when omitted. */
  port?: number;
}

/**
 * MCP server wiring: owns the Gemini service, registers the recognition
 * tools, and starts/stops the configured transport.
 */
export class Server {
  private readonly mcpServer: McpServer;
  private readonly geminiService: GeminiService;
  private readonly config: ServerConfig;
  // HTTP listener created for the SSE transport, kept so stop() can close
  // it. Previously the return value of app.listen() was discarded, so the
  // port stayed bound after stop().
  private httpServer?: { close(callback?: (err?: Error) => void): void };

  constructor(config: ServerConfig) {
    this.config = config;
    // Initialize Gemini service
    this.geminiService = new GeminiService(config.gemini);
    // Create MCP server
    this.mcpServer = new McpServer({
      name: 'mcp-video-recognition',
      version: '1.0.0'
    });
    // Register tools
    this.registerTools();
    log.info('MCP server initialized');
  }

  /**
   * Register all recognition tools with the MCP server.
   */
  private registerTools(): void {
    // Create tools
    const imageRecognitionTool = createImageRecognitionTool(this.geminiService);
    const audioRecognitionTool = createAudioRecognitionTool(this.geminiService);
    const videoRecognitionTool = createVideoRecognitionTool(this.geminiService);
    // McpServer.tool() expects the raw zod shape, not the ZodObject itself.
    this.mcpServer.tool(
      imageRecognitionTool.name,
      imageRecognitionTool.description,
      imageRecognitionTool.inputSchema.shape,
      imageRecognitionTool.callback
    );
    this.mcpServer.tool(
      audioRecognitionTool.name,
      audioRecognitionTool.description,
      audioRecognitionTool.inputSchema.shape,
      audioRecognitionTool.callback
    );
    this.mcpServer.tool(
      videoRecognitionTool.name,
      videoRecognitionTool.description,
      videoRecognitionTool.inputSchema.shape,
      videoRecognitionTool.callback
    );
    log.info('All tools registered with MCP server');
  }

  /**
   * Start the server with the configured transport.
   * @throws when the transport is unknown or startup fails.
   */
  async start(): Promise<void> {
    try {
      if (this.config.transport === 'stdio') {
        await this.startWithStdio();
      } else if (this.config.transport === 'sse') {
        await this.startWithSSE();
      } else {
        throw new Error(`Unsupported transport: ${this.config.transport}`);
      }
    } catch (error) {
      log.error('Failed to start server', error);
      throw error;
    }
  }

  /**
   * Start the server with stdio transport.
   */
  private async startWithStdio(): Promise<void> {
    log.info('Starting server with stdio transport');
    const transport = new StdioServerTransport();
    transport.onclose = () => {
      log.info('Stdio transport closed');
    };
    transport.onerror = (error) => {
      log.error('Stdio transport error', error);
    };
    await this.mcpServer.connect(transport);
    log.info('Server started with stdio transport');
  }

  /**
   * Start the server with SSE (streamable HTTP) transport.
   *
   * Sessions are created on GET /mcp and tracked in a map keyed by the
   * transport-generated session ID; POST/DELETE requests must carry a
   * valid `mcp-session-id` header.
   */
  private async startWithSSE(): Promise<void> {
    log.info('Starting server with SSE transport');
    // Import express dynamically to avoid loading it when using stdio
    const express = await import('express');
    const app = express.default();
    const port = this.config.port || 3000;
    app.use(express.json());
    // Map to store transports by session ID
    const transports: { [sessionId: string]: StreamableHTTPServerTransport } = {};
    // Handle POST requests for client-to-server communication
    app.post('/mcp', async (req, res) => {
      try {
        // Check for existing session ID
        const sessionId = req.headers['mcp-session-id'] as string | undefined;
        let transport: StreamableHTTPServerTransport;
        if (sessionId && transports[sessionId]) {
          // Reuse existing transport
          transport = transports[sessionId];
          log.debug(`Using existing transport for session: ${sessionId}`);
        } else {
          // Sessions are only created on GET; a POST without a known
          // session is a protocol error.
          log.error('No valid session ID provided');
          res.status(400).json({
            jsonrpc: '2.0',
            error: {
              code: -32000,
              message: 'Bad Request: No valid session ID provided',
            },
            id: null,
          });
          return;
        }
        // Handle the request
        await transport.handleRequest(req, res, req.body);
      } catch (error) {
        log.error('Error handling MCP request', error);
        if (!res.headersSent) {
          res.status(500).json({
            jsonrpc: '2.0',
            error: {
              code: -32603,
              message: 'Internal server error',
            },
            id: null,
          });
        }
      }
    });
    // Reusable handler for GET and DELETE requests
    const handleSessionRequest = async (req: Request, res: Response) => {
      const sessionId = req.headers['mcp-session-id'] as string | undefined;
      if (!sessionId || !transports[sessionId]) {
        res.status(400).send('Invalid or missing session ID');
        return;
      }
      const transport = transports[sessionId];
      await transport.handleRequest(req, res);
    };
    // Handle GET requests for server-to-client notifications via SSE
    app.get('/mcp', async (req, res) => {
      try {
        // Create a new transport for this connection
        const transport = new StreamableHTTPServerTransport({
          sessionIdGenerator: () => randomUUID(),
          onsessioninitialized: (sessionId) => {
            // Store the transport by session ID
            transports[sessionId] = transport;
            log.info(`New session initialized: ${sessionId}`);
          }
        });
        // Clean up transport when closed
        transport.onclose = () => {
          if (transport.sessionId) {
            delete transports[transport.sessionId];
            log.info(`Session closed: ${transport.sessionId}`);
          }
        };
        // Connect to the MCP server
        await this.mcpServer.connect(transport);
        // Handle the initial GET request
        await transport.handleRequest(req, res);
      } catch (error) {
        log.error('Error handling SSE connection', error);
        if (!res.headersSent) {
          res.status(500).send('Internal server error');
        }
      }
    });
    // Handle DELETE requests for session termination
    app.delete('/mcp', handleSessionRequest);
    // Start the HTTP server, keeping the handle so stop() can close it.
    this.httpServer = app.listen(port, () => {
      log.info(`Server started with SSE transport on port ${port}`);
    });
  }

  /**
   * Stop the server: closes the MCP server and, when running with SSE
   * transport, releases the HTTP listener.
   */
  async stop(): Promise<void> {
    try {
      await this.mcpServer.close();
      if (this.httpServer) {
        await new Promise<void>((resolve, reject) => {
          this.httpServer!.close(err => (err ? reject(err) : resolve()));
        });
        this.httpServer = undefined;
      }
      log.info('Server stopped');
    } catch (error) {
      log.error('Error stopping server', error);
      throw error;
    }
  }
}
```
--------------------------------------------------------------------------------
/src/services/gemini.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Service for interacting with Google's Gemini API
*/
import {
GoogleGenAI,
createUserContent,
createPartFromUri
} from '@google/genai';
import { createLogger } from '../utils/logger.js';
import type { GeminiConfig, GeminiFile, GeminiResponse, CachedFile, ProcessedGeminiFile } from '../types/index.js';
import { FileState } from '../types/index.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as crypto from 'node:crypto';
const log = createLogger('GeminiService');

// Supported file extensions mapped to the MIME type sent to the Files API.
// Video entries additionally require waiting for server-side processing
// after upload. NOTE: keep in sync with the recognition tools' own
// extension checks — the video tool accepts .mp4/.mpeg/.mov/.avi/.webm,
// and previously everything but .mp4 was rejected here.
// Video MIME names follow the Gemini API's supported-format list.
const EXTENSION_MIME_TYPES: Record<string, { mimeType: string; isVideo: boolean }> = {
  '.jpg': { mimeType: 'image/jpeg', isVideo: false },
  '.jpeg': { mimeType: 'image/jpeg', isVideo: false },
  '.png': { mimeType: 'image/png', isVideo: false },
  '.webp': { mimeType: 'image/webp', isVideo: false },
  '.mp4': { mimeType: 'video/mp4', isVideo: true },
  '.mpeg': { mimeType: 'video/mpeg', isVideo: true },
  '.mov': { mimeType: 'video/mov', isVideo: true },
  '.avi': { mimeType: 'video/avi', isVideo: true },
  '.webm': { mimeType: 'video/webm', isVideo: true },
  '.mp3': { mimeType: 'audio/mp3', isVideo: false },
  '.wav': { mimeType: 'audio/wav', isVideo: false },
  '.ogg': { mimeType: 'audio/ogg', isVideo: false }
};

/**
 * Thin wrapper around the Google GenAI SDK: uploads media files (with a
 * checksum-keyed in-memory cache), waits for video processing, and runs
 * content generation against an uploaded file.
 */
export class GeminiService {
  private readonly client: GoogleGenAI;
  // Keyed by MD5 checksum of the local file contents.
  private fileCache: Map<string, CachedFile> = new Map();
  private readonly cacheExpiration = 24 * 60 * 60 * 1000; // 24 hours in milliseconds

  constructor(config: GeminiConfig) {
    this.client = new GoogleGenAI({ apiKey: config.apiKey });
    log.info('Initialized Gemini service');
  }

  /**
   * Calculate an MD5 checksum of the file contents (streamed, so large
   * media files are not loaded into memory at once).
   */
  private async calculateChecksum(filePath: string): Promise<string> {
    return new Promise((resolve, reject) => {
      const hash = crypto.createHash('md5');
      const stream = fs.createReadStream(filePath);
      stream.on('error', err => reject(err));
      stream.on('data', chunk => hash.update(chunk));
      stream.on('end', () => resolve(hash.digest('hex')));
    });
  }

  /**
   * Check whether a cached upload exists for this checksum and has not
   * passed the 24h expiry window.
   */
  private isCacheValid(checksum: string): boolean {
    const cachedFile = this.fileCache.get(checksum);
    if (!cachedFile) return false;
    const now = Date.now();
    const isExpired = now - cachedFile.timestamp > this.cacheExpiration;
    return !isExpired;
  }

  /**
   * Get file metadata from the Gemini Files API by name.
   * @throws if the API returns incomplete data or the request fails.
   */
  async getFile(name: string): Promise<GeminiFile> {
    try {
      const file = await this.client.files.get({ name });
      log.debug(`Retrieved file details for ${name}`);
      log.verbose('File details', JSON.stringify(file));
      if (!file.uri || !file.mimeType) {
        throw new Error(`Invalid file data returned for ${name}`);
      }
      return {
        uri: file.uri,
        mimeType: file.mimeType,
        name: file.name,
        state: file.state?.toString()
      };
    } catch (error) {
      log.error(`Error retrieving file ${name}`, error);
      throw error;
    }
  }

  /**
   * Poll the Files API every 2s until a video file leaves the PROCESSING
   * state.
   * @param file The freshly uploaded file (must have a name).
   * @param maxWaitTimeMs Upper bound on total wait time (default 5 min).
   * @throws on timeout, processing failure, or missing file metadata.
   */
  async waitForVideoProcessing(file: GeminiFile, maxWaitTimeMs = 300000): Promise<ProcessedGeminiFile> {
    if (!file.name) {
      throw new Error('File name is required to check processing status');
    }
    log.info(`Waiting for video processing: ${file.name}`);
    const startTime = Date.now();
    let currentFile = file;
    while (currentFile.state === FileState.PROCESSING) {
      // Check if we've exceeded the maximum wait time
      if (Date.now() - startTime > maxWaitTimeMs) {
        throw new Error(`Timeout waiting for video processing: ${file.name}`);
      }
      // Wait 2 seconds before checking again
      await new Promise(resolve => setTimeout(resolve, 2000));
      // Get updated file status
      currentFile = await this.getFile(file.name);
      log.debug(`Video processing status: ${currentFile.state}`);
      if (currentFile.state === FileState.FAILED) {
        throw new Error(`Video processing failed: ${file.name}`);
      }
    }
    log.info(`Video processing completed: ${file.name}`);
    // Ensure all required fields are present
    if (!currentFile.name || !currentFile.state) {
      throw new Error('Missing required file information after processing');
    }
    return {
      uri: currentFile.uri,
      mimeType: currentFile.mimeType,
      name: currentFile.name,
      state: currentFile.state
    };
  }

  /**
   * Upload a file to the Gemini Files API, reusing a cached upload when the
   * same content (by checksum) was uploaded within the last 24 hours. For
   * video files this also waits for server-side processing to complete.
   * @throws for unsupported extensions or upload failures.
   */
  async uploadFile(filePath: string): Promise<GeminiFile> {
    try {
      log.debug(`Processing file upload request: ${filePath}`);
      // Calculate checksum for caching
      const checksum = await this.calculateChecksum(filePath);
      log.debug(`File checksum: ${checksum}`);
      // Check if file is in cache and still valid
      if (this.isCacheValid(checksum)) {
        const cachedFile = this.fileCache.get(checksum)!;
        log.info(`Using cached file: ${cachedFile.name}`);
        // Return cached file info
        return {
          uri: cachedFile.uri,
          mimeType: cachedFile.mimeType,
          name: cachedFile.name,
          state: cachedFile.state
        };
      }
      // Resolve MIME type from the file extension via the shared table.
      const ext = path.extname(filePath).toLowerCase();
      const typeInfo = EXTENSION_MIME_TYPES[ext];
      if (!typeInfo) {
        throw new Error(`Unsupported file extension: ${ext}`);
      }
      const { mimeType, isVideo } = typeInfo;
      // Upload file to Google's servers
      const uploadedFile = await this.client.files.upload({
        file: filePath,
        config: { mimeType }
      });
      log.info(`File uploaded successfully: ${filePath}`);
      log.verbose('Uploaded file details', JSON.stringify(uploadedFile));
      if (!uploadedFile.uri || !uploadedFile.name) {
        throw new Error('File upload failed: Missing URI or name');
      }
      // Create file object
      const file: GeminiFile = {
        uri: uploadedFile.uri,
        mimeType,
        name: uploadedFile.name,
        state: uploadedFile.state?.toString()
      };
      // For videos, wait for processing to complete
      if (isVideo && file.state === FileState.PROCESSING) {
        const processedFile = await this.waitForVideoProcessing(file);
        // Update cache with processed file
        this.fileCache.set(checksum, {
          fileId: processedFile.name,
          checksum,
          uri: processedFile.uri,
          mimeType: processedFile.mimeType,
          name: processedFile.name,
          state: processedFile.state,
          timestamp: Date.now()
        });
        return processedFile;
      }
      // Add to cache
      if (!file.name) {
        throw new Error('File name is required for caching');
      }
      this.fileCache.set(checksum, {
        fileId: file.name,
        checksum,
        uri: file.uri,
        mimeType: file.mimeType,
        name: file.name,
        state: file.state || FileState.ACTIVE,
        timestamp: Date.now()
      });
      return file;
    } catch (error) {
      log.error('Error uploading file', error);
      throw error;
    }
  }

  /**
   * Run content generation over an uploaded file.
   *
   * Unlike uploadFile, API failures here are modeled in the return value
   * (isError flag) rather than thrown, so tool callbacks can relay the
   * message to the client.
   */
  async processFile(file: GeminiFile, prompt: string, modelName: string): Promise<GeminiResponse> {
    try {
      log.debug(`Processing file with model ${modelName}`);
      log.verbose('Processing with parameters', JSON.stringify({ file, prompt, modelName }));
      const response = await this.client.models.generateContent({
        model: modelName,
        contents: createUserContent([
          createPartFromUri(file.uri, file.mimeType),
          prompt
        ])
      });
      log.debug('Received response from Gemini API');
      log.verbose('Gemini API response', JSON.stringify(response));
      const responseText = response.text || '';
      return {
        text: responseText
      };
    } catch (error) {
      log.error('Error processing file with Gemini API', error);
      return {
        text: `Error processing file: ${error instanceof Error ? error.message : String(error)}`,
        isError: true
      };
    }
  }
}
```