gyoridavid/short-video-maker # codebase.md

This is page 1 of 3. Use http://codebase.md/gyoridavid/short-video-maker?lines=true&page={x} to view the full context.

# Directory Structure

```
├── __mocks__
│   └── pexels-response.json
├── .dockerignore
├── .editorconfig
├── .env.example
├── .gitignore
├── .prettierrc
├── CONTRIBUTING.md
├── docker-compose.yml
├── eslint.config.mjs
├── LICENSE
├── main-cuda.Dockerfile
├── main-tiny.Dockerfile
├── main.Dockerfile
├── package.json
├── pnpm-lock.yaml
├── postcss.config.js
├── postcss.config.mjs
├── README.md
├── remotion.config.ts
├── rest.http
├── src
│   ├── components
│   │   ├── root
│   │   │   ├── index.ts
│   │   │   └── Root.tsx
│   │   ├── types.ts
│   │   ├── utils.ts
│   │   └── videos
│   │       ├── LandscapeVideo.tsx
│   │       ├── PortraitVideo.tsx
│   │       └── Test.tsx
│   ├── config.ts
│   ├── index.ts
│   ├── logger.ts
│   ├── scripts
│   │   ├── install.ts
│   │   └── normalizeMusic.ts
│   ├── server
│   │   ├── routers
│   │   │   ├── mcp.ts
│   │   │   └── rest.ts
│   │   ├── server.ts
│   │   └── validator.ts
│   ├── short-creator
│   │   ├── libraries
│   │   │   ├── FFmpeg.ts
│   │   │   ├── Kokoro.ts
│   │   │   ├── Pexels.test.ts
│   │   │   ├── Pexels.ts
│   │   │   ├── Remotion.ts
│   │   │   └── Whisper.ts
│   │   ├── music.ts
│   │   ├── ShortCreator.test.ts
│   │   └── ShortCreator.ts
│   ├── types
│   │   └── shorts.ts
│   └── ui
│       ├── App.tsx
│       ├── components
│       │   └── Layout.tsx
│       ├── index.html
│       ├── index.tsx
│       ├── pages
│       │   ├── VideoCreator.tsx
│       │   ├── VideoDetails.tsx
│       │   └── VideoList.tsx
│       ├── public
│       │   └── index.html
│       └── styles
│           └── index.css
├── static
│   └── music
│       ├── Aurora on the Boulevard - National Sweetheart.mp3
│       ├── Baby Animals Playing - Joel Cummins.mp3
│       ├── Banjo Doops - Joel Cummins.mp3
│       ├── Buckle Up - Jeremy Korpas.mp3
│       ├── Cafecito por la Manana - Cumbia Deli.mp3
│       ├── Champion - Telecasted.mp3
│       ├── Crystaline - Quincas Moreira.mp3
│       ├── Curse of the Witches - Jimena Contreras.mp3
│       ├── Delayed Baggage - Ryan Stasik.mp3
│       ├── Final Soliloquy - Asher Fulero.mp3
│       ├── Heartbeat Of The Wind - Asher Fulero.mp3
│       ├── Honey, I Dismembered The Kids - Ezra Lipp.mp3
│       ├── Hopeful - Nat Keefe.mp3
│       ├── Hopeful Freedom - Asher Fulero.mp3
│       ├── Hopeless - Jimena Contreras.mp3
│       ├── Jetski - Telecasted.mp3
│       ├── Like It Loud - Dyalla.mp3
│       ├── Name The Time And Place - Telecasted.mp3
│       ├── Night Hunt - Jimena Contreras.mp3
│       ├── No.2 Remembering Her - Esther Abrami.mp3
│       ├── Oh Please - Telecasted.mp3
│       ├── On The Hunt - Andrew Langdon.mp3
│       ├── Organic Guitar House - Dyalla.mp3
│       ├── Phantom - Density & Time.mp3
│       ├── README.md
│       ├── Restless Heart - Jimena Contreras.mp3
│       ├── Seagull - Telecasted.mp3
│       ├── Sinister - Anno Domini Beats.mp3
│       ├── Sly Sky - Telecasted.mp3
│       ├── Touch - Anno Domini Beats.mp3
│       ├── Traversing - Godmode.mp3
│       └── Twin Engines - Jeremy Korpas.mp3
├── tailwind.config.js
├── tsconfig.build.json
├── tsconfig.json
├── vite.config.ts
└── vitest.config.ts
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | node_modules
2 | dist
3 | .DS_Store
4 | .env
5 | 
```

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------

```
1 | node_modules
2 | .git
3 | .gitignore
4 | *.md
5 | dist
6 | 
```

--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------

```
1 | {
2 |   "useTabs": false,
3 |   "bracketSpacing": true,
4 |   "tabWidth": 2
5 | }
6 | 
7 | 
```

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------

```
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = crlf
 5 | charset = utf-8
 6 | trim_trailing_whitespace = true
 7 | insert_final_newline = true
 8 | indent_style = space
 9 | indent_size = 2
10 | 
```

--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------

```
1 | PEXELS_API_KEY= # crucial for the project to work
2 | LOG_LEVEL=trace # trace, debug, info, warn, error, fatal, silent
3 | WHISPER_VERBOSE=true
4 | PORT=3123
5 | DEV=true # local development mode
6 | DATA_DIR_PATH= # only for docker, otherwise leave empty
7 | 
```

--------------------------------------------------------------------------------
/static/music/README.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Music Library for Shorts Creator
 2 | 
 3 | This directory contains background music tracks for use in the shorts creator project. All music files are sourced from the YouTube audio library, and are free to use under their license. You can use this audio track in any of your videos, including videos that you monetize. No attribution is required.
 4 | 
 5 | ## Music Collection
 6 | 
 7 | The music is categorized by mood to match the `MusicMoodEnum` in the project:
 8 | 
 9 | ## Mood Categories
10 | 
11 | The following moods are defined in the project's `MusicMoodEnum`:
12 | 
13 | - sad
14 | - melancholic
15 | - happy
16 | - euphoric/high
17 | - excited
18 | - chill
19 | - uneasy
20 | - angry
21 | - dark
22 | - hopeful
23 | - contemplative
24 | - funny/quirky
25 | 
26 | ## How to Add New Music
27 | 
28 | To add new music to the project:
29 | 
30 | 1. Add your MP3 file to this directory (`static/music/`)
31 | 2. Update the `src/short-creator/music.ts` file by adding a new record to the `musicList` array:
32 | 
33 | ```typescript
34 | {
35 |   file: "your-new-music-file.mp3",  // Filename of your MP3
36 |   start: 5,                        // Start time in seconds (when to begin playing)
37 |   end: 30,                          // End time in seconds (when to stop playing)
38 |   mood: MusicMoodEnum.happy,        // Mood tag for the music
39 | }
40 | ```
41 | 
42 | ## Usage
43 | 
44 | The shorts creator uses these mood tags to filter and match appropriate music with video content. Choose tags carefully to ensure proper matching between music mood and video content.
45 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | ## [📚 Join our Skool community for support, premium content and more!](https://www.skool.com/ai-agents-az/about?s1m)
  2 | 
  3 | ### Be part of a growing community and help us create more content like this
  4 | 
  5 | # Description
  6 | 
  7 | An open source automated video creation tool for generating short-form video content. Short Video Maker combines text-to-speech, automatic captions, background videos, and music to create engaging short videos from simple text inputs.
  8 | 
  9 | This project is meant to provide a free alternative to heavy GPU-power hungry video generation (and a free alternative to expensive, third-party API calls). It doesn't generate a video from scratch based on an image or an image prompt.
 10 | 
 11 | The repository was open-sourced by the [AI Agents A-Z Youtube Channel](https://www.youtube.com/channel/UCloXqLhp_KGhHBe1kwaL2Tg). We encourage you to check out the channel for more AI-related content and tutorials.
 12 | 
 13 | The server exposes an [MCP](https://github.com/modelcontextprotocol) and a REST server.
 14 | 
 15 | While the MCP server can be used with an AI Agent (like n8n) the REST endpoints provide more flexibility for video generation.
 16 | 
 17 | You can find example n8n workflows created with the REST/MCP server [in this repository](https://github.com/gyoridavid/ai_agents_az/tree/main/episode_7).
 18 | 
 19 | # TOC
 20 | 
 21 | ## Getting started
 22 | 
 23 | - [Requirements](#general-requirements)
 24 | - [How to run the server](#getting-started-1)
 25 | - [Web UI](#web-ui)
 26 | - [Tutorial](#tutorial-with-n8n)
 27 | - [Examples](#examples)
 28 | 
 29 | ## Usage
 30 | 
 31 | - [Environment variables](#environment-variables)
 32 | - [REST API](#rest-api)
 33 | - [Configuration options](#configuration-options)
 34 | - [MCP](#mcp-server)
 35 | 
 36 | ## Info
 37 | 
 38 | - [Features](#features)
 39 | - [How it works](#how-it-works)
 40 | - [Limitations](#limitations)
 41 | - [Concepts](#concepts)
 42 | - [Troubleshooting](#troubleshooting)
 43 | - [Deploying in the cloud](#deploying-to-the-cloud)
 44 | - [FAQ](#faq)
 45 | - [Dependencies](#dependencies-for-the-video-generation)
 46 | - [Contributing](#how-to-contribute)
 47 | - [License](#license)
 48 | - [Acknowledgements](#acknowledgments)
 49 | 
 50 | # Tutorial with n8n
 51 | 
 52 | [![Automated faceless video generation (n8n + MCP) with captions, background music, local and 100% free](https://img.youtube.com/vi/jzsQpn-AciM/0.jpg)](https://www.youtube.com/watch?v=jzsQpn-AciM)
 53 | 
 54 | # Examples
 55 | 
 56 | <table>
 57 |   <tr>
 58 |     <td>
 59 |       <video src="https://github.com/user-attachments/assets/1b488e7d-1b40-439d-8767-6ab51dbc0922" width="480" height="270"></video>
 60 |     </td>
 61 |     <td>
 62 |       <video src="https://github.com/user-attachments/assets/bb7ce80f-e6e1-44e5-ba4e-9b13d917f55b" width="270" height="480"></video>
 63 |     </td>
 64 | <td>
 65 |   </tr>
 66 | </table>
 67 | 
 68 | # Features
 69 | 
 70 | - Generate complete short videos from text prompts
 71 | - Text-to-speech conversion
 72 | - Automatic caption generation and styling
 73 | - Background video search and selection via Pexels
 74 | - Background music with genre/mood selection
 75 | - Serve as both REST API and Model Context Protocol (MCP) server
 76 | 
 77 | # How It Works
 78 | 
 79 | Shorts Creator takes simple text inputs and search terms, then:
 80 | 
 81 | 1. Converts text to speech using Kokoro TTS
 82 | 2. Generates accurate captions via Whisper
 83 | 3. Finds relevant background videos from Pexels
 84 | 4. Composes all elements with Remotion
 85 | 5. Renders a professional-looking short video with perfectly timed captions
 86 | 
 87 | # Limitations
 88 | 
 89 | - The project is only capable of generating videos with English voiceover (kokoro-js doesn’t support other languages at the moment)
 90 | - The background videos are sourced from Pexels
 91 | 
 92 | # General Requirements
 93 | 
 94 | - internet
 95 | - free pexels api key
 96 | - ≥ 3 gb free RAM, my recommendation is 4gb RAM
 97 | - ≥ 2 vCPU
 98 | - ≥ 5gb disc space
 99 | 
100 | 
101 | # Concepts
102 | 
103 | ## Scene
104 | 
105 | Each video is assembled from multiple scenes. Each scene consists of:
106 | 
107 | 1. Text: Narration, the text the TTS will read and create captions from.
108 | 2. Search terms: The keywords the server should use to find videos from Pexels API. If none can be found, joker terms are being used (`nature`, `globe`, `space`, `ocean`)
109 | 
110 | # Getting started
111 | 
112 | ## Docker (recommended)
113 | 
114 | There are three docker images, for three different use cases. Generally speaking, most of the time you want to spin up the `tiny` one.
115 | 
116 | ### Tiny
117 | 
118 | - Uses the `tiny.en` whisper.cpp model
119 | - Uses the `q4` quantized kokoro model
120 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources
121 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources
122 | 
123 | ```jsx
124 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= gyoridavid/short-video-maker:latest-tiny
125 | ```
126 | 
127 | ### Normal
128 | 
129 | - Uses the `base.en` whisper.cpp model
130 | - Uses the `fp32` kokoro model
131 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources
132 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources
133 | 
134 | ```jsx
135 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= gyoridavid/short-video-maker:latest
136 | ```
137 | 
138 | ### Cuda
139 | 
140 | If you own an Nvidia GPU and you want to use a larger whisper model with GPU acceleration, you can use the CUDA optimised Docker image.
141 | 
142 | - Uses the `medium.en` whisper.cpp model (with GPU acceleration)
143 | - Uses `fp32` kokoro model
144 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources
145 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources
146 | 
147 | ```jsx
148 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= --gpus=all gyoridavid/short-video-maker:latest-cuda
149 | ```
150 | 
151 | ## Docker compose
152 | 
153 | You might use Docker Compose to run n8n or other services, and you want to combine them. Make sure you add the shared network to the service configuration.
154 | 
155 | ```bash
156 | version: "3"
157 | 
158 | services:
159 |   short-video-maker:
160 |     image: gyoridavid/short-video-maker:latest-tiny
161 |     environment:
162 |       - LOG_LEVEL=debug
163 |       - PEXELS_API_KEY=
164 |     ports:
165 |       - "3123:3123"
166 |     volumes:
167 |       - ./videos:/app/data/videos # expose the generated videos
168 | 
169 | ```
170 | 
171 | If you are using the [Self-hosted AI starter kit](https://github.com/n8n-io/self-hosted-ai-starter-kit) you want to add `networks: ['demo']` to the `short-video-maker` service so you can reach it with http://short-video-maker:3123 in n8n.
172 | 
173 | # NPM
174 | 
175 | While Docker is the recommended way to run the project, you can run it with npm or npx.
176 | On top of the general requirements, the following are necessary to run the server.
177 | 
178 | ## Supported platforms
179 | 
180 | - Ubuntu ≥ 22.04 (libc 2.5 for Whisper.cpp)
181 |   - Required packages: `git wget cmake ffmpeg curl make libsdl2-dev libnss3 libdbus-1-3 libatk1.0-0 libgbm-dev libasound2 libxrandr2 libxkbcommon-dev libxfixes3 libxcomposite1 libxdamage1 libatk-bridge2.0-0 libpango-1.0-0 libcairo2 libcups2`
182 | - Mac OS
183 |   - ffmpeg (`brew install ffmpeg`)
184 |   - node.js (tested on 22+)
185 | 
186 | Windows is **NOT** supported at the moment (whisper.cpp installation fails occasionally).
187 | 
188 | # Web UI
189 | 
190 | @mushitori made a Web UI to generate the videos from your browser.
191 | 
192 | <table>
193 |   <tr>
194 |     <td>
195 |       <img width="1088" alt="Screenshot 2025-05-12 at 1 45 11 PM" src="https://github.com/user-attachments/assets/2ab64aea-f639-41b0-bd19-2fcf73bb1a3d" />
196 |     </td>
197 |     <td>
198 |       <img width="1075" alt="Screenshot 2025-05-12 at 1 45 44 PM" src="https://github.com/user-attachments/assets/0ff568fe-ddcb-4dad-ae62-2640290aef1e" />
199 |     </td>
200 |     <td>
201 |       <img width="1083" alt="Screenshot 2025-05-12 at 1 45 51 PM" src="https://github.com/user-attachments/assets/d3c1c826-3cb3-4313-b17c-605ff612fb63" />
202 |     </td>
203 |     <td>
204 |       <img width="1070" alt="Screenshot 2025-05-12 at 1 46 42 PM" src="https://github.com/user-attachments/assets/18edb1a0-9fc2-48b3-8896-e919e7dc57ff" />
205 |     </td>
206 |   </tr>
207 | </table>
208 | 
209 | You can load it on http://localhost:3123
210 | 
211 | # Environment variables
212 | 
213 | ## 🟢 Configuration
214 | 
215 | | key             | description                                                     | default |
216 | | --------------- | --------------------------------------------------------------- | ------- |
217 | | PEXELS_API_KEY  | [your (free) Pexels API key](https://www.pexels.com/api/)       |         |
218 | | LOG_LEVEL       | pino log level                                                  | info    |
219 | | WHISPER_VERBOSE | whether the output of whisper.cpp should be forwarded to stdout | false   |
220 | | PORT            | the port the server will listen on                              | 3123    |
221 | 
222 | ## ⚙️ System configuration
223 | 
224 | | key                       | description                                                                                                                                                                                                                                                                           | default                                                     |
225 | | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- |
226 | | KOKORO_MODEL_PRECISION    | The size of the Kokoro model to use. Valid options are `fp32`, `fp16`, `q8`, `q4`, `q4f16`                                                                                                                                                                                            | depends, see the descriptions of the docker images above ^^ |
227 | | CONCURRENCY               | [concurrency refers to how many browser tabs are opened in parallel during a render. Each Chrome tab renders web content and then screenshots it.](https://www.remotion.dev/docs/terminology/concurrency). Tweaking this value helps with running the project with limited resources. | depends, see the descriptions of the docker images above ^^ |
228 | | VIDEO_CACHE_SIZE_IN_BYTES | Cache for [<OffthreadVideo>](https://remotion.dev/docs/offthreadvideo) frames in Remotion. Tweaking this value helps with running the project with limited resources.                                                                                                                 | depends, see the descriptions of the docker images above ^^ |
229 | 
230 | ## ⚠️ Danger zone
231 | 
232 | | key           | description                                                                                                                                                                              | default                                                                                              |
233 | | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
234 | | WHISPER_MODEL | Which whisper.cpp model to use. Valid options are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large-v1`, `large-v2`, `large-v3`, `large-v3-turbo` | Depends, see the descriptions of the docker images above. For npm, the default option is `medium.en` |
235 | | DATA_DIR_PATH | the data directory of the project                                                                                                                                                        | `~/.ai-agents-az-video-generator` with npm, `/app/data` in the Docker images                         |
236 | | DOCKER        | whether the project is running in a Docker container                                                                                                                                     | `true` for the docker images, otherwise `false`                                                      |
237 | | DEV           | guess! :)                                                                                                                                                                                | `false`                                                                                              |
238 | 
239 | # Configuration options
240 | 
241 | | key                    | description                                                                                                    | default    |
242 | | ---------------------- | -------------------------------------------------------------------------------------------------------------- | ---------- |
243 | | paddingBack            | The end screen, for how long the video should keep playing after the narration has finished (in milliseconds). | 0          |
244 | | music                  | The mood of the background music. Get the available options from the GET `/api/music-tags` endpoint.           | random     |
245 | | captionPosition        | The position where the captions should be rendered. Possible options: `top`, `center`, `bottom`.               | `bottom`   |
246 | | captionBackgroundColor | The background color of the active caption item.                                                               | `blue`     |
247 | | voice                  | The Kokoro voice.                                                                                              | `af_heart` |
248 | | orientation            | The video orientation. Possible options are `portrait` and `landscape`                                         | `portrait` |
249 | | musicVolume            | Set the volume of the background music. Possible options are `low` `medium` `high` and `muted`                 | `high`     |
250 | 
251 | # Usage
252 | 
253 | ## MCP server
254 | 
255 | ## Server URLs
256 | 
257 | `/mcp/sse`
258 | 
259 | `/mcp/messages`
260 | 
261 | ## Available tools
262 | 
263 | - `create-short-video` Creates a short video - the LLM will figure out the right configuration. If you want to use a specific configuration, you need to specify it in your prompt.
264 | - `get-video-status` Somewhat useless, it’s meant for checking the status of the video, but since the AI agents aren’t really good with the concept of time, you’ll probably end up using the REST API for that anyway.
265 | 
266 | # REST API
267 | 
268 | ### GET `/health`
269 | 
270 | Healthcheck endpoint
271 | 
272 | ```bash
273 | curl --location 'localhost:3123/health'
274 | ```
275 | 
276 | ```bash
277 | {
278 |     "status": "ok"
279 | }
280 | ```
281 | 
282 | ### POST `/api/short-video`
283 | 
284 | ```bash
285 | curl --location 'localhost:3123/api/short-video' \
286 | --header 'Content-Type: application/json' \
287 | --data '{
288 |     "scenes": [
289 |       {
290 |         "text": "Hello world!",
291 |         "searchTerms": ["river"]
292 |       }
293 |     ],
294 |     "config": {
295 |       "paddingBack": 1500,
296 |       "music": "chill"
297 |     }
298 | }'
299 | ```
300 | 
301 | ```bash
302 | {
303 |     "videoId": "cma9sjly700020jo25vwzfnv9"
304 | }
305 | ```
306 | 
307 | ### GET `/api/short-video/{id}/status`
308 | 
309 | ```bash
310 | curl --location 'localhost:3123/api/short-video/cm9ekme790000hysi5h4odlt1/status'
311 | ```
312 | 
313 | ```bash
314 | {
315 |     "status": "ready"
316 | }
317 | ```
318 | 
319 | ### GET `/api/short-video/{id}`
320 | 
321 | ```bash
322 | curl --location 'localhost:3123/api/short-video/cm9ekme790000hysi5h4odlt1'
323 | ```
324 | 
325 | Response: the binary data of the video.
326 | 
327 | ### GET `/api/short-videos`
328 | 
329 | ```bash
330 | curl --location 'localhost:3123/api/short-videos'
331 | ```
332 | 
333 | ```bash
334 | {
335 |     "videos": [
336 |         {
337 |             "id": "cma9wcwfc0000brsi60ur4lib",
338 |             "status": "processing"
339 |         }
340 |     ]
341 | }
342 | ```
343 | 
344 | ### DELETE `/api/short-video/{id}`
345 | 
346 | ```bash
347 | curl --location --request DELETE 'localhost:3123/api/short-video/cma9wcwfc0000brsi60ur4lib'
348 | ```
349 | 
350 | ```bash
351 | {
352 |     "success": true
353 | }
354 | ```
355 | 
356 | ### GET `/api/voices`
357 | 
358 | ```bash
359 | curl --location 'localhost:3123/api/voices'
360 | ```
361 | 
362 | ```bash
363 | [
364 |     "af_heart",
365 |     "af_alloy",
366 |     "af_aoede",
367 |     "af_bella",
368 |     "af_jessica",
369 |     "af_kore",
370 |     "af_nicole",
371 |     "af_nova",
372 |     "af_river",
373 |     "af_sarah",
374 |     "af_sky",
375 |     "am_adam",
376 |     "am_echo",
377 |     "am_eric",
378 |     "am_fenrir",
379 |     "am_liam",
380 |     "am_michael",
381 |     "am_onyx",
382 |     "am_puck",
383 |     "am_santa",
384 |     "bf_emma",
385 |     "bf_isabella",
386 |     "bm_george",
387 |     "bm_lewis",
388 |     "bf_alice",
389 |     "bf_lily",
390 |     "bm_daniel",
391 |     "bm_fable"
392 | ]
393 | ```
394 | 
395 | ### GET `/api/music-tags`
396 | 
397 | ```bash
398 | curl --location 'localhost:3123/api/music-tags'
399 | ```
400 | 
401 | ```bash
402 | [
403 |     "sad",
404 |     "melancholic",
405 |     "happy",
406 |     "euphoric/high",
407 |     "excited",
408 |     "chill",
409 |     "uneasy",
410 |     "angry",
411 |     "dark",
412 |     "hopeful",
413 |     "contemplative",
414 |     "funny/quirky"
415 | ]
416 | ```
417 | 
418 | # Troubleshooting
419 | 
420 | ## Docker
421 | 
422 | The server needs at least 3gb free memory. Make sure to allocate enough RAM to Docker.
423 | 
424 | If you are running the server from Windows and via wsl2, you need to set the resource limits from the [wsl utility 2](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#configure-global-options-with-wslconfig) - otherwise set it from Docker Desktop. (Ubuntu is not restricting the resources unless specified with the run command).
425 | 
426 | ## NPM
427 | 
428 | Make sure all the necessary packages are installed.
429 | 
430 | # n8n
431 | 
432 | Setting up the MCP (or REST) server depends on how you run n8n and the server. Please follow the examples from the matrix below.
433 | 
434 | |                                                   | n8n is running locally, using `n8n start`              | n8n is running locally using Docker                                                                                                                                                                                           | n8n is running in the cloud                            |
435 | | ------------------------------------------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ |
436 | | `short-video-maker` is running in Docker, locally | `http://localhost:3123`                                | It depends. You can technically use `http://host.docker.internal:3123` as it points to the host, but you could configure to use the same network and use the service name to communicate like `http://short-video-maker:3123` | won’t work - deploy `short-video-maker` to the cloud   |
437 | | `short-video-maker` is running with npm/npx       | `http://localhost:3123`                                | `http://host.docker.internal:3123`                                                                                                                                                                                            | won’t work - deploy `short-video-maker` to the cloud   |
438 | | `short-video-maker` is running in the cloud       | You should use your IP address `http://{YOUR_IP}:3123` | You should use your IP address `http://{YOUR_IP}:3123`                                                                                                                                                                        | You should use your IP address `http://{YOUR_IP}:3123` |
439 | 
440 | # Deploying to the cloud
441 | 
442 | While each VPS provider is different, and it’s impossible to provide configuration for all of them, here are some tips.
443 | 
444 | - Use Ubuntu ≥ 22.04
445 | - Have ≥ 4gb RAM, ≥ 2vCPUs and ≥5gb storage
446 | - Use [pm2](https://pm2.keymetrics.io/) to run/manage the server
447 | - Put the environment variables in the `.bashrc` file (or similar)
448 | 
449 | # FAQ
450 | 
451 | ## Can I use other languages? (French, German etc.)
452 | 
453 | Unfortunately, it’s not possible at the moment. Kokoro-js only supports English.
454 | 
455 | ## Can I pass in images and videos and can it stitch it together
456 | 
457 | No
458 | 
459 | ## Should I run the project with `npm` or `docker`?
460 | 
461 | Docker is the recommended way to run the project.
462 | 
463 | ## How much GPU is being used for the video generation?
464 | 
465 | Honestly, not a lot - only whisper.cpp can be accelerated.
466 | 
467 | Remotion is CPU-heavy, and [Kokoro-js](https://github.com/hexgrad/kokoro) runs on the CPU.
468 | 
469 | ## Is there a UI that I can use to generate the videos
470 | 
471 | Yes - see the [Web UI](#web-ui) section; once the server is running you can load it on http://localhost:3123
472 | 
473 | ## Can I select different source for the videos than Pexels, or provide my own video
474 | 
475 | No
476 | 
477 | ## Can the project generate videos from images?
478 | 
479 | No
480 | 
481 | ## Dependencies for the video generation
482 | 
483 | | Dependency                                             | Version  | License                                                                           | Purpose                         |
484 | | ------------------------------------------------------ | -------- | --------------------------------------------------------------------------------- | ------------------------------- |
485 | | [Remotion](https://remotion.dev/)                      | ^4.0.286 | [Remotion License](https://github.com/remotion-dev/remotion/blob/main/LICENSE.md) | Video composition and rendering |
486 | | [Whisper CPP](https://github.com/ggml-org/whisper.cpp) | v1.5.5   | MIT                                                                               | Speech-to-text for captions     |
487 | | [FFmpeg](https://ffmpeg.org/)                          | ^2.1.3   | LGPL/GPL                                                                          | Audio/video manipulation        |
488 | | [Kokoro.js](https://www.npmjs.com/package/kokoro-js)   | ^1.2.0   | MIT                                                                               | Text-to-speech generation       |
489 | | [Pexels API](https://www.pexels.com/api/)              | N/A      | [Pexels Terms](https://www.pexels.com/license/)                                   | Background videos               |
490 | 
491 | ## How to contribute?
492 | 
493 | PRs are welcome.
494 | See the [CONTRIBUTING.md](CONTRIBUTING.md) file for instructions on setting up a local development environment.
495 | 
496 | ## License
497 | 
498 | This project is licensed under the [MIT License](LICENSE).
499 | 
500 | ## Acknowledgments
501 | 
502 | - ❤️ [Remotion](https://remotion.dev/) for programmatic video generation
503 | - ❤️ [Whisper](https://github.com/ggml-org/whisper.cpp) for speech-to-text
504 | - ❤️ [Pexels](https://www.pexels.com/) for video content
505 | - ❤️ [FFmpeg](https://ffmpeg.org/) for audio/video processing
506 | - ❤️ [Kokoro](https://github.com/hexgrad/kokoro) for TTS
507 | 
```

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Contributing to Shorts Creator
 2 | 
 3 | ## How to setup the development environment
 4 | 
 5 | 1. Clone the repository
 6 | 
 7 |    ```bash
 8 |    git clone git@github.com:gyoridavid/short-video-maker.git
 9 |    cd short-video-maker
10 |    ```
11 | 
12 | 2. Install dependencies
13 | 
14 |    ```bash
15 |    pnpm install
16 |    ```
17 | 
18 | 3. Copy `.env.example` to `.env` and set the right environment variables.
19 | 
20 | 4. Start the server
21 |    ```bash
22 |    pnpm dev
23 |    ```
24 | 
25 | ## How to preview the videos and debug the rendering process
26 | 
27 | You can use Remotion Studio to preview videos. Make sure to update the template if the underlying data structure changes.
28 | 
29 | ```bash
30 | npx remotion studio
31 | ```
32 | 
```

--------------------------------------------------------------------------------
/postcss.config.mjs:
--------------------------------------------------------------------------------

```
1 | export default {
2 |   plugins: {
3 |     "@tailwindcss/postcss": {},
4 |   },
5 | };
6 | 
```

--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------

```
1 | import { config } from "@remotion/eslint-config-flat";
2 | 
3 | export default config;
4 | 
```

--------------------------------------------------------------------------------
/postcss.config.js:
--------------------------------------------------------------------------------

```javascript
1 | module.exports = {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | } 
```

--------------------------------------------------------------------------------
/src/logger.ts:
--------------------------------------------------------------------------------

```typescript
1 | import { logger } from "./config";
2 | 
3 | export default logger;
4 | export { logger };
5 | 
```

--------------------------------------------------------------------------------
/src/components/root/index.ts:
--------------------------------------------------------------------------------

```typescript
1 | import { registerRoot } from "remotion";
2 | import { RemotionRoot } from "./Root";
3 | 
4 | registerRoot(RemotionRoot);
5 | 
```

--------------------------------------------------------------------------------
/vitest.config.ts:
--------------------------------------------------------------------------------

```typescript
1 | import { defineConfig } from "vitest/config";
2 | 
3 | export default defineConfig({
4 |   test: {
5 |     // ...
6 |   },
7 | });
8 | 
```

--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "extends": "./tsconfig.json",
3 |   "compilerOptions": {
4 |     "outDir": "./dist"
5 |   },
6 |   "include": ["src/**/*"],
7 |   "exclude": ["**/*.test.ts", "src/ui"]
8 | }
9 | 
```

--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------

```javascript
 1 | /** @type {import('tailwindcss').Config} */
 2 | module.exports = {
 3 |   content: [
 4 |     "./src/ui/**/*.{js,jsx,ts,tsx}",
 5 |   ],
 6 |   theme: {
 7 |     extend: {},
 8 |   },
 9 |   plugins: [],
10 | } 
```

--------------------------------------------------------------------------------
/src/components/types.ts:
--------------------------------------------------------------------------------

```typescript
 1 | export enum AvailableComponentsEnum {
 2 |   PortraitVideo = "ShortVideo",
 3 |   LandscapeVideo = "LandscapeVideo",
 4 | }
 5 | export type OrientationConfig = {
 6 |   width: number;
 7 |   height: number;
 8 |   component: AvailableComponentsEnum;
 9 | };
10 | 
```

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------

```yaml
 1 | version: "3"
 2 | 
 3 | services:
 4 |   short-creator:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: main.Dockerfile
 8 |     env_file:
 9 |       - .env
10 |     environment:
11 |       - DEV=false
12 |     ports:
13 |       - "3123:3123"
14 |     entrypoint: ["node", "dist/index.js"]
15 | 
```

--------------------------------------------------------------------------------
/src/ui/index.tsx:
--------------------------------------------------------------------------------

```typescript
 1 | import React from 'react';
 2 | import ReactDOM from 'react-dom/client';
 3 | import App from './App';
 4 | import './styles/index.css';
 5 | 
 6 | const root = ReactDOM.createRoot(
 7 |   document.getElementById('root') as HTMLElement
 8 | );
 9 | 
10 | root.render(
11 |   <React.StrictMode>
12 |     <App />
13 |   </React.StrictMode>
14 | ); 
```

--------------------------------------------------------------------------------
/src/components/videos/Test.tsx:
--------------------------------------------------------------------------------

```typescript
 1 | import { AbsoluteFill, Sequence } from "remotion";
 2 | 
 3 | export const TestVideo: React.FC = () => {
 4 |   return (
 5 |     <AbsoluteFill>
 6 |       <AbsoluteFill>
 7 |         <AbsoluteFill>
 8 |           <h1>Hello</h1>
 9 |         </AbsoluteFill>
10 |         <Sequence from={10}>
11 |           <h1 style={{ marginTop: "60px" }}>World</h1>
12 |         </Sequence>
13 |       </AbsoluteFill>
14 |     </AbsoluteFill>
15 |   );
16 | };
17 | 
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "NodeNext",
 5 |     "moduleResolution": "NodeNext",
 6 |     "esModuleInterop": true,
 7 |     "strict": true,
 8 |     "skipLibCheck": true,
 9 |     "forceConsistentCasingInFileNames": true,
10 |     "outDir": "dist",
11 |     "rootDir": "src",
12 |     "declaration": true,
13 |     "jsx": "react-jsx"
14 |   },
15 |   "exclude": [
16 |     "remotion.config.ts",
17 |     "node_modules",
18 |     "dist",
19 |     "vitest.config.ts",
20 |     "src/ui"
21 |   ]
22 | }
23 | 
```

--------------------------------------------------------------------------------
/src/ui/public/index.html:
--------------------------------------------------------------------------------

```html
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 6 |     <meta name="theme-color" content="#000000" />
 7 |     <meta
 8 |       name="description"
 9 |       content="Short Video Maker - Create amazing short videos"
10 |     />
11 |     <title>Short Video Maker</title>
12 |   </head>
13 |   <body>
14 |     <noscript>You need to enable JavaScript to run this app.</noscript>
15 |     <div id="root"></div>
16 |   </body>
17 | </html> 
```

--------------------------------------------------------------------------------
/remotion.config.ts:
--------------------------------------------------------------------------------

```typescript
 1 | // See all configuration options: https://remotion.dev/docs/config
 2 | // Each option also is available as a CLI flag: https://remotion.dev/docs/cli
 3 | 
 4 | // Note: When using the Node.JS APIs, the config file doesn't apply. Instead, pass options directly to the APIs
 5 | 
 6 | import { Config } from "@remotion/cli/config";
 7 | 
 8 | Config.setVideoImageFormat("jpeg");
 9 | Config.setOverwriteOutput(true);
10 | Config.setPublicDir("static/music");
11 | Config.setEntryPoint("src/components/root/index.ts");
12 | 
```

--------------------------------------------------------------------------------
/src/ui/index.html:
--------------------------------------------------------------------------------

```html
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 6 |     <meta name="theme-color" content="#000000" />
 7 |     <meta
 8 |       name="description"
 9 |       content="Short Video Maker - Create amazing short videos"
10 |     />
11 |     <title>Short Video Maker</title>
12 |   </head>
13 |   <body>
14 |     <noscript>You need to enable JavaScript to run this app.</noscript>
15 |     <div id="root"></div>
16 |     <script type="module" src="./index.tsx"></script>
17 |   </body>
18 | </html> 
```

--------------------------------------------------------------------------------
/src/ui/App.tsx:
--------------------------------------------------------------------------------

```typescript
 1 | import React from 'react';
 2 | import { BrowserRouter as Router, Routes, Route } from 'react-router-dom';
 3 | import VideoList from './pages/VideoList';
 4 | import VideoCreator from './pages/VideoCreator';
 5 | import VideoDetails from './pages/VideoDetails';
 6 | import Layout from './components/Layout';
 7 | 
 8 | const App: React.FC = () => {
 9 |   return (
10 |     <Router>
11 |       <Layout>
12 |         <Routes>
13 |           <Route path="/" element={<VideoList />} />
14 |           <Route path="/create" element={<VideoCreator />} />
15 |           <Route path="/video/:videoId" element={<VideoDetails />} />
16 |         </Routes>
17 |       </Layout>
18 |     </Router>
19 |   );
20 | };
21 | 
22 | export default App; 
```

--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import { defineConfig } from 'vite';
 2 | import react from '@vitejs/plugin-react';
 3 | import path from 'path';
 4 | 
 5 | export default defineConfig({
 6 |   plugins: [react()],
 7 |   root: 'src/ui',
 8 |   build: {
 9 |     outDir: path.resolve(__dirname, 'dist/ui'),
10 |     emptyOutDir: true,
11 |     rollupOptions: {
12 |       input: {
13 |         main: path.resolve(__dirname, 'src/ui/index.html'),
14 |       },
15 |     },
16 |   },
17 |   resolve: {
18 |     alias: {
19 |       '@': path.resolve(__dirname, './src/ui'),
20 |     },
21 |   },
22 |   server: {
23 |     port: 3000,
24 |     proxy: {
25 |       '/api': {
26 |         target: 'http://localhost:3123',
27 |         changeOrigin: true,
28 |       },
29 |       '/mcp': {
30 |         target: 'http://localhost:3123',
31 |         changeOrigin: true,
32 |       },
33 |     },
34 |   },
35 | }); 
```

--------------------------------------------------------------------------------
/src/ui/styles/index.css:
--------------------------------------------------------------------------------

```css
 1 | @tailwind base;
 2 | @tailwind components;
 3 | @tailwind utilities;
 4 | 
 5 | /* Base styles */
 6 | body {
 7 |   margin: 0;
 8 |   padding: 0;
 9 |   font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif;
10 |   -webkit-font-smoothing: antialiased;
11 |   -moz-osx-font-smoothing: grayscale;
12 | }
13 | 
14 | a {
15 |   color: inherit;
16 |   text-decoration: none;
17 | }
18 | 
19 | /* Scrollbar styles */
20 | ::-webkit-scrollbar {
21 |   width: 8px;
22 |   height: 8px;
23 | }
24 | 
25 | ::-webkit-scrollbar-track {
26 |   background: #f1f1f1;
27 | }
28 | 
29 | ::-webkit-scrollbar-thumb {
30 |   background: #888;
31 |   border-radius: 4px;
32 | }
33 | 
34 | ::-webkit-scrollbar-thumb:hover {
35 |   background: #555;
36 | }
37 | 
38 | /* Focus styles for accessibility */
39 | :focus-visible {
40 |   outline: 2px solid #1976d2;
41 | }
42 | 
43 | /* Transitions */
44 | .fade-enter {
45 |   opacity: 0;
46 | }
47 | 
48 | .fade-enter-active {
49 |   opacity: 1;
50 |   transition: opacity 300ms;
51 | }
52 | 
53 | .fade-exit {
54 |   opacity: 1;
55 | }
56 | 
57 | .fade-exit-active {
58 |   opacity: 0;
59 |   transition: opacity 300ms;
60 | } 
```

--------------------------------------------------------------------------------
/src/scripts/install.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import { ensureBrowser } from "@remotion/renderer";
 2 | 
 3 | import { logger } from "../logger";
 4 | import { Kokoro } from "../short-creator/libraries/Kokoro";
 5 | import { MusicManager } from "../short-creator/music";
 6 | import { Config } from "../config";
 7 | import { Whisper } from "../short-creator/libraries/Whisper";
 8 | 
 9 | // runs in docker
10 | export async function install() {
11 |   const config = new Config();
12 | 
13 |   logger.info("Installing dependencies...");
14 |   logger.info("Installing Kokoro...");
15 |   await Kokoro.init(config.kokoroModelPrecision);
16 |   logger.info("Installing browser shell...");
17 |   await ensureBrowser();
18 |   logger.info("Installing whisper.cpp");
19 |   await Whisper.init(config);
20 |   logger.info("Installing dependencies complete");
21 | 
22 |   logger.info("Ensuring the music files exist...");
23 |   const musicManager = new MusicManager(config);
24 |   try {
25 |     musicManager.ensureMusicFilesExist();
26 |   } catch (error: unknown) {
27 |     logger.error(error, "Missing music files");
28 |     process.exit(1);
29 |   }
30 | }
31 | 
32 | // Entry point: this script is executed during the Docker image build.
33 | install()
34 |   .then(() => {
35 |     logger.info("Installation complete");
36 |   })
37 |   .catch((error: unknown) => {
38 |     logger.error(error, "Installation failed");
39 |     // Exit non-zero so a failed install fails the build instead of passing silently.
40 |     process.exit(1);
41 |   });
42 | 
```

--------------------------------------------------------------------------------
/src/server/validator.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import { createShortInput, CreateShortInput } from "../types/shorts";
 2 | import { logger } from "../logger";
 3 | import { ZodError } from "zod";
 4 | 
 5 | export interface ValidationErrorResult {
 6 |   message: string;
 7 |   missingFields: Record<string, string>;
 8 | }
 9 | 
10 | export function validateCreateShortInput(input: object): CreateShortInput {
11 |   const validated = createShortInput.safeParse(input);
12 |   logger.info({ validated }, "Validated input");
13 | 
14 |   if (validated.success) {
15 |     return validated.data;
16 |   }
17 | 
18 |   // Process the validation errors
19 |   const errorResult = formatZodError(validated.error);
20 | 
21 |   throw new Error(
22 |     JSON.stringify({
23 |       message: errorResult.message,
24 |       missingFields: errorResult.missingFields,
25 |     }),
26 |   );
27 | }
28 | 
29 | function formatZodError(error: ZodError): ValidationErrorResult {
30 |   const missingFields: Record<string, string> = {};
31 | 
32 |   // Extract all the errors into a human-readable format
33 |   error.errors.forEach((err) => {
34 |     const path = err.path.join(".");
35 |     missingFields[path] = err.message;
36 |   });
37 | 
38 |   // Create a human-readable message
39 |   const errorPaths = Object.keys(missingFields);
40 |   let message = `Validation failed for ${errorPaths.length} field(s): `;
41 |   message += errorPaths.join(", ");
42 | 
43 |   return {
44 |     message,
45 |     missingFields,
46 |   };
47 | }
48 | 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/Pexels.test.ts:
--------------------------------------------------------------------------------

```typescript
 1 | process.env.LOG_LEVEL = "debug";
 2 | 
 3 | import nock from "nock";
 4 | import { PexelsAPI } from "./Pexels";
 5 | import { test, assert, expect } from "vitest";
 6 | import fs from "fs-extra";
 7 | import path from "path";
 8 | import { OrientationEnum } from "../../types/shorts";
 9 | 
10 | test("test pexels", async () => {
11 |   const mockResponse = fs.readFileSync(
12 |     path.resolve("__mocks__/pexels-response.json"),
13 |     "utf-8",
14 |   );
15 |   nock("https://api.pexels.com")
16 |     .get(/videos\/search/)
17 |     .reply(200, mockResponse);
18 |   const pexels = new PexelsAPI("asdf");
19 |   const video = await pexels.findVideo(["dog"], 2.4, []);
20 |   console.log(video);
21 |   assert.isObject(video, "Video should be an object");
22 | });
23 | 
24 | test("should time out", async () => {
25 |   nock("https://api.pexels.com")
26 |     .get(/videos\/search/)
27 |     .delay(1000)
28 |     .times(30)
29 |     .reply(200, {});
30 |   await expect(async () => { // must be awaited, or the rejection is never asserted
31 |     const pexels = new PexelsAPI("asdf");
32 |     await pexels.findVideo(["dog"], 2.4, [], OrientationEnum.portrait, 100);
33 |   }).rejects.toThrow(
34 |     expect.objectContaining({
35 |       name: "TimeoutError",
36 |     }),
37 |   );
38 | });
39 | 
40 | test("should retry 3 times", async () => {
41 |   nock("https://api.pexels.com")
42 |     .get(/videos\/search/)
43 |     .delay(1000)
44 |     .times(2)
45 |     .reply(200, {});
46 |   const mockResponse = fs.readFileSync(
47 |     path.resolve("__mocks__/pexels-response.json"),
48 |     "utf-8",
49 |   );
50 |   nock("https://api.pexels.com")
51 |     .get(/videos\/search/)
52 |     .reply(200, mockResponse);
53 | 
54 |   const pexels = new PexelsAPI("asdf");
55 |   const video = await pexels.findVideo(["dog"], 2.4, []);
56 |   console.log(video);
57 |   assert.isObject(video, "Video should be an object");
58 | });
59 | 
```

--------------------------------------------------------------------------------
/src/scripts/normalizeMusic.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import ffmpeg from "fluent-ffmpeg";
 2 | import path from "path";
 3 | import("@ffmpeg-installer/ffmpeg");
 4 | import fs from "fs-extra";
 5 | 
 6 | import { logger } from "../logger";
 7 | import { MusicManager } from "../short-creator/music";
 8 | import { Config } from "../config";
 9 | 
10 | async function normalize(inputPath: string, outputPath: string) {
11 |   return new Promise((resolve, reject) => {
12 |     ffmpeg()
13 |       .input(inputPath)
14 |       .audioCodec("libmp3lame")
15 |       .audioBitrate(96)
16 |       .audioChannels(2)
17 |       .audioFrequency(44100)
18 |       .audioFilter("loudnorm,volume=0.1")
19 |       .toFormat("mp3")
20 |       .on("error", (err) => {
21 |         logger.error(err, "Error normalizing audio:");
22 |         reject(err);
23 |       })
24 |       .save(outputPath)
25 |       .on("end", () => {
26 |         logger.debug("Audio normalization complete");
27 |         resolve(outputPath);
28 |       });
29 |   });
30 | }
31 | 
32 | export async function normalizeMusic() {
33 |   const config = new Config();
34 |   const musicManager = new MusicManager(config);
35 |   try {
36 |     musicManager.ensureMusicFilesExist();
37 |   } catch (error: unknown) {
38 |     logger.error(error, "Missing music files");
39 |     process.exit(1);
40 |   }
41 |   const musicFiles = musicManager.musicList();
42 |   const normalizedDir = path.join(config.musicDirPath, "normalized");
43 |   fs.ensureDirSync(normalizedDir);
44 |   for (const musicFile of musicFiles) {
45 |     const inputPath = path.join(config.musicDirPath, musicFile.file);
46 |     const outputPath = path.join(normalizedDir, musicFile.file);
47 |     logger.debug({ inputPath, outputPath }, "Normalizing music file");
48 |     await normalize(inputPath, outputPath);
49 |   }
50 | }
51 | 
52 | normalizeMusic()
53 |   .then(() => {
54 |     logger.info(
55 |       "Music normalization completed successfully - make sure to replace the original files with the normalized ones",
56 |     );
57 |   })
58 |   .catch((error: unknown) => {
59 |     logger.error(error, "Error normalizing music files");
60 |   });
61 | 
```

--------------------------------------------------------------------------------
/src/server/server.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import http from "http";
 2 | import express from "express";
 3 | import type {
 4 |   Request as ExpressRequest,
 5 |   Response as ExpressResponse,
 6 | } from "express";
 7 | import path from "path";
 8 | import { ShortCreator } from "../short-creator/ShortCreator";
 9 | import { APIRouter } from "./routers/rest";
10 | import { MCPRouter } from "./routers/mcp";
11 | import { logger } from "../logger";
12 | import { Config } from "../config";
13 | 
14 | export class Server {
15 |   private app: express.Application;
16 |   private config: Config;
17 | 
18 |   constructor(config: Config, shortCreator: ShortCreator) {
19 |     this.config = config;
20 |     this.app = express();
21 | 
22 |     // add healthcheck endpoint
23 |     this.app.get("/health", (req: ExpressRequest, res: ExpressResponse) => {
24 |       res.status(200).json({ status: "ok" });
25 |     });
26 | 
27 |     const apiRouter = new APIRouter(config, shortCreator);
28 |     const mcpRouter = new MCPRouter(shortCreator);
29 |     this.app.use("/api", apiRouter.router);
30 |     this.app.use("/mcp", mcpRouter.router);
31 | 
32 |     // Serve static files from the UI build
33 |     this.app.use(express.static(path.join(__dirname, "../../dist/ui")));
34 |     this.app.use(
35 |       "/static",
36 |       express.static(path.join(__dirname, "../../static")),
37 |     );
38 | 
39 |     // Serve the React app for all other routes (must be last)
40 |     this.app.get("*", (req: ExpressRequest, res: ExpressResponse) => {
41 |       res.sendFile(path.join(__dirname, "../../dist/ui/index.html"));
42 |     });
43 |   }
44 | 
45 |   public start(): http.Server {
46 |     const server = this.app.listen(this.config.port, () => {
47 |       logger.info(
48 |         { port: this.config.port, mcp: "/mcp", api: "/api" },
49 |         "MCP and API server is running",
50 |       );
51 |       logger.info(
52 |         `UI server is running on http://localhost:${this.config.port}`,
53 |       );
54 |     });
55 | 
56 |     server.on("error", (error: Error) => {
57 |       logger.error(error, "Error starting server");
58 |     });
59 | 
60 |     return server;
61 |   }
62 | 
63 |   public getApp() {
64 |     return this.app;
65 |   }
66 | }
67 | 
```

--------------------------------------------------------------------------------
/src/ui/components/Layout.tsx:
--------------------------------------------------------------------------------

```typescript
 1 | import React from 'react';
 2 | import { useNavigate } from 'react-router-dom';
 3 | import {
 4 |   AppBar,
 5 |   Box,
 6 |   Container,
 7 |   CssBaseline,
 8 |   Toolbar,
 9 |   Typography,
10 |   Button,
11 |   ThemeProvider,
12 |   createTheme
13 | } from '@mui/material';
14 | import VideoIcon from '@mui/icons-material/VideoLibrary';
15 | import AddIcon from '@mui/icons-material/Add';
16 | 
17 | interface LayoutProps {
18 |   children: React.ReactNode;
19 | }
20 | 
21 | const theme = createTheme({
22 |   palette: {
23 |     mode: 'light',
24 |     primary: {
25 |       main: '#1976d2',
26 |     },
27 |     secondary: {
28 |       main: '#f50057',
29 |     },
30 |   },
31 |   typography: {
32 |     fontFamily: '"Roboto", "Helvetica", "Arial", sans-serif',
33 |   },
34 | });
35 | 
36 | const Layout: React.FC<LayoutProps> = ({ children }) => {
37 |   const navigate = useNavigate();
38 | 
39 |   return (
40 |     <ThemeProvider theme={theme}>
41 |       <CssBaseline />
42 |       <Box sx={{ display: 'flex', flexDirection: 'column', minHeight: '100vh' }}>
43 |         <AppBar position="static">
44 |           <Toolbar>
45 |             <VideoIcon sx={{ mr: 2 }} />
46 |             <Typography 
47 |               variant="h6" 
48 |               component="div" 
49 |               sx={{ flexGrow: 1, cursor: 'pointer' }}
50 |               onClick={() => navigate('/')}
51 |             >
52 |               Short Video Maker
53 |             </Typography>
54 |             <Button 
55 |               color="inherit" 
56 |               startIcon={<AddIcon />}
57 |               onClick={() => navigate('/create')}
58 |             >
59 |               Create Video
60 |             </Button>
61 |           </Toolbar>
62 |         </AppBar>
63 |         <Container component="main" sx={{ flexGrow: 1, py: 4 }}>
64 |           {children}
65 |         </Container>
66 |         <Box 
67 |           component="footer" 
68 |           sx={{ 
69 |             py: 3, 
70 |             mt: 'auto', 
71 |             backgroundColor: (theme) => theme.palette.grey[200],
72 |             textAlign: 'center'
73 |           }}
74 |         >
75 |           <Typography variant="body2" color="text.secondary">
76 |             Short Video Maker &copy; {new Date().getFullYear()}
77 |           </Typography>
78 |         </Box>
79 |       </Box>
80 |     </ThemeProvider>
81 |   );
82 | };
83 | 
84 | export default Layout; 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/Kokoro.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import { KokoroTTS, TextSplitterStream } from "kokoro-js";
 2 | import {
 3 |   VoiceEnum,
 4 |   type kokoroModelPrecision,
 5 |   type Voices,
 6 | } from "../../types/shorts";
 7 | import { KOKORO_MODEL, logger } from "../../config";
 8 | 
 9 | export class Kokoro {
10 |   constructor(private tts: KokoroTTS) {}
11 | 
12 |   async generate(
13 |     text: string,
14 |     voice: Voices,
15 |   ): Promise<{
16 |     audio: ArrayBuffer;
17 |     audioLength: number;
18 |   }> {
19 |     const splitter = new TextSplitterStream();
20 |     const stream = this.tts.stream(splitter, {
21 |       voice,
22 |     });
23 |     splitter.push(text);
24 |     splitter.close();
25 | 
26 |     const output = [];
27 |     for await (const audio of stream) {
28 |       output.push(audio);
29 |     }
30 | 
31 |     const audioBuffers: ArrayBuffer[] = [];
32 |     let audioLength = 0;
33 |     for (const audio of output) {
34 |       audioBuffers.push(audio.audio.toWav());
35 |       audioLength += audio.audio.audio.length / audio.audio.sampling_rate;
36 |     }
37 | 
38 |     const mergedAudioBuffer = Kokoro.concatWavBuffers(audioBuffers);
39 |     logger.debug({ text, voice, audioLength }, "Audio generated with Kokoro");
40 | 
41 |     return {
42 |       audio: mergedAudioBuffer,
43 |       audioLength: audioLength,
44 |     };
45 |   }
46 | 
47 |   static concatWavBuffers(buffers: ArrayBuffer[]): ArrayBuffer {
48 |     const header = Buffer.from(buffers[0].slice(0, 44));
49 |     let totalDataLength = 0;
50 | 
51 |     const dataParts = buffers.map((buf) => {
52 |       const b = Buffer.from(buf);
53 |       const data = b.slice(44);
54 |       totalDataLength += data.length;
55 |       return data;
56 |     });
57 | 
58 |     header.writeUInt32LE(36 + totalDataLength, 4);
59 |     header.writeUInt32LE(totalDataLength, 40);
60 | 
61 |     return Buffer.concat([header, ...dataParts]);
62 |   }
63 | 
64 |   static async init(dtype: kokoroModelPrecision): Promise<Kokoro> {
65 |     const tts = await KokoroTTS.from_pretrained(KOKORO_MODEL, {
66 |       dtype,
67 |       device: "cpu", // only "cpu" is supported in node
68 |     });
69 | 
70 |     return new Kokoro(tts);
71 |   }
72 | 
73 |   listAvailableVoices(): Voices[] {
74 |     const voices = Object.values(VoiceEnum) as Voices[];
75 |     return voices;
76 |   }
77 | }
78 | 
```

--------------------------------------------------------------------------------
/main.Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | FROM ubuntu:22.04 AS install-whisper
 2 | ENV DEBIAN_FRONTEND=noninteractive
 3 | RUN apt update
 4 | # whisper install dependencies
 5 | RUN apt install -y \
 6 |     git \
 7 |     build-essential \
 8 |     wget \
 9 |     cmake \
10 |     && apt-get clean \
11 |     && rm -rf /var/lib/apt/lists/*
12 | WORKDIR /whisper
13 | RUN git clone https://github.com/ggml-org/whisper.cpp.git .
14 | RUN git checkout v1.7.1
15 | RUN make
16 | WORKDIR /whisper/models
17 | RUN sh ./download-ggml-model.sh base.en
18 | 
19 | FROM node:22-bookworm-slim AS base
20 | ENV DEBIAN_FRONTEND=noninteractive
21 | WORKDIR /app
22 | RUN apt update
23 | RUN apt install -y \
24 |       # whisper dependencies
25 |       git \
26 |       wget \
27 |       cmake \
28 |       ffmpeg \
29 |       curl \
30 |       make \
31 |       libsdl2-dev \
32 |       # remotion dependencies
33 |       libnss3 \
34 |       libdbus-1-3 \
35 |       libatk1.0-0 \
36 |       libgbm-dev \
37 |       libasound2 \
38 |       libxrandr2 \
39 |       libxkbcommon-dev \
40 |       libxfixes3 \
41 |       libxcomposite1 \
42 |       libxdamage1 \
43 |       libatk-bridge2.0-0 \
44 |       libpango-1.0-0 \
45 |       libcairo2 \
46 |       libcups2 \
47 |     && apt-get clean \
48 |     && rm -rf /var/lib/apt/lists/*
49 | # setup pnpm
50 | ENV PNPM_HOME="/pnpm"
51 | ENV PATH="$PNPM_HOME:$PATH"
52 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
53 | RUN corepack enable
54 | 
55 | FROM base AS prod-deps
56 | COPY package.json pnpm-lock.yaml* /app/
57 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
58 | RUN pnpm install --prefer-offline --no-cache --prod
59 | 
60 | FROM prod-deps AS build
61 | COPY tsconfig.json /app
62 | COPY tsconfig.build.json /app
63 | COPY vite.config.ts /app
64 | COPY src /app/src
65 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
66 | RUN pnpm build
67 | 
68 | FROM base
69 | COPY static /app/static
70 | COPY --from=install-whisper /whisper /app/data/libs/whisper
71 | COPY --from=prod-deps /app/node_modules /app/node_modules
72 | COPY --from=build /app/dist /app/dist
73 | COPY package.json /app/
74 | 
75 | # app configuration via environment variables
76 | ENV DATA_DIR_PATH=/app/data
77 | ENV DOCKER=true
78 | ENV WHISPER_MODEL=base.en
79 | # number of chrome tabs to use for rendering
80 | ENV CONCURRENCY=1
81 | # video cache - 2000MB
82 | ENV VIDEO_CACHE_SIZE_IN_BYTES=2097152000
83 | 
84 | # install kokoro, headless chrome and ensure music files are present
85 | RUN node dist/scripts/install.js
86 | 
87 | CMD ["pnpm", "start"]
88 | 
```

--------------------------------------------------------------------------------
/main-tiny.Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # stage 1: compile whisper.cpp and download the tiny.en model
 2 | FROM ubuntu:22.04 AS install-whisper
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | # whisper install dependencies
 5 | # update + install share one layer so a cached, stale package index is never reused
 6 | RUN apt-get update && apt-get install -y \
 7 |     git \
 8 |     build-essential \
 9 |     wget \
10 |     cmake \
11 |     && apt-get clean \
12 |     && rm -rf /var/lib/apt/lists/*
13 | WORKDIR /whisper
14 | RUN git clone https://github.com/ggml-org/whisper.cpp.git .
15 | RUN git checkout v1.7.1
16 | RUN make
17 | WORKDIR /whisper/models
18 | RUN sh ./download-ggml-model.sh tiny.en
19 | 
20 | # stage 2: shared base with the system packages and pnpm
21 | FROM node:22-bookworm-slim AS base
22 | ENV DEBIAN_FRONTEND=noninteractive
23 | WORKDIR /app
24 | # update + install share one layer (avoids installing from a stale cached package index)
25 | RUN apt-get update && apt-get install -y \
26 |       # whisper dependencies
27 |       git \
28 |       wget \
29 |       cmake \
30 |       ffmpeg \
31 |       curl \
32 |       make \
33 |       libsdl2-dev \
34 |       # remotion dependencies
35 |       libnss3 \
36 |       libdbus-1-3 \
37 |       libatk1.0-0 \
38 |       libgbm-dev \
39 |       libasound2 \
40 |       libxrandr2 \
41 |       libxkbcommon-dev \
42 |       libxfixes3 \
43 |       libxcomposite1 \
44 |       libxdamage1 \
45 |       libatk-bridge2.0-0 \
46 |       libpango-1.0-0 \
47 |       libcairo2 \
48 |       libcups2 \
49 |     && apt-get clean \
50 |     && rm -rf /var/lib/apt/lists/*
51 | # setup pnpm
52 | ENV PNPM_HOME="/pnpm"
53 | ENV PATH="$PNPM_HOME:$PATH"
54 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
55 | RUN corepack enable
56 | 
57 | # stage 3: production-only node_modules
58 | FROM base AS prod-deps
59 | COPY package.json pnpm-lock.yaml* /app/
60 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
61 | # NOTE(review): looks redundant after the frozen-lockfile install above - confirm before removing
62 | RUN pnpm install --prefer-offline --no-cache --prod
63 | 
64 | # stage 4: install the dev dependencies as well and run the build
65 | FROM prod-deps AS build
66 | COPY tsconfig.json /app
67 | COPY tsconfig.build.json /app
68 | COPY vite.config.ts /app
69 | COPY src /app/src
70 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
71 | RUN pnpm build
72 | 
73 | # final image: static assets, whisper, production node_modules and the build output
74 | FROM base
75 | COPY static /app/static
76 | COPY --from=install-whisper /whisper /app/data/libs/whisper
77 | COPY --from=prod-deps /app/node_modules /app/node_modules
78 | COPY --from=build /app/dist /app/dist
79 | COPY package.json /app/
80 | 
81 | # app configuration via environment variables
82 | ENV DATA_DIR_PATH=/app/data
83 | ENV DOCKER=true
84 | ENV WHISPER_MODEL=tiny.en
85 | ENV KOKORO_MODEL_PRECISION=q4
86 | # number of chrome tabs to use for rendering
87 | ENV CONCURRENCY=1
88 | # video cache - 2000MB
89 | ENV VIDEO_CACHE_SIZE_IN_BYTES=2097152000
90 | 
91 | # install kokoro, headless chrome and ensure music files are present
92 | RUN node dist/scripts/install.js
93 | 
94 | CMD ["pnpm", "start"]
95 | 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/FFmpeg.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import ffmpeg from "fluent-ffmpeg";
 2 | import { Readable } from "node:stream";
 3 | import { logger } from "../../logger";
 4 | 
 5 | /** Wraps fluent-ffmpeg to convert in-memory audio buffers. */
 6 | export class FFMpeg {
 7 |   /** Points fluent-ffmpeg at the binary provided by @ffmpeg-installer/ffmpeg. */
 8 |   static async init(): Promise<FFMpeg> {
 9 |     const ffmpegInstaller = await import("@ffmpeg-installer/ffmpeg");
10 |     ffmpeg.setFfmpegPath(ffmpegInstaller.path);
11 |     // pino drops extra arguments unless the message has a printf-style placeholder
12 |     logger.info("FFmpeg path set to: %s", ffmpegInstaller.path);
13 |     return new FFMpeg();
14 |   }
15 | 
16 |   /** Saves the audio as mono 16 kHz pcm_s16le WAV at outputPath; resolves with outputPath. */
17 |   async saveNormalizedAudio(
18 |     audio: ArrayBuffer,
19 |     outputPath: string,
20 |   ): Promise<string> {
21 |     logger.debug("Normalizing audio for Whisper");
22 |     const inputStream = new Readable();
23 |     inputStream.push(Buffer.from(audio));
24 |     inputStream.push(null);
25 | 
26 |     return new Promise((resolve, reject) => {
27 |       ffmpeg()
28 |         .input(inputStream)
29 |         .audioCodec("pcm_s16le")
30 |         .audioChannels(1)
31 |         .audioFrequency(16000)
32 |         .toFormat("wav")
33 |         .on("end", () => {
34 |           logger.debug("Audio normalization complete");
35 |           resolve(outputPath);
36 |         })
37 |         .on("error", (error: unknown) => {
38 |           logger.error(error, "Error normalizing audio:");
39 |           reject(error);
40 |         })
41 |         .save(outputPath);
42 |     });
43 |   }
44 | 
45 |   /** Encodes the audio as 2-channel MP3 and resolves with a base64 `data:audio/mp3` URI. */
46 |   async createMp3DataUri(audio: ArrayBuffer): Promise<string> {
47 |     const inputStream = new Readable();
48 |     inputStream.push(Buffer.from(audio));
49 |     inputStream.push(null);
50 |     return new Promise((resolve, reject) => {
51 |       const chunks: Buffer[] = [];
52 |       ffmpeg()
53 |         .input(inputStream)
54 |         .audioCodec("libmp3lame")
55 |         .audioBitrate(128)
56 |         .audioChannels(2)
57 |         .toFormat("mp3")
58 |         .on("error", (err) => {
59 |           reject(err);
60 |         })
61 |         .pipe()
62 |         .on("data", (data: Buffer) => {
63 |           chunks.push(data);
64 |         })
65 |         .on("end", () => {
66 |           const buffer = Buffer.concat(chunks);
67 |           resolve(`data:audio/mp3;base64,${buffer.toString("base64")}`);
68 |         })
69 |         .on("error", (err) => {
70 |           reject(err);
71 |         });
72 |     });
73 |   }
74 | 
75 |   /** Encodes the audio as 2-channel MP3 at filePath; resolves with filePath when done. */
76 |   async saveToMp3(audio: ArrayBuffer, filePath: string): Promise<string> {
77 |     const inputStream = new Readable();
78 |     inputStream.push(Buffer.from(audio));
79 |     inputStream.push(null);
80 |     return new Promise((resolve, reject) => {
81 |       ffmpeg()
82 |         .input(inputStream)
83 |         .audioCodec("libmp3lame")
84 |         .audioBitrate(128)
85 |         .audioChannels(2)
86 |         .toFormat("mp3")
87 |         // listeners are attached before save() starts the command, as in saveNormalizedAudio
88 |         .on("end", () => {
89 |           logger.debug("Audio conversion complete");
90 |           resolve(filePath);
91 |         })
92 |         .on("error", (err) => {
93 |           reject(err);
94 |         })
95 |         .save(filePath);
96 |     });
97 |   }
98 | }
99 | 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/Remotion.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import z from "zod";
 2 | import { bundle } from "@remotion/bundler";
 3 | import { renderMedia, selectComposition } from "@remotion/renderer";
 4 | import path from "path";
 5 | import { ensureBrowser } from "@remotion/renderer";
 6 | 
 7 | import { Config } from "../../config";
 8 | import { shortVideoSchema } from "../../components/utils";
 9 | import { logger } from "../../logger";
10 | import { OrientationEnum } from "../../types/shorts";
11 | import { getOrientationConfig } from "../../components/utils";
12 | 
13 | /** Bundles the Remotion project once and renders videos from that bundle. */
14 | export class Remotion {
15 |   constructor(
16 |     private bundled: string,
17 |     private config: Config,
18 |   ) {}
19 | 
20 |   /** Ensures a browser is available, bundles the root component and returns an instance. */
21 |   static async init(config: Config): Promise<Remotion> {
22 |     await ensureBrowser();
23 | 
24 |     // dev mode bundles the TypeScript entry under src, otherwise the compiled one under dist
25 |     const sourceDir = config.devMode ? "src" : "dist";
26 |     const extension = config.devMode ? "ts" : "js";
27 |     const entryPoint = path.join(
28 |       config.packageDirPath,
29 |       sourceDir,
30 |       "components",
31 |       "root",
32 |       `index.${extension}`,
33 |     );
34 | 
35 |     return new Remotion(await bundle({ entryPoint }), config);
36 |   }
37 | 
38 |   /** Renders the composition for the given orientation to `<videosDirPath>/<id>.mp4`. */
39 |   async render(
40 |     data: z.infer<typeof shortVideoSchema>,
41 |     id: string,
42 |     orientation: OrientationEnum,
43 |   ) {
44 |     const serveUrl = this.bundled;
45 |     const { component } = getOrientationConfig(orientation);
46 |     const composition = await selectComposition({
47 |       serveUrl,
48 |       id: component,
49 |       inputProps: data,
50 |     });
51 | 
52 |     logger.debug({ component, videoID: id }, "Rendering video with Remotion");
53 | 
54 |     const outputLocation = path.join(this.config.videosDirPath, `${id}.mp4`);
55 |     await renderMedia({
56 |       codec: "h264",
57 |       composition,
58 |       serveUrl,
59 |       outputLocation,
60 |       inputProps: data,
61 |       onProgress: ({ progress }) => {
62 |         logger.debug(`Rendering ${id} ${Math.floor(progress * 100)}% complete`);
63 |       },
64 |       // preventing memory issues with docker
65 |       concurrency: this.config.concurrency,
66 |       offthreadVideoCacheSizeInBytes: this.config.videoCacheSizeInBytes,
67 |     });
68 | 
69 |     const renderInfo = { outputLocation, component, videoID: id };
70 |     logger.debug(renderInfo, "Video rendered with Remotion");
71 |   }
72 | 
73 |   /** Renders the "TestVideo" composition to outputLocation. */
74 |   async testRender(outputLocation: string) {
75 |     const serveUrl = this.bundled;
76 |     const composition = await selectComposition({ serveUrl, id: "TestVideo" });
77 | 
78 |     await renderMedia({
79 |       codec: "h264",
80 |       composition,
81 |       serveUrl,
82 |       outputLocation,
83 |       onProgress: ({ progress }) => {
84 |         const percent = Math.floor(progress * 100);
85 |         logger.debug(`Rendering test video: ${percent}% complete`);
86 |       },
87 |       // preventing memory issues with docker
88 |       concurrency: this.config.concurrency,
89 |       offthreadVideoCacheSizeInBytes: this.config.videoCacheSizeInBytes,
90 |     });
91 |   }
92 | }
93 | 
```

--------------------------------------------------------------------------------
/src/server/routers/mcp.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import express from "express";
 2 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 3 | import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
 4 | import z from "zod";
 5 | 
 6 | import { ShortCreator } from "../../short-creator/ShortCreator";
 7 | import { logger } from "../../logger";
 8 | import { renderConfig, sceneInput } from "../../types/shorts";
 9 | 
10 | /** Express router that exposes ShortCreator as MCP tools over an SSE transport. */
11 | export class MCPRouter {
12 |   router: express.Router;
13 |   shortCreator: ShortCreator;
14 |   // open SSE transports keyed by their session id
15 |   transports: { [sessionId: string]: SSEServerTransport } = {};
16 |   mcpServer: McpServer;
17 |   constructor(shortCreator: ShortCreator) {
18 |     this.router = express.Router();
19 |     this.shortCreator = shortCreator;
20 | 
21 |     this.mcpServer = new McpServer({
22 |       name: "Short Creator",
23 |       version: "0.0.1",
24 |       capabilities: {
25 |         resources: {},
26 |         tools: {},
27 |       },
28 |     });
29 | 
30 |     this.setupMCPServer();
31 |     this.setupRoutes();
32 |   }
33 | 
34 |   /** Registers the "get-video-status" and "create-short-video" tools on the MCP server. */
35 |   private setupMCPServer() {
36 |     this.mcpServer.tool(
37 |       "get-video-status",
38 |       "Get the status of a video (ready, processing, failed)",
39 |       {
40 |         videoId: z.string().describe("The ID of the video"),
41 |       },
42 |       async ({ videoId }) => {
43 |         const status = this.shortCreator.status(videoId);
44 |         return { content: [{ type: "text", text: status }] };
45 |       },
46 |     );
47 | 
48 |     this.mcpServer.tool(
49 |       "create-short-video",
50 |       "Create a short video from a list of scenes",
51 |       {
52 |         scenes: z.array(sceneInput).describe("Each scene to be created"),
53 |         config: renderConfig.describe("Configuration for rendering the video"),
54 |       },
55 |       async ({ scenes, config }) => {
56 |         // the tool answers with the id returned by addToQueue
57 |         const videoId = await this.shortCreator.addToQueue(scenes, config);
58 |         return { content: [{ type: "text", text: videoId }] };
59 |       },
60 |     );
61 |   }
62 | 
63 |   /** Mounts GET /sse (opens a session) and POST /messages (delivers client messages). */
64 |   private setupRoutes() {
65 |     this.router.get("/sse", async (req, res) => {
66 |       logger.info("SSE GET request received");
67 | 
68 |       const transport = new SSEServerTransport("/mcp/messages", res);
69 |       this.transports[transport.sessionId] = transport;
70 |       // forget the transport once the response is closed
71 |       res.on("close", () => {
72 |         delete this.transports[transport.sessionId];
73 |       });
74 |       await this.mcpServer.connect(transport);
75 |     });
76 | 
77 |     this.router.post("/messages", async (req, res) => {
78 |       logger.info("SSE POST request received");
79 | 
80 |       const { sessionId } = req.query;
81 |       // Only accept ids registered by GET /sse. A bare `this.transports[sessionId]`
82 |       // also resolves inherited keys such as "constructor", which would pass the
83 |       // truthiness check and then throw inside this handler.
84 |       const transport =
85 |         typeof sessionId === "string" &&
86 |         Object.prototype.hasOwnProperty.call(this.transports, sessionId)
87 |           ? this.transports[sessionId]
88 |           : undefined;
89 |       if (transport) {
90 |         await transport.handlePostMessage(req, res);
91 |       } else {
92 |         res.status(400).send("No transport found for sessionId");
93 |       }
94 |     });
95 |   }
96 | }
97 | 
```

--------------------------------------------------------------------------------
/main-cuda.Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
  1 | ARG UBUNTU_VERSION=22.04
  2 | ARG CUDA_VERSION=12.3.1
  3 | ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
  4 | ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
  5 | 
  6 | # stage 1: build whisper.cpp with GGML_CUDA=1 and download the medium.en model
  7 | # Ref: https://github.com/ggml-org/whisper.cpp
  8 | FROM ${BASE_CUDA_DEV_CONTAINER} AS install-whisper
  9 | ENV DEBIAN_FRONTEND=noninteractive
 10 | 
 11 | RUN apt-get update && \
 12 |     apt-get install --fix-missing --no-install-recommends -y bash git make vim wget g++ ffmpeg curl
 13 | 
 14 | WORKDIR /app/data/libs/whisper
 15 | # same repository URL as the "Ref" above
 16 | RUN git clone https://github.com/ggml-org/whisper.cpp.git -b v1.7.1 --depth 1 .
 17 | 
 18 | RUN make clean
 19 | RUN GGML_CUDA=1 make -j
 20 | 
 21 | RUN sh ./models/download-ggml-model.sh medium.en
 22 | 
 23 | # stage 2: CUDA runtime base with node, the system packages and pnpm
 24 | FROM ${BASE_CUDA_RUN_CONTAINER} AS base
 25 | 
 26 | # install node
 27 | RUN apt-get update && apt-get install -y \
 28 |     curl \
 29 |     ca-certificates \
 30 |     gnupg \
 31 |     lsb-release \
 32 |     && rm -rf /var/lib/apt/lists/*
 33 | RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
 34 |     && apt-get update && apt-get install -y nodejs \
 35 |     && rm -rf /var/lib/apt/lists/*
 36 | RUN node -v && npm -v
 37 | 
 38 | # install dependencies
 39 | ENV DEBIAN_FRONTEND=noninteractive
 40 | WORKDIR /app
 41 | # update + install share one layer so a cached, stale package index is never reused
 42 | RUN apt-get update && apt-get install -y \
 43 |       # whisper dependencies
 44 |       git \
 45 |       wget \
 46 |       cmake \
 47 |       ffmpeg \
 48 |       curl \
 49 |       build-essential \
 50 |       make \
 51 |       # remotion dependencies
 52 |       libnss3 \
 53 |       libdbus-1-3 \
 54 |       libatk1.0-0 \
 55 |       libgbm-dev \
 56 |       libasound2 \
 57 |       libxrandr2 \
 58 |       libxkbcommon-dev \
 59 |       libxfixes3 \
 60 |       libxcomposite1 \
 61 |       libxdamage1 \
 62 |       libatk-bridge2.0-0 \
 63 |       libpango-1.0-0 \
 64 |       libcairo2 \
 65 |       libcups2 \
 66 |     && apt-get clean \
 67 |     && rm -rf /var/lib/apt/lists/*
 68 | # setup pnpm
 69 | ENV PNPM_HOME="/pnpm"
 70 | ENV PATH="$PNPM_HOME:$PATH"
 71 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
 72 | RUN corepack enable
 73 | 
 74 | # stage 3: production-only node_modules
 75 | FROM base AS prod-deps
 76 | COPY package.json pnpm-lock.yaml* /app/
 77 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
 78 | # NOTE(review): looks redundant after the frozen-lockfile install above - confirm before removing
 79 | RUN pnpm install --prefer-offline --no-cache --prod
 80 | 
 81 | # stage 4: install the dev dependencies as well and run the build
 82 | FROM prod-deps AS build
 83 | COPY tsconfig.json /app
 84 | COPY tsconfig.build.json /app
 85 | COPY vite.config.ts /app
 86 | COPY src /app/src
 87 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
 88 | RUN pnpm build
 89 | 
 90 | # final image: static assets, whisper, production node_modules and the build output
 91 | FROM base
 92 | COPY static /app/static
 93 | COPY --from=install-whisper /app/data/libs/whisper /app/data/libs/whisper
 94 | COPY --from=prod-deps /app/node_modules /app/node_modules
 95 | COPY --from=build /app/dist /app/dist
 96 | COPY package.json /app/
 97 | 
 98 | # app configuration via environment variables
 99 | ENV DATA_DIR_PATH=/app/data
100 | ENV DOCKER=true
101 | # number of chrome tabs to use for rendering
102 | ENV CONCURRENCY=1
103 | # video cache - 2000MB
104 | ENV VIDEO_CACHE_SIZE_IN_BYTES=2097152000
105 | 
106 | # install kokoro, headless chrome and ensure music files are present
107 | RUN node dist/scripts/install.js
108 | 
109 | CMD ["pnpm", "start"]
110 | 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/Whisper.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import {
 2 |   downloadWhisperModel,
 3 |   installWhisperCpp,
 4 |   transcribe,
 5 | } from "@remotion/install-whisper-cpp";
 6 | import path from "path";
 7 | 
 8 | import { Config } from "../../config";
 9 | import type { Caption } from "../../types/shorts";
10 | import { logger } from "../../logger";
11 | 
12 | export const ErrorWhisper = new Error("There was an error with WhisperCpp"); // exported error instance; not thrown anywhere in this file
13 | 
14 | /** Sets up whisper.cpp (outside Docker) and converts audio files into timed captions. */
15 | export class Whisper {
16 |   constructor(private config: Config) {}
17 | 
18 |   /** Installs WhisperCpp and downloads the configured model unless running in Docker. */
19 |   static async init(config: Config): Promise<Whisper> {
20 |     if (!config.runningInDocker) {
21 |       logger.debug("Installing WhisperCpp");
22 |       await installWhisperCpp({
23 |         to: config.whisperInstallPath,
24 |         version: config.whisperVersion,
25 |         printOutput: true,
26 |       });
27 |       logger.debug("WhisperCpp installed");
28 |       logger.debug("Downloading Whisper model");
29 |       await downloadWhisperModel({
30 |         model: config.whisperModel,
31 |         folder: path.join(config.whisperInstallPath, "models"),
32 |         printOutput: config.whisperVerbose,
33 |         onProgress: (downloadedBytes, totalBytes) => {
34 |           const progress = `${Math.round((downloadedBytes / totalBytes) * 100)}%`;
35 |           logger.debug(
36 |             { progress, model: config.whisperModel },
37 |             "Downloading Whisper model",
38 |           );
39 |         },
40 |       });
41 |       // todo run the jfk command to check if everything is ok
42 |       logger.debug("Whisper model downloaded");
43 |     }
44 | 
45 |     return new Whisper(config);
46 |   }
47 | 
48 |   // todo shall we extract it to a Caption class?
49 |   /** Transcribes audioPath with token-level timestamps and returns the resulting captions. */
50 |   async CreateCaption(audioPath: string): Promise<Caption[]> {
51 |     logger.debug({ audioPath }, "Starting to transcribe audio");
52 |     const { transcription } = await transcribe({
53 |       model: this.config.whisperModel,
54 |       whisperPath: this.config.whisperInstallPath,
55 |       modelFolder: path.join(this.config.whisperInstallPath, "models"),
56 |       whisperCppVersion: this.config.whisperVersion,
57 |       inputPath: audioPath,
58 |       tokenLevelTimestamps: true,
59 |       printOutput: this.config.whisperVerbose,
60 |       onProgress: (progress) => {
61 |         logger.debug({ audioPath }, `Transcribing is ${progress} complete`);
62 |       },
63 |     });
64 |     logger.debug({ audioPath }, "Transcription finished, creating captions");
65 | 
66 |     const captions: Caption[] = [];
67 |     transcription.forEach((record) => {
68 |       if (record.text === "") {
69 |         return;
70 |       }
71 | 
72 |       record.tokens.forEach((token) => {
73 |         if (token.text.startsWith("[_TT")) {
74 |           return;
75 |         }
76 |         // if token starts without space and the previous node didn't have space either, merge them
77 |         const previous = captions[captions.length - 1];
78 |         if (
79 |           previous &&
80 |           !token.text.startsWith(" ") &&
81 |           !previous.text.endsWith(" ")
82 |         ) {
83 |           // append only this token; record.text is the text of the whole record
84 |           previous.text += token.text;
85 |           previous.endMs = record.offsets.to;
86 |           return;
87 |         }
88 |         captions.push({
89 |           text: token.text,
90 |           startMs: record.offsets.from,
91 |           endMs: record.offsets.to,
92 |         });
93 |       });
94 |     });
95 |     logger.debug({ audioPath, captions }, "Captions created");
96 |     return captions;
97 |   }
98 | }
99 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
 1 | /* eslint-disable @typescript-eslint/no-unused-vars */
 2 | import path from "path";
 3 | import fs from "fs-extra";
 4 | 
 5 | import { Kokoro } from "./short-creator/libraries/Kokoro";
 6 | import { Remotion } from "./short-creator/libraries/Remotion";
 7 | import { Whisper } from "./short-creator/libraries/Whisper";
 8 | import { FFMpeg } from "./short-creator/libraries/FFmpeg";
 9 | import { PexelsAPI } from "./short-creator/libraries/Pexels";
10 | import { Config } from "./config";
11 | import { ShortCreator } from "./short-creator/ShortCreator";
12 | import { logger } from "./logger";
13 | import { Server } from "./server/server";
14 | import { MusicManager } from "./short-creator/music";
15 | 
16 | /**
17 |  * Boots the app: validates config and music files, initializes the libraries, verifies the installation when not in Docker, starts the server.
18 |  */
19 | async function main() {
20 |   const config = new Config();
21 |   try {
22 |     config.ensureConfig();
23 |   } catch (err: unknown) {
24 |     logger.error(err, "Error in config");
25 |     process.exit(1);
26 |   }
27 | 
28 |   const musicManager = new MusicManager(config);
29 |   try {
30 |     logger.debug("checking music files");
31 |     musicManager.ensureMusicFilesExist();
32 |   } catch (error: unknown) {
33 |     logger.error(error, "Missing music files");
34 |     process.exit(1);
35 |   }
36 | 
37 |   logger.debug("initializing remotion");
38 |   const remotion = await Remotion.init(config);
39 |   logger.debug("initializing kokoro");
40 |   const kokoro = await Kokoro.init(config.kokoroModelPrecision);
41 |   logger.debug("initializing whisper");
42 |   const whisper = await Whisper.init(config);
43 |   logger.debug("initializing ffmpeg");
44 |   const ffmpeg = await FFMpeg.init();
45 |   const pexelsApi = new PexelsAPI(config.pexelsApiKey);
46 | 
47 |   logger.debug("initializing the short creator");
48 |   const shortCreator = new ShortCreator(
49 |     config,
50 |     remotion,
51 |     kokoro,
52 |     whisper,
53 |     ffmpeg,
54 |     pexelsApi,
55 |     musicManager,
56 |   );
57 | 
58 |   if (!config.runningInDocker) {
59 |     // the project is running with npm - we need to check if the installation is correct
60 |     if (fs.existsSync(config.installationSuccessfulPath)) {
61 |       logger.info("the installation is successful - starting the server");
62 |     } else {
63 |       logger.info(
64 |         "testing if the installation was successful - this may take a while...",
65 |       );
66 |       try {
67 |         const audioBuffer = (await kokoro.generate("hi", "af_heart")).audio;
68 |         await ffmpeg.createMp3DataUri(audioBuffer);
69 |         await pexelsApi.findVideo(["dog"], 2.4);
70 |         const testVideoPath = path.join(config.tempDirPath, "test.mp4");
71 |         await remotion.testRender(testVideoPath);
72 |         fs.rmSync(testVideoPath, { force: true });
73 |         fs.writeFileSync(config.installationSuccessfulPath, "ok", {
74 |           encoding: "utf-8",
75 |         });
76 |         logger.info("the installation was successful - starting the server");
77 |       } catch (error: unknown) {
78 |         logger.fatal(
79 |           error,
80 |           "The environment is not set up correctly - please follow the instructions in the README.md file https://github.com/gyoridavid/short-video-maker",
81 |         );
82 |         process.exit(1);
83 |       }
84 |     }
85 |   }
86 | 
87 |   logger.debug("initializing the server");
88 |   const server = new Server(config, shortCreator);
89 |   const app = server.start();
90 | 
91 |   // todo add shutdown handler
92 | }
93 | 
94 | main().catch((error: unknown) => {
95 |   logger.error(error, "Error starting server");
96 |   // a failed startup must not let the process end with exit code 0
97 |   process.exit(1);
98 | });
99 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
  1 | {
  2 |   "name": "short-video-maker",
  3 |   "version": "1.3.4",
  4 |   "description": "Creates short videos for TikTok, Instagram Reels, and YouTube Shorts using the Model Context Protocol (MCP) and a REST API.",
  5 |   "main": "index.js",
  6 |   "bugs": "https://github.com/gyoridavid/short-video-maker/issues",
  7 |   "homepage": "https://github.com/gyoridavid/short-video-maker",
  8 |   "scripts": {
  9 |     "build": "rimraf dist && tsc --project tsconfig.build.json && vite build",
 10 |     "dev": "vite build --watch | node --watch -r ts-node/register src/index.ts ",
 11 |     "start": "node dist/index.js",
 12 |     "test": "vitest",
 13 |     "prepublishOnly": "npm run build && echo \"#!/usr/bin/env node\n$(cat dist/index.js)\" > dist/index.js && chmod +x dist/index.js",
 14 |     "publish:docker": "npm run publish:docker:normal && npm run publish:docker:cuda && npm run publish:docker:tiny",
 15 |     "publish:docker:cuda": "docker buildx build --platform linux/amd64 -t gyoridavid/short-video-maker:latest-cuda -t gyoridavid/short-video-maker:${npm_package_version}-cuda -f main-cuda.Dockerfile --push ./",
 16 |     "publish:docker:normal": "docker buildx build --platform linux/amd64,linux/arm64 -t gyoridavid/short-video-maker:latest -t gyoridavid/short-video-maker:${npm_package_version} -f main.Dockerfile --push ./",
 17 |     "publish:docker:tiny": "docker buildx build --platform linux/amd64,linux/arm64 -t gyoridavid/short-video-maker:latest-tiny -t gyoridavid/short-video-maker:${npm_package_version}-tiny -f main-tiny.Dockerfile --push ./",
 18 |     "ui:dev": "vite",
 19 |     "ui:build": "vite build",
 20 |     "ui:preview": "vite preview"
 21 |   },
 22 |   "bin": {
 23 |     "short-video-maker": "dist/index.js"
 24 |   },
 25 |   "files": [
 26 |     "dist",
 27 |     "static"
 28 |   ],
 29 |   "keywords": [
 30 |     "shorts",
 31 |     "mcp",
 32 |     "model context protocol",
 33 |     "reels",
 34 |     "tiktok",
 35 |     "youtube shorts",
 36 |     "youtube",
 37 |     "short video",
 38 |     "video creation",
 39 |     "instagram",
 40 |     "video",
 41 |     "generator",
 42 |     "remotion",
 43 |     "faceless video"
 44 |   ],
 45 |   "author": "David Gyori",
 46 |   "license": "MIT",
 47 |   "dependencies": {
 48 |     "@emotion/react": "^11.11.3",
 49 |     "@emotion/styled": "^11.11.0",
 50 |     "@ffmpeg-installer/ffmpeg": "^1.1.0",
 51 |     "@modelcontextprotocol/sdk": "^1.9.0",
 52 |     "@mui/icons-material": "^5.15.10",
 53 |     "@mui/material": "^5.15.10",
 54 |     "@remotion/bundler": "^4.0.286",
 55 |     "@remotion/cli": "^4.0.286",
 56 |     "@remotion/google-fonts": "^4.0.286",
 57 |     "@remotion/install-whisper-cpp": "^4.0.286",
 58 |     "@remotion/renderer": "^4.0.286",
 59 |     "@remotion/zod-types": "^4.0.286",
 60 |     "@tanstack/react-query": "^5.18.0",
 61 |     "@types/react-dom": "^19.1.3",
 62 |     "@types/react-router-dom": "^5.3.3",
 63 |     "axios": "^1.9.0",
 64 |     "content-type": "^1.0.5",
 65 |     "cuid": "^3.0.0",
 66 |     "dotenv": "^16.4.7",
 67 |     "express": "^4.18.2",
 68 |     "fluent-ffmpeg": "^2.1.3",
 69 |     "fs-extra": "^11.3.0",
 70 |     "kokoro-js": "^1.2.0",
 71 |     "nock": "^14.0.3",
 72 |     "pino": "^9.6.0",
 73 |     "react": "^19.1.0",
 74 |     "react-dom": "^19.1.0",
 75 |     "react-router-dom": "^7.5.3",
 76 |     "remotion": "^4.0.286",
 77 |     "zod": "^3.24.2",
 78 |     "zod-to-json-schema": "^3.24.5"
 79 |   },
 80 |   "devDependencies": {
 81 |     "@remotion/eslint-config-flat": "^4.0.286",
 82 |     "@types/content-type": "^1.1.8",
 83 |     "@types/express": "^4.17.21",
 84 |     "@types/fluent-ffmpeg": "^2.1.27",
 85 |     "@types/fs-extra": "^11.0.4",
 86 |     "@types/nock": "^11.1.0",
 87 |     "@types/node": "^22.14.0",
 88 |     "@types/react": "^19.1.0",
 89 |     "@vitejs/plugin-react": "^4.4.1",
 90 |     "autoprefixer": "^10.4.16",
 91 |     "eslint": "^9.24.0",
 92 |     "postcss": "^8.4.31",
 93 |     "prettier": "^3.5.3",
 94 |     "rimraf": "^6.0.1",
 95 |     "tailwindcss": "^3.3.0",
 96 |     "ts-node": "^10.9.2",
 97 |     "typescript": "^5.8.3",
 98 |     "vite": "^6.3.4",
 99 |     "vitest": "^3.1.1"
100 |   }
101 | }
102 | 
```

--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import path from "path";
  2 | import "dotenv/config";
  3 | import os from "os";
  4 | import fs from "fs-extra";
  5 | import pino from "pino";
  6 | import { kokoroModelPrecision, whisperModels } from "./types/shorts";
  7 | 
  8 | // Built-in defaults; log level, port and whisper model can be overridden via environment variables.
  9 | const defaultLogLevel: pino.Level = "info";
 10 | const defaultPort = 3123;
 11 | const whisperVersion = "1.7.1";
 12 | // possible options: "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"
 13 | const defaultWhisperModel: whisperModels = "medium.en";
 14 | 
 15 | // Create the global logger
 16 | const versionNumber = process.env.npm_package_version;
 17 | export const logger = pino({
 18 |   level: process.env.LOG_LEVEL || defaultLogLevel,
 19 |   timestamp: pino.stdTimeFunctions.isoTime,
 20 |   // write the level under the "level" key using its label
 21 |   formatters: { level: (label) => ({ level: label }) },
 22 |   // base fields: process id and the npm package version
 23 |   base: {
 24 |     pid: process.pid,
 25 |     version: versionNumber,
 26 |   },
 27 | });
 28 | 
 29 | /** Runtime configuration resolved from environment variables, with built-in defaults. */
 30 | export class Config {
 31 |   private dataDirPath: string;
 32 |   private libsDirPath: string;
 33 |   private staticDirPath: string;
 34 | 
 35 |   public installationSuccessfulPath: string;
 36 |   public whisperInstallPath: string;
 37 |   public videosDirPath: string;
 38 |   public tempDirPath: string;
 39 |   public packageDirPath: string;
 40 |   public musicDirPath: string;
 41 |   public pexelsApiKey: string;
 42 |   public logLevel: pino.Level;
 43 |   public whisperVerbose: boolean;
 44 |   public port: number;
 45 |   public runningInDocker: boolean;
 46 |   public devMode: boolean;
 47 |   public whisperVersion: string = whisperVersion;
 48 |   public whisperModel: whisperModels = defaultWhisperModel;
 49 |   public kokoroModelPrecision: kokoroModelPrecision = "fp32";
 50 | 
 51 |   // docker-specific, performance-related settings to prevent memory issues
 52 |   public concurrency?: number;
 53 |   public videoCacheSizeInBytes: number | null = null;
 54 | 
 55 |   /** Resolves every path and setting and creates the data, libs, videos and temp directories. */
 56 |   constructor() {
 57 |     this.dataDirPath =
 58 |       process.env.DATA_DIR_PATH ||
 59 |       path.join(os.homedir(), ".ai-agents-az-video-generator");
 60 |     this.libsDirPath = path.join(this.dataDirPath, "libs");
 61 | 
 62 |     this.whisperInstallPath = path.join(this.libsDirPath, "whisper");
 63 |     this.videosDirPath = path.join(this.dataDirPath, "videos");
 64 |     this.tempDirPath = path.join(this.dataDirPath, "temp");
 65 |     this.installationSuccessfulPath = path.join(
 66 |       this.dataDirPath,
 67 |       "installation-successful",
 68 |     );
 69 | 
 70 |     fs.ensureDirSync(this.dataDirPath);
 71 |     fs.ensureDirSync(this.libsDirPath);
 72 |     fs.ensureDirSync(this.videosDirPath);
 73 |     fs.ensureDirSync(this.tempDirPath);
 74 | 
 75 |     this.packageDirPath = path.join(__dirname, "..");
 76 |     this.staticDirPath = path.join(this.packageDirPath, "static");
 77 |     this.musicDirPath = path.join(this.staticDirPath, "music");
 78 | 
 79 |     this.pexelsApiKey = process.env.PEXELS_API_KEY as string;
 80 |     this.logLevel = (process.env.LOG_LEVEL || defaultLogLevel) as pino.Level;
 81 |     this.whisperVerbose = process.env.WHISPER_VERBOSE === "true";
 82 |     // an unset or non-numeric PORT falls back to the default instead of becoming NaN
 83 |     this.port = Config.parseIntEnv(process.env.PORT) ?? defaultPort;
 84 |     this.runningInDocker = process.env.DOCKER === "true";
 85 |     this.devMode = process.env.DEV === "true";
 86 | 
 87 |     if (process.env.WHISPER_MODEL) {
 88 |       this.whisperModel = process.env.WHISPER_MODEL as whisperModels;
 89 |     }
 90 |     if (process.env.KOKORO_MODEL_PRECISION) {
 91 |       this.kokoroModelPrecision = process.env
 92 |         .KOKORO_MODEL_PRECISION as kokoroModelPrecision;
 93 |     }
 94 | 
 95 |     this.concurrency = Config.parseIntEnv(process.env.CONCURRENCY);
 96 |     this.videoCacheSizeInBytes =
 97 |       Config.parseIntEnv(process.env.VIDEO_CACHE_SIZE_IN_BYTES) ?? null;
 98 |   }
 99 | 
100 |   /**
101 |    * Parses a base-10 integer from an environment variable value.
102 |    * Returns undefined when the value is unset, empty or not a number.
103 |    */
104 |   private static parseIntEnv(value: string | undefined): number | undefined {
105 |     if (!value) {
106 |       return undefined;
107 |     }
108 |     const parsed = Number.parseInt(value, 10);
109 |     return Number.isNaN(parsed) ? undefined : parsed;
110 |   }
111 | 
112 |   /** Throws an Error when the PEXELS_API_KEY environment variable is missing. */
113 |   public ensureConfig() {
114 |     if (!this.pexelsApiKey) {
115 |       throw new Error(
116 |         "PEXELS_API_KEY environment variable is missing. Get your free API key: https://www.pexels.com/api/key/ - see how to run the project: https://github.com/gyoridavid/short-video-maker",
117 |       );
118 |     }
119 |   }
120 | }
121 | 
122 | export const KOKORO_MODEL = "onnx-community/Kokoro-82M-v1.0-ONNX";
123 | 
```

--------------------------------------------------------------------------------
/src/components/utils.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { z } from "zod";
  2 | import {
  3 |   type Caption,
  4 |   type CaptionPage,
  5 |   type CaptionLine,
  6 |   type OrientationEnum,
  7 |   MusicVolumeEnum,
  8 | } from "../types/shorts";
  9 | import { AvailableComponentsEnum, type OrientationConfig } from "./types";
 10 | 
 11 | // Building blocks of shortVideoSchema: one scene, the render config and the music track.
 12 | const sceneSchema = z.object({
 13 |   captions: z.custom<Caption[]>(),
 14 |   audio: z.object({ url: z.string(), duration: z.number() }),
 15 |   video: z.string(),
 16 | });
 17 | const videoConfigSchema = z.object({
 18 |   paddingBack: z.number().optional(),
 19 |   captionPosition: z.enum(["top", "center", "bottom"]).optional(),
 20 |   captionBackgroundColor: z.string().optional(),
 21 |   durationMs: z.number(),
 22 |   musicVolume: z.nativeEnum(MusicVolumeEnum).optional(),
 23 | });
 24 | const musicSchema = z.object({
 25 |   file: z.string(),
 26 |   url: z.string(),
 27 |   start: z.number(),
 28 |   end: z.number(),
 29 | });
 30 | 
 31 | export const shortVideoSchema = z.object({
 32 |   scenes: z.array(sceneSchema),
 33 |   config: videoConfigSchema,
 34 |   music: musicSchema,
 35 | });
 36 | 
 37 | export function createCaptionPages({
 38 |   captions,
 39 |   lineMaxLength,
 40 |   lineCount,
 41 |   maxDistanceMs,
 42 | }: {
 43 |   captions: Caption[];
 44 |   lineMaxLength: number;
 45 |   lineCount: number;
 46 |   maxDistanceMs: number;
 47 | }) {
 48 |   const pages = [];
 49 |   let currentPage: CaptionPage = {
 50 |     startMs: 0,
 51 |     endMs: 0,
 52 |     lines: [],
 53 |   };
 54 |   let currentLine: CaptionLine = {
 55 |     texts: [],
 56 |   };
 57 | 
 58 |   captions.forEach((caption, i) => {
 59 |     // Check if we need to start a new page due to time gap
 60 |     if (i > 0 && caption.startMs - currentPage.endMs > maxDistanceMs) {
 61 |       // Add current line if not empty
 62 |       if (currentLine.texts.length > 0) {
 63 |         currentPage.lines.push(currentLine);
 64 |       }
 65 |       // Add current page if not empty
 66 |       if (currentPage.lines.length > 0) {
 67 |         pages.push(currentPage);
 68 |       }
 69 |       // Start new page
 70 |       currentPage = {
 71 |         startMs: caption.startMs,
 72 |         endMs: caption.endMs,
 73 |         lines: [],
 74 |       };
 75 |       currentLine = {
 76 |         texts: [],
 77 |       };
 78 |     }
 79 | 
 80 |     // Check if adding this caption exceeds the line length
 81 |     const currentLineText = currentLine.texts.map((t) => t.text).join(" ");
 82 |     if (
 83 |       currentLine.texts.length > 0 &&
 84 |       currentLineText.length + 1 + caption.text.length > lineMaxLength
 85 |     ) {
 86 |       // Line is full, add it to current page
 87 |       currentPage.lines.push(currentLine);
 88 |       currentLine = {
 89 |         texts: [],
 90 |       };
 91 | 
 92 |       // Check if page is full
 93 |       if (currentPage.lines.length >= lineCount) {
 94 |         // Page is full, add it to pages
 95 |         pages.push(currentPage);
 96 |         // Start new page
 97 |         currentPage = {
 98 |           startMs: caption.startMs,
 99 |           endMs: caption.endMs,
100 |           lines: [],
101 |         };
102 |       }
103 |     }
104 | 
105 |     // Add caption to current line
106 |     currentLine.texts.push({
107 |       text: caption.text,
108 |       startMs: caption.startMs,
109 |       endMs: caption.endMs,
110 |     });
111 | 
112 |     // Update page timing
113 |     currentPage.endMs = caption.endMs;
114 |     if (i === 0 || currentPage.startMs === 0) {
115 |       currentPage.startMs = caption.startMs;
116 |     } else {
117 |       currentPage.startMs = Math.min(currentPage.startMs, caption.startMs);
118 |     }
119 |   });
120 | 
121 |   // Don't forget to add the last line and page
122 |   if (currentLine.texts.length > 0) {
123 |     currentPage.lines.push(currentLine);
124 |   }
125 |   if (currentPage.lines.length > 0) {
126 |     pages.push(currentPage);
127 |   }
128 | 
129 |   return pages;
130 | }
131 | 
/**
 * Looks up the render dimensions and composition component for an
 * orientation.
 *
 * @param orientation - Orientation to resolve.
 * @returns The width, height and component registered for that orientation.
 */
export function getOrientationConfig(orientation: OrientationEnum) {
  const landscape: OrientationConfig = {
    width: 1920,
    height: 1080,
    component: AvailableComponentsEnum.LandscapeVideo,
  };
  const portrait: OrientationConfig = {
    width: 1080,
    height: 1920,
    component: AvailableComponentsEnum.PortraitVideo,
  };

  const byOrientation: Record<OrientationEnum, OrientationConfig> = {
    portrait,
    landscape,
  };
  return byOrientation[orientation];
}
148 | 
/**
 * Translates a music volume level into playback settings.
 *
 * @param level - Requested level; defaults to `MusicVolumeEnum.high`.
 * @returns A `[volume, muted]` tuple. Only "muted" yields `muted === true`;
 *   any unrecognised level falls back to the "high" volume.
 */
export function calculateVolume(
  level: MusicVolumeEnum = MusicVolumeEnum.high,
): [number, boolean] {
  if (level === "muted") {
    return [0, true];
  }

  let volume = 0.7; // "high", and the fallback for anything unrecognised
  if (level === "low") {
    volume = 0.2;
  } else if (level === "medium") {
    volume = 0.45;
  }
  return [volume, false];
}
165 | 
```

--------------------------------------------------------------------------------
/src/types/shorts.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import z from "zod";
  2 | 
/**
 * Mood tags for background music. The string values are what
 * `renderConfig.music` accepts; note that two values ("euphoric/high" and
 * "funny/quirky") differ from their member names.
 */
export enum MusicMoodEnum {
  sad = "sad",
  melancholic = "melancholic",
  happy = "happy",
  euphoric = "euphoric/high",
  excited = "excited",
  chill = "chill",
  uneasy = "uneasy",
  angry = "angry",
  dark = "dark",
  hopeful = "hopeful",
  contemplative = "contemplative",
  funny = "funny/quirky",
}
 17 | 
/** Where the captions are placed in the video; used by `renderConfig.captionPosition`. */
export enum CaptionPositionEnum {
  top = "top",
  center = "center",
  bottom = "bottom",
}
 23 | 
/** One scene of a video: its captions, background video and narration audio. */
export type Scene = {
  captions: Caption[];
  // Source of the background video for this scene.
  video: string;
  audio: {
    url: string;
    // The video compositions multiply this by fps, i.e. it is in seconds.
    duration: number;
  };
};
 32 | 
/**
 * Zod schema for a single scene as supplied by the caller: the text to
 * speak and the search terms used to find a matching video.
 */
export const sceneInput = z.object({
  text: z.string().describe("Text to be spoken in the video"),
  searchTerms: z
    .array(z.string())
    .describe(
      "Search term for video, 1 word, and at least 2-3 search terms should be provided for each scene. Make sure to match the overall context with the word - regardless what the video search result would be.",
    ),
});
/** Static type inferred from `sceneInput`. */
export type SceneInput = z.infer<typeof sceneInput>;
 42 | 
/**
 * Voice identifiers accepted by `renderConfig.voice`. Each member's value
 * is identical to its name.
 */
export enum VoiceEnum {
  af_heart = "af_heart",
  af_alloy = "af_alloy",
  af_aoede = "af_aoede",
  af_bella = "af_bella",
  af_jessica = "af_jessica",
  af_kore = "af_kore",
  af_nicole = "af_nicole",
  af_nova = "af_nova",
  af_river = "af_river",
  af_sarah = "af_sarah",
  af_sky = "af_sky",
  am_adam = "am_adam",
  am_echo = "am_echo",
  am_eric = "am_eric",
  am_fenrir = "am_fenrir",
  am_liam = "am_liam",
  am_michael = "am_michael",
  am_onyx = "am_onyx",
  am_puck = "am_puck",
  am_santa = "am_santa",
  bf_emma = "bf_emma",
  bf_isabella = "bf_isabella",
  bm_george = "bm_george",
  bm_lewis = "bm_lewis",
  bf_alice = "bf_alice",
  bf_lily = "bf_lily",
  bm_daniel = "bm_daniel",
  bm_fable = "bm_fable",
}
 73 | 
/** Video orientation; used by `renderConfig.orientation`. */
export enum OrientationEnum {
  landscape = "landscape",
  portrait = "portrait",
}

/** Background music loudness levels; used by `renderConfig.musicVolume`. */
export enum MusicVolumeEnum {
  muted = "muted",
  low = "low",
  medium = "medium",
  high = "high",
}
 85 | 
/**
 * Zod schema for the optional rendering settings of a video. Every field is
 * optional; the defaults mentioned in the descriptions are applied by the
 * consumers of this config, not by the schema itself.
 */
export const renderConfig = z.object({
  paddingBack: z
    .number()
    .optional()
    .describe(
      "For how long the video should be playing after the speech is done, in milliseconds. 1500 is a good value.",
    ),
  music: z
    .nativeEnum(MusicMoodEnum)
    .optional()
    .describe("Music tag to be used to find the right music for the video"),
  captionPosition: z
    .nativeEnum(CaptionPositionEnum)
    .optional()
    .describe("Position of the caption in the video"),
  captionBackgroundColor: z
    .string()
    .optional()
    .describe(
      "Background color of the caption, a valid css color, default is blue",
    ),
  voice: z
    .nativeEnum(VoiceEnum)
    .optional()
    .describe("Voice to be used for the speech, default is af_heart"),
  orientation: z
    .nativeEnum(OrientationEnum)
    .optional()
    .describe("Orientation of the video, default is portrait"),
  musicVolume: z
    .nativeEnum(MusicVolumeEnum)
    .optional()
    .describe("Volume of the music, default is high"),
});
/** Static type inferred from `renderConfig`. */
export type RenderConfig = z.infer<typeof renderConfig>;
121 | 
/** String-literal union of the `VoiceEnum` values. */
export type Voices = `${VoiceEnum}`;

/** A video reference with its identifier, URL and pixel dimensions. */
export type Video = {
  id: string;
  url: string;
  width: number;
  height: number;
};
/** A piece of caption text with its start and end time in milliseconds. */
export type Caption = {
  text: string;
  startMs: number;
  endMs: number;
};

/** One displayed line of captions. */
export type CaptionLine = {
  texts: Caption[];
};
/** A group of caption lines shown together between `startMs` and `endMs`. */
export type CaptionPage = {
  startMs: number;
  endMs: number;
  lines: CaptionLine[];
};
144 | 
/** Zod schema for a video creation request: the scenes plus render settings. */
export const createShortInput = z.object({
  scenes: z.array(sceneInput).describe("Each scene to be created"),
  config: renderConfig.describe("Configuration for rendering the video"),
});
/** Static type inferred from `createShortInput`. */
export type CreateShortInput = z.infer<typeof createShortInput>;
150 | 
/** Lifecycle state of a video. */
export type VideoStatus = "processing" | "ready" | "failed";

/**
 * A music track in the catalogue. `start` and `end` bound the usable part
 * of the track; the video compositions multiply them by fps, i.e. they are
 * in seconds.
 */
export type Music = {
  file: string;
  start: number;
  end: number;
  mood: string;
};
/** A `Music` entry together with the URL it can be fetched from. */
export type MusicForVideo = Music & {
  url: string;
};

/** String-literal union of the `MusicMoodEnum` values. */
export type MusicTag = `${MusicMoodEnum}`;
164 | 
/**
 * Precision variants of the Kokoro model. The config casts the
 * `KOKORO_MODEL_PRECISION` environment variable to this type without
 * validating it.
 */
export type kokoroModelPrecision = "fp32" | "fp16" | "q8" | "q4" | "q4f16";

/**
 * Whisper model names. The config casts the `WHISPER_MODEL` environment
 * variable to this type without validating it.
 */
export type whisperModels =
  | "tiny"
  | "tiny.en"
  | "base"
  | "base.en"
  | "small"
  | "small.en"
  | "medium"
  | "medium.en"
  | "large-v1"
  | "large-v2"
  | "large-v3"
  | "large-v3-turbo";
180 | 
```

--------------------------------------------------------------------------------
/src/short-creator/music.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import path from "path";
  2 | import fs from "fs-extra";
  3 | 
  4 | import { type Music, MusicForVideo, MusicMoodEnum } from "../types/shorts";
  5 | import { Config } from "../config";
  6 | 
  7 | export class MusicManager {
  8 |   private static musicList: Music[] = [
  9 |     {
 10 |       file: "Sly Sky - Telecasted.mp3",
 11 |       start: 0,
 12 |       end: 152,
 13 |       mood: MusicMoodEnum.melancholic,
 14 |     },
 15 |     {
 16 |       file: "No.2 Remembering Her - Esther Abrami.mp3",
 17 |       start: 2,
 18 |       end: 134,
 19 |       mood: MusicMoodEnum.melancholic,
 20 |     },
 21 |     {
 22 |       file: "Champion - Telecasted.mp3",
 23 |       start: 0,
 24 |       end: 142,
 25 |       mood: MusicMoodEnum.chill,
 26 |     },
 27 |     {
 28 |       file: "Oh Please - Telecasted.mp3",
 29 |       start: 0,
 30 |       end: 154,
 31 |       mood: MusicMoodEnum.chill,
 32 |     },
 33 |     {
 34 |       file: "Jetski - Telecasted.mp3",
 35 |       start: 0,
 36 |       end: 142,
 37 |       mood: MusicMoodEnum.uneasy,
 38 |     },
 39 |     {
 40 |       file: "Phantom - Density & Time.mp3",
 41 |       start: 0,
 42 |       end: 178,
 43 |       mood: MusicMoodEnum.uneasy,
 44 |     },
 45 |     {
 46 |       file: "On The Hunt - Andrew Langdon.mp3",
 47 |       start: 0,
 48 |       end: 95,
 49 |       mood: MusicMoodEnum.uneasy,
 50 |     },
 51 |     {
 52 |       file: "Name The Time And Place - Telecasted.mp3",
 53 |       start: 0,
 54 |       end: 142,
 55 |       mood: MusicMoodEnum.excited,
 56 |     },
 57 |     {
 58 |       file: "Delayed Baggage - Ryan Stasik.mp3",
 59 |       start: 3,
 60 |       end: 108,
 61 |       mood: MusicMoodEnum.euphoric,
 62 |     },
 63 |     {
 64 |       file: "Like It Loud - Dyalla.mp3",
 65 |       start: 4,
 66 |       end: 160,
 67 |       mood: MusicMoodEnum.euphoric,
 68 |     },
 69 |     {
 70 |       file: "Organic Guitar House - Dyalla.mp3",
 71 |       start: 2,
 72 |       end: 160,
 73 |       mood: MusicMoodEnum.euphoric,
 74 |     },
 75 |     {
 76 |       file: "Honey, I Dismembered The Kids - Ezra Lipp.mp3",
 77 |       start: 2,
 78 |       end: 144,
 79 |       mood: MusicMoodEnum.dark,
 80 |     },
 81 |     {
 82 |       file: "Night Hunt - Jimena Contreras.mp3",
 83 |       start: 0,
 84 |       end: 88,
 85 |       mood: MusicMoodEnum.dark,
 86 |     },
 87 |     {
 88 |       file: "Curse of the Witches - Jimena Contreras.mp3",
 89 |       start: 0,
 90 |       end: 102,
 91 |       mood: MusicMoodEnum.dark,
 92 |     },
 93 |     {
 94 |       file: "Restless Heart - Jimena Contreras.mp3",
 95 |       start: 0,
 96 |       end: 94,
 97 |       mood: MusicMoodEnum.sad,
 98 |     },
 99 |     {
100 |       file: "Heartbeat Of The Wind - Asher Fulero.mp3",
101 |       start: 0,
102 |       end: 124,
103 |       mood: MusicMoodEnum.sad,
104 |     },
105 |     {
106 |       file: "Hopeless - Jimena Contreras.mp3",
107 |       start: 0,
108 |       end: 250,
109 |       mood: MusicMoodEnum.sad,
110 |     },
111 |     {
112 |       file: "Touch - Anno Domini Beats.mp3",
113 |       start: 0,
114 |       end: 165,
115 |       mood: MusicMoodEnum.happy,
116 |     },
117 |     {
118 |       file: "Cafecito por la Manana - Cumbia Deli.mp3",
119 |       start: 0,
120 |       end: 184,
121 |       mood: MusicMoodEnum.happy,
122 |     },
123 |     {
124 |       file: "Aurora on the Boulevard - National Sweetheart.mp3",
125 |       start: 0,
126 |       end: 130,
127 |       mood: MusicMoodEnum.happy,
128 |     },
129 |     {
130 |       file: "Buckle Up - Jeremy Korpas.mp3",
131 |       start: 0,
132 |       end: 128,
133 |       mood: MusicMoodEnum.angry,
134 |     },
135 |     {
136 |       file: "Twin Engines - Jeremy Korpas.mp3",
137 |       start: 0,
138 |       end: 120,
139 |       mood: MusicMoodEnum.angry,
140 |     },
141 |     {
142 |       file: "Hopeful - Nat Keefe.mp3",
143 |       start: 0,
144 |       end: 175,
145 |       mood: MusicMoodEnum.hopeful,
146 |     },
147 |     {
148 |       file: "Hopeful Freedom - Asher Fulero.mp3",
149 |       start: 1,
150 |       end: 172,
151 |       mood: MusicMoodEnum.hopeful,
152 |     },
153 |     {
154 |       file: "Crystaline - Quincas Moreira.mp3",
155 |       start: 0,
156 |       end: 140,
157 |       mood: MusicMoodEnum.contemplative,
158 |     },
159 |     {
160 |       file: "Final Soliloquy - Asher Fulero.mp3",
161 |       start: 1,
162 |       end: 178,
163 |       mood: MusicMoodEnum.contemplative,
164 |     },
165 |     {
166 |       file: "Seagull - Telecasted.mp3",
167 |       start: 0,
168 |       end: 123,
169 |       mood: MusicMoodEnum.funny,
170 |     },
171 |     {
172 |       file: "Banjo Doops - Joel Cummins.mp3",
173 |       start: 0,
174 |       end: 98,
175 |       mood: MusicMoodEnum.funny,
176 |     },
177 |     {
178 |       file: "Baby Animals Playing - Joel Cummins.mp3",
179 |       start: 0,
180 |       end: 124,
181 |       mood: MusicMoodEnum.funny,
182 |     },
183 |     {
184 |       file: "Sinister - Anno Domini Beats.mp3",
185 |       start: 0,
186 |       end: 215,
187 |       mood: MusicMoodEnum.dark,
188 |     },
189 |     {
190 |       file: "Traversing - Godmode.mp3",
191 |       start: 0,
192 |       end: 95,
193 |       mood: MusicMoodEnum.dark,
194 |     },
195 |   ];
196 | 
197 |   constructor(private config: Config) {}
198 |   public musicList(): MusicForVideo[] {
199 |     return MusicManager.musicList.map((music: Music) => ({
200 |       ...music,
201 |       url: `http://localhost:${this.config.port}/api/music/${encodeURIComponent(music.file)}`,
202 |     }));
203 |   }
204 |   private musicFileExist(music: Music): boolean {
205 |     return fs.existsSync(path.join(this.config.musicDirPath, music.file));
206 |   }
207 |   public ensureMusicFilesExist(): void {
208 |     for (const music of this.musicList()) {
209 |       if (!this.musicFileExist(music)) {
210 |         throw new Error(`Music file not found: ${music.file}`);
211 |       }
212 |     }
213 |   }
214 | }
215 | 
```

--------------------------------------------------------------------------------
/src/components/videos/PortraitVideo.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import {
  2 |   AbsoluteFill,
  3 |   Sequence,
  4 |   useCurrentFrame,
  5 |   useVideoConfig,
  6 |   Audio,
  7 |   OffthreadVideo,
  8 | } from "remotion";
  9 | import { z } from "zod";
 10 | import { loadFont } from "@remotion/google-fonts/BarlowCondensed";
 11 | 
 12 | import {
 13 |   calculateVolume,
 14 |   createCaptionPages,
 15 |   shortVideoSchema,
 16 | } from "../utils";
 17 | 
 18 | const { fontFamily } = loadFont(); // "Barlow Condensed"
 19 | 
 20 | export const PortraitVideo: React.FC<z.infer<typeof shortVideoSchema>> = ({
 21 |   scenes,
 22 |   music,
 23 |   config,
 24 | }) => {
 25 |   const frame = useCurrentFrame();
 26 |   const { fps } = useVideoConfig();
 27 | 
 28 |   const captionBackgroundColor = config.captionBackgroundColor ?? "blue";
 29 | 
 30 |   const activeStyle = {
 31 |     backgroundColor: captionBackgroundColor,
 32 |     padding: "10px",
 33 |     marginLeft: "-10px",
 34 |     marginRight: "-10px",
 35 |     borderRadius: "10px",
 36 |   };
 37 | 
 38 |   const captionPosition = config.captionPosition ?? "center";
 39 |   let captionStyle = {};
 40 |   if (captionPosition === "top") {
 41 |     captionStyle = { top: 100 };
 42 |   }
 43 |   if (captionPosition === "center") {
 44 |     captionStyle = { top: "50%", transform: "translateY(-50%)" };
 45 |   }
 46 |   if (captionPosition === "bottom") {
 47 |     captionStyle = { bottom: 100 };
 48 |   }
 49 | 
 50 |   const [musicVolume, musicMuted] = calculateVolume(config.musicVolume);
 51 | 
 52 |   return (
 53 |     <AbsoluteFill style={{ backgroundColor: "white" }}>
 54 |       <Audio
 55 |         loop
 56 |         src={music.url}
 57 |         startFrom={music.start * fps}
 58 |         endAt={music.end * fps}
 59 |         volume={() => musicVolume}
 60 |         muted={musicMuted}
 61 |       />
 62 | 
 63 |       {scenes.map((scene, i) => {
 64 |         const { captions, audio, video } = scene;
 65 |         const pages = createCaptionPages({
 66 |           captions,
 67 |           lineMaxLength: 20,
 68 |           lineCount: 1,
 69 |           maxDistanceMs: 1000,
 70 |         });
 71 | 
 72 |         // Calculate the start and end time of the scene
 73 |         const startFrame =
 74 |           scenes.slice(0, i).reduce((acc, curr) => {
 75 |             return acc + curr.audio.duration;
 76 |           }, 0) * fps;
 77 |         let durationInFrames =
 78 |           scenes.slice(0, i + 1).reduce((acc, curr) => {
 79 |             return acc + curr.audio.duration;
 80 |           }, 0) * fps;
 81 |         if (config.paddingBack && i === scenes.length - 1) {
 82 |           durationInFrames += (config.paddingBack / 1000) * fps;
 83 |         }
 84 | 
 85 |         return (
 86 |           <Sequence
 87 |             from={startFrame}
 88 |             durationInFrames={durationInFrames}
 89 |             key={`scene-${i}`}
 90 |           >
 91 |             <OffthreadVideo src={video} muted />
 92 |             <Audio src={audio.url} />
 93 |             {pages.map((page, j) => {
 94 |               return (
 95 |                 <Sequence
 96 |                   key={`scene-${i}-page-${j}`}
 97 |                   from={Math.round((page.startMs / 1000) * fps)}
 98 |                   durationInFrames={Math.round(
 99 |                     ((page.endMs - page.startMs) / 1000) * fps,
100 |                   )}
101 |                 >
102 |                   <div
103 |                     style={{
104 |                       position: "absolute",
105 |                       left: 0,
106 |                       width: "100%",
107 |                       ...captionStyle,
108 |                     }}
109 |                   >
110 |                     {page.lines.map((line, k) => {
111 |                       return (
112 |                         <p
113 |                           style={{
114 |                             fontSize: "6em",
115 |                             fontFamily: fontFamily,
116 |                             fontWeight: "black",
117 |                             color: "white",
118 |                             WebkitTextStroke: "2px black",
119 |                             WebkitTextFillColor: "white",
120 |                             textShadow: "0px 0px 10px black",
121 |                             textAlign: "center",
122 |                             width: "100%",
123 |                             // uppercase
124 |                             textTransform: "uppercase",
125 |                           }}
126 |                           key={`scene-${i}-page-${j}-line-${k}`}
127 |                         >
128 |                           {line.texts.map((text, l) => {
129 |                             const active =
130 |                               frame >=
131 |                                 startFrame + (text.startMs / 1000) * fps &&
132 |                               frame <= startFrame + (text.endMs / 1000) * fps;
133 |                             return (
134 |                               <>
135 |                                 <span
136 |                                   style={{
137 |                                     fontWeight: "bold",
138 |                                     ...(active ? activeStyle : {}),
139 |                                   }}
140 |                                   key={`scene-${i}-page-${j}-line-${k}-text-${l}`}
141 |                                 >
142 |                                   {text.text}
143 |                                 </span>
144 |                                 {l < line.texts.length - 1 ? " " : ""}
145 |                               </>
146 |                             );
147 |                           })}
148 |                         </p>
149 |                       );
150 |                     })}
151 |                   </div>
152 |                 </Sequence>
153 |               );
154 |             })}
155 |           </Sequence>
156 |         );
157 |       })}
158 |     </AbsoluteFill>
159 |   );
160 | };
161 | 
```

--------------------------------------------------------------------------------
/src/components/videos/LandscapeVideo.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import {
  2 |   AbsoluteFill,
  3 |   Sequence,
  4 |   useCurrentFrame,
  5 |   useVideoConfig,
  6 |   Audio,
  7 |   OffthreadVideo,
  8 | } from "remotion";
  9 | import { z } from "zod";
 10 | import { loadFont } from "@remotion/google-fonts/BarlowCondensed";
 11 | 
 12 | import {
 13 |   calculateVolume,
 14 |   createCaptionPages,
 15 |   shortVideoSchema,
 16 | } from "../utils";
 17 | 
 18 | const { fontFamily } = loadFont(); // "Barlow Condensed"
 19 | 
 20 | export const LandscapeVideo: React.FC<z.infer<typeof shortVideoSchema>> = ({
 21 |   scenes,
 22 |   music,
 23 |   config,
 24 | }) => {
 25 |   const frame = useCurrentFrame();
 26 |   const { fps } = useVideoConfig();
 27 | 
 28 |   const captionBackgroundColor = config.captionBackgroundColor ?? "blue";
 29 | 
 30 |   const activeStyle = {
 31 |     backgroundColor: captionBackgroundColor,
 32 |     padding: "10px",
 33 |     marginLeft: "-10px",
 34 |     marginRight: "-10px",
 35 |     borderRadius: "10px",
 36 |   };
 37 | 
 38 |   const captionPosition = config.captionPosition ?? "center";
 39 |   let captionStyle = {};
 40 |   if (captionPosition === "top") {
 41 |     captionStyle = { top: 100 };
 42 |   }
 43 |   if (captionPosition === "center") {
 44 |     captionStyle = { top: "50%", transform: "translateY(-50%)" };
 45 |   }
 46 |   if (captionPosition === "bottom") {
 47 |     captionStyle = { bottom: 100 };
 48 |   }
 49 | 
 50 |   const [musicVolume, musicMuted] = calculateVolume(config.musicVolume);
 51 | 
 52 |   return (
 53 |     <AbsoluteFill style={{ backgroundColor: "white" }}>
 54 |       <Audio
 55 |         loop
 56 |         src={music.url}
 57 |         startFrom={music.start * fps}
 58 |         endAt={music.end * fps}
 59 |         volume={() => musicVolume}
 60 |         muted={musicMuted}
 61 |       />
 62 | 
 63 |       {scenes.map((scene, i) => {
 64 |         const { captions, audio, video } = scene;
 65 |         const pages = createCaptionPages({
 66 |           captions,
 67 |           lineMaxLength: 30,
 68 |           lineCount: 1,
 69 |           maxDistanceMs: 1000,
 70 |         });
 71 | 
 72 |         // Calculate the start and end time of the scene
 73 |         const startFrame =
 74 |           scenes.slice(0, i).reduce((acc, curr) => {
 75 |             return acc + curr.audio.duration;
 76 |           }, 0) * fps;
 77 |         let durationInFrames =
 78 |           scenes.slice(0, i + 1).reduce((acc, curr) => {
 79 |             return acc + curr.audio.duration;
 80 |           }, 0) * fps;
 81 |         if (config.paddingBack && i === scenes.length - 1) {
 82 |           durationInFrames += (config.paddingBack / 1000) * fps;
 83 |         }
 84 | 
 85 |         return (
 86 |           <Sequence
 87 |             from={startFrame}
 88 |             durationInFrames={durationInFrames}
 89 |             key={`scene-${i}`}
 90 |           >
 91 |             <OffthreadVideo src={video} muted />
 92 |             <Audio src={audio.url} />
 93 |             {pages.map((page, j) => {
 94 |               return (
 95 |                 <Sequence
 96 |                   key={`scene-${i}-page-${j}`}
 97 |                   from={Math.round((page.startMs / 1000) * fps)}
 98 |                   durationInFrames={Math.round(
 99 |                     ((page.endMs - page.startMs) / 1000) * fps,
100 |                   )}
101 |                 >
102 |                   <div
103 |                     style={{
104 |                       position: "absolute",
105 |                       left: 0,
106 |                       width: "100%",
107 |                       ...captionStyle,
108 |                     }}
109 |                   >
110 |                     {page.lines.map((line, k) => {
111 |                       return (
112 |                         <p
113 |                           style={{
114 |                             fontSize: "8em",
115 |                             fontFamily: fontFamily,
116 |                             fontWeight: "black",
117 |                             color: "white",
118 |                             WebkitTextStroke: "2px black",
119 |                             WebkitTextFillColor: "white",
120 |                             textShadow: "0px 0px 10px black",
121 |                             textAlign: "center",
122 |                             width: "100%",
123 |                             // uppercase
124 |                             textTransform: "uppercase",
125 |                           }}
126 |                           key={`scene-${i}-page-${j}-line-${k}`}
127 |                         >
128 |                           {line.texts.map((text, l) => {
129 |                             const active =
130 |                               frame >=
131 |                                 startFrame + (text.startMs / 1000) * fps &&
132 |                               frame <= startFrame + (text.endMs / 1000) * fps;
133 |                             return (
134 |                               <>
135 |                                 <span
136 |                                   style={{
137 |                                     fontWeight: "bold",
138 |                                     ...(active ? activeStyle : {}),
139 |                                   }}
140 |                                   key={`scene-${i}-page-${j}-line-${k}-text-${l}`}
141 |                                 >
142 |                                   {text.text}
143 |                                 </span>
144 |                                 {l < line.texts.length - 1 ? " " : ""}
145 |                               </>
146 |                             );
147 |                           })}
148 |                         </p>
149 |                       );
150 |                     })}
151 |                   </div>
152 |                 </Sequence>
153 |               );
154 |             })}
155 |           </Sequence>
156 |         );
157 |       })}
158 |     </AbsoluteFill>
159 |   );
160 | };
161 | 
```

--------------------------------------------------------------------------------
/src/ui/pages/VideoList.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import React, { useState, useEffect } from 'react';
  2 | import { useNavigate } from 'react-router-dom';
  3 | import axios from 'axios';
  4 | import { 
  5 |   Box, 
  6 |   Typography, 
  7 |   Paper, 
  8 |   Button, 
  9 |   CircularProgress, 
 10 |   Alert,
 11 |   List,
 12 |   ListItem,
 13 |   ListItemText,
 14 |   ListItemSecondaryAction,
 15 |   IconButton,
 16 |   Divider
 17 | } from '@mui/material';
 18 | import AddIcon from '@mui/icons-material/Add';
 19 | import PlayArrowIcon from '@mui/icons-material/PlayArrow';
 20 | import DeleteIcon from '@mui/icons-material/Delete';
 21 | 
/** A video entry as held in the list state: its identifier and status string. */
interface VideoItem {
  id: string;
  status: string;
}
 26 | 
 27 | const VideoList: React.FC = () => {
 28 |   const navigate = useNavigate();
 29 |   const [videos, setVideos] = useState<VideoItem[]>([]);
 30 |   const [loading, setLoading] = useState(true);
 31 |   const [error, setError] = useState<string | null>(null);
 32 | 
 33 |   const fetchVideos = async () => {
 34 |     try {
 35 |       const response = await axios.get('/api/short-videos');
 36 |       setVideos(response.data.videos || []);
 37 |       setLoading(false);
 38 |     } catch (err) {
 39 |       setError('Failed to fetch videos');
 40 |       setLoading(false);
 41 |       console.error('Error fetching videos:', err);
 42 |     }
 43 |   };
 44 | 
 45 |   useEffect(() => {
 46 |     fetchVideos();
 47 |   }, []);
 48 | 
 49 |   const handleCreateNew = () => {
 50 |     navigate('/create');
 51 |   };
 52 | 
 53 |   const handleVideoClick = (id: string) => {
 54 |     navigate(`/video/${id}`);
 55 |   };
 56 | 
 57 |   const handleDeleteVideo = async (id: string, event: React.MouseEvent<HTMLButtonElement>) => {
 58 |     event.stopPropagation();
 59 |     
 60 |     try {
 61 |       await axios.delete(`/api/short-video/${id}`);
 62 |       fetchVideos();
 63 |     } catch (err) {
 64 |       setError('Failed to delete video');
 65 |       console.error('Error deleting video:', err);
 66 |     }
 67 |   };
 68 | 
 69 |   const capitalizeFirstLetter = (str: string) => {
 70 |     if (!str || typeof str !== 'string') return 'Unknown';
 71 |     return str.charAt(0).toUpperCase() + str.slice(1);
 72 |   };
 73 | 
 74 |   if (loading) {
 75 |     return (
 76 |       <Box display="flex" justifyContent="center" alignItems="center" height="80vh">
 77 |         <CircularProgress />
 78 |       </Box>
 79 |     );
 80 |   }
 81 | 
 82 |   return (
 83 |     <Box maxWidth="md" mx="auto" py={4}>
 84 |       <Box display="flex" justifyContent="space-between" alignItems="center" mb={4}>
 85 |         <Typography variant="h4" component="h1">
 86 |           Your Videos
 87 |         </Typography>
 88 |         <Button 
 89 |           variant="contained" 
 90 |           color="primary" 
 91 |           startIcon={<AddIcon />}
 92 |           onClick={handleCreateNew}
 93 |         >
 94 |           Create New Video
 95 |         </Button>
 96 |       </Box>
 97 |       
 98 |       {error && (
 99 |         <Alert severity="error" sx={{ mb: 3 }}>{error}</Alert>
100 |       )}
101 |       
102 |       {videos.length === 0 ? (
103 |         <Paper sx={{ p: 4, textAlign: 'center' }}>
104 |           <Typography variant="body1" color="text.secondary" gutterBottom>
105 |             You haven't created any videos yet.
106 |           </Typography>
107 |           <Button 
108 |             variant="outlined" 
109 |             startIcon={<AddIcon />}
110 |             onClick={handleCreateNew}
111 |             sx={{ mt: 2 }}
112 |           >
113 |             Create Your First Video
114 |           </Button>
115 |         </Paper>
116 |       ) : (
117 |         <Paper>
118 |           <List>
119 |             {videos.map((video, index) => {
120 |               const videoId = video?.id || '';
121 |               const videoStatus = video?.status || 'unknown';
122 |               
123 |               return (
124 |                 <div key={videoId}>
125 |                   {index > 0 && <Divider />}
126 |                   <ListItem 
127 |                     button 
128 |                     onClick={() => handleVideoClick(videoId)}
129 |                     sx={{ 
130 |                       py: 2,
131 |                       '&:hover': {
132 |                         backgroundColor: 'rgba(0, 0, 0, 0.04)'
133 |                       }
134 |                     }}
135 |                   >
136 |                     <ListItemText
137 |                       primary={`Video ${videoId.substring(0, 8)}...`}
138 |                       secondary={
139 |                         <Typography
140 |                           component="span"
141 |                           variant="body2"
142 |                           color={
143 |                             videoStatus === 'ready' ? 'success.main' : 
144 |                             videoStatus === 'processing' ? 'info.main' : 
145 |                             videoStatus === 'failed' ? 'error.main' : 'text.secondary'
146 |                           }
147 |                         >
148 |                           {capitalizeFirstLetter(videoStatus)}
149 |                         </Typography>
150 |                       }
151 |                     />
152 |                     <ListItemSecondaryAction>
153 |                       {videoStatus === 'ready' && (
154 |                         <IconButton 
155 |                           edge="end" 
156 |                           aria-label="play"
157 |                           onClick={() => handleVideoClick(videoId)}
158 |                           color="primary"
159 |                         >
160 |                           <PlayArrowIcon />
161 |                         </IconButton>
162 |                       )}
163 |                       <IconButton 
164 |                         edge="end" 
165 |                         aria-label="delete" 
166 |                         onClick={(e) => handleDeleteVideo(videoId, e)}
167 |                         color="error"
168 |                         sx={{ ml: 1 }}
169 |                       >
170 |                         <DeleteIcon />
171 |                       </IconButton>
172 |                     </ListItemSecondaryAction>
173 |                   </ListItem>
174 |                 </div>
175 |               );
176 |             })}
177 |           </List>
178 |         </Paper>
179 |       )}
180 |     </Box>
181 |   );
182 | };
183 | 
184 | export default VideoList; 
```

--------------------------------------------------------------------------------
/src/short-creator/libraries/Pexels.ts:
--------------------------------------------------------------------------------

```typescript
  1 | /* eslint-disable @remotion/deterministic-randomness */
  2 | import { getOrientationConfig } from "../../components/utils";
  3 | import { logger } from "../../logger";
  4 | import { OrientationEnum, type Video } from "../../types/shorts";
  5 | 
  6 | const jokerTerms: string[] = ["nature", "globe", "space", "ocean"]; // generic terms findVideo tries after the caller's search terms
  7 | const durationBufferSeconds = 3; // added to minDurationSeconds when filtering candidate videos
  8 | const defaultTimeoutMs = 5000; // default value of findVideo's `timeout` parameter, in milliseconds
  9 | const retryTimes = 3; // findVideo restarts the search after a timeout while its retry counter is below this
 10 | 
 11 | /**
 12 |  * Client for the Pexels video search API.
 13 |  *
 14 |  * Returns a randomly chosen HD clip whose dimensions match the requested
 15 |  * orientation and whose duration covers the requested minimum plus
 16 |  * `durationBufferSeconds`.
 17 |  */
 18 | export class PexelsAPI {
 19 |   constructor(private API_KEY: string) {}
 20 | 
 21 |   /**
 22 |    * Returns a uniformly shuffled copy of `items` (Fisher-Yates).
 23 |    * The input array is left untouched.
 24 |    */
 25 |   private static shuffled<T>(items: readonly T[]): T[] {
 26 |     const copy = [...items];
 27 |     for (let i = copy.length - 1; i > 0; i--) {
 28 |       const j = Math.floor(Math.random() * (i + 1));
 29 |       [copy[i], copy[j]] = [copy[j], copy[i]];
 30 |     }
 31 |     return copy;
 32 |   }
 33 | 
 34 |   /**
 35 |    * Runs one search for `searchTerm` and returns a randomly chosen match.
 36 |    *
 37 |    * @param searchTerm query sent to Pexels
 38 |    * @param minDurationSeconds minimum clip length; `durationBufferSeconds` is added on top
 39 |    * @param excludeIds ids of videos that must not be returned
 40 |    * @param orientation requested orientation; also selects the required width/height
 41 |    * @param timeout request timeout in milliseconds
 42 |    * @returns id, url, width and height of the selected video file
 43 |    * @throws Error if the API key is empty, the request fails or times out,
 44 |    *   or no video satisfies the criteria
 45 |    */
 46 |   private async _findVideo(
 47 |     searchTerm: string,
 48 |     minDurationSeconds: number,
 49 |     excludeIds: string[],
 50 |     orientation: OrientationEnum,
 51 |     timeout: number,
 52 |   ): Promise<Video> {
 53 |     if (!this.API_KEY) {
 54 |       throw new Error("API key not set");
 55 |     }
 56 |     logger.debug(
 57 |       { searchTerm, minDurationSeconds, orientation },
 58 |       "Searching for video in Pexels API",
 59 |     );
 60 |     const headers = new Headers();
 61 |     headers.append("Authorization", this.API_KEY);
 62 |     const response = await fetch(
 63 |       `https://api.pexels.com/videos/search?orientation=${orientation}&size=medium&per_page=80&query=${encodeURIComponent(searchTerm)}`,
 64 |       {
 65 |         method: "GET",
 66 |         headers,
 67 |         redirect: "follow",
 68 |         signal: AbortSignal.timeout(timeout),
 69 |       },
 70 |     )
 71 |       .then((res) => {
 72 |         if (!res.ok) {
 73 |           if (res.status === 401) {
 74 |             throw new Error(
 75 |               "Invalid Pexels API key - please make sure you get a valid key from https://www.pexels.com/api and set it in the environment variable PEXELS_API_KEY",
 76 |             );
 77 |           }
 78 |           throw new Error(`Pexels API error: ${res.status} ${res.statusText}`);
 79 |         }
 80 |         return res.json();
 81 |       })
 82 |       .catch((error: unknown) => {
 83 |         logger.error(error, "Error fetching videos from Pexels API");
 84 |         throw error;
 85 |       });
 86 |     const videos = response.videos as {
 87 |       id: string;
 88 |       duration: number;
 89 |       video_files: {
 90 |         fps: number;
 91 |         quality: string;
 92 |         width: number;
 93 |         height: number;
 94 |         id: string;
 95 |         link: string;
 96 |       }[];
 97 |     }[];
 98 | 
 99 |     const { width: requiredVideoWidth, height: requiredVideoHeight } =
100 |       getOrientationConfig(orientation);
101 | 
102 |     if (!videos || videos.length === 0) {
103 |       logger.error(
104 |         { searchTerm, orientation },
105 |         "No videos found in Pexels API",
106 |       );
107 |       throw new Error("No videos found");
108 |     }
109 | 
110 |     // find all the videos that fit the criteria, then select one randomly
111 |     const filteredVideos = videos
112 |       .map((video) => {
113 |         if (excludeIds.includes(video.id)) {
114 |           return;
115 |         }
116 |         if (!video.video_files.length) {
117 |           return;
118 |         }
119 | 
120 |         // calculate the real duration of the video by converting the FPS to 25
121 |         const fps = video.video_files[0].fps;
122 |         const duration =
123 |           fps < 25 ? video.duration * (fps / 25) : video.duration;
124 | 
125 |         if (duration >= minDurationSeconds + durationBufferSeconds) {
126 |           for (const file of video.video_files) {
127 |             if (
128 |               file.quality === "hd" &&
129 |               file.width === requiredVideoWidth &&
130 |               file.height === requiredVideoHeight
131 |             ) {
132 |               return {
133 |                 id: video.id,
134 |                 url: file.link,
135 |                 width: file.width,
136 |                 height: file.height,
137 |               };
138 |             }
139 |           }
140 |         }
141 |       })
142 |       .filter(Boolean);
143 |     if (!filteredVideos.length) {
144 |       logger.error({ searchTerm }, "No videos found in Pexels API");
145 |       throw new Error("No videos found");
146 |     }
147 | 
148 |     const video = filteredVideos[
149 |       Math.floor(Math.random() * filteredVideos.length)
150 |     ] as Video;
151 | 
152 |     logger.debug(
153 |       { searchTerm, video: video, minDurationSeconds, orientation },
154 |       "Found video from Pexels API",
155 |     );
156 | 
157 |     return video;
158 |   }
159 | 
160 |   /**
161 |    * Searches Pexels for a video, trying the caller's terms first (in random
162 |    * order) and then the generic joker terms (also in random order).
163 |    *
164 |    * A timeout restarts the whole search, at most `retryTimes` times; any other
165 |    * failure is logged and the next term is tried.
166 |    *
167 |    * @param searchTerms terms to try; the array is not modified
168 |    * @param minDurationSeconds minimum clip length in seconds
169 |    * @param excludeIds ids of videos that must not be returned
170 |    * @param orientation requested orientation (defaults to portrait)
171 |    * @param timeout per-request timeout in milliseconds
172 |    * @param retryCounter number of timeout retries already performed
173 |    * @returns the first video found
174 |    * @throws the timeout error once retries are exhausted, or an `Error` when
175 |    *   no term yields a video
176 |    */
177 |   async findVideo(
178 |     searchTerms: string[],
179 |     minDurationSeconds: number,
180 |     excludeIds: string[] = [],
181 |     orientation: OrientationEnum = OrientationEnum.portrait,
182 |     timeout: number = defaultTimeoutMs,
183 |     retryCounter: number = 0,
184 |   ): Promise<Video> {
185 |     // Shuffle copies: sorting in place would reorder the caller's array and
186 |     // the shared module-level joker list.
187 |     const shuffledJokerTerms = PexelsAPI.shuffled(jokerTerms);
188 |     const shuffledSearchTerms = PexelsAPI.shuffled(searchTerms);
189 | 
190 |     for (const searchTerm of [...shuffledSearchTerms, ...shuffledJokerTerms]) {
191 |       try {
192 |         return await this._findVideo(
193 |           searchTerm,
194 |           minDurationSeconds,
195 |           excludeIds,
196 |           orientation,
197 |           timeout,
198 |         );
199 |       } catch (error: unknown) {
200 |         if (
201 |           error instanceof Error &&
202 |           error instanceof DOMException &&
203 |           error.name === "TimeoutError"
204 |         ) {
205 |           if (retryCounter < retryTimes) {
206 |             logger.warn(
207 |               { searchTerm, retryCounter },
208 |               "Timeout error, retrying...",
209 |             );
210 |             return await this.findVideo(
211 |               searchTerms,
212 |               minDurationSeconds,
213 |               excludeIds,
214 |               orientation,
215 |               timeout,
216 |               retryCounter + 1,
217 |             );
218 |           }
219 |           logger.error(
220 |             { searchTerm, retryCounter },
221 |             "Timeout error, retry limit reached",
222 |           );
223 |           throw error;
224 |         }
225 | 
226 |         logger.error(error, "Error finding video in Pexels API for term");
227 |       }
228 |     }
229 |     logger.error(
230 |       { searchTerms },
231 |       "No videos found in Pexels API for the given terms",
232 |     );
233 |     throw new Error("No videos found in Pexels API");
234 |   }
235 | }
236 | 
```

--------------------------------------------------------------------------------
/src/ui/pages/VideoDetails.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import React, { useState, useEffect, useRef } from 'react';
  2 | import { useParams, useNavigate } from 'react-router-dom';
  3 | import axios from 'axios';
  4 | import { 
  5 |   Box, 
  6 |   Typography, 
  7 |   Paper, 
  8 |   Button, 
  9 |   CircularProgress, 
 10 |   Alert,
 11 |   Grid
 12 | } from '@mui/material';
 13 | import ArrowBackIcon from '@mui/icons-material/ArrowBack';
 14 | import DownloadIcon from '@mui/icons-material/Download';
 15 | import { VideoStatus } from '../../types/shorts';
 16 | 
 17 | /**
 18 |  * Details page for a single short video. Polls the status endpoint every
 19 |  * five seconds until the video leaves the `processing` state, then shows
 20 |  * the player and download link, or an error message.
 21 |  */
 22 | const VideoDetails: React.FC = () => {
 23 |   const { videoId } = useParams<{ videoId: string }>();
 24 |   const navigate = useNavigate();
 25 |   const [loading, setLoading] = useState(true);
 26 |   const [error, setError] = useState<string | null>(null);
 27 |   const [status, setStatus] = useState<VideoStatus>('processing');
 28 |   const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
 29 |   const isMounted = useRef(true);
 30 | 
 31 |   // Stops the status polling timer, if one is running.
 32 |   const stopPolling = () => {
 33 |     if (intervalRef.current) {
 34 |       clearInterval(intervalRef.current);
 35 |       intervalRef.current = null;
 36 |     }
 37 |   };
 38 | 
 39 |   // Fetches the status once; stops polling when it is no longer 'processing' or the request fails.
 40 |   const checkVideoStatus = async () => {
 41 |     try {
 42 |       const response = await axios.get(`/api/short-video/${videoId}/status`);
 43 |       const videoStatus = response.data.status;
 44 | 
 45 |       if (!isMounted.current) {
 46 |         return;
 47 |       }
 48 | 
 49 |       setStatus(videoStatus || 'unknown');
 50 |       if (videoStatus !== 'processing') {
 51 |         stopPolling();
 52 |       }
 53 |       setLoading(false);
 54 |     } catch (error) {
 55 |       if (!isMounted.current) {
 56 |         return;
 57 |       }
 58 | 
 59 |       setError('Failed to fetch video status');
 60 |       setStatus('failed');
 61 |       setLoading(false);
 62 |       console.error('Error fetching video status:', error);
 63 |       stopPolling();
 64 |     }
 65 |   };
 66 | 
 67 |   useEffect(() => {
 68 |     // Cleanup sets this flag to false; re-arm it because the effect can run
 69 |     // again on the same instance (videoId change, React StrictMode remount),
 70 |     // otherwise every later status update would be dropped.
 71 |     isMounted.current = true;
 72 |     checkVideoStatus();
 73 | 
 74 |     intervalRef.current = setInterval(() => {
 75 |       checkVideoStatus();
 76 |     }, 5000);
 77 | 
 78 |     return () => {
 79 |       isMounted.current = false;
 80 |       stopPolling();
 81 |     };
 82 |   }, [videoId]);
 83 | 
 84 |   const handleBack = () => {
 85 |     navigate('/');
 86 |   };
 87 | 
 88 |   // Picks the main panel content from the loading / error / status state.
 89 |   const renderContent = () => {
 90 |     if (loading) {
 91 |       return (
 92 |         <Box display="flex" justifyContent="center" alignItems="center" minHeight="30vh">
 93 |           <CircularProgress />
 94 |         </Box>
 95 |       );
 96 |     }
 97 | 
 98 |     if (error) {
 99 |       return <Alert severity="error">{error}</Alert>;
100 |     }
101 | 
102 |     if (status === 'processing') {
103 |       return (
104 |         <Box textAlign="center" py={4}>
105 |           <CircularProgress size={60} sx={{ mb: 2 }} />
106 |           <Typography variant="h6">Your video is being created...</Typography>
107 |           <Typography variant="body1" color="text.secondary">
108 |             This may take a few minutes. Please wait.
109 |           </Typography>
110 |         </Box>
111 |       );
112 |     }
113 | 
114 |     if (status === 'ready') {
115 |       return (
116 |         <Box>
117 |           <Box mb={3} textAlign="center">
118 |             <Typography variant="h6" color="success.main" gutterBottom>
119 |               Your video is ready!
120 |             </Typography>
121 |           </Box>
122 | 
123 |           <Box sx={{
124 |             position: 'relative',
125 |             paddingTop: '56.25%',
126 |             mb: 3,
127 |             backgroundColor: '#000'
128 |           }}>
129 |             <video
130 |               controls
131 |               autoPlay
132 |               style={{
133 |                 position: 'absolute',
134 |                 top: 0,
135 |                 left: 0,
136 |                 width: '100%',
137 |                 height: '100%',
138 |               }}
139 |               src={`/api/short-video/${videoId}`}
140 |             />
141 |           </Box>
142 | 
143 |           <Box textAlign="center">
144 |             <Button
145 |               component="a"
146 |               href={`/api/short-video/${videoId}`}
147 |               download
148 |               variant="contained"
149 |               color="primary"
150 |               startIcon={<DownloadIcon />}
151 |               sx={{ textDecoration: 'none' }}
152 |             >
153 |               Download Video
154 |             </Button>
155 |           </Box>
156 |         </Box>
157 |       );
158 |     }
159 | 
160 |     if (status === 'failed') {
161 |       return (
162 |         <Alert severity="error" sx={{ mb: 3 }}>
163 |           Video processing failed. Please try again with different settings.
164 |         </Alert>
165 |       );
166 |     }
167 | 
168 |     return (
169 |       <Alert severity="info" sx={{ mb: 3 }}>
170 |         Unknown video status. Please try refreshing the page.
171 |       </Alert>
172 |     );
173 |   };
174 | 
175 |   const capitalizeFirstLetter = (str: string) => {
176 |     if (!str || typeof str !== 'string') return 'Unknown';
177 |     return str.charAt(0).toUpperCase() + str.slice(1);
178 |   };
179 | 
180 |   return (
181 |     <Box maxWidth="md" mx="auto" py={4}>
182 |       <Box display="flex" alignItems="center" mb={3}>
183 |         <Button
184 |           startIcon={<ArrowBackIcon />}
185 |           onClick={handleBack}
186 |           sx={{ mr: 2 }}
187 |         >
188 |           Back to videos
189 |         </Button>
190 |         <Typography variant="h4" component="h1">
191 |           Video Details
192 |         </Typography>
193 |       </Box>
194 | 
195 |       <Paper sx={{ p: 3 }}>
196 |         <Grid container spacing={2} mb={3}>
197 |           <Grid item xs={12} sm={6}>
198 |             <Typography variant="body2" color="text.secondary">
199 |               Video ID
200 |             </Typography>
201 |             <Typography variant="body1">
202 |               {videoId || 'Unknown'}
203 |             </Typography>
204 |           </Grid>
205 |           <Grid item xs={12} sm={6}>
206 |             <Typography variant="body2" color="text.secondary">
207 |               Status
208 |             </Typography>
209 |             <Typography
210 |               variant="body1"
211 |               color={
212 |                 status === 'ready' ? 'success.main' :
213 |                 status === 'processing' ? 'info.main' :
214 |                 status === 'failed' ? 'error.main' : 'text.primary'
215 |               }
216 |             >
217 |               {capitalizeFirstLetter(status)}
218 |             </Typography>
219 |           </Grid>
220 |         </Grid>
221 | 
222 |         {renderContent()}
223 |       </Paper>
224 |     </Box>
225 |   );
226 | };
227 | 
228 | export default VideoDetails;
```

--------------------------------------------------------------------------------
/src/server/routers/rest.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import express from "express";
  2 | import type {
  3 |   Request as ExpressRequest,
  4 |   Response as ExpressResponse,
  5 | } from "express";
  6 | import fs from "fs-extra";
  7 | import path from "path";
  8 | 
  9 | import { validateCreateShortInput } from "../validator";
 10 | import { ShortCreator } from "../../short-creator/ShortCreator";
 11 | import { logger } from "../../logger";
 12 | import { Config } from "../../config";
 13 | 
 14 | // todo abstract class
 15 | /**
 16 |  * Express router for the REST API: queueing, inspecting, listing, deleting
 17 |  * and downloading short videos, plus serving temp and music files.
 18 |  */
 19 | export class APIRouter {
 20 |   public router: express.Router;
 21 |   private shortCreator: ShortCreator;
 22 |   private config: Config;
 23 | 
 24 |   constructor(config: Config, shortCreator: ShortCreator) {
 25 |     this.config = config;
 26 |     this.router = express.Router();
 27 |     this.shortCreator = shortCreator;
 28 | 
 29 |     this.router.use(express.json());
 30 | 
 31 |     this.setupRoutes();
 32 |   }
 33 | 
 34 |   /**
 35 |    * Resolves `fileName` inside `baseDir`.
 36 |    *
 37 |    * Route parameters arrive URL-decoded, so a request such as
 38 |    * `..%2F..%2Fsecret` would otherwise be joined into a path outside the
 39 |    * served directory.
 40 |    *
 41 |    * @returns the absolute path, or `null` when it is not strictly inside `baseDir`
 42 |    */
 43 |   private static resolveInside(
 44 |     baseDir: string,
 45 |     fileName: string,
 46 |   ): string | null {
 47 |     const base = path.resolve(baseDir);
 48 |     const target = path.resolve(base, fileName);
 49 |     const relative = path.relative(base, target);
 50 |     const outside =
 51 |       relative === "" ||
 52 |       relative === ".." ||
 53 |       relative.startsWith(`..${path.sep}`) ||
 54 |       path.isAbsolute(relative);
 55 |     return outside ? null : target;
 56 |   }
 57 | 
 58 |   /** Registers every REST endpoint on `this.router`. */
 59 |   private setupRoutes() {
 60 |     this.router.post(
 61 |       "/short-video",
 62 |       async (req: ExpressRequest, res: ExpressResponse) => {
 63 |         try {
 64 |           const input = validateCreateShortInput(req.body);
 65 | 
 66 |           logger.info({ input }, "Creating short video");
 67 | 
 68 |           const videoId = this.shortCreator.addToQueue(
 69 |             input.scenes,
 70 |             input.config,
 71 |           );
 72 | 
 73 |           res.status(201).json({
 74 |             videoId,
 75 |           });
 76 |         } catch (error: unknown) {
 77 |           logger.error(error, "Error validating input");
 78 | 
 79 |           // Handle validation errors specifically
 80 |           if (error instanceof Error && error.message.startsWith("{")) {
 81 |             try {
 82 |               const errorData = JSON.parse(error.message);
 83 |               res.status(400).json({
 84 |                 error: "Validation failed",
 85 |                 message: errorData.message,
 86 |                 missingFields: errorData.missingFields,
 87 |               });
 88 |               return;
 89 |             } catch (parseError: unknown) {
 90 |               logger.error(parseError, "Error parsing validation error");
 91 |             }
 92 |           }
 93 | 
 94 |           // Fallback for other errors
 95 |           res.status(400).json({
 96 |             error: "Invalid input",
 97 |             message: error instanceof Error ? error.message : "Unknown error",
 98 |           });
 99 |         }
100 |       },
101 |     );
102 | 
103 |     this.router.get(
104 |       "/short-video/:videoId/status",
105 |       async (req: ExpressRequest, res: ExpressResponse) => {
106 |         const { videoId } = req.params;
107 |         if (!videoId) {
108 |           res.status(400).json({
109 |             error: "videoId is required",
110 |           });
111 |           return;
112 |         }
113 |         const status = this.shortCreator.status(videoId);
114 |         res.status(200).json({
115 |           status,
116 |         });
117 |       },
118 |     );
119 | 
120 |     this.router.get(
121 |       "/music-tags",
122 |       (req: ExpressRequest, res: ExpressResponse) => {
123 |         res.status(200).json(this.shortCreator.ListAvailableMusicTags());
124 |       },
125 |     );
126 | 
127 |     this.router.get("/voices", (req: ExpressRequest, res: ExpressResponse) => {
128 |       res.status(200).json(this.shortCreator.ListAvailableVoices());
129 |     });
130 | 
131 |     this.router.get(
132 |       "/short-videos",
133 |       (req: ExpressRequest, res: ExpressResponse) => {
134 |         const videos = this.shortCreator.listAllVideos();
135 |         res.status(200).json({
136 |           videos,
137 |         });
138 |       },
139 |     );
140 | 
141 |     this.router.delete(
142 |       "/short-video/:videoId",
143 |       (req: ExpressRequest, res: ExpressResponse) => {
144 |         const { videoId } = req.params;
145 |         if (!videoId) {
146 |           res.status(400).json({
147 |             error: "videoId is required",
148 |           });
149 |           return;
150 |         }
151 |         this.shortCreator.deleteVideo(videoId);
152 |         res.status(200).json({
153 |           success: true,
154 |         });
155 |       },
156 |     );
157 | 
158 |     this.router.get(
159 |       "/tmp/:tmpFile",
160 |       (req: ExpressRequest, res: ExpressResponse) => {
161 |         const { tmpFile } = req.params;
162 |         if (!tmpFile) {
163 |           res.status(400).json({
164 |             error: "tmpFile is required",
165 |           });
166 |           return;
167 |         }
168 |         const tmpFilePath = APIRouter.resolveInside(
169 |           this.config.tempDirPath,
170 |           tmpFile,
171 |         );
172 |         if (!tmpFilePath) {
173 |           res.status(400).json({
174 |             error: "tmpFile is invalid",
175 |           });
176 |           return;
177 |         }
178 |         if (!fs.existsSync(tmpFilePath)) {
179 |           res.status(404).json({
180 |             error: "tmpFile not found",
181 |           });
182 |           return;
183 |         }
184 | 
185 |         if (tmpFile.endsWith(".mp3")) {
186 |           res.setHeader("Content-Type", "audio/mpeg");
187 |         }
188 |         if (tmpFile.endsWith(".wav")) {
189 |           res.setHeader("Content-Type", "audio/wav");
190 |         }
191 |         if (tmpFile.endsWith(".mp4")) {
192 |           res.setHeader("Content-Type", "video/mp4");
193 |         }
194 | 
195 |         const tmpFileStream = fs.createReadStream(tmpFilePath);
196 |         tmpFileStream.on("error", (error) => {
197 |           logger.error(error, "Error reading tmp file");
198 |           if (res.headersSent) {
199 |             // the body has started, so the status can no longer change
200 |             res.destroy(error);
201 |             return;
202 |           }
203 |           res.status(500).json({
204 |             error: "Error reading tmp file",
205 |             tmpFile,
206 |           });
207 |         });
208 |         tmpFileStream.pipe(res);
209 |       },
210 |     );
211 | 
212 |     this.router.get(
213 |       "/music/:fileName",
214 |       (req: ExpressRequest, res: ExpressResponse) => {
215 |         const { fileName } = req.params;
216 |         if (!fileName) {
217 |           res.status(400).json({
218 |             error: "fileName is required",
219 |           });
220 |           return;
221 |         }
222 |         const musicFilePath = APIRouter.resolveInside(
223 |           this.config.musicDirPath,
224 |           fileName,
225 |         );
226 |         if (!musicFilePath) {
227 |           res.status(400).json({
228 |             error: "fileName is invalid",
229 |           });
230 |           return;
231 |         }
232 |         if (!fs.existsSync(musicFilePath)) {
233 |           res.status(404).json({
234 |             error: "music file not found",
235 |           });
236 |           return;
237 |         }
238 |         const musicFileStream = fs.createReadStream(musicFilePath);
239 |         musicFileStream.on("error", (error) => {
240 |           logger.error(error, "Error reading music file");
241 |           if (res.headersSent) {
242 |             // the body has started, so the status can no longer change
243 |             res.destroy(error);
244 |             return;
245 |           }
246 |           res.status(500).json({
247 |             error: "Error reading music file",
248 |             fileName,
249 |           });
250 |         });
251 |         musicFileStream.pipe(res);
252 |       },
253 |     );
254 | 
255 |     this.router.get(
256 |       "/short-video/:videoId",
257 |       (req: ExpressRequest, res: ExpressResponse) => {
258 |         try {
259 |           const { videoId } = req.params;
260 |           if (!videoId) {
261 |             res.status(400).json({
262 |               error: "videoId is required",
263 |             });
264 |             return;
265 |           }
266 |           const video = this.shortCreator.getVideo(videoId);
267 |           res.setHeader("Content-Type", "video/mp4");
268 |           res.setHeader(
269 |             "Content-Disposition",
270 |             `inline; filename=${videoId}.mp4`,
271 |           );
272 |           res.send(video);
273 |         } catch (error: unknown) {
274 |           logger.error(error, "Error getting video");
275 |           res.status(404).json({
276 |             error: "Video not found",
277 |           });
278 |         }
279 |       },
280 |     );
281 |   }
282 | }
283 | 
```

--------------------------------------------------------------------------------
/src/short-creator/ShortCreator.test.ts:
--------------------------------------------------------------------------------

```typescript
  1 | process.env.LOG_LEVEL = "debug"; // request debug logging for this test file; NOTE(review): confirm this takes effect before the imported modules read LOG_LEVEL
  2 | 
  3 | import { test, expect, vi } from "vitest";
  4 | import fs from "fs-extra";
  5 | 
  6 | import { ShortCreator } from "./ShortCreator";
  7 | import { Kokoro } from "./libraries/Kokoro";
  8 | import { Remotion } from "./libraries/Remotion";
  9 | import { Whisper } from "./libraries/Whisper";
 10 | import { FFMpeg } from "./libraries/FFmpeg";
 11 | import { PexelsAPI } from "./libraries/Pexels";
 12 | import { Config } from "../config";
 13 | import { MusicManager } from "./music";
 14 | 
 15 | // Mock fs-extra with an in-memory memfs volume seeded with two videos, temp/libs entries and three music files
 16 | vi.mock("fs-extra", async () => {
 17 |   const { createFsFromVolume, Volume } = await import("memfs");
 18 |   const vol = Volume.fromJSON({
 19 |     "/Users/gyoridavid/.ai-agents-az-video-generator/videos/video-1.mp4":
 20 |       "mock video content 1",
 21 |     "/Users/gyoridavid/.ai-agents-az-video-generator/videos/video-2.mp4":
 22 |       "mock video content 2",
 23 |     "/Users/gyoridavid/.ai-agents-az-video-generator/temp": null,
 24 |     "/Users/gyoridavid/.ai-agents-az-video-generator/libs": null,
 25 |     "/static/music/happy-music.mp3": "mock music content",
 26 |     "/static/music/sad-music.mp3": "mock music content",
 27 |     "/static/music/chill-music.mp3": "mock music content",
 28 |   });
 29 |   const memfs = createFsFromVolume(vol);
 30 | 
 31 |   const fsExtra = {
 32 |     ...memfs,
 33 |     // fs-extra specific methods, stubbed on top of the memfs API
 34 |     ensureDirSync: vi.fn((path) => {
 35 |       try {
 36 |         memfs.mkdirSync(path, { recursive: true });
 37 |       } catch (error) {} // mkdir failures are deliberately ignored in this stub
 38 |     }),
 39 |     removeSync: vi.fn((path) => {
 40 |       try {
 41 |         const stats = memfs.statSync(path);
 42 |         if (stats.isDirectory()) {
 43 |           // This is simplified and won't handle nested directories
 44 |           memfs.rmdirSync(path);
 45 |         } else {
 46 |           memfs.unlinkSync(path);
 47 |         }
 48 |       } catch (error) {} // stat/remove failures (e.g. missing path) are deliberately ignored
 49 |     }),
 50 |     createWriteStream: vi.fn(() => ({
 51 |       on: vi.fn(),
 52 |       write: vi.fn(),
 53 |       end: vi.fn(),
 54 |     })),
 55 |     readFileSync: vi.fn((path) => {
 56 |       return memfs.readFileSync(path);
 57 |     }),
 58 |   };
 59 |   return {
 60 |     ...fsExtra,
 61 |     default: fsExtra, // expose the same object for both named and default imports
 62 |   };
 63 | });
 64 | 
 65 | // Mock fluent-ffmpeg with a chainable stub: every builder method returns `this`
 66 | vi.mock("fluent-ffmpeg", () => {
 67 |   const mockOn = vi.fn().mockReturnThis();
 68 |   const mockSave = vi.fn().mockReturnThis();
 69 |   const mockPipe = vi.fn().mockReturnThis();
 70 | 
 71 |   const ffmpegMock = vi.fn(() => ({
 72 |     input: vi.fn().mockReturnThis(),
 73 |     audioCodec: vi.fn().mockReturnThis(),
 74 |     audioBitrate: vi.fn().mockReturnThis(),
 75 |     audioChannels: vi.fn().mockReturnThis(),
 76 |     audioFrequency: vi.fn().mockReturnThis(),
 77 |     toFormat: vi.fn().mockReturnThis(),
 78 |     on: mockOn,
 79 |     save: mockSave,
 80 |     pipe: mockPipe,
 81 |   }));
 82 | 
 83 |   ffmpegMock.setFfmpegPath = vi.fn(); // static setter stubbed as a no-op
 84 | 
 85 |   return { default: ffmpegMock };
 86 | });
 87 | 
 88 | // Mock kokoro-js: from_pretrained resolves to a fake TTS whose generate() resolves to an 8-byte buffer at 44100 Hz
 89 | vi.mock("kokoro-js", () => {
 90 |   return {
 91 |     KokoroTTS: {
 92 |       from_pretrained: vi.fn().mockResolvedValue({
 93 |         generate: vi.fn().mockResolvedValue({
 94 |           toWav: vi.fn().mockReturnValue(new ArrayBuffer(8)),
 95 |           audio: new ArrayBuffer(8),
 96 |           sampling_rate: 44100,
 97 |         }),
 98 |       }),
 99 |     },
100 |   };
101 | });
102 | 
103 | // Mock the Remotion bundler and renderer; selectComposition resolves to a fixed 1080x1920, 30 fps, 300-frame composition
104 | vi.mock("@remotion/bundler", () => {
105 |   return {
106 |     bundle: vi.fn().mockResolvedValue("mocked-bundled-url"),
107 |   };
108 | });
109 | vi.mock("@remotion/renderer", () => {
110 |   return {
111 |     renderMedia: vi.fn().mockResolvedValue(undefined),
112 |     selectComposition: vi.fn().mockResolvedValue({
113 |       width: 1080,
114 |       height: 1920,
115 |       fps: 30,
116 |       durationInFrames: 300,
117 |     }),
118 |     ensureBrowser: vi.fn().mockResolvedValue(undefined),
119 |   };
120 | });
121 | 
122 | // Mock @remotion/install-whisper-cpp: download/install resolve to undefined, transcribe resolves to a fixed five-token transcription
123 | vi.mock("@remotion/install-whisper-cpp", () => {
124 |   return {
125 |     downloadWhisperModel: vi.fn().mockResolvedValue(undefined),
126 |     installWhisperCpp: vi.fn().mockResolvedValue(undefined),
127 |     transcribe: vi.fn().mockResolvedValue({
128 |       transcription: [
129 |         {
130 |           text: "This is a mock transcription.",
131 |           offsets: { from: 0, to: 2000 },
132 |           tokens: [
133 |             { text: "This", timestamp: { from: 0, to: 500 } },
134 |             { text: " is", timestamp: { from: 500, to: 800 } },
135 |             { text: " a", timestamp: { from: 800, to: 1000 } },
136 |             { text: " mock", timestamp: { from: 1000, to: 1500 } },
137 |             { text: " transcription.", timestamp: { from: 1500, to: 2000 } },
138 |           ],
139 |         },
140 |       ],
141 |     }),
142 |   };
143 | });
144 | 
// Queue lifecycle check: a queued video reports "processing" for as long as
// its render promise is pending (even when the output file already exists on
// disk) and reports "ready" once the render promise has resolved.
test("test me", async () => {
  const kokoro = await Kokoro.init("fp16");
  const ffmpeg = await FFMpeg.init();

  // Audio conversion is stubbed so no real files are produced.
  vi.spyOn(ffmpeg, "saveNormalizedAudio").mockResolvedValue("mocked-path.wav");
  vi.spyOn(ffmpeg, "saveToMp3").mockResolvedValue("mocked-path.mp3");

  const pexelsAPI = new PexelsAPI("mock-api-key");
  vi.spyOn(pexelsAPI, "findVideo").mockResolvedValue({
    id: "mock-video-id-1",
    url: "https://example.com/mock-video-1.mp4",
    width: 1080,
    height: 1920,
  });

  const config = new Config();
  const remotion = await Remotion.init(config);

  // Control the render promise resolution.
  // Initialised with a no-op so the variable is definitely assigned before it
  // is called below; the Promise executor runs synchronously and replaces it
  // with the real resolver. (Without the initializer, strict TypeScript
  // reports the later call as "used before being assigned".)
  let resolveRenderPromise: () => void = () => {};
  const renderPromiseMock: Promise<void> = new Promise((resolve) => {
    resolveRenderPromise = resolve;
  });
  vi.spyOn(remotion, "render").mockReturnValue(renderPromiseMock);

  const whisper = await Whisper.init(config);

  vi.spyOn(whisper, "CreateCaption").mockResolvedValue([
    { text: "This", startMs: 0, endMs: 500 },
    { text: " is", startMs: 500, endMs: 800 },
    { text: " a", startMs: 800, endMs: 1000 },
    { text: " mock", startMs: 1000, endMs: 1500 },
    { text: " transcription.", startMs: 1500, endMs: 2000 },
  ]);

  const musicManager = new MusicManager(config);

  const shortCreator = new ShortCreator(
    config,
    remotion,
    kokoro,
    whisper,
    ffmpeg,
    pexelsAPI,
    musicManager,
  );

  const videoId = shortCreator.addToQueue(
    [
      {
        text: "test",
        searchTerms: ["test"],
      },
    ],
    {},
  );

  // list videos while the video is being processed
  let videos = shortCreator.listAllVideos();
  expect(videos.find((v) => v.id === videoId)?.status).toBe("processing");

  // create the video file on the file system and check the status again:
  // the queue entry must still win over the presence of the file
  fs.writeFileSync(shortCreator.getVideoPath(videoId), "mock video content");
  videos = shortCreator.listAllVideos();
  expect(videos.find((v) => v.id === videoId)?.status).toBe("processing");

  // resolve the render promise to simulate the video being processed, and check the status again
  resolveRenderPromise();
  await new Promise((resolve) => setTimeout(resolve, 100)); // let the queue process the video
  videos = shortCreator.listAllVideos();
  expect(videos.find((v) => v.id === videoId)?.status).toBe("ready");

  // check the status of the video directly
  const status = shortCreator.status(videoId);
  expect(status).toBe("ready");
});
221 | 
```

--------------------------------------------------------------------------------
/src/components/root/Root.tsx:
--------------------------------------------------------------------------------

```typescript
  1 | import { CalculateMetadataFunction, Composition } from "remotion";
  2 | import { shortVideoSchema } from "../utils";
  3 | import { PortraitVideo } from "../videos/PortraitVideo";
  4 | import { LandscapeVideo } from "../videos/LandscapeVideo";
  5 | import { TestVideo } from "../videos/Test";
  6 | import z from "zod";
  7 | import { AvailableComponentsEnum } from "../types";
  8 | 
  9 | const FPS = 25;
 10 | 
 11 | export const calculateMetadata: CalculateMetadataFunction<
 12 |   z.infer<typeof shortVideoSchema>
 13 | > = async ({ props }) => {
 14 |   const durationInFrames = Math.floor((props.config.durationMs / 1000) * FPS);
 15 |   return {
 16 |     ...props,
 17 |     durationInFrames,
 18 |   };
 19 | };
 20 | 
 21 | export const RemotionRoot: React.FC = () => {
 22 |   return (
 23 |     <>
 24 |       <Composition
 25 |         id={AvailableComponentsEnum.PortraitVideo}
 26 |         component={PortraitVideo}
 27 |         durationInFrames={30}
 28 |         fps={FPS}
 29 |         width={1080}
 30 |         height={1920}
 31 |         defaultProps={{
 32 |           music: {
 33 |             url:
 34 |               "http://localhost:3123/api/music/" +
 35 |               encodeURIComponent(
 36 |                 "Aurora on the Boulevard - National Sweetheart.mp3",
 37 |               ),
 38 |             file: "mellow-smooth-rap-beat-20230107-132480.mp3",
 39 |             start: 0,
 40 |             end: 175,
 41 |           },
 42 |           scenes: [
 43 |             {
 44 |               captions: [
 45 |                 { text: " Hello", startMs: 390, endMs: 990 },
 46 |                 { text: " World.", startMs: 990, endMs: 2000 },
 47 |               ],
 48 |               video:
 49 |                 "https://videos.pexels.com/video-files/4625747/4625747-hd_1080_1920_24fps.mp4",
 50 |               audio: {
 51 |                 url: "http://localhost:3123/api/tmp/cma1lgean0001rlsi52b8h3n3.mp3",
 52 |                 duration: 3.15,
 53 |               },
 54 |             },
 55 |           ],
 56 |           config: {
 57 |             durationMs: 4650,
 58 |             paddingBack: 1500,
 59 |             captionBackgroundColor: "blue",
 60 |             captionPosition: "bottom",
 61 |           },
 62 |         }}
 63 |         calculateMetadata={calculateMetadata}
 64 |       />
 65 |       <Composition
 66 |         id={AvailableComponentsEnum.LandscapeVideo}
 67 |         component={LandscapeVideo}
 68 |         durationInFrames={30}
 69 |         fps={FPS}
 70 |         width={1920}
 71 |         height={1080}
 72 |         defaultProps={{
 73 |           music: {
 74 |             url:
 75 |               "http://localhost:3123/api/music/" +
 76 |               encodeURIComponent(
 77 |                 "Aurora on the Boulevard - National Sweetheart.mp3",
 78 |               ),
 79 |             file: "mellow-smooth-rap-beat-20230107-132480.mp3",
 80 |             start: 0,
 81 |             end: 175,
 82 |           },
 83 |           scenes: [
 84 |             {
 85 |               captions: [
 86 |                 {
 87 |                   text: " A",
 88 |                   startMs: 110,
 89 |                   endMs: 320,
 90 |                 },
 91 |                 {
 92 |                   text: " week",
 93 |                   startMs: 320,
 94 |                   endMs: 590,
 95 |                 },
 96 |                 {
 97 |                   text: " ago,",
 98 |                   startMs: 590,
 99 |                   endMs: 1220,
100 |                 },
101 |                 {
102 |                   text: " a",
103 |                   startMs: 1220,
104 |                   endMs: 1280,
105 |                 },
106 |                 {
107 |                   text: " friend",
108 |                   startMs: 1280,
109 |                   endMs: 1490,
110 |                 },
111 |                 {
112 |                   text: " invited",
113 |                   startMs: 1490,
114 |                   endMs: 1820,
115 |                 },
116 |                 {
117 |                   text: " a",
118 |                   startMs: 1820,
119 |                   endMs: 1880,
120 |                 },
121 |                 {
122 |                   text: " couple",
123 |                   startMs: 1880,
124 |                   endMs: 2310,
125 |                 },
126 |                 {
127 |                   text: " of",
128 |                   startMs: 2310,
129 |                   endMs: 2350,
130 |                 },
131 |                 {
132 |                   text: " other",
133 |                   startMs: 2350,
134 |                   endMs: 2640,
135 |                 },
136 |                 {
137 |                   text: " couples",
138 |                   startMs: 2640,
139 |                   endMs: 3080,
140 |                 },
141 |                 {
142 |                   text: " over",
143 |                   startMs: 3080,
144 |                   endMs: 3400,
145 |                 },
146 |                 {
147 |                   text: " for",
148 |                   startMs: 3400,
149 |                   endMs: 3620,
150 |                 },
151 |                 {
152 |                   text: " dinner.",
153 |                   startMs: 3620,
154 |                   endMs: 4340,
155 |                 },
156 |                 {
157 |                   text: " Eventually,",
158 |                   startMs: 4340,
159 |                   endMs: 5520,
160 |                 },
161 |                 {
162 |                   text: " the",
163 |                   startMs: 5520,
164 |                   endMs: 5550,
165 |                 },
166 |                 {
167 |                   text: " food,",
168 |                   startMs: 5550,
169 |                   endMs: 6300,
170 |                 },
171 |                 {
172 |                   text: " but",
173 |                   startMs: 6300,
174 |                   endMs: 6360,
175 |                 },
176 |                 {
177 |                   text: " not",
178 |                   startMs: 6360,
179 |                   endMs: 6540,
180 |                 },
181 |                 {
182 |                   text: " the",
183 |                   startMs: 6540,
184 |                   endMs: 6780,
185 |                 },
186 |                 {
187 |                   text: " wine,",
188 |                   startMs: 6780,
189 |                   endMs: 7210,
190 |                 },
191 |                 {
192 |                   text: " was",
193 |                   startMs: 7210,
194 |                   endMs: 7400,
195 |                 },
196 |                 {
197 |                   text: " cleared",
198 |                   startMs: 7400,
199 |                   endMs: 7870,
200 |                 },
201 |                 {
202 |                   text: " off",
203 |                   startMs: 7870,
204 |                   endMs: 7980,
205 |                 },
206 |                 {
207 |                   text: " the",
208 |                   startMs: 7980,
209 |                   endMs: 8180,
210 |                 },
211 |                 {
212 |                   text: " table",
213 |                   startMs: 8180,
214 |                   endMs: 8480,
215 |                 },
216 |                 {
217 |                   text: " for",
218 |                   startMs: 8480,
219 |                   endMs: 8770,
220 |                 },
221 |                 {
222 |                   text: " what",
223 |                   startMs: 8770,
224 |                   endMs: 8880,
225 |                 },
226 |                 {
227 |                   text: " turned",
228 |                   startMs: 8880,
229 |                   endMs: 9230,
230 |                 },
231 |                 {
232 |                   text: " out",
233 |                   startMs: 9230,
234 |                   endMs: 9390,
235 |                 },
236 |                 {
237 |                   text: " to",
238 |                   startMs: 9390,
239 |                   endMs: 9510,
240 |                 },
241 |                 {
242 |                   text: " be",
243 |                   startMs: 9510,
244 |                   endMs: 9620,
245 |                 },
246 |                 {
247 |                   text: " some",
248 |                   startMs: 9620,
249 |                   endMs: 9850,
250 |                 },
251 |                 {
252 |                   text: " fierce",
253 |                   startMs: 9850,
254 |                   endMs: 10200,
255 |                 },
256 |                 {
257 |                   text: " scrabbling.",
258 |                   startMs: 10200,
259 |                   endMs: 11000,
260 |                 },
261 |               ],
262 |               video:
263 |                 "https://videos.pexels.com/video-files/1168989/1168989-hd_1920_1080_30fps.mp4",
264 |               audio: {
265 |                 url: "http://localhost:3123/api/tmp/cma9ctvpo0001aqsia12i82db.mp3",
266 |                 duration: 12.8,
267 |               },
268 |             },
269 |           ],
270 |           config: {
271 |             durationMs: 14300,
272 |             paddingBack: 1500,
273 |             captionBackgroundColor: "#ff0000",
274 |             captionPosition: "center",
275 |           },
276 |         }}
277 |         calculateMetadata={calculateMetadata}
278 |       />
279 |       <Composition
280 |         id="TestVideo"
281 |         component={TestVideo}
282 |         durationInFrames={14}
283 |         fps={23}
284 |         width={100}
285 |         height={100}
286 |       />
287 |     </>
288 |   );
289 | };
290 | 
```

--------------------------------------------------------------------------------
/src/short-creator/ShortCreator.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { OrientationEnum } from "./../types/shorts";
  2 | /* eslint-disable @remotion/deterministic-randomness */
  3 | import fs from "fs-extra";
  4 | import cuid from "cuid";
  5 | import path from "path";
  6 | import https from "https";
  7 | import http from "http";
  8 | 
  9 | import { Kokoro } from "./libraries/Kokoro";
 10 | import { Remotion } from "./libraries/Remotion";
 11 | import { Whisper } from "./libraries/Whisper";
 12 | import { FFMpeg } from "./libraries/FFmpeg";
 13 | import { PexelsAPI } from "./libraries/Pexels";
 14 | import { Config } from "../config";
 15 | import { logger } from "../logger";
 16 | import { MusicManager } from "./music";
 17 | import type {
 18 |   SceneInput,
 19 |   RenderConfig,
 20 |   Scene,
 21 |   VideoStatus,
 22 |   MusicMoodEnum,
 23 |   MusicTag,
 24 |   MusicForVideo,
 25 | } from "../types/shorts";
 26 | 
 27 | export class ShortCreator {
 28 |   private queue: {
 29 |     sceneInput: SceneInput[];
 30 |     config: RenderConfig;
 31 |     id: string;
 32 |   }[] = [];
 33 |   constructor(
 34 |     private config: Config,
 35 |     private remotion: Remotion,
 36 |     private kokoro: Kokoro,
 37 |     private whisper: Whisper,
 38 |     private ffmpeg: FFMpeg,
 39 |     private pexelsApi: PexelsAPI,
 40 |     private musicManager: MusicManager,
 41 |   ) {}
 42 | 
 43 |   public status(id: string): VideoStatus {
 44 |     const videoPath = this.getVideoPath(id);
 45 |     if (this.queue.find((item) => item.id === id)) {
 46 |       return "processing";
 47 |     }
 48 |     if (fs.existsSync(videoPath)) {
 49 |       return "ready";
 50 |     }
 51 |     return "failed";
 52 |   }
 53 | 
 54 |   public addToQueue(sceneInput: SceneInput[], config: RenderConfig): string {
 55 |     // todo add mutex lock
 56 |     const id = cuid();
 57 |     this.queue.push({
 58 |       sceneInput,
 59 |       config,
 60 |       id,
 61 |     });
 62 |     if (this.queue.length === 1) {
 63 |       this.processQueue();
 64 |     }
 65 |     return id;
 66 |   }
 67 | 
 68 |   private async processQueue(): Promise<void> {
 69 |     // todo add a semaphore
 70 |     if (this.queue.length === 0) {
 71 |       return;
 72 |     }
 73 |     const { sceneInput, config, id } = this.queue[0];
 74 |     logger.debug(
 75 |       { sceneInput, config, id },
 76 |       "Processing video item in the queue",
 77 |     );
 78 |     try {
 79 |       await this.createShort(id, sceneInput, config);
 80 |       logger.debug({ id }, "Video created successfully");
 81 |     } catch (error: unknown) {
 82 |       logger.error(error, "Error creating video");
 83 |     } finally {
 84 |       this.queue.shift();
 85 |       this.processQueue();
 86 |     }
 87 |   }
 88 | 
 89 |   private async createShort(
 90 |     videoId: string,
 91 |     inputScenes: SceneInput[],
 92 |     config: RenderConfig,
 93 |   ): Promise<string> {
 94 |     logger.debug(
 95 |       {
 96 |         inputScenes,
 97 |         config,
 98 |       },
 99 |       "Creating short video",
100 |     );
101 |     const scenes: Scene[] = [];
102 |     let totalDuration = 0;
103 |     const excludeVideoIds = [];
104 |     const tempFiles = [];
105 | 
106 |     const orientation: OrientationEnum =
107 |       config.orientation || OrientationEnum.portrait;
108 | 
109 |     let index = 0;
110 |     for (const scene of inputScenes) {
111 |       const audio = await this.kokoro.generate(
112 |         scene.text,
113 |         config.voice ?? "af_heart",
114 |       );
115 |       let { audioLength } = audio;
116 |       const { audio: audioStream } = audio;
117 | 
118 |       // add the paddingBack in seconds to the last scene
119 |       if (index + 1 === inputScenes.length && config.paddingBack) {
120 |         audioLength += config.paddingBack / 1000;
121 |       }
122 | 
123 |       const tempId = cuid();
124 |       const tempWavFileName = `${tempId}.wav`;
125 |       const tempMp3FileName = `${tempId}.mp3`;
126 |       const tempVideoFileName = `${tempId}.mp4`;
127 |       const tempWavPath = path.join(this.config.tempDirPath, tempWavFileName);
128 |       const tempMp3Path = path.join(this.config.tempDirPath, tempMp3FileName);
129 |       const tempVideoPath = path.join(
130 |         this.config.tempDirPath,
131 |         tempVideoFileName,
132 |       );
133 |       tempFiles.push(tempVideoPath);
134 |       tempFiles.push(tempWavPath, tempMp3Path);
135 | 
136 |       await this.ffmpeg.saveNormalizedAudio(audioStream, tempWavPath);
137 |       const captions = await this.whisper.CreateCaption(tempWavPath);
138 | 
139 |       await this.ffmpeg.saveToMp3(audioStream, tempMp3Path);
140 |       const video = await this.pexelsApi.findVideo(
141 |         scene.searchTerms,
142 |         audioLength,
143 |         excludeVideoIds,
144 |         orientation,
145 |       );
146 | 
147 |       logger.debug(`Downloading video from ${video.url} to ${tempVideoPath}`);
148 | 
149 |       await new Promise<void>((resolve, reject) => {
150 |         const fileStream = fs.createWriteStream(tempVideoPath);
151 |         https
152 |           .get(video.url, (response: http.IncomingMessage) => {
153 |             if (response.statusCode !== 200) {
154 |               reject(
155 |                 new Error(`Failed to download video: ${response.statusCode}`),
156 |               );
157 |               return;
158 |             }
159 | 
160 |             response.pipe(fileStream);
161 | 
162 |             fileStream.on("finish", () => {
163 |               fileStream.close();
164 |               logger.debug(`Video downloaded successfully to ${tempVideoPath}`);
165 |               resolve();
166 |             });
167 |           })
168 |           .on("error", (err: Error) => {
169 |             fs.unlink(tempVideoPath, () => {}); // Delete the file if download failed
170 |             logger.error(err, "Error downloading video:");
171 |             reject(err);
172 |           });
173 |       });
174 | 
175 |       excludeVideoIds.push(video.id);
176 | 
177 |       scenes.push({
178 |         captions,
179 |         video: `http://localhost:${this.config.port}/api/tmp/${tempVideoFileName}`,
180 |         audio: {
181 |           url: `http://localhost:${this.config.port}/api/tmp/${tempMp3FileName}`,
182 |           duration: audioLength,
183 |         },
184 |       });
185 | 
186 |       totalDuration += audioLength;
187 |       index++;
188 |     }
189 |     if (config.paddingBack) {
190 |       totalDuration += config.paddingBack / 1000;
191 |     }
192 | 
193 |     const selectedMusic = this.findMusic(totalDuration, config.music);
194 |     logger.debug({ selectedMusic }, "Selected music for the video");
195 | 
196 |     await this.remotion.render(
197 |       {
198 |         music: selectedMusic,
199 |         scenes,
200 |         config: {
201 |           durationMs: totalDuration * 1000,
202 |           paddingBack: config.paddingBack,
203 |           ...{
204 |             captionBackgroundColor: config.captionBackgroundColor,
205 |             captionPosition: config.captionPosition,
206 |           },
207 |           musicVolume: config.musicVolume,
208 |         },
209 |       },
210 |       videoId,
211 |       orientation,
212 |     );
213 | 
214 |     for (const file of tempFiles) {
215 |       fs.removeSync(file);
216 |     }
217 | 
218 |     return videoId;
219 |   }
220 | 
221 |   public getVideoPath(videoId: string): string {
222 |     return path.join(this.config.videosDirPath, `${videoId}.mp4`);
223 |   }
224 | 
225 |   public deleteVideo(videoId: string): void {
226 |     const videoPath = this.getVideoPath(videoId);
227 |     fs.removeSync(videoPath);
228 |     logger.debug({ videoId }, "Deleted video file");
229 |   }
230 | 
231 |   public getVideo(videoId: string): Buffer {
232 |     const videoPath = this.getVideoPath(videoId);
233 |     if (!fs.existsSync(videoPath)) {
234 |       throw new Error(`Video ${videoId} not found`);
235 |     }
236 |     return fs.readFileSync(videoPath);
237 |   }
238 | 
239 |   private findMusic(videoDuration: number, tag?: MusicMoodEnum): MusicForVideo {
240 |     const musicFiles = this.musicManager.musicList().filter((music) => {
241 |       if (tag) {
242 |         return music.mood === tag;
243 |       }
244 |       return true;
245 |     });
246 |     return musicFiles[Math.floor(Math.random() * musicFiles.length)];
247 |   }
248 | 
249 |   public ListAvailableMusicTags(): MusicTag[] {
250 |     const tags = new Set<MusicTag>();
251 |     this.musicManager.musicList().forEach((music) => {
252 |       tags.add(music.mood as MusicTag);
253 |     });
254 |     return Array.from(tags.values());
255 |   }
256 | 
257 |   public listAllVideos(): { id: string; status: VideoStatus }[] {
258 |     const videos: { id: string; status: VideoStatus }[] = [];
259 | 
260 |     // Check if videos directory exists
261 |     if (!fs.existsSync(this.config.videosDirPath)) {
262 |       return videos;
263 |     }
264 | 
265 |     // Read all files in the videos directory
266 |     const files = fs.readdirSync(this.config.videosDirPath);
267 | 
268 |     // Filter for MP4 files and extract video IDs
269 |     for (const file of files) {
270 |       if (file.endsWith(".mp4")) {
271 |         const videoId = file.replace(".mp4", "");
272 | 
273 |         let status: VideoStatus = "ready";
274 |         const inQueue = this.queue.find((item) => item.id === videoId);
275 |         if (inQueue) {
276 |           status = "processing";
277 |         }
278 | 
279 |         videos.push({ id: videoId, status });
280 |       }
281 |     }
282 | 
283 |     // Add videos that are in the queue but not yet rendered
284 |     for (const queueItem of this.queue) {
285 |       const existingVideo = videos.find((v) => v.id === queueItem.id);
286 |       if (!existingVideo) {
287 |         videos.push({ id: queueItem.id, status: "processing" });
288 |       }
289 |     }
290 | 
291 |     return videos;
292 |   }
293 | 
294 |   public ListAvailableVoices(): string[] {
295 |     return this.kokoro.listAvailableVoices();
296 |   }
297 | }
298 | 
```