This is page 1 of 3. Use http://codebase.md/gyoridavid/short-video-maker?lines=true&page={x} to view the full context. # Directory Structure ``` ├── __mocks__ │ └── pexels-response.json ├── .dockerignore ├── .editorconfig ├── .env.example ├── .gitignore ├── .prettierrc ├── CONTRIBUTING.md ├── docker-compose.yml ├── eslint.config.mjs ├── LICENSE ├── main-cuda.Dockerfile ├── main-tiny.Dockerfile ├── main.Dockerfile ├── package.json ├── pnpm-lock.yaml ├── postcss.config.js ├── postcss.config.mjs ├── README.md ├── remotion.config.ts ├── rest.http ├── src │ ├── components │ │ ├── root │ │ │ ├── index.ts │ │ │ └── Root.tsx │ │ ├── types.ts │ │ ├── utils.ts │ │ └── videos │ │ ├── LandscapeVideo.tsx │ │ ├── PortraitVideo.tsx │ │ └── Test.tsx │ ├── config.ts │ ├── index.ts │ ├── logger.ts │ ├── scripts │ │ ├── install.ts │ │ └── normalizeMusic.ts │ ├── server │ │ ├── routers │ │ │ ├── mcp.ts │ │ │ └── rest.ts │ │ ├── server.ts │ │ └── validator.ts │ ├── short-creator │ │ ├── libraries │ │ │ ├── FFmpeg.ts │ │ │ ├── Kokoro.ts │ │ │ ├── Pexels.test.ts │ │ │ ├── Pexels.ts │ │ │ ├── Remotion.ts │ │ │ └── Whisper.ts │ │ ├── music.ts │ │ ├── ShortCreator.test.ts │ │ └── ShortCreator.ts │ ├── types │ │ └── shorts.ts │ └── ui │ ├── App.tsx │ ├── components │ │ └── Layout.tsx │ ├── index.html │ ├── index.tsx │ ├── pages │ │ ├── VideoCreator.tsx │ │ ├── VideoDetails.tsx │ │ └── VideoList.tsx │ ├── public │ │ └── index.html │ └── styles │ └── index.css ├── static │ └── music │ ├── Aurora on the Boulevard - National Sweetheart.mp3 │ ├── Baby Animals Playing - Joel Cummins.mp3 │ ├── Banjo Doops - Joel Cummins.mp3 │ ├── Buckle Up - Jeremy Korpas.mp3 │ ├── Cafecito por la Manana - Cumbia Deli.mp3 │ ├── Champion - Telecasted.mp3 │ ├── Crystaline - Quincas Moreira.mp3 │ ├── Curse of the Witches - Jimena Contreras.mp3 │ ├── Delayed Baggage - Ryan Stasik.mp3 │ ├── Final Soliloquy - Asher Fulero.mp3 │ ├── Heartbeat Of The Wind - Asher Fulero.mp3 │ ├── Honey, I Dismembered The Kids - Ezra 
Lipp.mp3 │ ├── Hopeful - Nat Keefe.mp3 │ ├── Hopeful Freedom - Asher Fulero.mp3 │ ├── Hopeless - Jimena Contreras.mp3 │ ├── Jetski - Telecasted.mp3 │ ├── Like It Loud - Dyalla.mp3 │ ├── Name The Time And Place - Telecasted.mp3 │ ├── Night Hunt - Jimena Contreras.mp3 │ ├── No.2 Remembering Her - Esther Abrami.mp3 │ ├── Oh Please - Telecasted.mp3 │ ├── On The Hunt - Andrew Langdon.mp3 │ ├── Organic Guitar House - Dyalla.mp3 │ ├── Phantom - Density & Time.mp3 │ ├── README.md │ ├── Restless Heart - Jimena Contreras.mp3 │ ├── Seagull - Telecasted.mp3 │ ├── Sinister - Anno Domini Beats.mp3 │ ├── Sly Sky - Telecasted.mp3 │ ├── Touch - Anno Domini Beats.mp3 │ ├── Traversing - Godmode.mp3 │ └── Twin Engines - Jeremy Korpas.mp3 ├── tailwind.config.js ├── tsconfig.build.json ├── tsconfig.json ├── vite.config.ts └── vitest.config.ts ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | node_modules 2 | dist 3 | .DS_Store 4 | .env 5 | ``` -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- ``` 1 | node_modules 2 | .git 3 | .gitignore 4 | *.md 5 | dist 6 | ``` -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- ``` 1 | { 2 | "useTabs": false, 3 | "bracketSpacing": true, 4 | "tabWidth": 2 5 | } 6 | 7 | ``` -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- ``` 1 | root = true 2 | 3 | [*] 4 | end_of_line = crlf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | ``` 
-------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- ``` 1 | PEXELS_API_KEY= # crucial for the project to work 2 | LOG_LEVEL=trace # trace, debug, info, warn, error, fatal, silent 3 | WHISPER_VERBOSE=true 4 | PORT=3123 5 | DEV=true # local development mode 6 | DATA_DIR_PATH= # only for docker, otherwise leave empty 7 | ``` -------------------------------------------------------------------------------- /static/music/README.md: -------------------------------------------------------------------------------- ```markdown 1 | # Music Library for Shorts Creator 2 | 3 | This directory contains background music tracks for use in the shorts creator project. All music files are sourced from the YouTube audio library, and are free to use under their license. You can use this audio track in any of your videos, including videos that you monetize. No attribution is required. 4 | 5 | ## Music Collection 6 | 7 | The music is categorized by mood to match the `MusicMoodEnum` in the project: 8 | 9 | ## Mood Categories 10 | 11 | The following moods are defined in the project's `MusicMoodEnum`: 12 | 13 | - sad 14 | - melancholic 15 | - happy 16 | - euphoric/high 17 | - excited 18 | - chill 19 | - uneasy 20 | - angry 21 | - dark 22 | - hopeful 23 | - contemplative 24 | - funny/quirky 25 | 26 | ## How to Add New Music 27 | 28 | To add new music to the project: 29 | 30 | 1. Add your MP3 file to this directory (`static/music/`) 31 | 2. 
Update the `src/short-creator/music.ts` file by adding a new record to the `musicList` array: 32 | 33 | ```typescript 34 | { 35 | file: "your-new-music-file.mp3", // Filename of your MP3 36 | start: 5, // Start time in seconds (when to begin playing) 37 | end: 30, // End time in seconds (when to stop playing) 38 | mood: MusicMoodEnum.happy, // Mood tag for the music 39 | } 40 | ``` 41 | 42 | ## Usage 43 | 44 | The shorts creator uses these mood tags to filter and match appropriate music with video content. Choose tags carefully to ensure proper matching between music mood and video content. 45 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | ## [📚 Join our Skool community for support, premium content and more!](https://www.skool.com/ai-agents-az/about?s1m) 2 | 3 | ### Be part of a growing community and help us create more content like this 4 | 5 | # Description 6 | 7 | An open source automated video creation tool for generating short-form video content. Short Video Maker combines text-to-speech, automatic captions, background videos, and music to create engaging short videos from simple text inputs. 8 | 9 | This project is meant to provide a free alternative to heavy GPU-power hungry video generation (and a free alternative to expensive, third-party API calls). It doesn't generate a video from scratch based on an image or an image prompt. 10 | 11 | The repository was open-sourced by the [AI Agents A-Z Youtube Channel](https://www.youtube.com/channel/UCloXqLhp_KGhHBe1kwaL2Tg). We encourage you to check out the channel for more AI-related content and tutorials. 12 | 13 | The server exposes an [MCP](https://github.com/modelcontextprotocol) and a REST server. 14 | 15 | While the MCP server can be used with an AI Agent (like n8n) the REST endpoints provide more flexibility for video generation. 
16 | 17 | You can find example n8n workflows created with the REST/MCP server [in this repository](https://github.com/gyoridavid/ai_agents_az/tree/main/episode_7). 18 | 19 | # TOC 20 | 21 | ## Getting started 22 | 23 | - [Requirements](#general-requirements) 24 | - [How to run the server](#getting-started-1) 25 | - [Web UI](#web-ui) 26 | - [Tutorial](#tutorial-with-n8n) 27 | - [Examples](#examples) 28 | 29 | ## Usage 30 | 31 | - [Environment variables](#environment-variables) 32 | - [REST API](#rest-api) 33 | - [Configuration options](#configuration-options) 34 | - [MCP](#mcp-server) 35 | 36 | ## Info 37 | 38 | - [Features](#features) 39 | - [How it works](#how-it-works) 40 | - [Limitations](#limitations) 41 | - [Concepts](#concepts) 42 | - [Troubleshooting](#troubleshooting) 43 | - [Deploying in the cloud](#deploying-to-the-cloud) 44 | - [FAQ](#faq) 45 | - [Dependencies](#dependencies-for-the-video-generation) 46 | - [Contributing](#how-to-contribute) 47 | - [License](#license) 48 | - [Acknowledgements](#acknowledgments) 49 | 50 | # Tutorial with n8n 51 | 52 | [](https://www.youtube.com/watch?v=jzsQpn-AciM) 53 | 54 | # Examples 55 | 56 | <table> 57 | <tr> 58 | <td> 59 | <video src="https://github.com/user-attachments/assets/1b488e7d-1b40-439d-8767-6ab51dbc0922" width="480" height="270"></video> 60 | </td> 61 | <td> 62 | <video src="https://github.com/user-attachments/assets/bb7ce80f-e6e1-44e5-ba4e-9b13d917f55b" width="270" height="480"></video> 63 | </td> 64 | <td> 65 | </tr> 66 | </table> 67 | 68 | # Features 69 | 70 | - Generate complete short videos from text prompts 71 | - Text-to-speech conversion 72 | - Automatic caption generation and styling 73 | - Background video search and selection via Pexels 74 | - Background music with genre/mood selection 75 | - Serve as both REST API and Model Context Protocol (MCP) server 76 | 77 | # How It Works 78 | 79 | Shorts Creator takes simple text inputs and search terms, then: 80 | 81 | 1. 
Converts text to speech using Kokoro TTS 82 | 2. Generates accurate captions via Whisper 83 | 3. Finds relevant background videos from Pexels 84 | 4. Composes all elements with Remotion 85 | 5. Renders a professional-looking short video with perfectly timed captions 86 | 87 | # Limitations 88 | 89 | - The project is only capable of generating videos with English voiceover (kokoro-js doesn’t support other languages at the moment) 90 | - The background videos are sourced from Pexels 91 | 92 | # General Requirements 93 | 94 | - internet 95 | - free pexels api key 96 | - ≥ 3 gb free RAM, my recommendation is 4gb RAM 97 | - ≥ 2 vCPU 98 | - ≥ 5gb disc space 99 | 100 | 101 | # Concepts 102 | 103 | ## Scene 104 | 105 | Each video is assembled from multiple scenes. Each scene consists of 106 | 107 | 1. Text: Narration, the text the TTS will read and create captions from. 108 | 2. Search terms: The keywords the server should use to find videos from the Pexels API. If none can be found, joker terms are used (`nature`, `globe`, `space`, `ocean`) 109 | 110 | # Getting started 111 | 112 | ## Docker (recommended) 113 | 114 | There are three docker images, for three different use cases. Generally speaking, most of the time you want to spin up the `tiny` one. 
115 | 116 | ### Tiny 117 | 118 | - Uses the `tiny.en` whisper.cpp model 119 | - Uses the `q4` quantized kokoro model 120 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources 121 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources 122 | 123 | ```jsx 124 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= gyoridavid/short-video-maker:latest-tiny 125 | ``` 126 | 127 | ### Normal 128 | 129 | - Uses the `base.en` whisper.cpp model 130 | - Uses the `fp32` kokoro model 131 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources 132 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources 133 | 134 | ```jsx 135 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= gyoridavid/short-video-maker:latest 136 | ``` 137 | 138 | ### Cuda 139 | 140 | If you own an Nvidia GPU and you want to use a larger whisper model with GPU acceleration, you can use the CUDA optimised Docker image. 141 | 142 | - Uses the `medium.en` whisper.cpp model (with GPU acceleration) 143 | - Uses `fp32` kokoro model 144 | - `CONCURRENCY=1` to overcome OOM errors coming from Remotion with limited resources 145 | - `VIDEO_CACHE_SIZE_IN_BYTES=2097152000` (2gb) to overcome OOM errors coming from Remotion with limited resources 146 | 147 | ```jsx 148 | docker run -it --rm --name short-video-maker -p 3123:3123 -e LOG_LEVEL=debug -e PEXELS_API_KEY= --gpus=all gyoridavid/short-video-maker:latest-cuda 149 | ``` 150 | 151 | ## Docker compose 152 | 153 | You might use Docker Compose to run n8n or other services, and you want to combine them. Make sure you add the shared network to the service configuration. 
154 | 155 | ```bash 156 | version: "3" 157 | 158 | services: 159 | short-video-maker: 160 | image: gyoridavid/short-video-maker:latest-tiny 161 | environment: 162 | - LOG_LEVEL=debug 163 | - PEXELS_API_KEY= 164 | ports: 165 | - "3123:3123" 166 | volumes: 167 | - ./videos:/app/data/videos # expose the generated videos 168 | 169 | ``` 170 | 171 | If you are using the [Self-hosted AI starter kit](https://github.com/n8n-io/self-hosted-ai-starter-kit) you want to add `networks: ['demo']` to the `short-video-maker` service so you can reach it with http://short-video-maker:3123 in n8n. 172 | 173 | # NPM 174 | 175 | While Docker is the recommended way to run the project, you can run it with npm or npx. 176 | On top of the general requirements, the following are necessary to run the server. 177 | 178 | ## Supported platforms 179 | 180 | - Ubuntu ≥ 22.04 (libc 2.5 for Whisper.cpp) 181 | - Required packages: `git wget cmake ffmpeg curl make libsdl2-dev libnss3 libdbus-1-3 libatk1.0-0 libgbm-dev libasound2 libxrandr2 libxkbcommon-dev libxfixes3 libxcomposite1 libxdamage1 libatk-bridge2.0-0 libpango-1.0-0 libcairo2 libcups2` 182 | - Mac OS 183 | - ffmpeg (`brew install ffmpeg`) 184 | - node.js (tested on 22+) 185 | 186 | Windows is **NOT** supported at the moment (whisper.cpp installation fails occasionally). 187 | 188 | # Web UI 189 | 190 | @mushitori made a Web UI to generate the videos from your browser. 
191 | 192 | <table> 193 | <tr> 194 | <td> 195 | <img width="1088" alt="Screenshot 2025-05-12 at 1 45 11 PM" src="https://github.com/user-attachments/assets/2ab64aea-f639-41b0-bd19-2fcf73bb1a3d" /> 196 | </td> 197 | <td> 198 | <img width="1075" alt="Screenshot 2025-05-12 at 1 45 44 PM" src="https://github.com/user-attachments/assets/0ff568fe-ddcb-4dad-ae62-2640290aef1e" /> 199 | </td> 200 | <td> 201 | <img width="1083" alt="Screenshot 2025-05-12 at 1 45 51 PM" src="https://github.com/user-attachments/assets/d3c1c826-3cb3-4313-b17c-605ff612fb63" /> 202 | </td> 203 | <td> 204 | <img width="1070" alt="Screenshot 2025-05-12 at 1 46 42 PM" src="https://github.com/user-attachments/assets/18edb1a0-9fc2-48b3-8896-e919e7dc57ff" /> 205 | </td> 206 | </tr> 207 | </table> 208 | 209 | You can load it on http://localhost:3123 210 | 211 | # Environment variables 212 | 213 | ## 🟢 Configuration 214 | 215 | | key | description | default | 216 | | --------------- | --------------------------------------------------------------- | ------- | 217 | | PEXELS_API_KEY | [your (free) Pexels API key](https://www.pexels.com/api/) | | 218 | | LOG_LEVEL | pino log level | info | 219 | | WHISPER_VERBOSE | whether the output of whisper.cpp should be forwarded to stdout | false | 220 | | PORT | the port the server will listen on | 3123 | 221 | 222 | ## ⚙️ System configuration 223 | 224 | | key | description | default | 225 | | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | 226 | | KOKORO_MODEL_PRECISION | The size of the Kokoro model to use. 
Valid options are `fp32`, `fp16`, `q8`, `q4`, `q4f16` | depends, see the descriptions of the docker images above ^^ | 227 | | CONCURRENCY | [concurrency refers to how many browser tabs are opened in parallel during a render. Each Chrome tab renders web content and then screenshots it.](https://www.remotion.dev/docs/terminology/concurrency). Tweaking this value helps with running the project with limited resources. | depends, see the descriptions of the docker images above ^^ | 228 | | VIDEO_CACHE_SIZE_IN_BYTES | Cache for [<OffthreadVideo>](https://remotion.dev/docs/offthreadvideo) frames in Remotion. Tweaking this value helps with running the project with limited resources. | depends, see the descriptions of the docker images above ^^ | 229 | 230 | ## ⚠️ Danger zone 231 | 232 | | key | description | default | 233 | | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | 234 | | WHISPER_MODEL | Which whisper.cpp model to use. Valid options are `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large-v1`, `large-v2`, `large-v3`, `large-v3-turbo` | Depends, see the descriptions of the docker images above. For npm, the default option is `medium.en` | 235 | | DATA_DIR_PATH | the data directory of the project | `~/.ai-agents-az-video-generator` with npm, `/app/data` in the Docker images | 236 | | DOCKER | whether the project is running in a Docker container | `true` for the docker images, otherwise `false` | 237 | | DEV | guess! 
:) | `false` | 238 | 239 | # Configuration options 240 | 241 | | key | description | default | 242 | | ---------------------- | -------------------------------------------------------------------------------------------------------------- | ---------- | 243 | | paddingBack | The end screen, for how long the video should keep playing after the narration has finished (in milliseconds). | 0 | 244 | | music | The mood of the background music. Get the available options from the GET `/api/music-tags` endpoint. | random | 245 | | captionPosition | The position where the captions should be rendered. Possible options: `top`, `center`, `bottom`. | `bottom` | 246 | | captionBackgroundColor | The background color of the active caption item. | `blue` | 247 | | voice | The Kokoro voice. | `af_heart` | 248 | | orientation | The video orientation. Possible options are `portrait` and `landscape` | `portrait` | 249 | | musicVolume | Set the volume of the background music. Possible options are `low` `medium` `high` and `muted` | `high` | 250 | 251 | # Usage 252 | 253 | ## MCP server 254 | 255 | ## Server URLs 256 | 257 | `/mcp/sse` 258 | 259 | `/mcp/messages` 260 | 261 | ## Available tools 262 | 263 | - `create-short-video` Creates a short video - the LLM will figure out the right configuration. If you want to use specific configuration, you need to specify those in your prompt. 264 | - `get-video-status` Somewhat useless, it’s meant for checking the status of the video, but since the AI agents aren’t really good with the concept of time, you’ll probably end up using the REST API for that anyway. 
265 | 266 | # REST API 267 | 268 | ### GET `/health` 269 | 270 | Healthcheck endpoint 271 | 272 | ```bash 273 | curl --location 'localhost:3123/health' 274 | ``` 275 | 276 | ```bash 277 | { 278 | "status": "ok" 279 | } 280 | ``` 281 | 282 | ### POST `/api/short-video` 283 | 284 | ```bash 285 | curl --location 'localhost:3123/api/short-video' \ 286 | --header 'Content-Type: application/json' \ 287 | --data '{ 288 | "scenes": [ 289 | { 290 | "text": "Hello world!", 291 | "searchTerms": ["river"] 292 | } 293 | ], 294 | "config": { 295 | "paddingBack": 1500, 296 | "music": "chill" 297 | } 298 | }' 299 | ``` 300 | 301 | ```bash 302 | { 303 | "videoId": "cma9sjly700020jo25vwzfnv9" 304 | } 305 | ``` 306 | 307 | ### GET `/api/short-video/{id}/status` 308 | 309 | ```bash 310 | curl --location 'localhost:3123/api/short-video/cm9ekme790000hysi5h4odlt1/status' 311 | ``` 312 | 313 | ```bash 314 | { 315 | "status": "ready" 316 | } 317 | ``` 318 | 319 | ### GET `/api/short-video/{id}` 320 | 321 | ```bash 322 | curl --location 'localhost:3123/api/short-video/cm9ekme790000hysi5h4odlt1' 323 | ``` 324 | 325 | Response: the binary data of the video. 
326 | 327 | ### GET `/api/short-videos` 328 | 329 | ```bash 330 | curl --location 'localhost:3123/api/short-videos' 331 | ``` 332 | 333 | ```bash 334 | { 335 | "videos": [ 336 | { 337 | "id": "cma9wcwfc0000brsi60ur4lib", 338 | "status": "processing" 339 | } 340 | ] 341 | } 342 | ``` 343 | 344 | ### DELETE `/api/short-video/{id}` 345 | 346 | ```bash 347 | curl --location --request DELETE 'localhost:3123/api/short-video/cma9wcwfc0000brsi60ur4lib' 348 | ``` 349 | 350 | ```bash 351 | { 352 | "success": true 353 | } 354 | ``` 355 | 356 | ### GET `/api/voices` 357 | 358 | ```bash 359 | curl --location 'localhost:3123/api/voices' 360 | ``` 361 | 362 | ```bash 363 | [ 364 | "af_heart", 365 | "af_alloy", 366 | "af_aoede", 367 | "af_bella", 368 | "af_jessica", 369 | "af_kore", 370 | "af_nicole", 371 | "af_nova", 372 | "af_river", 373 | "af_sarah", 374 | "af_sky", 375 | "am_adam", 376 | "am_echo", 377 | "am_eric", 378 | "am_fenrir", 379 | "am_liam", 380 | "am_michael", 381 | "am_onyx", 382 | "am_puck", 383 | "am_santa", 384 | "bf_emma", 385 | "bf_isabella", 386 | "bm_george", 387 | "bm_lewis", 388 | "bf_alice", 389 | "bf_lily", 390 | "bm_daniel", 391 | "bm_fable" 392 | ] 393 | ``` 394 | 395 | ### GET `/api/music-tags` 396 | 397 | ```bash 398 | curl --location 'localhost:3123/api/music-tags' 399 | ``` 400 | 401 | ```bash 402 | [ 403 | "sad", 404 | "melancholic", 405 | "happy", 406 | "euphoric/high", 407 | "excited", 408 | "chill", 409 | "uneasy", 410 | "angry", 411 | "dark", 412 | "hopeful", 413 | "contemplative", 414 | "funny/quirky" 415 | ] 416 | ``` 417 | 418 | # Troubleshooting 419 | 420 | ## Docker 421 | 422 | The server needs at least 3gb free memory. Make sure to allocate enough RAM to Docker. 423 | 424 | If you are running the server from Windows and via wsl2, you need to set the resource limits from the [wsl utility 2](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#configure-global-options-with-wslconfig) - otherwise set it from Docker Desktop. 
(Ubuntu is not restricting the resources unless specified with the run command). 425 | 426 | ## NPM 427 | 428 | Make sure all the necessary packages are installed. 429 | 430 | # n8n 431 | 432 | Setting up the MCP (or REST) server depends on how you run n8n and the server. Please follow the examples from the matrix below. 433 | 434 | | | n8n is running locally, using `n8n start` | n8n is running locally using Docker | n8n is running in the cloud | 435 | | ------------------------------------------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ | 436 | | `short-video-maker` is running in Docker, locally | `http://localhost:3123` | It depends. You can technically use `http://host.docker.internal:3123` as it points to the host, but you could configure to use the same network and use the service name to communicate like `http://short-video-maker:3123` | won’t work - deploy `short-video-maker` to the cloud | 437 | | `short-video-maker` is running with npm/npx | `http://localhost:3123` | `http://host.docker.internal:3123` | won’t work - deploy `short-video-maker` to the cloud | 438 | | `short-video-maker` is running in the cloud | You should use your IP address `http://{YOUR_IP}:3123` | You should use your IP address `http://{YOUR_IP}:3123` | You should use your IP address `http://{YOUR_IP}:3123` | 439 | 440 | # Deploying to the cloud 441 | 442 | While each VPS provider is different, and it’s impossible to provide configuration to all of them, here are some tips. 
443 | 444 | - Use Ubuntu ≥ 22.04 445 | - Have ≥ 4gb RAM, ≥ 2vCPUs and ≥5gb storage 446 | - Use [pm2](https://pm2.keymetrics.io/) to run/manage the server 447 | - Put the environment variables to the `.bashrc` file (or similar) 448 | 449 | # FAQ 450 | 451 | ## Can I use other languages? (French, German etc.) 452 | 453 | Unfortunately, it’s not possible at the moment. Kokoro-js only supports English. 454 | 455 | ## Can I pass in images and videos and can it stitch it together 456 | 457 | No 458 | 459 | ## Should I run the project with `npm` or `docker`? 460 | 461 | Docker is the recommended way to run the project. 462 | 463 | ## How much GPU is being used for the video generation? 464 | 465 | Honestly, not a lot - only whisper.cpp can be accelerated. 466 | 467 | Remotion is CPU-heavy, and [Kokoro-js](https://github.com/hexgrad/kokoro) runs on the CPU. 468 | 469 | ## Is there a UI that I can use to generate the videos 470 | 471 | No (t yet) 472 | 473 | ## Can I select different source for the videos than Pexels, or provide my own video 474 | 475 | No 476 | 477 | ## Can the project generate videos from images? 
478 | 479 | No 480 | 481 | ## Dependencies for the video generation 482 | 483 | | Dependency | Version | License | Purpose | 484 | | ------------------------------------------------------ | -------- | --------------------------------------------------------------------------------- | ------------------------------- | 485 | | [Remotion](https://remotion.dev/) | ^4.0.286 | [Remotion License](https://github.com/remotion-dev/remotion/blob/main/LICENSE.md) | Video composition and rendering | 486 | | [Whisper CPP](https://github.com/ggml-org/whisper.cpp) | v1.5.5 | MIT | Speech-to-text for captions | 487 | | [FFmpeg](https://ffmpeg.org/) | ^2.1.3 | LGPL/GPL | Audio/video manipulation | 488 | | [Kokoro.js](https://www.npmjs.com/package/kokoro-js) | ^1.2.0 | MIT | Text-to-speech generation | 489 | | [Pexels API](https://www.pexels.com/api/) | N/A | [Pexels Terms](https://www.pexels.com/license/) | Background videos | 490 | 491 | ## How to contribute? 492 | 493 | PRs are welcome. 494 | See the [CONTRIBUTING.md](CONTRIBUTING.md) file for instructions on setting up a local development environment. 495 | 496 | ## License 497 | 498 | This project is licensed under the [MIT License](LICENSE). 499 | 500 | ## Acknowledgments 501 | 502 | - ❤️ [Remotion](https://remotion.dev/) for programmatic video generation 503 | - ❤️ [Whisper](https://github.com/ggml-org/whisper.cpp) for speech-to-text 504 | - ❤️ [Pexels](https://www.pexels.com/) for video content 505 | - ❤️ [FFmpeg](https://ffmpeg.org/) for audio/video processing 506 | - ❤️ [Kokoro](https://github.com/hexgrad/kokoro) for TTS 507 | ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown 1 | # Contributing to Shorts Creator 2 | 3 | ## How to setup the development environment 4 | 5 | 1. 
Clone the repository 6 | 7 | ```bash 8 | git clone git@github.com:gyoridavid/short-video-maker.git 9 | cd short-video-maker 10 | ``` 11 | 12 | 2. Install dependencies 13 | 14 | ```bash 15 | pnpm install 16 | ``` 17 | 18 | 3. Copy `.env.example` to `.env` and set the right environment variables. 19 | 20 | 4. Start the server 21 | ```bash 22 | pnpm dev 23 | ``` 24 | 25 | ## How to preview the videos and debug the rendering process 26 | 27 | You can use Remotion Studio to preview videos. Make sure to update the template if the underlying data structure changes. 28 | 29 | ```bash 30 | npx remotion studio 31 | ``` 32 | ``` -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- ``` 1 | export default { 2 | plugins: { 3 | "@tailwindcss/postcss": {}, 4 | }, 5 | }; 6 | ``` -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- ``` 1 | import { config } from "@remotion/eslint-config-flat"; 2 | 3 | export default config; 4 | ``` -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- ```javascript 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } ``` -------------------------------------------------------------------------------- /src/logger.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { logger } from "./config"; 2 | 3 | export default logger; 4 | export { logger }; 5 | ``` -------------------------------------------------------------------------------- /src/components/root/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { 
registerRoot } from "remotion"; 2 | import { RemotionRoot } from "./Root"; 3 | 4 | registerRoot(RemotionRoot); 5 | ``` -------------------------------------------------------------------------------- /vitest.config.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { defineConfig } from "vitest/config"; 2 | 3 | export default defineConfig({ 4 | test: { 5 | // ... 6 | }, 7 | }); 8 | ``` -------------------------------------------------------------------------------- /tsconfig.build.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "./dist" 5 | }, 6 | "include": ["src/**/*"], 7 | "exclude": ["**/*.test.ts", "src/ui"] 8 | } 9 | ``` -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- ```javascript 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./src/ui/**/*.{js,jsx,ts,tsx}", 5 | ], 6 | theme: { 7 | extend: {}, 8 | }, 9 | plugins: [], 10 | } ``` -------------------------------------------------------------------------------- /src/components/types.ts: -------------------------------------------------------------------------------- ```typescript 1 | export enum AvailableComponentsEnum { 2 | PortraitVideo = "ShortVideo", 3 | LandscapeVideo = "LandscapeVideo", 4 | } 5 | export type OrientationConfig = { 6 | width: number; 7 | height: number; 8 | component: AvailableComponentsEnum; 9 | }; 10 | ``` -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- ```yaml 1 | version: "3" 2 | 3 | services: 4 | short-creator: 5 | build: 6 | context: . 
7 | dockerfile: main.Dockerfile 8 | env_file: 9 | - .env 10 | environment: 11 | - DEV=false 12 | ports: 13 | - "3123:3123" 14 | entrypoint: ["node", "dist/index.js"] 15 | ``` -------------------------------------------------------------------------------- /src/ui/index.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import App from './App'; 4 | import './styles/index.css'; 5 | 6 | const root = ReactDOM.createRoot( 7 | document.getElementById('root') as HTMLElement 8 | ); 9 | 10 | root.render( 11 | <React.StrictMode> 12 | <App /> 13 | </React.StrictMode> 14 | ); ``` -------------------------------------------------------------------------------- /src/components/videos/Test.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import { AbsoluteFill, Sequence } from "remotion"; 2 | 3 | export const TestVideo: React.FC = () => { 4 | return ( 5 | <AbsoluteFill> 6 | <AbsoluteFill> 7 | <AbsoluteFill> 8 | <h1>Hello</h1> 9 | </AbsoluteFill> 10 | <Sequence from={10}> 11 | <h1 style={{ marginTop: "60px" }}>World</h1> 12 | </Sequence> 13 | </AbsoluteFill> 14 | </AbsoluteFill> 15 | ); 16 | }; 17 | ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "esModuleInterop": true, 7 | "strict": true, 8 | "skipLibCheck": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "outDir": "dist", 11 | "rootDir": "src", 12 | "declaration": true, 13 | "jsx": "react-jsx" 14 | }, 15 | "exclude": [ 16 | "remotion.config.ts", 17 | "node_modules", 18 | "dist", 19 | "vitest.config.ts", 20 | "src/ui" 21 | ] 22 | } 23 | ``` 
-------------------------------------------------------------------------------- /src/ui/public/index.html: -------------------------------------------------------------------------------- ```html 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8" /> 5 | <meta name="viewport" content="width=device-width, initial-scale=1" /> 6 | <meta name="theme-color" content="#000000" /> 7 | <meta 8 | name="description" 9 | content="Short Video Maker - Create amazing short videos" 10 | /> 11 | <title>Short Video Maker</title> 12 | </head> 13 | <body> 14 | <noscript>You need to enable JavaScript to run this app.</noscript> 15 | <div id="root"></div> 16 | </body> 17 | </html> ``` -------------------------------------------------------------------------------- /remotion.config.ts: -------------------------------------------------------------------------------- ```typescript 1 | // See all configuration options: https://remotion.dev/docs/config 2 | // Each option also is available as a CLI flag: https://remotion.dev/docs/cli 3 | 4 | // Note: When using the Node.JS APIs, the config file doesn't apply. 
/**
 * Root UI component: wraps every page in the shared `Layout` and maps the
 * three client-side routes to their page components.
 */
const App: React.FC = () => {
  // Route table; rendered in declaration order.
  const pages = [
    { path: '/', element: <VideoList /> },
    { path: '/create', element: <VideoCreator /> },
    { path: '/video/:videoId', element: <VideoDetails /> },
  ];

  return (
    <Router>
      <Layout>
        <Routes>
          {pages.map(({ path, element }) => (
            <Route key={path} path={path} element={element} />
          ))}
        </Routes>
      </Layout>
    </Router>
  );
};

export default App;
// Vite configuration for the browser UI that lives under src/ui.
export default defineConfig({
  plugins: [react()],
  // Vite project root: the UI sources and index.html are in src/ui
  root: 'src/ui',
  build: {
    // absolute output path built from this file's directory -> <repo>/dist/ui
    outDir: path.resolve(__dirname, 'dist/ui'),
    // wipe the output directory before each build
    emptyOutDir: true,
    rollupOptions: {
      input: {
        // single HTML entry point of the UI
        main: path.resolve(__dirname, 'src/ui/index.html'),
      },
    },
  },
  resolve: {
    alias: {
      // "@/..." imports resolve to src/ui/...
      '@': path.resolve(__dirname, './src/ui'),
    },
  },
  server: {
    // dev server port
    port: 3000,
    // forward /api and /mcp requests from the dev server to localhost:3123
    proxy: {
      '/api': {
        target: 'http://localhost:3123',
        changeOrigin: true,
      },
      '/mcp': {
        target: 'http://localhost:3123',
        changeOrigin: true,
      },
    },
  },
});
/**
 * Prepares everything the application needs before it can run: initialises
 * Kokoro with the configured model precision, ensures the browser used by
 * Remotion is available, initialises Whisper, and finally checks that the
 * bundled music files exist.
 *
 * Exits the process with code 1 when the music files are missing. Any other
 * failure rejects the returned promise.
 */
// runs in docker
export async function install(): Promise<void> {
  const config = new Config();

  logger.info("Installing dependencies...");
  logger.info("Installing Kokoro...");
  await Kokoro.init(config.kokoroModelPrecision);
  logger.info("Installing browser shell...");
  await ensureBrowser();
  logger.info("Installing whisper.cpp");
  await Whisper.init(config);
  logger.info("Installing dependencies complete");

  logger.info("Ensuring the music files exist...");
  const musicManager = new MusicManager(config);
  try {
    musicManager.ensureMusicFilesExist();
  } catch (error: unknown) {
    logger.error(error, "Missing music files");
    process.exit(1);
  }
}

install()
  .then(() => {
    logger.info("Installation complete");
  })
  .catch((error: unknown) => {
    logger.error(error, "Installation failed");
    // Report the failure through the exit status. Without this the script
    // exits with 0 and a calling build step (e.g. a Dockerfile `RUN`) would
    // treat a failed installation as a success.
    process.exitCode = 1;
  });
/**
 * Validates a raw request payload against the `createShortInput` schema.
 *
 * @param input - The untrusted payload to validate.
 * @returns The parsed, typed input when validation succeeds.
 * @throws Error whose `message` is a JSON string of the shape
 *   `{ message, missingFields }` describing every failed field.
 */
export function validateCreateShortInput(input: object): CreateShortInput {
  const validated = createShortInput.safeParse(input);
  logger.info({ validated }, "Validated input");

  if (validated.success) {
    return validated.data;
  }

  // Process the validation errors
  const errorResult = formatZodError(validated.error);

  throw new Error(
    JSON.stringify({
      message: errorResult.message,
      missingFields: errorResult.missingFields,
    }),
  );
}

/**
 * Converts a ZodError into a flat, human-readable summary.
 *
 * Each issue is keyed by its dotted path (e.g. `scenes.0.text`). When several
 * issues share a path their messages are joined with "; " instead of the
 * last one silently replacing the earlier ones.
 *
 * @param error - The error produced by a failed `safeParse`.
 * @returns The summary message and the per-field messages.
 */
function formatZodError(error: ZodError): ValidationErrorResult {
  // A Map keeps first-seen order and is immune to prototype keys.
  const messagesByPath = new Map<string, string[]>();

  // `issues` is the canonical list of problems on a ZodError.
  for (const issue of error.issues) {
    const path = issue.path.join(".");
    const messages = messagesByPath.get(path) ?? [];
    messages.push(issue.message);
    messagesByPath.set(path, messages);
  }

  const missingFields: Record<string, string> = {};
  for (const [path, messages] of messagesByPath) {
    missingFields[path] = messages.join("; ");
  }

  // Create a human-readable message
  const errorPaths = Object.keys(missingFields);
  const message =
    `Validation failed for ${errorPaths.length} field(s): ` +
    errorPaths.join(", ");

  return {
    message,
    missingFields,
  };
}
// Every search request is delayed by 1000ms while the client is given a
// 100ms timeout, so findVideo is expected to reject with a TimeoutError.
test("should time out", async () => {
  nock("https://api.pexels.com")
    .get(/videos\/search/)
    .delay(1000)
    .times(30)
    .reply(200, {});
  try {
    // `.rejects` is asynchronous: without `await` the test would finish
    // (and pass) before the assertion is ever evaluated.
    await expect(async () => {
      const pexels = new PexelsAPI("asdf");
      await pexels.findVideo(["dog"], 2.4, [], OrientationEnum.portrait, 100);
    }).rejects.toThrow(
      expect.objectContaining({
        name: "TimeoutError",
      }),
    );
  } finally {
    // Drop the unused delayed interceptors so they cannot answer requests
    // made by the tests that follow.
    nock.cleanAll();
  }
});

// The first two responses are delayed by 1000ms, the third one answers
// immediately with the recorded fixture.
test("should retry 3 times", async () => {
  nock("https://api.pexels.com")
    .get(/videos\/search/)
    .delay(1000)
    .times(2)
    .reply(200, {});
  const mockResponse = fs.readFileSync(
    path.resolve("__mocks__/pexels-response.json"),
    "utf-8",
  );
  nock("https://api.pexels.com")
    .get(/videos\/search/)
    .reply(200, mockResponse);

  const pexels = new PexelsAPI("asdf");
  const video = await pexels.findVideo(["dog"], 2.4, []);
  console.log(video);
  assert.isObject(video, "Video should be an object");
});
/**
 * Normalizes every music file known to the MusicManager and writes the
 * results into a `normalized` sub-directory of the music directory, keeping
 * the original file names.
 *
 * Files are processed one after another. Exits the process with code 1 when
 * the music files are missing; a failed conversion rejects the returned
 * promise.
 */
export async function normalizeMusic(): Promise<void> {
  const config = new Config();
  const musicManager = new MusicManager(config);
  try {
    musicManager.ensureMusicFilesExist();
  } catch (error: unknown) {
    logger.error(error, "Missing music files");
    process.exit(1);
  }
  const musicFiles = musicManager.musicList();
  const normalizedDir = path.join(config.musicDirPath, "normalized");
  fs.ensureDirSync(normalizedDir);
  for (const musicFile of musicFiles) {
    const inputPath = path.join(config.musicDirPath, musicFile.file);
    const outputPath = path.join(normalizedDir, musicFile.file);
    logger.debug({ inputPath, outputPath }, "Normalizing music file");
    // sequential on purpose: one conversion at a time
    await normalize(inputPath, outputPath);
  }
}

normalizeMusic()
  .then(() => {
    logger.info(
      "Music normalization completed successfully - make sure to replace the original files with the normalized ones",
    );
  })
  .catch((error: unknown) => {
    logger.error(error, "Error normalizing music files");
    // Make the failure visible to whoever invoked the script; otherwise the
    // process would exit with status 0 despite the error.
    process.exitCode = 1;
  });
  /**
   * Builds the Express application and registers all routes.
   *
   * Registration order matters: the health check and the /api and /mcp
   * routers come first, then the static file handlers, and finally the
   * catch-all route that returns the UI's index.html for every other GET.
   *
   * @param config - Application configuration; stored for use in `start`
   *   and passed on to the API router.
   * @param shortCreator - Service handed to both the API and MCP routers.
   */
  constructor(config: Config, shortCreator: ShortCreator) {
    this.config = config;
    this.app = express();

    // add healthcheck endpoint
    this.app.get("/health", (req: ExpressRequest, res: ExpressResponse) => {
      res.status(200).json({ status: "ok" });
    });

    // REST API under /api, MCP endpoints under /mcp
    const apiRouter = new APIRouter(config, shortCreator);
    const mcpRouter = new MCPRouter(shortCreator);
    this.app.use("/api", apiRouter.router);
    this.app.use("/mcp", mcpRouter.router);

    // Serve static files from the UI build
    // (paths are resolved relative to this module's directory)
    this.app.use(express.static(path.join(__dirname, "../../dist/ui")));
    this.app.use(
      "/static",
      express.static(path.join(__dirname, "../../static")),
    );

    // Serve the React app for all other routes (must be last)
    this.app.get("*", (req: ExpressRequest, res: ExpressResponse) => {
      res.sendFile(path.join(__dirname, "../../dist/ui/index.html"));
    });
  }
/**
 * Text-to-speech helper built on a loaded `KokoroTTS` model.
 * Create instances with {@link Kokoro.init}.
 */
export class Kokoro {
  constructor(private tts: KokoroTTS) {}

  /**
   * Synthesizes `text` with the given voice.
   *
   * The text is streamed through a `TextSplitterStream`; every chunk the
   * model yields is converted to WAV and all chunks are merged into one
   * WAV buffer.
   *
   * @param text - The text to speak.
   * @param voice - The voice to use.
   * @returns The merged WAV data and its duration, computed as
   *   sample count divided by sampling rate, summed over all chunks.
   * @throws Error when the model produced no audio chunk at all.
   */
  async generate(
    text: string,
    voice: Voices,
  ): Promise<{
    audio: ArrayBuffer;
    audioLength: number;
  }> {
    const splitter = new TextSplitterStream();
    const stream = this.tts.stream(splitter, {
      voice,
    });
    splitter.push(text);
    splitter.close();

    const audioBuffers: ArrayBuffer[] = [];
    let audioLength = 0;
    for await (const chunk of stream) {
      audioBuffers.push(chunk.audio.toWav());
      audioLength += chunk.audio.audio.length / chunk.audio.sampling_rate;
    }

    const mergedAudioBuffer = Kokoro.concatWavBuffers(audioBuffers);
    logger.debug({ text, voice, audioLength }, "Audio generated with Kokoro");

    return {
      audio: mergedAudioBuffer,
      audioLength: audioLength,
    };
  }

  /**
   * Merges several WAV buffers into a single WAV buffer.
   *
   * The first 44 bytes of the first buffer are reused as the header; the
   * first 44 bytes of every buffer are skipped and the remaining bytes are
   * concatenated. The two size fields of the header (offsets 4 and 40) are
   * rewritten for the combined data length.
   * NOTE(review): this assumes every input has exactly a 44-byte header and
   * the same audio format — confirm against the producer of the buffers.
   *
   * @param buffers - WAV buffers to merge, in playback order.
   * @returns The merged WAV data (a Node `Buffer` at runtime).
   * @throws Error when `buffers` is empty.
   */
  static concatWavBuffers(buffers: ArrayBuffer[]): ArrayBuffer {
    const headerSize = 44;

    // Fail with a clear message instead of a TypeError on `buffers[0]`.
    if (buffers.length === 0) {
      throw new Error(
        "Cannot concatenate WAV buffers: no audio chunks were provided",
      );
    }

    // Copy the header so the caller's first buffer is not modified.
    const header = Buffer.from(buffers[0].slice(0, headerSize));
    const dataParts = buffers.map((buf) =>
      Buffer.from(buf).subarray(headerSize),
    );
    const totalDataLength = dataParts.reduce(
      (sum, part) => sum + part.length,
      0,
    );

    // overall size field: remaining header bytes (36) + data
    header.writeUInt32LE(36 + totalDataLength, 4);
    // data size field
    header.writeUInt32LE(totalDataLength, 40);

    return Buffer.concat([header, ...dataParts]);
  }

  /**
   * Loads the Kokoro model and wraps it.
   *
   * @param dtype - Model precision to load.
   * @returns A ready-to-use `Kokoro` instance.
   */
  static async init(dtype: kokoroModelPrecision): Promise<Kokoro> {
    const tts = await KokoroTTS.from_pretrained(KOKORO_MODEL, {
      dtype,
      device: "cpu", // only "cpu" is supported in node
    });

    return new Kokoro(tts);
  }

  /** Returns every voice declared in `VoiceEnum`. */
  listAvailableVoices(): Voices[] {
    const voices = Object.values(VoiceEnum) as Voices[];
    return voices;
  }
}
COPY static /app/static 70 | COPY --from=install-whisper /whisper /app/data/libs/whisper 71 | COPY --from=prod-deps /app/node_modules /app/node_modules 72 | COPY --from=build /app/dist /app/dist 73 | COPY package.json /app/ 74 | 75 | # app configuration via environment variables 76 | ENV DATA_DIR_PATH=/app/data 77 | ENV DOCKER=true 78 | ENV WHISPER_MODEL=base.en 79 | # number of chrome tabs to use for rendering 80 | ENV CONCURRENCY=1 81 | # video cache - 2000MB 82 | ENV VIDEO_CACHE_SIZE_IN_BYTES=2097152000 83 | 84 | # install kokoro, headless chrome and ensure music files are present 85 | RUN node dist/scripts/install.js 86 | 87 | CMD ["pnpm", "start"] 88 | ``` -------------------------------------------------------------------------------- /main-tiny.Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | FROM ubuntu:22.04 AS install-whisper 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | RUN apt update 4 | # whisper install dependencies 5 | RUN apt install -y \ 6 | git \ 7 | build-essential \ 8 | wget \ 9 | cmake \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | WORKDIR /whisper 13 | RUN git clone https://github.com/ggml-org/whisper.cpp.git . 
14 | RUN git checkout v1.7.1 15 | RUN make 16 | WORKDIR /whisper/models 17 | RUN sh ./download-ggml-model.sh tiny.en 18 | 19 | FROM node:22-bookworm-slim AS base 20 | ENV DEBIAN_FRONTEND=noninteractive 21 | WORKDIR /app 22 | RUN apt update 23 | RUN apt install -y \ 24 | # whisper dependencies 25 | git \ 26 | wget \ 27 | cmake \ 28 | ffmpeg \ 29 | curl \ 30 | make \ 31 | libsdl2-dev \ 32 | # remotion dependencies 33 | libnss3 \ 34 | libdbus-1-3 \ 35 | libatk1.0-0 \ 36 | libgbm-dev \ 37 | libasound2 \ 38 | libxrandr2 \ 39 | libxkbcommon-dev \ 40 | libxfixes3 \ 41 | libxcomposite1 \ 42 | libxdamage1 \ 43 | libatk-bridge2.0-0 \ 44 | libpango-1.0-0 \ 45 | libcairo2 \ 46 | libcups2 \ 47 | && apt-get clean \ 48 | && rm -rf /var/lib/apt/lists/* 49 | # setup pnpm 50 | ENV PNPM_HOME="/pnpm" 51 | ENV PATH="$PNPM_HOME:$PATH" 52 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0 53 | RUN corepack enable 54 | 55 | FROM base AS prod-deps 56 | COPY package.json pnpm-lock.yaml* /app/ 57 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile 58 | RUN pnpm install --prefer-offline --no-cache --prod 59 | 60 | FROM prod-deps AS build 61 | COPY tsconfig.json /app 62 | COPY tsconfig.build.json /app 63 | COPY vite.config.ts /app 64 | COPY src /app/src 65 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile 66 | RUN pnpm build 67 | 68 | FROM base 69 | COPY static /app/static 70 | COPY --from=install-whisper /whisper /app/data/libs/whisper 71 | COPY --from=prod-deps /app/node_modules /app/node_modules 72 | COPY --from=build /app/dist /app/dist 73 | COPY package.json /app/ 74 | 75 | # app configuration via environment variables 76 | ENV DATA_DIR_PATH=/app/data 77 | ENV DOCKER=true 78 | ENV WHISPER_MODEL=tiny.en 79 | ENV KOKORO_MODEL_PRECISION=q4 80 | # number of chrome tabs to use for rendering 81 | ENV CONCURRENCY=1 82 | # video cache - 2000MB 83 | ENV VIDEO_CACHE_SIZE_IN_BYTES=2097152000 84 | 85 | # install kokoro, headless chrome and 
/**
 * Audio conversion helpers built on fluent-ffmpeg.
 * Create instances with {@link FFMpeg.init}, which also configures the
 * ffmpeg binary path.
 */
export class FFMpeg {
  /**
   * Points fluent-ffmpeg at the binary shipped by `@ffmpeg-installer/ffmpeg`
   * and returns a new helper instance.
   */
  static async init(): Promise<FFMpeg> {
    const ffmpegInstaller = await import("@ffmpeg-installer/ffmpeg");
    ffmpeg.setFfmpegPath(ffmpegInstaller.path);
    // Structured form, like the other log calls in this project; a bare
    // extra argument without a format placeholder would not be printed.
    logger.info({ path: ffmpegInstaller.path }, "FFmpeg path set");
    return new FFMpeg();
  }

  /** Wraps in-memory audio data in a finished readable stream. */
  private static toStream(audio: ArrayBuffer): Readable {
    const stream = new Readable();
    stream.push(Buffer.from(audio));
    stream.push(null); // end of data
    return stream;
  }

  /**
   * Converts the audio to mono, 16 kHz, 16-bit PCM WAV and writes it to
   * `outputPath`.
   *
   * @param audio - Source audio data.
   * @param outputPath - Destination file.
   * @returns `outputPath` once ffmpeg has finished.
   */
  async saveNormalizedAudio(
    audio: ArrayBuffer,
    outputPath: string,
  ): Promise<string> {
    logger.debug("Normalizing audio for Whisper");
    const inputStream = FFMpeg.toStream(audio);

    return new Promise((resolve, reject) => {
      ffmpeg()
        .input(inputStream)
        .audioCodec("pcm_s16le")
        .audioChannels(1)
        .audioFrequency(16000)
        .toFormat("wav")
        .on("end", () => {
          logger.debug("Audio normalization complete");
          resolve(outputPath);
        })
        .on("error", (error: unknown) => {
          logger.error(error, "Error normalizing audio:");
          reject(error);
        })
        .save(outputPath);
    });
  }

  /**
   * Encodes the audio as stereo 128 kbit/s MP3 and returns it as a base64
   * data URI.
   *
   * @param audio - Source audio data.
   * @returns A `data:audio/mp3;base64,...` string.
   */
  async createMp3DataUri(audio: ArrayBuffer): Promise<string> {
    const inputStream = FFMpeg.toStream(audio);
    return new Promise((resolve, reject) => {
      const chunks: Buffer[] = [];

      ffmpeg()
        .input(inputStream)
        .audioCodec("libmp3lame")
        .audioBitrate(128)
        .audioChannels(2)
        .toFormat("mp3")
        // errors raised by the ffmpeg command itself
        .on("error", (err) => {
          reject(err);
        })
        .pipe()
        .on("data", (data: Buffer) => {
          chunks.push(data);
        })
        .on("end", () => {
          const buffer = Buffer.concat(chunks);
          resolve(`data:audio/mp3;base64,${buffer.toString("base64")}`);
        })
        // errors raised by the output stream
        .on("error", (err) => {
          reject(err);
        });
    });
  }

  /**
   * Encodes the audio as stereo 128 kbit/s MP3 and writes it to `filePath`.
   *
   * @param audio - Source audio data.
   * @param filePath - Destination file.
   * @returns `filePath` once ffmpeg has finished.
   */
  async saveToMp3(audio: ArrayBuffer, filePath: string): Promise<string> {
    const inputStream = FFMpeg.toStream(audio);
    return new Promise((resolve, reject) => {
      ffmpeg()
        .input(inputStream)
        .audioCodec("libmp3lame")
        .audioBitrate(128)
        .audioChannels(2)
        .toFormat("mp3")
        // handlers are registered before the command is started,
        // matching saveNormalizedAudio
        .on("end", () => {
          logger.debug("Audio conversion complete");
          resolve(filePath);
        })
        .on("error", (err) => {
          reject(err);
        })
        .save(filePath);
    });
  }
}
"ts" : "js"}`, 29 | ), 30 | }); 31 | 32 | return new Remotion(bundled, config); 33 | } 34 | 35 | async render( 36 | data: z.infer<typeof shortVideoSchema>, 37 | id: string, 38 | orientation: OrientationEnum, 39 | ) { 40 | const { component } = getOrientationConfig(orientation); 41 | 42 | const composition = await selectComposition({ 43 | serveUrl: this.bundled, 44 | id: component, 45 | inputProps: data, 46 | }); 47 | 48 | logger.debug({ component, videoID: id }, "Rendering video with Remotion"); 49 | 50 | const outputLocation = path.join(this.config.videosDirPath, `${id}.mp4`); 51 | 52 | await renderMedia({ 53 | codec: "h264", 54 | composition, 55 | serveUrl: this.bundled, 56 | outputLocation, 57 | inputProps: data, 58 | onProgress: ({ progress }) => { 59 | logger.debug(`Rendering ${id} ${Math.floor(progress * 100)}% complete`); 60 | }, 61 | // preventing memory issues with docker 62 | concurrency: this.config.concurrency, 63 | offthreadVideoCacheSizeInBytes: this.config.videoCacheSizeInBytes, 64 | }); 65 | 66 | logger.debug( 67 | { 68 | outputLocation, 69 | component, 70 | videoID: id, 71 | }, 72 | "Video rendered with Remotion", 73 | ); 74 | } 75 | 76 | async testRender(outputLocation: string) { 77 | const composition = await selectComposition({ 78 | serveUrl: this.bundled, 79 | id: "TestVideo", 80 | }); 81 | 82 | await renderMedia({ 83 | codec: "h264", 84 | composition, 85 | serveUrl: this.bundled, 86 | outputLocation, 87 | onProgress: ({ progress }) => { 88 | logger.debug( 89 | `Rendering test video: ${Math.floor(progress * 100)}% complete`, 90 | ); 91 | }, 92 | // preventing memory issues with docker 93 | concurrency: this.config.concurrency, 94 | offthreadVideoCacheSizeInBytes: this.config.videoCacheSizeInBytes, 95 | }); 96 | } 97 | } 98 | ``` -------------------------------------------------------------------------------- /src/server/routers/mcp.ts: -------------------------------------------------------------------------------- ```typescript 1 | import 
/**
 * Express router exposing the short creator over MCP using the SSE
 * transport: `GET /sse` opens a session, `POST /messages?sessionId=...`
 * delivers client messages to that session.
 */
export class MCPRouter {
  router: express.Router;
  shortCreator: ShortCreator;
  // Active transports keyed by session id. Created without a prototype so a
  // client-supplied id such as "constructor" or "__proto__" can never
  // resolve to an inherited Object.prototype member.
  transports: { [sessionId: string]: SSEServerTransport } =
    Object.create(null);
  mcpServer: McpServer;

  /**
   * @param shortCreator - Service used by the MCP tools to queue videos and
   *   query their status.
   */
  constructor(shortCreator: ShortCreator) {
    this.router = express.Router();
    this.shortCreator = shortCreator;

    this.mcpServer = new McpServer({
      name: "Short Creator",
      version: "0.0.1",
      capabilities: {
        resources: {},
        tools: {},
      },
    });

    this.setupMCPServer();
    this.setupRoutes();
  }

  /** Registers the `get-video-status` and `create-short-video` tools. */
  private setupMCPServer() {
    this.mcpServer.tool(
      "get-video-status",
      "Get the status of a video (ready, processing, failed)",
      {
        videoId: z.string().describe("The ID of the video"),
      },
      async ({ videoId }) => {
        const status = this.shortCreator.status(videoId);
        return {
          content: [
            {
              type: "text",
              text: status,
            },
          ],
        };
      },
    );

    this.mcpServer.tool(
      "create-short-video",
      "Create a short video from a list of scenes",
      {
        scenes: z.array(sceneInput).describe("Each scene to be created"),
        config: renderConfig.describe("Configuration for rendering the video"),
      },
      async ({ scenes, config }) => {
        // the tool answers with the id of the queued video
        const videoId = await this.shortCreator.addToQueue(scenes, config);

        return {
          content: [
            {
              type: "text",
              text: videoId,
            },
          ],
        };
      },
    );
  }

  /** Registers the SSE session endpoint and the message endpoint. */
  private setupRoutes() {
    this.router.get("/sse", async (req, res) => {
      logger.info("SSE GET request received");

      const transport = new SSEServerTransport("/mcp/messages", res);
      this.transports[transport.sessionId] = transport;
      // forget the session as soon as the client disconnects
      res.on("close", () => {
        delete this.transports[transport.sessionId];
      });
      await this.mcpServer.connect(transport);
    });

    this.router.post("/messages", async (req, res) => {
      logger.info("SSE POST request received");

      // The query value is untrusted: it may be missing or repeated (array).
      // Only a plain string can identify a session.
      const { sessionId } = req.query;
      const transport =
        typeof sessionId === "string" ? this.transports[sessionId] : undefined;
      if (transport) {
        await transport.handlePostMessage(req, res);
      } else {
        res.status(400).send("No transport found for sessionId");
      }
    });
  }
}
15 | 16 | RUN make clean 17 | RUN GGML_CUDA=1 make -j 18 | 19 | RUN sh ./models/download-ggml-model.sh medium.en 20 | 21 | FROM ${BASE_CUDA_RUN_CONTAINER} AS base 22 | 23 | # install node 24 | RUN apt-get update && apt-get install -y \ 25 | curl \ 26 | ca-certificates \ 27 | gnupg \ 28 | lsb-release \ 29 | && rm -rf /var/lib/apt/lists/* 30 | RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ 31 | && apt-get update && apt-get install -y nodejs \ 32 | && rm -rf /var/lib/apt/lists/* 33 | RUN node -v && npm -v 34 | 35 | # install dependencies 36 | ENV DEBIAN_FRONTEND=noninteractive 37 | WORKDIR /app 38 | RUN apt update 39 | RUN apt install -y \ 40 | # whisper dependencies 41 | git \ 42 | wget \ 43 | cmake \ 44 | ffmpeg \ 45 | curl \ 46 | build-essential \ 47 | make \ 48 | # remotion dependencies 49 | libnss3 \ 50 | libdbus-1-3 \ 51 | libatk1.0-0 \ 52 | libgbm-dev \ 53 | libasound2 \ 54 | libxrandr2 \ 55 | libxkbcommon-dev \ 56 | libxfixes3 \ 57 | libxcomposite1 \ 58 | libxdamage1 \ 59 | libatk-bridge2.0-0 \ 60 | libpango-1.0-0 \ 61 | libcairo2 \ 62 | libcups2 \ 63 | && apt-get clean \ 64 | && rm -rf /var/lib/apt/lists/* 65 | # setup pnpm 66 | ENV PNPM_HOME="/pnpm" 67 | ENV PATH="$PNPM_HOME:$PATH" 68 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0 69 | RUN corepack enable 70 | 71 | FROM base AS prod-deps 72 | COPY package.json pnpm-lock.yaml* /app/ 73 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile 74 | RUN pnpm install --prefer-offline --no-cache --prod 75 | 76 | FROM prod-deps AS build 77 | COPY tsconfig.json /app 78 | COPY tsconfig.build.json /app 79 | COPY vite.config.ts /app 80 | COPY src /app/src 81 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile 82 | RUN pnpm build 83 | 84 | FROM base 85 | COPY static /app/static 86 | COPY --from=install-whisper /app/data/libs/whisper /app/data/libs/whisper 87 | COPY --from=prod-deps /app/node_modules /app/node_modules 88 | COPY --from=build 
/**
 * Thin wrapper around whisper.cpp (via `@remotion/install-whisper-cpp`) that
 * turns an audio file into a flat list of timed captions.
 */
export class Whisper {
  constructor(private config: Config) {}

  /**
   * Creates a ready-to-use instance.
   *
   * Outside Docker this installs whisper.cpp into `config.whisperInstallPath`
   * and downloads the configured model first; when `config.runningInDocker`
   * is set, nothing is installed or downloaded here.
   */
  static async init(config: Config): Promise<Whisper> {
    if (!config.runningInDocker) {
      logger.debug("Installing WhisperCpp");
      await installWhisperCpp({
        to: config.whisperInstallPath,
        version: config.whisperVersion,
        printOutput: true,
      });
      logger.debug("WhisperCpp installed");
      logger.debug("Downloading Whisper model");
      await downloadWhisperModel({
        model: config.whisperModel,
        folder: path.join(config.whisperInstallPath, "models"),
        printOutput: config.whisperVerbose,
        onProgress: (downloadedBytes, totalBytes) => {
          const progress = `${Math.round((downloadedBytes / totalBytes) * 100)}%`;
          logger.debug(
            { progress, model: config.whisperModel },
            "Downloading Whisper model",
          );
        },
      });
      // todo run the jfk command to check if everything is ok
      logger.debug("Whisper model downloaded");
    }

    return new Whisper(config);
  }

  // todo shall we extract it to a Caption class?
  /**
   * Transcribes the audio file at `audioPath` with token-level timestamps and
   * converts the transcription into captions.
   *
   * @param audioPath path of the audio file handed to whisper.cpp
   * @returns the captions in transcription order
   */
  async CreateCaption(audioPath: string): Promise<Caption[]> {
    logger.debug({ audioPath }, "Starting to transcribe audio");
    const { transcription } = await transcribe({
      model: this.config.whisperModel,
      whisperPath: this.config.whisperInstallPath,
      modelFolder: path.join(this.config.whisperInstallPath, "models"),
      whisperCppVersion: this.config.whisperVersion,
      inputPath: audioPath,
      tokenLevelTimestamps: true,
      printOutput: this.config.whisperVerbose,
      onProgress: (progress) => {
        logger.debug({ audioPath }, `Transcribing is ${progress} complete`);
      },
    });
    logger.debug({ audioPath }, "Transcription finished, creating captions");

    const captions = Whisper.buildCaptions(transcription);
    logger.debug({ audioPath, captions }, "Captions created");
    return captions;
  }

  /**
   * Flattens transcription records into captions.
   *
   * Records with empty text and `[_TT…` timing tokens are skipped. A token
   * that does not start with a space, following a caption that does not end
   * with one, is treated as the continuation of the same word and merged into
   * that caption. Timing always comes from the enclosing record's offsets.
   */
  private static buildCaptions(
    transcription: ReadonlyArray<{
      text: string;
      offsets: { from: number; to: number };
      tokens: ReadonlyArray<{ text: string }>;
    }>,
  ): Caption[] {
    const captions: Caption[] = [];
    for (const record of transcription) {
      if (record.text === "") {
        continue;
      }

      for (const token of record.tokens) {
        if (token.text.startsWith("[_TT")) {
          continue;
        }
        const previous = captions[captions.length - 1];
        // if token starts without space and the previous node didn't have space either, merge them
        if (
          previous !== undefined &&
          !token.text.startsWith(" ") &&
          !previous.text.endsWith(" ")
        ) {
          // append only this token: appending record.text would repeat the
          // text of every token already emitted for the same record
          previous.text += token.text;
          previous.endMs = record.offsets.to;
          continue;
        }
        captions.push({
          text: token.text,
          startMs: record.offsets.from,
          endMs: record.offsets.to,
        });
      }
    }
    return captions;
  }
}
/**
 * Boots the application: validates the configuration, checks the bundled
 * music files, initializes every library the short creator depends on and
 * finally starts the server.
 *
 * Configuration problems, missing music files and a failed installation check
 * all terminate the process with exit code 1.
 */
async function main() {
  const config = new Config();
  try {
    config.ensureConfig();
  } catch (err: unknown) {
    logger.error(err, "Error in config");
    process.exit(1);
  }

  const musicManager = new MusicManager(config);
  try {
    logger.debug("checking music files");
    musicManager.ensureMusicFilesExist();
  } catch (error: unknown) {
    logger.error(error, "Missing music files");
    process.exit(1);
  }

  logger.debug("initializing remotion");
  const remotion = await Remotion.init(config);
  logger.debug("initializing kokoro");
  const kokoro = await Kokoro.init(config.kokoroModelPrecision);
  logger.debug("initializing whisper");
  const whisper = await Whisper.init(config);
  logger.debug("initializing ffmpeg");
  const ffmpeg = await FFMpeg.init();
  const pexelsApi = new PexelsAPI(config.pexelsApiKey);

  logger.debug("initializing the short creator");
  const shortCreator = new ShortCreator(
    config,
    remotion,
    kokoro,
    whisper,
    ffmpeg,
    pexelsApi,
    musicManager,
  );

  if (!config.runningInDocker) {
    // the project is running with npm - we need to check if the installation is correct
    if (fs.existsSync(config.installationSuccessfulPath)) {
      logger.info("the installation is successful - starting the server");
    } else {
      logger.info(
        "testing if the installation was successful - this may take a while...",
      );
      try {
        // smoke-test each dependency once: speech generation, mp3 encoding,
        // a Pexels search and a test render
        const audioBuffer = (await kokoro.generate("hi", "af_heart")).audio;
        await ffmpeg.createMp3DataUri(audioBuffer);
        await pexelsApi.findVideo(["dog"], 2.4);
        const testVideoPath = path.join(config.tempDirPath, "test.mp4");
        await remotion.testRender(testVideoPath);
        fs.rmSync(testVideoPath, { force: true });
        // marker file: later starts skip the smoke test when it exists
        fs.writeFileSync(config.installationSuccessfulPath, "ok", {
          encoding: "utf-8",
        });
        logger.info("the installation was successful - starting the server");
      } catch (error: unknown) {
        logger.fatal(
          error,
          "The environment is not set up correctly - please follow the instructions in the README.md file https://github.com/gyoridavid/short-video-maker",
        );
        process.exit(1);
      }
    }
  }

  logger.debug("initializing the server");
  const server = new Server(config, shortCreator);
  server.start();

  // todo add shutdown handler
}

main().catch((error: unknown) => {
  logger.error(error, "Error starting server");
  // a failed startup must not look like a clean exit to the caller
  process.exit(1);
});
"test": "vitest", 13 | "prepublishOnly": "npm run build && echo \"#!/usr/bin/env node\n$(cat dist/index.js)\" > dist/index.js && chmod +x dist/index.js", 14 | "publish:docker": "npm run publish:docker:normal && npm run publish:docker:cuda && npm run publish:docker:tiny", 15 | "publish:docker:cuda": "docker buildx build --platform linux/amd64 -t gyoridavid/short-video-maker:latest-cuda -t gyoridavid/short-video-maker:${npm_package_version}-cuda -f main-cuda.Dockerfile --push ./", 16 | "publish:docker:normal": "docker buildx build --platform linux/amd64,linux/arm64 -t gyoridavid/short-video-maker:latest -t gyoridavid/short-video-maker:${npm_package_version} -f main.Dockerfile --push ./", 17 | "publish:docker:tiny": "docker buildx build --platform linux/amd64,linux/arm64 -t gyoridavid/short-video-maker:latest-tiny -t gyoridavid/short-video-maker:${npm_package_version}-tiny -f main-tiny.Dockerfile --push ./", 18 | "ui:dev": "vite", 19 | "ui:build": "vite build", 20 | "ui:preview": "vite preview" 21 | }, 22 | "bin": { 23 | "short-video-maker": "dist/index.js" 24 | }, 25 | "files": [ 26 | "dist", 27 | "static" 28 | ], 29 | "keywords": [ 30 | "shorts", 31 | "mcp", 32 | "model context protocol", 33 | "reels", 34 | "tiktok", 35 | "youtube shorts", 36 | "youtube", 37 | "short video", 38 | "video creation", 39 | "instagram", 40 | "video", 41 | "generator", 42 | "remotion", 43 | "faceless video" 44 | ], 45 | "author": "David Gyori", 46 | "license": "MIT", 47 | "dependencies": { 48 | "@emotion/react": "^11.11.3", 49 | "@emotion/styled": "^11.11.0", 50 | "@ffmpeg-installer/ffmpeg": "^1.1.0", 51 | "@modelcontextprotocol/sdk": "^1.9.0", 52 | "@mui/icons-material": "^5.15.10", 53 | "@mui/material": "^5.15.10", 54 | "@remotion/bundler": "^4.0.286", 55 | "@remotion/cli": "^4.0.286", 56 | "@remotion/google-fonts": "^4.0.286", 57 | "@remotion/install-whisper-cpp": "^4.0.286", 58 | "@remotion/renderer": "^4.0.286", 59 | "@remotion/zod-types": "^4.0.286", 60 | "@tanstack/react-query": 
"^5.18.0", 61 | "@types/react-dom": "^19.1.3", 62 | "@types/react-router-dom": "^5.3.3", 63 | "axios": "^1.9.0", 64 | "content-type": "^1.0.5", 65 | "cuid": "^3.0.0", 66 | "dotenv": "^16.4.7", 67 | "express": "^4.18.2", 68 | "fluent-ffmpeg": "^2.1.3", 69 | "fs-extra": "^11.3.0", 70 | "kokoro-js": "^1.2.0", 71 | "nock": "^14.0.3", 72 | "pino": "^9.6.0", 73 | "react": "^19.1.0", 74 | "react-dom": "^19.1.0", 75 | "react-router-dom": "^7.5.3", 76 | "remotion": "^4.0.286", 77 | "zod": "^3.24.2", 78 | "zod-to-json-schema": "^3.24.5" 79 | }, 80 | "devDependencies": { 81 | "@remotion/eslint-config-flat": "^4.0.286", 82 | "@types/content-type": "^1.1.8", 83 | "@types/express": "^4.17.21", 84 | "@types/fluent-ffmpeg": "^2.1.27", 85 | "@types/fs-extra": "^11.0.4", 86 | "@types/nock": "^11.1.0", 87 | "@types/node": "^22.14.0", 88 | "@types/react": "^19.1.0", 89 | "@vitejs/plugin-react": "^4.4.1", 90 | "autoprefixer": "^10.4.16", 91 | "eslint": "^9.24.0", 92 | "postcss": "^8.4.31", 93 | "prettier": "^3.5.3", 94 | "rimraf": "^6.0.1", 95 | "tailwindcss": "^3.3.0", 96 | "ts-node": "^10.9.2", 97 | "typescript": "^5.8.3", 98 | "vite": "^6.3.4", 99 | "vitest": "^3.1.1" 100 | } 101 | } 102 | ``` -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- ```typescript 1 | import path from "path"; 2 | import "dotenv/config"; 3 | import os from "os"; 4 | import fs from "fs-extra"; 5 | import pino from "pino"; 6 | import { kokoroModelPrecision, whisperModels } from "./types/shorts"; 7 | 8 | const defaultLogLevel: pino.Level = "info"; 9 | const defaultPort = 3123; 10 | const whisperVersion = "1.7.1"; 11 | const defaultWhisperModel: whisperModels = "medium.en"; // possible options: "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo" 12 | 13 | // Create the global logger 14 | const 
versionNumber = process.env.npm_package_version; 15 | export const logger = pino({ 16 | level: process.env.LOG_LEVEL || defaultLogLevel, 17 | timestamp: pino.stdTimeFunctions.isoTime, 18 | formatters: { 19 | level: (label) => { 20 | return { level: label }; 21 | }, 22 | }, 23 | base: { 24 | pid: process.pid, 25 | version: versionNumber, 26 | }, 27 | }); 28 | 29 | export class Config { 30 | private dataDirPath: string; 31 | private libsDirPath: string; 32 | private staticDirPath: string; 33 | 34 | public installationSuccessfulPath: string; 35 | public whisperInstallPath: string; 36 | public videosDirPath: string; 37 | public tempDirPath: string; 38 | public packageDirPath: string; 39 | public musicDirPath: string; 40 | public pexelsApiKey: string; 41 | public logLevel: pino.Level; 42 | public whisperVerbose: boolean; 43 | public port: number; 44 | public runningInDocker: boolean; 45 | public devMode: boolean; 46 | public whisperVersion: string = whisperVersion; 47 | public whisperModel: whisperModels = defaultWhisperModel; 48 | public kokoroModelPrecision: kokoroModelPrecision = "fp32"; 49 | 50 | // docker-specific, performance-related settings to prevent memory issues 51 | public concurrency?: number; 52 | public videoCacheSizeInBytes: number | null = null; 53 | 54 | constructor() { 55 | this.dataDirPath = 56 | process.env.DATA_DIR_PATH || 57 | path.join(os.homedir(), ".ai-agents-az-video-generator"); 58 | this.libsDirPath = path.join(this.dataDirPath, "libs"); 59 | 60 | this.whisperInstallPath = path.join(this.libsDirPath, "whisper"); 61 | this.videosDirPath = path.join(this.dataDirPath, "videos"); 62 | this.tempDirPath = path.join(this.dataDirPath, "temp"); 63 | this.installationSuccessfulPath = path.join( 64 | this.dataDirPath, 65 | "installation-successful", 66 | ); 67 | 68 | fs.ensureDirSync(this.dataDirPath); 69 | fs.ensureDirSync(this.libsDirPath); 70 | fs.ensureDirSync(this.videosDirPath); 71 | fs.ensureDirSync(this.tempDirPath); 72 | 73 | 
this.packageDirPath = path.join(__dirname, ".."); 74 | this.staticDirPath = path.join(this.packageDirPath, "static"); 75 | this.musicDirPath = path.join(this.staticDirPath, "music"); 76 | 77 | this.pexelsApiKey = process.env.PEXELS_API_KEY as string; 78 | this.logLevel = (process.env.LOG_LEVEL || defaultLogLevel) as pino.Level; 79 | this.whisperVerbose = process.env.WHISPER_VERBOSE === "true"; 80 | this.port = process.env.PORT ? parseInt(process.env.PORT) : defaultPort; 81 | this.runningInDocker = process.env.DOCKER === "true"; 82 | this.devMode = process.env.DEV === "true"; 83 | 84 | if (process.env.WHISPER_MODEL) { 85 | this.whisperModel = process.env.WHISPER_MODEL as whisperModels; 86 | } 87 | if (process.env.KOKORO_MODEL_PRECISION) { 88 | this.kokoroModelPrecision = process.env 89 | .KOKORO_MODEL_PRECISION as kokoroModelPrecision; 90 | } 91 | 92 | this.concurrency = process.env.CONCURRENCY 93 | ? parseInt(process.env.CONCURRENCY) 94 | : undefined; 95 | 96 | if (process.env.VIDEO_CACHE_SIZE_IN_BYTES) { 97 | this.videoCacheSizeInBytes = parseInt( 98 | process.env.VIDEO_CACHE_SIZE_IN_BYTES, 99 | ); 100 | } 101 | } 102 | 103 | public ensureConfig() { 104 | if (!this.pexelsApiKey) { 105 | throw new Error( 106 | "PEXELS_API_KEY environment variable is missing. 
/**
 * Groups captions into pages of lines for on-screen display.
 *
 * Captions are appended to the current line until adding the next one would
 * push the space-joined line text past `lineMaxLength`; the line is then
 * closed. Once a page holds `lineCount` closed lines it is closed too. A page
 * is also closed when the next caption starts more than `maxDistanceMs` after
 * the end of the current page.
 *
 * @param captions captions in playback order
 * @param lineMaxLength maximum number of characters of a line
 * @param lineCount maximum number of lines of a page
 * @param maxDistanceMs largest gap between captions that may share a page
 * @returns the pages in order; empty when there are no captions
 */
export function createCaptionPages({
  captions,
  lineMaxLength,
  lineCount,
  maxDistanceMs,
}: {
  captions: Caption[];
  lineMaxLength: number;
  lineCount: number;
  maxDistanceMs: number;
}): CaptionPage[] {
  const pages: CaptionPage[] = [];
  let currentPage: CaptionPage = {
    startMs: 0,
    endMs: 0,
    lines: [],
  };
  let currentLine: CaptionLine = {
    texts: [],
  };

  captions.forEach((caption, i) => {
    // Check if we need to start a new page due to time gap
    if (i > 0 && caption.startMs - currentPage.endMs > maxDistanceMs) {
      // Add current line if not empty
      if (currentLine.texts.length > 0) {
        currentPage.lines.push(currentLine);
      }
      // Add current page if not empty
      if (currentPage.lines.length > 0) {
        pages.push(currentPage);
      }
      // Start new page
      currentPage = {
        startMs: caption.startMs,
        endMs: caption.endMs,
        lines: [],
      };
      currentLine = {
        texts: [],
      };
    }

    // Check if adding this caption exceeds the line length
    const currentLineText = currentLine.texts.map((t) => t.text).join(" ");
    if (
      currentLine.texts.length > 0 &&
      currentLineText.length + 1 + caption.text.length > lineMaxLength
    ) {
      // Line is full, add it to current page
      currentPage.lines.push(currentLine);
      currentLine = {
        texts: [],
      };

      // Check if page is full
      if (currentPage.lines.length >= lineCount) {
        // Page is full, add it to pages
        pages.push(currentPage);
        // Start new page
        currentPage = {
          startMs: caption.startMs,
          endMs: caption.endMs,
          lines: [],
        };
      }
    }

    // Decide whether this caption opens the page by looking at its content.
    // A start time of 0 is a valid value and must not be read as "unset",
    // otherwise the second caption overwrites the start of a page that
    // begins at 0 ms.
    const pageIsEmpty =
      currentPage.lines.length === 0 && currentLine.texts.length === 0;

    // Add caption to current line
    currentLine.texts.push({
      text: caption.text,
      startMs: caption.startMs,
      endMs: caption.endMs,
    });

    // Update page timing
    currentPage.endMs = caption.endMs;
    currentPage.startMs = pageIsEmpty
      ? caption.startMs
      : Math.min(currentPage.startMs, caption.startMs);
  });

  // Don't forget to add the last line and page
  if (currentLine.texts.length > 0) {
    currentPage.lines.push(currentLine);
  }
  if (currentPage.lines.length > 0) {
    pages.push(currentPage);
  }

  return pages;
}
/**
 * Translates a music volume level into the values used for playback.
 *
 * @param level requested level; `MusicVolumeEnum.high` when omitted
 * @returns a `[volume, muted]` pair. "muted" yields `[0, true]`; "low",
 *   "medium" and "high" yield 0.2, 0.45 and 0.7 with `muted` false; any other
 *   value is treated like "high".
 */
export function calculateVolume(
  level: MusicVolumeEnum = MusicVolumeEnum.high,
): [number, boolean] {
  const requested: string = level;

  // muting is the only level that sets the muted flag
  if (requested === "muted") {
    return [0, true];
  }

  const volume =
    requested === "low" ? 0.2 : requested === "medium" ? 0.45 : 0.7;
  return [volume, false];
}
Make sure to match the overall context with the word - regardless what the video search result would be.", 39 | ), 40 | }); 41 | export type SceneInput = z.infer<typeof sceneInput>; 42 | 43 | export enum VoiceEnum { 44 | af_heart = "af_heart", 45 | af_alloy = "af_alloy", 46 | af_aoede = "af_aoede", 47 | af_bella = "af_bella", 48 | af_jessica = "af_jessica", 49 | af_kore = "af_kore", 50 | af_nicole = "af_nicole", 51 | af_nova = "af_nova", 52 | af_river = "af_river", 53 | af_sarah = "af_sarah", 54 | af_sky = "af_sky", 55 | am_adam = "am_adam", 56 | am_echo = "am_echo", 57 | am_eric = "am_eric", 58 | am_fenrir = "am_fenrir", 59 | am_liam = "am_liam", 60 | am_michael = "am_michael", 61 | am_onyx = "am_onyx", 62 | am_puck = "am_puck", 63 | am_santa = "am_santa", 64 | bf_emma = "bf_emma", 65 | bf_isabella = "bf_isabella", 66 | bm_george = "bm_george", 67 | bm_lewis = "bm_lewis", 68 | bf_alice = "bf_alice", 69 | bf_lily = "bf_lily", 70 | bm_daniel = "bm_daniel", 71 | bm_fable = "bm_fable", 72 | } 73 | 74 | export enum OrientationEnum { 75 | landscape = "landscape", 76 | portrait = "portrait", 77 | } 78 | 79 | export enum MusicVolumeEnum { 80 | muted = "muted", 81 | low = "low", 82 | medium = "medium", 83 | high = "high", 84 | } 85 | 86 | export const renderConfig = z.object({ 87 | paddingBack: z 88 | .number() 89 | .optional() 90 | .describe( 91 | "For how long the video should be playing after the speech is done, in milliseconds. 
1500 is a good value.", 92 | ), 93 | music: z 94 | .nativeEnum(MusicMoodEnum) 95 | .optional() 96 | .describe("Music tag to be used to find the right music for the video"), 97 | captionPosition: z 98 | .nativeEnum(CaptionPositionEnum) 99 | .optional() 100 | .describe("Position of the caption in the video"), 101 | captionBackgroundColor: z 102 | .string() 103 | .optional() 104 | .describe( 105 | "Background color of the caption, a valid css color, default is blue", 106 | ), 107 | voice: z 108 | .nativeEnum(VoiceEnum) 109 | .optional() 110 | .describe("Voice to be used for the speech, default is af_heart"), 111 | orientation: z 112 | .nativeEnum(OrientationEnum) 113 | .optional() 114 | .describe("Orientation of the video, default is portrait"), 115 | musicVolume: z 116 | .nativeEnum(MusicVolumeEnum) 117 | .optional() 118 | .describe("Volume of the music, default is high"), 119 | }); 120 | export type RenderConfig = z.infer<typeof renderConfig>; 121 | 122 | export type Voices = `${VoiceEnum}`; 123 | 124 | export type Video = { 125 | id: string; 126 | url: string; 127 | width: number; 128 | height: number; 129 | }; 130 | export type Caption = { 131 | text: string; 132 | startMs: number; 133 | endMs: number; 134 | }; 135 | 136 | export type CaptionLine = { 137 | texts: Caption[]; 138 | }; 139 | export type CaptionPage = { 140 | startMs: number; 141 | endMs: number; 142 | lines: CaptionLine[]; 143 | }; 144 | 145 | export const createShortInput = z.object({ 146 | scenes: z.array(sceneInput).describe("Each scene to be created"), 147 | config: renderConfig.describe("Configuration for rendering the video"), 148 | }); 149 | export type CreateShortInput = z.infer<typeof createShortInput>; 150 | 151 | export type VideoStatus = "processing" | "ready" | "failed"; 152 | 153 | export type Music = { 154 | file: string; 155 | start: number; 156 | end: number; 157 | mood: string; 158 | }; 159 | export type MusicForVideo = Music & { 160 | url: string; 161 | }; 162 | 163 | export type 
/**
 * Catalogue of the bundled background tracks, plus helpers to expose them to
 * the renderer and to verify that the files are present on disk.
 */
export class MusicManager {
  // One entry per bundled track: the file name inside the music directory,
  // the start/end bounds of the usable part of the track, and its mood tag.
  private static musicList: Music[] = [
    {
      file: "Sly Sky - Telecasted.mp3",
      start: 0,
      end: 152,
      mood: MusicMoodEnum.melancholic,
    },
    {
      file: "No.2 Remembering Her - Esther Abrami.mp3",
      start: 2,
      end: 134,
      mood: MusicMoodEnum.melancholic,
    },
    {
      file: "Champion - Telecasted.mp3",
      start: 0,
      end: 142,
      mood: MusicMoodEnum.chill,
    },
    {
      file: "Oh Please - Telecasted.mp3",
      start: 0,
      end: 154,
      mood: MusicMoodEnum.chill,
    },
    {
      file: "Jetski - Telecasted.mp3",
      start: 0,
      end: 142,
      mood: MusicMoodEnum.uneasy,
    },
    {
      file: "Phantom - Density & Time.mp3",
      start: 0,
      end: 178,
      mood: MusicMoodEnum.uneasy,
    },
    {
      file: "On The Hunt - Andrew Langdon.mp3",
      start: 0,
      end: 95,
      mood: MusicMoodEnum.uneasy,
    },
    {
      file: "Name The Time And Place - Telecasted.mp3",
      start: 0,
      end: 142,
      mood: MusicMoodEnum.excited,
    },
    {
      file: "Delayed Baggage - Ryan Stasik.mp3",
      start: 3,
      end: 108,
      mood: MusicMoodEnum.euphoric,
    },
    {
      file: "Like It Loud - Dyalla.mp3",
      start: 4,
      end: 160,
      mood: MusicMoodEnum.euphoric,
    },
    {
      file: "Organic Guitar House - Dyalla.mp3",
      start: 2,
      end: 160,
      mood: MusicMoodEnum.euphoric,
    },
    {
      file: "Honey, I Dismembered The Kids - Ezra Lipp.mp3",
      start: 2,
      end: 144,
      mood: MusicMoodEnum.dark,
    },
    {
      file: "Night Hunt - Jimena Contreras.mp3",
      start: 0,
      end: 88,
      mood: MusicMoodEnum.dark,
    },
    {
      file: "Curse of the Witches - Jimena Contreras.mp3",
      start: 0,
      end: 102,
      mood: MusicMoodEnum.dark,
    },
    {
      file: "Restless Heart - Jimena Contreras.mp3",
      start: 0,
      end: 94,
      mood: MusicMoodEnum.sad,
    },
    {
      file: "Heartbeat Of The Wind - Asher Fulero.mp3",
      start: 0,
      end: 124,
      mood: MusicMoodEnum.sad,
    },
    {
      file: "Hopeless - Jimena Contreras.mp3",
      start: 0,
      end: 250,
      mood: MusicMoodEnum.sad,
    },
    {
      file: "Touch - Anno Domini Beats.mp3",
      start: 0,
      end: 165,
      mood: MusicMoodEnum.happy,
    },
    {
      file: "Cafecito por la Manana - Cumbia Deli.mp3",
      start: 0,
      end: 184,
      mood: MusicMoodEnum.happy,
    },
    {
      file: "Aurora on the Boulevard - National Sweetheart.mp3",
      start: 0,
      end: 130,
      mood: MusicMoodEnum.happy,
    },
    {
      file: "Buckle Up - Jeremy Korpas.mp3",
      start: 0,
      end: 128,
      mood: MusicMoodEnum.angry,
    },
    {
      file: "Twin Engines - Jeremy Korpas.mp3",
      start: 0,
      end: 120,
      mood: MusicMoodEnum.angry,
    },
    {
      file: "Hopeful - Nat Keefe.mp3",
      start: 0,
      end: 175,
      mood: MusicMoodEnum.hopeful,
    },
    {
      file: "Hopeful Freedom - Asher Fulero.mp3",
      start: 1,
      end: 172,
      mood: MusicMoodEnum.hopeful,
    },
    {
      file: "Crystaline - Quincas Moreira.mp3",
      start: 0,
      end: 140,
      mood: MusicMoodEnum.contemplative,
    },
    {
      file: "Final Soliloquy - Asher Fulero.mp3",
      start: 1,
      end: 178,
      mood: MusicMoodEnum.contemplative,
    },
    {
      file: "Seagull - Telecasted.mp3",
      start: 0,
      end: 123,
      mood: MusicMoodEnum.funny,
    },
    {
      file: "Banjo Doops - Joel Cummins.mp3",
      start: 0,
      end: 98,
      mood: MusicMoodEnum.funny,
    },
    {
      file: "Baby Animals Playing - Joel Cummins.mp3",
      start: 0,
      end: 124,
      mood: MusicMoodEnum.funny,
    },
    {
      file: "Sinister - Anno Domini Beats.mp3",
      start: 0,
      end: 215,
      mood: MusicMoodEnum.dark,
    },
    {
      file: "Traversing - Godmode.mp3",
      start: 0,
      end: 95,
      mood: MusicMoodEnum.dark,
    },
  ];

  constructor(private config: Config) {}

  /**
   * Returns every bundled track together with the URL under which the local
   * server (on the configured port) serves the file.
   */
  public musicList(): MusicForVideo[] {
    const tracks: MusicForVideo[] = [];
    for (const track of MusicManager.musicList) {
      const encodedFile = encodeURIComponent(track.file);
      tracks.push({
        ...track,
        url: `http://localhost:${this.config.port}/api/music/${encodedFile}`,
      });
    }
    return tracks;
  }

  /** Tells whether the track's file is present in the music directory. */
  private musicFileExist(music: Music): boolean {
    const filePath = path.join(this.config.musicDirPath, music.file);
    return fs.existsSync(filePath);
  }

  /**
   * Verifies that every bundled track is present on disk.
   *
   * @throws Error naming the first missing file
   */
  public ensureMusicFilesExist(): void {
    const missing = this.musicList().find(
      (track) => !this.musicFileExist(track),
    );
    if (missing !== undefined) {
      throw new Error(`Music file not found: ${missing.file}`);
    }
  }
}
calculateVolume, 14 | createCaptionPages, 15 | shortVideoSchema, 16 | } from "../utils"; 17 | 18 | const { fontFamily } = loadFont(); // "Barlow Condensed" 19 | 20 | export const PortraitVideo: React.FC<z.infer<typeof shortVideoSchema>> = ({ 21 | scenes, 22 | music, 23 | config, 24 | }) => { 25 | const frame = useCurrentFrame(); 26 | const { fps } = useVideoConfig(); 27 | 28 | const captionBackgroundColor = config.captionBackgroundColor ?? "blue"; 29 | 30 | const activeStyle = { 31 | backgroundColor: captionBackgroundColor, 32 | padding: "10px", 33 | marginLeft: "-10px", 34 | marginRight: "-10px", 35 | borderRadius: "10px", 36 | }; 37 | 38 | const captionPosition = config.captionPosition ?? "center"; 39 | let captionStyle = {}; 40 | if (captionPosition === "top") { 41 | captionStyle = { top: 100 }; 42 | } 43 | if (captionPosition === "center") { 44 | captionStyle = { top: "50%", transform: "translateY(-50%)" }; 45 | } 46 | if (captionPosition === "bottom") { 47 | captionStyle = { bottom: 100 }; 48 | } 49 | 50 | const [musicVolume, musicMuted] = calculateVolume(config.musicVolume); 51 | 52 | return ( 53 | <AbsoluteFill style={{ backgroundColor: "white" }}> 54 | <Audio 55 | loop 56 | src={music.url} 57 | startFrom={music.start * fps} 58 | endAt={music.end * fps} 59 | volume={() => musicVolume} 60 | muted={musicMuted} 61 | /> 62 | 63 | {scenes.map((scene, i) => { 64 | const { captions, audio, video } = scene; 65 | const pages = createCaptionPages({ 66 | captions, 67 | lineMaxLength: 20, 68 | lineCount: 1, 69 | maxDistanceMs: 1000, 70 | }); 71 | 72 | // Calculate the start and end time of the scene 73 | const startFrame = 74 | scenes.slice(0, i).reduce((acc, curr) => { 75 | return acc + curr.audio.duration; 76 | }, 0) * fps; 77 | let durationInFrames = 78 | scenes.slice(0, i + 1).reduce((acc, curr) => { 79 | return acc + curr.audio.duration; 80 | }, 0) * fps; 81 | if (config.paddingBack && i === scenes.length - 1) { 82 | durationInFrames += (config.paddingBack / 1000) 
* fps; 83 | } 84 | 85 | return ( 86 | <Sequence 87 | from={startFrame} 88 | durationInFrames={durationInFrames} 89 | key={`scene-${i}`} 90 | > 91 | <OffthreadVideo src={video} muted /> 92 | <Audio src={audio.url} /> 93 | {pages.map((page, j) => { 94 | return ( 95 | <Sequence 96 | key={`scene-${i}-page-${j}`} 97 | from={Math.round((page.startMs / 1000) * fps)} 98 | durationInFrames={Math.round( 99 | ((page.endMs - page.startMs) / 1000) * fps, 100 | )} 101 | > 102 | <div 103 | style={{ 104 | position: "absolute", 105 | left: 0, 106 | width: "100%", 107 | ...captionStyle, 108 | }} 109 | > 110 | {page.lines.map((line, k) => { 111 | return ( 112 | <p 113 | style={{ 114 | fontSize: "6em", 115 | fontFamily: fontFamily, 116 | fontWeight: "black", 117 | color: "white", 118 | WebkitTextStroke: "2px black", 119 | WebkitTextFillColor: "white", 120 | textShadow: "0px 0px 10px black", 121 | textAlign: "center", 122 | width: "100%", 123 | // uppercase 124 | textTransform: "uppercase", 125 | }} 126 | key={`scene-${i}-page-${j}-line-${k}`} 127 | > 128 | {line.texts.map((text, l) => { 129 | const active = 130 | frame >= 131 | startFrame + (text.startMs / 1000) * fps && 132 | frame <= startFrame + (text.endMs / 1000) * fps; 133 | return ( 134 | <> 135 | <span 136 | style={{ 137 | fontWeight: "bold", 138 | ...(active ? activeStyle : {}), 139 | }} 140 | key={`scene-${i}-page-${j}-line-${k}-text-${l}`} 141 | > 142 | {text.text} 143 | </span> 144 | {l < line.texts.length - 1 ? 
" " : ""} 145 | </> 146 | ); 147 | })} 148 | </p> 149 | ); 150 | })} 151 | </div> 152 | </Sequence> 153 | ); 154 | })} 155 | </Sequence> 156 | ); 157 | })} 158 | </AbsoluteFill> 159 | ); 160 | }; 161 | ``` -------------------------------------------------------------------------------- /src/components/videos/LandscapeVideo.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import { 2 | AbsoluteFill, 3 | Sequence, 4 | useCurrentFrame, 5 | useVideoConfig, 6 | Audio, 7 | OffthreadVideo, 8 | } from "remotion"; 9 | import { z } from "zod"; 10 | import { loadFont } from "@remotion/google-fonts/BarlowCondensed"; 11 | 12 | import { 13 | calculateVolume, 14 | createCaptionPages, 15 | shortVideoSchema, 16 | } from "../utils"; 17 | 18 | const { fontFamily } = loadFont(); // "Barlow Condensed" 19 | 20 | export const LandscapeVideo: React.FC<z.infer<typeof shortVideoSchema>> = ({ 21 | scenes, 22 | music, 23 | config, 24 | }) => { 25 | const frame = useCurrentFrame(); 26 | const { fps } = useVideoConfig(); 27 | 28 | const captionBackgroundColor = config.captionBackgroundColor ?? "blue"; 29 | 30 | const activeStyle = { 31 | backgroundColor: captionBackgroundColor, 32 | padding: "10px", 33 | marginLeft: "-10px", 34 | marginRight: "-10px", 35 | borderRadius: "10px", 36 | }; 37 | 38 | const captionPosition = config.captionPosition ?? 
"center"; 39 | let captionStyle = {}; 40 | if (captionPosition === "top") { 41 | captionStyle = { top: 100 }; 42 | } 43 | if (captionPosition === "center") { 44 | captionStyle = { top: "50%", transform: "translateY(-50%)" }; 45 | } 46 | if (captionPosition === "bottom") { 47 | captionStyle = { bottom: 100 }; 48 | } 49 | 50 | const [musicVolume, musicMuted] = calculateVolume(config.musicVolume); 51 | 52 | return ( 53 | <AbsoluteFill style={{ backgroundColor: "white" }}> 54 | <Audio 55 | loop 56 | src={music.url} 57 | startFrom={music.start * fps} 58 | endAt={music.end * fps} 59 | volume={() => musicVolume} 60 | muted={musicMuted} 61 | /> 62 | 63 | {scenes.map((scene, i) => { 64 | const { captions, audio, video } = scene; 65 | const pages = createCaptionPages({ 66 | captions, 67 | lineMaxLength: 30, 68 | lineCount: 1, 69 | maxDistanceMs: 1000, 70 | }); 71 | 72 | // Calculate the start and end time of the scene 73 | const startFrame = 74 | scenes.slice(0, i).reduce((acc, curr) => { 75 | return acc + curr.audio.duration; 76 | }, 0) * fps; 77 | let durationInFrames = 78 | scenes.slice(0, i + 1).reduce((acc, curr) => { 79 | return acc + curr.audio.duration; 80 | }, 0) * fps; 81 | if (config.paddingBack && i === scenes.length - 1) { 82 | durationInFrames += (config.paddingBack / 1000) * fps; 83 | } 84 | 85 | return ( 86 | <Sequence 87 | from={startFrame} 88 | durationInFrames={durationInFrames} 89 | key={`scene-${i}`} 90 | > 91 | <OffthreadVideo src={video} muted /> 92 | <Audio src={audio.url} /> 93 | {pages.map((page, j) => { 94 | return ( 95 | <Sequence 96 | key={`scene-${i}-page-${j}`} 97 | from={Math.round((page.startMs / 1000) * fps)} 98 | durationInFrames={Math.round( 99 | ((page.endMs - page.startMs) / 1000) * fps, 100 | )} 101 | > 102 | <div 103 | style={{ 104 | position: "absolute", 105 | left: 0, 106 | width: "100%", 107 | ...captionStyle, 108 | }} 109 | > 110 | {page.lines.map((line, k) => { 111 | return ( 112 | <p 113 | style={{ 114 | fontSize: "8em", 115 | 
fontFamily: fontFamily, 116 | fontWeight: "black", 117 | color: "white", 118 | WebkitTextStroke: "2px black", 119 | WebkitTextFillColor: "white", 120 | textShadow: "0px 0px 10px black", 121 | textAlign: "center", 122 | width: "100%", 123 | // uppercase 124 | textTransform: "uppercase", 125 | }} 126 | key={`scene-${i}-page-${j}-line-${k}`} 127 | > 128 | {line.texts.map((text, l) => { 129 | const active = 130 | frame >= 131 | startFrame + (text.startMs / 1000) * fps && 132 | frame <= startFrame + (text.endMs / 1000) * fps; 133 | return ( 134 | <> 135 | <span 136 | style={{ 137 | fontWeight: "bold", 138 | ...(active ? activeStyle : {}), 139 | }} 140 | key={`scene-${i}-page-${j}-line-${k}-text-${l}`} 141 | > 142 | {text.text} 143 | </span> 144 | {l < line.texts.length - 1 ? " " : ""} 145 | </> 146 | ); 147 | })} 148 | </p> 149 | ); 150 | })} 151 | </div> 152 | </Sequence> 153 | ); 154 | })} 155 | </Sequence> 156 | ); 157 | })} 158 | </AbsoluteFill> 159 | ); 160 | }; 161 | ``` -------------------------------------------------------------------------------- /src/ui/pages/VideoList.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import React, { useState, useEffect } from 'react'; 2 | import { useNavigate } from 'react-router-dom'; 3 | import axios from 'axios'; 4 | import { 5 | Box, 6 | Typography, 7 | Paper, 8 | Button, 9 | CircularProgress, 10 | Alert, 11 | List, 12 | ListItem, 13 | ListItemText, 14 | ListItemSecondaryAction, 15 | IconButton, 16 | Divider 17 | } from '@mui/material'; 18 | import AddIcon from '@mui/icons-material/Add'; 19 | import PlayArrowIcon from '@mui/icons-material/PlayArrow'; 20 | import DeleteIcon from '@mui/icons-material/Delete'; 21 | 22 | interface VideoItem { 23 | id: string; 24 | status: string; 25 | } 26 | 27 | const VideoList: React.FC = () => { 28 | const navigate = useNavigate(); 29 | const [videos, setVideos] = useState<VideoItem[]>([]); 30 | const [loading, setLoading] = 
useState(true); 31 | const [error, setError] = useState<string | null>(null); 32 | 33 | const fetchVideos = async () => { 34 | try { 35 | const response = await axios.get('/api/short-videos'); 36 | setVideos(response.data.videos || []); 37 | setLoading(false); 38 | } catch (err) { 39 | setError('Failed to fetch videos'); 40 | setLoading(false); 41 | console.error('Error fetching videos:', err); 42 | } 43 | }; 44 | 45 | useEffect(() => { 46 | fetchVideos(); 47 | }, []); 48 | 49 | const handleCreateNew = () => { 50 | navigate('/create'); 51 | }; 52 | 53 | const handleVideoClick = (id: string) => { 54 | navigate(`/video/${id}`); 55 | }; 56 | 57 | const handleDeleteVideo = async (id: string, event: React.MouseEvent<HTMLButtonElement>) => { 58 | event.stopPropagation(); 59 | 60 | try { 61 | await axios.delete(`/api/short-video/${id}`); 62 | fetchVideos(); 63 | } catch (err) { 64 | setError('Failed to delete video'); 65 | console.error('Error deleting video:', err); 66 | } 67 | }; 68 | 69 | const capitalizeFirstLetter = (str: string) => { 70 | if (!str || typeof str !== 'string') return 'Unknown'; 71 | return str.charAt(0).toUpperCase() + str.slice(1); 72 | }; 73 | 74 | if (loading) { 75 | return ( 76 | <Box display="flex" justifyContent="center" alignItems="center" height="80vh"> 77 | <CircularProgress /> 78 | </Box> 79 | ); 80 | } 81 | 82 | return ( 83 | <Box maxWidth="md" mx="auto" py={4}> 84 | <Box display="flex" justifyContent="space-between" alignItems="center" mb={4}> 85 | <Typography variant="h4" component="h1"> 86 | Your Videos 87 | </Typography> 88 | <Button 89 | variant="contained" 90 | color="primary" 91 | startIcon={<AddIcon />} 92 | onClick={handleCreateNew} 93 | > 94 | Create New Video 95 | </Button> 96 | </Box> 97 | 98 | {error && ( 99 | <Alert severity="error" sx={{ mb: 3 }}>{error}</Alert> 100 | )} 101 | 102 | {videos.length === 0 ? 
( 103 | <Paper sx={{ p: 4, textAlign: 'center' }}> 104 | <Typography variant="body1" color="text.secondary" gutterBottom> 105 | You haven't created any videos yet. 106 | </Typography> 107 | <Button 108 | variant="outlined" 109 | startIcon={<AddIcon />} 110 | onClick={handleCreateNew} 111 | sx={{ mt: 2 }} 112 | > 113 | Create Your First Video 114 | </Button> 115 | </Paper> 116 | ) : ( 117 | <Paper> 118 | <List> 119 | {videos.map((video, index) => { 120 | const videoId = video?.id || ''; 121 | const videoStatus = video?.status || 'unknown'; 122 | 123 | return ( 124 | <div key={videoId}> 125 | {index > 0 && <Divider />} 126 | <ListItem 127 | button 128 | onClick={() => handleVideoClick(videoId)} 129 | sx={{ 130 | py: 2, 131 | '&:hover': { 132 | backgroundColor: 'rgba(0, 0, 0, 0.04)' 133 | } 134 | }} 135 | > 136 | <ListItemText 137 | primary={`Video ${videoId.substring(0, 8)}...`} 138 | secondary={ 139 | <Typography 140 | component="span" 141 | variant="body2" 142 | color={ 143 | videoStatus === 'ready' ? 'success.main' : 144 | videoStatus === 'processing' ? 'info.main' : 145 | videoStatus === 'failed' ? 
/* eslint-disable @remotion/deterministic-randomness */

// Generic fallback queries that are tried after the caller's own search terms.
const jokerTerms: readonly string[] = ["nature", "globe", "space", "ocean"];
// Extra seconds a video must have on top of the requested minimum duration.
const durationBufferSeconds = 3;
const defaultTimeoutMs = 5000;
const retryTimes = 3;

/** The fields of a Pexels `video_files` entry that are read in this module. */
interface PexelsVideoFile {
  fps: number;
  quality: string;
  width: number;
  height: number;
  id: string;
  link: string;
}

/** The fields of a Pexels search result that are read in this module. */
interface PexelsVideo {
  // NOTE(review): typed as string to match the project's `Video.id`; the Pexels
  // documentation describes ids as integers, so never compare them with `===`
  // against caller-provided ids without normalising first.
  id: string;
  duration: number;
  video_files: PexelsVideoFile[];
}

/**
 * Returns a uniformly shuffled copy of `items`; the input array is not touched.
 */
function shuffled<T>(items: readonly T[]): T[] {
  const pool = [...items];
  const result: T[] = [];
  while (pool.length > 0) {
    const pick = Math.floor(Math.random() * pool.length);
    result.push(...pool.splice(pick, 1));
  }
  return result;
}

/** True for the error produced when the `AbortSignal.timeout` signal fires. */
function isTimeoutError(error: unknown): error is Error {
  return error instanceof Error && error.name === "TimeoutError";
}

/**
 * Thin client for the Pexels video search endpoint that picks a random stock
 * video matching a search term, a minimum duration and an orientation.
 */
export class PexelsAPI {
  constructor(private API_KEY: string) {}

  /**
   * Calls the Pexels search endpoint and returns the `videos` array of the
   * response (possibly `undefined` when the payload has no such field).
   *
   * @throws Error on a non-2xx response (with a dedicated message for 401),
   *   or whatever `fetch` rejects with (including the timeout error). Every
   *   failure is logged before it is rethrown.
   */
  private async searchVideos(
    searchTerm: string,
    orientation: OrientationEnum,
    timeout: number,
  ): Promise<PexelsVideo[] | undefined> {
    const headers = new Headers();
    headers.append("Authorization", this.API_KEY);
    try {
      const res = await fetch(
        `https://api.pexels.com/videos/search?orientation=${orientation}&size=medium&per_page=80&query=${encodeURIComponent(searchTerm)}`,
        {
          method: "GET",
          headers,
          redirect: "follow",
          signal: AbortSignal.timeout(timeout),
        },
      );
      if (!res.ok) {
        if (res.status === 401) {
          throw new Error(
            "Invalid Pexels API key - please make sure you get a valid key from https://www.pexels.com/api and set it in the environment variable PEXELS_API_KEY",
          );
        }
        throw new Error(`Pexels API error: ${res.status} ${res.statusText}`);
      }
      // The payload is external data; only the `videos` field is relied upon.
      const payload = (await res.json()) as {
        videos?: PexelsVideo[];
      } | null;
      return payload?.videos;
    } catch (error: unknown) {
      logger.error(error, "Error fetching videos from Pexels API");
      throw error;
    }
  }

  /**
   * Searches Pexels for `searchTerm` and returns one random matching video.
   *
   * A video matches when it is not listed in `excludeIds`, is at least
   * `minDurationSeconds + durationBufferSeconds` long and offers an "hd" file
   * whose dimensions equal the ones `getOrientationConfig` reports for the
   * orientation.
   *
   * @throws Error when the API key is empty, the request fails, or nothing
   *   matches ("No videos found").
   */
  private async _findVideo(
    searchTerm: string,
    minDurationSeconds: number,
    excludeIds: string[],
    orientation: OrientationEnum,
    timeout: number,
  ): Promise<Video> {
    if (!this.API_KEY) {
      throw new Error("API key not set");
    }
    logger.debug(
      { searchTerm, minDurationSeconds, orientation },
      "Searching for video in Pexels API",
    );

    const videos = await this.searchVideos(searchTerm, orientation, timeout);

    const { width: requiredVideoWidth, height: requiredVideoHeight } =
      getOrientationConfig(orientation);

    if (!videos || videos.length === 0) {
      logger.error(
        { searchTerm, orientation },
        "No videos found in Pexels API",
      );
      throw new Error("No videos found");
    }

    // Compare ids by their string form so that "123" and 123 exclude each other.
    const excluded = new Set(excludeIds.map((id) => String(id)));

    // find all the videos that fits the criteria, then select one randomly
    const candidates: Video[] = videos.flatMap((video) => {
      if (excluded.has(String(video.id))) {
        return [];
      }
      const firstFile = video.video_files?.[0];
      if (!firstFile) {
        return [];
      }

      // calculate the real duration of the video by converting the FPS to 25
      const fps = firstFile.fps;
      const duration =
        fps < 25 ? video.duration * (fps / 25) : video.duration;
      if (duration < minDurationSeconds + durationBufferSeconds) {
        return [];
      }

      const file = video.video_files.find(
        (candidate) =>
          candidate.quality === "hd" &&
          candidate.width === requiredVideoWidth &&
          candidate.height === requiredVideoHeight,
      );
      return file
        ? [
            {
              id: video.id,
              url: file.link,
              width: file.width,
              height: file.height,
            },
          ]
        : [];
    });

    if (!candidates.length) {
      logger.error({ searchTerm }, "No videos found in Pexels API");
      throw new Error("No videos found");
    }

    const video = candidates[Math.floor(Math.random() * candidates.length)];

    logger.debug(
      { searchTerm, video: video, minDurationSeconds, orientation },
      "Found video from Pexels API",
    );

    return video;
  }

  /**
   * Finds a video for the first search term that yields a match.
   *
   * The caller's terms are tried in random order, followed by the generic
   * joker terms in random order. Neither `searchTerms` nor the module-level
   * joker list is modified. A failure for one term is logged and the next term
   * is tried; a timeout instead restarts the whole search, up to `retryTimes`
   * times, after which the timeout error is rethrown.
   *
   * @param searchTerms terms to query, in any order
   * @param minDurationSeconds minimum usable length of the video
   * @param excludeIds ids of videos that must not be returned
   * @param orientation requested orientation (defaults to portrait)
   * @param timeout per-request timeout in milliseconds
   * @param retryCounter number of timeout retries already consumed
   * @throws Error "No videos found in Pexels API" when no term yields a match
   */
  async findVideo(
    searchTerms: string[],
    minDurationSeconds: number,
    excludeIds: string[] = [],
    orientation: OrientationEnum = OrientationEnum.portrait,
    timeout: number = defaultTimeoutMs,
    retryCounter: number = 0,
  ): Promise<Video> {
    // shuffle copies of the terms to randomize the search order
    const termsToTry = [...shuffled(searchTerms), ...shuffled(jokerTerms)];

    for (const searchTerm of termsToTry) {
      try {
        return await this._findVideo(
          searchTerm,
          minDurationSeconds,
          excludeIds,
          orientation,
          timeout,
        );
      } catch (error: unknown) {
        if (isTimeoutError(error)) {
          if (retryCounter < retryTimes) {
            logger.warn(
              { searchTerm, retryCounter },
              "Timeout error, retrying...",
            );
            return await this.findVideo(
              searchTerms,
              minDurationSeconds,
              excludeIds,
              orientation,
              timeout,
              retryCounter + 1,
            );
          }
          logger.error(
            { searchTerm, retryCounter },
            "Timeout error, retry limit reached",
          );
          throw error;
        }

        logger.error(error, "Error finding video in Pexels API for term");
      }
    }
    logger.error(
      { searchTerms },
      "No videos found in Pexels API for the given terms",
    );
    throw new Error("No videos found in Pexels API");
  }
}
(videoStatus !== 'processing') { 36 | console.log("video is not processing"); 37 | console.log("interval", intervalRef.current); 38 | 39 | if (intervalRef.current) { 40 | console.log("clearing interval"); 41 | clearInterval(intervalRef.current); 42 | intervalRef.current = null; 43 | } 44 | } 45 | 46 | setLoading(false); 47 | } 48 | } catch (error) { 49 | if (isMounted.current) { 50 | setError('Failed to fetch video status'); 51 | setStatus('failed'); 52 | setLoading(false); 53 | console.error('Error fetching video status:', error); 54 | 55 | if (intervalRef.current) { 56 | clearInterval(intervalRef.current); 57 | intervalRef.current = null; 58 | } 59 | } 60 | } 61 | }; 62 | 63 | useEffect(() => { 64 | checkVideoStatus(); 65 | 66 | intervalRef.current = setInterval(() => { 67 | checkVideoStatus(); 68 | }, 5000); 69 | 70 | return () => { 71 | isMounted.current = false; 72 | if (intervalRef.current) { 73 | clearInterval(intervalRef.current); 74 | intervalRef.current = null; 75 | } 76 | }; 77 | }, [videoId]); 78 | 79 | const handleBack = () => { 80 | navigate('/'); 81 | }; 82 | 83 | const renderContent = () => { 84 | if (loading) { 85 | return ( 86 | <Box display="flex" justifyContent="center" alignItems="center" minHeight="30vh"> 87 | <CircularProgress /> 88 | </Box> 89 | ); 90 | } 91 | 92 | if (error) { 93 | return <Alert severity="error">{error}</Alert>; 94 | } 95 | 96 | if (status === 'processing') { 97 | return ( 98 | <Box textAlign="center" py={4}> 99 | <CircularProgress size={60} sx={{ mb: 2 }} /> 100 | <Typography variant="h6">Your video is being created...</Typography> 101 | <Typography variant="body1" color="text.secondary"> 102 | This may take a few minutes. Please wait. 103 | </Typography> 104 | </Box> 105 | ); 106 | } 107 | 108 | if (status === 'ready') { 109 | return ( 110 | <Box> 111 | <Box mb={3} textAlign="center"> 112 | <Typography variant="h6" color="success.main" gutterBottom> 113 | Your video is ready! 
114 | </Typography> 115 | </Box> 116 | 117 | <Box sx={{ 118 | position: 'relative', 119 | paddingTop: '56.25%', 120 | mb: 3, 121 | backgroundColor: '#000' 122 | }}> 123 | <video 124 | controls 125 | autoPlay 126 | style={{ 127 | position: 'absolute', 128 | top: 0, 129 | left: 0, 130 | width: '100%', 131 | height: '100%', 132 | }} 133 | src={`/api/short-video/${videoId}`} 134 | /> 135 | </Box> 136 | 137 | <Box textAlign="center"> 138 | <Button 139 | component="a" 140 | href={`/api/short-video/${videoId}`} 141 | download 142 | variant="contained" 143 | color="primary" 144 | startIcon={<DownloadIcon />} 145 | sx={{ textDecoration: 'none' }} 146 | > 147 | Download Video 148 | </Button> 149 | </Box> 150 | </Box> 151 | ); 152 | } 153 | 154 | if (status === 'failed') { 155 | return ( 156 | <Alert severity="error" sx={{ mb: 3 }}> 157 | Video processing failed. Please try again with different settings. 158 | </Alert> 159 | ); 160 | } 161 | 162 | return ( 163 | <Alert severity="info" sx={{ mb: 3 }}> 164 | Unknown video status. Please try refreshing the page. 
165 | </Alert> 166 | ); 167 | }; 168 | 169 | const capitalizeFirstLetter = (str: string) => { 170 | if (!str || typeof str !== 'string') return 'Unknown'; 171 | return str.charAt(0).toUpperCase() + str.slice(1); 172 | }; 173 | 174 | return ( 175 | <Box maxWidth="md" mx="auto" py={4}> 176 | <Box display="flex" alignItems="center" mb={3}> 177 | <Button 178 | startIcon={<ArrowBackIcon />} 179 | onClick={handleBack} 180 | sx={{ mr: 2 }} 181 | > 182 | Back to videos 183 | </Button> 184 | <Typography variant="h4" component="h1"> 185 | Video Details 186 | </Typography> 187 | </Box> 188 | 189 | <Paper sx={{ p: 3 }}> 190 | <Grid container spacing={2} mb={3}> 191 | <Grid item xs={12} sm={6}> 192 | <Typography variant="body2" color="text.secondary"> 193 | Video ID 194 | </Typography> 195 | <Typography variant="body1"> 196 | {videoId || 'Unknown'} 197 | </Typography> 198 | </Grid> 199 | <Grid item xs={12} sm={6}> 200 | <Typography variant="body2" color="text.secondary"> 201 | Status 202 | </Typography> 203 | <Typography 204 | variant="body1" 205 | color={ 206 | status === 'ready' ? 'success.main' : 207 | status === 'processing' ? 'info.main' : 208 | status === 'failed' ? 
/**
 * Resolves `fileName` against `baseDir` and returns the resulting absolute path
 * only when it lies strictly inside `baseDir`.
 *
 * Route parameters are attacker-controlled, so a value such as
 * `../../etc/passwd` must never be joined onto a directory unchecked.
 *
 * @returns the absolute path, or `undefined` when `fileName` points at the
 *   directory itself, at a parent/sibling, or at an unrelated absolute path
 */
function resolveInsideDir(
  baseDir: string,
  fileName: string,
): string | undefined {
  const root = path.resolve(baseDir);
  const candidate = path.resolve(root, fileName);
  const relative = path.relative(root, candidate);
  const escapes =
    relative === "" ||
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  return escapes ? undefined : candidate;
}

// Content types for the file name suffixes this API streams.
const contentTypesBySuffix: Readonly<Record<string, string>> = {
  ".mp3": "audio/mpeg",
  ".wav": "audio/wav",
  ".mp4": "video/mp4",
};

// todo abstract class
/**
 * REST router of the short-video service: queueing, status, listing, deletion
 * and download of videos, plus streaming of temp files and music tracks.
 */
export class APIRouter {
  public router: express.Router;
  private shortCreator: ShortCreator;
  private config: Config;

  constructor(config: Config, shortCreator: ShortCreator) {
    this.config = config;
    this.router = express.Router();
    this.shortCreator = shortCreator;

    this.router.use(express.json());

    this.setupRoutes();
  }

  /**
   * Streams `filePath` to the client.
   *
   * If the read fails before anything was sent, a 500 JSON body is returned;
   * if the response has already started, the connection is destroyed instead,
   * because status and headers can no longer be changed at that point.
   */
  private streamFile(
    res: ExpressResponse,
    filePath: string,
    fileName: string,
    failure: { message: string; body: Record<string, string> },
  ): void {
    for (const [suffix, contentType] of Object.entries(contentTypesBySuffix)) {
      if (fileName.endsWith(suffix)) {
        res.setHeader("Content-Type", contentType);
      }
    }

    const stream = fs.createReadStream(filePath);
    stream.on("error", (error) => {
      logger.error(error, failure.message);
      if (res.headersSent) {
        res.destroy(error);
        return;
      }
      res.status(500).json(failure.body);
    });
    stream.pipe(res);
  }

  /** Registers every route of the API on `this.router`. */
  private setupRoutes() {
    // Queue a new short video; responds with 201 and the id of the queued video.
    this.router.post(
      "/short-video",
      (req: ExpressRequest, res: ExpressResponse) => {
        try {
          const input = validateCreateShortInput(req.body);

          logger.info({ input }, "Creating short video");

          const videoId = this.shortCreator.addToQueue(
            input.scenes,
            input.config,
          );

          res.status(201).json({
            videoId,
          });
        } catch (error: unknown) {
          logger.error(error, "Error validating input");

          // Handle validation errors specifically: their message is a JSON
          // document carrying `message` and `missingFields`.
          if (error instanceof Error && error.message.startsWith("{")) {
            try {
              const errorData = JSON.parse(error.message);
              res.status(400).json({
                error: "Validation failed",
                message: errorData.message,
                missingFields: errorData.missingFields,
              });
              return;
            } catch (parseError: unknown) {
              logger.error(parseError, "Error parsing validation error");
            }
          }

          // Fallback for other errors
          res.status(400).json({
            error: "Invalid input",
            message: error instanceof Error ? error.message : "Unknown error",
          });
        }
      },
    );

    // Kept synchronous on purpose: the handler awaits nothing, and a
    // synchronous throw is always routed to Express's error handling, which is
    // not guaranteed for a rejected promise (Express 4 does not forward it).
    this.router.get(
      "/short-video/:videoId/status",
      (req: ExpressRequest, res: ExpressResponse) => {
        const { videoId } = req.params;
        if (!videoId) {
          res.status(400).json({
            error: "videoId is required",
          });
          return;
        }
        const status = this.shortCreator.status(videoId);
        res.status(200).json({
          status,
        });
      },
    );

    this.router.get(
      "/music-tags",
      (_req: ExpressRequest, res: ExpressResponse) => {
        res.status(200).json(this.shortCreator.ListAvailableMusicTags());
      },
    );

    this.router.get("/voices", (_req: ExpressRequest, res: ExpressResponse) => {
      res.status(200).json(this.shortCreator.ListAvailableVoices());
    });

    this.router.get(
      "/short-videos",
      (_req: ExpressRequest, res: ExpressResponse) => {
        const videos = this.shortCreator.listAllVideos();
        res.status(200).json({
          videos,
        });
      },
    );

    this.router.delete(
      "/short-video/:videoId",
      (req: ExpressRequest, res: ExpressResponse) => {
        const { videoId } = req.params;
        if (!videoId) {
          res.status(400).json({
            error: "videoId is required",
          });
          return;
        }
        this.shortCreator.deleteVideo(videoId);
        res.status(200).json({
          success: true,
        });
      },
    );

    // Streams a file from the temp directory.
    this.router.get(
      "/tmp/:tmpFile",
      (req: ExpressRequest, res: ExpressResponse) => {
        const { tmpFile } = req.params;
        if (!tmpFile) {
          res.status(400).json({
            error: "tmpFile is required",
          });
          return;
        }
        // A name that escapes the temp directory is reported exactly like a
        // missing file, so nothing outside the directory can be probed.
        const tmpFilePath = resolveInsideDir(this.config.tempDirPath, tmpFile);
        if (!tmpFilePath || !fs.existsSync(tmpFilePath)) {
          res.status(404).json({
            error: "tmpFile not found",
          });
          return;
        }

        this.streamFile(res, tmpFilePath, tmpFile, {
          message: "Error reading tmp file",
          body: { error: "Error reading tmp file", tmpFile },
        });
      },
    );

    // Streams a music track from the music directory.
    this.router.get(
      "/music/:fileName",
      (req: ExpressRequest, res: ExpressResponse) => {
        const { fileName } = req.params;
        if (!fileName) {
          res.status(400).json({
            error: "fileName is required",
          });
          return;
        }
        const musicFilePath = resolveInsideDir(
          this.config.musicDirPath,
          fileName,
        );
        if (!musicFilePath || !fs.existsSync(musicFilePath)) {
          res.status(404).json({
            error: "music file not found",
          });
          return;
        }

        this.streamFile(res, musicFilePath, fileName, {
          message: "Error reading music file",
          body: { error: "Error reading music file", fileName },
        });
      },
    );

    // Sends the rendered video; any failure to load it is reported as 404.
    this.router.get(
      "/short-video/:videoId",
      (req: ExpressRequest, res: ExpressResponse) => {
        try {
          const { videoId } = req.params;
          if (!videoId) {
            res.status(400).json({
              error: "videoId is required",
            });
            return;
          }
          const video = this.shortCreator.getVideo(videoId);
          res.setHeader("Content-Type", "video/mp4");
          res.setHeader(
            "Content-Disposition",
            `inline; filename=${videoId}.mp4`,
          );
          res.send(video);
        } catch (error: unknown) {
          logger.error(error, "Error getting video");
          res.status(404).json({
            error: "Video not found",
          });
        }
      },
    );
  }
}
| // fs-extra specific methods 34 | ensureDirSync: vi.fn((path) => { 35 | try { 36 | memfs.mkdirSync(path, { recursive: true }); 37 | } catch (error) {} 38 | }), 39 | removeSync: vi.fn((path) => { 40 | try { 41 | const stats = memfs.statSync(path); 42 | if (stats.isDirectory()) { 43 | // This is simplified and won't handle nested directories 44 | memfs.rmdirSync(path); 45 | } else { 46 | memfs.unlinkSync(path); 47 | } 48 | } catch (error) {} 49 | }), 50 | createWriteStream: vi.fn(() => ({ 51 | on: vi.fn(), 52 | write: vi.fn(), 53 | end: vi.fn(), 54 | })), 55 | readFileSync: vi.fn((path) => { 56 | return memfs.readFileSync(path); 57 | }), 58 | }; 59 | return { 60 | ...fsExtra, 61 | default: fsExtra, 62 | }; 63 | }); 64 | 65 | // Mock fluent-ffmpeg 66 | vi.mock("fluent-ffmpeg", () => { 67 | const mockOn = vi.fn().mockReturnThis(); 68 | const mockSave = vi.fn().mockReturnThis(); 69 | const mockPipe = vi.fn().mockReturnThis(); 70 | 71 | const ffmpegMock = vi.fn(() => ({ 72 | input: vi.fn().mockReturnThis(), 73 | audioCodec: vi.fn().mockReturnThis(), 74 | audioBitrate: vi.fn().mockReturnThis(), 75 | audioChannels: vi.fn().mockReturnThis(), 76 | audioFrequency: vi.fn().mockReturnThis(), 77 | toFormat: vi.fn().mockReturnThis(), 78 | on: mockOn, 79 | save: mockSave, 80 | pipe: mockPipe, 81 | })); 82 | 83 | ffmpegMock.setFfmpegPath = vi.fn(); 84 | 85 | return { default: ffmpegMock }; 86 | }); 87 | 88 | // mock kokoro-js 89 | vi.mock("kokoro-js", () => { 90 | return { 91 | KokoroTTS: { 92 | from_pretrained: vi.fn().mockResolvedValue({ 93 | generate: vi.fn().mockResolvedValue({ 94 | toWav: vi.fn().mockReturnValue(new ArrayBuffer(8)), 95 | audio: new ArrayBuffer(8), 96 | sampling_rate: 44100, 97 | }), 98 | }), 99 | }, 100 | }; 101 | }); 102 | 103 | // mock remotion 104 | vi.mock("@remotion/bundler", () => { 105 | return { 106 | bundle: vi.fn().mockResolvedValue("mocked-bundled-url"), 107 | }; 108 | }); 109 | vi.mock("@remotion/renderer", () => { 110 | return { 111 | 
renderMedia: vi.fn().mockResolvedValue(undefined), 112 | selectComposition: vi.fn().mockResolvedValue({ 113 | width: 1080, 114 | height: 1920, 115 | fps: 30, 116 | durationInFrames: 300, 117 | }), 118 | ensureBrowser: vi.fn().mockResolvedValue(undefined), 119 | }; 120 | }); 121 | 122 | // mock whisper 123 | vi.mock("@remotion/install-whisper-cpp", () => { 124 | return { 125 | downloadWhisperModel: vi.fn().mockResolvedValue(undefined), 126 | installWhisperCpp: vi.fn().mockResolvedValue(undefined), 127 | transcribe: vi.fn().mockResolvedValue({ 128 | transcription: [ 129 | { 130 | text: "This is a mock transcription.", 131 | offsets: { from: 0, to: 2000 }, 132 | tokens: [ 133 | { text: "This", timestamp: { from: 0, to: 500 } }, 134 | { text: " is", timestamp: { from: 500, to: 800 } }, 135 | { text: " a", timestamp: { from: 800, to: 1000 } }, 136 | { text: " mock", timestamp: { from: 1000, to: 1500 } }, 137 | { text: " transcription.", timestamp: { from: 1500, to: 2000 } }, 138 | ], 139 | }, 140 | ], 141 | }), 142 | }; 143 | }); 144 | 145 | test("test me", async () => { 146 | const kokoro = await Kokoro.init("fp16"); 147 | const ffmpeg = await FFMpeg.init(); 148 | 149 | vi.spyOn(ffmpeg, "saveNormalizedAudio").mockResolvedValue("mocked-path.wav"); 150 | vi.spyOn(ffmpeg, "saveToMp3").mockResolvedValue("mocked-path.mp3"); 151 | 152 | const pexelsAPI = new PexelsAPI("mock-api-key"); 153 | vi.spyOn(pexelsAPI, "findVideo").mockResolvedValue({ 154 | id: "mock-video-id-1", 155 | url: "https://example.com/mock-video-1.mp4", 156 | width: 1080, 157 | height: 1920, 158 | }); 159 | 160 | const config = new Config(); 161 | const remotion = await Remotion.init(config); 162 | 163 | // control the render promise resolution 164 | let resolveRenderPromise: () => void; 165 | const renderPromiseMock: Promise<void> = new Promise((resolve) => { 166 | resolveRenderPromise = resolve; 167 | }); 168 | vi.spyOn(remotion, "render").mockReturnValue(renderPromiseMock); 169 | 170 | const whisper = 
await Whisper.init(config); 171 | 172 | vi.spyOn(whisper, "CreateCaption").mockResolvedValue([ 173 | { text: "This", startMs: 0, endMs: 500 }, 174 | { text: " is", startMs: 500, endMs: 800 }, 175 | { text: " a", startMs: 800, endMs: 1000 }, 176 | { text: " mock", startMs: 1000, endMs: 1500 }, 177 | { text: " transcription.", startMs: 1500, endMs: 2000 }, 178 | ]); 179 | 180 | const musicManager = new MusicManager(config); 181 | 182 | const shortCreator = new ShortCreator( 183 | config, 184 | remotion, 185 | kokoro, 186 | whisper, 187 | ffmpeg, 188 | pexelsAPI, 189 | musicManager, 190 | ); 191 | 192 | const videoId = shortCreator.addToQueue( 193 | [ 194 | { 195 | text: "test", 196 | searchTerms: ["test"], 197 | }, 198 | ], 199 | {}, 200 | ); 201 | 202 | // list videos while the video is being processed 203 | let videos = shortCreator.listAllVideos(); 204 | expect(videos.find((v) => v.id === videoId)?.status).toBe("processing"); 205 | 206 | // create the video file on the file system and check the status again 207 | fs.writeFileSync(shortCreator.getVideoPath(videoId), "mock video content"); 208 | videos = shortCreator.listAllVideos(); 209 | expect(videos.find((v) => v.id === videoId)?.status).toBe("processing"); 210 | 211 | // resolve the render promise to simulate the video being processed, and check the status again 212 | resolveRenderPromise(); 213 | await new Promise((resolve) => setTimeout(resolve, 100)); // let the queue process the video 214 | videos = shortCreator.listAllVideos(); 215 | expect(videos.find((v) => v.id === videoId)?.status).toBe("ready"); 216 | 217 | // check the status of the video directly 218 | const status = shortCreator.status(videoId); 219 | expect(status).toBe("ready"); 220 | }); 221 | ``` -------------------------------------------------------------------------------- /src/components/root/Root.tsx: -------------------------------------------------------------------------------- ```typescript 1 | import { CalculateMetadataFunction, 
/** Frame rate shared by the portrait and landscape compositions. */
const FPS = 25;

/**
 * Computes the composition length from the requested video duration.
 *
 * `props.config.durationMs` is converted to frames at {@link FPS} and rounded
 * down. The props are returned under the `props` key (instead of being spread
 * into the metadata object, where their keys are not metadata fields), so the
 * composition receives them unchanged.
 *
 * @param options - Remotion metadata options; only `props` is used.
 * @returns The resolved props and the duration in frames.
 */
export const calculateMetadata: CalculateMetadataFunction<
  z.infer<typeof shortVideoSchema>
> = async ({ props }) => {
  const durationInFrames = Math.floor((props.config.durationMs / 1000) * FPS);
  return {
    props,
    durationInFrames,
  };
};

/**
 * Registers the Remotion compositions of the project:
 * the portrait video (1080x1920), the landscape video (1920x1080) and a small
 * 100x100 test composition.
 *
 * The `defaultProps` are sample data. For the portrait and landscape
 * compositions the static `durationInFrames` is only a placeholder, since
 * `calculateMetadata` returns the duration derived from `config.durationMs`.
 */
export const RemotionRoot: React.FC = () => {
  return (
    <>
      <Composition
        id={AvailableComponentsEnum.PortraitVideo}
        component={PortraitVideo}
        durationInFrames={30}
        fps={FPS}
        width={1080}
        height={1920}
        defaultProps={{
          music: {
            url:
              "http://localhost:3123/api/music/" +
              encodeURIComponent(
                "Aurora on the Boulevard - National Sweetheart.mp3",
              ),
            file: "mellow-smooth-rap-beat-20230107-132480.mp3",
            start: 0,
            end: 175,
          },
          scenes: [
            {
              captions: [
                { text: " Hello", startMs: 390, endMs: 990 },
                { text: " World.", startMs: 990, endMs: 2000 },
              ],
              video:
                "https://videos.pexels.com/video-files/4625747/4625747-hd_1080_1920_24fps.mp4",
              audio: {
                url: "http://localhost:3123/api/tmp/cma1lgean0001rlsi52b8h3n3.mp3",
                duration: 3.15,
              },
            },
          ],
          config: {
            durationMs: 4650,
            paddingBack: 1500,
            captionBackgroundColor: "blue",
            captionPosition: "bottom",
          },
        }}
        calculateMetadata={calculateMetadata}
      />
      <Composition
        id={AvailableComponentsEnum.LandscapeVideo}
        component={LandscapeVideo}
        durationInFrames={30}
        fps={FPS}
        width={1920}
        height={1080}
        defaultProps={{
          music: {
            url:
              "http://localhost:3123/api/music/" +
              encodeURIComponent(
                "Aurora on the Boulevard - National Sweetheart.mp3",
              ),
            file: "mellow-smooth-rap-beat-20230107-132480.mp3",
            start: 0,
            end: 175,
          },
          scenes: [
            {
              captions: [
                { text: " A", startMs: 110, endMs: 320 },
                { text: " week", startMs: 320, endMs: 590 },
                { text: " ago,", startMs: 590, endMs: 1220 },
                { text: " a", startMs: 1220, endMs: 1280 },
                { text: " friend", startMs: 1280, endMs: 1490 },
                { text: " invited", startMs: 1490, endMs: 1820 },
                { text: " a", startMs: 1820, endMs: 1880 },
                { text: " couple", startMs: 1880, endMs: 2310 },
                { text: " of", startMs: 2310, endMs: 2350 },
                { text: " other", startMs: 2350, endMs: 2640 },
                { text: " couples", startMs: 2640, endMs: 3080 },
                { text: " over", startMs: 3080, endMs: 3400 },
                { text: " for", startMs: 3400, endMs: 3620 },
                { text: " dinner.", startMs: 3620, endMs: 4340 },
                { text: " Eventually,", startMs: 4340, endMs: 5520 },
                { text: " the", startMs: 5520, endMs: 5550 },
                { text: " food,", startMs: 5550, endMs: 6300 },
                { text: " but", startMs: 6300, endMs: 6360 },
                { text: " not", startMs: 6360, endMs: 6540 },
                { text: " the", startMs: 6540, endMs: 6780 },
                { text: " wine,", startMs: 6780, endMs: 7210 },
                { text: " was", startMs: 7210, endMs: 7400 },
                { text: " cleared", startMs: 7400, endMs: 7870 },
                { text: " off", startMs: 7870, endMs: 7980 },
                { text: " the", startMs: 7980, endMs: 8180 },
                { text: " table", startMs: 8180, endMs: 8480 },
                { text: " for", startMs: 8480, endMs: 8770 },
                { text: " what", startMs: 8770, endMs: 8880 },
                { text: " turned", startMs: 8880, endMs: 9230 },
                { text: " out", startMs: 9230, endMs: 9390 },
                { text: " to", startMs: 9390, endMs: 9510 },
                { text: " be", startMs: 9510, endMs: 9620 },
                { text: " some", startMs: 9620, endMs: 9850 },
                { text: " fierce", startMs: 9850, endMs: 10200 },
                { text: " scrabbling.", startMs: 10200, endMs: 11000 },
              ],
              video:
                "https://videos.pexels.com/video-files/1168989/1168989-hd_1920_1080_30fps.mp4",
              audio: {
                url: "http://localhost:3123/api/tmp/cma9ctvpo0001aqsia12i82db.mp3",
                duration: 12.8,
              },
            },
          ],
          config: {
            durationMs: 14300,
            paddingBack: 1500,
            captionBackgroundColor: "#ff0000",
            captionPosition: "center",
          },
        }}
        calculateMetadata={calculateMetadata}
      />
      <Composition
        id="TestVideo"
        component={TestVideo}
        durationInFrames={14}
        fps={23}
        width={100}
        height={100}
      />
    </>
  );
};
/**
 * Orchestrates the creation of short videos.
 *
 * Requests are queued and processed one at a time. For every scene the text is
 * turned into audio (Kokoro), captioned (Whisper), saved as WAV/MP3 (FFmpeg)
 * and paired with a stock video (Pexels); the result is rendered with Remotion.
 */
export class ShortCreator {
  /** Pending render requests; the item at index 0 is the one being processed. */
  private queue: {
    sceneInput: SceneInput[];
    config: RenderConfig;
    id: string;
  }[] = [];
  constructor(
    private config: Config,
    private remotion: Remotion,
    private kokoro: Kokoro,
    private whisper: Whisper,
    private ffmpeg: FFMpeg,
    private pexelsApi: PexelsAPI,
    private musicManager: MusicManager,
  ) {}

  /**
   * Reports the state of a video.
   *
   * @param id - Video id returned by {@link addToQueue}.
   * @returns `"processing"` while the id is queued, `"ready"` when the rendered
   *   file exists, otherwise `"failed"` (this also covers unknown ids).
   */
  public status(id: string): VideoStatus {
    const videoPath = this.getVideoPath(id);
    if (this.queue.some((item) => item.id === id)) {
      return "processing";
    }
    if (fs.existsSync(videoPath)) {
      return "ready";
    }
    return "failed";
  }

  /**
   * Enqueues a render request and starts the queue if it was idle.
   *
   * @param sceneInput - Scenes (text and search terms) of the video.
   * @param config - Render options.
   * @returns The generated id of the video.
   */
  public addToQueue(sceneInput: SceneInput[], config: RenderConfig): string {
    // todo add mutex lock
    const id = cuid();
    this.queue.push({
      sceneInput,
      config,
      id,
    });
    if (this.queue.length === 1) {
      // Fire and forget: processQueue handles and logs its own failures.
      void this.processQueue();
    }
    return id;
  }

  /**
   * Renders the first queued item, removes it from the queue (whether or not
   * rendering succeeded) and continues with the next one.
   */
  private async processQueue(): Promise<void> {
    // todo add a semaphore
    const next = this.queue[0];
    if (!next) {
      return;
    }
    const { sceneInput, config, id } = next;
    logger.debug(
      { sceneInput, config, id },
      "Processing video item in the queue",
    );
    try {
      await this.createShort(id, sceneInput, config);
      logger.debug({ id }, "Video created successfully");
    } catch (error: unknown) {
      logger.error(error, "Error creating video");
    } finally {
      this.queue.shift();
      void this.processQueue();
    }
  }

  /**
   * Builds all scenes of a video and renders it.
   *
   * Temporary audio/video files are removed when the method finishes, also
   * when one of the steps throws.
   *
   * @param videoId - Id under which the video is rendered.
   * @param inputScenes - Scenes to generate.
   * @param config - Render options.
   * @returns The id of the rendered video.
   * @throws When audio generation, captioning, the video download, the music
   *   selection or the render fails.
   */
  private async createShort(
    videoId: string,
    inputScenes: SceneInput[],
    config: RenderConfig,
  ): Promise<string> {
    logger.debug(
      {
        inputScenes,
        config,
      },
      "Creating short video",
    );
    const scenes: Scene[] = [];
    let totalDuration = 0;
    const excludeVideoIds: string[] = [];
    const tempFiles: string[] = [];

    const orientation: OrientationEnum =
      config.orientation || OrientationEnum.portrait;

    try {
      for (const [index, scene] of inputScenes.entries()) {
        const audio = await this.kokoro.generate(
          scene.text,
          config.voice ?? "af_heart",
        );
        let { audioLength } = audio;
        const { audio: audioStream } = audio;

        // add the paddingBack in seconds to the last scene
        if (index + 1 === inputScenes.length && config.paddingBack) {
          audioLength += config.paddingBack / 1000;
        }

        const tempId = cuid();
        const tempWavFileName = `${tempId}.wav`;
        const tempMp3FileName = `${tempId}.mp3`;
        const tempVideoFileName = `${tempId}.mp4`;
        const tempWavPath = path.join(this.config.tempDirPath, tempWavFileName);
        const tempMp3Path = path.join(this.config.tempDirPath, tempMp3FileName);
        const tempVideoPath = path.join(
          this.config.tempDirPath,
          tempVideoFileName,
        );
        // Registered before the files exist so they are cleaned up even when
        // a later step of this scene fails.
        tempFiles.push(tempVideoPath, tempWavPath, tempMp3Path);

        await this.ffmpeg.saveNormalizedAudio(audioStream, tempWavPath);
        const captions = await this.whisper.CreateCaption(tempWavPath);

        await this.ffmpeg.saveToMp3(audioStream, tempMp3Path);
        const video = await this.pexelsApi.findVideo(
          scene.searchTerms,
          audioLength,
          excludeVideoIds,
          orientation,
        );

        logger.debug(`Downloading video from ${video.url} to ${tempVideoPath}`);
        await this.downloadVideo(video.url, tempVideoPath);

        // avoid picking the same stock video for another scene
        excludeVideoIds.push(video.id);

        scenes.push({
          captions,
          video: `http://localhost:${this.config.port}/api/tmp/${tempVideoFileName}`,
          audio: {
            url: `http://localhost:${this.config.port}/api/tmp/${tempMp3FileName}`,
            duration: audioLength,
          },
        });

        totalDuration += audioLength;
      }
      // NOTE(review): paddingBack is already included in the last scene's
      // audioLength above and is added to the total once more here — confirm
      // this is intended.
      if (config.paddingBack) {
        totalDuration += config.paddingBack / 1000;
      }

      const selectedMusic = this.findMusic(totalDuration, config.music);
      logger.debug({ selectedMusic }, "Selected music for the video");

      await this.remotion.render(
        {
          music: selectedMusic,
          scenes,
          config: {
            durationMs: totalDuration * 1000,
            paddingBack: config.paddingBack,
            captionBackgroundColor: config.captionBackgroundColor,
            captionPosition: config.captionPosition,
            musicVolume: config.musicVolume,
          },
        },
        videoId,
        orientation,
      );
    } finally {
      this.removeTempFiles(tempFiles);
    }

    return videoId;
  }

  /**
   * Downloads `url` over HTTPS into `destinationPath`.
   *
   * The file is only created once the server answered with status 200, and the
   * promise rejects on request, response and write errors.
   */
  private downloadVideo(url: string, destinationPath: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      https
        .get(url, (response: http.IncomingMessage) => {
          if (response.statusCode !== 200) {
            response.resume(); // discard the body so the connection is released
            reject(
              new Error(`Failed to download video: ${response.statusCode}`),
            );
            return;
          }

          const fileStream = fs.createWriteStream(destinationPath);
          fileStream.on("finish", () => {
            fileStream.close();
            logger.debug(
              `Video downloaded successfully to ${destinationPath}`,
            );
            resolve();
          });
          fileStream.on("error", (err: Error) => {
            logger.error(err, "Error downloading video:");
            reject(err);
          });
          response.on("error", (err: Error) => {
            fileStream.destroy();
            logger.error(err, "Error downloading video:");
            reject(err);
          });

          response.pipe(fileStream);
        })
        .on("error", (err: Error) => {
          fs.unlink(destinationPath, () => {}); // Delete the file if download failed
          logger.error(err, "Error downloading video:");
          reject(err);
        });
    });
  }

  /** Removes the given temp files; a failing removal is logged and skipped. */
  private removeTempFiles(files: string[]): void {
    for (const file of files) {
      try {
        fs.removeSync(file);
      } catch (error: unknown) {
        logger.error(error, "Failed to remove temp file");
      }
    }
  }

  /** Returns the path of the rendered MP4 of `videoId` in the videos directory. */
  public getVideoPath(videoId: string): string {
    return path.join(this.config.videosDirPath, `${videoId}.mp4`);
  }

  /** Deletes the rendered file of `videoId`. */
  public deleteVideo(videoId: string): void {
    const videoPath = this.getVideoPath(videoId);
    fs.removeSync(videoPath);
    logger.debug({ videoId }, "Deleted video file");
  }

  /**
   * Reads the rendered video into memory.
   *
   * @throws Error when the video file does not exist.
   */
  public getVideo(videoId: string): Buffer {
    const videoPath = this.getVideoPath(videoId);
    if (!fs.existsSync(videoPath)) {
      throw new Error(`Video ${videoId} not found`);
    }
    return fs.readFileSync(videoPath);
  }

  /**
   * Picks a random track, restricted to the given mood when one is passed.
   *
   * @param videoDuration - Currently unused; kept for the existing signature.
   * @param tag - Optional mood the track has to match.
   * @throws Error when no track is available (for that mood), instead of
   *   returning `undefined` from an empty list.
   */
  private findMusic(videoDuration: number, tag?: MusicMoodEnum): MusicForVideo {
    const candidates = this.musicManager
      .musicList()
      .filter((music) => !tag || music.mood === tag);
    if (candidates.length === 0) {
      throw new Error(
        tag ? `No music found for mood "${tag}"` : "No music found",
      );
    }
    // eslint-disable-next-line @remotion/deterministic-randomness
    return candidates[Math.floor(Math.random() * candidates.length)];
  }

  /** Returns the distinct moods of the available music, in first-seen order. */
  public ListAvailableMusicTags(): MusicTag[] {
    const moods = this.musicManager
      .musicList()
      .map((music) => music.mood as MusicTag);
    return Array.from(new Set<MusicTag>(moods));
  }

  /**
   * Lists the rendered videos (MP4 files in the videos directory) followed by
   * queued videos that have no file yet. Queued ids are always reported as
   * `"processing"`, also when the videos directory does not exist.
   */
  public listAllVideos(): { id: string; status: VideoStatus }[] {
    const videos: { id: string; status: VideoStatus }[] = [];
    const queuedIds = new Set(this.queue.map((item) => item.id));
    const listedIds = new Set<string>();

    if (fs.existsSync(this.config.videosDirPath)) {
      for (const file of fs.readdirSync(this.config.videosDirPath)) {
        if (!file.endsWith(".mp4")) {
          continue;
        }
        // strip the extension from the end only
        const videoId = file.slice(0, -".mp4".length);
        videos.push({
          id: videoId,
          status: queuedIds.has(videoId) ? "processing" : "ready",
        });
        listedIds.add(videoId);
      }
    }

    // Add videos that are in the queue but not yet rendered
    for (const { id } of this.queue) {
      if (!listedIds.has(id)) {
        videos.push({ id, status: "processing" });
        listedIds.add(id);
      }
    }

    return videos;
  }

  /** Returns the voices offered by the Kokoro text-to-speech engine. */
  public ListAvailableVoices(): string[] {
    return this.kokoro.listAvailableVoices();
  }
}