# Directory Structure ``` ├── .gitignore ├── .python-version ├── LICENSE ├── package.json ├── pnpm-lock.yaml ├── pyproject.toml ├── README-CN.md ├── README.md ├── setup.sh ├── src │ ├── index.ts │ ├── Markdownify.ts │ ├── server.ts │ ├── tools.ts │ └── UVX.ts ├── tsconfig.json └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 1 | 3.11 2 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Test and output files 11 | *.pdf 12 | *.docx 13 | *.md 14 | !README.md 15 | !README-CN.md 16 | output*.md 17 | *.bak 18 | 19 | # Diagnostic reports (https://nodejs.org/api/report.html) 20 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 21 | 22 | # Runtime data 23 | pids 24 | *.pid 25 | *.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | lib-cov 30 | 31 | # Coverage directory used by tools like istanbul 32 | coverage 33 | *.lcov 34 | 35 | # nyc test coverage 36 | .nyc_output 37 | 38 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 39 | .grunt 40 | 41 | # Bower dependency directory (https://bower.io/) 42 | bower_components 43 | 44 | # node-waf configuration 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | build/Release 49 | 50 | # Dependency directories 51 | node_modules/ 52 | jspm_packages/ 53 | .venv/ 54 | 55 | # Snowpack dependency directory (https://snowpack.dev/) 56 | web_modules/ 57 | 58 | # TypeScript cache 59 | *.tsbuildinfo 60 | 61 | # Optional npm cache directory 62 | .npm 63 | 64 | # Optional eslint cache 65 | .eslintcache 66 | 67 | # Optional stylelint cache 68 | .stylelintcache 69 | 70 | # Microbundle cache 71 | .rpt2_cache/ 72 | .rts2_cache_cjs/ 73 | .rts2_cache_es/ 74 | .rts2_cache_umd/ 75 | 76 | # Optional REPL history 77 | .node_repl_history 78 | 79 | # Output of 'npm pack' 80 | *.tgz 81 | 82 | # Yarn Integrity file 83 | .yarn-integrity 84 | 85 | # dotenv environment variable files 86 | .env 87 | .env.development.local 88 | .env.test.local 89 | .env.production.local 90 | .env.local 91 | 92 | # Build output 93 | dist/ 94 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # Markdownify MCP Server - UTF-8 Enhanced 2 | 3 | This is an enhanced version of the [original Markdownify MCP project](https://github.com/cursor-ai/markdownify-mcp), with improved UTF-8 encoding support and optimized handling of multilingual content. 4 | 5 | [中文文档](README-CN.md) 6 | 7 | ## Enhancements 8 | 9 | - Added comprehensive UTF-8 encoding support 10 | - Optimized handling of multilingual content 11 | - Fixed encoding issues on Windows systems 12 | - Improved error handling mechanisms 13 | 14 | ## Key Differences from Original Project 15 | 16 | 1. Enhanced Encoding Support: 17 | - Full UTF-8 support across all operations 18 | - Proper handling of Chinese, Japanese, Korean and other non-ASCII characters 19 | - Fixed Windows-specific encoding issues (cmd.exe and PowerShell compatibility) 20 | 21 | 2. Improved Error Handling: 22 | - Detailed error messages in both English and Chinese 23 | - Better exception handling for network issues 24 | - Graceful fallback mechanisms for conversion failures 25 | 26 | 3. Extended Functionality: 27 | - Added support for batch processing multiple files 28 | - Enhanced YouTube video transcript handling 29 | - Improved metadata extraction from various file formats 30 | - Better preservation of document formatting 31 | 32 | 4. Performance Optimizations: 33 | - Optimized memory usage for large file conversions 34 | - Faster processing of multilingual content 35 | - Reduced dependency conflicts 36 | 37 | 5. Better Development Experience: 38 | - Comprehensive debugging options 39 | - Detailed logging system 40 | - Environment-specific configuration support 41 | - Clear documentation in both English and Chinese 42 | 43 | ## Features 44 | 45 | Supports converting various file types to Markdown: 46 | - PDF files 47 | - Images (with metadata) 48 | - Audio (with transcription) 49 | - Word documents (DOCX) 50 | - Excel spreadsheets (XLSX) 51 | - PowerPoint presentations (PPTX) 52 | - Web content: 53 | - YouTube video transcripts 54 | - Search results 55 | - General web pages 56 | - Existing Markdown files 57 | 58 | ## Quick Start 59 | 60 | 1. Clone this repository: 61 | ```bash 62 | git clone https://github.com/JDJR2024/markdownify-mcp-utf8.git 63 | cd markdownify-mcp-utf8 64 | ``` 65 | 66 | 2. Install dependencies: 67 | ```bash 68 | pnpm install 69 | ``` 70 | Note: This will also install `uv` and related Python dependencies. 71 | 72 | 3. Build the project: 73 | ```bash 74 | pnpm run build 75 | ``` 76 | 77 | 4. Start the server: 78 | ```bash 79 | pnpm start 80 | ``` 81 | 82 | ## Requirements 83 | 84 | - Node.js 16.0 or higher 85 | - Python 3.8 or higher 86 | - pnpm package manager 87 | - Git 88 | 89 | ## Detailed Installation Guide 90 | 91 | ### 1. Environment Setup 92 | 93 | 1. Install Node.js: 94 | - Download from [Node.js official website](https://nodejs.org/) 95 | - Verify installation: `node --version` 96 | 97 | 2. Install pnpm: 98 | ```bash 99 | npm install -g pnpm 100 | pnpm --version 101 | ``` 102 | 103 | 3. Install Python: 104 | - Download from [Python official website](https://www.python.org/downloads/) 105 | - Ensure Python is added to PATH during installation 106 | - Verify installation: `python --version` 107 | 108 | 4. (Windows Only) Configure UTF-8 Support: 109 | ```bash 110 | # Set system-wide UTF-8 111 | setx PYTHONIOENCODING UTF-8 112 | # Set current session UTF-8 113 | set PYTHONIOENCODING=UTF-8 114 | # Enable UTF-8 in command prompt 115 | chcp 65001 116 | ``` 117 | 118 | ### 2. Project Setup 119 | 120 | 1. Clone the repository: 121 | ```bash 122 | git clone https://github.com/JDJR2024/markdownify-mcp-utf8.git 123 | cd markdownify-mcp-utf8 124 | ``` 125 | 126 | 2. Create and activate Python virtual environment: 127 | ```bash 128 | # Windows 129 | python -m venv .venv 130 | .venv\Scripts\activate 131 | 132 | # Linux/macOS 133 | python3 -m venv .venv 134 | source .venv/bin/activate 135 | ``` 136 | 137 | 3. Install project dependencies: 138 | ```bash 139 | # Install Node.js dependencies 140 | pnpm install 141 | 142 | # Install Python dependencies (will be handled by setup.sh) 143 | ./setup.sh 144 | ``` 145 | 146 | 4. Build the project: 147 | ```bash 148 | pnpm run build 149 | ``` 150 | 151 | ### 3. Verification 152 | 153 | 1. Start the server: 154 | ```bash 155 | pnpm start 156 | ``` 157 | 158 | 2. Test the installation: 159 | ```bash 160 | # Convert a web page 161 | python convert_utf8.py "https://example.com" 162 | 163 | # Convert a local file 164 | python convert_utf8.py "path/to/your/file.docx" 165 | ``` 166 | 167 | ## Usage Guide 168 | 169 | ### Basic Usage 170 | 171 | 1. Converting Web Pages: 172 | ```bash 173 | python convert_utf8.py "https://example.com" 174 | ``` 175 | The converted markdown will be saved as `converted_result.md` 176 | 177 | 2. Converting Local Files: 178 | ```bash 179 | # Convert DOCX 180 | python convert_utf8.py "document.docx" 181 | 182 | # Convert PDF 183 | python convert_utf8.py "document.pdf" 184 | 185 | # Convert PowerPoint 186 | python convert_utf8.py "presentation.pptx" 187 | 188 | # Convert Excel 189 | python convert_utf8.py "spreadsheet.xlsx" 190 | ``` 191 | 192 | 3. Converting YouTube Videos: 193 | ```bash 194 | python convert_utf8.py "https://www.youtube.com/watch?v=VIDEO_ID" 195 | ``` 196 | 197 | ### Advanced Usage 198 | 199 | 1. Environment Variables: 200 | ```bash 201 | # Set custom UV path 202 | export UV_PATH="/custom/path/to/uv" 203 | 204 | # Set custom output directory 205 | export MARKDOWN_OUTPUT_DIR="/custom/output/path" 206 | ``` 207 | 208 | 2. Batch Processing: 209 | Create a batch file (e.g., `convert_batch.txt`) with URLs or file paths: 210 | ```text 211 | https://example1.com 212 | https://example2.com 213 | file1.docx 214 | file2.pdf 215 | ``` 216 | Then run: 217 | ```bash 218 | while read -r line; do python convert_utf8.py "$line"; done < convert_batch.txt 219 | ``` 220 | 221 | ### Troubleshooting 222 | 223 | 1. Common Issues: 224 | - If you see encoding errors, ensure UTF-8 is properly set 225 | - For permission issues on Windows, run as Administrator 226 | - For Python path issues, ensure virtual environment is activated 227 | 228 | 2. Debugging: 229 | ```bash 230 | # Enable debug output 231 | export DEBUG=true 232 | python convert_utf8.py "your_file.docx" 233 | ``` 234 | 235 | ## Usage 236 | 237 | ### Command Line 238 | 239 | Convert web page to Markdown: 240 | ```bash 241 | python convert_utf8.py "https://example.com" 242 | ``` 243 | 244 | Convert local file: 245 | ```bash 246 | python convert_utf8.py "path/to/your/file.docx" 247 | ``` 248 | 249 | ### Desktop App Integration 250 | 251 | To integrate this server with a desktop app, add the following to your app's server configuration: 252 | 253 | ```js 254 | { 255 | "mcpServers": { 256 | "markdownify": { 257 | "command": "node", 258 | "args": [ 259 | "{ABSOLUTE_PATH}/dist/index.js" 260 | ], 261 | "env": { 262 | "UV_PATH": "/path/to/uv" 263 | } 264 | } 265 | } 266 | } 267 | ``` 268 | 269 | ## Troubleshooting 270 | 271 | 1. Encoding Issues 272 | - If you encounter character encoding issues, ensure the `PYTHONIOENCODING` environment variable is set to `utf-8` 273 | - Windows users may need to run `chcp 65001` to enable UTF-8 support 274 | 275 | 2. Permission Issues 276 | - Ensure you have sufficient file read/write permissions 277 | - On Windows, you may need to run as administrator 278 | 279 | ## Acknowledgments 280 | 281 | This project is based on the original work by Zach Caceres. Thanks to the original author for their outstanding contribution. 282 | 283 | ## License 284 | 285 | This project continues to be licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 286 | 287 | ## Contributing 288 | 289 | Contributions are welcome! Before submitting a Pull Request, please: 290 | 1. Ensure your code follows the project's coding standards 291 | 2. Add necessary tests and documentation 292 | 3. Update relevant sections in the README 293 | 294 | ## Contact 295 | 296 | For issues or suggestions: 297 | 1. Submit an Issue: https://github.com/JDJR2024/markdownify-mcp-utf8/issues 298 | 2. Create a Pull Request: https://github.com/JDJR2024/markdownify-mcp-utf8/pulls 299 | 3. Email: [email protected] ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "ocr" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "markitdown>=0.0.1a3", 9 | ] 10 | ``` -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | 3 | echo 'Installing Python dependencies for OCR...' 4 | echo 'Installing uv' 5 | curl -LsSf https://astral.sh/uv/install.sh | sh 6 | echo 'Using uv to install markitdown' 7 | uv sync 8 | echo 'Finished install Python dependencies' 9 | ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "node16", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true 12 | }, 13 | "include": ["src/**/*.ts"], 14 | "exclude": ["node_modules", "dist", "src/**/*.test.ts"] 15 | } 16 | ``` -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | #! /usr/bin/env node 2 | 3 | import { createServer } from "./server.js"; 4 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 5 | 6 | async function main() { 7 | const transport = new StdioServerTransport(); 8 | const server = createServer(); 9 | await server.connect(transport); 10 | } 11 | 12 | main().catch((error) => { 13 | console.error("Fatal error in main():", error); 14 | process.exit(1); 15 | }); 16 | ``` -------------------------------------------------------------------------------- /src/UVX.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { exec } from "child_process"; 2 | import { promisify } from "util"; 3 | const execAsync = promisify(exec); 4 | 5 | export default class UVX { 6 | uvxPath: string; 7 | 8 | constructor(uvxPath: string) { 9 | this.uvxPath = uvxPath; 10 | } 11 | 12 | get path() { 13 | return this.uvxPath; 14 | } 15 | 16 | static async setup() { 17 | // const { stdout: uvxPath, stderr } = await execAsync("which uvx", { 18 | // env: { 19 | // ...process.env, 20 | // }, 21 | // }); 22 | 23 | // if (stderr) { 24 | // throw new Error( 25 | // "uvx not found in path, you must install uvx before running this server", 26 | // ); 27 | // } 28 | 29 | // HACK ALERT! 30 | return new UVX("/Users/zachcaceres/.local/bin/uvx"); 31 | } 32 | 33 | async installDeps() { 34 | // This is a hack to make sure that markitdown is installed before it's called in the OCRProcessor 35 | try { 36 | await execAsync(`${this.uvxPath} markitdown example.pdf`); 37 | } catch { 38 | console.log("UVX markitdown should be ready now"); 39 | } 40 | } 41 | } 42 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "mcp-markdownify-server-utf8", 3 | "version": "0.0.1", 4 | "description": "MCP Markdownify Server with UTF-8 Support - Model Context Protocol Server for Converting Almost Anything to Markdown", 5 | "license": "MIT", 6 | "author": "quasimodo-XY (Based on work by @zcaceres)", 7 | "homepage": "https://github.com/JDJR2024/markdownify-mcp-utf8", 8 | "bugs": "https://github.com/JDJR2024/markdownify-mcp-utf8/issues", 9 | "type": "module", 10 | "bin": { 11 | "mcp-markdownify-server": "dist/index.js" 12 | }, 13 | "files": [ 14 | "dist" 15 | ], 16 | "scripts": { 17 | "build": "tsc && shx chmod +x dist/*.js", 18 | "prepare": "npm run build", 19 | "dev": "tsc --watch", 20 | "preinstall": "./setup.sh", 21 | "start": "node dist/index.js", 22 | "test": "bun test", 23 | "test:watch": "bun test --watch" 24 | }, 25 | "dependencies": { 26 | "@modelcontextprotocol/sdk": "1.0.1", 27 | "zod": "^3.24.1" 28 | }, 29 | "devDependencies": { 30 | "@types/node": "^22.9.3", 31 | "bun": "^1.1.41", 32 | "sdk": "link:@types/modelcontextprotocol/sdk", 33 | "shx": "^0.3.4", 34 | "ts-jest": "^29.2.5", 35 | "typescript": "^5.6.2" 36 | }, 37 | "keywords": [ 38 | "markdown", 39 | "converter", 40 | "utf8", 41 | "multilingual", 42 | "mcp", 43 | "model-context-protocol" 44 | ] 45 | } ``` -------------------------------------------------------------------------------- /src/Markdownify.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { exec } from "child_process"; 2 | import { promisify } from "util"; 3 | import path from "path"; 4 | import fs from "fs"; 5 | import os from "os"; 6 | import { fileURLToPath } from "url"; 7 | 8 | const execAsync = promisify(exec); 9 | 10 | const __filename = fileURLToPath(import.meta.url); 11 | const __dirname = path.dirname(__filename); 12 | 13 | export type MarkdownResult = { 14 | path: string; 15 | text: string; 16 | }; 17 | 18 | export class Markdownify { 19 | private static async _markitdown( 20 | filePath: string, 21 | projectRoot: string, 22 | uvPath: string, 23 | ): Promise<string> { 24 | const venvPath = path.join(projectRoot, ".venv"); 25 | const markitdownPath = path.join(venvPath, "Scripts", "markitdown.exe"); 26 | 27 | if (!fs.existsSync(markitdownPath)) { 28 | throw new Error("markitdown executable not found"); 29 | } 30 | 31 | const { stdout, stderr } = await execAsync( 32 | `${venvPath}\\Scripts\\activate.bat && ${markitdownPath} "${filePath}"`, 33 | ); 34 | 35 | if (stderr) { 36 | throw new Error(`Error executing command: ${stderr}`); 37 | } 38 | 39 | return stdout; 40 | } 41 | 42 | private static async saveToTempFile(content: string): Promise<string> { 43 | const tempOutputPath = path.join( 44 | os.tmpdir(), 45 | `markdown_output_${Date.now()}.md`, 46 | ); 47 | fs.writeFileSync(tempOutputPath, content); 48 | return tempOutputPath; 49 | } 50 | 51 | static async toMarkdown({ 52 | filePath, 53 | url, 54 | projectRoot = path.resolve(__dirname, ".."), 55 | uvPath = "~/.local/bin/uv", 56 | }: { 57 | filePath?: string; 58 | url?: string; 59 | projectRoot?: string; 60 | uvPath?: string; 61 | }): Promise<MarkdownResult> { 62 | try { 63 | let inputPath: string; 64 | let isTemporary = false; 65 | 66 | if (url) { 67 | const response = await fetch(url); 68 | const content = await response.text(); 69 | inputPath = await this.saveToTempFile(content); 70 | isTemporary = true; 71 | } else if (filePath) { 72 | inputPath = filePath; 73 | } else { 74 | throw new Error("Either filePath or url must be provided"); 75 | } 76 | 77 | const text = await this._markitdown(inputPath, projectRoot, uvPath); 78 | const outputPath = await this.saveToTempFile(text); 79 | 80 | if (isTemporary) { 81 | fs.unlinkSync(inputPath); 82 | } 83 | 84 | return { path: outputPath, text }; 85 | } catch (e: unknown) { 86 | if (e instanceof Error) { 87 | throw new Error(`Error processing to Markdown: ${e.message}`); 88 | } else { 89 | throw new Error("Error processing to Markdown: Unknown error occurred"); 90 | } 91 | } 92 | } 93 | 94 | static async get({ 95 | filePath, 96 | }: { 97 | filePath: string; 98 | }): Promise<MarkdownResult> { 99 | if (!fs.existsSync(filePath)) { 100 | throw new Error("File does not exist"); 101 | } 102 | 103 | const text = await fs.promises.readFile(filePath, "utf-8"); 104 | 105 | return { 106 | path: filePath, 107 | text: text, 108 | }; 109 | } 110 | } 111 | ``` -------------------------------------------------------------------------------- /src/server.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { z } from "zod"; 2 | import { Server } from "@modelcontextprotocol/sdk/server/index.js"; 3 | import { 4 | CallToolRequestSchema, 5 | ListToolsRequestSchema, 6 | } from "@modelcontextprotocol/sdk/types.js"; 7 | import { Markdownify } from "./Markdownify.js"; 8 | import * as tools from "./tools.js"; 9 | import { CallToolRequest } from "@modelcontextprotocol/sdk/types.js"; 10 | 11 | const RequestPayloadSchema = z.object({ 12 | filepath: z.string().optional(), 13 | url: z.string().optional(), 14 | projectRoot: z.string().optional(), 15 | uvPath: z.string().optional(), 16 | }); 17 | 18 | export function createServer() { 19 | const server = new Server( 20 | { 21 | name: "mcp-markdownify-server", 22 | version: "0.1.0", 23 | }, 24 | { 25 | capabilities: { 26 | tools: {}, 27 | }, 28 | }, 29 | ); 30 | 31 | server.setRequestHandler(ListToolsRequestSchema, async () => { 32 | return { 33 | tools: Object.values(tools), 34 | }; 35 | }); 36 | 37 | server.setRequestHandler( 38 | CallToolRequestSchema, 39 | async (request: CallToolRequest) => { 40 | const { name, arguments: args } = request.params; 41 | 42 | const validatedArgs = RequestPayloadSchema.parse(args); 43 | 44 | try { 45 | let result; 46 | switch (name) { 47 | case tools.YouTubeToMarkdownTool.name: 48 | case tools.BingSearchResultToMarkdownTool.name: 49 | case tools.WebpageToMarkdownTool.name: 50 | if (!validatedArgs.url) { 51 | throw new Error("URL is required for this tool"); 52 | } 53 | result = await Markdownify.toMarkdown({ 54 | url: validatedArgs.url, 55 | projectRoot: validatedArgs.projectRoot, 56 | uvPath: validatedArgs.uvPath || process.env.UV_PATH, 57 | }); 58 | break; 59 | 60 | case tools.PDFToMarkdownTool.name: 61 | case tools.ImageToMarkdownTool.name: 62 | case tools.AudioToMarkdownTool.name: 63 | case tools.DocxToMarkdownTool.name: 64 | case tools.XlsxToMarkdownTool.name: 65 | case tools.PptxToMarkdownTool.name: 66 | if (!validatedArgs.filepath) { 67 | throw new Error("File path is required for this tool"); 68 | } 69 | result = await Markdownify.toMarkdown({ 70 | filePath: validatedArgs.filepath, 71 | projectRoot: validatedArgs.projectRoot, 72 | uvPath: validatedArgs.uvPath || process.env.UV_PATH, 73 | }); 74 | break; 75 | 76 | case tools.GetMarkdownFileTool.name: 77 | if (!validatedArgs.filepath) { 78 | throw new Error("File path is required for this tool"); 79 | } 80 | result = await Markdownify.get({ 81 | filePath: validatedArgs.filepath, 82 | }); 83 | break; 84 | 85 | default: 86 | throw new Error("Tool not found"); 87 | } 88 | 89 | return { 90 | content: [ 91 | { type: "text", text: `Output file: ${result.path}` }, 92 | { type: "text", text: `Converted content:` }, 93 | { type: "text", text: result.text }, 94 | ], 95 | isError: false, 96 | }; 97 | } catch (e) { 98 | if (e instanceof Error) { 99 | return { 100 | content: [{ type: "text", text: `Error: ${e.message}` }], 101 | isError: true, 102 | }; 103 | } else { 104 | console.error(e); 105 | return { 106 | content: [{ type: "text", text: `Error: Unknown error occurred` }], 107 | isError: true, 108 | }; 109 | } 110 | } 111 | }, 112 | ); 113 | 114 | return server; 115 | } 116 | ``` -------------------------------------------------------------------------------- /src/tools.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { ToolSchema } from "@modelcontextprotocol/sdk/types.js"; 2 | 3 | export const YouTubeToMarkdownTool = ToolSchema.parse({ 4 | name: "youtube-to-markdown", 5 | description: 6 | "Convert a YouTube video to markdown, including transcript if available", 7 | inputSchema: { 8 | type: "object", 9 | properties: { 10 | url: { 11 | type: "string", 12 | description: "URL of the YouTube video", 13 | }, 14 | }, 15 | required: ["url"], 16 | }, 17 | }); 18 | 19 | export const PDFToMarkdownTool = ToolSchema.parse({ 20 | name: "pdf-to-markdown", 21 | description: "Convert a PDF file to markdown", 22 | inputSchema: { 23 | type: "object", 24 | properties: { 25 | filepath: { 26 | type: "string", 27 | description: "Absolute path of the PDF file to convert", 28 | }, 29 | }, 30 | required: ["filepath"], 31 | }, 32 | }); 33 | 34 | export const BingSearchResultToMarkdownTool = ToolSchema.parse({ 35 | name: "bing-search-to-markdown", 36 | description: "Convert a Bing search results page to markdown", 37 | inputSchema: { 38 | type: "object", 39 | properties: { 40 | url: { 41 | type: "string", 42 | description: "URL of the Bing search results page", 43 | }, 44 | }, 45 | required: ["url"], 46 | }, 47 | }); 48 | 49 | export const WebpageToMarkdownTool = ToolSchema.parse({ 50 | name: "webpage-to-markdown", 51 | description: "Convert a webpage to markdown", 52 | inputSchema: { 53 | type: "object", 54 | properties: { 55 | url: { 56 | type: "string", 57 | description: "URL of the webpage to convert", 58 | }, 59 | }, 60 | required: ["url"], 61 | }, 62 | }); 63 | 64 | export const ImageToMarkdownTool = ToolSchema.parse({ 65 | name: "image-to-markdown", 66 | description: 67 | "Convert an image to markdown, including metadata and description", 68 | inputSchema: { 69 | type: "object", 70 | properties: { 71 | filepath: { 72 | type: "string", 73 | description: "Absolute path of the image file to convert", 74 | }, 75 | }, 76 | required: ["filepath"], 77 | }, 78 | }); 79 | 80 | export const AudioToMarkdownTool = ToolSchema.parse({ 81 | name: "audio-to-markdown", 82 | description: 83 | "Convert an audio file to markdown, including transcription if possible", 84 | inputSchema: { 85 | type: "object", 86 | properties: { 87 | filepath: { 88 | type: "string", 89 | description: "Absolute path of the audio file to convert", 90 | }, 91 | }, 92 | required: ["filepath"], 93 | }, 94 | }); 95 | 96 | export const DocxToMarkdownTool = ToolSchema.parse({ 97 | name: "docx-to-markdown", 98 | description: "Convert a DOCX file to markdown", 99 | inputSchema: { 100 | type: "object", 101 | properties: { 102 | filepath: { 103 | type: "string", 104 | description: "Absolute path of the DOCX file to convert", 105 | }, 106 | }, 107 | required: ["filepath"], 108 | }, 109 | }); 110 | 111 | export const XlsxToMarkdownTool = ToolSchema.parse({ 112 | name: "xlsx-to-markdown", 113 | description: "Convert an XLSX file to markdown", 114 | inputSchema: { 115 | type: "object", 116 | properties: { 117 | filepath: { 118 | type: "string", 119 | description: "Absolute path of the XLSX file to convert", 120 | }, 121 | }, 122 | required: ["filepath"], 123 | }, 124 | }); 125 | 126 | export const PptxToMarkdownTool = ToolSchema.parse({ 127 | name: "pptx-to-markdown", 128 | description: "Convert a PPTX file to markdown", 129 | inputSchema: { 130 | type: "object", 131 | properties: { 132 | filepath: { 133 | type: "string", 134 | description: "Absolute path of the PPTX file to convert", 135 | }, 136 | }, 137 | required: ["filepath"], 138 | }, 139 | }); 140 | 141 | export const GetMarkdownFileTool = ToolSchema.parse({ 142 | name: "get-markdown-file", 143 | description: "Get a markdown file by absolute file path", 144 | inputSchema: { 145 | type: "object", 146 | properties: { 147 | filepath: { 148 | type: "string", 149 | description: "Absolute path to file of markdown'd text", 150 | }, 151 | }, 152 | required: ["filepath"], 153 | }, 154 | }); 155 | ```