cablate/mcp-doc-forge # codebase.md

# Directory Structure

```
├── .gitignore
├── Dockerfile
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── src
│   ├── index.ts
│   ├── tools
│   │   ├── _index.ts
│   │   ├── documentReader.ts
│   │   ├── docxTools.ts
│   │   ├── excelTools.ts
│   │   ├── formatConverterPlus.ts
│   │   ├── htmlTools.ts
│   │   ├── pdfTools.ts
│   │   └── txtTools.ts
│   └── xhr-sync-worker.js
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | dist/*
2 | node_modules/*
3 | CodeReview.md
4 | todo.md
5 | develop-prompt-plan.md
6 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/cablate-mcp-doc-forge-badge.png)](https://mseep.ai/app/cablate-mcp-doc-forge)
  2 | 
  3 | # Simple Document Processing MCP Server
  4 | [![smithery badge](https://smithery.ai/badge/@cablate/mcp-doc-forge)](https://smithery.ai/server/@cablate/mcp-doc-forge)
  5 | 
  6 | A powerful Model Context Protocol (MCP) server providing comprehensive document processing capabilities.
  7 | 
  8 | <a href="https://glama.ai/mcp/servers/pb9df6lnel"><img width="380" height="200" src="https://glama.ai/mcp/servers/pb9df6lnel/badge" alt="Simple Document Processing Server MCP server" /></a>
  9 | 
 10 | ## Features
 11 | 
 12 | ### Document Reader
 13 | - Read DOCX, PDF, TXT, HTML, CSV
 14 | 
 15 | ### Document Conversion
 16 | - DOCX to HTML/PDF conversion
 17 | - HTML to TXT/Markdown conversion
 18 | - PDF manipulation (merge, split)
 19 | 
 20 | ### Text Processing
 21 | - Encoding conversion support (UTF-8, Big5, GBK)
 22 | - Text formatting and cleaning
 23 | - Text comparison and diff generation
 24 | - Text splitting by lines or delimiter
 25 | 
 26 | ### HTML Processing
 27 | - HTML cleaning and formatting
 28 | - Resource extraction (images, links, videos)
 29 | - Structure-preserving conversion
 30 | 
 31 | ## Installation
 32 | 
 33 | ### Installing via Smithery
 34 | 
 35 | To install the Document Processing Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@cablate/mcp-doc-forge):
 36 | 
 37 | ```bash
 38 | npx -y @smithery/cli install @cablate/mcp-doc-forge --client claude
 39 | ```
 40 | 
 41 | ### Manual Installation
 42 | ```bash
 43 | npm install -g @cablate/mcp-doc-forge
 44 | ```
 45 | 
 46 | 
 47 | ## Usage
 48 | 
 49 | ### CLI
 50 | 
 51 | ```bash
 52 | mcp-doc-forge
 53 | ```
 54 | 
 55 | ### With [Dive Desktop](https://github.com/OpenAgentPlatform/Dive)
 56 | 
 57 | 1. Click "+ Add MCP Server" in Dive Desktop
 58 | 2. Copy and paste this configuration:
 59 | 
 60 | ```json
 61 | {
 62 |   "mcpServers": {
 63 |     "doc-forge": {
 64 |       "command": "npx",
 65 |       "args": [
 66 |         "-y",
 67 |         "@cablate/mcp-doc-forge"
 68 |       ],
 69 |       "enabled": true
 70 |     }
 71 |   }
 72 | }
 73 | ```
 74 | 
 75 | 3. Click "Save" to install the MCP server
 76 | 
 77 | ## License
 78 | 
 79 | MIT
 80 | 
 81 | ## Contributing
 82 | 
 83 | Community participation and contributions are welcome! Here are some ways to contribute:
 84 | 
 85 | - ⭐️ Star the project if you find it helpful
 86 | - 🐛 Submit Issues: Report problems or provide suggestions
 87 | - 🔧 Create Pull Requests: Submit code improvements
 88 | 
 89 | ## Contact
 90 | 
 91 | If you have any questions or suggestions, feel free to reach out:
 92 | 
 93 | - 📧 Email: [[email protected]](mailto:[email protected])
 94 | - 🔗 GitHub: [CabLate](https://github.com/cablate/)
 95 | - 🤝 Collaboration: Discussions about project cooperation are welcome
 96 | - 📚 Technical Guidance: Suggestions and guidance are sincerely welcome
 97 | 
 98 | 
 99 | 
100 | 
```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     properties: {}
 9 |   commandFunction:
10 |     # A function that produces the CLI command to start the MCP on stdio.
11 |     |-
12 |     config => ({ command: 'node', args: ['dist/index.cjs'], env: {} })
13 | 
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "strict": true,
 5 |     "esModuleInterop": true,
 6 |     "skipLibCheck": true,
 7 |     "forceConsistentCasingInFileNames": true,
 8 |     "resolveJsonModule": true,
 9 |     "outDir": "./dist",
10 |     "rootDir": "./src",
11 |     "moduleResolution": "NodeNext",
12 |     "module": "NodeNext",
13 |     "noImplicitAny": false
14 |   },
15 |   "exclude": ["node_modules"],
16 |   "include": ["src/**/*"]
17 | }
18 | 
```

--------------------------------------------------------------------------------
/src/xhr-sync-worker.js:
--------------------------------------------------------------------------------

```javascript
 1 | // This is a minimal implementation of a synchronous XMLHttpRequest worker
 2 | // It's needed by jsdom for synchronous XHR operations
 3 | 
 4 | self.onmessage = function(e) {
 5 |   const xhr = new XMLHttpRequest();
 6 |   xhr.open(e.data.method, e.data.url, false); // false = synchronous
 7 |   
 8 |   if (e.data.headers) {
 9 |     Object.keys(e.data.headers).forEach(function(key) {
10 |       xhr.setRequestHeader(key, e.data.headers[key]);
11 |     });
12 |   }
13 |   
14 |   try {
15 |     xhr.send(e.data.data || null);
16 |     self.postMessage({
17 |       status: xhr.status,
18 |       statusText: xhr.statusText,
19 |       headers: xhr.getAllResponseHeaders(),
20 |       response: xhr.response
21 |     });
22 |   } catch (error) {
23 |     self.postMessage({
24 |       error: error.message
25 |     });
26 |   }
27 | }; 
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | # Use an official Node.js runtime as a parent image for the build
 3 | FROM node:22.12-alpine AS builder
 4 | 
 5 | # Set the working directory in the container
 6 | WORKDIR /app
 7 | 
 8 | # Copy the package.json and package-lock.json into the container
 9 | COPY package.json package-lock.json ./
10 | 
11 | # Install dependencies, skipping the `prepare` step to avoid running build scripts
12 | RUN npm install --ignore-scripts
13 | 
14 | # Copy the source code into the container
15 | COPY src ./src
16 | 
17 | # Build the project using the specified build command in the package.json
18 | RUN npm run build
19 | 
20 | # Use a smaller base image for the runtime
21 | FROM node:22.12-alpine AS runner
22 | 
23 | # Set the working directory in the container
24 | WORKDIR /app
25 | 
26 | # Copy the build artifacts from the builder stage
27 | COPY --from=builder /app/dist ./dist
28 | 
29 | # Copy the necessary node modules
30 | COPY --from=builder /app/node_modules ./node_modules
31 | 
32 | # Define environment variable
33 | ENV NODE_ENV=production
34 | 
35 | # Define the command to run the application
36 | ENTRYPOINT ["node", "dist/index.cjs"]
37 | 
```

--------------------------------------------------------------------------------
/src/tools/_index.ts:
--------------------------------------------------------------------------------

```typescript
 1 | import { DOCUMENT_READER_TOOL } from "./documentReader.js";
 2 | import { DOCX_TO_HTML_TOOL, DOCX_TO_PDF_TOOL } from "./docxTools.js";
 3 | import { EXCEL_READ_TOOL } from "./excelTools.js";
 4 | import { FORMAT_CONVERTER_TOOL } from "./formatConverterPlus.js";
 5 | import { HTML_CLEAN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_TO_TEXT_TOOL } from "./htmlTools.js";
 6 | import { PDF_MERGE_TOOL, PDF_SPLIT_TOOL } from "./pdfTools.js";
 7 | import { TEXT_DIFF_TOOL, TEXT_ENCODING_CONVERT_TOOL, TEXT_FORMAT_TOOL, TEXT_SPLIT_TOOL } from "./txtTools.js";
 8 | 
 9 | export const tools = [DOCUMENT_READER_TOOL, PDF_MERGE_TOOL, PDF_SPLIT_TOOL, DOCX_TO_PDF_TOOL, DOCX_TO_HTML_TOOL, HTML_CLEAN_TOOL, HTML_TO_TEXT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, TEXT_DIFF_TOOL, TEXT_SPLIT_TOOL, TEXT_FORMAT_TOOL, TEXT_ENCODING_CONVERT_TOOL, EXCEL_READ_TOOL, FORMAT_CONVERTER_TOOL];
10 | 
11 | export * from "./documentReader.js";
12 | export * from "./docxTools.js";
13 | export * from "./excelTools.js";
14 | export * from "./formatConverterPlus.js";
15 | export * from "./htmlTools.js";
16 | export * from "./pdfTools.js";
17 | export * from "./txtTools.js";
18 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "name": "@cablate/mcp-doc-forge",
 3 |   "version": "0.0.8",
 4 |   "type": "module",
 5 |   "description": "MCP server that provides doc forge capabilities",
 6 |   "main": "dist/index.cjs",
 7 |   "license": "MIT",
 8 |   "scripts": {
 9 |     "build": "esbuild src/index.ts --bundle --platform=node --outfile=dist/index.cjs --external:jsdom && shx chmod +x dist/index.cjs",
10 |     "start": "node dist/index.cjs",
11 |     "dev": "ts-node src/index.ts"
12 |   },
13 |   "dependencies": {
14 |     "@modelcontextprotocol/sdk": "^1.0.4",
15 |     "csv-parse": "^5.6.0",
16 |     "diff": "^5.1.0",
17 |     "docxtemplater": "^3.42.0",
18 |     "epub": "^1.2.1",
19 |     "exceljs": "^4.4.0",
20 |     "iconv-lite": "^0.6.3",
21 |     "image-size": "^1.1.1",
22 |     "jsdom": "^25.0.1",
23 |     "libreoffice-convert": "^1.6.0",
24 |     "mammoth": "^1.6.0",
25 |     "marked": "^15.0.7",
26 |     "pdf-lib": "^1.17.1",
27 |     "pdf2pic": "^3.1.3",
28 |     "pdfreader": "^3.0.6",
29 |     "pizzip": "^3.1.4",
30 |     "sharp": "^0.33.2",
31 |     "turndown": "^7.2.0",
32 |     "unzipper": "^0.12.3",
33 |     "util": "^0.12.5",
34 |     "xml2js": "^0.6.2"
35 |   },
36 |   "devDependencies": {
37 |     "@modelcontextprotocol/sdk": "^1.0.4",
38 |     "@types/jsdom": "^21.1.7",
39 |     "@types/node": "^20.17.10",
40 |     "esbuild": "^0.20.2",
41 |     "shx": "^0.3.4",
42 |     "ts-node": "^10.9.2",
43 |     "typescript": "^5.0.0"
44 |   },
45 |   "author": "CabLate",
46 |   "files": [
47 |     "dist",
48 |     "dist/**/*.map",
49 |     "README.md"
50 |   ],
51 |   "bin": {
52 |     "mcp-doc-forge": "./dist/index.cjs"
53 |   },
54 |   "keywords": [
55 |     "mcp",
56 |     "mcp-server",
57 |     "doc-forge",
58 |     "document",
59 |     "pdf",
60 |     "docx",
61 |     "txt",
62 |     "html",
63 |     "csv",
64 |     "ai",
65 |     "dive"
66 |   ],
67 |   "homepage": "https://github.com/cablate/mcp-doc-forge#readme",
68 |   "repository": {
69 |     "type": "git",
70 |     "url": "git+https://github.com/cablate/mcp-doc-forge.git"
71 |   },
72 |   "bugs": {
73 |     "url": "https://github.com/cablate/mcp-doc-forge/issues"
74 |   }
75 | }
76 | 
```

--------------------------------------------------------------------------------
/src/tools/documentReader.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { parse } from "csv-parse";
  3 | import * as fs from "fs/promises";
  4 | import { JSDOM } from "jsdom";
  5 | import mammoth from "mammoth";
  6 | import * as path from "path";
  7 | import { Item, ItemHandler, PdfReader } from "pdfreader";
  8 | 
  9 | export const DOCUMENT_READER_TOOL: Tool = {
 10 |   name: "document_reader",
 11 |   description:
 12 |     "Read content from non-image document-files at specified paths, supporting various file formats: .pdf, .docx, .txt, .html, .csv",
 13 |   inputSchema: {
 14 |     type: "object",
 15 |     properties: {
 16 |       filePath: {
 17 |         type: "string",
 18 |         description: "Path to the file to be read",
 19 |       },
 20 |     },
 21 |     required: ["filePath"],
 22 |   },
 23 | };
 24 | 
 25 | export interface FileReaderArgs {
 26 |   filePath: string;
 27 | }
 28 | 
 29 | export function isFileReaderArgs(args: unknown): args is FileReaderArgs {
 30 |   return (
 31 |     typeof args === "object" &&
 32 |     args !== null &&
 33 |     "filePath" in args &&
 34 |     typeof (args as FileReaderArgs).filePath === "string"
 35 |   );
 36 | }
 37 | 
 38 | async function readTextFile(filePath: string): Promise<string> {
 39 |   return await fs.readFile(filePath, "utf-8");
 40 | }
 41 | 
 42 | async function readPDFFile(filePath: string): Promise<string> {
 43 |   const buffer = await fs.readFile(filePath);
 44 | 
 45 |   return new Promise((resolve, reject) => {
 46 |     let content = "";
 47 |     const reader = new PdfReader();
 48 | 
 49 |     reader.parseBuffer(buffer, ((err: null | Error, item: Item | undefined) => {
 50 |       if (err) {
 51 |         reject(err);
 52 |       } else if (!item) {
 53 |         resolve(content);
 54 |       } else if (item.text) {
 55 |         content += item.text + " ";
 56 |       }
 57 |     }) as ItemHandler);
 58 |   });
 59 | }
 60 | 
 61 | async function readDocxFile(filePath: string): Promise<string> {
 62 |   const buffer = await fs.readFile(filePath);
 63 |   const result = await mammoth.extractRawText({ buffer });
 64 |   return result.value;
 65 | }
 66 | // Reads a CSV file as UTF-8 and resolves with its parsed records serialized as a JSON string.
 67 | async function readCSVFile(filePath: string): Promise<string> {
 68 |   const content = await fs.readFile(filePath, "utf-8");
 69 |   return new Promise((resolve, reject) => {
 70 |     parse(content, (err, records) => {
 71 |       if (err) return reject(err); // stop here so the promise is settled exactly once
 72 |       resolve(JSON.stringify(records));
 73 |     });
 74 |   });
 75 | }
 76 | 
 77 | async function readHTMLFile(filePath: string): Promise<string> {
 78 |   const content = await fs.readFile(filePath, "utf-8");
 79 |   const dom = new JSDOM(content);
 80 |   return dom.window.document.body.textContent || "";
 81 | }
 82 | 
 83 | export async function readFile(filePath: string) {
 84 |   try {
 85 |     const ext = path.extname(filePath).toLowerCase();
 86 |     let content: string;
 87 | 
 88 |     switch (ext) {
 89 |       case ".pdf":
 90 |         content = await readPDFFile(filePath);
 91 |         break;
 92 |       case ".docx":
 93 |         content = await readDocxFile(filePath);
 94 |         break;
 95 |       case ".txt":
 96 |         content = await readTextFile(filePath);
 97 |         break;
 98 |       case ".html":
 99 |         content = await readHTMLFile(filePath);
100 |         break;
101 |       case ".csv":
102 |         content = await readCSVFile(filePath);
103 |         break;
104 |       default:
105 |         throw new Error(`Unsupported file format: ${ext}`);
106 |     }
107 | 
108 |     return {
109 |       success: true,
110 |       data: content,
111 |     };
112 |   } catch (error) {
113 |     return {
114 |       success: false,
115 |       error: error instanceof Error ? error.message : "Unknown error",
116 |     };
117 |   }
118 | } 
```

--------------------------------------------------------------------------------
/src/tools/docxTools.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { randomBytes } from "crypto";
  3 | import * as fs from "fs/promises";
  4 | import mammoth from "mammoth";
  5 | import * as path from "path";
  6 | // @ts-ignore
  7 | import { convert } from "libreoffice-convert";
  8 | import { promisify } from "util";
  9 | 
 10 | function generateUniqueId(): string {
 11 |   return randomBytes(9).toString("hex");
 12 | }
 13 | 
 14 | // DOCX 轉 HTML 工具
 15 | export const DOCX_TO_HTML_TOOL: Tool = {
 16 |   name: "docx_to_html",
 17 |   description: "Convert DOCX to HTML while preserving formatting",
 18 |   inputSchema: {
 19 |     type: "object",
 20 |     properties: {
 21 |       inputPath: {
 22 |         type: "string",
 23 |         description: "Path to the input DOCX file",
 24 |       },
 25 |       outputDir: {
 26 |         type: "string",
 27 |         description: "Directory where HTML should be saved",
 28 |       },
 29 |     },
 30 |     required: ["inputPath", "outputDir"],
 31 |   },
 32 | };
 33 | 
 34 | // DOCX 轉 PDF 工具
 35 | export const DOCX_TO_PDF_TOOL: Tool = {
 36 |   name: "docx_to_pdf",
 37 |   description: "Convert DOCX files to PDF format",
 38 |   inputSchema: {
 39 |     type: "object",
 40 |     properties: {
 41 |       inputPath: {
 42 |         type: "string",
 43 |         description: "Path to the input DOCX file",
 44 |       },
 45 |       outputPath: {
 46 |         type: "string",
 47 |         description: "Path where the output PDF file should be saved",
 48 |       },
 49 |     },
 50 |     required: ["inputPath", "outputPath"],
 51 |   },
 52 | };
 53 | export interface DocxToPdfArgs {
 54 |   inputPath: string;
 55 |   outputPath: string;
 56 | }
 57 | 
 58 | // DOCX 轉 HTML 實作
 59 | export async function docxToHtml(inputPath: string, outputDir: string) {
 60 |   try {
 61 |     console.error(`Starting DOCX to HTML conversion...`);
 62 |     console.error(`Input file: ${inputPath}`);
 63 |     console.error(`Output directory: ${outputDir}`);
 64 | 
 65 |     // 確保輸出目錄存在
 66 |     try {
 67 |       await fs.access(outputDir);
 68 |       console.error(`Output directory exists: ${outputDir}`);
 69 |     } catch {
 70 |       console.error(`Creating output directory: ${outputDir}`);
 71 |       await fs.mkdir(outputDir, { recursive: true });
 72 |       console.error(`Created output directory: ${outputDir}`);
 73 |     }
 74 | 
 75 |     const uniqueId = generateUniqueId();
 76 |     const buffer = await fs.readFile(inputPath);
 77 | 
 78 |     const result = await mammoth.convertToHtml({ buffer });
 79 |     console.error(
 80 |       `Conversion completed with ${result.messages.length} messages`
 81 |     );
 82 | 
 83 |     const outputPath = path.join(outputDir, `converted_${uniqueId}.html`);
 84 |     await fs.writeFile(outputPath, result.value);
 85 |     console.error(`Written HTML to ${outputPath}`);
 86 | 
 87 |     return {
 88 |       success: true,
 89 |       data: `Successfully converted DOCX to HTML: ${outputPath}`,
 90 |     };
 91 |   } catch (error) {
 92 |     console.error(`Error in docxToHtml:`, error);
 93 |     return {
 94 |       success: false,
 95 |       error: error instanceof Error ? error.message : "Unknown error",
 96 |     };
 97 |   }
 98 | }
 99 | 
100 | // DOCX 轉 PDF 實作
101 | export function isDocxToPdfArgs(args: unknown): args is DocxToPdfArgs {
102 |   return (
103 |     typeof args === "object" &&
104 |     args !== null &&
105 |     "inputPath" in args &&
106 |     "outputPath" in args &&
107 |     typeof (args as DocxToPdfArgs).inputPath === "string" &&
108 |     typeof (args as DocxToPdfArgs).outputPath === "string"
109 |   );
110 | }
111 | const convertAsyncPromise = promisify(convert);
112 | export async function convertDocxToPdf(inputPath: string, outputPath: string) {
113 |   try {
114 |     const ext = path.extname(inputPath).toLowerCase();
115 |     if (ext !== ".docx") {
116 |       throw new Error("Input file must be a .docx file");
117 |     }
118 | 
119 |     if (path.extname(outputPath).toLowerCase() !== ".pdf") {
120 |       throw new Error("Output file must have .pdf extension");
121 |     }
122 | 
123 |     const docxBuffer = await fs.readFile(inputPath);
124 |     const pdfBuffer = await convertAsyncPromise(docxBuffer, ".pdf", undefined);
125 |     await fs.writeFile(outputPath, pdfBuffer);
126 | 
127 |     return {
128 |       success: true,
129 |       data: `Successfully converted ${inputPath} to ${outputPath}`,
130 |     };
131 |   } catch (error) {
132 |     return {
133 |       success: false,
134 |       error: error instanceof Error ? error.message : "Unknown error",
135 |     };
136 |   }
137 | }
138 | 
```

--------------------------------------------------------------------------------
/src/tools/formatConverterPlus.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { marked } from "marked";
  3 | import * as xml2js from "xml2js";
  4 | 
  5 | /**
  6 |  * Supported format types for conversion
  7 |  */
  8 | export enum FormatType {
  9 |   MARKDOWN = "markdown",
 10 |   HTML = "html",
 11 |   XML = "xml",
 12 |   JSON = "json",
 13 | }
 14 | 
 15 | /**
 16 |  * Format converter tool
 17 |  */
 18 | export const FORMAT_CONVERTER_TOOL: Tool = {
 19 |   name: "format_convert",
 20 |   description: "Convert between different document formats (Markdown, HTML, XML, JSON)",
 21 |   inputSchema: {
 22 |     type: "object",
 23 |     properties: {
 24 |       input: {
 25 |         type: "string",
 26 |         description: "Input content to convert",
 27 |       },
 28 |       fromFormat: {
 29 |         type: "string",
 30 |         enum: Object.values(FormatType),
 31 |         description: "Source format",
 32 |       },
 33 |       toFormat: {
 34 |         type: "string",
 35 |         enum: Object.values(FormatType),
 36 |         description: "Target format",
 37 |       },
 38 |     },
 39 |     required: ["input", "fromFormat", "toFormat"],
 40 |   },
 41 | };
 42 | 
 43 | export interface FormatConverterArgs {
 44 |   input: string;
 45 |   fromFormat: FormatType;
 46 |   toFormat: FormatType;
 47 | }
 48 | 
 49 | /**
 50 |  * Type check function
 51 |  */
 52 | export function isFormatConverterArgs(args: unknown): args is FormatConverterArgs {
 53 |   return typeof args === "object" && args !== null && "input" in args && "fromFormat" in args && "toFormat" in args && typeof (args as FormatConverterArgs).input === "string" && Object.values(FormatType).includes((args as FormatConverterArgs).fromFormat) && Object.values(FormatType).includes((args as FormatConverterArgs).toFormat);
 54 | }
 55 | 
 56 | // XML 解析器和建構器
 57 | const xmlParser = new xml2js.Parser();
 58 | const xmlBuilder = new xml2js.Builder();
 59 | 
 60 | /**
 61 |  * Converts Markdown to HTML
 62 |  */
 63 | async function markdownToHtml(input: string): Promise<string> {
 64 |   return marked(input);
 65 | }
 66 | 
 67 | /**
 68 |  * Converts XML to JSON
 69 |  */
 70 | async function xmlToJson(input: string): Promise<string> {
 71 |   try {
 72 |     const result = await xmlParser.parseStringPromise(input);
 73 |     return JSON.stringify(result, null, 2);
 74 |   } catch (error: unknown) {
 75 |     const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
 76 |     throw new Error(`Failed to parse XML: ${errorMessage}`);
 77 |   }
 78 | }
 79 | 
 80 | /**
 81 |  * Converts JSON to XML
 82 |  */
 83 | function jsonToXml(input: string): string {
 84 |   try {
 85 |     const obj = JSON.parse(input);
 86 |     return xmlBuilder.buildObject(obj);
 87 |   } catch (error: unknown) {
 88 |     const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
 89 |     throw new Error(`Failed to parse JSON: ${errorMessage}`);
 90 |   }
 91 | }
 92 | 
 93 | /**
 94 |  * Converts content from one format to another
 95 |  * @param input Input content to convert
 96 |  * @param fromFormat Source format
 97 |  * @param toFormat Target format
 98 |  * @returns Promise resolving to the converted content
 99 |  */
100 | export async function convertFormat(input: string, fromFormat: FormatType, toFormat: FormatType) {
101 |   try {
102 |     console.error(`Converting from ${fromFormat} to ${toFormat}`); // stderr: stdout is reserved for the MCP stdio protocol stream
103 | 
104 |     // Validate formats
105 |     if (!Object.values(FormatType).includes(fromFormat)) {
106 |       return {
107 |         success: false,
108 |         error: `Unsupported source format: ${fromFormat}`,
109 |       };
110 |     }
111 |     if (!Object.values(FormatType).includes(toFormat)) {
112 |       return {
113 |         success: false,
114 |         error: `Unsupported target format: ${toFormat}`,
115 |       };
116 |     }
117 | 
118 |     // Dispatch on the "<from>-<to>" pair; any pair not listed below is rejected
119 |     let result: string;
120 |     switch (`${fromFormat}-${toFormat}`) {
121 |       case `${FormatType.MARKDOWN}-${FormatType.HTML}`:
122 |         result = await markdownToHtml(input);
123 |         break;
124 |       case `${FormatType.HTML}-${FormatType.MARKDOWN}`:
125 |         return {
126 |           success: false,
127 |           error: "HTML to Markdown conversion is not supported yet",
128 |         };
129 |       case `${FormatType.XML}-${FormatType.JSON}`:
130 |         result = await xmlToJson(input);
131 |         break;
132 |       case `${FormatType.JSON}-${FormatType.XML}`:
133 |         result = jsonToXml(input);
134 |         break;
135 |       default:
136 |         return {
137 |           success: false,
138 |           error: `Unsupported conversion path: ${fromFormat} to ${toFormat}`,
139 |         };
140 |     }
141 | 
142 |     return {
143 |       success: true,
144 |       data: result,
145 |     };
146 |   } catch (error: unknown) {
147 |     const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
148 |     console.error(`Error converting format: ${errorMessage}`);
149 |     return {
150 |       success: false,
151 |       error: errorMessage,
152 |     };
153 |   }
154 | }
155 | 
```

--------------------------------------------------------------------------------
/src/tools/excelTools.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import * as ExcelJS from "exceljs";
  3 | import * as fs from "fs";
  4 | import * as path from "path";
  5 | 
  6 | /**
  7 |  * Interface for Excel file processing options
  8 |  */
  9 | interface ExcelProcessOptions {
 10 |   sheetName?: string;
 11 |   includeHeaders?: boolean;
 12 | }
 13 | 
 14 | // Excel 讀取工具
 15 | export const EXCEL_READ_TOOL: Tool = {
 16 |   name: "excel_read",
 17 |   description: "Read Excel file and convert to JSON format while preserving structure",
 18 |   inputSchema: {
 19 |     type: "object",
 20 |     properties: {
 21 |       inputPath: {
 22 |         type: "string",
 23 |         description: "Path to the input Excel file",
 24 |       },
 25 |       includeHeaders: {
 26 |         type: "boolean",
 27 |         description: "Whether to include headers in the output",
 28 |         default: true,
 29 |       },
 30 |     },
 31 |     required: ["inputPath"],
 32 |   },
 33 | };
 34 | 
 35 | export interface ExcelReadArgs {
 36 |   inputPath: string;
 37 |   includeHeaders?: boolean;
 38 | }
 39 | 
 40 | // 類型檢查函數
 41 | export function isExcelReadArgs(args: unknown): args is ExcelReadArgs {
 42 |   return typeof args === "object" && args !== null && "inputPath" in args && typeof (args as ExcelReadArgs).inputPath === "string" && (typeof (args as ExcelReadArgs).includeHeaders === "undefined" || typeof (args as ExcelReadArgs).includeHeaders === "boolean");
 43 | }
 44 | 
 45 | /**
 46 |  * Class for handling Excel file operations
 47 |  */
 48 | export class ExcelTools {
 49 |   /**
 50 |    * Reads every worksheet of an Excel file into an object keyed by sheet name
 51 |    * @param filePath Path to the Excel file
 52 |    * @param options Processing options; when omitted, `{ includeHeaders: true }` is used
 53 |    * @returns Promise resolving to the parsed Excel data; rejects (after logging to stderr) on any failure
 54 |    */
 55 |   public static async readExcelFile(filePath: string, options: ExcelProcessOptions = { includeHeaders: true }): Promise<any> {
 56 |     try {
 57 |       // Verify file exists
 58 |       if (!fs.existsSync(filePath)) {
 59 |         throw new Error(`File not found: ${filePath}`);
 60 |       }
 61 | 
 62 |       // Verify file extension. NOTE(review): ".xls" passes this check but is read with the xlsx reader below — confirm it is supported
 63 |       const ext = path.extname(filePath).toLowerCase();
 64 |       if (ext !== ".xlsx" && ext !== ".xls") {
 65 |         throw new Error(`Unsupported file format: ${ext}`);
 66 |       }
 67 | 
 68 |       console.error(`Reading Excel file: ${filePath}`); // stderr: stdout is reserved for the MCP stdio protocol stream
 69 |       const workbook = new ExcelJS.Workbook();
 70 |       await workbook.xlsx.readFile(filePath);
 71 | 
 72 |       const result: any = {};
 73 | 
 74 |       workbook.worksheets.forEach((worksheet) => {
 75 |         const sheetName = worksheet.name;
 76 |         const rows: any[] = [];
 77 | 
 78 |         worksheet.eachRow((row, rowNumber) => {
 79 |           const rowData: any = {};
 80 |           row.eachCell((cell, colNumber) => {
 81 |             if (options.includeHeaders && rowNumber === 1) {
 82 |               // Handle headers. NOTE(review): each header cell is appended to `rows` individually, not as one row
 83 |               rows.push(cell.value);
 84 |             } else {
 85 |               // Handle data rows: values are keyed by their 1-based column number
 86 |               rowData[colNumber] = cell.value;
 87 |             }
 88 |           });
 89 |           if (rowNumber > 1 || !options.includeHeaders) {
 90 |             rows.push(rowData);
 91 |           }
 92 |         });
 93 | 
 94 |         result[sheetName] = rows;
 95 |       });
 96 | 
 97 |       console.error(`Successfully parsed Excel file: ${filePath}`); // stderr, same reason as above
 98 |       return result;
 99 |     } catch (error: any) {
100 |       console.error(`Error processing Excel file: ${error.message}`);
101 |       throw error;
102 |     }
103 |   }
104 | }
105 | 
106 | // Excel read implementation: returns { success: true, data } keyed by sheet name, or { success: false, error }
107 | export async function readExcelFile(inputPath: string, includeHeaders: boolean = true) {
108 |   try {
109 |     // Verify that the file exists
110 |     if (!fs.existsSync(inputPath)) {
111 |       return {
112 |         success: false,
113 |         error: `File not found: ${inputPath}`,
114 |       };
115 |     }
116 | 
117 |     // Verify the file extension. NOTE(review): ".xls" passes this check but is read with the xlsx reader below — confirm it is supported
118 |     const ext = path.extname(inputPath).toLowerCase();
119 |     if (ext !== ".xlsx" && ext !== ".xls") {
120 |       return {
121 |         success: false,
122 |         error: `Unsupported file format: ${ext}`,
123 |       };
124 |     }
125 | 
126 |     console.error(`Reading Excel file: ${inputPath}`); // stderr: stdout is reserved for the MCP stdio protocol stream
127 |     const workbook = new ExcelJS.Workbook();
128 |     await workbook.xlsx.readFile(inputPath);
129 | 
130 |     const result: Record<string, any[]> = {};
131 | 
132 |     workbook.worksheets.forEach((worksheet) => {
133 |       const sheetName = worksheet.name;
134 |       const rows: any[] = [];
135 | 
136 |       worksheet.eachRow((row, rowNumber) => {
137 |         const rowData: Record<number, any> = {};
138 |         row.eachCell((cell, colNumber) => {
139 |           if (includeHeaders && rowNumber === 1) {
140 |             rows.push(cell.value); // NOTE(review): each header cell is appended to `rows` individually, not as one row
141 |           } else {
142 |             rowData[colNumber] = cell.value; // data rows are keyed by 1-based column number
143 |           }
144 |         });
145 |         if (rowNumber > 1 || !includeHeaders) {
146 |           rows.push(rowData);
147 |         }
148 |       });
149 | 
150 |       result[sheetName] = rows;
151 |     });
152 | 
153 |     console.error(`Successfully parsed Excel file: ${inputPath}`); // stderr, same reason as above
154 |     return {
155 |       success: true,
156 |       data: result,
157 |     };
158 |   } catch (error: unknown) {
159 |     const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
160 |     console.error(`Error processing Excel file: ${errorMessage}`);
161 |     return {
162 |       success: false,
163 |       error: errorMessage,
164 |     };
165 |   }
166 | }
167 | 
```

--------------------------------------------------------------------------------
/src/tools/pdfTools.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { randomBytes } from "crypto";
  3 | import * as fs from "fs/promises";
  4 | import * as path from "path";
  5 | import { PDFDocument } from "pdf-lib";
  6 | import { fromPath } from "pdf2pic";
  7 | 
  8 | function generateUniqueId(): string {
  9 |   return randomBytes(9).toString("hex");
 10 | }
 11 | 
 12 | // PDF 合併工具
 13 | export const PDF_MERGE_TOOL: Tool = {
 14 |   name: "pdf_merger",
 15 |   description: "Merge multiple PDF files into one",
 16 |   inputSchema: {
 17 |     type: "object",
 18 |     properties: {
 19 |       inputPaths: {
 20 |         type: "array",
 21 |         items: { type: "string" },
 22 |         description: "Paths to the input PDF files",
 23 |       },
 24 |       outputDir: {
 25 |         type: "string",
 26 |         description: "Directory where merged PDFs should be saved",
 27 |       },
 28 |     },
 29 |     required: ["inputPaths", "outputDir"],
 30 |   },
 31 | };
 32 | 
 33 | // PDF 分割工具
 34 | export const PDF_SPLIT_TOOL: Tool = {
 35 |   name: "pdf_splitter",
 36 |   description: "Split a PDF file into multiple files",
 37 |   inputSchema: {
 38 |     type: "object",
 39 |     properties: {
 40 |       inputPath: {
 41 |         type: "string",
 42 |         description: "Path to the input PDF file",
 43 |       },
 44 |       outputDir: {
 45 |         type: "string",
 46 |         description: "Directory where split PDFs should be saved",
 47 |       },
 48 |       pageRanges: {
 49 |         type: "array",
 50 |         items: {
 51 |           type: "object",
 52 |           properties: {
 53 |             start: { type: "number" },
 54 |             end: { type: "number" },
 55 |           },
 56 |         },
 57 |         description: "Array of page ranges to split",
 58 |       },
 59 |     },
 60 |     required: ["inputPath", "outputDir", "pageRanges"],
 61 |   },
 62 | };
 63 | 
 64 | // 實作函數
 65 | export async function mergePDFs(inputPaths: string[], outputDir: string) {
 66 |   try {
 67 |     console.error(`Starting PDF merge operation...`);
 68 |     console.error(`Input files:`, inputPaths);
 69 |     console.error(`Output directory: ${outputDir}`);
 70 | 
 71 |     // 確保輸出目錄存在
 72 |     try {
 73 |       await fs.access(outputDir);
 74 |       console.error(`Output directory exists: ${outputDir}`);
 75 |     } catch {
 76 |       console.error(`Creating output directory: ${outputDir}`);
 77 |       await fs.mkdir(outputDir, { recursive: true });
 78 |       console.error(`Created output directory: ${outputDir}`);
 79 |     }
 80 | 
 81 |     const uniqueId = generateUniqueId();
 82 |     console.error(`Generated unique ID for this batch: ${uniqueId}`);
 83 | 
 84 |     // 修改輸出檔案名稱，加入 uniqueId
 85 |     const outputPath = path.join(outputDir, `merged_${uniqueId}.pdf`);
 86 |     console.error(`New output path with unique ID: ${outputPath}`);
 87 | 
 88 |     const mergedPdf = await PDFDocument.create();
 89 | 
 90 |     for (const filePath of inputPaths) {
 91 |       console.error(`Processing input file: ${filePath}`);
 92 |       const pdfBytes = await fs.readFile(filePath);
 93 |       console.error(`Read ${pdfBytes.length} bytes from ${filePath}`);
 94 | 
 95 |       const pdf = await PDFDocument.load(pdfBytes);
 96 |       const pageCount = pdf.getPageCount();
 97 |       console.error(`Loaded PDF with ${pageCount} pages from ${filePath}`);
 98 | 
 99 |       const copiedPages = await mergedPdf.copyPages(pdf, pdf.getPageIndices());
100 |       console.error(`Copied ${copiedPages.length} pages from ${filePath}`);
101 | 
102 |       copiedPages.forEach((page, index) => {
103 |         mergedPdf.addPage(page);
104 |         console.error(`Added page ${index + 1} from ${filePath}`);
105 |       });
106 |     }
107 | 
108 |     const mergedPdfBytes = await mergedPdf.save();
109 |     console.error(`Generated merged PDF: ${mergedPdfBytes.length} bytes`);
110 | 
111 |     await fs.writeFile(outputPath, mergedPdfBytes);
112 |     console.error(`Successfully wrote merged PDF to ${outputPath}`);
113 | 
114 |     return {
115 |       success: true,
116 |       data: `Successfully merged ${inputPaths.length} PDFs into ${outputPath}`,
117 |     };
118 |   } catch (error) {
119 |     console.error(`Error in mergePDFs:`);
120 |     console.error(error);
121 |     if (error instanceof Error) {
122 |       console.error(`Error name: ${error.name}`);
123 |       console.error(`Error message: ${error.message}`);
124 |       console.error(`Error stack: ${error.stack}`);
125 |     }
126 |     return {
127 |       success: false,
128 |       error: error instanceof Error ? error.message : "Unknown error",
129 |     };
130 |   }
131 | }
132 | 
/**
 * Writes one new PDF per page range, named `split_<uniqueId>_<n>.pdf` inside
 * `outputDir`, where `n` is the 1-based position of the range.
 *
 * Every range is validated before anything is written, so an invalid range
 * never leaves a partial set of output files behind.
 *
 * @param inputPath Path of the PDF to split.
 * @param outputDir Directory for the output files; created if it does not exist.
 * @param pageRanges Ranges to extract. `start` and `end` are 1-based, inclusive,
 *   integer page numbers with `start <= end <= total pages`.
 * @returns `{ success: true, data }` with a message listing the output paths, or
 *   `{ success: false, error }`. Failures are reported, never thrown.
 */
export async function splitPDF(
  inputPath: string,
  outputDir: string,
  pageRanges: Array<{ start: number; end: number }>
) {
  try {
    console.error(`Starting PDF split operation...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);
    console.error(`Page ranges:`, JSON.stringify(pageRanges, null, 2));

    if (!Array.isArray(pageRanges) || pageRanges.length === 0) {
      throw new Error("No page ranges provided");
    }

    const pdfBytes = await fs.readFile(inputPath);
    console.error(
      `Successfully read input PDF, size: ${pdfBytes.length} bytes`
    );

    const pdf = await PDFDocument.load(pdfBytes);
    const totalPages = pdf.getPageCount();
    console.error(`PDF loaded successfully. Total pages: ${totalPages}`);

    // Validate all ranges up front.
    for (const { start, end } of pageRanges) {
      // Page numbers below 1 or fractional values would turn into invalid
      // zero-based indexes further down.
      if (!Number.isInteger(start) || !Number.isInteger(end) || start < 1) {
        throw new Error(
          `Invalid page range: ${start}-${end}. Page numbers must be integers starting at 1`
        );
      }

      if (start > totalPages || end > totalPages) {
        throw new Error(
          `Invalid page range: ${start}-${end}. PDF only has ${totalPages} pages`
        );
      }

      if (start > end) {
        throw new Error(
          `Invalid page range: start (${start}) is greater than end (${end})`
        );
      }
    }

    // `recursive: true` makes mkdir succeed when the directory already exists.
    await fs.mkdir(outputDir, { recursive: true });

    const uniqueId = generateUniqueId();
    console.error(`Generated unique ID for this batch: ${uniqueId}`);
    const results: string[] = [];

    for (let i = 0; i < pageRanges.length; i++) {
      const { start, end } = pageRanges[i];
      console.error(`Processing range ${i + 1}: pages ${start} to ${end}`);

      // Convert the 1-based inclusive range into zero-based page indexes.
      const pageIndexes = Array.from(
        { length: end - start + 1 },
        (_, offset) => start - 1 + offset
      );

      const newPdf = await PDFDocument.create();
      const pages = await newPdf.copyPages(pdf, pageIndexes);
      for (const page of pages) {
        newPdf.addPage(page);
      }

      const outputPath = path.join(outputDir, `split_${uniqueId}_${i + 1}.pdf`);
      const newPdfBytes = await newPdf.save();
      await fs.writeFile(outputPath, newPdfBytes);
      console.error(`Successfully wrote PDF to ${outputPath}`);

      results.push(outputPath);
    }

    console.error(`Split operation completed successfully`);
    return {
      success: true,
      data: `Successfully split PDF into ${
        results.length
      } files: ${results.join(", ")}`,
    };
  } catch (error) {
    console.error(`Error in splitPDF:`);
    console.error(error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
231 | 
/**
 * Renders every page of a PDF to an image file in `outputDir` using pdf2pic.
 * Files are saved under the name `page_<uniqueId>`.
 *
 * @param inputPath Path of the PDF to render.
 * @param outputDir Directory for the images; created if it does not exist.
 * @param format Image format, `"png"` (default) or `"jpeg"`.
 * @param dpi Rendering density passed to pdf2pic (default 300).
 * @returns `{ success: true, data }` with a message listing the image paths
 *   reported by the converter, or `{ success: false, error }`. Failures are
 *   reported, never thrown.
 */
export async function pdfToImages(
  inputPath: string,
  outputDir: string,
  format: "png" | "jpeg" = "png",
  dpi: number = 300
) {
  try {
    // `recursive: true` makes mkdir succeed when the directory already exists.
    await fs.mkdir(outputDir, { recursive: true });

    const uniqueId = generateUniqueId();
    console.error(`Generated unique ID for this batch: ${uniqueId}`);

    // The page count comes from pdf-lib; pdf2pic is then asked for each page.
    const pdfBytes = await fs.readFile(inputPath);
    const pdf = await PDFDocument.load(pdfBytes);
    const pageCount = pdf.getPageCount();

    const convert = fromPath(inputPath, {
      density: dpi,
      format,
      width: 2048,
      height: 2048,
      saveFilename: `page_${uniqueId}`,
      savePath: outputDir,
    });

    const imagePaths: string[] = [];
    // The converter is called with page numbers starting at 1.
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
      const result = await convert(pageNumber);
      if (result.path) {
        imagePaths.push(result.path);
      }
    }

    // Report the files that were actually produced so the caller can find them.
    return {
      success: true,
      data: `Successfully converted ${
        imagePaths.length
      } pages to images in ${outputDir}: ${imagePaths.join(", ")}`,
    };
  } catch (error) {
    console.error(`Error in pdfToImages:`);
    console.error(error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
282 | 
```

--------------------------------------------------------------------------------
/src/tools/txtTools.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { randomBytes } from "crypto";
  3 | import { diffLines } from "diff";
  4 | import * as fs from "fs/promises";
  5 | import iconv from "iconv-lite";
  6 | import * as path from "path";
  7 | 
  8 | function generateUniqueId(): string {
  9 |   return randomBytes(9).toString("hex");
 10 | }
 11 | 
 12 | // 文字編碼轉換工具
 13 | export const TEXT_ENCODING_CONVERT_TOOL: Tool = {
 14 |   name: "text_encoding_converter",
 15 |   description: "Convert text between different encodings",
 16 |   inputSchema: {
 17 |     type: "object",
 18 |     properties: {
 19 |       inputPath: {
 20 |         type: "string",
 21 |         description: "Path to the input text file",
 22 |       },
 23 |       outputDir: {
 24 |         type: "string",
 25 |         description: "Directory where converted file should be saved",
 26 |       },
 27 |       fromEncoding: {
 28 |         type: "string",
 29 |         description: "Source encoding (e.g., 'big5', 'gbk', 'utf8')",
 30 |       },
 31 |       toEncoding: {
 32 |         type: "string",
 33 |         description: "Target encoding (e.g., 'utf8', 'big5', 'gbk')",
 34 |       },
 35 |     },
 36 |     required: ["inputPath", "outputDir", "fromEncoding", "toEncoding"],
 37 |   },
 38 | };
 39 | 
 40 | // 文字格式化工具
 41 | export const TEXT_FORMAT_TOOL: Tool = {
 42 |   name: "text_formatter",
 43 |   description: "Format text with proper indentation and line spacing",
 44 |   inputSchema: {
 45 |     type: "object",
 46 |     properties: {
 47 |       inputPath: {
 48 |         type: "string",
 49 |         description: "Path to the input text file",
 50 |       },
 51 |       outputDir: {
 52 |         type: "string",
 53 |         description: "Directory where formatted file should be saved",
 54 |       },
 55 |     },
 56 |     required: ["inputPath", "outputDir"],
 57 |   },
 58 | };
 59 | 
 60 | // 文字比較工具
 61 | export const TEXT_DIFF_TOOL: Tool = {
 62 |   name: "text_diff",
 63 |   description: "Compare two text files and show differences",
 64 |   inputSchema: {
 65 |     type: "object",
 66 |     properties: {
 67 |       file1Path: {
 68 |         type: "string",
 69 |         description: "Path to the first text file",
 70 |       },
 71 |       file2Path: {
 72 |         type: "string",
 73 |         description: "Path to the second text file",
 74 |       },
 75 |       outputDir: {
 76 |         type: "string",
 77 |         description: "Directory where diff result should be saved",
 78 |       },
 79 |     },
 80 |     required: ["file1Path", "file2Path", "outputDir"],
 81 |   },
 82 | };
 83 | 
 84 | // 文字分割工具
 85 | export const TEXT_SPLIT_TOOL: Tool = {
 86 |   name: "text_splitter",
 87 |   description: "Split text file by specified delimiter or line count",
 88 |   inputSchema: {
 89 |     type: "object",
 90 |     properties: {
 91 |       inputPath: {
 92 |         type: "string",
 93 |         description: "Path to the input text file",
 94 |       },
 95 |       outputDir: {
 96 |         type: "string",
 97 |         description: "Directory where split files should be saved",
 98 |       },
 99 |       splitBy: {
100 |         type: "string",
101 |         enum: ["lines", "delimiter"],
102 |         description: "Split method: by line count or delimiter",
103 |       },
104 |       value: {
105 |         type: "string",
106 |         description: "Line count (number) or delimiter string",
107 |       },
108 |     },
109 |     required: ["inputPath", "outputDir", "splitBy", "value"],
110 |   },
111 | };
112 | 
// Text encoding conversion implementation
/**
 * Re-encodes a text file: decodes it with `fromEncoding`, encodes the text
 * with `toEncoding`, and writes the result to `converted_<uniqueId>.txt`
 * inside `outputDir`.
 *
 * Both encoding names are checked with iconv-lite before the filesystem is
 * touched, so an unsupported name fails with a message that says which one
 * is wrong and without creating the output directory.
 *
 * @param inputPath Path of the file to convert.
 * @param outputDir Directory for the converted file; created if missing.
 * @param fromEncoding Encoding of the input file, as understood by iconv-lite.
 * @param toEncoding Encoding of the output file, as understood by iconv-lite.
 * @returns `{ success: true, data }` with a message naming the output path, or
 *   `{ success: false, error }`. Failures are reported, never thrown.
 */
export async function convertTextEncoding(
  inputPath: string,
  outputDir: string,
  fromEncoding: string,
  toEncoding: string
) {
  try {
    console.error(`Starting text encoding conversion...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);
    console.error(`From encoding: ${fromEncoding}`);
    console.error(`To encoding: ${toEncoding}`);

    if (!iconv.encodingExists(fromEncoding)) {
      throw new Error(`Unsupported source encoding: ${fromEncoding}`);
    }
    if (!iconv.encodingExists(toEncoding)) {
      throw new Error(`Unsupported target encoding: ${toEncoding}`);
    }

    // `recursive: true` makes mkdir succeed when the directory already exists.
    await fs.mkdir(outputDir, { recursive: true });

    const uniqueId = generateUniqueId();
    // Read raw bytes (no encoding argument) so iconv does the decoding.
    const content = await fs.readFile(inputPath);
    const text = iconv.decode(content, fromEncoding);
    const converted = iconv.encode(text, toEncoding);

    const outputPath = path.join(outputDir, `converted_${uniqueId}.txt`);
    await fs.writeFile(outputPath, converted);
    console.error(`Written converted text to ${outputPath}`);

    return {
      success: true,
      data: `Successfully converted text encoding: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in convertTextEncoding:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
158 | 
// Text formatting implementation
/**
 * Normalizes a UTF-8 text file and writes the result to
 * `formatted_<uniqueId>.txt` inside `outputDir`.
 *
 * Every line is trimmed of leading and trailing whitespace, and a blank line
 * is dropped whenever the line before it is blank as well.
 *
 * @param inputPath Path of the text file to format.
 * @param outputDir Directory for the formatted file; created if missing.
 * @returns `{ success: true, data }` with a message naming the output path, or
 *   `{ success: false, error }`. Failures are reported, never thrown.
 */
export async function formatText(inputPath: string, outputDir: string) {
  try {
    console.error(`Starting text formatting...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    const directoryIsAccessible = await fs.access(outputDir).then(
      () => true,
      () => false
    );
    if (directoryIsAccessible) {
      console.error(`Output directory exists: ${outputDir}`);
    } else {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const batchId = generateUniqueId();
    const source = await fs.readFile(inputPath, "utf-8");

    // Basic formatting: trim each line and drop repeated blank lines.
    const keptLines: string[] = [];
    let previousLine: string | undefined;
    for (const rawLine of source.split("\n")) {
      const line = rawLine.trim();
      const repeatsBlank = line === "" && previousLine === "";
      if (!repeatsBlank) {
        keptLines.push(line);
      }
      // Compare against the previous trimmed line whether or not it was kept.
      previousLine = line;
    }
    const formatted = keptLines.join("\n");

    const outputPath = path.join(outputDir, `formatted_${batchId}.txt`);
    await fs.writeFile(outputPath, formatted);
    console.error(`Written formatted text to ${outputPath}`);

    return {
      success: true,
      data: `Successfully formatted text: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in formatText:`, error);
    const message = error instanceof Error ? error.message : "Unknown error";
    return {
      success: false,
      error: message,
    };
  }
}
202 | 
// Text comparison implementation
/**
 * Compares two UTF-8 text files line by line and writes the result to
 * `diff_<uniqueId>.txt` inside `outputDir`.
 *
 * Every output line carries a two-character prefix: `+ ` for lines only in the
 * second file, `- ` for lines only in the first, and two spaces for unchanged
 * lines. Each output line ends with a newline, including the last line of a
 * chunk that had no trailing newline, so chunks never run into each other.
 *
 * @param file1Path Path of the first text file.
 * @param file2Path Path of the second text file.
 * @param outputDir Directory for the diff file; created if missing.
 * @returns `{ success: true, data }` with a message naming the output path, or
 *   `{ success: false, error }`. Failures are reported, never thrown.
 */
export async function compareTexts(
  file1Path: string,
  file2Path: string,
  outputDir: string
) {
  try {
    console.error(`Starting text comparison...`);
    console.error(`File 1: ${file1Path}`);
    console.error(`File 2: ${file2Path}`);
    console.error(`Output directory: ${outputDir}`);

    // `recursive: true` makes mkdir succeed when the directory already exists.
    await fs.mkdir(outputDir, { recursive: true });

    const uniqueId = generateUniqueId();
    const text1 = await fs.readFile(file1Path, "utf-8");
    const text2 = await fs.readFile(file2Path, "utf-8");

    const diffResult = diffLines(text1, text2)
      .map((part) => {
        const prefix = part.added ? "+ " : part.removed ? "- " : "  ";
        // A chunk's value can span several lines; prefix each of them, not
        // just the first.
        const lines = part.value.split("\n");
        // A trailing newline yields one empty final element; it is not a line.
        if (lines[lines.length - 1] === "") {
          lines.pop();
        }
        return lines.map((line) => `${prefix}${line}\n`).join("");
      })
      .join("");

    const outputPath = path.join(outputDir, `diff_${uniqueId}.txt`);
    await fs.writeFile(outputPath, diffResult);
    console.error(`Written diff result to ${outputPath}`);

    return {
      success: true,
      data: `Successfully compared texts: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in compareTexts:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
253 | 
// Text splitting implementation
/**
 * Splits a UTF-8 text file into several files named
 * `part_<uniqueId>_<n>.txt` inside `outputDir`.
 *
 * - `splitBy === "lines"`: `value` is a positive whole number; each part holds
 *   up to that many lines.
 * - otherwise: `value` is a non-empty delimiter and the content is split on
 *   every occurrence of it.
 *
 * `value` is validated before the filesystem is touched. An empty delimiter is
 * rejected because it would split the file into one part per character.
 *
 * @param inputPath Path of the text file to split.
 * @param outputDir Directory for the parts; created if it does not exist.
 * @param splitBy Split method.
 * @param value Line count or delimiter, depending on `splitBy`.
 * @returns `{ success: true, data }` with a message listing the part paths, or
 *   `{ success: false, error }`. Failures are reported, never thrown.
 */
export async function splitText(
  inputPath: string,
  outputDir: string,
  splitBy: "lines" | "delimiter",
  value: string
) {
  try {
    console.error(`Starting text splitting...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);
    console.error(`Split by: ${splitBy}`);
    console.error(`Value: ${value}`);

    // Callers are not type-checked at runtime, so tolerate a numeric `value`.
    const rawValue = String(value);
    let lineCount = 0;
    if (splitBy === "lines") {
      const trimmed = rawValue.trim();
      // Require the whole string to be digits; parseInt alone would accept
      // inputs such as "10abc" or "2.5".
      lineCount = /^\+?\d+$/.test(trimmed) ? parseInt(trimmed, 10) : NaN;
      if (isNaN(lineCount) || lineCount <= 0) {
        throw new Error("Invalid line count");
      }
    } else if (rawValue === "") {
      throw new Error("Delimiter must not be empty");
    }

    // `recursive: true` makes mkdir succeed when the directory already exists.
    await fs.mkdir(outputDir, { recursive: true });

    const uniqueId = generateUniqueId();
    const content = await fs.readFile(inputPath, "utf-8");

    let parts: string[];
    if (splitBy === "lines") {
      const lines = content.split("\n");
      parts = [];
      for (let i = 0; i < lines.length; i += lineCount) {
        parts.push(lines.slice(i, i + lineCount).join("\n"));
      }
    } else {
      // Assign directly instead of push(...spread), which can exceed the
      // argument limit for very large arrays.
      parts = content.split(rawValue);
    }

    const results: string[] = [];
    for (let i = 0; i < parts.length; i++) {
      const outputPath = path.join(outputDir, `part_${uniqueId}_${i + 1}.txt`);
      await fs.writeFile(outputPath, parts[i]);
      results.push(outputPath);
      console.error(`Written part ${i + 1} to ${outputPath}`);
    }

    return {
      success: true,
      data: `Successfully split text into ${parts.length} parts: ${results.join(
        ", "
      )}`,
    };
  } catch (error) {
    console.error(`Error in splitText:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
318 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
  1 | #!/usr/bin/env node
  2 | 
  3 | console.log = () => {};
  4 | console.error = () => {};
  5 | 
  6 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
  7 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
  8 | import {
  9 |   CallToolRequestSchema,
 10 |   ListToolsRequestSchema,
 11 | } from "@modelcontextprotocol/sdk/types.js";
 12 | 
 13 | import {
 14 |   cleanHtml,
 15 |   compareTexts,
 16 |   convertDocxToPdf,
 17 |   convertTextEncoding,
 18 |   docxToHtml,
 19 |   extractHtmlResources,
 20 |   formatHtml,
 21 |   formatText,
 22 |   htmlToMarkdown,
 23 |   htmlToText,
 24 |   isDocxToPdfArgs,
 25 |   isFileReaderArgs,
 26 |   mergePDFs,
 27 |   readFile,
 28 |   splitPDF,
 29 |   splitText,
 30 |   tools,
 31 | } from "./tools/_index.js";
 32 | 
 33 | const server = new Server(
 34 |   {
 35 |     name: "mcp-server/common_doc_executor",
 36 |     version: "0.0.1",
 37 |   },
 38 |   {
 39 |     capabilities: {
 40 |       description:
 41 |         "A MCP server providing file reading capabilities for various file formats!",
 42 |       tools: {},
 43 |     },
 44 |   }
 45 | );
 46 | 
 47 | server.setRequestHandler(ListToolsRequestSchema, async () => ({
 48 |   tools,
 49 | }));
 50 | 
 51 | server.setRequestHandler(CallToolRequestSchema, async (request) => {
 52 |   try {
 53 |     const { name, arguments: args } = request.params;
 54 | 
 55 |     if (!args) {
 56 |       throw new Error("No arguments provided");
 57 |     }
 58 | 
 59 |     if (name === "document_reader") {
 60 |       if (!isFileReaderArgs(args)) {
 61 |         throw new Error("Invalid arguments for document_reader");
 62 |       }
 63 | 
 64 |       const result = await readFile(args.filePath);
 65 |       if (!result.success) {
 66 |         return {
 67 |           content: [{ type: "text", text: `Error: ${result.error}` }],
 68 |           isError: true,
 69 |         };
 70 |       }
 71 |       return {
 72 |         content: [{ type: "text", text: result.data }],
 73 |         isError: false,
 74 |       };
 75 |     }
 76 | 
 77 |     if (name === "docx_to_pdf") {
 78 |       if (!isDocxToPdfArgs(args)) {
 79 |         throw new Error("Invalid arguments for docx_to_pdf");
 80 |       }
 81 | 
 82 |       const result = await convertDocxToPdf(args.inputPath, args.outputPath);
 83 |       if (!result.success) {
 84 |         return {
 85 |           content: [{ type: "text", text: `Error: ${result.error}` }],
 86 |           isError: true,
 87 |         };
 88 |       }
 89 |       return {
 90 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
 91 |         isError: false,
 92 |       };
 93 |     }
 94 | 
 95 |     if (name === "pdf_merger") {
 96 |       const { inputPaths, outputDir } = args as {
 97 |         inputPaths: string[];
 98 |         outputDir: string;
 99 |       };
100 |       const result = await mergePDFs(inputPaths, outputDir);
101 |       if (!result.success) {
102 |         return {
103 |           content: [{ type: "text", text: `Error: ${result.error}` }],
104 |           isError: true,
105 |         };
106 |       }
107 |       return {
108 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
109 |         isError: false,
110 |       };
111 |     }
112 | 
113 |     if (name === "pdf_splitter") {
114 |       const { inputPath, outputDir, pageRanges } = args as {
115 |         inputPath: string;
116 |         outputDir: string;
117 |         pageRanges: Array<{ start: number; end: number }>;
118 |       };
119 |       const result = await splitPDF(inputPath, outputDir, pageRanges);
120 |       if (!result.success) {
121 |         return {
122 |           content: [{ type: "text", text: `Error: ${result.error}` }],
123 |           isError: true,
124 |         };
125 |       }
126 |       return {
127 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
128 |         isError: false,
129 |       };
130 |     }
131 | 
132 |     if (name === "docx_to_html") {
133 |       const { inputPath, outputDir } = args as {
134 |         inputPath: string;
135 |         outputDir: string;
136 |       };
137 |       const result = await docxToHtml(inputPath, outputDir);
138 |       if (!result.success) {
139 |         return {
140 |           content: [{ type: "text", text: `Error: ${result.error}` }],
141 |           isError: true,
142 |         };
143 |       }
144 |       return {
145 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
146 |         isError: false,
147 |       };
148 |     }
149 | 
150 |     if (name === "html_cleaner") {
151 |       const { inputPath, outputDir } = args as {
152 |         inputPath: string;
153 |         outputDir: string;
154 |       };
155 |       const result = await cleanHtml(inputPath, outputDir);
156 |       if (!result.success) {
157 |         return {
158 |           content: [{ type: "text", text: `Error: ${result.error}` }],
159 |           isError: true,
160 |         };
161 |       }
162 |       return {
163 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
164 |         isError: false,
165 |       };
166 |     }
167 | 
168 |     if (name === "html_to_text") {
169 |       const { inputPath, outputDir } = args as {
170 |         inputPath: string;
171 |         outputDir: string;
172 |       };
173 |       const result = await htmlToText(inputPath, outputDir);
174 |       if (!result.success) {
175 |         return {
176 |           content: [{ type: "text", text: `Error: ${result.error}` }],
177 |           isError: true,
178 |         };
179 |       }
180 |       return {
181 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
182 |         isError: false,
183 |       };
184 |     }
185 | 
186 |     if (name === "html_to_markdown") {
187 |       const { inputPath, outputDir } = args as {
188 |         inputPath: string;
189 |         outputDir: string;
190 |       };
191 |       const result = await htmlToMarkdown(inputPath, outputDir);
192 |       if (!result.success) {
193 |         return {
194 |           content: [{ type: "text", text: `Error: ${result.error}` }],
195 |           isError: true,
196 |         };
197 |       }
198 |       return {
199 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
200 |         isError: false,
201 |       };
202 |     }
203 | 
204 |     if (name === "html_extract_resources") {
205 |       const { inputPath, outputDir } = args as {
206 |         inputPath: string;
207 |         outputDir: string;
208 |       };
209 |       const result = await extractHtmlResources(inputPath, outputDir);
210 |       if (!result.success) {
211 |         return {
212 |           content: [{ type: "text", text: `Error: ${result.error}` }],
213 |           isError: true,
214 |         };
215 |       }
216 |       return {
217 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
218 |         isError: false,
219 |       };
220 |     }
221 | 
222 |     if (name === "html_formatter") {
223 |       const { inputPath, outputDir } = args as {
224 |         inputPath: string;
225 |         outputDir: string;
226 |       };
227 |       const result = await formatHtml(inputPath, outputDir);
228 |       if (!result.success) {
229 |         return {
230 |           content: [{ type: "text", text: `Error: ${result.error}` }],
231 |           isError: true,
232 |         };
233 |       }
234 |       return {
235 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
236 |         isError: false,
237 |       };
238 |     }
239 | 
240 |     if (name === "text_encoding_converter") {
241 |       const { inputPath, outputDir, fromEncoding, toEncoding } = args as {
242 |         inputPath: string;
243 |         outputDir: string;
244 |         fromEncoding: string;
245 |         toEncoding: string;
246 |       };
247 |       const result = await convertTextEncoding(
248 |         inputPath,
249 |         outputDir,
250 |         fromEncoding,
251 |         toEncoding
252 |       );
253 |       if (!result.success) {
254 |         return {
255 |           content: [{ type: "text", text: `Error: ${result.error}` }],
256 |           isError: true,
257 |         };
258 |       }
259 |       return {
260 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
261 |         isError: false,
262 |       };
263 |     }
264 | 
265 |     if (name === "text_formatter") {
266 |       const { inputPath, outputDir } = args as {
267 |         inputPath: string;
268 |         outputDir: string;
269 |       };
270 |       const result = await formatText(inputPath, outputDir);
271 |       if (!result.success) {
272 |         return {
273 |           content: [{ type: "text", text: `Error: ${result.error}` }],
274 |           isError: true,
275 |         };
276 |       }
277 |       return {
278 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
279 |         isError: false,
280 |       };
281 |     }
282 | 
283 |     if (name === "text_diff") {
284 |       const { file1Path, file2Path, outputDir } = args as {
285 |         file1Path: string;
286 |         file2Path: string;
287 |         outputDir: string;
288 |       };
289 |       const result = await compareTexts(file1Path, file2Path, outputDir);
290 |       if (!result.success) {
291 |         return {
292 |           content: [{ type: "text", text: `Error: ${result.error}` }],
293 |           isError: true,
294 |         };
295 |       }
296 |       return {
297 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
298 |         isError: false,
299 |       };
300 |     }
301 | 
302 |     if (name === "text_splitter") {
303 |       const { inputPath, outputDir, splitBy, value } = args as {
304 |         inputPath: string;
305 |         outputDir: string;
306 |         splitBy: "lines" | "delimiter";
307 |         value: string;
308 |       };
309 |       const result = await splitText(inputPath, outputDir, splitBy, value);
310 |       if (!result.success) {
311 |         return {
312 |           content: [{ type: "text", text: `Error: ${result.error}` }],
313 |           isError: true,
314 |         };
315 |       }
316 |       return {
317 |         content: [{ type: "text", text: fileOperationResponse(result.data) }],
318 |         isError: false,
319 |       };
320 |     }
321 | 
322 |     return {
323 |       content: [{ type: "text", text: `Unknown tool: ${name}` }],
324 |       isError: true,
325 |     };
326 |   } catch (error) {
327 |     return {
328 |       content: [
329 |         {
330 |           type: "text",
331 |           text: `Error: ${
332 |             error instanceof Error ? error.message : String(error)
333 |           }`,
334 |         },
335 |       ],
336 |       isError: true,
337 |     };
338 |   }
339 | });
340 | 
/**
 * Connects the MCP server to a stdio transport.
 *
 * The startup notice is written to stderr rather than stdout: with a stdio
 * transport, stdout carries the protocol messages, so free-form text written
 * there would corrupt the stream the client is parsing.
 *
 * @returns A promise that settles once `server.connect` has completed; it
 *   rejects if the connection attempt fails.
 */
async function runServer(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("MCP Doc Forge Server is running");
}
346 | 
/** Reports a fatal startup failure on stderr and terminates the process with exit code 1. */
const exitOnFatalError = (cause: unknown): never => {
  console.error("Fatal error running server:", cause);
  process.exit(1);
};

// Entry point: start the server and bail out if startup rejects.
runServer().catch(exitOnFatalError);
351 | 
/**
 * Wraps the result of a file-producing tool in the fixed instruction text
 * returned to the client, with the result embedded between `<result>` tags.
 *
 * The parameter is typed `unknown` instead of `any`: the value is only ever
 * interpolated into the template, so no unchecked access is needed.
 *
 * @param data - The operation result (in this file, the success message that
 *   contains the output path); it is converted to a string by interpolation.
 * @returns The multi-line response text, including its leading and trailing
 *   whitespace.
 */
function fileOperationResponse(data: unknown): string {
  return `
      Note: This operation has generated a file.
      The file path is in <result>
      Please provide a blank_link download for the file.
      ex: The download link: [file_name](/filepath)
      <result>
        ${data}
      </result>
  `;
}
363 | 
```

--------------------------------------------------------------------------------
/src/tools/htmlTools.ts:
--------------------------------------------------------------------------------

```typescript
  1 | import { Tool } from "@modelcontextprotocol/sdk/types.js";
  2 | import { randomBytes } from "crypto";
  3 | import * as fs from "fs/promises";
  4 | import { JSDOM } from "jsdom";
  5 | import * as path from "path";
  6 | import TurndownService from "turndown";
  7 | 
  8 | function generateUniqueId(): string {
  9 |   return randomBytes(9).toString("hex");
 10 | }
 11 | 
 12 | // HTML 清理工具
 13 | export const HTML_CLEAN_TOOL: Tool = {
 14 |   name: "html_cleaner",
 15 |   description: "Clean HTML by removing unnecessary tags and attributes",
 16 |   inputSchema: {
 17 |     type: "object",
 18 |     properties: {
 19 |       inputPath: {
 20 |         type: "string",
 21 |         description: "Path to the input HTML file",
 22 |       },
 23 |       outputDir: {
 24 |         type: "string",
 25 |         description: "Directory where cleaned HTML should be saved",
 26 |       },
 27 |     },
 28 |     required: ["inputPath", "outputDir"],
 29 |   },
 30 | };
 31 | 
 32 | // HTML 轉純文字工具
 33 | export const HTML_TO_TEXT_TOOL: Tool = {
 34 |   name: "html_to_text",
 35 |   description: "Convert HTML to plain text while preserving structure",
 36 |   inputSchema: {
 37 |     type: "object",
 38 |     properties: {
 39 |       inputPath: {
 40 |         type: "string",
 41 |         description: "Path to the input HTML file",
 42 |       },
 43 |       outputDir: {
 44 |         type: "string",
 45 |         description: "Directory where text file should be saved",
 46 |       },
 47 |     },
 48 |     required: ["inputPath", "outputDir"],
 49 |   },
 50 | };
 51 | 
 52 | // HTML 轉 Markdown 工具
 53 | export const HTML_TO_MARKDOWN_TOOL: Tool = {
 54 |   name: "html_to_markdown",
 55 |   description: "Convert HTML to Markdown format",
 56 |   inputSchema: {
 57 |     type: "object",
 58 |     properties: {
 59 |       inputPath: {
 60 |         type: "string",
 61 |         description: "Path to the input HTML file",
 62 |       },
 63 |       outputDir: {
 64 |         type: "string",
 65 |         description: "Directory where Markdown file should be saved",
 66 |       },
 67 |     },
 68 |     required: ["inputPath", "outputDir"],
 69 |   },
 70 | };
 71 | 
 72 | // HTML 資源提取工具
 73 | export const HTML_EXTRACT_RESOURCES_TOOL: Tool = {
 74 |   name: "html_extract_resources",
 75 |   description: "Extract all resources (images, videos, links) from HTML",
 76 |   inputSchema: {
 77 |     type: "object",
 78 |     properties: {
 79 |       inputPath: {
 80 |         type: "string",
 81 |         description: "Path to the input HTML file",
 82 |       },
 83 |       outputDir: {
 84 |         type: "string",
 85 |         description: "Directory where resources should be saved",
 86 |       },
 87 |     },
 88 |     required: ["inputPath", "outputDir"],
 89 |   },
 90 | };
 91 | 
 92 | // HTML 格式化工具
 93 | export const HTML_FORMAT_TOOL: Tool = {
 94 |   name: "html_formatter",
 95 |   description: "Format and beautify HTML code",
 96 |   inputSchema: {
 97 |     type: "object",
 98 |     properties: {
 99 |       inputPath: {
100 |         type: "string",
101 |         description: "Path to the input HTML file",
102 |       },
103 |       outputDir: {
104 |         type: "string",
105 |         description: "Directory where formatted HTML should be saved",
106 |       },
107 |     },
108 |     required: ["inputPath", "outputDir"],
109 |   },
110 | };
111 | 
/**
 * Cleans an HTML file and writes the result to a new file in `outputDir`.
 *
 * Every `<script>`, `<style>`, `<iframe>` and `<noscript>` element is removed.
 * On the remaining elements the inline `style` attribute and all inline
 * event-handler attributes are stripped. Any attribute whose name starts with
 * `on` counts as an event handler, so `onmouseover`, `onfocus`, etc. are
 * covered as well as `onclick`/`onload`/`onerror`. The output file is named
 * `cleaned_<random id>.html`.
 *
 * Note: this is a lightweight clean-up, not a complete HTML sanitizer; for
 * example, URL attributes are not inspected.
 *
 * Progress and errors are logged to stderr.
 *
 * @param inputPath - Path of the HTML file to read (decoded as UTF-8).
 * @param outputDir - Directory for the output file; created recursively when
 *   it is not accessible.
 * @returns `{ success: true, data }` where `data` is a message containing the
 *   output path, or `{ success: false, error }` with the failure message.
 *   Failures are reported through the result instead of being thrown.
 */
export async function cleanHtml(inputPath: string, outputDir: string) {
  try {
    console.error(`Starting HTML cleaning...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    try {
      await fs.access(outputDir);
      console.error(`Output directory exists: ${outputDir}`);
    } catch {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");
    const dom = new JSDOM(htmlContent);
    const { document } = dom.window;

    // Remove unwanted elements together with their content.
    const unwantedTags = ["script", "style", "iframe", "noscript"];
    for (const tag of unwantedTags) {
      document.querySelectorAll(tag).forEach((el) => el.remove());
    }

    // Strip inline styles and every inline event handler (on*).
    document.querySelectorAll("*").forEach((el) => {
      // getAttributeNames() returns a fresh array, so removing attributes
      // while looping over it is safe.
      for (const attrName of el.getAttributeNames()) {
        const lowered = attrName.toLowerCase();
        if (lowered === "style" || lowered.startsWith("on")) {
          el.removeAttribute(attrName);
        }
      }
    });

    const cleanedHtml = dom.serialize();
    const outputPath = path.join(outputDir, `cleaned_${uniqueId}.html`);
    await fs.writeFile(outputPath, cleanedHtml);
    console.error(`Written cleaned HTML to ${outputPath}`);

    return {
      success: true,
      data: `Successfully cleaned HTML and saved to ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in cleanHtml:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
163 | 
/**
 * Converts an HTML file to plain text and writes it to a new file in
 * `outputDir`, named `text_<random id>.txt`.
 *
 * The text is the trimmed `textContent` of the document body. `<script>` and
 * `<style>` elements are removed beforehand, because `textContent` would
 * otherwise include their source code in the output.
 *
 * NOTE(review): no line breaks are synthesised for block elements; only the
 * whitespace already present in the markup survives.
 *
 * Progress and errors are logged to stderr.
 *
 * @param inputPath - Path of the HTML file to read (decoded as UTF-8).
 * @param outputDir - Directory for the output file; created recursively when
 *   it is not accessible.
 * @returns `{ success: true, data }` where `data` is a message containing the
 *   output path, or `{ success: false, error }` with the failure message.
 *   Failures are reported through the result instead of being thrown.
 */
export async function htmlToText(inputPath: string, outputDir: string) {
  try {
    console.error(`Starting HTML to text conversion...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    try {
      await fs.access(outputDir);
      console.error(`Output directory exists: ${outputDir}`);
    } catch {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");
    const dom = new JSDOM(htmlContent);
    const { document } = dom.window;

    // textContent concatenates all descendant text nodes, including the
    // source inside <script> and <style>; drop those so only content remains.
    document.querySelectorAll("script, style").forEach((el) => el.remove());

    const text = (document.body.textContent ?? "").trim();
    const outputPath = path.join(outputDir, `text_${uniqueId}.txt`);
    await fs.writeFile(outputPath, text);
    console.error(`Written text to ${outputPath}`);

    return {
      success: true,
      data: `Successfully converted HTML to text: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in htmlToText:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
204 | 
/**
 * Converts an HTML file to Markdown with Turndown and writes it to a new file
 * in `outputDir`, named `markdown_<random id>.md`.
 *
 * `<script>` and `<style>` elements are registered for removal so their
 * source code is not emitted as Markdown text.
 *
 * Progress and errors are logged to stderr.
 *
 * @param inputPath - Path of the HTML file to read (decoded as UTF-8).
 * @param outputDir - Directory for the output file; created recursively when
 *   it is not accessible.
 * @returns `{ success: true, data }` where `data` is a message containing the
 *   output path, or `{ success: false, error }` with the failure message.
 *   Failures are reported through the result instead of being thrown.
 */
export async function htmlToMarkdown(inputPath: string, outputDir: string) {
  try {
    console.error(`Starting HTML to Markdown conversion...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    try {
      await fs.access(outputDir);
      console.error(`Output directory exists: ${outputDir}`);
    } catch {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");

    const turndownService = new TurndownService();
    // Turndown keeps the text of unknown elements by default, which would
    // dump JavaScript/CSS source into the Markdown; remove those elements.
    turndownService.remove(["script", "style"]);
    const markdown = turndownService.turndown(htmlContent);

    const outputPath = path.join(outputDir, `markdown_${uniqueId}.md`);
    await fs.writeFile(outputPath, markdown);
    console.error(`Written Markdown to ${outputPath}`);

    return {
      success: true,
      data: `Successfully converted HTML to Markdown: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in htmlToMarkdown:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
243 | 
/**
 * Collects the resource URLs referenced by an HTML file and writes them as
 * pretty-printed JSON to `resources_<random id>.json` in `outputDir`.
 *
 * The JSON object has three arrays:
 * - `images`: the `src` of every `<img>`
 * - `links`: the `href` of every `<a>`
 * - `videos`: the `src` of every `<video>` and of every `<source>` nested in a
 *   `<video>`
 *
 * Elements that yield an empty URL (e.g. an `<a>` without `href`, or a
 * `<video>` that only uses `<source>` children) are skipped instead of adding
 * empty strings to the lists.
 *
 * Progress and errors are logged to stderr.
 *
 * @param inputPath - Path of the HTML file to read (decoded as UTF-8).
 * @param outputDir - Directory for the output file; created recursively when
 *   it is not accessible.
 * @returns `{ success: true, data }` where `data` is a message containing the
 *   output path, or `{ success: false, error }` with the failure message.
 *   Failures are reported through the result instead of being thrown.
 */
export async function extractHtmlResources(
  inputPath: string,
  outputDir: string
) {
  try {
    console.error(`Starting resource extraction...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    try {
      await fs.access(outputDir);
      console.error(`Output directory exists: ${outputDir}`);
    } catch {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");
    const dom = new JSDOM(htmlContent);
    const { document } = dom.window;

    // Reads one URL per matched element and drops empty results.
    const collectUrls = <T extends Element>(
      selector: string,
      readUrl: (el: T) => string
    ): string[] =>
      Array.from(document.querySelectorAll<T>(selector))
        .map(readUrl)
        .filter((url) => url !== "");

    // Extract the resources (key order is kept stable for the JSON output).
    const resources = {
      images: collectUrls<HTMLImageElement>("img", (img) => img.src),
      links: collectUrls<HTMLAnchorElement>("a", (a) => a.href),
      // A video can name its media on the element itself or on <source> children.
      videos: collectUrls<HTMLVideoElement | HTMLSourceElement>(
        "video, video source",
        (media) => media.src
      ),
    };

    const outputPath = path.join(outputDir, `resources_${uniqueId}.json`);
    await fs.writeFile(outputPath, JSON.stringify(resources, null, 2));
    console.error(`Written resources to ${outputPath}`);

    return {
      success: true,
      data: `Successfully extracted resources: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in extractHtmlResources:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
298 | 
/**
 * Parses an HTML file with JSDOM and writes the re-serialized document to a
 * new file in `outputDir`, named `formatted_<random id>.html`.
 *
 * NOTE(review): the only transformation applied is the parse/serialize round
 * trip; no indentation or other pretty-printing step is performed here.
 *
 * Progress and errors are logged to stderr.
 *
 * @param inputPath - Path of the HTML file to read (decoded as UTF-8).
 * @param outputDir - Directory for the output file; created recursively when
 *   it is not accessible.
 * @returns `{ success: true, data }` where `data` is a message containing the
 *   output path, or `{ success: false, error }` with the failure message.
 *   Failures are reported through the result instead of being thrown.
 */
export async function formatHtml(inputPath: string, outputDir: string) {
  try {
    console.error(`Starting HTML formatting...`);
    console.error(`Input file: ${inputPath}`);
    console.error(`Output directory: ${outputDir}`);

    // Make sure the output directory exists.
    try {
      await fs.access(outputDir);
      console.error(`Output directory exists: ${outputDir}`);
    } catch {
      console.error(`Creating output directory: ${outputDir}`);
      await fs.mkdir(outputDir, { recursive: true });
      console.error(`Created output directory: ${outputDir}`);
    }

    const uniqueId = generateUniqueId();
    const htmlContent = await fs.readFile(inputPath, "utf-8");
    const dom = new JSDOM(htmlContent);

    // Serialize the parsed document back to markup.
    const formattedHtml = dom.serialize();
    const outputPath = path.join(outputDir, `formatted_${uniqueId}.html`);
    await fs.writeFile(outputPath, formattedHtml);
    console.error(`Written formatted HTML to ${outputPath}`);

    return {
      success: true,
      data: `Successfully formatted HTML: ${outputPath}`,
    };
  } catch (error) {
    console.error(`Error in formatHtml:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
339 | 
```