# Directory Structure
```
├── .gitignore
├── .python-version
├── mcp_excel_server
│ ├── __init__.py
│ └── server.py
├── pyproject.toml
├── README.md
├── sample_data.xlsx
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.10
2 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Python-generated files
2 | __pycache__/
3 | *.py[oc]
4 | build/
5 | dist/
6 | wheels/
7 | temp/
8 | test/
9 | *.egg-info
10 |
11 | # Virtual environments
12 | .venv
13 | .pyirc
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # Excel MCP Server
2 |
3 | An MCP server that provides comprehensive Excel file management and data analysis capabilities.
4 |
5 | ## Features
6 |
7 | - **Excel File Operations**
8 | - Read multiple Excel formats (XLSX, XLS, CSV, TSV, JSON)
9 | - Write and update Excel files
10 | - Get file information and sheet names
11 |
12 | - **Data Analysis**
13 | - Summary statistics and descriptive analysis
14 | - Data quality assessment
15 | - Pivot tables
16 | - Filtering and querying data
17 |
18 | - **Visualization**
19 | - Generate charts and plots from Excel data
20 | - Create data previews
21 | - Export visualizations as images
22 |
23 | ## Installation
24 |
25 | 1. Create a new Python environment (recommended):
26 |
27 | ```bash
28 | # Using uv (recommended)
29 | uv init excel-mcp-server
30 | cd excel-mcp-server
31 | uv venv
32 | source .venv/bin/activate # On Windows: .venv\Scripts\activate
33 |
34 | # Or using pip
35 | python -m venv .venv
36 | source .venv/bin/activate # On Windows: .venv\Scripts\activate
37 | ```
38 |
39 | 2. Install dependencies:
40 |
41 | ```bash
42 | # Using uv
43 | uv pip install -e .
44 | ```
45 |
46 | ## Integration with Claude Desktop
47 |
48 | 1. Install [Claude Desktop](https://claude.ai/download)
49 | 2. Open Settings and go to the Developer tab
50 | 3. Edit `claude_desktop_config.json`:
51 |
52 | ```json
53 | {
54 |   "mcpServers": {
55 |     "mcp-excel-server": {
56 |       "command": "uvx",
57 |       "args": ["mcp-excel-server"],
58 |       "env": {
59 |         "PYTHONPATH": "/path/to/your/python"
60 |       }
61 |     }
62 |   }
63 | }
64 | ```
65 |
66 | ## Available Tools
67 |
68 | ### File Reading
69 | - `read_excel`: Read Excel files
70 | - `get_excel_info`: Get file details
71 | - `get_sheet_names`: List worksheet names
72 |
73 | ### Data Analysis
74 | - `analyze_excel`: Perform statistical analysis
75 | - `filter_excel`: Filter data by conditions
76 | - `pivot_table`: Create pivot tables
77 | - `data_summary`: Generate comprehensive data summary
78 |
79 | ### Data Visualization
80 | - `export_chart`: Generate charts
81 | - Supports line charts, bar charts, scatter plots, histograms
82 |
83 | ### File Operations
84 | - `write_excel`: Write new Excel files
85 | - `update_excel`: Update existing Excel files
86 |
87 | ## Available Resources
88 |
89 | - `excel://{file_path}`: Get file content
90 | - `excel://{file_path}/info`: Get file structure information
91 | - `excel://{file_path}/preview`: Generate data preview image
92 |
93 | ## Prompt Templates
94 |
95 | - `analyze_excel_data`: Guided template for Excel data analysis
96 | - `create_chart`: Help create data visualizations
97 | - `data_cleaning`: Assist with data cleaning
98 |
99 | ## Usage Examples
100 |
101 | - "Analyze my sales_data.xlsx file"
102 | - "Create a bar chart for product_sales.csv"
103 | - "Filter employees over 30 in employees.xlsx"
104 | - "Generate a pivot table of department sales"
105 |
106 | ## Security Considerations
107 |
108 | - Read files only from specified paths
109 | - Limit file size
110 | - Prevent accidental file overwriting
111 | - Strictly control data transformation operations
112 |
113 | ## Dependencies
114 |
115 | - pandas
116 | - numpy
117 | - matplotlib
118 | - seaborn
119 |
120 | ## License
121 |
122 | MIT License
```
--------------------------------------------------------------------------------
/mcp_excel_server/__init__.py:
--------------------------------------------------------------------------------
```python
1 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "mcp-excel-server"
7 | version = "1.0.4"
8 | description = "MCP Excel Server for Excel operations through the Model Context Protocol"
9 | authors = [
10 | {name = "yzfly", email = "[email protected]"}
11 | ]
12 | readme = "README.md"
13 | license = {text = "MIT"}
14 | requires-python = ">=3.10"
15 |
16 | dependencies = [
17 | "mcp[cli]>=1.2.0",
18 | "pandas>=1.3.0",
19 | "numpy>=1.20.0",
20 | "matplotlib>=3.4.0",
21 | "seaborn>=0.11.0",
22 | "openpyxl>=3.0.0",
23 | "build>=1.2.2.post1",
24 | "twine>=6.1.0",
25 | ]
26 |
27 | [project.optional-dependencies]
28 | dev = [
29 | "pytest>=7.0.0",
30 | "black>=23.0.0",
31 | "isort>=5.0.0",
32 | "flake8>=6.0.0",
33 | ]
34 |
35 | [project.urls]
36 | Homepage = "https://github.com/yzfly/mcp-excel-server"
37 | Documentation = "https://github.com/yzfly/mcp-excel-server/blob/main/README.md"
38 | Repository = "https://github.com/yzfly/mcp-excel-server.git"
39 | Issues = "https://github.com/yzfly/mcp-excel-server/issues"
40 |
41 | [project.scripts]
42 | mcp-excel-server = "mcp_excel_server.server:main"
```
--------------------------------------------------------------------------------
/mcp_excel_server/server.py:
--------------------------------------------------------------------------------
```python
1 | import os
2 | import io
3 | import json
4 | import pandas as pd
5 | import numpy as np
6 | from typing import Optional, Dict, List, Union, Tuple, Any
7 | from dataclasses import dataclass
8 | import base64
9 | from datetime import datetime
10 | from mcp.server.fastmcp import FastMCP, Context, Image
11 |
# Create the MCP server
# Module-level FastMCP instance; the @mcp.resource/@mcp.tool/@mcp.prompt
# decorators below register their handlers onto this object.
mcp = FastMCP("Excel Data Manager")
14 |
15 | # Helper functions
def _read_excel_file(file_path: str) -> Tuple[pd.DataFrame, str]:
    """
    Load a tabular file from disk into a DataFrame.

    Supported formats: .xlsx/.xls/.xlsm workbooks, .csv, .tsv, and .json.

    Args:
        file_path: Location of the file to load.

    Returns:
        Tuple of (DataFrame, lower-cased file extension including the dot).

    Raises:
        FileNotFoundError: If the path does not exist.
        ValueError: If the extension is not one of the supported formats.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    extension = os.path.splitext(file_path)[1].lower()

    # Dispatch on the extension to the matching pandas reader.
    if extension in ('.xlsx', '.xls', '.xlsm'):
        frame = pd.read_excel(file_path)
    elif extension == '.csv':
        frame = pd.read_csv(file_path)
    elif extension == '.tsv':
        frame = pd.read_csv(file_path, sep='\t')
    elif extension == '.json':
        frame = pd.read_json(file_path)
    else:
        raise ValueError(f"Unsupported file extension: {extension}")

    return frame, extension
42 |
def _get_dataframe_info(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Generate summary information about a DataFrame.

    Args:
        df: DataFrame to summarize.

    Returns:
        Dict with shape, columns, dtypes, per-column missing-value counts,
        total memory usage, a 5-row sample, and (when numeric columns exist)
        a "numeric_stats" dict of min/max/mean/median/std per column, with
        NaN aggregates reported as None.
    """
    # Basic info
    info = {
        "shape": df.shape,
        "columns": list(df.columns),
        "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
        "missing_values": df.isnull().sum().to_dict(),
        "total_memory_usage": df.memory_usage(deep=True).sum(),
    }

    # Sample data (first 5 rows)
    info["sample"] = df.head(5).to_dict(orient='records')

    # Numeric column stats. Compute each aggregate exactly once; the
    # original evaluated e.g. df[col].min() twice per stat (once for the
    # value, once inside pd.isna).
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        info["numeric_stats"] = {}
        for col in numeric_cols:
            aggregates = {
                "min": df[col].min(),
                "max": df[col].max(),
                "mean": df[col].mean(),
                "median": df[col].median(),
                "std": df[col].std(),
            }
            info["numeric_stats"][col] = {
                name: (float(value) if not pd.isna(value) else None)
                for name, value in aggregates.items()
            }

    return info
71 |
72 | # Resource Handlers
73 |
@mcp.resource("excel://{file_path}")
def get_excel_file(file_path: str) -> str:
    """
    Return the contents of a spreadsheet file rendered as plain text.

    Args:
        file_path: Path of the file to load.

    Returns:
        A table-formatted string of the file's rows (no index column).
    """
    frame, _ext = _read_excel_file(file_path)
    return frame.to_string(index=False)
87 |
@mcp.resource("excel://{file_path}/info")
def get_excel_info(file_path: str) -> str:
    """
    Describe a spreadsheet file: structure, dtypes, and basic statistics.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A JSON document (pretty-printed) describing the file.
    """
    frame, extension = _read_excel_file(file_path)
    details = _get_dataframe_info(frame)
    # Tag the summary with where it came from.
    details["file_path"] = file_path
    details["file_type"] = extension
    return json.dumps(details, indent=2, default=str)
104 |
@mcp.resource("excel://{file_path}/sheet_names")
def get_sheet_names(file_path: str) -> str:
    """
    Get the names of all sheets in an Excel workbook.

    Args:
        file_path: Path to the Excel file

    Returns:
        JSON string with sheet names, or an {"error": ...} object when the
        file is not a workbook format.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext not in ['.xlsx', '.xls', '.xlsm']:
        return json.dumps({"error": "File is not an Excel workbook"})

    # Use a context manager so the workbook's file handle is released
    # promptly (the original leaked the open ExcelFile).
    with pd.ExcelFile(file_path) as xls:
        return json.dumps({"sheet_names": xls.sheet_names})
124 |
@mcp.resource("excel://{file_path}/preview")
def get_excel_preview(file_path: str) -> Image:
    """
    Generate a visual preview of an Excel file.

    Small data (<=10 rows and <=10 columns) renders as a table image;
    larger data renders as a heatmap of the numeric part of the first
    10x10 cells, falling back to a text rendering when that slice has no
    numeric columns.

    Args:
        file_path: Path to the Excel file

    Returns:
        PNG image of the data visualization
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    df, _ = _read_excel_file(file_path)

    # Create a styled preview
    plt.figure(figsize=(10, 6))

    # If DataFrame is small enough, show as a table
    if df.shape[0] <= 10 and df.shape[1] <= 10:
        plt.axis('tight')
        plt.axis('off')
        table = plt.table(cellText=df.values,
                          colLabels=df.columns,
                          cellLoc='center',
                          loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1.2, 1.2)
    else:
        # For larger DataFrames, show a heatmap of the first 10x10 section
        preview_df = df.iloc[:10, :10]
        numeric_preview = preview_df.select_dtypes(include=['number'])
        if numeric_preview.empty:
            # seaborn cannot draw a heatmap with no numeric data; the
            # original raised here. Fall back to a plain-text rendering.
            plt.axis('off')
            plt.text(0.5, 0.5, preview_df.to_string(index=False),
                     horizontalalignment='center', fontsize=8,
                     family='monospace')
        else:
            sns.heatmap(numeric_preview,
                        cmap='viridis',
                        annot=False,
                        linewidths=.5)
    # Title on every branch (the original titled only the heatmap preview).
    plt.title(f"Preview of {os.path.basename(file_path)}")

    # Save to bytes buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)

    # Convert to Image
    plt.close()
    return Image(data=buf.getvalue(), format="png")
172 |
173 | # Tool Handlers
174 |
@mcp.tool()
def read_excel(file_path: str, sheet_name: Optional[str] = None,
               nrows: Optional[int] = None, header: Optional[int] = 0) -> str:
    """
    Read an Excel file and return its contents as a string.

    Args:
        file_path: Path to the Excel file
        sheet_name: Name of the sheet to read (only for .xlsx, .xls, .xlsm)
        nrows: Maximum number of rows to read
        header: Row to use as header (0-indexed); ignored for JSON input

    Returns:
        String representation of the Excel data, or an error message
    """
    try:
        # Friendly message instead of an uncaught FileNotFoundError,
        # consistent with the other tools in this module.
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"

        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        read_params = {"header": header}
        if nrows is not None:
            read_params["nrows"] = nrows

        if ext in ['.xlsx', '.xls', '.xlsm']:
            if sheet_name is not None:
                read_params["sheet_name"] = sheet_name
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path, **read_params)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t', **read_params)
        elif ext == '.json':
            # pd.read_json has no nrows/header parameters; honour nrows
            # manually (the original silently ignored it for JSON input).
            df = pd.read_json(file_path)
            if nrows is not None:
                df = df.head(nrows)
        else:
            return f"Unsupported file extension: {ext}"

        return df.to_string(index=False)
    except Exception as e:
        # Report failures as strings, matching the sibling tools.
        return f"Error reading file: {str(e)}"
211 |
@mcp.tool()
def write_excel(file_path: str, data: str, sheet_name: Optional[str] = "Sheet1",
                format: Optional[str] = "csv") -> str:
    """
    Write data to an Excel file.

    Args:
        file_path: Path to save the Excel file
        data: Data in CSV or JSON format
        sheet_name: Name of the sheet (for Excel files)
        format: Format of the input data ('csv' or 'json')

    Returns:
        Confirmation message, or an error message on failure
    """
    try:
        # Parse the incoming payload into a DataFrame.
        fmt = format.lower()
        if fmt == 'csv':
            df = pd.read_csv(io.StringIO(data))
        elif fmt == 'json':
            df = pd.read_json(io.StringIO(data))
        else:
            return f"Unsupported data format: {format}"

        # Output format is chosen by the destination's extension.
        ext = os.path.splitext(file_path)[1].lower()

        if ext in ('.xlsx', '.xls', '.xlsm'):
            df.to_excel(file_path, sheet_name=sheet_name, index=False)
        elif ext == '.csv':
            df.to_csv(file_path, index=False)
        elif ext == '.tsv':
            df.to_csv(file_path, sep='\t', index=False)
        elif ext == '.json':
            df.to_json(file_path, orient='records')
        else:
            return f"Unsupported output file extension: {ext}"

        return f"Successfully wrote data to {file_path}"
    except Exception as e:
        return f"Error writing data: {str(e)}"
252 |
@mcp.tool()
def update_excel(file_path: str, data: str, sheet_name: Optional[str] = "Sheet1",
                 format: Optional[str] = "csv") -> str:
    """
    Update an existing Excel file with new data.

    For workbooks the target sheet is replaced while every other sheet is
    preserved; for flat formats (csv/tsv/json) the whole file is rewritten.

    Args:
        file_path: Path to the Excel file to update
        data: New data in CSV or JSON format
        sheet_name: Name of the sheet to update (for Excel files)
        format: Format of the input data ('csv' or 'json')

    Returns:
        Confirmation message, or an error message on failure
    """
    try:
        # Check if file exists
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"

        # Load new data
        if format.lower() == 'csv':
            new_df = pd.read_csv(io.StringIO(data))
        elif format.lower() == 'json':
            new_df = pd.read_json(io.StringIO(data))
        else:
            return f"Unsupported data format: {format}"

        # Get file extension
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        if ext in ['.xlsx', '.xls', '.xlsm']:
            # Snapshot the sheets we must keep into memory and CLOSE the
            # reader before opening a writer on the same path. The original
            # kept the ExcelFile open while the ExcelWriter rewrote the
            # file it was reading from (and leaked the reader's handle).
            with pd.ExcelFile(file_path) as excel_file:
                preserved = {
                    sheet: pd.read_excel(excel_file, sheet_name=sheet)
                    for sheet in excel_file.sheet_names
                    if sheet != sheet_name
                }
            with pd.ExcelWriter(file_path) as writer:
                for sheet, df in preserved.items():
                    df.to_excel(writer, sheet_name=sheet, index=False)
                # Write new data to specified sheet
                new_df.to_excel(writer, sheet_name=sheet_name, index=False)
        elif ext == '.csv':
            new_df.to_csv(file_path, index=False)
        elif ext == '.tsv':
            new_df.to_csv(file_path, sep='\t', index=False)
        elif ext == '.json':
            new_df.to_json(file_path, orient='records')
        else:
            return f"Unsupported file extension: {ext}"

        return f"Successfully updated {file_path}"
    except Exception as e:
        return f"Error updating file: {str(e)}"
310 |
@mcp.tool()
def analyze_excel(file_path: str, columns: Optional[str] = None,
                  sheet_name: Optional[str] = None) -> str:
    """
    Perform statistical analysis on Excel data.

    Args:
        file_path: Path to the Excel file
        columns: Comma-separated list of columns to analyze (analyzes all numeric columns if None)
        sheet_name: Name of the sheet to analyze (for Excel files)

    Returns:
        JSON string with descriptive stats, correlations, missing-value
        counts and unique-value counts, or a JSON {"error": ...} object.
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()

        # Sheet selection only applies to workbook formats.
        read_params = {}
        if sheet_name is not None and ext in ('.xlsx', '.xls', '.xlsm'):
            read_params["sheet_name"] = sheet_name

        if ext in ('.xlsx', '.xls', '.xlsm'):
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        elif ext == '.json':
            df = pd.read_json(file_path)
        else:
            return f"Unsupported file extension: {ext}"

        # Restrict to the requested columns, if any.
        if columns:
            wanted = [name.strip() for name in columns.split(',')]
            df = df[wanted]

        # Analysis only makes sense on numeric data.
        numeric_df = df.select_dtypes(include=['number'])
        if numeric_df.empty:
            return json.dumps({"error": "No numeric columns found for analysis"})

        analysis = {
            "descriptive_stats": numeric_df.describe().to_dict(),
            "correlation": numeric_df.corr().to_dict(),
            "missing_values": numeric_df.isnull().sum().to_dict(),
            "unique_values": {col: int(numeric_df[col].nunique()) for col in numeric_df.columns},
        }

        return json.dumps(analysis, indent=2, default=str)
    except Exception as e:
        return json.dumps({"error": str(e)})
367 |
@mcp.tool()
def filter_excel(file_path: str, query: str, sheet_name: Optional[str] = None) -> str:
    """
    Filter Excel data using a pandas query string.

    Args:
        file_path: Path to the Excel file
        query: Pandas query string (e.g., "Age > 30 and Department == 'Sales'")
        sheet_name: Name of the sheet to filter (for Excel files)

    Returns:
        The matching rows as text, a no-match message, or an error message
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()

        # Sheet selection only applies to workbook formats.
        read_params = {}
        if sheet_name is not None and ext in ('.xlsx', '.xls', '.xlsm'):
            read_params["sheet_name"] = sheet_name

        if ext in ('.xlsx', '.xls', '.xlsm'):
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        elif ext == '.json':
            df = pd.read_json(file_path)
        else:
            return f"Unsupported file extension: {ext}"

        # Apply the caller-supplied query expression.
        matches = df.query(query)

        if matches.empty:
            return "No data matches the filter criteria."
        return matches.to_string(index=False)
    except Exception as e:
        return f"Error filtering data: {str(e)}"
411 |
@mcp.tool()
def pivot_table(file_path: str, index: str, columns: Optional[str] = None,
                values: Optional[str] = None, aggfunc: str = "mean",
                sheet_name: Optional[str] = None) -> str:
    """
    Create a pivot table from Excel data.

    Args:
        file_path: Path to the Excel file
        index: Column to use as the pivot table index
        columns: Optional column to use as the pivot table columns
        values: Column to use as the pivot table values
        aggfunc: Aggregation function ('mean', 'sum', 'count', 'min', 'max')
        sheet_name: Name of the sheet to pivot (for Excel files)

    Returns:
        Pivot table as string, or an error message
    """
    # Named aggregations that pandas resolves itself; passing np.mean /
    # np.sum etc. as aggfunc is deprecated in recent pandas. "count" keeps
    # the original len-based semantics (counts all rows in a group,
    # including NaN values).
    _AGGFUNCS = {"mean": "mean", "sum": "sum", "count": len,
                 "min": "min", "max": "max"}
    try:
        # Read file
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        read_params = {}
        if ext in ['.xlsx', '.xls', '.xlsm'] and sheet_name is not None:
            read_params["sheet_name"] = sheet_name

        if ext in ['.xlsx', '.xls', '.xlsm']:
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        elif ext == '.json':
            df = pd.read_json(file_path)
        else:
            return f"Unsupported file extension: {ext}"

        # Configure pivot table params
        pivot_params = {"index": index}
        if columns:
            pivot_params["columns"] = columns
        if values:
            pivot_params["values"] = values

        if aggfunc not in _AGGFUNCS:
            return f"Unsupported aggregation function: {aggfunc}"
        pivot_params["aggfunc"] = _AGGFUNCS[aggfunc]

        # Create pivot table
        pivot = pd.pivot_table(df, **pivot_params)

        return pivot.to_string()
    except Exception as e:
        return f"Error creating pivot table: {str(e)}"
477 |
@mcp.tool()
def export_chart(file_path: str, x_column: str, y_column: str,
                 chart_type: str = "line", sheet_name: Optional[str] = None) -> Image:
    """
    Create a chart from Excel data and return as an image.

    On failure an image containing the error text is returned instead of
    raising, so the caller always receives a PNG.

    Args:
        file_path: Path to the Excel file
        x_column: Column to use for x-axis
        y_column: Column to use for y-axis (also the histogram column)
        chart_type: Type of chart ('line', 'bar', 'scatter', 'hist')
        sheet_name: Name of the sheet to chart (for Excel files)

    Returns:
        Chart as PNG image
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    try:
        # Read file
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        read_params = {}
        if ext in ['.xlsx', '.xls', '.xlsm'] and sheet_name is not None:
            read_params["sheet_name"] = sheet_name

        if ext in ['.xlsx', '.xls', '.xlsm']:
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        elif ext == '.json':
            df = pd.read_json(file_path)
        else:
            raise ValueError(f"Unsupported file extension: {ext}")

        # Create chart
        plt.figure(figsize=(10, 6))

        if chart_type == "line":
            sns.lineplot(data=df, x=x_column, y=y_column)
        elif chart_type == "bar":
            sns.barplot(data=df, x=x_column, y=y_column)
        elif chart_type == "scatter":
            sns.scatterplot(data=df, x=x_column, y=y_column)
        elif chart_type == "hist":
            df[y_column].hist()
            plt.xlabel(y_column)
        else:
            raise ValueError(f"Unsupported chart type: {chart_type}")

        plt.title(f"{chart_type.capitalize()} Chart: {y_column} by {x_column}")
        plt.tight_layout()

        # Save to bytes buffer
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)

        # Convert to Image
        plt.close()
        return Image(data=buf.getvalue(), format="png")
    except Exception as e:
        # Discard any partially-drawn figure from the failed attempt; the
        # original called plt.close() at the end, which only closes the
        # newest figure and leaked the one created in the try block.
        plt.close('all')

        # Return error image
        plt.figure(figsize=(8, 2))
        plt.text(0.5, 0.5, f"Error creating chart: {str(e)}",
                 horizontalalignment='center', fontsize=12, color='red')
        plt.axis('off')

        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        plt.close()

        return Image(data=buf.getvalue(), format="png")
556 |
@mcp.tool()
def data_summary(file_path: str, sheet_name: Optional[str] = None) -> str:
    """
    Generate a comprehensive summary of the data in an Excel file.

    The summary covers file metadata, table structure, data-quality measures
    (missing values, duplicates, cardinality) and per-type statistics for
    numeric, categorical and datetime columns.

    Args:
        file_path: Path to the Excel file
        sheet_name: Name of the sheet to summarize (for Excel files)

    Returns:
        JSON summary string, or an error message
    """
    try:
        # Read file
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        read_params = {}
        if ext in ['.xlsx', '.xls', '.xlsm'] and sheet_name is not None:
            read_params["sheet_name"] = sheet_name

        if ext in ['.xlsx', '.xls', '.xlsm']:
            df = pd.read_excel(file_path, **read_params)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext == '.tsv':
            df = pd.read_csv(file_path, sep='\t')
        elif ext == '.json':
            df = pd.read_json(file_path)
        else:
            return f"Unsupported file extension: {ext}"

        # Basic file info
        file_info = {
            "file_name": os.path.basename(file_path),
            "file_type": ext,
            "file_size": f"{os.path.getsize(file_path) / 1024:.2f} KB",
            "last_modified": datetime.fromtimestamp(os.path.getmtime(file_path)).strftime('%Y-%m-%d %H:%M:%S')
        }

        # Data structure
        data_structure = {
            "rows": df.shape[0],
            "columns": df.shape[1],
            "column_names": list(df.columns),
            "column_types": {col: str(dtype) for col, dtype in df.dtypes.items()},
            "memory_usage": f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB"
        }

        # Data quality. Compute null counts once (the original called
        # df.isnull().sum() twice) and guard against zero-row files, where
        # the original's len(df) divisor raised ZeroDivisionError.
        null_counts = df.isnull().sum()
        row_count = len(df)
        data_quality = {
            "missing_values": {col: int(count) for col, count in null_counts.items()},
            "missing_percentage": {
                col: f"{(count / row_count * 100) if row_count else 0:.2f}%"
                for col, count in null_counts.items()
            },
            "duplicate_rows": int(df.duplicated().sum()),
            "unique_values": {col: int(df[col].nunique()) for col in df.columns}
        }

        # Statistical summary, bucketed by column kind
        numeric_cols = df.select_dtypes(include=['number']).columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        datetime_cols = df.select_dtypes(include=['datetime', 'datetime64']).columns

        statistics = {}
        if len(numeric_cols) > 0:
            statistics["numeric"] = df[numeric_cols].describe().to_dict()

        if len(categorical_cols) > 0:
            statistics["categorical"] = {
                col: {
                    "unique_values": int(df[col].nunique()),
                    "top_values": df[col].value_counts().head(5).to_dict()
                } for col in categorical_cols
            }

        if len(datetime_cols) > 0:
            statistics["datetime"] = {
                col: {
                    "min": df[col].min().strftime('%Y-%m-%d') if pd.notna(df[col].min()) else None,
                    "max": df[col].max().strftime('%Y-%m-%d') if pd.notna(df[col].max()) else None,
                    "range_days": (df[col].max() - df[col].min()).days if pd.notna(df[col].min()) and pd.notna(df[col].max()) else None
                } for col in datetime_cols
            }

        # Combine all info
        summary = {
            "file_info": file_info,
            "data_structure": data_structure,
            "data_quality": data_quality,
            "statistics": statistics
        }

        return json.dumps(summary, indent=2, default=str)
    except Exception as e:
        return f"Error generating summary: {str(e)}"
651 |
652 | # Add prompt templates for common Excel operations
@mcp.prompt()
def analyze_excel_data(file_path: str) -> str:
    """
    Create a prompt for analyzing Excel data.

    Args:
        file_path: Path to the Excel file, interpolated into the prompt text.

    Returns:
        Prompt text asking for structure review, basic statistics, and insights.
    """
    return f"""
    I have an Excel file at {file_path} that I'd like to analyze.
    Could you help me understand the data structure, perform basic statistical analysis,
    and identify any patterns or insights in the data?
    """
663 |
@mcp.prompt()
def create_chart(file_path: str) -> str:
    """
    Create a prompt for generating charts from Excel data.

    Args:
        file_path: Path to the Excel file, interpolated into the prompt text.

    Returns:
        Prompt text asking for chart suggestions and help creating them.
    """
    return f"""
    I have an Excel file at {file_path} and I want to create some visualizations.
    Could you suggest some appropriate charts based on the data and help me create them?
    """
673 |
@mcp.prompt()
def data_cleaning(file_path: str) -> str:
    """
    Create a prompt for cleaning and preprocessing Excel data.

    Args:
        file_path: Path to the Excel file, interpolated into the prompt text.

    Returns:
        Prompt text asking for help finding and fixing data-quality issues.
    """
    return f"""
    I have an Excel file at {file_path} that needs some cleaning and preprocessing.
    Could you help me identify and fix issues like missing values, outliers,
    inconsistent formatting, and other data quality problems?
    """
684 |
def main():
    """Start the MCP server (console entry point declared in pyproject.toml)."""
    mcp.run()

# Main function to run server
if __name__ == "__main__":
    main()
```