#
tokens: 44958/50000 51/59 files (page 1/5)
lines: off (toggle) GitHub
raw markdown copy
This is page 1 of 5. Use http://codebase.md/hithereiamaliff/mcp-datagovmy?page={x} to view the full context.

# Directory Structure

```
├── .eslintrc.json
├── .github
│   └── workflows
│       └── deploy-vps.yml
├── .gitignore
├── .npmignore
├── .prettierrc
├── .smithery
│   └── index.cjs
├── deploy
│   ├── DEPLOYMENT.md
│   └── nginx-mcp.conf
├── docker-compose.yml
├── Dockerfile
├── index.js
├── LICENSE
├── malaysia_open_data_mcp_plan.md
├── mcp-server.js
├── package-lock.json
├── package.json
├── PROMPT.md
├── README.md
├── response.txt
├── scripts
│   ├── build.js
│   ├── catalogue-index.d.ts
│   ├── catalogue-index.js
│   ├── catalogue-index.ts
│   ├── dashboards-index.d.ts
│   ├── dashboards-index.js
│   ├── deploy.js
│   ├── extract-dataset-ids.js
│   ├── extracted-datasets.js
│   ├── index-catalogue-files.cjs
│   ├── index-dashboards.cjs
│   └── update-tool-names.ts
├── smithery.yaml
├── src
│   ├── api
│   │   ├── catalogue.js
│   │   ├── client.js
│   │   ├── dosm.js
│   │   ├── transport.js
│   │   └── weather.js
│   ├── catalogue.tools.ts
│   ├── dashboards.tools.ts
│   ├── datacatalogue.tools.ts
│   ├── dosm.tools.ts
│   ├── firebase-analytics.ts
│   ├── flood.tools.ts
│   ├── gtfs.tools.ts
│   ├── http-server.ts
│   ├── index.cjs
│   ├── index.js
│   ├── index.ts
│   ├── parquet.tools.ts
│   ├── tools
│   │   ├── catalogue.js
│   │   ├── dosm.js
│   │   ├── test.js
│   │   ├── transport.js
│   │   └── weather.js
│   ├── transport.tools.ts
│   ├── types.d.ts
│   ├── unified-search.tools.ts
│   ├── utils
│   │   ├── query-builder.js
│   │   └── tool-naming.ts
│   └── weather.tools.ts
├── TOOLS.md
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------

```
{
  "semi": true,
  "singleQuote": true,
  "trailingComma": "es5",
  "printWidth": 100,
  "tabWidth": 2
}

```

--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------

```json
{
  "root": true,
  "parser": "@typescript-eslint/parser",
  "plugins": ["@typescript-eslint", "prettier"],
  "extends": [
    "eslint:recommended",
    "plugin:@typescript-eslint/recommended",
    "prettier"
  ],
  "rules": {
    "prettier/prettier": "error",
    "no-console": "off"
  },
  "env": {
    "node": true,
    "es6": true
  }
}

```

--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------

```
# Source files
src/

# Development files
.vscode/
.github/
.env
.env.example
.eslintrc.json
.prettierrc
tsconfig.json
smithery.yaml
Dockerfile

# Build artifacts
node_modules/
coverage/
.nyc_output/

# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Editor directories and files
.idea/
*.swp
*.swo

# Test files
__tests__/
test/
tests/

```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
# Dependencies
node_modules/
npm-debug.log
yarn-error.log
yarn-debug.log

# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
.env.example

# Build output
dist/
build/
.smithery/

# Test files
test/

# TypeScript
*.tsbuildinfo

# Editor directories and files
.idea/
.vscode/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# Malaysia Open Data MCP

**MCP Endpoint:** `https://mcp.techmavie.digital/datagovmy/mcp`

**Analytics Dashboard:** [`https://mcp.techmavie.digital/datagovmy/analytics/dashboard`](https://mcp.techmavie.digital/datagovmy/analytics/dashboard)

MCP (Model Context Protocol) server for Malaysia's Open Data APIs, providing easy access to government datasets and collections.

Do note that this is **NOT** an official MCP server by the Government of Malaysia or anyone from Malaysia's Open Data/Jabatan Digital Negara/Ministry of Digital team.

## Features

- **Enhanced Unified Search** with flexible tokenization and synonym expansion
  - Intelligent query handling with term normalization
  - Support for plurals and common prefixes (e.g., "e" in "epayment")
  - Smart prioritization for different data types
- **Parquet File Support** using pure JavaScript
  - Parse Parquet files directly in the browser or Node.js
  - Support for BROTLI compression
  - Intelligent date field handling for empty date objects
  - Increased row limits (up to 500 rows) for comprehensive data retrieval
  - Fallback to metadata estimation when parsing fails
  - Automatic dashboard URL mapping for visualization
- **Hybrid Data Access Architecture**
  - Pre-generated static indexes for efficient searching
  - Dynamic API calls for detailed metadata
- **Multi-Provider Geocoding**
  - Support for Google Maps, GrabMaps, and Nominatim (OpenStreetMap)
  - Intelligent service selection based on location and available API keys
  - GrabMaps optimization for locations in Malaysia
  - Automatic fallback between providers
- **Comprehensive Data Sources**
  - Malaysia's Data Catalogue with rich metadata
  - Interactive Dashboards for data visualization
  - Department of Statistics Malaysia (DOSM) data
  - Weather forecast and warnings
  - Public transport and GTFS data
- **Multi-Provider Malaysian Geocoding**
  - Optimized for Malaysian addresses and locations
  - Three-tier geocoding system: GrabMaps, Google Maps, and Nominatim
  - Prioritizes local knowledge with GrabMaps for better Malaysian coverage
  - Automatic fallback to Nominatim when no API keys are provided

## Architecture

This MCP server implements a hybrid approach for efficient data access:

- **Pre-generated Static Indexes** for listing and searching datasets and dashboards
- **Dynamic API Calls** only when specific dataset or dashboard details are requested

This approach provides several benefits:
- Faster search and listing operations
- Reduced API calls to external services
- Consistent data access patterns
- Up-to-date detailed information when needed

## Documentation

- **[TOOLS.md](./TOOLS.md)** - Detailed information about available tools and best practices
- **[PROMPT.md](./PROMPT.md)** - AI integration guidelines and usage patterns

## AI Integration

When integrating this MCP server with AI models:

1. **Use the unified search tool first** - Always start with `search_all` for any data queries
2. **Follow the correct URL patterns** - Use `https://data.gov.my/...` and `https://open.dosm.gov.my/...`
3. **Leverage Parquet file tools** - Use `parse_parquet_file` to access data directly or `get_parquet_info` for metadata
4. **Use the hybrid approach** - Static indexes for listing/searching, API calls for details
5. **Consider dashboard visualization** - For complex data, use the dashboard links provided by `find_dashboard_for_parquet`
6. **Leverage the multi-provider Malaysian geocoding** - For Malaysian location queries, the system automatically selects the best provider (GrabMaps, Google Maps, or Nominatim) with fallback to Nominatim when no API keys are configured

Refer to [PROMPT.md](./PROMPT.md) for comprehensive AI integration guidelines.

## Installation

```bash
npm install
```

## Quick Start (Hosted Server)

The easiest way to use this MCP server is via the hosted endpoint. **No installation required!**

**Server URL:**
```
https://mcp.techmavie.digital/datagovmy/mcp
```

#### Using Your Own API Keys

You can provide your own API keys via URL query parameters:

```
https://mcp.techmavie.digital/datagovmy/mcp?googleMapsApiKey=YOUR_KEY
```

Or via headers:
- `X-Google-Maps-Api-Key: YOUR_KEY`
- `X-GrabMaps-Api-Key: YOUR_KEY`
- `X-AWS-Access-Key-Id: YOUR_KEY`
- `X-AWS-Secret-Access-Key: YOUR_KEY`
- `X-AWS-Region: ap-southeast-5`

**Supported Query Parameters:**

| Parameter | Description |
|-----------|-------------|
| `googleMapsApiKey` | Google Maps API key for geocoding |
| `grabMapsApiKey` | GrabMaps API key for Southeast Asia geocoding |
| `awsAccessKeyId` | AWS Access Key ID for AWS Location Service |
| `awsSecretAccessKey` | AWS Secret Access Key |
| `awsRegion` | AWS Region (default: ap-southeast-5) |

> **⚠️ Important: GrabMaps Requirements**
> 
> To use GrabMaps geocoding, you need **ALL FOUR** parameters:
> - `grabMapsApiKey`
> - `awsAccessKeyId`
> - `awsSecretAccessKey`
> - `awsRegion`
> 
> GrabMaps uses AWS Location Service under the hood, so AWS credentials are required alongside the GrabMaps API key.

### Client Configuration

For Claude Desktop / Cursor / Windsurf, add to your MCP configuration:

```json
{
  "mcpServers": {
    "malaysia-opendata": {
      "transport": "streamable-http",
      "url": "https://mcp.techmavie.digital/datagovmy/mcp"
    }
  }
}
```

With your own API key:
```json
{
  "mcpServers": {
    "malaysia-opendata": {
      "transport": "streamable-http",
      "url": "https://mcp.techmavie.digital/datagovmy/mcp?googleMapsApiKey=YOUR_KEY"
    }
  }
}
```

## Self-Hosted (VPS)

If you prefer to run your own instance, see [deploy/DEPLOYMENT.md](deploy/DEPLOYMENT.md) for detailed VPS deployment instructions with Docker and Nginx.

## Analytics Dashboard

The hosted server includes a built-in analytics dashboard:

**Dashboard URL:** [`https://mcp.techmavie.digital/datagovmy/analytics/dashboard`](https://mcp.techmavie.digital/datagovmy/analytics/dashboard)

### Analytics Endpoints

| Endpoint | Description |
|----------|-------------|
| `/analytics` | Full analytics summary (JSON) |
| `/analytics/tools` | Detailed tool usage stats (JSON) |
| `/analytics/dashboard` | Visual dashboard with charts (HTML) |

The dashboard tracks:
- Total requests and tool calls
- Tool usage distribution
- Hourly request trends (last 24 hours)
- Requests by endpoint
- Top clients by user agent
- Recent tool calls feed

Auto-refreshes every 30 seconds.

## Available Tools

### Data Catalogue

- `list_datasets`: Lists available datasets in the Data Catalogue
- `get_dataset`: Gets data from a specific dataset in the Data Catalogue
- `search_datasets`: Searches for datasets in the Data Catalogue

### Department of Statistics Malaysia (DOSM)

- `list_dosm_datasets`: Lists available datasets from DOSM
- `get_dosm_dataset`: Gets data from a specific DOSM dataset

### Parquet File Handling

- `parse_parquet_file`: Parse and display data from a Parquet file URL
  - Supports up to 500 rows for comprehensive data analysis
  - Automatically handles empty date objects with appropriate formatting
  - Processes BigInt values for proper JSON serialization
- `get_parquet_info`: Get metadata and structure information about a Parquet file
- `find_dashboard_for_parquet`: Find the corresponding dashboard URL for a Parquet file

### Weather

- `get_weather_forecast`: Gets weather forecast for Malaysia
- `get_weather_warnings`: Gets current weather warnings for Malaysia
- `get_earthquake_warnings`: Gets earthquake warnings for Malaysia

### Transport

- `list_transport_agencies`: Lists available transport agencies with GTFS data
- `get_transport_data`: Gets GTFS data for a specific transport agency

### GTFS Parsing

- `parse_gtfs_static`: Parses GTFS Static data (ZIP files with CSV data) for a specific transport provider
- `parse_gtfs_realtime`: Parses GTFS Realtime data (Protocol Buffer format) for vehicle positions
- `get_transit_routes`: Extracts route information from GTFS data
- `get_transit_stops`: Extracts stop information from GTFS data, optionally filtered by route

### Test

- `hello`: A simple test tool to verify that the MCP server is working correctly

## Data-Catalogue Information Retrieval

The MCP server provides robust handling for data-catalogue information retrieval:

### Date Handling in Parquet Files

- **Empty Date Objects**: The system automatically detects and handles empty date objects in parquet files
- **Dataset-Specific Handling**: Special handling for known datasets like `employment_sector` with annual data from 2001-2022
- **Pattern Recognition**: Detects date patterns in existing data to maintain consistent formatting
- **Increased Row Limits**: Supports up to 500 rows (increased from 100) for more comprehensive data analysis

### BigInt Processing

- **Automatic Serialization**: BigInt values are automatically converted to strings for proper JSON serialization
- **Type Preservation**: Original types are preserved in the schema information

### Schema Detection

- **Automatic Type Inference**: Detects column types including special handling for date fields
- **Consistent Representation**: Ensures date fields are consistently represented as strings

## Usage Examples

### Get Weather Forecast

```javascript
const result = await tools.get_weather_forecast({
  location: "Kuala Lumpur",
  days: 3
});
```

### Search Datasets

```javascript
const result = await tools.search_datasets({
  query: "population",
  limit: 5
});
```

### Parse GTFS Data

```javascript
// Parse GTFS Static data
const staticData = await tools.parse_gtfs_static({
  provider: "ktmb"
});

// Get real-time vehicle positions
const realtimeData = await tools.parse_gtfs_realtime({
  provider: "prasarana",
  category: "rapid-rail-kl"
});

// Get transit routes
const routes = await tools.get_transit_routes({
  provider: "mybas-johor"
});

// Get stops for a specific route
const stops = await tools.get_transit_stops({
  provider: "prasarana",
  category: "rapid-rail-kl",
  route_id: "LRT-KJ"
});
```

## API Rate Limits

Please be aware of rate limits for the underlying APIs. Excessive requests may be throttled.

## Project Structure

- `src/index.ts`: Main MCP server implementation and tool registration
- `src/http-server.ts`: Streamable HTTP server for VPS deployment
- `src/datacatalogue.tools.ts`: Data Catalogue API tools
- `src/dashboards.tools.ts`: Dashboard access and search tools
- `src/dosm.tools.ts`: Department of Statistics Malaysia tools
- `src/unified-search.tools.ts`: Enhanced unified search with tokenization and synonym expansion
- `src/parquet.tools.ts`: Parquet file parsing and metadata tools
- `src/weather.tools.ts`: Weather forecast and warnings tools
- `src/transport.tools.ts`: Transport and GTFS data tools
- `src/gtfs.tools.ts`: GTFS parsing and analysis tools
- `src/flood.tools.ts`: Flood warning and monitoring tools
- `Dockerfile`: Docker configuration for VPS deployment
- `docker-compose.yml`: Docker Compose configuration
- `deploy/`: Deployment files (nginx config, deployment guide)
- `package.json`: Project dependencies and scripts
- `tsconfig.json`: TypeScript configuration

## Local Development

```bash
# Install dependencies
npm install

# Run HTTP server in development mode
npm run dev:http

# Or build and run production version
npm run build
npm run start:http

# Test health endpoint
curl http://localhost:8080/health

# Test MCP endpoint
curl -X POST http://localhost:8080/mcp \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
```

## Troubleshooting

### Container Issues

```bash
# Check container status
docker compose ps

# View logs
docker compose logs -f

# Restart container
docker compose restart
```

### Test MCP Connection

```bash
# List tools
curl -X POST https://mcp.techmavie.digital/datagovmy/mcp \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'

# Call hello tool
curl -X POST https://mcp.techmavie.digital/datagovmy/mcp \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"datagovmy_hello","arguments":{}}}'
```

## Configuration

### Environment Variables

This project supports the following configuration options:

**Geocoding Credentials (Optional. Only for GTFS Transit Features Usage)**:

The following credentials are **only needed if you plan to use the GTFS transit tools** that require geocoding services. Other features like data catalogue access, weather forecasts, and DOSM data do not require these credentials.

- **googleMapsApiKey**: Optional. If provided, the system will use Google Maps API for geocoding location names to coordinates.
- **grabMapsApiKey**: Optional. Required for GrabMaps geocoding, which is optimized for locations in Malaysia.
- **awsAccessKeyId**: Required for GrabMaps integration. AWS access key for GrabMaps API authentication.
- **awsSecretAccessKey**: Required for GrabMaps integration. AWS secret key for GrabMaps API authentication.
- **awsRegion**: Required for GrabMaps integration. AWS region for GrabMaps API (e.g. 'ap-southeast-5' for the Malaysia region or 'ap-southeast-1' for the Singapore region).

If neither Google Maps nor GrabMaps API keys are provided, the GTFS transit tools will automatically fall back to using Nominatim (OpenStreetMap) API for geocoding, which is free and doesn't require credentials.

You can set these configuration options in two ways:

1. **Via URL query parameters** when connecting to the hosted server (see Quick Start section)
2. **As environment variables** for local development or self-hosted deployment

#### Setting up environment variables

Create a `.env` file in the root directory:

```env
GOOGLE_MAPS_API_KEY=your_google_api_key_here
GRABMAPS_API_KEY=your_grab_api_key_here
AWS_ACCESS_KEY_ID=your_aws_access_key_for_grabmaps
AWS_SECRET_ACCESS_KEY=your_aws_secret_key_for_grabmaps
AWS_REGION=ap-southeast-5
```

The variables will be automatically loaded when you run the server.

**Note:** For Malaysian locations, GrabMaps provides the most accurate geocoding results, followed by Google Maps. If you don't provide either API key, the system will automatically use Nominatim API instead, which is free but may have less accurate results for some locations in Malaysia.

**Important:** These geocoding credentials are only required for the following GTFS transit tools:
- `get_transit_routes` - When converting location names to coordinates
- `get_transit_stops` - When converting location names to coordinates
- `parse_gtfs_static` - When geocoding is needed for stop locations

**Note about GTFS Realtime Tools:** The `parse_gtfs_realtime` tool is currently in development and has limited availability. Real-time data access through this MCP is experimental and may not be available for all providers or routes. For up-to-date train and bus schedules, bus locations, and arrivals in real-time, please use official transit apps like Google Maps, MyRapid PULSE, Moovit, or Lugo.

All other tools like data catalogue access, dashboard search, weather forecasts, and DOSM data do not require any geocoding credentials.

## License

MIT - See [LICENSE](./LICENSE) file for details.

## Acknowledgments

- [Malaysia Open Data Portal](https://data.gov.my/)
- [Department of Statistics Malaysia](https://open.dosm.gov.my/)
- [Malaysian Meteorological Department](https://www.met.gov.my/)
- [Google Maps Platform](https://developers.google.com/maps) for geocoding
- [GrabMaps](https://grabmaps.grab.com/solutions/service-apis) for geocoding
- [Nominatim](https://nominatim.org/) for geocoding
- [Model Context Protocol](https://modelcontextprotocol.io/) for the MCP framework

```

--------------------------------------------------------------------------------
/response.txt:
--------------------------------------------------------------------------------

```

```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
runtime: typescript

```

--------------------------------------------------------------------------------
/scripts/dashboards-index.d.ts:
--------------------------------------------------------------------------------

```typescript
export declare const DASHBOARDS_INDEX: any[];

```

--------------------------------------------------------------------------------
/src/types.d.ts:
--------------------------------------------------------------------------------

```typescript
declare module './datacatalogue.tools';
declare module './dosm.tools';
declare module './weather.tools';
declare module './transport.tools';

```

--------------------------------------------------------------------------------
/src/tools/test.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Test Tool
 * 
 * A simple test tool to verify that the MCP server is working correctly.
 */

/**
 * Returns a simple hello message
 * @returns {Promise<Object>} - Hello message with an ISO-8601 timestamp
 */
async function hello() {
  const now = new Date();
  return {
    message: 'Hello from Malaysia Open Data MCP!',
    timestamp: now.toISOString(),
  };
}

module.exports = {
  hello
};

```

--------------------------------------------------------------------------------
/src/utils/tool-naming.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Helper functions for consistent tool naming
 */

/**
 * Adds the "datagovmy_" prefix to a tool name.
 * Idempotent: names that already carry the prefix are returned unchanged.
 * @param toolName The original tool name
 * @returns The prefixed tool name
 */
export function prefixToolName(toolName: string): string {
  const PREFIX = 'datagovmy_';
  return toolName.startsWith(PREFIX) ? toolName : `${PREFIX}${toolName}`;
}

```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "esModuleInterop": true,
    "strict": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "outDir": "dist",
    "declaration": true,
    "resolveJsonModule": true,
    "allowSyntheticDefaultImports": true,
    "baseUrl": "."
  },
  "include": [
    "src/**/*",
    "scripts/**/*.d.ts"
  ],
  "exclude": [
    "node_modules",
    "dist"
  ]
}

```

--------------------------------------------------------------------------------
/scripts/deploy.js:
--------------------------------------------------------------------------------

```javascript
#!/usr/bin/env node

/**
 * Deploys the Malaysia Open Data MCP server to Smithery.
 *
 * Runs `npm run build` followed by `npx @smithery/cli deploy` from the
 * repository root, and exits with a non-zero status on failure.
 */

const { execSync } = require('child_process');
const path = require('path');

// All commands run from the repository root (one level above this script).
const projectRoot = path.join(__dirname, '..');

console.log('🚀 Deploying Malaysia Open Data MCP to Smithery...');

try {
  // Build the project first.
  // NOTE: invoking npm/npx directly (instead of via `cmd.exe /c`) keeps this
  // script cross-platform: execSync runs the command through the platform
  // shell, which resolves npm.cmd on Windows and npm on macOS/Linux. This
  // also matches how scripts/build.js already invokes `npx tsc`.
  console.log('📦 Building the project...');
  execSync('npm run build', { stdio: 'inherit', cwd: projectRoot });

  // Deploy to Smithery
  console.log('🚀 Deploying to Smithery...');
  execSync('npx @smithery/cli deploy', { stdio: 'inherit', cwd: projectRoot });

  console.log('✅ Deployment completed successfully!');
} catch (error) {
  console.error('❌ Deployment failed:', error.message);
  process.exit(1);
}

```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# Malaysia Open Data MCP Server - Streamable HTTP
# For self-hosting on VPS with nginx reverse proxy

FROM node:22-slim

# Set the working directory
WORKDIR /app

# Copy package files
COPY package*.json ./

# Install ALL dependencies (including devDependencies for build)
RUN npm ci

# Copy source code and configuration
COPY tsconfig.json ./
COPY src ./src
COPY scripts ./scripts

# Build TypeScript code
RUN npm run build

# Copy scripts to dist (needed for runtime imports)
RUN cp -r scripts dist/

# Remove devDependencies after build
RUN npm prune --production

# Expose port for HTTP server
EXPOSE 8080

# Environment variables (can be overridden at runtime)
ENV PORT=8080
ENV HOST=0.0.0.0

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Start the HTTP server
CMD ["node", "dist/src/http-server.js"]

```

--------------------------------------------------------------------------------
/scripts/catalogue-index.d.ts:
--------------------------------------------------------------------------------

```typescript
// Generated by scripts/index-catalogue-files.cjs on 2025-07-27T03:58:23.566Z
// Ambient type declarations for the pre-generated static catalogue index
// (the "static index" half of the hybrid data-access approach). Do not edit
// by hand; re-run the generator instead.

declare module '../scripts/catalogue-index.js' {
  // Placement of a dataset within the data.gov.my site navigation,
  // with bilingual (English/Malay) labels and sort order.
  export interface SiteCategory {
    site: string;
    category_en: string;
    category_ms: string;
    category_sort: number;
    subcategory_en: string;
    subcategory_ms: string;
    subcategory_sort: number;
  }

  // Metadata record for a single catalogue dataset, including bilingual
  // titles/descriptions, coverage range, and direct download/preview links.
  export interface DatasetMetadata {
    id: string;
    title_en: string;
    title_ms: string;
    description_en: string;
    description_ms: string;
    frequency: string;
    geography: string[];
    demography: string[];
    dataset_begin: number | null;
    dataset_end: number | null;
    data_source: string[];
    data_as_of: string;
    last_updated: string;
    next_update: string;
    link_parquet: string;
    link_csv: string;
    link_preview: string;
    site_category: SiteCategory[];
  }

  // Distinct filter values collected across the whole catalogue index.
  export interface CatalogueFilters {
    categories: string[];
    geographies: string[];
    frequencies: string[];
    demographies: string[];
    dataSources: string[];
  }

  export const CATALOGUE_INDEX: DatasetMetadata[];
  export const CATALOGUE_FILTERS: CatalogueFilters;
}
```

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------

```yaml
# Malaysia Open Data MCP Server - Docker Compose
# For self-hosting on VPS at mcp.techmavie.digital/datagovmy

services:
  mcp-datagovmy:
    build: .
    container_name: mcp-datagovmy
    restart: unless-stopped
    ports:
      - "8083:8080"
    environment:
      - PORT=8080
      - HOST=0.0.0.0
      - NODE_ENV=production
      - ANALYTICS_DIR=/app/data
      # Optional API keys - uncomment and set if needed
      # - GOOGLE_MAPS_API_KEY=${GOOGLE_MAPS_API_KEY}
      # - GRABMAPS_API_KEY=${GRABMAPS_API_KEY}
      # - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      # - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      # - AWS_REGION=${AWS_REGION}
    env_file:
      - .env
    volumes:
      - analytics-data:/app/data
      - ./.credentials:/app/.credentials:ro
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    networks:
      - mcp-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

networks:
  mcp-network:
    driver: bridge

volumes:
  analytics-data:
    driver: local
```

--------------------------------------------------------------------------------
/src/api/dosm.js:
--------------------------------------------------------------------------------

```javascript
/**
 * OpenDOSM API Client
 * 
 * Handles communication with the OpenDOSM API endpoint
 */

const { createClient } = require('./client');

/**
 * Creates an OpenDOSM API client
 *
 * @param {Object} options - Client configuration options
 * @returns {Object} - OpenDOSM API client
 */
function createDosmClient(options = {}) {
  const ENDPOINT = '/opendosm';
  const client = createClient(options);

  /**
   * Gets data from a specific OpenDOSM dataset
   *
   * @param {string} datasetId - Dataset ID
   * @param {Object} params - Additional query parameters
   * @returns {Promise<Object>} - Dataset data
   */
  const getDataset = async (datasetId, params = {}) =>
    client.request(ENDPOINT, { id: datasetId, ...params });

  /**
   * Lists available OpenDOSM datasets
   *
   * Uses the `meta` parameter to retrieve information about
   * the datasets the endpoint exposes.
   *
   * @param {Object} params - List parameters
   * @returns {Promise<Object>} - List of datasets
   */
  const listDatasets = async (params = {}) =>
    client.request(ENDPOINT, { meta: true, ...params });

  return { getDataset, listDatasets };
}

module.exports = {
  createDosmClient
};

```

--------------------------------------------------------------------------------
/scripts/build.js:
--------------------------------------------------------------------------------

```javascript
#!/usr/bin/env node

/**
 * Build script for the Malaysia Open Data MCP server.
 *
 * Removes any previous `dist/` output, compiles the TypeScript sources with
 * `npx tsc`, and copies `smithery.yaml` into the build directory. Exits with
 * a non-zero status on failure.
 */

const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');

const projectRoot = path.join(__dirname, '..');
const distDir = path.join(projectRoot, 'dist');

console.log('🚀 Building Malaysia Open Data MCP...');

try {
  // Clean previous build. fs.rmSync with { recursive, force } is
  // cross-platform and tolerates a missing directory, so there is no need
  // to branch on process.platform and shell out to `rmdir` / `rm`.
  console.log('📦 Cleaning previous build...');
  fs.rmSync(distDir, { recursive: true, force: true });

  // Run TypeScript compiler
  console.log('📦 Compiling TypeScript...');
  execSync('npx tsc', { stdio: 'inherit', cwd: projectRoot });

  // Copy smithery.yaml to dist (runtime configuration expected by Smithery)
  console.log('📦 Copying configuration files...');
  fs.copyFileSync(
    path.join(projectRoot, 'smithery.yaml'),
    path.join(distDir, 'smithery.yaml')
  );

  console.log('✅ Build completed successfully!');
  console.log('\nTo start the development server:');
  console.log('  npx @smithery/cli dev');
  console.log('\nTo deploy to Smithery:');
  console.log('  npx @smithery/cli deploy');
} catch (error) {
  console.error('❌ Build failed:', error.message);
  process.exit(1);
}

```

--------------------------------------------------------------------------------
/src/api/weather.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Weather API Client
 * 
 * Handles communication with the Weather API endpoints
 */

const { createClient } = require('./client');

/**
 * Creates a Weather API client
 *
 * @param {Object} options - Client configuration options
 * @returns {Object} - Weather API client
 */
function createWeatherClient(options = {}) {
  const client = createClient(options);

  // Weather API endpoints, keyed by the concern each method serves.
  const ENDPOINTS = {
    forecast: '/weather/forecast',
    warning: '/weather/warning',
    earthquake: '/weather/warning/earthquake',
  };

  /**
   * Gets 7-day general forecast data
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - Forecast data
   */
  const getForecast = async (params = {}) => client.request(ENDPOINTS.forecast, params);

  /**
   * Gets weather warning data
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - Warning data
   */
  const getWarnings = async (params = {}) => client.request(ENDPOINTS.warning, params);

  /**
   * Gets earthquake warning data
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - Earthquake warning data
   */
  const getEarthquakeWarnings = async (params = {}) =>
    client.request(ENDPOINTS.earthquake, params);

  return { getForecast, getWarnings, getEarthquakeWarnings };
}

module.exports = {
  createWeatherClient
};

```

--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Malaysia Open Data MCP - Main Entry Point
 * 
 * This file serves as the main entry point for the Malaysia Open Data MCP server.
 * It exports all the tools that will be available through the MCP.
 */

// Import tools
const catalogueTools = require('./tools/catalogue');
const dosmTools = require('./tools/dosm');
const weatherTools = require('./tools/weather');
const transportTools = require('./tools/transport');
const testTools = require('./tools/test');

// Define the server function that Smithery expects
/**
 * Smithery server factory.
 *
 * @param {Object} ctx - Invocation context supplied by Smithery
 * @param {string} ctx.sessionId - Current session identifier (currently unused)
 * @param {Object} ctx.config - Server configuration (currently unused)
 * @returns {{connect: function(): Object}} - connect() yields the tool registry
 */
function server({ sessionId, config }) {
  // Tool registry assembled from the domain-specific modules.
  const tools = Object.assign(
    {},
    // Data Catalogue Tools
    {
      list_datasets: catalogueTools.listDatasets,
      get_dataset: catalogueTools.getDataset,
      search_datasets: catalogueTools.searchDatasets,
    },
    // OpenDOSM Tools
    {
      list_dosm_datasets: dosmTools.listDatasets,
      get_dosm_dataset: dosmTools.getDataset,
    },
    // Weather Tools
    {
      get_weather_forecast: weatherTools.getForecast,
      get_weather_warnings: weatherTools.getWarnings,
      get_earthquake_warnings: weatherTools.getEarthquakeWarnings,
    },
    // Transport Tools
    {
      list_transport_agencies: transportTools.listAgencies,
      get_transport_data: transportTools.getData,
    },
    // Test Tools
    { hello: testTools.hello }
  );

  // Smithery calls connect() to obtain the tool map for this session.
  return {
    connect: () => tools,
  };
}

// Export the server function for CommonJS
module.exports = server;
module.exports.default = server;

```

--------------------------------------------------------------------------------
/.github/workflows/deploy-vps.yml:
--------------------------------------------------------------------------------

```yaml
# Continuous deployment: every push to main opens an SSH session on the VPS,
# pulls the latest code, ensures Firebase credentials are in place, and
# rebuilds the Docker Compose stack from a clean (no-cache) image.
name: Deploy to VPS

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    
    steps:
      # appleboy/ssh-action executes the inline script below on the remote
      # host; connection details come from repository secrets.
      - name: Deploy to VPS via SSH
        uses: appleboy/[email protected]
        with:
          host: ${{ secrets.VPS_HOST }}
          username: ${{ secrets.VPS_USERNAME }}
          key: ${{ secrets.VPS_SSH_KEY }}
          port: ${{ secrets.VPS_PORT }}
          script: |
            cd /opt/mcp-servers/datagovmy
            git pull origin main
            
            # Create credentials directory if it doesn't exist
            mkdir -p .credentials
            
            # Copy shared Firebase credentials from parent directory
            if [ ! -f .credentials/firebase-service-account.json ]; then
              cp ../.credentials/firebase-service-account.json .credentials/
              echo "✅ Firebase credentials copied from parent directory"
            else
              echo "✅ Firebase credentials already exist"
            fi
            
            # Rebuild and restart with no cache
            docker compose down
            docker compose build --no-cache
            docker compose up -d
            
            # Wait for container to start
            sleep 5
            
            # Check if container is running
            docker compose ps
            
            # Show recent logs
            docker compose logs --tail 30
            
            echo "🚀 Deployment completed at $(date)"
```

--------------------------------------------------------------------------------
/src/index.cjs:
--------------------------------------------------------------------------------

```
/**
 * Malaysia Open Data MCP - CommonJS Entry Point
 * 
 * This file serves as the CommonJS entry point for the Malaysia Open Data MCP server.
 * It exports all the tools that will be available through the MCP.
 */

// Import tools
const catalogueTools = require('./tools/catalogue');
const dosmTools = require('./tools/dosm');
const weatherTools = require('./tools/weather');
const transportTools = require('./tools/transport');
const testTools = require('./tools/test');

// Define the server function that Smithery expects
/**
 * Smithery server factory (CommonJS build).
 *
 * @param {Object} ctx - Invocation context supplied by Smithery
 * @param {string} ctx.sessionId - Current session identifier (currently unused)
 * @param {Object} ctx.config - Server configuration (currently unused)
 * @returns {{connect: function(): Object}} - connect() yields the tool registry
 */
function server({ sessionId, config }) {
  // Registry of every tool exposed by this MCP server, grouped by domain.
  const registry = {
    // Data Catalogue Tools
    list_datasets: catalogueTools.listDatasets,
    get_dataset: catalogueTools.getDataset,
    search_datasets: catalogueTools.searchDatasets,
    // OpenDOSM Tools
    list_dosm_datasets: dosmTools.listDatasets,
    get_dosm_dataset: dosmTools.getDataset,
    // Weather Tools
    get_weather_forecast: weatherTools.getForecast,
    get_weather_warnings: weatherTools.getWarnings,
    get_earthquake_warnings: weatherTools.getEarthquakeWarnings,
    // Transport Tools
    list_transport_agencies: transportTools.listAgencies,
    get_transport_data: transportTools.getData,
    // Test Tools
    hello: testTools.hello,
  };

  // connect() hands the prepared registry back to Smithery.
  return {
    connect() {
      return registry;
    },
  };
}

// Export the server function
module.exports = server;
module.exports.default = server;

```

--------------------------------------------------------------------------------
/PROMPT.md:
--------------------------------------------------------------------------------

```markdown
# Malaysia Open Data MCP Server - AI Prompt Guide

When using the Malaysia Open Data MCP server, follow these guidelines to ensure optimal results:

## Primary Search Tool

**ALWAYS use the `search_all` tool first** when searching for any data, statistics, or visualizations related to Malaysia's open data. This tool provides unified search across both datasets and dashboards, with intelligent fallback to ensure comprehensive results.

Example:
```
search_all
{
  "query": "e-payment statistics"
}
```

Only use specific dataset or dashboard search tools if you need to explicitly limit your search to one type of content.

## Data Access Pattern

1. Start with `search_all` to find relevant resources
2. For detailed dataset information, use `get_dataset_details` with the dataset ID
3. For dashboard information, use `get_dashboard_by_name` with the dashboard name
4. For dashboard charts, use `get_dashboard_charts` with the dashboard name

## URL References

When referring to resources in responses:
- Use `https://data.gov.my/...` for general data portal resources
- Use `https://open.dosm.gov.my/...` for OpenDOSM resources

## Data Format Limitations

- Dashboard data is visualized on the web interface. Raw data files (e.g., parquet) cannot be directly accessed through this API.
- Dataset metadata is available through this API. For downloading the actual data files, users should visit the dataset page on the data portal.

## Hybrid Architecture

This MCP server uses a hybrid approach:
- Pre-generated static indexes for efficient listing and searching
- Dynamic API calls only when specific dataset or dashboard details are requested

This ensures fast responses while maintaining up-to-date information.

```

--------------------------------------------------------------------------------
/src/api/catalogue.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Data Catalogue API Client
 * 
 * Handles communication with the Data Catalogue API endpoint
 */

const { createClient } = require('./client');

/**
 * Creates a Data Catalogue API client
 *
 * @param {Object} options - Client configuration options forwarded to the base client
 * @returns {Object} - Client exposing getDataset, searchDatasets and listDatasets
 */
function createCatalogueClient(options = {}) {
  const client = createClient(options);
  const ENDPOINT = '/data-catalogue';

  // Issue a catalogue request, letting caller-supplied params override defaults.
  const query = (defaults, params) => client.request(ENDPOINT, { ...defaults, ...params });

  return {
    /**
     * Gets data from a specific dataset.
     *
     * @param {string} datasetId - Dataset ID
     * @param {Object} params - Additional query parameters
     * @returns {Promise<Object>} - Dataset data
     */
    async getDataset(datasetId, params = {}) {
      return query({ id: datasetId }, params);
    },

    /**
     * Searches across datasets.
     *
     * @param {Object} params - Search parameters
     * @returns {Promise<Object>} - Search results
     */
    async searchDatasets(params = {}) {
      return query({ meta: true }, params);
    },

    /**
     * Lists available datasets; the API exposes listings via the `meta` flag.
     *
     * @param {Object} params - List parameters
     * @returns {Promise<Object>} - List of datasets
     */
    async listDatasets(params = {}) {
      return query({ meta: true }, params);
    },
  };
}

// Export both as a named export and as the default export for maximum compatibility
module.exports = createCatalogueClient;
module.exports.createCatalogueClient = createCatalogueClient;
module.exports.default = createCatalogueClient;

```

--------------------------------------------------------------------------------
/src/tools/dosm.js:
--------------------------------------------------------------------------------

```javascript
/**
 * OpenDOSM MCP Tools
 * 
 * Tools for accessing and searching the OpenDOSM data
 */

const { createDosmClient } = require('../api/dosm');

// Create client instance with default configuration
const dosmClient = createDosmClient();

/**
 * Lists available datasets in the OpenDOSM data catalogue
 *
 * @param {Object} params - Optional parameters
 * @param {number} params.limit - Maximum number of datasets to return
 * @returns {Promise<Object>} - Success envelope with the dataset list, or a failure envelope
 */
async function listDatasets(params = {}) {
  try {
    const data = await dosmClient.listDatasets(params);
    return {
      success: true,
      message: 'Successfully retrieved OpenDOSM datasets',
      data,
    };
  } catch (err) {
    // Errors are reported in the envelope rather than thrown to the caller.
    return {
      success: false,
      message: `Failed to list OpenDOSM datasets: ${err.message}`,
      error: err.message,
    };
  }
}

/**
 * Gets data from a specific OpenDOSM dataset
 *
 * @param {Object} params - Parameters
 * @param {string} params.id - Dataset ID (required)
 * @param {Object} params.filter - Optional filter parameters
 * @param {Object} params.sort - Optional sort parameters
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Success envelope with dataset data, or a failure envelope
 */
async function getDataset(params = {}) {
  try {
    if (!params.id) {
      throw new Error('Dataset ID is required');
    }

    // Everything except the id is forwarded as query parameters.
    const { id, ...queryParams } = params;
    const data = await dosmClient.getDataset(id, queryParams);
    return {
      success: true,
      message: `Successfully retrieved OpenDOSM dataset: ${id}`,
      data,
    };
  } catch (err) {
    return {
      success: false,
      message: `Failed to get OpenDOSM dataset: ${err.message}`,
      error: err.message,
    };
  }
}

module.exports = {
  listDatasets,
  getDataset
};

```

--------------------------------------------------------------------------------
/scripts/index-dashboards.cjs:
--------------------------------------------------------------------------------

```
/**
 * Script to generate a dashboard index file from individual dashboard JSON files
 */
const fs = require('fs');
const path = require('path');

// Path to dashboards directory
// Source directory containing one JSON file per dashboard
const dashboardsDir = path.join(process.cwd(), 'dashboards');

// Destination for the generated ES-module index
const outputFile = path.join(process.cwd(), 'scripts', 'dashboards-index.js');

try {
  console.log('Reading dashboard files from:', dashboardsDir);
  const jsonFiles = fs.readdirSync(dashboardsDir).filter((name) => name.endsWith('.json'));
  console.log(`Found ${jsonFiles.length} dashboard files`);

  // Parse every file, skipping (but reporting) any that fail to read/parse.
  const dashboards = [];
  for (const name of jsonFiles) {
    try {
      dashboards.push(JSON.parse(fs.readFileSync(path.join(dashboardsDir, name), 'utf8')));
    } catch (error) {
      console.error(`Error reading dashboard file ${name}:`, error);
    }
  }

  // Emit an ES module embedding the full dashboard list.
  const timestamp = new Date().toISOString();
  const outputContent = `// Generated from local dashboard files
// Timestamp: ${timestamp}
// Total dashboards: ${dashboards.length}

export const DASHBOARDS_INDEX = ${JSON.stringify(dashboards, null, 2)};
`;

  fs.writeFileSync(outputFile, outputContent, 'utf8');
  console.log(`Dashboard index generated successfully: ${outputFile}`);

  // Companion TypeScript declaration so TS consumers can import the index.
  const declarationFile = path.join(process.cwd(), 'scripts', 'dashboards-index.d.ts');
  const declarationContent = `export declare const DASHBOARDS_INDEX: any[];
`;
  fs.writeFileSync(declarationFile, declarationContent, 'utf8');
  console.log(`TypeScript declaration file generated: ${declarationFile}`);
} catch (error) {
  console.error('Error generating dashboard index:', error);
  process.exit(1);
}

```

--------------------------------------------------------------------------------
/src/tools/transport.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Transport MCP Tools
 * 
 * Tools for accessing public transportation data from the Malaysia Open Data API
 */

const { createTransportClient } = require('../api/transport');

// Create client instance with default configuration
const transportClient = createTransportClient();

/**
 * Lists available transport agencies
 *
 * @returns {Promise<Object>} - Success envelope with the agency list, or a failure envelope
 */
async function listAgencies() {
  try {
    const data = await transportClient.listAgencies();
    return {
      success: true,
      message: 'Successfully retrieved transport agencies',
      data,
    };
  } catch (err) {
    // Errors are reported in the envelope rather than thrown to the caller.
    return {
      success: false,
      message: `Failed to list transport agencies: ${err.message}`,
      error: err.message,
    };
  }
}

/**
 * Gets GTFS data for a specific agency
 *
 * @param {Object} params - Parameters
 * @param {string} params.agencyId - Agency ID (e.g., 'mybas-jb', 'ktmb', 'prasarana')
 * @param {string} params.dataType - Data type ('static' or 'realtime'); defaults to 'static'
 * @param {Object} params.filter - Optional filter parameters
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Success envelope with GTFS data, or a failure envelope
 */
async function getData(params = {}) {
  try {
    if (!params.agencyId) {
      throw new Error('Agency ID is required');
    }

    const { agencyId, dataType = 'static', filter, limit, ...otherParams } = params;

    // Only include filter/limit when the caller actually supplied them.
    const queryParams = { ...otherParams };
    if (filter) {
      queryParams.filter = filter;
    }
    if (limit) {
      queryParams.limit = limit;
    }

    const data = await transportClient.getData({ agencyId, dataType, queryParams });
    return {
      success: true,
      message: `Successfully retrieved ${dataType} GTFS data for agency: ${agencyId}`,
      data,
    };
  } catch (err) {
    return {
      success: false,
      message: `Failed to get transport data: ${err.message}`,
      error: err.message,
    };
  }
}

module.exports = {
  listAgencies,
  getData
};

```

--------------------------------------------------------------------------------
/deploy/nginx-mcp.conf:
--------------------------------------------------------------------------------

```
# Nginx location block for Malaysia Open Data MCP Server
# Add this to your existing mcp.techmavie.digital server block
#
# Endpoint: https://mcp.techmavie.digital/datagovmy/mcp

# ============================================================================
# Malaysia Open Data MCP Server
# Endpoint: https://mcp.techmavie.digital/datagovmy/mcp
# ============================================================================
location /datagovmy/ {
    # Proxy to Malaysia Open Data MCP container (port 8083)
    # The trailing slash on proxy_pass strips the /datagovmy/ prefix before
    # forwarding, so the upstream app sees paths rooted at /.
    proxy_pass http://127.0.0.1:8083/;
    
    # Required headers for Streamable HTTP transport
    proxy_http_version 1.1;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    
    # Important for streaming responses
    proxy_buffering off;
    proxy_cache off;
    proxy_read_timeout 3600s;
    proxy_send_timeout 3600s;
    
    # For potential WebSocket upgrade (future compatibility)
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
    
    # MCP Session ID header
    proxy_set_header Mcp-Session-Id $http_mcp_session_id;
    proxy_pass_header Mcp-Session-Id;
    
    # CORS headers (if not handled by the app)
    # NOTE(review): if the upstream app also emits these headers, clients will
    # see duplicates — confirm only one layer sets CORS.
    add_header Access-Control-Allow-Origin "*" always;
    add_header Access-Control-Allow-Methods "GET, POST, DELETE, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Content-Type, Accept, Authorization, Mcp-Session-Id" always;
    add_header Access-Control-Expose-Headers "Mcp-Session-Id" always;
    
    # Handle preflight requests
    # NOTE(review): add_header directives inside an `if` block replace the ones
    # declared at location level for matching requests, which is why this
    # OPTIONS branch re-declares its own CORS set. `if` + `return` is one of
    # the documented safe uses of `if` inside a location.
    if ($request_method = 'OPTIONS') {
        add_header Access-Control-Allow-Origin "*";
        add_header Access-Control-Allow-Methods "GET, POST, DELETE, OPTIONS";
        add_header Access-Control-Allow-Headers "Content-Type, Accept, Authorization, Mcp-Session-Id";
        add_header Access-Control-Max-Age 86400;
        add_header Content-Length 0;
        add_header Content-Type text/plain;
        return 204;
    }
}

```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "mcp-datagovmy",
  "version": "1.0.0",
  "description": "Malaysia Open Data MCP Server for Smithery",
  "main": "./src/index.ts",
  "module": "./src/index.ts",
  "type": "module",
  "files": [
    "dist/**/*",
    "README.md",
    "LICENSE",
    "TOOLS.md",
    "PROMPT.md"
  ],
  "scripts": {
    "start": "node index.js",
    "start:http": "node dist/src/http-server.js",
    "build": "tsc",
    "prepublishOnly": "npm run build",
    "dev": "npx @smithery/[email protected] dev",
    "dev:http": "tsx src/http-server.ts",
    "deploy": "npm run build && npx @smithery/[email protected] deploy",
    "test": "echo \"Error: no test specified\" && exit 1",
    "lint": "eslint src/**/*.ts",
    "lint:fix": "eslint src/**/*.ts --fix",
    "format": "prettier --write src/**/*.ts",
    "format:check": "prettier --check src/**/*.ts",
    "typecheck": "tsc --noEmit"
  },
  "keywords": [
    "mcp",
    "smithery",
    "malaysia",
    "open-data",
    "data-catalogue",
    "dosm",
    "weather",
    "transport"
  ],
  "author": "hithereiamaliff",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/hithereiamaliff/mcp-datagovmy.git"
  },
  "dependencies": {
    "@aws-sdk/client-location": "^3.848.0",
    "@modelcontextprotocol/sdk": "^1.12.1",
    "axios": "^1.11.0",
    "cors": "^2.8.5",
    "csv-parser": "^3.0.0",
    "dotenv": "^17.2.1",
    "express": "^4.21.0",
    "firebase-admin": "^13.6.0",
    "gtfs-realtime-bindings": "^1.1.1",
    "hyparquet": "^1.17.1",
    "hyparquet-compressors": "^1.1.1",
    "jszip": "^3.10.1",
    "p-limit": "^5.0.0",
    "zod": "^3.25.57"
  },
  "devDependencies": {
    "@smithery/cli": "^1.2.16",
    "@types/cors": "^2.8.17",
    "@types/express": "^4.17.21",
    "@types/node": "^22.15.31",
    "@typescript-eslint/eslint-plugin": "^8.34.0",
    "@typescript-eslint/parser": "^8.34.0",
    "eslint": "^9.28.0",
    "eslint-config-prettier": "^10.1.5",
    "eslint-plugin-prettier": "^5.4.1",
    "prettier": "^3.5.3",
    "tsx": "^4.7.0",
    "typescript": "^5.8.3"
  },
  "engines": {
    "node": ">=18.0.0"
  }
}

```

--------------------------------------------------------------------------------
/mcp-server.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Malaysia Open Data MCP - Universal Server Entry Point
 * 
 * This file is a self-contained, simplified entry point designed for robust deployment on Smithery.
 */

// All tool logic is included here to avoid pathing issues.
const catalogueTools = {
  listDatasets: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  getDataset: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  searchDatasets: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
};

const dosmTools = {
  listDatasets: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  getDataset: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
};

const weatherTools = {
  getForecast: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  getWarnings: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  getEarthquakeWarnings: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
};

const transportTools = {
  listAgencies: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
  getData: async (params) => ({ message: 'Tool not fully implemented in this version', ...params }),
};

const testTools = {
  hello: async () => ({ message: 'Hello from the simplified MCP server!' }),
};

/**
 * Main server function that Smithery expects.
 */
function server({ sessionId, config }) {
  const tools = {
    // Data Catalogue Tools
    list_datasets: catalogueTools.listDatasets,
    get_dataset: catalogueTools.getDataset,
    search_datasets: catalogueTools.searchDatasets,
    
    // OpenDOSM Tools
    list_dosm_datasets: dosmTools.listDatasets,
    get_dosm_dataset: dosmTools.getDataset,
    
    // Weather Tools
    get_weather_forecast: weatherTools.getForecast,
    get_weather_warnings: weatherTools.getWarnings,
    get_earthquake_warnings: weatherTools.getEarthquakeWarnings,
    
    // Transport Tools
    list_transport_agencies: transportTools.listAgencies,
    get_transport_data: transportTools.getData,
    
    // Test Tools
    hello: testTools.hello,
  };
  
  return {
    connect: () => tools,
  };
}

// Export the server function for CommonJS compatibility.
module.exports = server;
module.exports.default = server;

```

--------------------------------------------------------------------------------
/src/tools/catalogue.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Data Catalogue MCP Tools
 * 
 * Tools for accessing and searching the Malaysia Open Data Catalogue
 */

// Import the createCatalogueClient function directly
const createCatalogueClient = require('../api/catalogue');

// Create client instance with default configuration
const catalogueClient = createCatalogueClient();

/**
 * Lists available datasets in the Data Catalogue
 *
 * @param {Object} params - Optional parameters
 * @param {number} params.limit - Maximum number of datasets to return
 * @returns {Promise<Object>} - Success envelope with the dataset list, or a failure envelope
 */
async function listDatasets(params = {}) {
  try {
    const data = await catalogueClient.listDatasets(params);
    return {
      success: true,
      message: 'Successfully retrieved datasets',
      data,
    };
  } catch (err) {
    // Errors are reported in the envelope rather than thrown to the caller.
    return {
      success: false,
      message: `Failed to list datasets: ${err.message}`,
      error: err.message,
    };
  }
}

/**
 * Gets data from a specific dataset
 *
 * @param {Object} params - Parameters
 * @param {string} params.id - Dataset ID (required)
 * @param {Object} params.filter - Optional filter parameters
 * @param {Object} params.sort - Optional sort parameters
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Success envelope with dataset data, or a failure envelope
 */
async function getDataset(params = {}) {
  try {
    if (!params.id) {
      throw new Error('Dataset ID is required');
    }

    // Everything except the id is forwarded as query parameters.
    const { id, ...queryParams } = params;
    const data = await catalogueClient.getDataset(id, queryParams);
    return {
      success: true,
      message: `Successfully retrieved dataset: ${id}`,
      data,
    };
  } catch (err) {
    return {
      success: false,
      message: `Failed to get dataset: ${err.message}`,
      error: err.message,
    };
  }
}

/**
 * Searches across datasets in the Data Catalogue
 *
 * @param {Object} params - Search parameters
 * @param {string} params.query - Search query
 * @param {number} params.limit - Maximum number of results to return
 * @returns {Promise<Object>} - Success envelope with search results, or a failure envelope
 */
async function searchDatasets(params = {}) {
  try {
    // Metadata search: the `contains` parameter matches the query against
    // dataset metadata; `meta: true` requests catalogue metadata.
    const searchParams = { meta: true };
    if (params.query) {
      searchParams.contains = params.query;
    }
    if (params.limit) {
      searchParams.limit = params.limit;
    }

    const data = await catalogueClient.searchDatasets(searchParams);
    return {
      success: true,
      message: 'Successfully searched datasets',
      data,
    };
  } catch (err) {
    return {
      success: false,
      message: `Failed to search datasets: ${err.message}`,
      error: err.message,
    };
  }
}

module.exports = {
  listDatasets,
  getDataset,
  searchDatasets
};

```

--------------------------------------------------------------------------------
/src/dosm.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import { prefixToolName } from './utils/tool-naming.js';

// API Base URL for Malaysia Open Data API
const API_BASE_URL = 'https://api.data.gov.my';

// OpenDOSM endpoint - correct endpoint for Malaysia Open Data API
const OPENDOSM_ENDPOINT = '/opendosm';

/**
 * Registers the OpenDOSM tools on the given MCP server.
 *
 * Both tools hit the same `/opendosm` endpoint: listing uses `meta: true`
 * (optionally scoped by `id`), while fetching records passes the dataset
 * `id` plus pagination parameters.
 */
export function registerDosmTools(server: McpServer) {
  // List DOSM datasets (or one dataset's metadata when dataset_id is given)
  server.tool(
    prefixToolName('list_dosm_datasets'),
    'Lists available datasets from the Department of Statistics Malaysia',
    {
      dataset_id: z.string().optional().describe('Optional specific dataset ID to list (e.g., "cpi_core", "cpi_strata")'),
      limit: z.number().min(1).optional().describe('Maximum number of datasets to return'),
      offset: z.number().min(0).optional().describe('Number of datasets to skip'),
    },
    async ({ dataset_id, limit = 10, offset = 0 }) => {
      try {
        const url = `${API_BASE_URL}${OPENDOSM_ENDPOINT}`;
        // meta: true requests catalogue metadata rather than raw records.
        // Fix: forward `offset` too — it was accepted by the input schema but
        // previously never sent to the API, so pagination silently did nothing.
        const params: Record<string, any> = { limit, offset, meta: true };

        if (dataset_id) {
          params.id = dataset_id;
        }

        const response = await axios.get(url, { params });
        const data = response.data;

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(data, null, 2),
            },
          ],
        };
      } catch (error) {
        // Errors are surfaced as tool text output rather than thrown.
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching DOSM datasets: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );

  // Get records from a specific DOSM dataset
  server.tool(
    prefixToolName('get_dosm_dataset'),
    'Gets data from a specific DOSM dataset',
    {
      id: z.string().describe('ID of the dataset to retrieve (e.g., "cpi_core", "cpi_strata")'),
      limit: z.number().min(1).optional().describe('Maximum number of records to return'),
      offset: z.number().min(0).optional().describe('Number of records to skip'),
    },
    async ({ id, limit = 10, offset = 0 }) => {
      try {
        // The dataset ID is passed as a query parameter, not a path segment.
        const url = `${API_BASE_URL}${OPENDOSM_ENDPOINT}`;
        const params = { id, limit, offset };

        const response = await axios.get(url, { params });
        const data = response.data;

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching DOSM dataset: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/src/api/transport.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Transport API Client
 * 
 * Handles communication with the GTFS Static and GTFS Realtime API endpoints
 */

const { createClient } = require('./client');

/**
 * Creates a Transport API client
 *
 * @param {Object} options - Client configuration options forwarded to the base client
 * @returns {Object} - Client exposing listAgencies, getStaticData, getRealtimeData and getData
 */
function createTransportClient(options = {}) {
  const client = createClient(options);

  // Transport API endpoints
  const GTFS_STATIC_ENDPOINT = '/gtfs-static';
  const GTFS_REALTIME_ENDPOINT = '/gtfs-realtime';

  // Available transport agencies
  const AGENCIES = {
    MYBAS_JB: 'mybas-jb',
    KTMB: 'ktmb',
    PRASARANA: 'prasarana'
  };

  /**
   * Lists available transport agencies (static catalogue, no API call).
   *
   * @returns {Promise<Object>} - List of available agencies
   */
  async function listAgencies() {
    return {
      agencies: [
        {
          id: AGENCIES.MYBAS_JB,
          name: 'myBAS Johor Bahru',
          description: 'Bus service operator in Johor Bahru',
          website: 'https://www.causewaylink.com.my/mybas/en/'
        },
        {
          id: AGENCIES.KTMB,
          name: 'KTMB (Keretapi Tanah Melayu Berhad)',
          description: 'Railway operator providing train services across Malaysia',
          website: 'https://www.ktmb.com.my/'
        },
        {
          id: AGENCIES.PRASARANA,
          name: 'Prasarana',
          description: 'Public transport operator responsible for LRT, MRT, monorail, and bus services',
          website: 'https://myrapid.com.my/'
        }
      ]
    };
  }

  /**
   * Gets GTFS Static data for a specific agency.
   *
   * @param {string} agencyId - Agency ID
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - GTFS Static data
   */
  async function getStaticData(agencyId, params = {}) {
    return client.request(`${GTFS_STATIC_ENDPOINT}/${agencyId}`, params);
  }

  /**
   * Gets GTFS Realtime data for a specific agency.
   *
   * @param {string} agencyId - Agency ID
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - GTFS Realtime data
   */
  async function getRealtimeData(agencyId, params = {}) {
    return client.request(`${GTFS_REALTIME_ENDPOINT}/${agencyId}`, params);
  }

  /**
   * Dispatches to the static or realtime fetcher based on dataType.
   *
   * @param {Object} args - Parameters
   * @param {string} args.agencyId - Agency ID (required)
   * @param {string} args.dataType - 'static' (default) or 'realtime'
   * @param {Object} args.queryParams - Additional query parameters
   * @returns {Promise<Object>} - GTFS data
   */
  async function getData({ agencyId, dataType = 'static', queryParams = {} }) {
    if (!agencyId) {
      throw new Error('Agency ID is required');
    }

    switch (dataType) {
      case 'static':
        return getStaticData(agencyId, queryParams);
      case 'realtime':
        return getRealtimeData(agencyId, queryParams);
      default:
        throw new Error('Invalid data type. Must be "static" or "realtime"');
    }
  }

  return { listAgencies, getStaticData, getRealtimeData, getData };
}

module.exports = {
  createTransportClient
};

```

--------------------------------------------------------------------------------
/scripts/update-tool-names.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Script to update all tool names with the datagovmy_ prefix
 * 
 * This script will:
 * 1. Find all server.tool() calls in the src directory
 * 2. Update them to use prefixToolName() helper
 * 3. Create a report of all changes made
 */

import * as fs from 'fs';
import * as path from 'path';

// Define the directory to search
const srcDir = path.join(process.cwd(), 'src');

// Regular expression to find server.tool calls
const toolRegex = /server\.tool\(\s*['"]([^'"]+)['"]/g;

// Function to process a file.
//
// Rewrites every `server.tool('name', ...)` registration in the file to
// `server.tool(prefixToolName('name'), ...)`, skipping names that already
// carry the `datagovmy_` prefix, and (only when at least one registration
// was rewritten) inserts the `prefixToolName` import after the last import.
//
// The previous implementation rebuilt the call-site text with hard-coded
// whitespace (`server.tool(\n    '...'`), so registrations with different
// indentation were reported as changed but never actually rewritten, and it
// injected the import into every file — even files with nothing to change.
function processFile(filePath: string): { file: string, changes: { original: string, updated: string }[] } {
  const content = fs.readFileSync(filePath, 'utf8');
  const changes: { original: string, updated: string }[] = [];

  // Wrap every non-prefixed tool name in prefixToolName(), preserving
  // whatever whitespace and quote style the call site uses.
  let result = content.replace(
    /server\.tool\((\s*)(['"])([^'"]+)\2/g,
    (match: string, ws: string, quote: string, toolName: string) => {
      if (toolName.startsWith('datagovmy_')) {
        return match;
      }
      changes.push({ original: toolName, updated: `datagovmy_${toolName}` });
      return `server.tool(${ws}prefixToolName(${quote}${toolName}${quote})`;
    }
  );

  // Add the helper import only when the file was actually modified and
  // does not already import it.
  if (changes.length > 0 && !result.includes("import { prefixToolName }")) {
    const lastImportIndex = result.lastIndexOf('import');
    if (lastImportIndex !== -1) {
      const endOfImport = result.indexOf(';', lastImportIndex) + 1;
      result =
        result.substring(0, endOfImport) +
        "\nimport { prefixToolName } from './utils/tool-naming.js';" +
        result.substring(endOfImport);
    }
  }

  // Write the updated content back to the file only if something changed.
  if (result !== content) {
    fs.writeFileSync(filePath, result, 'utf8');
  }

  return { file: filePath, changes };
}

// Recursively walks a directory tree, running processFile on every .ts/.js
// file, and collects the results for files that actually changed.
function processDirectory(dir: string): { file: string, changes: { original: string, updated: string }[] }[] {
  const collected: { file: string, changes: { original: string, updated: string }[] }[] = [];

  for (const entry of fs.readdirSync(dir)) {
    const fullPath = path.join(dir, entry);
    const info = fs.statSync(fullPath);

    if (info.isDirectory()) {
      // Descend into subdirectories and merge their results.
      collected.push(...processDirectory(fullPath));
      continue;
    }

    const isSource = info.isFile() && (entry.endsWith('.ts') || entry.endsWith('.js'));
    if (!isSource) {
      continue;
    }

    const outcome = processFile(fullPath);
    if (outcome.changes.length > 0) {
      collected.push(outcome);
    }
  }

  return collected;
}

// Entry point: rewrite tool names under src/ and print a change report.
function main() {
  console.log('Updating tool names with datagovmy_ prefix...');

  const results = processDirectory(srcDir);

  console.log('\nChanges made:');
  let totalChanges = 0;

  results.forEach((result) => {
    console.log(`\nFile: ${path.relative(process.cwd(), result.file)}`);
    result.changes.forEach((change) => {
      console.log(`  ${change.original} -> ${change.updated}`);
      totalChanges += 1;
    });
  });

  console.log(`\nTotal changes: ${totalChanges}`);
}

main();

```

--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Malaysia Open Data MCP - Simplified Server
 * 
 * A completely standalone MCP server implementation designed for direct deployment to Smithery.
 * This file contains everything needed to run the server without external dependencies.
 */

// Define our tools: a registry mapping tool names to async handlers.
const tools = {
  // Smoke-test tool proving the server is reachable.
  hello: async () => ({
    message: "Hello from Malaysia Open Data MCP!",
    timestamp: new Date().toISOString()
  }),

  // Data Catalogue Tools (placeholder implementations returning canned data).
  list_datasets: async ({ limit = 10, offset = 0 }) => ({
    message: "This is a placeholder for the list_datasets tool",
    params: { limit, offset },
    datasets: [
      { id: "dataset-1", name: "Economic Indicators" },
      { id: "dataset-2", name: "Population Statistics" },
      { id: "dataset-3", name: "Education Metrics" }
    ]
  }),

  get_dataset: async ({ id, limit = 10, offset = 0, filter = "" }) => ({
    message: `This is a placeholder for the get_dataset tool with ID: ${id}`,
    params: { id, limit, offset, filter },
    data: [
      { year: 2023, value: 100 },
      { year: 2024, value: 120 },
      { year: 2025, value: 150 }
    ]
  }),

  // Add more tool implementations as needed...
};

/**
 * Main MCP server function
 *
 * @param {Object} ctx - Session context provided by the host
 * @param {string} ctx.sessionId - Unique session identifier
 * @returns {Object} - Handle whose connect() exposes the tool registry
 */
function server({ sessionId }) {
  console.log(`Starting MCP server session: ${sessionId}`);
  return {
    connect: () => tools
  };
}

// Export for Smithery compatibility
// (expose the factory both as the module itself and as a `default` property,
// so CommonJS and default-import consumers both work)
module.exports = server;
module.exports.default = server;

// If this file is run directly, start an HTTP server
// Endpoints:
//   GET  /               -> server metadata and the list of available tools
//   POST /invoke/<tool>  -> invoke a tool with a JSON body of parameters
if (require.main === module) {
  const http = require('http');
  const PORT = process.env.PORT || 8182;
  
  const httpServer = http.createServer((req, res) => {
    // Enable CORS
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
    
    // Answer CORS preflight requests with 204 No Content.
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }
    
    // Root endpoint
    if (req.url === '/' && req.method === 'GET') {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        name: 'datagovmy-mcp-hithereiamaliff',
        displayName: 'Malaysia Open Data MCP',
        description: 'MCP server for accessing Malaysia\'s Open Data APIs',
        version: '1.0.0',
        tools: Object.keys(tools)
      }));
      return;
    }
    
    // Handle tool invocation
    if (req.url.startsWith('/invoke/') && req.method === 'POST') {
      // NOTE(review): the tool name is the path segment after "/invoke/"; a
      // query string would be included in it — confirm callers never append one.
      const toolName = req.url.split('/')[2];
      
      if (!tools[toolName]) {
        res.writeHead(404, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ error: `Tool '${toolName}' not found` }));
        return;
      }
      
      // Buffer the full request body before parsing it as JSON.
      let body = '';
      req.on('data', chunk => {
        body += chunk.toString();
      });
      
      req.on('end', async () => {
        try {
          // An empty body invokes the tool with no parameters.
          const params = body ? JSON.parse(body) : {};
          const result = await tools[toolName](params);
          
          res.writeHead(200, { 'Content-Type': 'application/json' });
          res.end(JSON.stringify({ result }));
        } catch (error) {
          // Covers both JSON parse failures and errors thrown by the tool.
          console.error(`Error processing tool ${toolName}:`, error);
          res.writeHead(500, { 'Content-Type': 'application/json' });
          res.end(JSON.stringify({ error: error.message || 'Internal server error' }));
        }
      });
      return;
    }
    
    // Not found
    res.writeHead(404, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ error: 'Not found' }));
  });
  
  httpServer.listen(PORT, () => {
    console.log(`Malaysia Open Data MCP server running on port ${PORT}`);
  });
}

```

--------------------------------------------------------------------------------
/src/api/client.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Base API Client for Malaysia Open Data API
 * 
 * Handles communication with the Malaysia Open Data API, including:
 * - Authentication
 * - Rate limiting
 * - Request/response handling
 */

const axios = require('axios');
const pLimit = require('p-limit');
const { buildQueryParams } = require('../utils/query-builder');

// Base URL for all API requests
const BASE_URL = 'https://api.data.gov.my';

// Rate limiting configuration
// Default: 5 requests per minute (300 requests per hour)
const DEFAULT_RATE_LIMIT = 5;
const DEFAULT_INTERVAL_MS = 12000; // 12 seconds between requests

/**
 * Creates a rate-limited API client for the Malaysia Open Data API
 * 
 * @param {Object} options - Client configuration options
 * @param {string} options.apiToken - Optional API token for authentication
 * @param {number} options.rateLimit - Maximum number of requests per minute (reported via getConfig)
 * @param {number} options.intervalMs - Minimum interval between requests in milliseconds
 * @returns {Object} - API client instance
 */
function createClient(options = {}) {
  const {
    apiToken,
    rateLimit = DEFAULT_RATE_LIMIT,
    intervalMs = DEFAULT_INTERVAL_MS
  } = options;

  // Cap on 429 retries so a persistently rate-limited endpoint cannot
  // make us retry forever.
  const MAX_RATE_LIMIT_RETRIES = 3;

  // Serialize requests: at most one request in flight at any time.
  const limit = pLimit(1);
  let lastRequestTime = 0;

  // Axios instance with base URL, JSON headers and optional token auth.
  const axiosInstance = axios.create({
    baseURL: BASE_URL,
    headers: {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      ...(apiToken && { 'Authorization': `Token ${apiToken}` })
    }
  });

  /**
   * Makes a rate-limited request to the API
   * 
   * Retries up to MAX_RATE_LIMIT_RETRIES times on HTTP 429 responses.
   * The retry loop runs *inside* the pLimit slot: the previous
   * implementation recursed into request() from within the slot, and with
   * a concurrency of 1 the queued retry could never start while the
   * original call still held the slot — a deadlock.
   * 
   * @param {string} endpoint - API endpoint (without base URL)
   * @param {Object} params - Query parameters
   * @returns {Promise<Object>} - API response data
   */
  async function request(endpoint, params = {}) {
    return limit(async () => {
      for (let attempt = 0; ; attempt++) {
        // Enforce minimum interval between requests
        const now = Date.now();
        const timeSinceLastRequest = now - lastRequestTime;

        if (timeSinceLastRequest < intervalMs) {
          await new Promise(resolve => setTimeout(resolve, intervalMs - timeSinceLastRequest));
        }

        try {
          // Translate caller-friendly params into the API's query syntax.
          const queryParams = buildQueryParams(params);

          const response = await axiosInstance.get(endpoint, { params: queryParams });

          // Update last request time
          lastRequestTime = Date.now();

          return response.data;
        } catch (error) {
          if (error.response) {
            // The request was made and the server responded with a status code
            // that falls out of the range of 2xx
            const { status, data } = error.response;

            if (status === 429 && attempt < MAX_RATE_LIMIT_RETRIES) {
              // Too Many Requests - back off, then retry (bounded).
              console.warn('Rate limit exceeded. Retrying after delay...');
              await new Promise(resolve => setTimeout(resolve, intervalMs * 2));
              continue;
            }

            throw new Error(`API Error (${status}): ${JSON.stringify(data)}`);
          } else if (error.request) {
            // The request was made but no response was received
            throw new Error('No response received from API');
          } else {
            // Something happened in setting up the request
            throw new Error(`Request Error: ${error.message}`);
          }
        }
      }
    });
  }

  return {
    request,

    /**
     * Gets the current API client configuration
     * 
     * @returns {Object} - Current configuration
     */
    getConfig() {
      return {
        baseURL: BASE_URL,
        hasToken: !!apiToken,
        rateLimit,
        intervalMs
      };
    }
  };
}

module.exports = {
  createClient
};

```

--------------------------------------------------------------------------------
/scripts/extract-dataset-ids.js:
--------------------------------------------------------------------------------

```javascript
import fs from 'fs';
import path from 'path';
import https from 'https';
import { fileURLToPath } from 'url';

// Get current directory in ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// GitHub API URL for the data-catalogue directory
const apiUrl = 'https://api.github.com/repos/data-gov-my/datagovmy-meta/contents/data-catalogue';

// Function to fetch data from GitHub API.
// Resolves with the parsed JSON body on 2xx; rejects on network errors,
// non-2xx statuses, or an unparseable body. (Previously a non-2xx response
// resolved with GitHub's error payload, which surfaced later as a confusing
// failure when the caller tried to iterate it as a file listing.)
function fetchFromGitHub(url) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        // GitHub's API rejects requests without a User-Agent header.
        'User-Agent': 'Node.js GitHub Dataset Extractor'
      }
    };

    https.get(url, options, (res) => {
      let data = '';
      
      res.on('data', (chunk) => {
        data += chunk;
      });
      
      res.on('end', () => {
        // Reject on non-2xx responses instead of resolving with an error payload.
        if (res.statusCode < 200 || res.statusCode >= 300) {
          reject(new Error(`GitHub API request failed with status ${res.statusCode}: ${data}`));
          return;
        }
        try {
          const jsonData = JSON.parse(data);
          resolve(jsonData);
        } catch (error) {
          reject(error);
        }
      });
    }).on('error', (error) => {
      reject(error);
    });
  });
}

// Function to extract dataset details from a single file.
// Downloads the metadata JSON and returns { id, description }, where the
// description is the English title when available and the ID otherwise.
async function extractDatasetDetails(fileInfo) {
  // The dataset ID is simply the filename without its .json extension.
  const datasetId = path.basename(fileInfo.name, '.json');

  try {
    // Download the raw JSON content of the metadata file.
    const rawContent = await new Promise((resolve, reject) => {
      https.get(fileInfo.download_url, (res) => {
        const chunks = [];
        res.on('data', (chunk) => chunks.push(chunk));
        res.on('end', () => resolve(chunks.join('')));
      }).on('error', reject);
    });

    const content = JSON.parse(rawContent);

    // Prefer the English title; fall back to the ID when absent.
    return { id: datasetId, description: content.title_en || datasetId };
  } catch (error) {
    console.error(`Error fetching details for ${datasetId}:`, error.message);
    // Return basic info if we can't get the title
    return { id: datasetId, description: datasetId };
  }
}

// Main function to extract all dataset IDs.
// Lists the data-catalogue directory on GitHub, resolves each dataset's
// English title, and writes a generated JS module to extracted-datasets.js.
async function extractAllDatasetIds() {
  try {
    // Fetch the list of files in the data-catalogue directory
    const files = await fetchFromGitHub(apiUrl);
    
    // Filter for JSON files only
    const jsonFiles = files.filter(file => file.name.endsWith('.json'));
    
    console.log(`Found ${jsonFiles.length} JSON files in the data-catalogue directory`);
    
    // Extract dataset details from each file (sequentially, to stay gentle
    // on the GitHub API).
    const datasets = [];
    for (const file of jsonFiles) {
      const dataset = await extractDatasetDetails(file);
      datasets.push(dataset);
      console.log(`Processed: ${dataset.id} - ${dataset.description}`);
    }
    
    // Sort datasets alphabetically by ID
    datasets.sort((a, b) => a.id.localeCompare(b.id));
    
    // Serialize each dataset as a JS object literal. JSON.stringify handles
    // quotes, backslashes and newlines; the previous single-quote-only
    // escaping produced a syntactically broken file for titles containing
    // backslashes or newlines.
    const formattedDatasets = datasets.map(dataset => 
      `  { id: ${JSON.stringify(dataset.id)}, description: ${JSON.stringify(dataset.description)} }`
    ).join(',\n');
    
    // Write to a file
    const outputContent = `// Generated from GitHub repository: data-gov-my/datagovmy-meta
// Timestamp: ${new Date().toISOString()}
// Total datasets: ${datasets.length}

const EXTRACTED_DATASETS = [
${formattedDatasets}
];

export default EXTRACTED_DATASETS;
`;
    
    const outputPath = path.join(__dirname, 'extracted-datasets.js');
    fs.writeFileSync(outputPath, outputContent);
    console.log(`Successfully extracted ${datasets.length} dataset IDs to extracted-datasets.js`);
    
    return datasets;
  } catch (error) {
    console.error('Error extracting dataset IDs:', error);
    throw error;
  }
}

// Run the extraction
extractAllDatasetIds().catch(console.error);

```

--------------------------------------------------------------------------------
/src/utils/query-builder.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Query Parameter Builder for Malaysia Open Data API
 * 
 * Utility functions to build query parameters for API requests
 * based on the Malaysia Open Data API query syntax.
 */

/**
 * Builds query parameters for API requests
 * 
 * Translates a caller-friendly params object into the flat key/value map
 * expected by the Malaysia Open Data API.
 * 
 * @param {Object} params - Parameters to build query from
 * @returns {Object} - Formatted query parameters
 */
function buildQueryParams(params = {}) {
  const query = {};

  // Pass the dataset ID straight through.
  if (params.id) {
    query.id = params.id;
  }

  // All four filter-style parameters share the "value@column" formatter.
  for (const key of ['filter', 'ifilter', 'contains', 'icontains']) {
    if (params[key]) {
      query[key] = formatFilterParam(params[key]);
    }
  }

  // Range: accept either a preformatted string or { column, begin, end }.
  if (params.range) {
    query.range = typeof params.range === 'object'
      ? `${params.range.column}[${params.range.begin ?? ''}:${params.range.end ?? ''}]`
      : params.range;
  }

  // Sort: arrays collapse to a comma-separated column list.
  if (params.sort) {
    query.sort = Array.isArray(params.sort) ? params.sort.join(',') : params.sort;
  }

  // Date bounds.
  if (params.date_start) {
    query.date_start = formatDateParam(params.date_start);
  }
  if (params.date_end) {
    query.date_end = formatDateParam(params.date_end);
  }

  // Timestamp bounds.
  if (params.timestamp_start) {
    query.timestamp_start = formatTimestampParam(params.timestamp_start);
  }
  if (params.timestamp_end) {
    query.timestamp_end = formatTimestampParam(params.timestamp_end);
  }

  // Limit may legitimately be 0, so test for presence rather than truthiness.
  if (params.limit !== undefined) {
    query.limit = params.limit;
  }

  // Column include/exclude lists: arrays become comma-separated strings.
  for (const key of ['include', 'exclude']) {
    if (params[key]) {
      query[key] = Array.isArray(params[key]) ? params[key].join(',') : params[key];
    }
  }

  // Meta flag is serialized as a string ("true"/"false").
  if (params.meta !== undefined) {
    query.meta = params.meta.toString();
  }

  return query;
}

/**
 * Formats filter parameters (filter, ifilter, contains, icontains)
 * 
 * A raw string is returned untouched. An object mapping column names to
 * values is serialized as "value@column" pairs joined by commas.
 * 
 * @param {Object|string} filter - Filter configuration
 * @returns {string} - Formatted filter parameter
 */
function formatFilterParam(filter) {
  if (typeof filter === 'string') {
    return filter;
  }

  if (typeof filter === 'object') {
    const pairs = [];
    for (const [column, value] of Object.entries(filter)) {
      pairs.push(`${value}@${column}`);
    }
    return pairs.join(',');
  }

  return filter;
}

/**
 * Formats date parameters (date_start, date_end)
 * 
 * A raw string passes through unchanged; a { date, column } object is
 * serialized as "date@column".
 * 
 * @param {Object|string} dateParam - Date parameter configuration
 * @returns {string} - Formatted date parameter
 */
function formatDateParam(dateParam) {
  if (typeof dateParam !== 'object') {
    return dateParam;
  }
  return `${dateParam.date}@${dateParam.column}`;
}

/**
 * Formats timestamp parameters (timestamp_start, timestamp_end)
 * 
 * A raw string passes through unchanged; a { timestamp, column } object is
 * serialized as "timestamp@column".
 * 
 * @param {Object|string} timestampParam - Timestamp parameter configuration
 * @returns {string} - Formatted timestamp parameter
 */
function formatTimestampParam(timestampParam) {
  if (typeof timestampParam !== 'object') {
    return timestampParam;
  }
  return `${timestampParam.timestamp}@${timestampParam.column}`;
}

module.exports = {
  buildQueryParams
};

```

--------------------------------------------------------------------------------
/src/flood.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import { prefixToolName } from './utils/tool-naming.js';

// Base URL for Malaysia Open Data API
const API_BASE_URL = 'https://api.data.gov.my';
const FLOOD_WARNING_ENDPOINT = '/flood-warning';

/**
 * Register flood warning tools with the MCP server
 * @param server MCP server instance
 */
export function registerFloodTools(server: McpServer) {
  server.tool(
    prefixToolName('get_flood_warnings'),
    'Gets current flood warnings for Malaysia',
    {
      state: z.string().optional().describe('State name to filter warnings (e.g., "Selangor", "Johor")'),
      district: z.string().optional().describe('District name to filter warnings'),
      severity: z.string().optional().describe('Severity level to filter (e.g., "warning", "alert", "danger")'),
    },
    async ({ state, district, severity }) => {
      try {
        // Make a real API call to the Malaysia Open Data API
        const url = `${API_BASE_URL}${FLOOD_WARNING_ENDPOINT}`;
        const params: Record<string, any> = { meta: true };

        // Combine all provided filters into a single comma-separated
        // "value@column" list. (Assigning params.filter per condition let
        // the last-provided filter silently overwrite the earlier ones, so
        // e.g. state + severity only filtered by severity.)
        const filters: string[] = [];
        if (state) filters.push(`${state}@state`);
        if (district) filters.push(`${district}@district`);
        if (severity) filters.push(`${severity}@severity`);
        if (filters.length > 0) params.filter = filters.join(',');

        const response = await axios.get(url, { params });

        // Return the actual API response
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: 'Flood warnings retrieved successfully',
                params: { state, district, severity },
                endpoint: `${API_BASE_URL}${FLOOD_WARNING_ENDPOINT}`,
                warnings: response.data,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        console.error('Error fetching flood warnings:', error);

        // If the API is unavailable, fall back to mock data for demonstration
        if (axios.isAxiosError(error) && (error.code === 'ECONNREFUSED' || error.response?.status === 404)) {
          console.warn('API unavailable, using mock data');
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  message: 'API unavailable, using mock data',
                  params: { state, district, severity },
                  endpoint: `${API_BASE_URL}${FLOOD_WARNING_ENDPOINT}`,
                  warnings: [
                    {
                      id: 'mock-flood-1',
                      state: 'Selangor',
                      district: 'Klang',
                      location: 'Taman Sri Muda',
                      severity: 'warning',
                      water_level: '3.5m',
                      timestamp: new Date().toISOString()
                    },
                    {
                      id: 'mock-flood-2',
                      state: 'Johor',
                      district: 'Kluang',
                      location: 'Kampung Contoh',
                      severity: 'danger',
                      water_level: '4.2m',
                      timestamp: new Date().toISOString()
                    }
                  ],
                  note: 'This is mock data as the real API is currently unavailable'
                }, null, 2),
              },
            ],
          };
        }

        // Return error information
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to fetch flood warnings',
                message: error instanceof Error ? error.message : 'Unknown error',
                status: axios.isAxiosError(error) ? error.response?.status : undefined,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/src/tools/weather.js:
--------------------------------------------------------------------------------

```javascript
/**
 * Weather MCP Tools
 * 
 * Tools for accessing weather forecasts and warnings from the Malaysia Open Data API
 */

const { createWeatherClient } = require('../api/weather');

// Create client instance with default configuration
const weatherClient = createWeatherClient();

/**
 * Gets 7-day general weather forecast data
 * 
 * @param {Object} params - Parameters
 * @param {string} params.location - Optional location filter
 * @param {string} params.locationCategory - Optional location category filter (St, Rc, Ds, Tn, Dv)
 * @param {string} params.date - Optional date filter (YYYY-MM-DD)
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Forecast data
 */
async function getForecast(params = {}) {
  try {
    const queryParams = {};
    
    // Combine both location filters into one comma-separated "contains"
    // value. (Assigning queryParams.contains separately per filter let the
    // category filter silently overwrite the location-name filter.)
    const containsFilters = [];
    if (params.location) {
      containsFilters.push(`${params.location}@location__location_name`);
    }
    if (params.locationCategory) {
      containsFilters.push(`${params.locationCategory}@location__location_id`);
    }
    if (containsFilters.length > 0) {
      queryParams.contains = containsFilters.join(',');
    }
    
    // Handle date filter
    if (params.date) {
      queryParams.filter = `${params.date}@date`;
    }
    
    // Handle limit
    if (params.limit) {
      queryParams.limit = params.limit;
    }
    
    const result = await weatherClient.getForecast(queryParams);
    
    return {
      success: true,
      message: 'Successfully retrieved weather forecast data',
      data: result
    };
  } catch (error) {
    return {
      success: false,
      message: `Failed to get weather forecast: ${error.message}`,
      error: error.message
    };
  }
}

/**
 * Gets weather warning data
 * 
 * @param {Object} params - Parameters
 * @param {string} params.district - Optional district filter
 * @param {string} params.state - Optional state filter
 * @param {string} params.warningType - Optional warning type filter
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Warning data
 */
async function getWarnings(params = {}) {
  try {
    const queryParams = {};
    
    // Combine district/state/warning-type filters into one comma-separated
    // "value@column" list. (Previously each assignment overwrote the last,
    // so only one of the supplied filters was ever applied.)
    const filters = [];
    if (params.district) {
      filters.push(`${params.district}@district`);
    }
    if (params.state) {
      filters.push(`${params.state}@state`);
    }
    if (params.warningType) {
      filters.push(`${params.warningType}@warning_type`);
    }
    if (filters.length > 0) {
      queryParams.filter = filters.join(',');
    }
    
    // Handle limit
    if (params.limit) {
      queryParams.limit = params.limit;
    }
    
    const result = await weatherClient.getWarnings(queryParams);
    
    return {
      success: true,
      message: 'Successfully retrieved weather warning data',
      data: result
    };
  } catch (error) {
    return {
      success: false,
      message: `Failed to get weather warnings: ${error.message}`,
      error: error.message
    };
  }
}

/**
 * Gets earthquake warning data
 * 
 * @param {Object} params - Parameters
 * @param {number} params.magnitude - Optional minimum magnitude filter
 * @param {string} params.region - Optional region filter
 * @param {number} params.limit - Optional limit parameter
 * @returns {Promise<Object>} - Earthquake warning data
 */
async function getEarthquakeWarnings(params = {}) {
  const queryParams = {};

  // A minimum magnitude becomes an open-ended range filter.
  if (params.magnitude) {
    queryParams.range = `magnitude[${params.magnitude}:]`;
  }

  // Region is matched as a substring.
  if (params.region) {
    queryParams.contains = `${params.region}@region`;
  }

  if (params.limit) {
    queryParams.limit = params.limit;
  }

  try {
    const result = await weatherClient.getEarthquakeWarnings(queryParams);
    return {
      success: true,
      message: 'Successfully retrieved earthquake warning data',
      data: result
    };
  } catch (error) {
    return {
      success: false,
      message: `Failed to get earthquake warnings: ${error.message}`,
      error: error.message
    };
  }
}

module.exports = {
  getForecast,
  getWarnings,
  getEarthquakeWarnings
};

```

--------------------------------------------------------------------------------
/src/weather.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import { prefixToolName } from './utils/tool-naming.js';

// API Base URL for Malaysia Open Data API
const API_BASE_URL = 'https://api.data.gov.my';
// Weather API endpoints - using realtime API endpoints
const WEATHER_FORECAST_ENDPOINT = '/weather/forecast';
const WEATHER_WARNING_ENDPOINT = '/weather/warning';
const EARTHQUAKE_WARNING_ENDPOINT = '/weather/warning/earthquake';

/**
 * Register weather tools (forecast, warnings, earthquake warnings) with the
 * MCP server.
 * @param server MCP server instance
 */
export function registerWeatherTools(server: McpServer) {
  // Get weather forecast
  server.tool(
    prefixToolName('get_weather_forecast'),
    'Gets weather forecast for Malaysia',
    {
      location: z.string().describe('Location name (e.g., "Kuala Lumpur", "Penang")'),
      days: z.number().min(1).max(7).optional().describe('Number of days to forecast (1-7)'),
    },
    async ({ location, days = 3 }) => {
      try {
        const url = `${API_BASE_URL}${WEATHER_FORECAST_ENDPOINT}`;
        // NOTE(review): the API exposes no explicit "days" parameter here, so
        // the row limit is used to bound how many forecast entries come back
        // — confirm this matches the endpoint's row-per-day semantics.
        const params: Record<string, any> = { limit: days };

        if (location) {
          params.contains = `${location}@location__location_name`;
        }

        const response = await axios.get(url, { params });

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(response.data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching weather forecast: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );

  // Get weather warnings
  server.tool(
    prefixToolName('get_weather_warnings'),
    'Gets current weather warnings for Malaysia',
    {
      type: z.string().optional().describe('Type of warning (e.g., "rain", "flood", "all")'),
      location: z.string().optional().describe('Location name to filter warnings'),
    },
    async ({ type = 'all', location }) => {
      try {
        const url = `${API_BASE_URL}${WEATHER_WARNING_ENDPOINT}`;
        const params: Record<string, any> = { limit: 100 };

        // Build a single combined "contains" filter. (Assigning
        // params.contains twice meant a location filter silently overwrote
        // the warning-type filter when both were supplied.)
        const containsFilters: string[] = [];
        if (type && type !== 'all') {
          containsFilters.push(`${type}@warning_issue__title_en`);
        }
        if (location) {
          containsFilters.push(`${location}@text_en`);
        }
        if (containsFilters.length > 0) {
          params.contains = containsFilters.join(',');
        }

        const response = await axios.get(url, { params });

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(response.data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching weather warnings: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );

  // Get earthquake warnings
  server.tool(
    prefixToolName('get_earthquake_warnings'),
    'Gets earthquake warnings for Malaysia',
    {
      days: z.number().min(1).max(30).optional().describe('Number of days to look back (1-30)'),
      magnitude: z.number().min(0).optional().describe('Minimum magnitude to include'),
    },
    async ({ days = 7, magnitude = 4.0 }) => {
      try {
        const url = `${API_BASE_URL}${EARTHQUAKE_WARNING_ENDPOINT}`;
        const params: Record<string, any> = { limit: 100, meta: true };

        if (days) {
          // Convert the look-back window into a timestamp lower bound.
          const pastDate = new Date();
          pastDate.setDate(pastDate.getDate() - days);
          params.timestamp_start = pastDate.toISOString().split('T')[0] + ' 00:00:00@utcdatetime';
        }

        if (magnitude) {
          params.number_min = `${magnitude}@magdefault`;
        }

        const response = await axios.get(url, { params });

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(response.data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching earthquake warnings: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/scripts/index-catalogue-files.cjs:
--------------------------------------------------------------------------------

```
const fs = require('fs');
const path = require('path');

// Data catalogue directory (sibling of the scripts/ directory)
const cataloguePath = path.join(__dirname, '..', 'data-catalogue');

// Output file paths
const outputJSPath = path.join(__dirname, 'catalogue-index.js');
const outputDTSPath = path.join(__dirname, 'catalogue-index.d.ts');

/**
 * Indexes all JSON files in the data catalogue directory
 * and generates a single JS module with the data and a corresponding d.ts file.
 *
 * Individual files that fail to parse are skipped with an error log; any
 * other failure (e.g. missing directory) is logged and rethrown so the
 * build fails loudly.
 */
function indexCatalogueFiles() {
  try {
    const files = fs.readdirSync(cataloguePath).filter(file => file.endsWith('.json'));
    console.log(`Found ${files.length} JSON files in data-catalogue.`);
    
    const catalogueIndex = [];
    // Distinct filter values collected while walking the catalogue.
    const categories = new Set();
    const geographies = new Set();
    const frequencies = new Set();
    const demographies = new Set();
    const dataSources = new Set();

    for (const file of files) {
      try {
        const metadata = JSON.parse(fs.readFileSync(path.join(cataloguePath, file), 'utf8'));
        
        // Normalize each dataset record, defaulting absent fields so the
        // generated module always has a uniform shape.
        const dataset = {
          id: metadata.id || file.replace('.json', ''),
          title_en: metadata.title_en || '',
          title_ms: metadata.title_ms || '',
          description_en: metadata.description_en || '',
          description_ms: metadata.description_ms || '',
          frequency: metadata.frequency || '',
          geography: metadata.geography || [],
          demography: metadata.demography || [],
          dataset_begin: metadata.dataset_begin || null,
          dataset_end: metadata.dataset_end || null,
          data_source: metadata.data_source || [],
          data_as_of: metadata.data_as_of || '',
          last_updated: metadata.last_updated || '',
          next_update: metadata.next_update || '',
          link_parquet: metadata.link_parquet || '',
          link_csv: metadata.link_csv || '',
          link_preview: metadata.link_preview || '',
          site_category: metadata.site_category || []
        };

        catalogueIndex.push(dataset);

        // Collect filter values
        if (dataset.frequency) frequencies.add(dataset.frequency);
        dataset.geography.forEach(g => geographies.add(g));
        dataset.demography.forEach(d => demographies.add(d));
        dataset.data_source.forEach(s => dataSources.add(s));
        dataset.site_category.forEach(sc => {
          if (sc.category_en) categories.add(sc.category_en);
          if (sc.subcategory_en) categories.add(sc.subcategory_en);
        });

      } catch (e) {
        console.error(`Skipping invalid JSON file: ${file}`, e);
      }
    }
    
    // Stable ordering keeps the generated file diff-friendly.
    catalogueIndex.sort((a, b) => a.id.localeCompare(b.id));
    
    const filters = {
      categories: Array.from(categories).sort(),
      geographies: Array.from(geographies).sort(),
      frequencies: Array.from(frequencies).sort(),
      demographies: Array.from(demographies).sort(),
      dataSources: Array.from(dataSources).sort()
    };
    
    const jsContent = `// Generated from local data catalogue files\n// Timestamp: ${new Date().toISOString()}\n// Total datasets: ${catalogueIndex.length}\n\nexport const CATALOGUE_INDEX = ${JSON.stringify(catalogueIndex, null, 2)};\n\nexport const CATALOGUE_FILTERS = ${JSON.stringify(filters, null, 2)};`;

    // NOTE: header previously credited 'index-catalogue-files.js'; this
    // script is the .cjs file, so name it correctly in generated output.
    const dtsContent = `// Generated by scripts/index-catalogue-files.cjs on ${new Date().toISOString()}\n\ndeclare module '../scripts/catalogue-index.js' {\n  export interface SiteCategory {\n    site: string;\n    category_en: string;\n    category_ms: string;\n    category_sort: number;\n    subcategory_en: string;\n    subcategory_ms: string;\n    subcategory_sort: number;\n  }\n\n  export interface DatasetMetadata {\n    id: string;\n    title_en: string;\n    title_ms: string;\n    description_en: string;\n    description_ms: string;\n    frequency: string;\n    geography: string[];\n    demography: string[];\n    dataset_begin: number | null;\n    dataset_end: number | null;\n    data_source: string[];\n    data_as_of: string;\n    last_updated: string;\n    next_update: string;\n    link_parquet: string;\n    link_csv: string;\n    link_preview: string;\n    site_category: SiteCategory[];\n  }\n\n  export interface CatalogueFilters {\n    categories: string[];\n    geographies: string[];\n    frequencies: string[];\n    demographies: string[];\n    dataSources: string[];\n  }\n\n  export const CATALOGUE_INDEX: DatasetMetadata[];\n  export const CATALOGUE_FILTERS: CatalogueFilters;\n}`;

    fs.writeFileSync(outputJSPath, jsContent);
    fs.writeFileSync(outputDTSPath, dtsContent);
    console.log(`Successfully indexed ${catalogueIndex.length} datasets to catalogue-index.js and catalogue-index.d.ts`);
  
  } catch (error) {
    console.error('Error indexing catalogue files:', error);
    throw error;
  }
}

// Run the indexing
indexCatalogueFiles();

```

--------------------------------------------------------------------------------
/src/firebase-analytics.ts:
--------------------------------------------------------------------------------

```typescript
import { initializeApp, cert, ServiceAccount } from 'firebase-admin/app';
import { getDatabase } from 'firebase-admin/database';
import fs from 'fs';
import path from 'path';

// Analytics for this server are stored under `mcp-analytics/<SERVER_NAME>`
// in the Firebase Realtime Database (see save/load functions below).
const SERVER_NAME = 'mcp-datagovmy';

// A single recorded tool invocation.
interface ToolCall {
  tool: string;       // tool name as invoked
  timestamp: string;  // time of the call (string format set by the caller)
  clientIp: string;
  userAgent: string;
}

// Aggregate usage counters persisted to Firebase as one snapshot.
interface Analytics {
  serverStartTime: string;
  totalRequests: number;
  totalToolCalls: number;
  requestsByMethod: Record<string, number>;
  requestsByEndpoint: Record<string, number>;
  toolCalls: Record<string, number>;
  recentToolCalls: ToolCall[];
  clientsByIp: Record<string, number>;
  clientsByUserAgent: Record<string, number>;
  hourlyRequests: Record<string, number>;
}

// Lazily-initialized module state: Firebase is set up on first save/load,
// and `database` stays null when credentials are unavailable.
let firebaseInitialized = false;
let database: ReturnType<typeof getDatabase> | null = null;

/**
 * Initialize the Firebase Admin SDK once per process.
 *
 * Reads the service-account JSON from `.credentials/firebase-service-account.json`
 * under the current working directory. If the file is missing, logs a warning
 * and returns without setting `database`, so analytics stay local-only.
 * Any other failure is logged and swallowed for the same reason.
 */
function initializeFirebase() {
  // Idempotent: repeated calls after a successful init are no-ops.
  if (firebaseInitialized) return;

  try {
    const credentialsPath = path.join(process.cwd(), '.credentials', 'firebase-service-account.json');
    
    if (!fs.existsSync(credentialsPath)) {
      console.warn(`⚠️  Firebase credentials not found at ${credentialsPath}`);
      console.warn('   Analytics will only be saved locally');
      return;
    }

    const serviceAccount = JSON.parse(fs.readFileSync(credentialsPath, 'utf-8')) as ServiceAccount;

    // NOTE(review): the RTDB URL is hard-coded to this project's instance;
    // confirm before reusing this module for another deployment.
    initializeApp({
      credential: cert(serviceAccount),
      databaseURL: 'https://mcp-analytics-49b45-default-rtdb.asia-southeast1.firebasedatabase.app'
    });

    database = getDatabase();
    firebaseInitialized = true;
    console.log('✅ Firebase initialized successfully');
  } catch (error) {
    console.error('❌ Failed to initialize Firebase:', error);
  }
}

/**
 * Sanitize keys for Firebase - replace invalid characters with safe tokens.
 * Firebase keys cannot contain any of: . # $ / [ ]
 */
function sanitizeKey(key: string): string {
  const substitutions: Array<[RegExp, string]> = [
    [/\./g, '_dot_'],
    [/#/g, '_hash_'],
    [/\$/g, '_dollar_'],
    [/\//g, '_slash_'],
    [/\[/g, '_lbracket_'],
    [/\]/g, '_rbracket_'],
  ];
  return substitutions.reduce((result, [pattern, token]) => result.replace(pattern, token), key);
}

/**
 * Recursively sanitize every key of an object (or the objects inside an
 * array) for Firebase compatibility. Primitives pass through unchanged.
 */
function sanitizeObject(obj: any): any {
  if (obj === null || obj === undefined) {
    return obj;
  }

  if (Array.isArray(obj)) {
    return obj.map((element) => sanitizeObject(element));
  }

  if (typeof obj === 'object') {
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [sanitizeKey(key), sanitizeObject(value)])
    );
  }

  return obj;
}

/**
 * Reverse sanitizeKey: turn the `_dot_`, `_hash_`, ... tokens back into
 * the original Firebase-forbidden characters.
 */
function desanitizeKey(key: string): string {
  const tokens: Array<[string, string]> = [
    ['_dot_', '.'],
    ['_hash_', '#'],
    ['_dollar_', '$'],
    ['_slash_', '/'],
    ['_lbracket_', '['],
    ['_rbracket_', ']'],
  ];
  let result = key;
  for (const [token, original] of tokens) {
    result = result.split(token).join(original);
  }
  return result;
}

/**
 * Recursively desanitize every key of an object (or the objects inside an
 * array) loaded from Firebase. Primitives pass through unchanged.
 */
function desanitizeObject(obj: any): any {
  if (obj === null || obj === undefined) {
    return obj;
  }

  if (Array.isArray(obj)) {
    return obj.map((element) => desanitizeObject(element));
  }

  if (typeof obj === 'object') {
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [desanitizeKey(key), desanitizeObject(value)])
    );
  }

  return obj;
}

/**
 * Persist the full analytics snapshot to the Realtime Database at
 * `mcp-analytics/<SERVER_NAME>`, replacing any previous snapshot.
 *
 * Keys are sanitized first because Firebase path segments cannot contain
 * `. # $ / [ ]`. Lazily initializes Firebase on first use; if Firebase is
 * unavailable the save is skipped, and write failures are logged and
 * swallowed so analytics can never crash the server.
 */
export async function saveAnalyticsToFirebase(analytics: Analytics): Promise<void> {
  if (!firebaseInitialized) {
    initializeFirebase();
  }

  if (!database) {
    console.log('📝 Firebase not available, skipping cloud save');
    return;
  }

  try {
    const ref = database.ref(`mcp-analytics/${SERVER_NAME}`);
    
    // Sanitize the analytics object before saving
    const sanitizedAnalytics = sanitizeObject(analytics);
    
    await ref.set(sanitizedAnalytics);
    console.log(`📊 Analytics saved to Firebase: ${SERVER_NAME}`);
  } catch (error) {
    console.error('Failed to save to Firebase:', error);
  }
}

/**
 * Load the analytics snapshot from `mcp-analytics/<SERVER_NAME>`.
 *
 * Lazily initializes Firebase on first use. Returns the desanitized
 * snapshot, or null when Firebase is unavailable, no snapshot exists,
 * or the read fails (failures are logged, never thrown).
 */
export async function loadAnalyticsFromFirebase(): Promise<Analytics | null> {
  if (!firebaseInitialized) {
    initializeFirebase();
  }

  if (!database) {
    console.log('Firebase not available for loading');
    return null;
  }

  try {
    const ref = database.ref(`mcp-analytics/${SERVER_NAME}`);
    const snapshot = await ref.get();
    
    if (snapshot.exists()) {
      const sanitizedData = snapshot.val();
      
      // Reverse the key encoding applied by saveAnalyticsToFirebase.
      const data = desanitizeObject(sanitizedData) as Analytics;
      
      console.log(`📊 Loaded analytics from Firebase: ${SERVER_NAME}`);
      // Guard against partial/legacy records: counters may be absent, and
      // calling toLocaleString() on undefined would crash the load path.
      console.log(`   Total requests: ${(data.totalRequests ?? 0).toLocaleString()}, Tool calls: ${data.totalToolCalls ?? 0}`);
      return data;
    }
    
    console.log('No existing analytics in Firebase');
    return null;
  } catch (error) {
    console.error('Failed to load from Firebase:', error);
    return null;
  }
}
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Malaysia Open Data MCP Server
 * Provides tools to access Malaysia's open data APIs
 *
 * Load environment variables from .env file
 */
import dotenv from 'dotenv';

// Initialize dotenv to load environment variables from .env file
dotenv.config();

/**
 * =====================================================================
 * IMPORTANT GUIDANCE FOR AI MODELS USING THIS MCP SERVER:
 * =====================================================================
 * 1. ALWAYS use the 'search_all' tool FIRST for any data queries
 *    This tool searches both datasets and dashboards simultaneously
 *    with intelligent fallback to ensure comprehensive results.
 * 
 * 2. Only use specific dataset or dashboard search tools if you need to
 *    explicitly limit your search to one type of content.
 *
 * 3. Use correct URL patterns:
 *    - https://data.gov.my/... for general data portal resources
 *    - https://open.dosm.gov.my/... for OpenDOSM resources
 *
 * 4. Parquet files can now be accessed and parsed using the 'parse_parquet_file'
 *    and 'get_parquet_info' tools. These tools use the hyparquet JavaScript library
 *    and are fully compatible with the Smithery deployment environment.
 * =====================================================================
 */

import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';

// Import external tools
import { registerFloodTools } from './flood.tools.js';
import { registerWeatherTools } from './weather.tools.js';
import { registerTransportTools } from './transport.tools.js';
import { registerDataCatalogueTools } from './datacatalogue.tools.js';
import { registerDosmTools } from './dosm.tools.js';
import { registerDashboardTools } from './dashboards.tools.js';
import { registerUnifiedSearchTools } from './unified-search.tools.js';
import { registerParquetTools } from './parquet.tools.js';
import { registerGtfsTools } from './gtfs.tools.js';
import { prefixToolName } from './utils/tool-naming.js';

// Type definition for tool registration functions
type ToolRegistrationFn = (server: McpServer) => void;

// Define the config schema.
// Every key is optional; when absent, the server falls back to free
// alternatives (e.g. Nominatim) or to server-side environment variables.
const optionalKey = (description: string) => z.string().optional().describe(description);

export const configSchema = z.object({
  // Optional Google Maps API key for geocoding
  googleMapsApiKey: optionalKey(
    'Google Maps API key for improved location detection. If not provided, will use OpenStreetMap Nominatim API as fallback.'
  ),

  // Optional GrabMaps API key for Southeast Asia geocoding
  grabMapsApiKey: optionalKey('GrabMaps API key for improved geocoding in Southeast Asia.'),

  // Optional AWS credentials for GrabMaps integration via AWS Location Service
  awsRegion: optionalKey(
    'AWS Region where your Place Index is created. Default: ap-southeast-5 (Malaysia)'
  ),

  awsAccessKeyId: optionalKey('AWS Access Key ID with permissions to access AWS Location Service.'),

  awsSecretAccessKey: optionalKey(
    'AWS Secret Access Key with permissions to access AWS Location Service.'
  ),
});

/**
 * Creates a stateless MCP server for Malaysia Open Data API.
 *
 * Builds the McpServer instance, copies any user-supplied API credentials
 * into process.env (the tool modules read their keys from the environment),
 * registers every tool set plus a trivial `hello` smoke-test tool, and
 * returns the underlying low-level server.
 */
export default function createStatelessServer({
  config: _config,
}: {
  config: z.infer<typeof configSchema>;
}) {
  const server = new McpServer({
    name: 'Malaysia Open Data MCP Server',
    version: '1.0.0',
  });

  const { googleMapsApiKey, grabMapsApiKey, awsAccessKeyId, awsSecretAccessKey, awsRegion } = _config;

  // Config value → environment variable, with the exact message to log
  // when the value is present. Empty strings are treated as absent.
  const credentialMappings: Array<{ value: string | undefined; envVar: string; message: string }> = [
    { value: googleMapsApiKey, envVar: 'GOOGLE_MAPS_API_KEY', message: 'Using Google Maps API key from configuration' },
    { value: grabMapsApiKey, envVar: 'GRABMAPS_API_KEY', message: 'Using GrabMaps API key from configuration' },
    { value: awsAccessKeyId, envVar: 'AWS_ACCESS_KEY_ID', message: 'Using AWS Access Key ID from configuration' },
    { value: awsSecretAccessKey, envVar: 'AWS_SECRET_ACCESS_KEY', message: 'Using AWS Secret Access Key from configuration' },
    { value: awsRegion, envVar: 'AWS_REGION', message: `Using AWS Region: ${awsRegion} from configuration` },
  ];

  for (const { value, envVar, message } of credentialMappings) {
    if (value) {
      process.env[envVar] = value;
      console.log(message);
    }
  }

  // All tool sets to register on this server instance.
  const toolSets: ToolRegistrationFn[] = [
    registerDataCatalogueTools,
    registerDosmTools,
    registerWeatherTools,
    registerDashboardTools,
    registerUnifiedSearchTools,
    registerParquetTools,
    registerGtfsTools,
    registerTransportTools,
    registerFloodTools,
  ];

  for (const registerToolSet of toolSets) {
    registerToolSet(server);
  }

  // Simple connectivity check: returns a greeting plus the current time.
  server.tool(
    prefixToolName('hello'),
    'A simple test tool to verify that the MCP server is working correctly',
    {},
    async () => {
      const payload = {
        message: 'Hello from Malaysia Open Data MCP!',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(payload, null, 2),
          },
        ],
      };
    }
  );

  return server.server;
}

// If this file is run directly, log a message
console.log('Malaysia Open Data MCP module loaded');


```

--------------------------------------------------------------------------------
/deploy/DEPLOYMENT.md:
--------------------------------------------------------------------------------

```markdown
# VPS Deployment Guide for Malaysia Open Data MCP

This guide explains how to deploy the Malaysia Open Data MCP server on your VPS at `mcp.techmavie.digital/datagovmy`.

## Prerequisites

- VPS with Ubuntu/Debian
- Docker and Docker Compose installed
- Nginx installed
- Domain `mcp.techmavie.digital` pointing to your VPS IP
- SSL certificate (via Certbot/Let's Encrypt)

## Architecture

```
Client (Claude, Cursor, etc.)
    ↓ HTTPS
https://mcp.techmavie.digital/datagovmy/mcp
    ↓
Nginx (SSL termination + reverse proxy)
    ↓ HTTP
Docker Container (port 8082 → 8080)
    ↓
Malaysia Open Data APIs (data.gov.my, OpenDOSM, etc.)
```

## Deployment Steps

### 1. SSH into your VPS

```bash
ssh root@your-vps-ip
```

### 2. Create directory for the MCP server

```bash
mkdir -p /opt/mcp-servers/datagovmy
cd /opt/mcp-servers/datagovmy
```

### 3. Clone the repository

```bash
git clone https://github.com/hithereiamaliff/mcp-datagovmy.git .
```

### 4. Create environment file (optional)

```bash
cp .env.example .env
nano .env
```

Add optional API keys if needed:
```env
GOOGLE_MAPS_API_KEY=your_api_key_here
GRABMAPS_API_KEY=your_api_key_here
AWS_ACCESS_KEY_ID=your_aws_key
AWS_SECRET_ACCESS_KEY=your_aws_secret
AWS_REGION=ap-southeast-5
```

### 5. Build and start the Docker container

```bash
docker compose up -d --build
```

### 6. Verify the container is running

```bash
docker compose ps
docker compose logs -f
```

### 7. Test the health endpoint

```bash
curl http://localhost:8082/health
```

### 8. Configure Nginx

Add the location block from `deploy/nginx-mcp.conf` to your existing nginx config for `mcp.techmavie.digital`:

```bash
# Edit your existing nginx config
sudo nano /etc/nginx/sites-available/mcp.techmavie.digital

# Add the location block from deploy/nginx-mcp.conf inside the server block
# Make sure it's at the same level as other location blocks (not nested)

# Test nginx config
sudo nginx -t

# Reload nginx
sudo systemctl reload nginx
```

### 9. Test the MCP endpoint

```bash
# Test health endpoint through nginx
curl https://mcp.techmavie.digital/datagovmy/health

# Test MCP endpoint
curl -X POST https://mcp.techmavie.digital/datagovmy/mcp \
  -H "Content-Type: application/json" \
  -H "Accept: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
```

## Client Configuration

### For Claude Desktop / Cursor / Windsurf

Add to your MCP configuration:

```json
{
  "mcpServers": {
    "malaysia-opendata": {
      "transport": "streamable-http",
      "url": "https://mcp.techmavie.digital/datagovmy/mcp"
    }
  }
}
```

### Using Your Own API Keys

You can provide your own API keys via URL query parameters:

```
https://mcp.techmavie.digital/datagovmy/mcp?googleMapsApiKey=YOUR_KEY
```

Or via headers:
- `X-Google-Maps-Api-Key: YOUR_KEY`
- `X-GrabMaps-Api-Key: YOUR_KEY`
- `X-AWS-Access-Key-Id: YOUR_KEY`
- `X-AWS-Secret-Access-Key: YOUR_KEY`
- `X-AWS-Region: ap-southeast-5`

**Supported Query Parameters:**

| Parameter | Description |
|-----------|-------------|
| `googleMapsApiKey` | Google Maps API key for geocoding |
| `grabMapsApiKey` | GrabMaps API key for Southeast Asia geocoding |
| `awsAccessKeyId` | AWS Access Key ID for AWS Location Service |
| `awsSecretAccessKey` | AWS Secret Access Key |
| `awsRegion` | AWS Region (default: ap-southeast-5) |

User-provided keys take priority over server defaults.

> **⚠️ Important: GrabMaps Requirements**
> 
> To use GrabMaps geocoding, you need **ALL FOUR** of these parameters:
> - `grabMapsApiKey`
> - `awsAccessKeyId`
> - `awsSecretAccessKey`
> - `awsRegion`
> 
> GrabMaps uses AWS Location Service under the hood, so AWS credentials are required alongside the GrabMaps API key. Without any one of these, GrabMaps will not work.

### For MCP Inspector

```bash
npx @modelcontextprotocol/inspector
# Select "Streamable HTTP"
# Enter URL: https://mcp.techmavie.digital/datagovmy/mcp
```

## Analytics Dashboard

The MCP server includes a built-in analytics dashboard that tracks:
- **Total requests and tool calls**
- **Tool usage distribution** (doughnut chart)
- **Hourly request trends** (last 24 hours)
- **Requests by endpoint** (bar chart)
- **Top clients by user agent**
- **Recent tool calls feed**

### Analytics Endpoints

| Endpoint | Description |
|----------|-------------|
| `/analytics` | Full analytics summary (JSON) |
| `/analytics/tools` | Detailed tool usage stats (JSON) |
| `/analytics/dashboard` | Visual dashboard with charts (HTML) |

**Dashboard URL:** `https://mcp.techmavie.digital/datagovmy/analytics/dashboard`

The dashboard auto-refreshes every 30 seconds.

## Management Commands

### View logs

```bash
cd /opt/mcp-servers/datagovmy
docker compose logs -f
```

### Restart the server

```bash
docker compose restart
```

### Update to latest version

```bash
git pull origin main
docker compose up -d --build
```

### Stop the server

```bash
docker compose down
```

## GitHub Actions Auto-Deploy

The repository includes a GitHub Actions workflow (`.github/workflows/deploy-vps.yml`) that automatically deploys to your VPS when you push to the `main` branch.

### Required GitHub Secrets

Set these in your repository settings (Settings → Secrets and variables → Actions):

| Secret | Description |
|--------|-------------|
| `VPS_HOST` | Your VPS IP address |
| `VPS_USERNAME` | SSH username (e.g., root) |
| `VPS_SSH_KEY` | Your private SSH key |
| `VPS_PORT` | SSH port (usually 22) |

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `PORT` | 8080 | HTTP server port (internal) |
| `HOST` | 0.0.0.0 | Bind address |
| `GOOGLE_MAPS_API_KEY` | (optional) | For enhanced geocoding |
| `GRABMAPS_API_KEY` | (optional) | For Southeast Asia geocoding |
| `AWS_ACCESS_KEY_ID` | (optional) | For AWS Location Service |
| `AWS_SECRET_ACCESS_KEY` | (optional) | For AWS Location Service |
| `AWS_REGION` | ap-southeast-5 | AWS region for Location Service |

## Troubleshooting

### Container not starting

```bash
docker compose logs mcp-datagovmy
```

### Nginx 502 Bad Gateway

- Check if container is running: `docker compose ps`
- Check container logs: `docker compose logs`
- Verify port binding: `docker port mcp-datagovmy`

### Test MCP connection

```bash
# List tools
curl -X POST https://mcp.techmavie.digital/datagovmy/mcp \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'

# Call hello tool
curl -X POST https://mcp.techmavie.digital/datagovmy/mcp \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"my_hello","arguments":{}}}'
```

## Security Notes

- The MCP server runs behind nginx with SSL
- CORS is configured to allow all origins (required for MCP clients)
- No authentication is required (public open data)
- Rate limiting can be added at nginx level if needed

## Available Tools

This MCP server provides tools for:

- **Data Catalogue** - Search and access datasets from data.gov.my
- **OpenDOSM** - Department of Statistics Malaysia data
- **Weather** - Forecasts and warnings from MET Malaysia
- **Transport** - GTFS data for public transit
- **Flood** - Flood warning information
- **Parquet** - Parse parquet data files
- **Unified Search** - Search across all data sources

```

--------------------------------------------------------------------------------
/src/transport.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import { prefixToolName } from './utils/tool-naming.js';

// API Base URL for Malaysia Open Data API
const API_BASE_URL = 'https://api.data.gov.my';

// GTFS endpoints - correct endpoints for Malaysia Open Data API
const DATA_CATALOGUE_ENDPOINT = '/data-catalogue';
const GTFS_STATIC_ENDPOINT = '/gtfs-static';
const GTFS_REALTIME_ENDPOINT = '/gtfs-realtime';

/**
 * Registers public-transport (GTFS) tools on the given MCP server:
 * agency discovery via the data catalogue, catalogue-backed GTFS datasets,
 * and the GTFS static / realtime endpoints. All tools return their payload
 * as pretty-printed JSON text and report failures as text rather than
 * throwing.
 */
export function registerTransportTools(server: McpServer) {
  // List transport agencies
  server.tool(
    prefixToolName('list_transport_agencies'),
    'Lists available transport agencies with GTFS data',
    {
      limit: z.number().min(1).optional().describe('Maximum number of agencies to return'),
      offset: z.number().min(0).optional().describe('Number of agencies to skip'),
    },
    async ({ limit = 10, offset = 0 }) => {
      try {
        // Using data catalogue to list GTFS datasets
        const url = `${API_BASE_URL}${DATA_CATALOGUE_ENDPOINT}`;
        const params: Record<string, any> = {
          limit,
          // Fix: offset was declared in the input schema but never sent to
          // the API, so pagination silently did nothing.
          offset,
          meta: true,
          contains: 'gtfs', // Search for GTFS datasets
        };

        const response = await axios.get(url, { params });
        const data = response.data;

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching transport agencies: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );

  // Get transport data
  server.tool(
    prefixToolName('get_transport_data'),
    'Gets GTFS data for a specific transport agency',
    {
      dataset_id: z.string().describe('ID of the GTFS dataset (e.g., "gtfs_rapidkl", "gtfs_prasarana")'),
      limit: z.number().min(1).optional().describe('Maximum number of records to return'),
      offset: z.number().min(0).optional().describe('Number of records to skip'),
    },
    async ({ dataset_id, limit = 10, offset = 0 }) => {
      try {
        const url = `${API_BASE_URL}${DATA_CATALOGUE_ENDPOINT}`;
        const params = { id: dataset_id, limit, offset };

        const response = await axios.get(url, { params });
        const data = response.data;

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(data, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: `Error fetching transport data: ${error instanceof Error ? error.message : 'Unknown error'}`,
            },
          ],
        };
      }
    }
  );

  // GTFS Static API
  server.tool(
    prefixToolName('get_gtfs_static'),
    'Gets GTFS static data for a specific transport provider',
    {
      provider: z.string().describe('Provider name (e.g., "rapidkl", "ktmb", "prasarana")'),
      category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
      limit: z.number().min(1).optional().describe('Maximum number of records to return'),
      offset: z.number().min(0).optional().describe('Number of records to skip'),
    },
    // limit/offset stay in the schema for backward compatibility, but are
    // intentionally not forwarded: this endpoint returns a whole ZIP file,
    // not paginated records.
    async ({ provider, category }) => {
      try {
        // Use the GTFS static endpoint with provider as path parameter
        const url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${provider}`;
        const params: Record<string, any> = { meta: true };

        // Prasarana hosts multiple networks, so a category must be chosen.
        if (provider === 'prasarana' && !category) {
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  error: 'Category parameter is required for prasarana provider',
                }, null, 2),
              },
            ],
          };
        }

        if (category) {
          params.category = category;
        }

        // In a real implementation, this would download a ZIP file
        // For now, return the URL that would be used
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: `GTFS static data URL for provider: ${provider}${category ? `, category: ${category}` : ''}`,
                url,
                params,
                note: 'This endpoint returns a ZIP file in the actual implementation'
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to get GTFS static data',
                message: error instanceof Error ? error.message : 'Unknown error',
              }, null, 2),
            },
          ],
        };
      }
    }
  );

  // GTFS Realtime API
  server.tool(
    prefixToolName('get_gtfs_realtime_vehicle_position'),
    'Gets GTFS realtime vehicle position data for a specific transport provider',
    {
      provider: z.string().describe('Provider name (e.g., "rapidkl", "ktmb", "prasarana")'),
      category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
      limit: z.number().min(1).optional().describe('Maximum number of records to return'),
      offset: z.number().min(0).optional().describe('Number of records to skip'),
    },
    // limit/offset stay in the schema for backward compatibility, but are
    // intentionally not forwarded: this endpoint returns a Protocol Buffer
    // feed, not paginated records.
    async ({ provider, category }) => {
      try {
        // Use the GTFS realtime endpoint with provider as path parameter
        const url = `${API_BASE_URL}${GTFS_REALTIME_ENDPOINT}/${provider}`;
        const params: Record<string, any> = { meta: true };

        // Prasarana hosts multiple networks, so a category must be chosen.
        if (provider === 'prasarana' && !category) {
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  error: 'Category parameter is required for prasarana provider',
                }, null, 2),
              },
            ],
          };
        }

        if (category) {
          params.category = category;
        }

        // In a real implementation, this would return a Protocol Buffer file
        // For now, return the URL that would be used
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: `GTFS realtime vehicle position data URL for provider: ${provider}${category ? `, category: ${category}` : ''}`,
                url,
                params,
                note: 'This endpoint returns a Protocol Buffer file in the actual implementation'
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to get GTFS realtime data',
                message: error instanceof Error ? error.message : 'Unknown error',
              }, null, 2),
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/malaysia_open_data_mcp_plan.md:
--------------------------------------------------------------------------------

```markdown
# Malaysia Open Data MCP Development Plan

## Table of Contents
- [Understanding Malaysia's Open Data API](#understanding-malaysias-open-data-api)
  - [Core Components](#core-components)
  - [Available APIs](#available-apis)
  - [Query Parameters](#query-parameters)
  - [Response Format](#response-format)
- [Reference: Singapore's Gahmen MCP](#reference-singapores-gahmen-mcp)
  - [Overview](#overview)
  - [Features](#features)
  - [Available Tools](#available-tools)
- [Malaysia Open Data MCP Development Plan](#malaysia-open-data-mcp-development-plan-1)
  - [Proposed MCP Structure and Tools](#proposed-mcp-structure-and-tools)
  - [Implementation Approach](#implementation-approach)
  - [Technical Considerations](#technical-considerations)
  - [Example Implementation Structure](#example-implementation-structure)
  - [Next Steps](#next-steps)

## Understanding Malaysia's Open Data API

### Core Components

1. **Base Structure**: 
   - Malaysia's Open Data API is a RESTful API built using the Django framework
   - Designed to provide transparent data access to all citizens
   - Goals include transparent data access, ease of use, and diverse data sets

2. **Base URL**: 
   - All API requests use `https://api.data.gov.my` as the base URL
   - Specific endpoints are appended to this base URL

3. **Authentication**: 
   - The API can be used with or without an API token
   - Tokens provide higher rate limits and are available upon request
   - To request a token, email the Data Terbuka team (dataterbuka@jdn.gov.my — verify the current address in the official API documentation) with:
     - Your name
     - Your email address
     - Reason for requesting an increased rate limit
   - Authentication header format: `Authorization: Token <YOUR_TOKEN_HERE>`

4. **Rate Limits**: 
   - Different rate limits apply based on whether you're using an API token or not
   - If limits are exceeded, a 429 Too Many Requests error is returned

### Available APIs

1. **Static APIs**:
   - **Data Catalogue API**: 
     - Endpoint: `https://api.data.gov.my/data-catalogue`
     - Required parameter: `id` (dataset identifier)
     - Example: `https://api.data.gov.my/data-catalogue?id=fuelprice`
     - Provides access to various datasets in the data catalogue
     - Dataset IDs can be found on the [Data Catalogue page](https://data.gov.my/data-catalogue)

   - **OpenDOSM API**: 
     - Endpoint: `https://api.data.gov.my/opendosm`
     - Required parameter: `id` (dataset identifier)
     - Example: `https://api.data.gov.my/opendosm?id=cpi_core`
     - Provides access to Department of Statistics Malaysia data
     - Dataset IDs can be found on the [OpenDOSM Data Catalogue page](https://open.dosm.gov.my/data-catalogue)

2. **Realtime APIs**:
   - **Weather API**: 
     - Endpoints:
       - 7-day forecast: `https://api.data.gov.my/weather/forecast`
       - Weather warnings: `https://api.data.gov.my/weather/warning`
       - Earthquake warnings: `https://api.data.gov.my/weather/warning/earthquake`
     - Data source: Malaysian Meteorological Department (MET Malaysia)
     - Update frequency:
       - 7-day forecast: Updated daily
       - Warning data: Updated when required

   - **Transport API (GTFS Static)**: 
     - Endpoint: `https://api.data.gov.my/gtfs-static/<agency>`
     - Provides standardized public transportation schedules and geographic information
     - Available agencies:
       - myBAS Johor Bahru: Bus service in Johor Bahru
       - KTMB: Railway operator across Malaysia
       - Prasarana: Operator of LRT, MRT, monorail, and bus services
     - Update frequency:
       - myBAS Johor Bahru: As required
       - Prasarana: As required
       - KTMB: Daily at 00:01:00

   - **Transport API (GTFS Realtime)**: 
     - Provides real-time updates to public transportation data

### Query Parameters

The API supports various filtering options:

1. **Row-level filtering**:
   - `filter`: Case-sensitive exact string match
     - Format: `?filter=<value>@<column>` or `?filter=<value_1>@<column_1>,<value_2>@<column_2>,...`
   
   - `ifilter`: Case-insensitive exact string match
     - Format: `?ifilter=<value>@<column>`
   
   - `contains`: Case-sensitive partial string match
     - Format: `?contains=<value>@<column>`
   
   - `icontains`: Case-insensitive partial string match
     - Format: `?icontains=<value>@<column>`
   
   - `range`: Filter by numerical range
     - Format: `?range=<column>[<begin>:<end>]`
   
   - `date_start`/`date_end`: Filter by date range
     - Format: `?date_start=<YYYY-MM-DD>@<date_column>` and `?date_end=<YYYY-MM-DD>@<date_column>`
   
   - `timestamp_start`/`timestamp_end`: Filter by timestamp range
     - Format: `?timestamp_start=<YYYY-MM-DD HH:MM:SS>@<timestamp_column>` and `?timestamp_end=<YYYY-MM-DD HH:MM:SS>@<timestamp_column>`

2. **Result manipulation**:
   - `sort`: Sort results by specified columns
     - Format: `?sort=<column>` or `?sort=<column1>,<column2>,...`
     - Prefix column with `-` for descending order (e.g., `-column`)
   
   - `limit`: Limit number of records returned
     - Format: `?limit=<value>`

3. **Column-level filtering**:
   - `include`: Specify which columns to include
     - Format: `?include=<column1,column2,...>`
   
   - `exclude`: Specify which columns to exclude
     - Format: `?exclude=<column1,column2,...>`
   - Note: When both are provided, `include` takes precedence

### Response Format

1. **Successful Responses**:
   - Status code: 200 OK
   - Default format: List of records
   - With `meta=true` parameter:
     ```json
     {
       "meta": {...},
       "data": [...]
     }
     ```
     - `meta`: Basic information about the requested resource
     - `data`: Collection of requested records

2. **Error Responses**:
   - Format:
     ```json
     {
       "status": <int>,
       "errors": [...]
     }
     ```
     - `status`: Response code corresponding to the error
     - `errors`: Error messages or descriptions

## Reference: Singapore's Gahmen MCP

### Overview

The Gahmen MCP provides a Model Context Protocol server for Singapore's data.gov.sg APIs, making government datasets easily accessible through AI systems.

### Features

- Access to data.gov.sg collections and datasets
- Search functionality within datasets using CKAN datastore API
- Dataset download with filtering support
- Built-in rate limiting (5 requests per minute, 12-second minimum interval)
- No authentication required (data.gov.sg APIs are public)

### Available Tools

1. **Collections**:
   - `list_collections`: List all collections on data.gov.sg
   - `get_collection`: Get metadata for a specific collection

2. **Datasets**:
   - `list_datasets`: List all datasets on data.gov.sg
   - `get_dataset_metadata`: Get metadata for a specific dataset
   - `search_dataset`: Search for data within a dataset using CKAN datastore
   - `initiate_download`: Start downloading a dataset with optional filtering
   - `poll_download`: Check download status and get download URL

3. **Usage Examples**:
   ```javascript
   // Search population data
   search_dataset({
     resource_id: "d_8b84c4ee58e3cfc0ece0d773c8ca6abc",
     q: { "year": "2023" },
     limit: 10
   })
   
   // Get collection 522 with all dataset metadata
   get_collection({
     collectionId: "522",
     withDatasetMetadata: true
   })
   ```

## Malaysia Open Data MCP Development Plan

Based on both the Malaysia Open Data API and the Gahmen MCP reference, here's our plan for developing an MCP for Malaysia's Open Data:

### Proposed MCP Structure and Tools

1. **Data Catalogue Tools**:
   - `list_datasets`: List available datasets in the Data Catalogue
   - `get_dataset`: Get data from a specific dataset with filtering options
   - `search_datasets`: Search across datasets by keywords

2. **OpenDOSM Tools**:
   - `list_dosm_datasets`: List available DOSM datasets
   - `get_dosm_dataset`: Get data from a specific DOSM dataset with filtering

3. **Weather Tools**:
   - `get_weather_forecast`: Get 7-day weather forecast with location filtering
   - `get_weather_warnings`: Get current weather warnings
   - `get_earthquake_warnings`: Get earthquake warnings

4. **Transport Tools**:
   - `list_transport_agencies`: List available transport agencies
   - `get_transport_data`: Get GTFS data for a specific agency

5. **General Tools**:
   - `search_all`: Search across all available datasets

### Implementation Approach

1. **Setup MCP Server**:
   - Use Smithery CLI for development and building
   - Structure the project with clear separation of concerns

2. **API Integration**:
   - Create wrapper functions for Malaysia Open Data API endpoints
   - Implement proper error handling and rate limiting
   - Support authentication for higher rate limits

3. **Query Parameter Handling**:
   - Create helper functions to build query parameters
   - Support all filtering options provided by the API

4. **Response Processing**:
   - Parse and format API responses for MCP consumption
   - Handle pagination and large result sets

5. **Documentation**:
   - Provide clear documentation for each tool
   - Include usage examples

### Technical Considerations

1. **Rate Limiting**:
   - Implement rate limiting to respect API quotas
   - Consider different limits for authenticated vs. unauthenticated requests

2. **Caching**:
   - Implement caching for frequently accessed data
   - Respect data update frequencies

3. **Error Handling**:
   - Provide meaningful error messages
   - Implement retries for transient failures

4. **Authentication**:
   - Support API token authentication
   - Store tokens securely

### Example Implementation Structure

```
/
├── src/
│   ├── index.js            # Main entry point
│   ├── tools/              # MCP tools implementation
│   │   ├── catalogue.js    # Data Catalogue tools
│   │   ├── dosm.js         # OpenDOSM tools
│   │   ├── weather.js      # Weather tools
│   │   └── transport.js    # Transport tools
│   ├── api/                # API client implementations
│   │   ├── client.js       # Base API client
│   │   ├── catalogue.js    # Data Catalogue API
│   │   ├── dosm.js         # OpenDOSM API
│   │   ├── weather.js      # Weather API
│   │   └── transport.js    # Transport API
│   └── utils/              # Utility functions
│       ├── rate-limiter.js # Rate limiting
│       ├── query-builder.js # Query parameter builder
│       └── response-parser.js # Response parsing
├── package.json
└── README.md
```

### Next Steps

1. Set up the development environment with Smithery CLI
2. Create the base API client with rate limiting
3. Implement the Data Catalogue tools as a starting point
4. Expand to other APIs (OpenDOSM, Weather, Transport)
5. Test thoroughly with various query parameters
6. Document the MCP and provide usage examples

```

--------------------------------------------------------------------------------
/src/unified-search.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';

// Import search functions and types from both modules
import { searchDatasets, getAllDatasets, DatasetMetadata } from './datacatalogue.tools.js';
import { searchDashboards, getAllDashboards, DashboardMetadata } from './dashboards.tools.js';
import { prefixToolName } from './utils/tool-naming.js';

// Define result interfaces
// A single hit returned by the unified search, normalized across both corpora.
interface SearchResult {
  type: 'dataset' | 'dashboard'; // which corpus the hit came from
  id: string; // dataset id, or the dashboard's dashboard_name
  title: string; // display title (dashboard names are prettified from snake_case)
  description?: string; // dataset description, or the dashboard's route
  url?: string; // public portal URL for the resource
  score: number; // relevance score; higher is more relevant
}

/*
 * NOTE(review): the description below documents `unifiedSearch`, which is
 * defined further down (after its helper functions) — it does not describe
 * the `tokenizeQuery` helper that immediately follows.
 *
 * Unified search across both datasets and dashboards
 * @param query Search query
 * @param prioritizeType Optional type to prioritize in results ('dataset' or 'dashboard')
 * @returns Combined search results from both sources
 */
/**
 * Split a raw query into lowercase alphanumeric search terms.
 * Any non-alphanumeric character is treated as a separator; empty
 * fragments are dropped.
 */
function tokenizeQuery(query: string): string[] {
  const lowered = query.toLowerCase();
  const cleaned = lowered.replace(/[^a-z0-9\s]/g, ' ');
  const fragments = cleaned.split(/\s+/);
  return fragments.filter(fragment => fragment.length > 0);
}

/**
 * Produce normalized variants of a term for fuzzy matching.
 * Hyphens are stripped, and a term beginning with 'e' also yields the
 * de-prefixed form (e.g. 'e-payment' -> 'epayment' and 'payment').
 * NOTE: this applies to ANY leading 'e', so ordinary words such as
 * 'electronic' also produce a first-letter-removed variant.
 */
function normalizeTerm(term: string): string[] {
  const base = term.replace(/-/g, '').trim();
  const hasEPrefix = base.startsWith('e') && base.length > 1;
  return hasEPrefix ? [base, base.slice(1)] : [base];
}

// A small set of common synonyms for frequently used terms
// Keys are canonical terms; matching either the key or any listed synonym
// expands the search to the whole group. Insertion order matters:
// expandSearchTerms stops at the first group containing the term (e.g.
// 'data' hits 'statistics' before 'dataset'), so keep this order stable.
const COMMON_SYNONYMS: Record<string, string[]> = {
  'payment': ['payment', 'pay', 'transaction'],
  'electronic': ['electronic', 'digital', 'online', 'cashless'],
  'statistics': ['statistics', 'stats', 'data', 'figures', 'numbers'],
  'dashboard': ['dashboard', 'visualization', 'chart', 'graph'],
  'dataset': ['dataset', 'data set', 'database', 'data'],
};

// Helper function to expand search terms for better matching
function expandSearchTerms(term: string): string[] {
  const normalizedTerm = term.toLowerCase().trim();
  
  // Start with the original term
  let expanded = [normalizedTerm];
  
  // Add normalized variations
  expanded = expanded.concat(normalizeTerm(normalizedTerm));
  
  // Check for common synonyms
  for (const [key, synonyms] of Object.entries(COMMON_SYNONYMS)) {
    if (normalizedTerm === key || synonyms.includes(normalizedTerm)) {
      expanded = expanded.concat(synonyms);
      break;
    }
  }
  
  // Basic stemming for plurals
  if (normalizedTerm.endsWith('s')) {
    expanded.push(normalizedTerm.slice(0, -1)); // Remove trailing 's'
  } else {
    expanded.push(normalizedTerm + 's'); // Add trailing 's'
  }
  
  // Remove duplicates and return
  return [...new Set(expanded)];
}

/**
 * Unified search across both datasets and dashboards.
 *
 * Hits from each corpus are re-scored against the raw query (exact substring
 * matches score highest) and against the expanded variants of each query
 * token, optionally boosted by a prioritized type, then returned as one list
 * sorted by descending relevance.
 *
 * @param query Search query
 * @param prioritizeType Optional type whose results receive a +5 score boost
 * @returns Combined, score-sorted results from both sources
 */
function unifiedSearch(query: string, prioritizeType?: 'dataset' | 'dashboard'): SearchResult[] {
  const loweredQuery = query.toLowerCase();
  // Expand every token of the query into its matching variants.
  const expandedTerms = tokenizeQuery(query).flatMap(term => expandSearchTerms(term));

  const fromDatasets: SearchResult[] = searchDatasets(query).map((dataset: DatasetMetadata) => {
    const title = dataset.title_en.toLowerCase();
    const id = dataset.id.toLowerCase();
    const description = dataset.description_en.toLowerCase();

    // Exact whole-query matches dominate; individual term hits add less.
    let score = 0;
    if (title.includes(loweredQuery)) score += 10;
    if (description.includes(loweredQuery)) score += 5;
    for (const term of expandedTerms) {
      if (title.includes(term)) score += 3;
      if (id.includes(term)) score += 2;
      if (description.includes(term)) score += 1;
    }

    return {
      type: 'dataset' as const,
      id: dataset.id,
      title: dataset.title_en,
      description: dataset.description_en,
      url: `https://data.gov.my/data-catalogue/${dataset.id}`,
      score,
    };
  });

  const fromDashboards: SearchResult[] = searchDashboards(query).map((dashboard: DashboardMetadata) => {
    const name = dashboard.dashboard_name.toLowerCase();
    const route = (dashboard.route || '').toLowerCase();

    let score = 0;
    if (name.includes(loweredQuery)) score += 10;
    if (route.includes(loweredQuery)) score += 5;
    for (const term of expandedTerms) {
      if (name.includes(term)) score += 3;
      if (route.includes(term)) score += 2;
    }

    // Prettify snake_case dashboard names into Title Case for display.
    const prettyTitle = dashboard.dashboard_name
      .replace(/_/g, ' ')
      .replace(/\b\w/g, (l: string) => l.toUpperCase());

    // OpenDOSM dashboards live on a different host than the main portal;
    // dashboards without a route get a relative placeholder URL.
    const url = dashboard.route
      ? (dashboard.sites?.includes('opendosm')
          ? `https://open.dosm.gov.my${dashboard.route}`
          : `https://data.gov.my${dashboard.route}`)
      : `/dashboard/${dashboard.dashboard_name}`;

    return {
      type: 'dashboard' as const,
      id: dashboard.dashboard_name,
      title: prettyTitle,
      description: dashboard.route || '',
      url,
      score,
    };
  });

  // Merge both corpora, applying the optional type boost, then rank.
  const merged = [...fromDatasets, ...fromDashboards].map(result =>
    prioritizeType && result.type === prioritizeType
      ? { ...result, score: result.score + 5 }
      : result
  );

  return merged.sort((a, b) => b.score - a.score);
}

/**
 * Heuristic: does the query look like it's asking for a dashboard?
 * @param query Search query
 * @returns True when any dashboard-ish keyword appears in the query
 */
function isDashboardQuery(query: string): boolean {
  const needle = query.toLowerCase();
  return ['dashboard', 'chart', 'graph', 'visualization', 'visualisation', 'stats', 'statistics']
    .some(keyword => needle.includes(keyword));
}

/**
 * Heuristic: does the query look like it's asking for a raw dataset?
 * @param query Search query
 * @returns True when any dataset-ish keyword appears in the query
 */
function isDatasetQuery(query: string): boolean {
  const needle = query.toLowerCase();
  const keywords = ['dataset', 'data', 'catalogue', 'catalog', 'file', 'download', 'csv', 'excel', 'raw'];
  return keywords.some(keyword => needle.includes(keyword));
}

/**
 * Intelligent search with automatic fallback: if a type-prioritized search
 * yields fewer than three hits, re-run the search prioritizing the opposite
 * type and merge the two result sets (fallback results compete on score).
 *
 * @param query Search query
 * @param prioritizeType Optional type to prioritize in results
 * @param limit Maximum number of results to return
 * @returns Results plus metadata about whether a fallback search was used
 */
function intelligentSearch(query: string, prioritizeType?: 'dataset' | 'dashboard', limit: number = 10): {
  results: SearchResult[];
  usedFallback: boolean;
  fallbackType?: 'dataset' | 'dashboard';
  originalType?: 'dataset' | 'dashboard';
} {
  const primary = unifiedSearch(query, prioritizeType);

  // Enough hits (or nothing to fall back from): return primary results.
  if (primary.length >= 3 || !prioritizeType) {
    return {
      results: primary.slice(0, limit),
      usedFallback: false,
      originalType: prioritizeType,
    };
  }

  // Sparse results: retry with the opposite content type boosted.
  const fallbackType: 'dataset' | 'dashboard' =
    prioritizeType === 'dataset' ? 'dashboard' : 'dataset';
  const secondary = unifiedSearch(query, fallbackType);

  if (secondary.length > 0) {
    const merged = [...secondary, ...primary]
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);

    return {
      results: merged,
      usedFallback: true,
      fallbackType,
      originalType: prioritizeType,
    };
  }

  // Fallback found nothing either; report the original (sparse) results.
  return {
    results: primary.slice(0, limit),
    usedFallback: false,
    originalType: prioritizeType,
  };
}

/**
 * Register the unified `search_all` tool on the given MCP server.
 *
 * The tool searches datasets and dashboards together, infers which content
 * type to prioritize from the query wording when the caller does not specify
 * one, and reports in the payload whether a fallback search was used.
 */
export function registerUnifiedSearchTools(server: McpServer) {
  // Unified search across datasets and dashboards
  server.tool(
    prefixToolName('search_all'),
    '⭐⭐⭐ PRIMARY SEARCH TOOL: Always use this first for any data or visualization queries. Searches across both datasets and dashboards with intelligent fallback. ⭐⭐⭐',
    {
      query: z.string().describe('Search query to match against all content'),
      limit: z.number().min(1).max(20).optional().describe('Number of results to return (1-20)'),
      prioritize: z.enum(['dataset', 'dashboard']).optional().describe('Type of content to prioritize in results'),
    },
    async ({ query, limit = 10, prioritize }) => {
      try {
        // Determine if query suggests a specific content type
        // (only when the caller did not explicitly pass `prioritize`)
        let prioritizeType = prioritize;
        if (!prioritizeType) {
          if (isDashboardQuery(query)) {
            prioritizeType = 'dashboard';
          } else if (isDatasetQuery(query)) {
            prioritizeType = 'dataset';
          }
          
          // Special case for domain-specific queries
          const lowerQuery = query.toLowerCase();
          
          // Payment-related terms are more likely to be found in dashboards
          // NOTE: these checks run after the generic keyword checks above,
          // so they can override a 'dataset' guess for such queries.
          if (lowerQuery.includes('payment') || 
              lowerQuery.includes('pay') || 
              lowerQuery.includes('transaction') || 
              lowerQuery.includes('electronic') || 
              lowerQuery.includes('digital')) {
            prioritizeType = 'dashboard';
          }
          
          // Statistics-related terms are more likely to be found in dashboards
          if (lowerQuery.includes('statistics') || 
              lowerQuery.includes('stats') || 
              lowerQuery.includes('chart') || 
              lowerQuery.includes('graph')) {
            prioritizeType = 'dashboard';
          }
        }

        // Get intelligent search results with automatic fallback
        const { 
          results: searchResults, 
          usedFallback, 
          fallbackType,
          originalType 
        } = intelligentSearch(query, prioritizeType as 'dataset' | 'dashboard' | undefined, limit);
        
        // Group results by type for better presentation
        const groupedResults = {
          datasets: searchResults.filter(r => r.type === 'dataset'),
          dashboards: searchResults.filter(r => r.type === 'dashboard')
        };
        
        // MCP tools return results as JSON serialized into a text block.
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: 'Unified search results',
                query,
                total_matches: searchResults.length,
                showing: searchResults.length,
                prioritized_type: originalType || 'none',
                used_fallback: usedFallback,
                fallback_type: fallbackType,
                search_note: usedFallback ? 
                  `Limited results found in ${originalType} search, automatically included relevant ${fallbackType} results` : 
                  undefined,
                results: searchResults,
                grouped_results: groupedResults,
                data_access_notes: {
                  dashboards: 'Dashboard data is visualized on the web interface. Raw data files (e.g., parquet) cannot be directly accessed through this API.',
                  datasets: 'Dataset metadata is available through this API. For downloading the actual data files, please visit the dataset page on the data portal.',
                },
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        // Surface failures as a structured error payload instead of throwing.
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to perform unified search',
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/TOOLS.md:
--------------------------------------------------------------------------------

```markdown
# Malaysia Open Data MCP Server Tools

This document provides an overview of the available tools in the Malaysia Open Data MCP Server and best practices for using them.

## Available Tools

### Unified Search Tools

#### `search_all`

This is the **recommended primary search tool** that searches across both datasets and dashboards simultaneously. It intelligently determines whether to prioritize datasets or dashboards based on the query and automatically falls back to the other type if limited results are found.

```json
{
  "query": "your search query",
  "limit": 10, // optional, default is 10
  "prioritize": "dataset" // optional, can be "dataset" or "dashboard"
}
```

**Example usage:**
```
search_all
{
  "query": "e-payment statistics"
}
```

**When to use:** This should be your default search tool for most queries, as it provides the most comprehensive results.

### Data Catalogue Tools

#### `list_datasets_catalogue`

Lists all datasets from the comprehensive catalogue with rich metadata.

#### `search_datasets_catalogue`

Searches datasets by keywords in title or description. Only use this if you specifically need to search only in datasets.

#### `filter_datasets_catalogue`

Filters datasets by various criteria such as frequency, geography, etc.

#### `get_dataset_filters`

Gets available filter options for datasets.

#### `get_dataset_details`

Gets comprehensive metadata for a dataset by ID.

### Dashboard Tools

#### `list_dashboards`

Lists all available dashboards with pagination.

#### `search_dashboards`

Searches dashboards by name or route. Only use this if you specifically need to search only in dashboards.

#### `get_dashboard_by_name`

Gets detailed information about a specific dashboard by name.

#### `get_dashboard_charts`

Gets chart information for a specific dashboard.

## Best Practices

1. **Always start with `search_all`** for any general query about data or visualizations. This tool will search both datasets and dashboards and provide the most relevant results.

2. **Use specific tools only when needed** - for example, if you already know you need a specific dataset ID or dashboard name.

3. **Be aware of data format limitations**:
   - Dashboard data is visualized on the web interface. Raw data files (e.g., parquet) cannot be directly accessed through this API.
   - Dataset metadata is available through this API. For downloading the actual data files, users should visit the dataset page on the data portal.

4. **Use correct URLs** when referring to resources:
   - For general data portal resources: `https://data.gov.my/...`
   - For OpenDOSM resources: `https://open.dosm.gov.my/...`

5. **Handle empty results properly** - if a search returns no results, try broadening the search terms or using the `search_all` tool which automatically searches both datasets and dashboards.

### Geocoding Tools

#### `geocode_location`

Geocode a location name to coordinates using available geocoding services. This tool supports multiple geocoding providers with intelligent selection and fallback.

```json
{
  "query": "location name or address", // required: the location to geocode
  "country": "my", // optional: country code to limit results (default: "my" for Malaysia)
  "provider": "auto" // optional: preferred geocoding provider ("google", "grab", "nominatim", or "auto")
}
```

**Geocoding Providers:**

1. **Google Maps** - Requires a valid Google Maps API key set in `GOOGLE_MAPS_API_KEY` environment variable or MCP server config.

2. **GrabMaps via AWS Location Service** - For Southeast Asian locations. Requires proper setup:
   - GrabMaps API key set in environment variables or MCP server config:
     - `GRABMAPS_API_KEY`: Your GrabMaps API key
   - Valid AWS credentials with Location Service permissions:
     - `AWS_ACCESS_KEY_ID`: Your AWS Access Key ID
     - `AWS_SECRET_ACCESS_KEY`: Your AWS Secret Access Key
     - `AWS_REGION`: AWS region where your Place Index is created (default: `ap-southeast-5` for Malaysia)
   - A Place Index created in AWS Location Service with GrabMaps as the data provider (named `explore.place.Grab`)

3. **Nominatim (OpenStreetMap)** - Free, open-source geocoding service. No API key required, but has usage limits.

**Provider Selection Logic:**

When `provider` is set to `auto` (default), the tool uses the following logic:
- For Southeast Asian countries (MY, SG, TH, VN, PH, ID, MM, KH), tries GrabMaps first if credentials are available
- Tries Google Maps if API key is available
- Falls back to GrabMaps for non-Southeast Asian locations if not already tried
- Uses Nominatim as the final fallback option

**Supported providers:**
- `auto` - Automatically select the best provider based on location and available API keys
- `google` - Use Google Maps API (requires API key)
- `grab` - Use GrabMaps API (requires API key, optimized for Southeast Asia)
- `nominatim` - Use OpenStreetMap Nominatim API (free, no API key required)

**Example usage:**
```
geocode_location
{
  "query": "KLCC",
  "provider": "grab"
}
```

**When to use:** Use this tool when you need to convert a location name or address to geographic coordinates. For Malaysian locations, the GrabMaps provider is recommended when available as it's optimized for Southeast Asian locations.

### GTFS Transit Data Tools

The GTFS tools support intelligent provider and category normalization, allowing users to use common names instead of exact API parameters. For example, you can use "rapid penang" instead of specifying "prasarana" as the provider and "rapid-bus-penang" as the category.

### Geocoding Providers

The GTFS tools use geocoding to convert location names to coordinates. The following providers are supported:

1. **GrabMaps via AWS Location Service** - Preferred for Southeast Asia, requires AWS credentials and GrabMaps API key
2. **Google Maps** - Requires API key in `GOOGLE_MAPS_API_KEY` environment variable
3. **Nominatim (OpenStreetMap)** - Default fallback provider, no API key required

#### Provider Configuration

##### GrabMaps via AWS Location Service

To use GrabMaps for geocoding (recommended for Southeast Asia), you need:

1. AWS credentials with permissions to access AWS Location Service
2. A Place Index created in AWS Location Service with GrabMaps as the data provider
3. GrabMaps API key

Set the following environment variables:

```
GRABMAPS_API_KEY=your_grabmaps_api_key
AWS_ACCESS_KEY_ID=your_aws_access_key_id
AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
AWS_REGION=ap-southeast-5  # Malaysia region, or your preferred region
```

The Place Index name is expected to be `explore.place.Grab`. If you use a different name, you'll need to modify the code in `src/gtfs.tools.ts`.

##### Google Maps

To use Google Maps for geocoding, set the `GOOGLE_MAPS_API_KEY` environment variable.

##### Nominatim (OpenStreetMap)

Nominatim is always available as a fallback and requires no configuration.

#### Geocoding Provider Priority

The system will attempt to use providers in this order:

1. GrabMaps (if configured and the query is for a Southeast Asian country)
2. Google Maps (if configured)
3. Nominatim (always available as fallback)

This ensures the best geocoding results while maintaining reliability.

#### Supported Providers and Categories

**Direct Providers:**
- `mybas-johor` (also accepts: "mybas", "mybas johor", "mybas johor bahru")
- `ktmb` (also accepts: "ktm", "keretapi tanah melayu", "keretapi tanah melayu berhad")
- `prasarana` (requires a category)

**Prasarana Categories:**
- `rapid-rail-kl` (also accepts: "rapid rail", "rapid rail kl")
- `rapid-bus-kl` (also accepts: "rapid bus kl")
- `rapid-bus-penang` (also accepts: "rapid penang", "rapid bus penang")
- `rapid-bus-kuantan` (also accepts: "rapid kuantan", "rapid bus kuantan")
- `rapid-bus-mrtfeeder` (also accepts: "mrt feeder", "rapid bus mrt feeder")

#### `parse_gtfs_static`

Parses GTFS Static data (ZIP files with CSV data) for a specific transport provider and returns structured data.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "force_refresh": false // optional: force refresh the cache
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang" // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
}
```

**Example usage:**
```
parse_gtfs_static
{
  "provider": "ktmb"
}
```

**When to use:** This is a low-level tool. For most user queries about transit routes or stops, prefer using `get_transit_routes` or `get_transit_stops` instead. Only use this when you need access to the raw GTFS static data files.

#### `parse_gtfs_realtime`

Parses GTFS Realtime data (Protocol Buffers) for a specific transport provider and returns structured data.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "force_refresh": false // optional: force refresh the cache
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang" // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
}
```

**Example usage:**
```
parse_gtfs_realtime
{
  "provider": "prasarana",
  "category": "rapid-rail-kl"
}
```

**When to use:** When you need real-time information about vehicle positions for a specific transit provider. You can use common names directly (e.g., "rapid penang", "ktmb", "mybas johor") without needing to call `list_transport_agencies` first.

#### `get_transit_routes`

Retrieves transit routes for a specific provider from parsed GTFS Static data. This is the preferred tool for answering questions about transit routes.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "route_id": "1" // optional: filter by route_id
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang" // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
}
```

**Example usage:**
```
get_transit_routes
{
  "provider": "mybas-johor"
}
```

**When to use:** When you need information about transit routes without parsing the entire GTFS dataset.

#### `get_transit_stops`

Retrieves transit stops for a specific provider from parsed GTFS Static data.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "stop_id": "1", // optional: filter by stop_id
  "route_id": "LRT-KJ" // optional: filter stops by route (as used in the example below)
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang" // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
}
```

**Example usage:**
```
get_transit_stops
{
  "provider": "prasarana",
  "category": "rapid-rail-kl",
  "route_id": "LRT-KJ"
}
```

**When to use:** When you need information about transit stops, optionally filtered by route.

#### `find_nearest_transit_stops`

Finds the nearest transit stops to a given location. This is the preferred tool for answering questions about finding nearby bus stops.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "latitude": 3.1390, // required: latitude of the user's location
  "longitude": 101.6869, // required: longitude of the user's location
  "limit": 5, // optional: maximum number of stops to return (default: 5)
  "max_distance": 5 // optional: maximum distance in kilometers (default: 5)
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang", // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
  "latitude": 5.4141, 
  "longitude": 100.3292
}
```

**Example usage:**
```
find_nearest_transit_stops
{
  "provider": "rapid penang",
  "latitude": 5.4141,
  "longitude": 100.3292,
  "limit": 3
}
```

**When to use:** When a user asks about finding the nearest bus stop or station to their location. This tool calculates distances and returns stops sorted by proximity.

#### `get_transit_arrivals`

Get real-time transit arrivals at a specific stop. This is the preferred tool for answering questions about when the next bus or train will arrive.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "stop_id": "1234", // required: ID of the stop to get arrivals for
  "route_id": "LRT-KJ", // optional: filter arrivals by route
  "limit": 10 // optional: maximum number of arrivals to return (default: 10)
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang", // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
  "stop_id": "1001"
}
```

**Example usage:**
```
get_transit_arrivals
{
  "provider": "rapid penang",
  "stop_id": "1001",
  "limit": 5
}
```

**When to use:** When a user asks about real-time bus or train arrivals at a specific stop, such as "When will the next bus arrive at my stop?" or "Show me arrival times for Rapid Penang buses at stop X".

#### `search_transit_stops_by_location`

Search for transit stops near a named location. This tool geocodes the location name to coordinates using the multi-provider geocoding system (Google Maps, GrabMaps, or Nominatim), then finds nearby stops with optional real-time arrival information. For Malaysian locations, GrabMaps is preferred when available as it's optimized for Southeast Asian locations.

```json
{
  "provider": "ktmb", // required: "mybas-johor", "ktmb", or "prasarana" (or common names)
  "category": "rapid-rail-kl", // required only for prasarana provider
  "location": "KLCC", // required: location name to search for
  "country": "my", // optional: country code to limit geocoding results (default: "my" for Malaysia)
  "limit": 5, // optional: maximum number of stops to return (default: 5)
  "max_distance": 5, // optional: maximum distance in kilometers (default: 5)
  "include_arrivals": true, // optional: whether to include upcoming arrivals for each stop (default: true)
  "arrivals_limit": 3 // optional: maximum number of arrivals to include per stop (default: 3)
}
```

**Example with common name:**
```json
{
  "provider": "rapid penang", // automatically maps to provider: "prasarana", category: "rapid-bus-penang"
  "location": "Penang Airport",
  "max_distance": 2
}
```

**Example usage:**
```
search_transit_stops_by_location
{
  "provider": "rapid penang",
  "location": "Penang Airport",
  "limit": 3,
  "include_arrivals": true
}
```

**When to use:** When a user asks about finding transit stops near a specific location by name, such as "Show me bus stops near KLCC" or "What buses stop at KL Sentral?". This tool combines geocoding with transit data to provide a complete solution.

```

--------------------------------------------------------------------------------
/src/dashboards.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import fs from 'fs';
import path from 'path';

// Import the pre-generated dashboards index
import { DASHBOARDS_INDEX } from '../scripts/dashboards-index.js';
import { prefixToolName } from './utils/tool-naming.js';

// Define dashboard metadata interface
export interface DashboardMetadata {
  dashboard_name: string;
  data_last_updated?: string;
  data_next_update?: string;
  route?: string;
  sites?: string[];
  required_params?: string[];
  optional_params?: string[];
  charts?: Record<string, any>;
  [key: string]: any;
}

// GitHub raw content base URL for fetching specific dashboards
const GITHUB_RAW_BASE_URL = 'https://raw.githubusercontent.com/data-gov-my/datagovmy-meta/main/dashboards';

// Local dashboards directory path (fallback source when GitHub is unreachable)
const dashboardsDir = path.join(process.cwd(), 'dashboards');

// Check if the dashboards directory exists (evaluated once at module load;
// a directory created later will not be picked up without a restart)
const dashboardsDirExists = fs.existsSync(dashboardsDir);

// Cache for detailed dashboard metadata, keyed by dashboard name.
// NOTE(review): a single timestamp covers the whole cache, so refreshing any
// one entry extends the TTL of every cached entry — confirm whether
// per-entry expiry is intended.
let detailsCache: Record<string, DashboardMetadata> = {};
let lastCacheUpdate: number = 0;
const CACHE_TTL = 3600000; // 1 hour in milliseconds

// Return every dashboard from the pre-generated index, typed as metadata rows.
export function getAllDashboards(): DashboardMetadata[] {
  const indexRows = DASHBOARDS_INDEX as DashboardMetadata[];
  return indexRows;
}

/**
 * Resolve a dashboard's full metadata by name.
 *
 * Lookup order:
 *   1. The pre-generated index (to confirm the dashboard exists at all).
 *   2. The in-memory details cache, if within CACHE_TTL.
 *   3. GitHub raw content (preferred, to get the latest data).
 *   4. A local `dashboards/<name>.json` file, if the directory exists.
 *
 * Returns null only when the name is unknown to the index; if detailed
 * metadata cannot be fetched from any source, the basic index entry is
 * returned instead.
 *
 * NOTE(review): `lastCacheUpdate` is one module-level timestamp shared by
 * all cached entries, so fetching one dashboard refreshes every entry's
 * TTL — confirm whether per-entry expiry is intended.
 */
async function getDashboardByName(name: string): Promise<DashboardMetadata | null> {
  // First check if we have it in the index
  const basicInfo = getAllDashboards().find(d => {
    // Check if dashboard_name matches or if the filename (without .json) matches.
    // NOTE(review): a route with a leading '/' maps to a leading '_' here —
    // verify this matches the actual filenames in the meta repository.
    return d.dashboard_name === name || 
           (d.route && d.route.replace(/\//g, '_') === name);
  });
  
  if (!basicInfo) {
    return null; // Dashboard not found in index
  }
  
  // If we have detailed info cached and it's not expired, return it
  if (detailsCache[name] && Date.now() - lastCacheUpdate < CACHE_TTL) {
    return detailsCache[name];
  }
  
  try {
    // Always try to fetch from GitHub first to get the latest data
    try {
      const response = await axios.get(`${GITHUB_RAW_BASE_URL}/${name}.json`);
      const detailedData = response.data as DashboardMetadata;
      
      // Cache the detailed data
      detailsCache[name] = detailedData;
      lastCacheUpdate = Date.now();
      
      console.log(`Successfully fetched ${name} dashboard from GitHub`);
      return detailedData;
    } catch (error: any) {
      console.warn(`Could not fetch ${name} from GitHub, falling back to local file:`, error.message);
      
      // If GitHub fetch fails, check if we can fall back to local file
      if (dashboardsDirExists) {
        const filePath = path.join(dashboardsDir, `${name}.json`);
        if (fs.existsSync(filePath)) {
          const content = fs.readFileSync(filePath, 'utf8');
          const data = JSON.parse(content) as DashboardMetadata;
          
          // Cache the detailed data
          detailsCache[name] = data;
          lastCacheUpdate = Date.now();
          
          console.log(`Using local file for ${name} dashboard`);
          return data;
        }
      } else {
        console.log('Local dashboards directory does not exist, using only GitHub data');
      }
      
      // If local file doesn't exist either, throw error to be caught by outer catch
      throw new Error(`Dashboard ${name} not found locally or on GitHub`);
    }
  } catch (error) {
    console.error(`Error getting dashboard ${name}:`, error);
    // If we can't get detailed data, return the basic info from the index
    return basicInfo;
  }
}

// Split a free-text query into lowercase alphanumeric terms.
// Punctuation is treated as whitespace; empty fragments are dropped.
function tokenizeQuery(query: string): string[] {
  const cleaned = query.toLowerCase().replace(/[^a-z0-9\s]/g, ' ');
  const terms: string[] = [];
  for (const piece of cleaned.split(/\s+/)) {
    if (piece.length > 0) {
      terms.push(piece);
    }
  }
  return terms;
}

// Produce normalized variants of a term: hyphens removed, and for terms
// beginning with 'e' (e.g. 'epayment'), the prefix-stripped form as well.
function normalizeTerm(term: string): string[] {
  const stripped = term.replace(/-/g, '').trim();
  const variants = [stripped];
  // e.g. 'epayment' -> also try 'payment'
  if (stripped.length > 1 && stripped[0] === 'e') {
    variants.push(stripped.slice(1));
  }
  return variants;
}

// A small set of common synonyms for frequently used terms.
// Keys and values are all lowercase; expandSearchTerms lowercases the input
// term before looking it up here, and only the first matching group is used.
const COMMON_SYNONYMS: Record<string, string[]> = {
  'payment': ['payment', 'pay', 'transaction'],
  'electronic': ['electronic', 'digital', 'online', 'cashless'],
  'statistics': ['statistics', 'stats', 'data', 'figures', 'numbers'],
  'dashboard': ['dashboard', 'visualization', 'chart', 'graph'],
  'dataset': ['dataset', 'data set', 'database', 'data'],
};

// Helper function to expand search terms for better matching
function expandSearchTerms(term: string): string[] {
  const normalizedTerm = term.toLowerCase().trim();
  
  // Start with the original term
  let expanded = [normalizedTerm];
  
  // Add normalized variations
  expanded = expanded.concat(normalizeTerm(normalizedTerm));
  
  // Check for common synonyms
  for (const [key, synonyms] of Object.entries(COMMON_SYNONYMS)) {
    if (normalizedTerm === key || synonyms.includes(normalizedTerm)) {
      expanded = expanded.concat(synonyms);
      break;
    }
  }
  
  // Basic stemming for plurals
  if (normalizedTerm.endsWith('s')) {
    expanded.push(normalizedTerm.slice(0, -1)); // Remove trailing 's'
  } else {
    expanded.push(normalizedTerm + 's'); // Add trailing 's'
  }
  
  // Remove duplicates and return
  return [...new Set(expanded)];
}

// Helper function to search dashboards with improved matching
export function searchDashboards(query: string): DashboardMetadata[] {
  const dashboards = getAllDashboards();
  
  // Tokenize the query
  const queryTerms = tokenizeQuery(query);
  const expandedTerms = queryTerms.flatMap(term => expandSearchTerms(term));
  
  // If we have no valid terms after tokenization, fall back to the original query
  if (expandedTerms.length === 0) {
    const lowerCaseQuery = query.toLowerCase();
    return dashboards.filter(d => 
      d.dashboard_name.toLowerCase().includes(lowerCaseQuery) ||
      (d.route && d.route.toLowerCase().includes(lowerCaseQuery))
    );
  }
  
  // Search using expanded terms
  return dashboards.filter(d => {
    const name = d.dashboard_name.toLowerCase();
    const route = d.route ? d.route.toLowerCase() : '';
    
    // Check if any of the expanded terms match
    return expandedTerms.some(term => 
      name.includes(term) || route.includes(term)
    );
  });
}

/**
 * Register the dashboard-related MCP tools on the given server:
 *   - list_dashboards:       paginated listing of the dashboards index
 *   - search_dashboards:     keyword search over dashboard names/routes
 *   - get_dashboard_details: full metadata for a single dashboard
 *   - get_dashboard_charts:  chart configurations for a single dashboard
 *
 * Every tool returns its payload as a single JSON-encoded text content item.
 * NOTE(review): failures are reported as ordinary text content without an
 * MCP isError flag — confirm clients surface these responses as errors.
 */
export function registerDashboardTools(server: McpServer) {
  // List all available dashboards
  server.tool(
    prefixToolName('list_dashboards'),
    'Lists all available dashboards from the Malaysia Open Data platform',
    {
      limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
      offset: z.number().min(0).optional().describe('Number of records to skip for pagination'),
    },
    async ({ limit = 20, offset = 0 }) => {
      try {
        const allDashboards = getAllDashboards();
        const paginatedDashboards = allDashboards.slice(offset, offset + limit);
        const total = allDashboards.length;
        
        // Create a simplified version of the dashboards for the response
        // (drops the heavy charts payload, keeping only a count).
        const simplifiedDashboards = paginatedDashboards.map(d => ({
          dashboard_name: d.dashboard_name,
          route: d.route,
          sites: d.sites,
          data_last_updated: d.data_last_updated,
          required_params: d.required_params,
          chart_count: d.charts ? Object.keys(d.charts).length : 0
        }));
        
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: 'Dashboards retrieved successfully',
                total_dashboards: total,
                showing: `${offset + 1}-${Math.min(offset + limit, total)} of ${total}`,
                pagination: {
                  limit,
                  offset,
                  next_offset: offset + limit < total ? offset + limit : null,
                  previous_offset: offset > 0 ? Math.max(0, offset - limit) : null,
                },
                dashboards: simplifiedDashboards,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to retrieve dashboards',
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
  
  // Search dashboards by query (delegates matching to searchDashboards)
  server.tool(
    prefixToolName('search_dashboards'),
    '⚠️ CONSIDER USING search_all INSTEAD: This only searches dashboards. For comprehensive results across datasets and dashboards, use search_all tool. ⚠️',
    {
      query: z.string().describe('Search query to match against dashboard metadata'),
      limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
    },
    async ({ query, limit = 20 }) => {
      try {
        const searchResults = searchDashboards(query);
        const limitedResults = searchResults.slice(0, limit);
        
        // Create a simplified version of the dashboards for the response
        const simplifiedResults = limitedResults.map(d => ({
          dashboard_name: d.dashboard_name,
          route: d.route,
          sites: d.sites,
          data_last_updated: d.data_last_updated,
          required_params: d.required_params,
          chart_count: d.charts ? Object.keys(d.charts).length : 0
        }));
        
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: 'Search results for dashboards',
                query,
                total_matches: searchResults.length,
                showing: Math.min(limit, searchResults.length),
                dashboards: simplifiedResults,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to search dashboards',
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
  
  // Get dashboard details by name (full metadata via getDashboardByName)
  server.tool(
    prefixToolName('get_dashboard_details'),
    'Get comprehensive metadata for a dashboard by name',
    {
      name: z.string().describe('Name of the dashboard to retrieve metadata for'),
    },
    async ({ name }) => {
      try {
        const dashboard = await getDashboardByName(name);
        
        if (!dashboard) {
          // Try to find similar dashboards for suggestion
          // (simple substring containment in either direction, capped at 5).
          const allDashboards = getAllDashboards();
          const similarDashboards = allDashboards
            .filter(d => d.dashboard_name.includes(name) || name.includes(d.dashboard_name))
            .map(d => ({ dashboard_name: d.dashboard_name, route: d.route }))
            .slice(0, 5);
          
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  error: `Dashboard '${name}' not found`,
                  suggestions: similarDashboards.length > 0 ? similarDashboards : undefined,
                  timestamp: new Date().toISOString()
                }, null, 2),
              },
            ],
          };
        }
        
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: `Dashboard '${name}' details retrieved successfully`,
                dashboard,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: `Failed to retrieve dashboard '${name}'`,
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
  
  // Get charts for a dashboard (summarizes each chart's config fields)
  server.tool(
    prefixToolName('get_dashboard_charts'),
    'Get chart configurations for a specific dashboard',
    {
      name: z.string().describe('Name of the dashboard to retrieve charts for'),
    },
    async ({ name }) => {
      try {
        const dashboard = await getDashboardByName(name);
        
        if (!dashboard) {
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  error: `Dashboard '${name}' not found`,
                  timestamp: new Date().toISOString()
                }, null, 2),
              },
            ],
          };
        }
        
        if (!dashboard.charts) {
          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  error: `No charts found for dashboard '${name}'`,
                  timestamp: new Date().toISOString()
                }, null, 2),
              },
            ],
          };
        }
        
        // Flatten the charts map into a list of per-chart summaries.
        const charts = dashboard.charts;
        const chartList = Object.entries(charts).map(([key, chart]) => {
          const chartObj = chart as any;
          return {
            chart_id: key,
            name: chartObj.name,
            type: chartObj.chart_type,
            source: chartObj.chart_source,
            data_as_of: chartObj.data_as_of,
            api_type: chartObj.api_type,
            api_params: chartObj.api_params
          };
        });
        
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: `Charts for dashboard '${name}' retrieved successfully`,
                dashboard_name: dashboard.dashboard_name,
                route: dashboard.route,
                chart_count: chartList.length,
                charts: chartList,
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: `Failed to retrieve charts for dashboard '${name}'`,
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
              }, null, 2),
            },
          ],
        };
      }
    }
  );
}

```

--------------------------------------------------------------------------------
/src/datacatalogue.tools.ts:
--------------------------------------------------------------------------------

```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import fs from 'fs';
import path from 'path';
import { prefixToolName } from './utils/tool-naming.js';

// Import the pre-generated catalogue index
import { CATALOGUE_INDEX } from '../scripts/catalogue-index.js';

// GitHub raw content base URL for fetching specific datasets
const GITHUB_RAW_BASE_URL = 'https://raw.githubusercontent.com/data-gov-my/datagovmy-meta/main/data-catalogue';

// Local data-catalogue directory path (fallback source)
const dataCatalogueDir = path.join(process.cwd(), 'data-catalogue');

// Check if the data-catalogue directory exists (evaluated once at module load)
const dataCatalogueDirExists = fs.existsSync(dataCatalogueDir);

// Define dataset metadata interface.
// Covers both the index rows and the detailed per-dataset JSON documents;
// the index signature absorbs any extra upstream fields.
export interface DatasetMetadata {
  // Unique dataset identifier (also the JSON filename without extension).
  id: string;
  // English title.
  title_en: string;
  // Malay title.
  title_ms: string;
  // English description.
  description_en: string;
  // Malay description.
  description_ms: string;
  // Update frequency (string as provided upstream).
  frequency: string;
  // Geographic coverage codes.
  geography: string[];
  // Demographic dimensions covered.
  demography: string[];
  // First year covered by the dataset, when known.
  dataset_begin?: number;
  // Last year covered by the dataset, when known.
  dataset_end?: number;
  // Publishing agencies/sources.
  data_source: string[];
  // Upstream documents may carry additional, unmodelled fields.
  [key: string]: any;
}

// Cache for detailed dataset metadata, keyed by dataset id, plus a separate
// cache for the filters.json document.
// NOTE(review): both caches share the single `lastCacheUpdate` timestamp, so
// refreshing either one extends the TTL of everything cached — confirm
// whether independent clocks are intended.
let detailsCache: Record<string, DatasetMetadata> = {};
let filtersCache: any = null;
let lastCacheUpdate: number = 0;
const CACHE_TTL = 3600000; // 1 hour in milliseconds

// Return every dataset from the pre-generated catalogue index, typed as
// metadata rows.
export function getAllDatasets(): DatasetMetadata[] {
  const indexRows = CATALOGUE_INDEX as DatasetMetadata[];
  return indexRows;
}

/**
 * Fetch the dataset filter definitions (filters.json) from GitHub.
 *
 * Results are cached in `filtersCache` for CACHE_TTL; on network failure a
 * stale cached copy is returned if one exists, otherwise an Error is thrown.
 *
 * NOTE(review): `lastCacheUpdate` is shared with the per-dataset details
 * cache, so a filters refresh also extends the TTL of cached datasets (and
 * vice versa) — confirm whether a separate clock is intended.
 */
async function fetchFilters(): Promise<any> {
  if (filtersCache !== null && Date.now() - lastCacheUpdate < CACHE_TTL) {
    return filtersCache;
  }
  
  try {
    const filtersResponse = await axios.get(`${GITHUB_RAW_BASE_URL}/filters.json`);
    filtersCache = filtersResponse.data;
    lastCacheUpdate = Date.now();
    return filtersCache;
  } catch (error) {
    console.error('Error fetching filters:', error);
    if (filtersCache !== null) {
      return filtersCache; // Return stale cache if available
    }
    throw new Error('Failed to fetch filters and no cache available');
  }
}

/**
 * Resolve a dataset's detailed metadata by id.
 *
 * Lookup order: the pre-generated index (existence check), then the
 * in-memory cache if within CACHE_TTL, then GitHub raw content. Returns
 * null only when the id is unknown to the index; if the GitHub fetch fails,
 * the basic index entry is returned instead.
 *
 * NOTE(review): unlike the dashboards module, there is no local-file
 * fallback here — confirm that is intentional.
 */
async function getDatasetById(id: string): Promise<DatasetMetadata | null> {
  // First check if we have it in the index
  const basicInfo = getAllDatasets().find(d => d.id === id);
  if (!basicInfo) {
    return null; // Dataset ID not found in index
  }
  
  // If we have detailed info cached and it's not expired, return it
  if (detailsCache[id] && Date.now() - lastCacheUpdate < CACHE_TTL) {
    return detailsCache[id];
  }
  
  // Always try to fetch from GitHub first to get the latest data
  try {
    const response = await axios.get(`${GITHUB_RAW_BASE_URL}/${id}.json`);
    const detailedData = response.data as DatasetMetadata;
    
    // Cache the detailed data
    detailsCache[id] = detailedData;
    lastCacheUpdate = Date.now();
    
    console.log(`Successfully fetched ${id} dataset from GitHub`);
    return detailedData;
  } catch (error: any) {
    console.warn(`Error fetching dataset ${id} from GitHub:`, error.message);
    // If we can't get detailed data, return the basic info from the index
    return basicInfo;
  }
}

// Split a free-text query into lowercase alphanumeric terms; punctuation is
// treated as whitespace and empty fragments are dropped.
function tokenizeQuery(query: string): string[] {
  return query
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter(piece => piece !== '');
}

// Produce normalized variants of a term: hyphens removed, plus a
// prefix-stripped form for terms starting with 'e' ('epayment' -> 'payment').
function normalizeTerm(term: string): string[] {
  const base = term.replace(/-/g, '').trim();
  if (base.startsWith('e') && base.length > 1) {
    // e.g. 'epayment' -> also try 'payment'
    return [base, base.substring(1)];
  }
  return [base];
}

// A small set of common synonyms for frequently used terms.
// All keys and values are lowercase; expandSearchTerms lowercases the input
// before lookup and only the first matching group is applied.
const COMMON_SYNONYMS: Record<string, string[]> = {
  'payment': ['payment', 'pay', 'transaction'],
  'electronic': ['electronic', 'digital', 'online', 'cashless'],
  'statistics': ['statistics', 'stats', 'data', 'figures', 'numbers'],
  'dashboard': ['dashboard', 'visualization', 'chart', 'graph'],
  'dataset': ['dataset', 'data set', 'database', 'data'],
};

// Helper function to expand search terms for better matching
function expandSearchTerms(term: string): string[] {
  const normalizedTerm = term.toLowerCase().trim();
  
  // Start with the original term
  let expanded = [normalizedTerm];
  
  // Add normalized variations
  expanded = expanded.concat(normalizeTerm(normalizedTerm));
  
  // Check for common synonyms
  for (const [key, synonyms] of Object.entries(COMMON_SYNONYMS)) {
    if (normalizedTerm === key || synonyms.includes(normalizedTerm)) {
      expanded = expanded.concat(synonyms);
      break;
    }
  }
  
  // Basic stemming for plurals
  if (normalizedTerm.endsWith('s')) {
    expanded.push(normalizedTerm.slice(0, -1)); // Remove trailing 's'
  } else {
    expanded.push(normalizedTerm + 's'); // Add trailing 's'
  }
  
  // Remove duplicates and return
  return [...new Set(expanded)];
}

// Helper function to search datasets with improved matching
export function searchDatasets(query: string): DatasetMetadata[] {
  const datasets = getAllDatasets();
  
  // Tokenize the query
  const queryTerms = tokenizeQuery(query);
  const expandedTerms = queryTerms.flatMap(term => expandSearchTerms(term));
  
  // If we have no valid terms after tokenization, fall back to the original query
  if (expandedTerms.length === 0) {
    const lowerCaseQuery = query.toLowerCase();
    return datasets.filter(d => 
      d.title_en.toLowerCase().includes(lowerCaseQuery) ||
      d.title_ms.toLowerCase().includes(lowerCaseQuery) ||
      d.description_en.toLowerCase().includes(lowerCaseQuery) ||
      d.description_ms.toLowerCase().includes(lowerCaseQuery) ||
      d.id.toLowerCase().includes(lowerCaseQuery)
    );
  }
  
  // Search using expanded terms
  return datasets.filter(d => {
    const title_en = d.title_en.toLowerCase();
    const title_ms = d.title_ms.toLowerCase();
    const desc_en = d.description_en.toLowerCase();
    const desc_ms = d.description_ms.toLowerCase();
    const id = d.id.toLowerCase();
    
    // Check if any of the expanded terms match
    return expandedTerms.some(term => 
      title_en.includes(term) ||
      title_ms.includes(term) ||
      desc_en.includes(term) ||
      desc_ms.includes(term) ||
      id.includes(term)
    );
  });
}

// Filter datasets by optional criteria: exact frequency, any-overlap on
// geography/demography/data source, and year-range intersection (datasets
// with unknown begin/end years pass the year filter).
function filterDatasets(filters: any, datasets: DatasetMetadata[]): DatasetMetadata[] {
  const matches = (d: DatasetMetadata): boolean => {
    if (filters.frequency && d.frequency !== filters.frequency) {
      return false;
    }
    const wantedGeo: string[] = filters.geography || [];
    if (wantedGeo.length > 0 && !wantedGeo.some(g => d.geography.includes(g))) {
      return false;
    }
    const wantedDemo: string[] = filters.demography || [];
    if (wantedDemo.length > 0 && !wantedDemo.some(dem => d.demography.includes(dem))) {
      return false;
    }
    const wantedSources: string[] = filters.dataSource || [];
    if (wantedSources.length > 0 && !wantedSources.some(ds => d.data_source.includes(ds))) {
      return false;
    }
    if (
      filters.yearRange &&
      d.dataset_begin !== undefined &&
      d.dataset_end !== undefined &&
      (d.dataset_begin > filters.yearRange[1] || d.dataset_end < filters.yearRange[0])
    ) {
      return false;
    }
    return true;
  };
  return datasets.filter(matches);
}

// Wraps a payload as the MCP text result every tool below returns:
// the payload pretty-printed as JSON (2-space indent) with an ISO-8601
// timestamp appended as the final key. Spreading first keeps key order
// identical to the hand-built literals this helper replaces, and
// JSON.stringify drops keys whose value is `undefined` (relied on by the
// `suggestions` field in the not-found branch of get_dataset_details).
function catalogueToolResult(payload: Record<string, unknown>) {
  return {
    content: [
      {
        type: 'text' as const,
        text: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }, null, 2),
      },
    ],
  };
}

/**
 * Registers the data-catalogue tools on the given MCP server:
 * listing, keyword search, criteria filtering, filter-option discovery,
 * and per-dataset detail lookup. Every tool returns a single JSON text
 * content item; failures are reported in-band as `{ error, message }`
 * payloads rather than thrown.
 */
export function registerDataCatalogueTools(server: McpServer) {
  // List all datasets with rich metadata, paginated via limit/offset.
  server.tool(
    prefixToolName('list_datasets_catalogue'),
    'Lists all datasets from the comprehensive catalogue with rich metadata',
    {
      limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
      offset: z.number().min(0).optional().describe('Number of records to skip for pagination'),
    },
    async ({ limit = 20, offset = 0 }) => {
      try {
        const datasets = getAllDatasets();
        const paginatedDatasets = datasets.slice(offset, offset + limit);
        const total = datasets.length;

        return catalogueToolResult({
          message: 'Datasets retrieved from comprehensive catalogue',
          total_datasets: total,
          showing: `${offset + 1}-${Math.min(offset + limit, total)} of ${total}`,
          pagination: {
            limit,
            offset,
            next_offset: offset + limit < total ? offset + limit : null,
            previous_offset: offset > 0 ? Math.max(0, offset - limit) : null,
          },
          datasets: paginatedDatasets,
        });
      } catch (error) {
        return catalogueToolResult({
          error: 'Failed to retrieve datasets',
          message: error instanceof Error ? error.message : String(error),
        });
      }
    }
  );

  // Search datasets by free-text query (titles, descriptions, IDs).
  server.tool(
    prefixToolName('search_datasets_catalogue'),
    '⚠️ CONSIDER USING search_all INSTEAD: This only searches datasets. For comprehensive results across datasets and dashboards, use search_all tool. ⚠️',
    {
      query: z.string().describe('Search query to match against dataset metadata'),
      limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
    },
    async ({ query, limit = 20 }) => {
      try {
        const searchResults = searchDatasets(query);
        const limitedResults = searchResults.slice(0, limit);

        return catalogueToolResult({
          message: 'Search results for datasets',
          query,
          total_matches: searchResults.length,
          showing: Math.min(limit, searchResults.length),
          datasets: limitedResults,
        });
      } catch (error) {
        return catalogueToolResult({
          error: 'Failed to search datasets',
          message: error instanceof Error ? error.message : String(error),
        });
      }
    }
  );

  // Filter datasets by structured criteria, paginated via limit/offset.
  server.tool(
    prefixToolName('filter_datasets_catalogue'),
    'Filter datasets by various criteria such as frequency, geography, etc.',
    {
      frequency: z.string().optional().describe('Filter by data frequency (e.g., DAILY, MONTHLY, ANNUAL)'),
      geography: z.array(z.string()).optional().describe('Filter by geographic coverage'),
      demography: z.array(z.string()).optional().describe('Filter by demographic coverage'),
      dataSource: z.array(z.string()).optional().describe('Filter by data source organization'),
      yearRange: z.tuple([z.number(), z.number()]).optional().describe('Filter by year range [start, end]'),
      limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
      offset: z.number().min(0).optional().describe('Number of records to skip for pagination'),
    },
    async ({ frequency, geography, demography, dataSource, yearRange, limit = 20, offset = 0 }) => {
      try {
        // NOTE: a previous version also awaited fetchFilters() here, but the
        // result was never used — filtering works purely off the local
        // catalogue, so that network round-trip has been removed.
        const datasets = getAllDatasets();

        const filterCriteria = {
          frequency,
          geography,
          demography,
          dataSource,
          yearRange
        };

        const filteredDatasets = filterDatasets(filterCriteria, datasets);
        const paginatedResults = filteredDatasets.slice(offset, offset + limit);
        const total = filteredDatasets.length;

        return catalogueToolResult({
          message: 'Filtered datasets',
          filters: filterCriteria,
          total_matches: total,
          showing: `${offset + 1}-${Math.min(offset + limit, total)} of ${total}`,
          pagination: {
            limit,
            offset,
            next_offset: offset + limit < total ? offset + limit : null,
            previous_offset: offset > 0 ? Math.max(0, offset - limit) : null,
          },
          datasets: paginatedResults,
        });
      } catch (error) {
        return catalogueToolResult({
          error: 'Failed to filter datasets',
          message: error instanceof Error ? error.message : String(error),
        });
      }
    }
  );

  // Expose the available filter options (frequencies, geographies, etc.).
  server.tool(
    prefixToolName('get_dataset_filters'),
    'Get available filter options for datasets',
    {},
    async () => {
      try {
        const filters = await fetchFilters();

        return catalogueToolResult({
          message: 'Dataset filter options retrieved successfully',
          filters,
        });
      } catch (error) {
        return catalogueToolResult({
          error: 'Failed to retrieve filter options',
          message: error instanceof Error ? error.message : String(error),
        });
      }
    }
  );

  // Look up one dataset's full metadata by ID, with near-miss suggestions.
  server.tool(
    prefixToolName('get_dataset_details'),
    'Get comprehensive metadata for a dataset by ID',
    {
      id: z.string().describe('ID of the dataset to retrieve metadata for'),
    },
    async ({ id }) => {
      try {
        const dataset = await getDatasetById(id);

        if (!dataset) {
          // Suggest up to five datasets whose IDs substring-match the query
          // in either direction, to help the caller recover from a typo.
          const allDatasets = getAllDatasets();
          const similarDatasets = allDatasets
            .filter((d: DatasetMetadata) => d.id.includes(id) || id.includes(d.id))
            .map((d: DatasetMetadata) => ({ id: d.id, title_en: d.title_en }))
            .slice(0, 5);

          return catalogueToolResult({
            error: `Dataset '${id}' not found`,
            // `undefined` is dropped by JSON.stringify when there are no hits.
            suggestions: similarDatasets.length > 0 ? similarDatasets : undefined,
          });
        }

        return catalogueToolResult({
          message: `Dataset '${id}' details retrieved successfully`,
          dataset,
        });
      } catch (error) {
        return catalogueToolResult({
          error: `Failed to retrieve dataset '${id}'`,
          message: error instanceof Error ? error.message : String(error),
        });
      }
    }
  );
}

```
Page 1/5FirstPrevNextLast