This is page 2 of 5. Use http://codebase.md/hithereiamaliff/mcp-datagovmy?page={x} to view the full context.
# Directory Structure
```
├── .eslintrc.json
├── .github
│ └── workflows
│ └── deploy-vps.yml
├── .gitignore
├── .npmignore
├── .prettierrc
├── .smithery
│ └── index.cjs
├── deploy
│ ├── DEPLOYMENT.md
│ └── nginx-mcp.conf
├── docker-compose.yml
├── Dockerfile
├── index.js
├── LICENSE
├── malaysia_open_data_mcp_plan.md
├── mcp-server.js
├── package-lock.json
├── package.json
├── PROMPT.md
├── README.md
├── response.txt
├── scripts
│ ├── build.js
│ ├── catalogue-index.d.ts
│ ├── catalogue-index.js
│ ├── catalogue-index.ts
│ ├── dashboards-index.d.ts
│ ├── dashboards-index.js
│ ├── deploy.js
│ ├── extract-dataset-ids.js
│ ├── extracted-datasets.js
│ ├── index-catalogue-files.cjs
│ ├── index-dashboards.cjs
│ └── update-tool-names.ts
├── smithery.yaml
├── src
│ ├── api
│ │ ├── catalogue.js
│ │ ├── client.js
│ │ ├── dosm.js
│ │ ├── transport.js
│ │ └── weather.js
│ ├── catalogue.tools.ts
│ ├── dashboards.tools.ts
│ ├── datacatalogue.tools.ts
│ ├── dosm.tools.ts
│ ├── firebase-analytics.ts
│ ├── flood.tools.ts
│ ├── gtfs.tools.ts
│ ├── http-server.ts
│ ├── index.cjs
│ ├── index.js
│ ├── index.ts
│ ├── parquet.tools.ts
│ ├── tools
│ │ ├── catalogue.js
│ │ ├── dosm.js
│ │ ├── test.js
│ │ ├── transport.js
│ │ └── weather.js
│ ├── transport.tools.ts
│ ├── types.d.ts
│ ├── unified-search.tools.ts
│ ├── utils
│ │ ├── query-builder.js
│ │ └── tool-naming.ts
│ └── weather.tools.ts
├── TOOLS.md
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/scripts/extracted-datasets.js:
--------------------------------------------------------------------------------
```javascript
// Generated from GitHub repository: data-gov-my/datagovmy-meta
// Timestamp: 2025-07-27T03:12:32.956Z
// Total datasets: 237
/**
 * Static list of dataset entries, one object per dataset.
 * Every entry pairs a dataset `id` string with a human-readable `description`.
 * Entries appear to be ordered alphabetically by `id`.
 * NOTE(review): the header above says this list is generated — presumably it
 * should be regenerated rather than edited by hand; confirm against the
 * generating script.
 */
const EXTRACTED_DATASETS = [
{ id: 'air_pollution', description: 'Monthly Air Pollution' },
{ id: 'arc_dosm', description: 'DOSM\'s Advance Release Calendar' },
{ id: 'arrivals', description: 'Monthly Arrivals by Nationality & Sex' },
{ id: 'arrivals_soe', description: 'Monthly Arrivals by State of Entry, Nationality & Sex' },
{ id: 'births', description: 'Daily Live Births' },
{ id: 'births_annual', description: 'Annual Live Births' },
{ id: 'births_annual_sex_ethnic', description: 'Annual Live Births by Sex & Ethnicity' },
{ id: 'births_annual_sex_ethnic_state', description: 'Annual Live Births by State, Sex, & Ethnicity' },
{ id: 'births_annual_state', description: 'Annual Live Births by State' },
{ id: 'births_district_sex', description: 'Annual Live Births by District & Sex' },
{ id: 'blood_donations', description: 'Daily Blood Donations by Blood Group' },
{ id: 'blood_donations_state', description: 'Daily Blood Donations by Blood Group & State' },
{ id: 'bop_balance', description: 'Balance of Key BOP Components' },
{ id: 'cellular_subscribers', description: 'Cellular Subscribers by Plan Type' },
{ id: 'completion_school_state', description: 'School Completion Rates by State' },
{ id: 'cosmetic_notifications', description: 'Notified Cosmetic Products' },
{ id: 'cosmetic_notifications_cancelled', description: 'Cancelled Cosmetic Product Notifications' },
{ id: 'cosmetics_manufacturers', description: 'Approved Manufacturers of Cosmetic Products' },
{ id: 'covid_cases', description: 'Daily COVID-19 Cases by State' },
{ id: 'covid_cases_age', description: 'Daily COVID-19 Cases by Age Group & State' },
{ id: 'covid_cases_vaxstatus', description: 'Daily COVID-19 Cases by Vaccination Status & State' },
{ id: 'covid_deaths_linelist', description: 'Transactional Records: Deaths due to COVID-19' },
{ id: 'cpi_3d', description: 'Monthly CPI by Group (3-digit)' },
{ id: 'cpi_4d', description: 'Monthly CPI by Class (4-digit)' },
{ id: 'cpi_5d', description: 'Monthly CPI by Subclass (5-digit)' },
{ id: 'cpi_annual', description: 'Annual CPI by Division (2-digit)' },
{ id: 'cpi_annual_inflation', description: 'Annual CPI Inflation by Division (2-digit)' },
{ id: 'cpi_core', description: 'Monthly Core CPI by Division (2-digit)' },
{ id: 'cpi_core_inflation', description: 'Monthly Core CPI Inflation by Division (2-digit)' },
{ id: 'cpi_headline', description: 'Monthly CPI by Division (2-digit)' },
{ id: 'cpi_headline_inflation', description: 'Monthly CPI Inflation by Division (2-digit)' },
{ id: 'cpi_lowincome', description: 'Monthly CPI for Low-Income Households' },
{ id: 'cpi_state', description: 'Monthly CPI by State & Division (2-digit)' },
{ id: 'cpi_state_inflation', description: 'Monthly CPI Inflation by State & Division (2-digit)' },
{ id: 'cpi_strata', description: 'Monthly CPI by Strata & Division (2-digit)' },
{ id: 'crime_district', description: 'Crimes by District & Crime Type' },
{ id: 'crops_district_area', description: 'Crop Area by District' },
{ id: 'crops_district_production', description: 'Crop Production by District' },
{ id: 'crops_state', description: 'Crop Area and Production by State' },
{ id: 'currency_in_circulation', description: 'Monthly Currency in Circulation' },
{ id: 'currency_in_circulation_annual', description: 'Annual Currency in Circulation' },
{ id: 'deaths', description: 'Annual Deaths' },
{ id: 'deaths_district_sex', description: 'Annual Deaths by District & Sex' },
{ id: 'deaths_early_childhood', description: 'Annual Early Childhood Deaths' },
{ id: 'deaths_early_childhood_sex', description: 'Annual Early Childhood Deaths by Sex' },
{ id: 'deaths_early_childhood_state', description: 'Annual Early Childhood Deaths by State' },
{ id: 'deaths_early_childhood_state_sex', description: 'Annual Early Childhood Deaths by State & Sex' },
{ id: 'deaths_maternal', description: 'Annual Maternal Deaths' },
{ id: 'deaths_maternal_state', description: 'Annual Maternal Deaths by State' },
{ id: 'deaths_sex_ethnic', description: 'Annual Deaths by Sex & Ethnicity' },
{ id: 'deaths_sex_ethnic_state', description: 'Annual Deaths by State, Sex, & Ethnicity' },
{ id: 'deaths_state', description: 'Annual Deaths by State' },
{ id: 'domains', description: 'Number of Registered .MY Domains' },
{ id: 'domains_dnssec', description: 'Number of Registered .MY Domains with DNSSEC' },
{ id: 'domains_idn', description: 'Number of Registered Internationalised .MY Domains' },
{ id: 'domains_ipv6', description: 'Number of Registered .MY Domains with IPv6 DNS' },
{ id: 'economic_indicators', description: 'Malaysian Economic Indicators' },
{ id: 'electricity_access', description: 'Households with Access to Electricity' },
{ id: 'electricity_consumption', description: 'Monthly Electricity Consumption' },
{ id: 'electricity_supply', description: 'Electricity Supply' },
{ id: 'employment_sector', description: 'Employment by MSIC Sector and Sex' },
{ id: 'enrolment_school_district', description: 'Enrolment in Government Schools by District' },
{ id: 'exchangerates', description: 'Exchange Rates' },
{ id: 'fdi_flows', description: 'Foreign Direct Investment (FDI) Flows' },
{ id: 'federal_budget_moe', description: 'Annual Budget Allocation for the Ministry of Education' },
{ id: 'federal_budget_moh', description: 'Annual Budget Allocation for the Ministry of Health' },
{ id: 'federal_finance_qtr', description: 'Quarterly Federal Government Finance' },
{ id: 'federal_finance_qtr_de', description: 'Quarterly Federal Government Development Expenditure by Function' },
{ id: 'federal_finance_qtr_oe', description: 'Quarterly Federal Government Operating Expenditure by Object' },
{ id: 'federal_finance_qtr_revenue', description: 'Quarterly Federal Government Revenue' },
{ id: 'federal_finance_year', description: 'Annual Federal Government Finance' },
{ id: 'federal_finance_year_de', description: 'Annual Federal Government Development Expenditure by Function' },
{ id: 'federal_finance_year_oe', description: 'Annual Federal Government Operating Expenditure by Object' },
{ id: 'federal_finance_year_revenue', description: 'Annual Federal Government Revenue' },
{ id: 'fertility', description: 'TFR and ASFR' },
{ id: 'fertility_state', description: 'TFR and ASFR by State' },
{ id: 'fish_landings', description: 'Monthly Landings of Marine Fish by State' },
{ id: 'forest_reserve', description: 'Area of Permanent Forest Reserves' },
{ id: 'forest_reserve_state', description: 'Area of Permanent Forest Reserves by State' },
{ id: 'fuelprice', description: 'Price of Petroleum & Diesel' },
{ id: 'gdp_annual_nominal_demand', description: 'Annual Nominal GDP by Expenditure Type' },
{ id: 'gdp_annual_nominal_demand_granular', description: 'Annual Nominal GDP by Expenditure Subtype' },
{ id: 'gdp_annual_nominal_income', description: 'Annual Nominal GDP by Income Component' },
{ id: 'gdp_annual_nominal_supply', description: 'Annual Nominal GDP by Economic Sector' },
{ id: 'gdp_annual_nominal_supply_granular', description: 'Annual Nominal GDP by Economic Subsector' },
{ id: 'gdp_annual_real_demand', description: 'Annual Real GDP by Expenditure Type' },
{ id: 'gdp_annual_real_demand_granular', description: 'Annual Real GDP by Expenditure Subtype' },
{ id: 'gdp_annual_real_supply', description: 'Annual Real GDP by Economic Sector' },
{ id: 'gdp_annual_real_supply_granular', description: 'Annual Real GDP by Economic Subsector' },
{ id: 'gdp_district_real_supply', description: 'Annual Real GDP by District & Economic Sector' },
{ id: 'gdp_gni_annual_nominal', description: 'Annual Nominal GDP & GNI: 1947 to Present' },
{ id: 'gdp_gni_annual_real', description: 'Annual Real GDP & GNI: 1970 to Present' },
{ id: 'gdp_lookup', description: 'Lookup Table: GDP' },
{ id: 'gdp_qtr_nominal', description: 'Quarterly Nominal GDP' },
{ id: 'gdp_qtr_nominal_demand', description: 'Quarterly Nominal GDP by Expenditure Type' },
{ id: 'gdp_qtr_nominal_demand_granular', description: 'Quarterly Nominal GDP by Expenditure Subtype' },
{ id: 'gdp_qtr_nominal_supply', description: 'Quarterly Nominal GDP by Economic Sector' },
{ id: 'gdp_qtr_nominal_supply_granular', description: 'Quarterly Nominal GDP by Economic Subsector' },
{ id: 'gdp_qtr_real', description: 'Quarterly Real GDP' },
{ id: 'gdp_qtr_real_demand', description: 'Quarterly Real GDP by Expenditure Type' },
{ id: 'gdp_qtr_real_demand_granular', description: 'Quarterly Real GDP by Expenditure Subtype' },
{ id: 'gdp_qtr_real_sa', description: 'Quarterly Real GDP (Seasonally Adjusted)' },
{ id: 'gdp_qtr_real_sa_demand', description: 'Quarterly Real GDP (Seasonally Adjusted) by Expenditure Type' },
{ id: 'gdp_qtr_real_sa_supply', description: 'Quarterly Real GDP (Seasonally Adjusted) by Economic Sector' },
{ id: 'gdp_qtr_real_supply', description: 'Quarterly Real GDP by Economic Sector' },
{ id: 'gdp_qtr_real_supply_granular', description: 'Quarterly Real GDP by Economic Subsector' },
{ id: 'gdp_state_real_supply', description: 'Annual Real GDP by State & Economic Sector' },
{ id: 'ghg_emissions', description: 'Greenhouse Gas Emissions' },
{ id: 'healthcare_staff', description: 'Healthcare Staff by State and Staff Type' },
{ id: 'hh_access_amenities', description: 'Access to Basic Amenities by State & District' },
{ id: 'hh_income', description: 'Household Income' },
{ id: 'hh_income_district', description: 'Household Income by Administrative District' },
{ id: 'hh_income_state', description: 'Household Income by State' },
{ id: 'hh_inequality', description: 'Income Inequality' },
{ id: 'hh_inequality_district', description: 'Income Inequality by District' },
{ id: 'hh_inequality_state', description: 'Income Inequality by State' },
{ id: 'hh_poverty', description: 'Poverty' },
{ id: 'hh_poverty_district', description: 'Poverty by Administrative District' },
{ id: 'hh_poverty_state', description: 'Poverty by State' },
{ id: 'hh_profile', description: 'Number of Households and Living Quarters' },
{ id: 'hh_profile_state', description: 'Number of Households and Living Quarters by State' },
{ id: 'hies_district', description: 'Household Income and Expenditure: Administrative Districts' },
{ id: 'hies_malaysia_percentile', description: 'Household Income by Percentile' },
{ id: 'hies_state', description: 'Household Income and Expenditure: States' },
{ id: 'hies_state_percentile', description: 'Household Income by State & Percentile' },
{ id: 'hospital_beds', description: 'Hospital Beds by State and Hospital Type' },
{ id: 'infant_immunisation', description: 'Infant Immunisation Coverage' },
{ id: 'interestrates', description: 'Monthly Interest Rates' },
{ id: 'interestrates_annual', description: 'Annual Interest Rates' },
{ id: 'iowrt', description: 'Headline Wholesale & Retail Trade' },
{ id: 'iowrt_2d', description: 'Wholesale & Retail Trade by Division (2 digit)' },
{ id: 'iowrt_3d', description: 'Wholesale & Retail Trade by Group (3 digit)' },
{ id: 'ipi', description: 'Industrial Production Index (IPI)' },
{ id: 'ipi_1d', description: 'IPI by Section (1 digit)' },
{ id: 'ipi_2d', description: 'IPI by Division (2 digit)' },
{ id: 'ipi_3d', description: 'IPI by Group (3 digit)' },
{ id: 'ipi_5d', description: 'IPI by Item (5 digit)' },
{ id: 'ipi_domestic', description: 'IPI for Domestic-Oriented Divisions (2 digit)' },
{ id: 'ipi_export', description: 'IPI for Export-Oriented Divisions (2 digit)' },
{ id: 'lecturers_uni', description: 'Lecturers in Public Universities by Citizenship & Sex' },
{ id: 'lfs_district', description: 'Annual Principal Labour Force Statistics by District' },
{ id: 'lfs_month', description: 'Monthly Principal Labour Force Statistics' },
{ id: 'lfs_month_duration', description: 'Monthly Unemployment by Duration' },
{ id: 'lfs_month_sa', description: 'Monthly Principal Labour Force Statistics, Seasonally Adjusted' },
{ id: 'lfs_month_status', description: 'Monthly Employment by Status in Employment' },
{ id: 'lfs_month_youth', description: 'Monthly Youth Unemployment' },
{ id: 'lfs_qtr', description: 'Quarterly Principal Labour Force Statistics' },
{ id: 'lfs_qtr_sru_age', description: 'Quarterly Skills-Related Underemployment by Age' },
{ id: 'lfs_qtr_sru_sex', description: 'Quarterly Skills-Related Underemployment by Sex' },
{ id: 'lfs_qtr_state', description: 'Quarterly Principal Labour Force Statistics by State' },
{ id: 'lfs_qtr_tru_age', description: 'Quarterly Time-Related Underemployment by Age' },
{ id: 'lfs_qtr_tru_sex', description: 'Quarterly Time-Related Underemployment by Sex' },
{ id: 'lfs_state_sex', description: 'Annual Principal Labour Force Statistics by State & Sex' },
{ id: 'lfs_year', description: 'Annual Principal Labour Force Statistics' },
{ id: 'lfs_year_sex', description: 'Annual Principal Labour Force Statistics by Sex' },
{ id: 'local_authority_sex', description: 'Female Representation in Local Authorities' },
{ id: 'lookup_federal_finance', description: 'Lookup Table: Federal Finance' },
{ id: 'lookup_item', description: 'PriceCatcher: Item Lookup' },
{ id: 'lookup_money_banking', description: 'Lookup Table: Money & Banking' },
{ id: 'lookup_premise', description: 'PriceCatcher: Premise Lookup' },
{ id: 'marriages', description: 'Annual Marriages' },
{ id: 'marriages_age', description: 'Annual Marriage by Age Group' },
{ id: 'marriages_state', description: 'Annual Marriages by State' },
{ id: 'marriages_state_age', description: 'Annual Marriage by State & Age Group' },
{ id: 'mcoicop', description: 'MCOICOP' },
{ id: 'mineral_extraction', description: 'Extraction of Minerals by State and Commodity' },
{ id: 'mnha', description: 'MNHA: Total (TEH) and Current (CHE) Expenditure on Health' },
{ id: 'mnha_moh', description: 'MNHA: MOH Expenditure on Health' },
{ id: 'monetary_aggregates', description: 'Monetary Aggregates: M1, M2, M3' },
{ id: 'msic', description: 'MSIC' },
{ id: 'nutrition_children_sex', description: 'Nutritional Status of Children Under 5 by Sex' },
{ id: 'nutrition_children_strata', description: 'Nutritional Status of Children Under 5 by Strata' },
{ id: 'organ_pledges', description: 'Daily Organ Donation Pledges' },
{ id: 'organ_pledges_state', description: 'Daily Organ Donation Pledges by State' },
{ id: 'parliament_sex', description: 'Female Representation in Parliament' },
{ id: 'passports', description: 'Monthly Passport Issuances by State and Branch' },
{ id: 'pekab40_screenings', description: 'Daily PeKaB40 Health Screenings' },
{ id: 'pekab40_screenings_state', description: 'Daily PeKaB40 Health Screenings by State' },
{ id: 'pharmaceutical_importers', description: 'Approved Importers of Pharmaceutical Products' },
{ id: 'pharmaceutical_manufacturers', description: 'Approved Manufacturers of Pharmaceutical Products' },
{ id: 'pharmaceutical_products', description: 'Approved Pharmaceutical Products' },
{ id: 'pharmaceutical_products_cancelled', description: 'Cancelled Pharmaceutical Products' },
{ id: 'pharmaceutical_wholesalers', description: 'Approved Wholesalers of Pharmaceutical Products' },
{ id: 'population_district', description: 'Population Table: Administrative Districts' },
{ id: 'population_dun', description: 'Population Table: State Legislative Assemblies (DUNs)' },
{ id: 'population_malaysia', description: 'Population Table: Malaysia' },
{ id: 'population_parlimen', description: 'Population Table: Parliamentary Constituencies' },
{ id: 'population_state', description: 'Population Table: States' },
{ id: 'ppi', description: 'Monthly Producer Price Index (PPI)' },
{ id: 'ppi_1d', description: 'Monthly PPI by Section (1 digit)' },
{ id: 'ppi_2d', description: 'Monthly PPI by Division (2 digit)' },
{ id: 'ppi_3d', description: 'Monthly PPI by Group (3 digit)' },
{ id: 'ppi_sitc', description: 'Monthly PPI by SITC Section (1 digit)' },
{ id: 'ppi_sop', description: 'Monthly PPI by Stage of Processing' },
{ id: 'pricecatcher', description: 'PriceCatcher: Transactional Records' },
{ id: 'prisoners_prison', description: 'Prisoners by Prison Centre and Sex' },
{ id: 'prisoners_state', description: 'Prisoners by State and Sex' },
{ id: 'productivity_annual', description: 'Annual Productivity by Economic Sector' },
{ id: 'productivity_annual_priority', description: 'Annual Productivity for Priority Subsectors' },
{ id: 'productivity_lookup', description: 'Lookup Table: Labour Productivity' },
{ id: 'productivity_qtr', description: 'Quarterly Productivity by Economic Sector' },
{ id: 'registration_transactions_car', description: 'Vehicle Registration Transactions: Cars' },
{ id: 'ridership_headline', description: 'Daily Public Transport Ridership' },
{ id: 'ridership_ktmb_daily', description: 'Daily KTMB Ridership' },
{ id: 'ridership_ktmb_monthly', description: 'Monthly KTMB Ridership' },
{ id: 'ridership_od_ets', description: 'Hourly Origin-Destination Ridership: ETS' },
{ id: 'ridership_od_intercity', description: 'Hourly Origin-Destination Ridership: Intercity' },
{ id: 'ridership_od_komuter', description: 'Hourly Origin-Destination Ridership: Komuter' },
{ id: 'ridership_od_komuter_utara', description: 'Hourly Origin-Destination Ridership: Komuter Utara' },
{ id: 'ridership_od_shuttle_tebrau', description: 'Hourly Origin-Destination Ridership: Shuttle Tebrau' },
{ id: 'sanitation_access', description: 'Access to Sanitary Latrines by State' },
{ id: 'schools_district', description: 'Public Education Institutions by District' },
{ id: 'sdg_03-3-1', description: 'SDG 03-3-1: HIV Incidence per 1,000 Uninfected Population' },
{ id: 'sdg_04-6-1', description: 'SDG 04-6-1: Proficiency in Functional Literacy and Numeracy' },
{ id: 'sdg_10-c-1', description: 'SDG 10-C-1: Remittance Costs as a % of the Amount Remitted' },
{ id: 'sdg_16-1-1', description: 'SDG 16-1-1: Victims of Intentional Homicide' },
{ id: 'sdg_16-2-2', description: 'SDG 16-2-2: Victims of Human Trafficking' },
{ id: 'sitc', description: 'SITC' },
{ id: 'sitc_sop', description: 'SITC: Stage of Processing' },
{ id: 'sppi', description: 'Headline Services Producer Price Index (SPPI)' },
{ id: 'sppi_1d', description: 'SPPI by Section (1 digit)' },
{ id: 'sppi_2d', description: 'SPPI by Division (2 digits)' },
{ id: 'sppi_3d', description: 'SPPI by Group (3 digits)' },
{ id: 'state_finance_expenditure', description: 'State Government Expenditure' },
{ id: 'state_finance_revenue', description: 'State Government Revenue' },
{ id: 'std_state', description: 'Sexually Transmitted Diseases (STDs) by State' },
{ id: 'stillbirths', description: 'Annual Stillbirths' },
{ id: 'stillbirths_state', description: 'Annual Stillbirths by State' },
{ id: 'teachers_district', description: 'Teachers in Government Schools by District' },
{ id: 'timber_production', description: 'Production of Major Timber Products by State' },
{ id: 'trade_sitc_1d', description: 'Monthly Trade by SITC Section' },
{ id: 'vaxreg_covid', description: 'Daily COVID-19 Vaccine Registrations by State' },
{ id: 'vaxreg_covid_demog', description: 'Daily COVID-19 Vaccine Registrations by State, Sex & Age' },
{ id: 'water_access', description: 'Access to Treated Water by State & Strata' },
{ id: 'water_consumption', description: 'Water Consumption by State and Sector' },
{ id: 'water_pollution_basin', description: 'River Basin Pollution Monitoring' },
{ id: 'water_production', description: 'Water Production by State' }
];
// Expose the list as this module's default export.
export default EXTRACTED_DATASETS;
```
--------------------------------------------------------------------------------
/src/parquet.tools.ts:
--------------------------------------------------------------------------------
```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
// Import hyparquet with correct function names
import { parquetReadObjects, asyncBufferFromUrl } from 'hyparquet';
// Import compressors for BROTLI support
import { compressors } from 'hyparquet-compressors';
import { prefixToolName } from './utils/tool-naming.js';
/**
 * Replacer for `JSON.stringify` that makes BigInt values serializable.
 * @param key Name of the property currently being visited (not consulted)
 * @param value Value of the property currently being visited
 * @returns The decimal string form of a BigInt; every other value unchanged
 */
function bigIntSerializer(key: string, value: any): any {
  // JSON.stringify throws on bigint, so hand it the string form instead.
  return typeof value === 'bigint' ? value.toString() : value;
}
/**
 * Recursively copy a value, converting every BigInt it contains to a string.
 * This is needed because JSON.stringify cannot handle BigInt values directly.
 *
 * Arrays are mapped element by element and plain objects are rebuilt from their
 * own enumerable keys. `Date` instances are returned untouched: a Date has no
 * own enumerable keys, so rebuilding it key by key would yield an empty `{}`
 * and silently discard the timestamp.
 *
 * @param obj The value to process (any shape, including null/undefined)
 * @returns A copy of `obj` with all BigInt values converted to strings
 */
function processBigIntValues(obj: any): any {
  if (obj === null || obj === undefined) {
    return obj;
  }
  if (typeof obj === 'bigint') {
    return obj.toString();
  }
  if (Array.isArray(obj)) {
    return obj.map(item => processBigIntValues(item));
  }
  if (obj instanceof Date) {
    // Keep real dates intact instead of collapsing them into `{}`.
    return obj;
  }
  if (typeof obj === 'object') {
    const result: Record<string, any> = {};
    // Object.keys visits exactly the own enumerable string keys.
    for (const key of Object.keys(obj)) {
      result[key] = processBigIntValues(obj[key]);
    }
    return result;
  }
  return obj;
}
/**
 * Replace empty-object placeholders found in date columns with date strings.
 *
 * Date columns are detected from the keys of the first row: any key that is
 * `dt`, `timestamp`, or contains `date` (case-insensitive). A value counts as a
 * placeholder only when it is an object with no own enumerable keys AND is not
 * a `Date` instance — a real `Date` also has zero own keys, so without that
 * exclusion genuine dates would be overwritten with made-up ones.
 *
 * Replacement values are synthetic, not recovered from the file:
 * - For the `employment_sector` dataset (taken from the URL's file name) the
 *   year is derived from the row index, starting at 2001, assuming one block
 *   of (distinct sectors x distinct sexes) rows per year.
 * - Otherwise January 1st of the current year is used when some row holds a
 *   non-empty date string (with a time part if that string contains a space),
 *   and today's date when no such string exists.
 *
 * @param data Array of data rows to process
 * @param url URL of the parquet file
 * @returns The input array itself when there is nothing to do, otherwise a new
 *          array of shallow-copied rows with placeholders replaced
 */
function processEmptyDateObjects(data: any[], url: string): any[] {
  if (!data || data.length === 0) {
    return data;
  }

  // True only for `{}`-like placeholders; real Date instances are excluded.
  const isEmptyPlaceholder = (value: any): boolean =>
    Boolean(value) &&
    typeof value === 'object' &&
    !(value instanceof Date) &&
    Object.keys(value).length === 0;

  // Dataset name = last URL segment up to its first dot.
  const filename = url.split('/').pop() || '';
  const datasetName = filename.split('.')[0] || '';

  const firstRow = data[0];
  if (firstRow === null || typeof firstRow !== 'object') {
    // Cannot inspect column names; Object.keys(null) would throw.
    return data;
  }

  // Columns whose name suggests they hold dates.
  const dateColumns = Object.keys(firstRow).filter(key => {
    const lowerKey = key.toLowerCase();
    return lowerKey.includes('date') || lowerKey === 'dt' || lowerKey === 'timestamp';
  });
  if (dateColumns.length === 0) {
    return data; // No date columns found
  }

  // First non-blank date string found, used only to decide the output format.
  let datePattern: string | null = null;
  for (const dateColumn of dateColumns) {
    for (const row of data) {
      const dateValue = row[dateColumn];
      if (typeof dateValue === 'string' && dateValue.trim() !== '') {
        datePattern = dateValue;
        break;
      }
    }
    if (datePattern) break;
  }

  // Dataset-specific handling
  if (datasetName === 'employment_sector') {
    // The year of each row is inferred from its position in the file.
    const uniqueSectors = new Set<unknown>();
    const uniqueSexes = new Set<unknown>();
    data.forEach(row => {
      if (row.sector) uniqueSectors.add(row.sector);
      if (row.sex) uniqueSexes.add(row.sex);
    });
    // Fall back to 9 rows per year when either dimension is missing.
    const rowsPerYear = uniqueSectors.size * uniqueSexes.size || 9;
    const startYear = 2001; // First year in the dataset

    return data.map((row, index) => {
      const newRow = { ...row };
      if (isEmptyPlaceholder(row.date)) {
        const year = startYear + Math.floor(index / rowsPerYear);
        newRow.date = `${year}-01-01 00:00:00`;
      }
      return newRow;
    });
  }

  // Generic handling for other datasets
  return data.map(row => {
    const newRow = { ...row };
    for (const dateColumn of dateColumns) {
      if (!isEmptyPlaceholder(row[dateColumn])) {
        continue;
      }
      const now = new Date();
      if (datePattern) {
        // Mirror the observed format: datetime if the sample has a space.
        newRow[dateColumn] = datePattern.includes(' ')
          ? `${now.getFullYear()}-01-01 00:00:00`
          : `${now.getFullYear()}-01-01`;
      } else {
        // No sample to imitate: use today's date as YYYY-MM-DD.
        const month = String(now.getMonth() + 1).padStart(2, '0');
        const day = String(now.getDate()).padStart(2, '0');
        newRow[dateColumn] = `${now.getFullYear()}-${month}-${day}`;
      }
    }
    return newRow;
  });
}
/**
 * Extract the latest date from parquet data.
 *
 * The date column is the first key of the first row that is `dt` or contains
 * `date` (case-insensitive). Each row's value in that column is interpreted as:
 * a `Date` as-is; a string or number via `new Date(value)`; an object with
 * truthy `year` and `month` as that year/month; any other object as "now".
 * Falsy values and unparseable dates are ignored.
 *
 * The current month is returned when no date column exists or no row yields a
 * valid date. Formatting uses the local-time year and month.
 *
 * The maximum is tracked in a single pass rather than with
 * `Math.max(...values)`, which passes one argument per row and can throw a
 * RangeError on very large inputs.
 *
 * @param data The parquet data to analyze
 * @returns The latest date in YYYY-MM format, or undefined if `data` is empty
 */
function extractLatestDate(data: any[]): string | undefined {
  if (!data || data.length === 0) {
    return undefined;
  }

  const toYearMonth = (d: Date): string =>
    `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}`;

  // Locate the date column from the first row's keys.
  const firstRow = data[0];
  const dateColumn =
    firstRow !== null && typeof firstRow === 'object'
      ? Object.keys(firstRow).find(key => {
          const lowerKey = key.toLowerCase();
          return lowerKey === 'dt' || lowerKey.includes('date');
        })
      : undefined;

  if (dateColumn === undefined) {
    // No date column (this includes callout-style rows that only carry
    // `latest`/`chart` fields): report the current month.
    return toYearMonth(new Date());
  }

  let latestTime: number | undefined;
  for (const row of data) {
    const dateValue = row == null ? undefined : row[dateColumn];
    if (!dateValue) {
      continue;
    }

    let date: Date | null = null;
    if (dateValue instanceof Date) {
      date = dateValue;
    } else if (typeof dateValue === 'string' || typeof dateValue === 'number') {
      // Strings are parsed; numbers are taken as millisecond timestamps.
      date = new Date(dateValue);
    } else if (typeof dateValue === 'object') {
      // Structured {year, month} values; anything else falls back to "now".
      date =
        dateValue.year && dateValue.month
          ? new Date(dateValue.year, dateValue.month - 1)
          : new Date();
    }

    if (date) {
      const time = date.getTime();
      if (!isNaN(time) && (latestTime === undefined || time > latestTime)) {
        latestTime = time;
      }
    }
  }

  // No valid date in any row: report the current month.
  return toYearMonth(latestTime === undefined ? new Date() : new Date(latestTime));
}
// Define the structure for Parquet metadata
// Describes a remote parquet file using only its URL and HTTP response headers;
// nothing here is read from the parquet file contents.
interface ParquetMetadata {
// Last path segment of `url`.
filename: string;
// The URL the metadata was requested for.
url: string;
// Size in bytes taken from the `content-length` response header; omitted when
// it could not be determined (e.g. the HEAD request failed).
fileSize?: number;
// Value of the `last-modified` response header, if any.
lastModified?: string;
// Value of the `content-type` response header, if any.
contentType?: string;
}
/**
 * Helper function to get metadata about a parquet file from a URL.
 *
 * Issues a HEAD request (5 second timeout) so that no file content is
 * downloaded, and reads size, modification time and content type from the
 * response headers. `fileSize` is left undefined when the `content-length`
 * header is missing or not a number, rather than being reported as 0 bytes —
 * this matches the failure path, which also omits it.
 *
 * Never rejects: if the request fails, the error is logged and only
 * `filename` and `url` are returned.
 *
 * @param url URL of the parquet file
 * @returns Metadata about the parquet file
 */
async function getParquetMetadata(url: string): Promise<ParquetMetadata> {
  // Last URL segment; computed once so both paths agree.
  const filename = url.split('/').pop() || 'unknown.parquet';

  try {
    // HEAD request: headers only, no body.
    const headResponse = await axios({
      method: 'head',
      url,
      timeout: 5000, // 5 second timeout
    });

    const rawLength = headResponse.headers['content-length'];
    const parsedLength =
      rawLength === undefined || rawLength === null ? NaN : parseInt(String(rawLength), 10);

    return {
      filename,
      url,
      // Unknown size stays undefined instead of masquerading as an empty file.
      fileSize: Number.isNaN(parsedLength) ? undefined : parsedLength,
      lastModified: headResponse.headers['last-modified'],
      contentType: headResponse.headers['content-type'],
    };
  } catch (error) {
    console.error('Error getting parquet file metadata:', error);
    // Return basic metadata even if HEAD request fails
    return { filename, url };
  }
}
/**
 * Guess a parquet file's structure purely from its file name.
 *
 * Nothing is read from the file: names mentioning payments or time series get
 * a canned description, and every other name is split on `_` with each piece
 * mapped to a column type by keyword.
 *
 * @param filename Name of the parquet file
 * @returns Estimated structure information (schema guess, row-count note, data type)
 */
function estimateParquetStructure(filename: string): Record<string, any> {
  const mentions = (keyword: string): boolean => filename.includes(keyword);

  // Payment-related files share one canned layout.
  if (mentions('epayment') || mentions('payment')) {
    return {
      estimatedSchema: {
        'date': 'datetime',
        'payment_channel': 'string',
        'transaction_count': 'integer',
        'transaction_value': 'float',
        'growth_rate': 'float',
      },
      estimatedRowCount: 'Unknown (typically monthly data for 2-5 years)',
      possibleVisualization: 'Time series chart showing payment channel usage over time',
      dataType: 'Payment transaction data',
    };
  }

  // Generic time-series files.
  if (mentions('timeseries')) {
    return {
      estimatedSchema: {
        'date': 'datetime',
        'value': 'float',
        'category': 'string',
      },
      estimatedRowCount: 'Unknown (typically daily or monthly time series)',
      possibleVisualization: 'Line chart showing trends over time',
      dataType: 'Time series data',
    };
  }

  // Keyword -> type rules, checked in order; the first matching rule wins.
  const typeRules: Array<[string[], string]> = [
    [['value', 'amount'], 'float'],
    [['count', 'num'], 'integer'],
    [['id', 'name', 'type', 'category'], 'string'],
  ];

  const guessedColumns: Record<string, string> = {};
  for (const token of filename.replace('.parquet', '').split('_')) {
    // Date-like pieces always map to a single 'date' column.
    if (token.includes('date') || token === 'dt') {
      guessedColumns['date'] = 'datetime';
      continue;
    }
    const matchedRule = typeRules.find(([keywords]) =>
      keywords.some(keyword => token.includes(keyword))
    );
    if (matchedRule) {
      guessedColumns[token] = matchedRule[1];
    }
  }

  // Too little to go on: add placeholder columns.
  if (Object.keys(guessedColumns).length < 2) {
    guessedColumns['value'] = 'unknown';
    guessedColumns['category'] = 'unknown';
  }

  return {
    estimatedSchema: guessedColumns,
    estimatedRowCount: 'Unknown',
    dataType: 'Unknown structured data',
  };
}
/**
 * Map a parquet file URL to the dashboard most likely to visualise it.
 *
 * Only the last path segment (the filename) is inspected; the first rule whose
 * keyword appears in it wins, and unmatched names go to the main dashboard page.
 * @param parquetUrl URL of the parquet file
 * @returns Estimated dashboard URL
 */
function estimateDashboardUrl(parquetUrl: string): string {
  const lastSegment = parquetUrl.split('/').pop() || '';

  // Ordered rules: earlier entries take precedence over later ones.
  const rules: Array<{ keywords: string[]; dashboard: string }> = [
    { keywords: ['epayment', 'payment'], dashboard: 'https://data.gov.my/dashboard/electronic-payments' },
    { keywords: ['dosm', 'statistics'], dashboard: 'https://open.dosm.gov.my/dashboard' },
  ];

  const matched = rules.find(rule => rule.keywords.some(keyword => lastSegment.includes(keyword)));
  return matched ? matched.dashboard : 'https://data.gov.my/dashboard';
}
/**
 * Fetch a Parquet file over HTTP and decode its rows with hyparquet.
 *
 * @param url URL of the Parquet file
 * @param maxRows Upper bound on the rows requested from the reader; a value of 0
 *   or less sends no upper bound (presumably reading every row - confirm against hyparquet)
 * @param datasetId Accepted for API compatibility; not used by this implementation
 * @returns Object with the inferred `schema`, `totalRows`, `displayedRows`, the row
 *   `data`, and `detected_date` as reported by `extractLatestDate`
 * @throws Re-throws any download/decoding error after logging it
 */
async function parseParquetFromUrl(url: string, maxRows: number = 500, datasetId?: string): Promise<any> {
  try {
    const file = await asyncBufferFromUrl({ url });

    // `compressors` is passed so that BROTLI-compressed files can be decoded.
    const rawRows = await parquetReadObjects({
      file,
      rowStart: 0,
      rowEnd: maxRows > 0 ? maxRows : undefined,
      compressors
    });

    // Post-process in two passes: BigInt values first, then empty date objects.
    const rows = processEmptyDateObjects(processBigIntValues(rawRows), url);

    // The schema is inferred from the first row only, because the reader call
    // above yields plain row objects rather than column metadata.
    const schema: Record<string, string> = {};
    if (rows.length > 0) {
      const sampleRow = rows[0];
      for (const column of Object.keys(sampleRow)) {
        // Look at the untouched value so BigInt columns are still reported as such.
        const rawValue = rawRows[0][column];
        if (column.toLowerCase() === 'date') {
          // A column literally named "date" is always reported as a string.
          schema[column] = 'string';
          continue;
        }
        schema[column] = typeof rawValue === 'bigint' ? 'bigint' : typeof sampleRow[column];
      }
    }

    // NOTE: `totalRows` is the number of rows actually read, not the file's full
    // row count, so it always equals `displayedRows`.
    const rowCount = rows.length;
    const detectedDate = extractLatestDate(rows);

    return {
      schema,
      totalRows: rowCount,
      displayedRows: rows.length,
      data: rows,
      detected_date: detectedDate
    };
  } catch (error) {
    console.error('Error parsing Parquet file:', error);
    throw error;
  }
}
/**
 * Register the Parquet tools on an MCP server.
 *
 * Three tools are registered (each name is passed through `prefixToolName`):
 * - `parse_parquet_file`: parses the file and returns schema plus row data.
 * - `get_parquet_info`: returns file metadata plus the inferred schema, without row data.
 * - `find_dashboard_for_parquet`: returns the estimated dashboard URL for the file.
 *
 * The first two tools never throw to the caller: when parsing fails they fall
 * back to HEAD metadata plus a filename-based structure estimate, and if that
 * fails too they return a JSON error payload.
 *
 * @param server MCP server instance the tools are registered on
 */
export function registerParquetTools(server: McpServer) {
  const MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'];

  // Wrap a payload in the single-text-item result shape used by every tool here.
  const toTextResult = (payload: unknown, replacer?: (key: string, value: any) => any) => ({
    content: [
      {
        type: 'text' as const,
        text: JSON.stringify(payload, replacer, 2),
      },
    ],
  });

  // Human-readable message for anything that can land in a catch clause.
  const errorText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // File-level fields shared by every response that includes HEAD metadata.
  const describeFile = (metadata: Record<string, any>) => ({
    filename: metadata.filename,
    url: metadata.url,
    fileSize: metadata.fileSize ? `${Math.round(metadata.fileSize / 1024 / 1024 * 100) / 100} MB` : 'Unknown',
    lastModified: metadata.lastModified || 'Unknown',
    contentType: metadata.contentType || 'application/octet-stream',
  });

  // Build the `data_as_of` fields. When no date was detected in the data, the
  // current year-month is reported instead.
  const resolveDataAsOf = (detectedDate?: string) => {
    const now = new Date();
    const currentYear = now.getFullYear();
    const dataAsOf = detectedDate || `${currentYear}-${String(now.getMonth() + 1).padStart(2, '0')}`;
    const parts = dataAsOf.split('-').map((part: string) => parseInt(part, 10));
    const year = parts[0];
    const month = parts[1] ?? NaN;
    return {
      data_as_of: dataAsOf,
      data_as_of_formatted: `${MONTH_NAMES[month - 1] || 'Unknown'} ${year || currentYear}`,
    };
  };

  // Fallback used when parsing fails: HEAD metadata plus a filename-based
  // estimate, with tool-specific `extra` fields placed before the timestamp.
  // If even that fails, both errors are reported in a plain error payload.
  const buildEstimatedFallback = async (url: string, parseError: unknown, extra: Record<string, string>) => {
    try {
      const metadata = await getParquetMetadata(url);
      const structureInfo = estimateParquetStructure(metadata.filename);
      const dashboardUrl = estimateDashboardUrl(url);
      return toTextResult({
        ...describeFile(metadata),
        estimatedStructure: structureInfo,
        viewableAt: dashboardUrl,
        ...extra,
        timestamp: new Date().toISOString()
      });
    } catch (fallbackError) {
      return toTextResult({
        error: 'Failed to process Parquet file',
        message: errorText(parseError),
        fallbackError: errorText(fallbackError),
        url,
        timestamp: new Date().toISOString(),
        note: 'Parquet files can be viewed through their corresponding dashboards on data.gov.my',
      });
    }
  };

  // Parse a Parquet file from a URL
  server.tool(
    prefixToolName('parse_parquet_file'),
    'Parse and display data from a Parquet file URL',
    {
      url: z.string().url().describe('URL of the Parquet file to parse'),
      maxRows: z.number().min(1).max(2000).optional().describe('Maximum number of rows to return (1-2000)'),
    },
    async ({ url, maxRows = 500 }) => {
      try {
        const filename = url.split('/').pop() || 'unknown.parquet';
        const parquetData = await parseParquetFromUrl(url, maxRows);
        return toTextResult({
          filename,
          url,
          schema: parquetData.schema,
          totalRows: parquetData.totalRows,
          displayedRows: parquetData.displayedRows,
          data: parquetData.data,
          ...resolveDataAsOf(parquetData.detected_date),
          timestamp: new Date().toISOString()
        }, bigIntSerializer);
      } catch (error) {
        return await buildEstimatedFallback(url, error, {
          error: 'Failed to parse Parquet file',
          errorMessage: errorText(error),
          note: 'Falling back to estimated structure. You can view the data at the dashboard URL.',
        });
      }
    }
  );

  // Get information about a Parquet file from a URL
  server.tool(
    prefixToolName('get_parquet_info'),
    'Get metadata and structure information about a Parquet file',
    {
      url: z.string().url().describe('URL of the Parquet file to analyze'),
    },
    async ({ url }) => {
      try {
        // maxRows = 0 asks the parser for rows without an upper bound, so the
        // schema and row count come from the real data rather than an estimate.
        const parquetData = await parseParquetFromUrl(url, 0);
        const metadata = await getParquetMetadata(url);
        const dashboardUrl = estimateDashboardUrl(url);
        return toTextResult({
          ...describeFile(metadata),
          schema: parquetData.schema,
          totalRows: parquetData.totalRows,
          ...resolveDataAsOf(parquetData.detected_date),
          viewableAt: dashboardUrl,
          timestamp: new Date().toISOString()
        }, bigIntSerializer);
      } catch (error) {
        return await buildEstimatedFallback(url, error, {
          note: 'Could not parse the Parquet file directly. This is an estimation based on the filename and common patterns.',
        });
      }
    }
  );

  // Link to dashboard for a parquet file
  server.tool(
    prefixToolName('find_dashboard_for_parquet'),
    'Find the corresponding dashboard for a Parquet file',
    {
      url: z.string().url().describe('URL of the parquet file'),
    },
    async ({ url }) => {
      try {
        const filename = url.split('/').pop() || 'unknown.parquet';
        const dashboardUrl = estimateDashboardUrl(url);
        return toTextResult({
          filename,
          parquetUrl: url,
          dashboardUrl,
          note: 'This is the estimated dashboard where you can view the visualized data from this Parquet file.',
          timestamp: new Date().toISOString()
        }, bigIntSerializer);
      } catch (error) {
        return toTextResult({
          error: 'Failed to find dashboard',
          message: errorText(error),
          url,
          fallbackUrl: 'https://data.gov.my/dashboard',
          timestamp: new Date().toISOString()
        }, bigIntSerializer);
      }
    }
  );
}
```
--------------------------------------------------------------------------------
/src/http-server.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Malaysia Open Data MCP Server - Streamable HTTP Transport
*
* This file provides an HTTP server for self-hosting the MCP server on a VPS.
* It uses the Streamable HTTP transport for MCP communication.
*
* Usage:
* npm run build
* node dist/http-server.js
*
* Or with environment variables:
* PORT=8080 node dist/http-server.js
*/
import dotenv from 'dotenv';
dotenv.config();
import express, { Request, Response } from 'express';
import cors from 'cors';
import fs from 'fs';
import path from 'path';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { z } from 'zod';
// Import tool registration functions
import { registerFloodTools } from './flood.tools.js';
import { registerWeatherTools } from './weather.tools.js';
import { registerTransportTools } from './transport.tools.js';
import { registerDataCatalogueTools } from './datacatalogue.tools.js';
import { registerDosmTools } from './dosm.tools.js';
import { registerDashboardTools } from './dashboards.tools.js';
import { registerUnifiedSearchTools } from './unified-search.tools.js';
import { registerParquetTools } from './parquet.tools.js';
import { registerGtfsTools } from './gtfs.tools.js';
import { prefixToolName } from './utils/tool-naming.js';
// Import Firebase analytics
import { saveAnalyticsToFirebase, loadAnalyticsFromFirebase } from './firebase-analytics.js';
// Type definition for tool registration functions
type ToolRegistrationFn = (server: McpServer) => void;
// ============================================================================
// Analytics Tracking with File Persistence + Firebase
// ============================================================================
/** One recorded tool invocation, as stored in `Analytics.recentToolCalls`. */
interface ToolCall {
// Name of the tool that was invoked.
tool: string;
// ISO-8601 time at which the call was recorded.
timestamp: string;
// Client address: `req.ip`, else the `x-forwarded-for` header, else 'unknown'.
clientIp: string;
// First 50 characters of the User-Agent header, or 'unknown' when absent.
userAgent: string;
}
/**
 * Aggregated usage counters for the HTTP server.
 * The whole object is serialised to the analytics JSON file and sent to Firebase.
 */
interface Analytics {
// ISO-8601 timestamp the uptime display is measured from; it is part of the
// persisted state and is reset to "now" by the /analytics/reset endpoint.
serverStartTime: string;
// Number of requests passed to trackRequest.
totalRequests: number;
// Number of calls passed to trackToolCall.
totalToolCalls: number;
// Request count keyed by HTTP method.
requestsByMethod: Record<string, number>;
// Request count keyed by the endpoint label given to trackRequest.
requestsByEndpoint: Record<string, number>;
// Call count keyed by tool name.
toolCalls: Record<string, number>;
// Most recent tool calls, newest first, capped at MAX_RECENT_CALLS entries.
recentToolCalls: ToolCall[];
// Request count keyed by client IP (or 'unknown').
clientsByIp: Record<string, number>;
// Request count keyed by the first 50 characters of the User-Agent header.
clientsByUserAgent: Record<string, number>;
// Request count keyed by hour bucket in the form 'YYYY-MM-DDTHH' (taken from toISOString, i.e. UTC).
hourlyRequests: Record<string, number>;
}
// Directory holding the persisted analytics; overridable via the ANALYTICS_DIR
// environment variable, defaulting to /app/data (the Docker volume mount).
const ANALYTICS_DIR = process.env.ANALYTICS_DIR || '/app/data';
// Full path of the JSON snapshot inside ANALYTICS_DIR.
const ANALYTICS_FILE = path.join(ANALYTICS_DIR, 'analytics.json');
// Maximum number of entries kept in Analytics.recentToolCalls.
const MAX_RECENT_CALLS = 100;
const SAVE_INTERVAL_MS = 30000; // Save every 30 seconds
// Default analytics state, used when nothing could be loaded from Firebase or disk.
// NOTE: serverStartTime is evaluated once, when this module is loaded, and the
// nested objects/arrays are shared by every shallow copy made with `{ ...defaultAnalytics }`.
const defaultAnalytics: Analytics = {
serverStartTime: new Date().toISOString(),
totalRequests: 0,
totalToolCalls: 0,
requestsByMethod: {},
requestsByEndpoint: {},
toolCalls: {},
recentToolCalls: [],
clientsByIp: {},
clientsByUserAgent: {},
hourlyRequests: {},
};
/**
 * Load persisted analytics, preferring Firebase and falling back to the local
 * JSON file, and finally to a fresh copy of the defaults.
 *
 * A failure of the Firebase load is logged and does not prevent the file
 * fallback. Snapshots that lack some fields are completed from
 * `defaultAnalytics`, so the tracking functions can always index into the
 * counter maps.
 *
 * @returns The analytics state to start the server with
 */
async function loadAnalytics(): Promise<Analytics> {
  // Fill in any field a stored snapshot does not carry.
  const withDefaults = (snapshot: Partial<Analytics>): Analytics => ({
    ...defaultAnalytics,
    ...snapshot,
  });

  // Try Firebase first
  console.log('Attempting to load analytics from Firebase...');
  try {
    const firebaseData = await loadAnalyticsFromFirebase();
    if (firebaseData) {
      return withDefaults(firebaseData);
    }
  } catch (error) {
    // Keep going: a rejected Firebase load must still reach the file fallback below.
    console.error('Failed to load analytics from Firebase:', error);
  }

  // Fall back to local file
  try {
    // Ensure directory exists
    if (!fs.existsSync(ANALYTICS_DIR)) {
      fs.mkdirSync(ANALYTICS_DIR, { recursive: true });
    }
    if (fs.existsSync(ANALYTICS_FILE)) {
      const data = fs.readFileSync(ANALYTICS_FILE, 'utf-8');
      const loaded = withDefaults(JSON.parse(data) as Partial<Analytics>);
      console.log(`📊 Loaded analytics from ${ANALYTICS_FILE}:`, {
        totalRequests: loaded.totalRequests,
        totalToolCalls: loaded.totalToolCalls,
      });
      return loaded;
    }
  } catch (error) {
    console.error('Failed to load analytics from file:', error);
  }

  console.log('Starting with fresh analytics');
  return { ...defaultAnalytics };
}
/**
 * Persist the current analytics to the local JSON file and to Firebase.
 *
 * The file write is synchronous; the Firebase write is awaited, so the returned
 * promise settles only once both attempts have finished. Failures of either
 * target are logged and swallowed: the promise never rejects, which keeps
 * fire-and-forget callers (`saveAnalytics();`) safe from unhandled rejections.
 */
async function saveAnalytics(): Promise<void> {
  // Save to local file (synchronous backup)
  try {
    // Ensure directory exists
    if (!fs.existsSync(ANALYTICS_DIR)) {
      fs.mkdirSync(ANALYTICS_DIR, { recursive: true });
    }
    fs.writeFileSync(ANALYTICS_FILE, JSON.stringify(analytics, null, 2));
    // Don't log every save to reduce noise
  } catch (error) {
    console.error('Failed to save analytics locally:', error);
  }

  // Save to Firebase. Awaiting here lets callers that need a completed flush
  // (e.g. before shutting down) wait on the returned promise; callers that
  // ignore the promise are not blocked.
  try {
    await saveAnalyticsToFirebase(analytics);
  } catch (err) {
    console.error('Firebase save error:', err);
  }
}
// Build an empty analytics state with its own counter maps (nothing shared
// with `defaultAnalytics` or with other empty states).
function createEmptyAnalytics(): Analytics {
  return {
    ...defaultAnalytics,
    requestsByMethod: {},
    requestsByEndpoint: {},
    toolCalls: {},
    recentToolCalls: [],
    clientsByIp: {},
    clientsByUserAgent: {},
    hourlyRequests: {},
  };
}

// Live analytics state. It starts as an empty placeholder so that code running
// before the asynchronous load below finishes never dereferences `undefined`.
let analytics: Analytics = createEmptyAnalytics();

// Set once loadAnalytics() has settled. Until then nothing is persisted, so
// the placeholder state cannot overwrite previously saved analytics.
let analyticsLoadSettled = false;

// Upper bound on how long a shutdown waits for the final save.
const SHUTDOWN_SAVE_TIMEOUT_MS = 5000;

// Load analytics asynchronously on startup
loadAnalytics()
  .then(data => {
    analytics = data;
    console.log('✅ Analytics initialized:', {
      totalRequests: analytics.totalRequests.toLocaleString(),
      totalToolCalls: analytics.totalToolCalls,
    });
  })
  .catch(error => {
    console.error('Failed to initialize analytics:', error);
    analytics = createEmptyAnalytics();
  })
  .then(() => {
    analyticsLoadSettled = true;
  });

// Periodic save
setInterval(() => {
  if (!analyticsLoadSettled) {
    return;
  }
  saveAnalytics().catch(error => {
    console.error('Periodic analytics save failed:', error);
  });
}, SAVE_INTERVAL_MS);

// Save on process exit: exit only after the promise returned by saveAnalytics()
// has settled, with a timeout so a stuck save cannot block shutdown forever.
function saveAnalyticsAndExit(signal: string): void {
  console.log(`Received ${signal}, saving analytics...`);
  if (!analyticsLoadSettled) {
    // Nothing worth persisting yet; saving now would store the empty placeholder.
    process.exit(0);
  }
  setTimeout(() => process.exit(0), SHUTDOWN_SAVE_TIMEOUT_MS);
  saveAnalytics()
    .catch(error => {
      console.error('Failed to save analytics during shutdown:', error);
    })
    .then(() => process.exit(0));
}

process.on('SIGTERM', () => saveAnalyticsAndExit('SIGTERM'));
process.on('SIGINT', () => saveAnalyticsAndExit('SIGINT'));
/**
 * Record one incoming HTTP request in the analytics counters.
 *
 * @param req Express request being handled
 * @param endpoint Label under which the request is counted in `requestsByEndpoint`
 */
function trackRequest(req: Request, endpoint: string) {
  // Add one to `counter[key]`, starting from zero for unseen keys.
  const increment = (counter: Record<string, number>, key: string): void => {
    counter[key] = (counter[key] || 0) + 1;
  };

  analytics.totalRequests++;
  increment(analytics.requestsByMethod, req.method);
  increment(analytics.requestsByEndpoint, endpoint);

  // Prefer Express's resolved address, then the raw proxy header.
  const clientIp = req.ip || req.headers['x-forwarded-for'] as string || 'unknown';
  increment(analytics.clientsByIp, clientIp);

  // User agents are truncated to 50 characters to keep the key space small.
  const agentKey = (req.headers['user-agent'] || 'unknown').slice(0, 50);
  increment(analytics.clientsByUserAgent, agentKey);

  // Hour bucket in the form YYYY-MM-DDTHH, cut from the ISO timestamp.
  const hourBucket = new Date().toISOString().slice(0, 13);
  increment(analytics.hourlyRequests, hourBucket);
}
/**
 * Record one tool invocation: bump the counters and prepend the call to the
 * recent-calls list, dropping the oldest entry once the list exceeds
 * MAX_RECENT_CALLS.
 *
 * @param toolName Name of the invoked tool
 * @param req Express request that triggered the call (source of client details)
 */
function trackToolCall(toolName: string, req: Request) {
  analytics.totalToolCalls += 1;

  const previousCount = analytics.toolCalls[toolName] || 0;
  analytics.toolCalls[toolName] = previousCount + 1;

  const recent = analytics.recentToolCalls;
  const entry: ToolCall = {
    tool: toolName,
    timestamp: new Date().toISOString(),
    clientIp: req.ip || req.headers['x-forwarded-for'] as string || 'unknown',
    userAgent: (req.headers['user-agent'] || 'unknown').slice(0, 50),
  };

  // Newest first; at most one entry is evicted per call.
  recent.unshift(entry);
  if (recent.length > MAX_RECENT_CALLS) {
    recent.pop();
  }
}
/**
 * Format the time elapsed since `analytics.serverStartTime` as a compact string.
 *
 * Leading zero units are omitted: "3d 4h 5m", "4h 5m" or just "5m".
 * Because `serverStartTime` belongs to the analytics state, the value is
 * measured from whatever start time that state carries.
 */
function getUptime(): string {
  const MINUTE_MS = 1000 * 60;
  const HOUR_MS = MINUTE_MS * 60;
  const DAY_MS = HOUR_MS * 24;

  const startedAt = new Date(analytics.serverStartTime).getTime();
  const elapsedMs = Date.now() - startedAt;

  const days = Math.floor(elapsedMs / DAY_MS);
  const hours = Math.floor((elapsedMs % DAY_MS) / HOUR_MS);
  const minutes = Math.floor((elapsedMs % HOUR_MS) / MINUTE_MS);

  // Build from the smallest unit outwards, adding larger units only when needed.
  const parts = [`${minutes}m`];
  if (days > 0 || hours > 0) {
    parts.unshift(`${hours}h`);
  }
  if (days > 0) {
    parts.unshift(`${days}d`);
  }
  return parts.join(' ');
}
// Configuration
// Port to listen on: PORT environment variable parsed as base-10, default 8080.
const PORT = parseInt(process.env.PORT || '8080', 10);
// Host/interface to bind to, default 0.0.0.0.
const HOST = process.env.HOST || '0.0.0.0';
// Default API keys from environment.
// These are captured once at module load, before extractApiKeys() starts
// overwriting the corresponding process.env entries per request.
const DEFAULT_GOOGLE_MAPS_API_KEY = process.env.GOOGLE_MAPS_API_KEY;
const DEFAULT_GRABMAPS_API_KEY = process.env.GRABMAPS_API_KEY;
const DEFAULT_AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID;
const DEFAULT_AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY;
// Unlike the keys above, the region always has a value (falls back to 'ap-southeast-5').
const DEFAULT_AWS_REGION = process.env.AWS_REGION || 'ap-southeast-5';
/**
 * Apply per-request API credentials to `process.env`.
 *
 * For each supported credential the value is taken from the query string,
 * then from the matching request header. When the request supplies one it is
 * written to the environment variable (and the fact is logged); otherwise the
 * variable is restored to the default captured at startup, if there is one.
 *
 * NOTE(review): `process.env` is process-wide, so the values written here are
 * visible to every request handled by this process, not only the current one -
 * confirm this is acceptable when requests with different keys overlap.
 *
 * @param req Express request carrying optional credential overrides
 */
function extractApiKeys(req: Request): void {
  const credentials: Array<{
    queryParam: string;
    header: string;
    envVar: string;
    fallback: string | undefined;
    announce: (value: string) => string;
  }> = [
    {
      queryParam: 'googleMapsApiKey',
      header: 'x-google-maps-api-key',
      envVar: 'GOOGLE_MAPS_API_KEY',
      fallback: DEFAULT_GOOGLE_MAPS_API_KEY,
      announce: () => 'Using user-provided Google Maps API key',
    },
    {
      queryParam: 'grabMapsApiKey',
      header: 'x-grabmaps-api-key',
      envVar: 'GRABMAPS_API_KEY',
      fallback: DEFAULT_GRABMAPS_API_KEY,
      announce: () => 'Using user-provided GrabMaps API key',
    },
    // AWS credentials (for AWS Location Service / GrabMaps integration)
    {
      queryParam: 'awsAccessKeyId',
      header: 'x-aws-access-key-id',
      envVar: 'AWS_ACCESS_KEY_ID',
      fallback: DEFAULT_AWS_ACCESS_KEY_ID,
      announce: () => 'Using user-provided AWS Access Key ID',
    },
    {
      queryParam: 'awsSecretAccessKey',
      header: 'x-aws-secret-access-key',
      envVar: 'AWS_SECRET_ACCESS_KEY',
      fallback: DEFAULT_AWS_SECRET_ACCESS_KEY,
      announce: () => 'Using user-provided AWS Secret Access Key',
    },
    {
      // The region default is always a non-empty string, so it is always restored.
      queryParam: 'awsRegion',
      header: 'x-aws-region',
      envVar: 'AWS_REGION',
      fallback: DEFAULT_AWS_REGION,
      announce: (value) => `Using user-provided AWS Region: ${value}`,
    },
  ];

  for (const { queryParam, header, envVar, fallback, announce } of credentials) {
    // Query parameter wins over the header when both are present.
    const supplied = req.query[queryParam] as string ||
      req.headers[header] as string;
    if (supplied) {
      process.env[envVar] = supplied;
      console.log(announce(supplied));
    } else if (fallback) {
      process.env[envVar] = fallback;
    }
  }
}
// The single MCP server instance that all tools are registered on.
const mcpServer = new McpServer({
  name: 'Malaysia Open Data MCP Server',
  version: '1.0.0',
});

// Tool groups, registered in the order listed here.
const toolSets: ToolRegistrationFn[] = [
  registerDataCatalogueTools,
  registerDosmTools,
  registerWeatherTools,
  registerDashboardTools,
  registerUnifiedSearchTools,
  registerParquetTools,
  registerGtfsTools,
  registerTransportTools,
  registerFloodTools,
];

for (const registerToolSet of toolSets) {
  registerToolSet(mcpServer);
}

// Smoke-test tool: takes no arguments and returns a greeting with the current time.
mcpServer.tool(
  prefixToolName('hello'),
  'A simple test tool to verify that the MCP server is working correctly',
  {},
  async () => ({
    content: [
      {
        type: 'text',
        text: JSON.stringify({
          message: 'Hello from Malaysia Open Data MCP!',
          timestamp: new Date().toISOString(),
          transport: 'streamable-http',
        }, null, 2),
      },
    ],
  })
);
// Create Express app
const app = express();
// Middleware
// CORS is fully open (any origin) and exposes the Mcp-Session-Id response header
// so that cross-origin clients can read it.
// NOTE(review): allowedHeaders does not list the x-google-maps-api-key /
// x-grabmaps-api-key / x-aws-* headers that extractApiKeys() reads, so browser
// clients may be unable to send them cross-origin - confirm whether that matters.
app.use(cors({
origin: '*', // Allow all origins for MCP clients
methods: ['GET', 'POST', 'DELETE', 'OPTIONS'],
allowedHeaders: ['Content-Type', 'Accept', 'Authorization', 'Mcp-Session-Id'],
exposedHeaders: ['Mcp-Session-Id'],
}));
// Parse JSON request bodies into req.body.
app.use(express.json());
// GET /health - liveness probe; the request is counted, then a static status
// document with the current time is returned.
app.get('/health', (req: Request, res: Response) => {
  trackRequest(req, '/health');

  const statusReport = {
    status: 'healthy',
    server: 'Malaysia Open Data MCP',
    version: '1.0.0',
    transport: 'streamable-http',
    timestamp: new Date().toISOString(),
  };
  res.json(statusReport);
});
// GET /analytics - JSON summary of the collected analytics.
// Tools are ordered by call count, clients are limited to the 20 busiest IPs,
// and hourly data is limited to the 24 most recent hour buckets (oldest first).
// NOTE(review): the response includes client IP addresses and no access check
// is performed in this handler - confirm the endpoint is protected upstream.
app.get('/analytics', (req: Request, res: Response) => {
  trackRequest(req, '/analytics');

  // Comparator: larger counts first.
  const byCountDesc = ([, left]: [string, number], [, right]: [string, number]): number => right - left;

  const toolsByCount = Object.fromEntries(
    Object.entries(analytics.toolCalls).sort(byCountDesc)
  );

  const busiestClients = Object.fromEntries(
    Object.entries(analytics.clientsByIp).sort(byCountDesc).slice(0, 20)
  );

  // Sort hour keys newest-first, keep 24, then flip back to chronological order.
  const recentHours = Object.fromEntries(
    Object.entries(analytics.hourlyRequests)
      .sort(([hourA], [hourB]) => hourB.localeCompare(hourA))
      .slice(0, 24)
      .reverse()
  );

  res.json({
    server: 'Malaysia Open Data MCP',
    uptime: getUptime(),
    serverStartTime: analytics.serverStartTime,
    summary: {
      totalRequests: analytics.totalRequests,
      totalToolCalls: analytics.totalToolCalls,
      uniqueClients: Object.keys(analytics.clientsByIp).length,
    },
    breakdown: {
      byMethod: analytics.requestsByMethod,
      byEndpoint: analytics.requestsByEndpoint,
      byTool: toolsByCount,
    },
    clients: {
      byIp: busiestClients,
      byUserAgent: analytics.clientsByUserAgent,
    },
    hourlyRequests: recentHours,
    recentToolCalls: analytics.recentToolCalls.slice(0, 20),
  });
});
// GET /analytics/tools - per-tool call counts (highest first) with each tool's
// share of all tool calls, plus the full recent-calls list.
app.get('/analytics/tools', (req: Request, res: Response) => {
  trackRequest(req, '/analytics/tools');

  const totalCalls = analytics.totalToolCalls;

  // Share of all calls as a one-decimal percentage; '0%' when nothing was called yet.
  const shareOf = (count: number): string => {
    if (totalCalls > 0) {
      return ((count / totalCalls) * 100).toFixed(1) + '%';
    }
    return '0%';
  };

  const rankedTools = Object.entries(analytics.toolCalls)
    .sort((left, right) => right[1] - left[1])
    .map(([tool, count]) => ({ tool, count, percentage: shareOf(count) }));

  res.json({
    totalToolCalls: totalCalls,
    tools: rankedTools,
    recentCalls: analytics.recentToolCalls,
  });
});
// POST /analytics/reset?key=... - clear every counter and restart the uptime clock.
//
// Authorization: the `key` query parameter must be a string equal to the
// configured key. A missing key is always rejected, including when
// ANALYTICS_RESET_KEY is not set.
// SECURITY: the literal below is a legacy default that is publicly visible in the
// source. It is honoured only when ANALYTICS_RESET_KEY is not configured; set that
// variable in any real deployment.
app.post('/analytics/reset', (req: Request, res: Response) => {
  const expectedKey = process.env.ANALYTICS_RESET_KEY || 'malaysia-opendata-2024';
  const resetKey = req.query.key;
  if (typeof resetKey !== 'string' || resetKey !== expectedKey) {
    res.status(403).json({ error: 'Invalid reset key' });
    return;
  }

  analytics.totalRequests = 0;
  analytics.totalToolCalls = 0;
  analytics.requestsByMethod = {};
  analytics.requestsByEndpoint = {};
  analytics.toolCalls = {};
  analytics.recentToolCalls = [];
  analytics.clientsByIp = {};
  analytics.clientsByUserAgent = {};
  analytics.hourlyRequests = {};
  analytics.serverStartTime = new Date().toISOString();

  // Persist the cleared state; failures are logged inside saveAnalytics.
  saveAnalytics();
  res.json({ message: 'Analytics reset successfully', timestamp: analytics.serverStartTime });
});
// POST /analytics/import?key=... - merge a previously exported analytics
// document into the live counters (counts are added, not replaced).
//
// The body may use either the stored shape (`toolCalls`, `requestsByMethod`, ...)
// or the shape returned by GET /analytics (`breakdown.byTool`, `clients.byIp`, ...).
// Only finite numeric counts are merged; anything else in the body is ignored so
// that malformed input cannot turn counters into strings or NaN.
//
// Authorization: the `key` query parameter must be a string equal to the
// configured key. A missing key is always rejected, including when
// ANALYTICS_RESET_KEY is not set.
// SECURITY: the literal below is a legacy default that is publicly visible in the
// source. It is honoured only when ANALYTICS_RESET_KEY is not configured; set that
// variable in any real deployment.
app.post('/analytics/import', (req: Request, res: Response) => {
  const expectedKey = process.env.ANALYTICS_RESET_KEY || 'malaysia-opendata-2024';
  const importKey = req.query.key;
  if (typeof importKey !== 'string' || importKey !== expectedKey) {
    res.status(403).json({ error: 'Invalid import key' });
    return;
  }

  const isCount = (value: unknown): value is number =>
    typeof value === 'number' && Number.isFinite(value);

  // Add every numeric entry of `source` onto the matching counter in `target`.
  const addCounts = (target: Record<string, number>, source: unknown): void => {
    if (typeof source !== 'object' || source === null) {
      return;
    }
    for (const [name, count] of Object.entries(source)) {
      // '__proto__' is skipped: assigning to it would not create a counter.
      if (name !== '__proto__' && isCount(count)) {
        target[name] = (target[name] || 0) + count;
      }
    }
  };

  try {
    const importData = req.body;
    if (typeof importData !== 'object' || importData === null) {
      res.status(400).json({ error: 'Failed to import analytics data' });
      return;
    }

    // Merge imported data with current analytics (add to existing counts)
    if (isCount(importData.totalRequests)) {
      analytics.totalRequests += importData.totalRequests;
    }
    if (isCount(importData.totalToolCalls)) {
      analytics.totalToolCalls += importData.totalToolCalls;
    }

    // For each counter map, the stored field name wins over the summary field name.
    addCounts(analytics.toolCalls, importData.toolCalls || importData.breakdown?.byTool);
    addCounts(analytics.requestsByMethod, importData.requestsByMethod || importData.breakdown?.byMethod);
    addCounts(analytics.requestsByEndpoint, importData.requestsByEndpoint || importData.breakdown?.byEndpoint);
    addCounts(analytics.hourlyRequests, importData.hourlyRequests);
    addCounts(analytics.clientsByIp, importData.clientsByIp || importData.clients?.byIp);
    addCounts(analytics.clientsByUserAgent, importData.clientsByUserAgent || importData.clients?.byUserAgent);

    // Add recent tool calls (prepend imported ones), keeping the list capped.
    if (Array.isArray(importData.recentToolCalls)) {
      analytics.recentToolCalls = [...importData.recentToolCalls, ...analytics.recentToolCalls].slice(0, MAX_RECENT_CALLS);
    }

    // Persist the merged state; failures are logged inside saveAnalytics.
    saveAnalytics();
    res.json({
      message: 'Analytics imported successfully',
      current: {
        totalRequests: analytics.totalRequests,
        totalToolCalls: analytics.totalToolCalls,
      }
    });
  } catch (error) {
    console.error('Failed to import analytics:', error);
    res.status(400).json({ error: 'Failed to import analytics data' });
  }
});
// Analytics dashboard - visual HTML page
app.get('/analytics/dashboard', (req: Request, res: Response) => {
trackRequest(req, '/analytics/dashboard');
const html = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Malaysia Open Data MCP - Analytics Dashboard</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
min-height: 100vh;
color: #e4e4e7;
padding: 20px;
}
.container { max-width: 1400px; margin: 0 auto; }
header {
text-align: center;
margin-bottom: 30px;
padding: 20px;
background: rgba(255,255,255,0.05);
border-radius: 16px;
backdrop-filter: blur(10px);
}
header h1 {
font-size: 2rem;
background: linear-gradient(90deg, #60a5fa, #a78bfa);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 8px;
}
header p { color: #a1a1aa; }
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 30px;
}
.stat-card {
background: rgba(255,255,255,0.05);
border-radius: 12px;
padding: 24px;
text-align: center;
border: 1px solid rgba(255,255,255,0.1);
transition: transform 0.2s;
}
.stat-card:hover { transform: translateY(-4px); }
.stat-value {
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(90deg, #34d399, #60a5fa);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.stat-label { color: #a1a1aa; margin-top: 8px; font-size: 0.9rem; }
.charts-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 20px;
margin-bottom: 30px;
}
.chart-card {
background: rgba(255,255,255,0.05);
border-radius: 12px;
padding: 24px;
border: 1px solid rgba(255,255,255,0.1);
}
.chart-card h3 {
margin-bottom: 16px;
color: #e4e4e7;
font-size: 1.1rem;
}
.chart-container { position: relative; height: 300px; }
.recent-calls {
background: rgba(255,255,255,0.05);
border-radius: 12px;
padding: 24px;
border: 1px solid rgba(255,255,255,0.1);
}
.recent-calls h3 { margin-bottom: 16px; }
.call-list { max-height: 400px; overflow-y: auto; }
.call-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 12px;
background: rgba(255,255,255,0.03);
border-radius: 8px;
margin-bottom: 8px;
}
.call-tool {
font-weight: 600;
color: #60a5fa;
font-family: monospace;
}
.call-time { color: #71717a; font-size: 0.85rem; }
.call-client { color: #a1a1aa; font-size: 0.8rem; }
.refresh-btn {
position: fixed;
bottom: 20px;
right: 20px;
background: linear-gradient(90deg, #3b82f6, #8b5cf6);
color: white;
border: none;
padding: 12px 24px;
border-radius: 50px;
cursor: pointer;
font-weight: 600;
box-shadow: 0 4px 15px rgba(59, 130, 246, 0.4);
transition: transform 0.2s;
}
.refresh-btn:hover { transform: scale(1.05); }
.uptime-badge {
display: inline-block;
background: rgba(52, 211, 153, 0.2);
color: #34d399;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.85rem;
margin-top: 8px;
}
@media (max-width: 768px) {
.charts-grid { grid-template-columns: 1fr; }
.stat-value { font-size: 2rem; }
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>🇲🇾 Malaysia Open Data MCP Analytics</h1>
<p>Real-time usage statistics for the MCP server</p>
<div style="margin-top: 12px;">
<span class="uptime-badge" id="uptime">Loading...</span>
<span class="uptime-badge" style="background: rgba(251, 146, 60, 0.2); color: #fb923c; margin-left: 8px;">🔥 Firebase Connected</span>
</div>
</header>
<div class="stats-grid">
<div class="stat-card">
<div class="stat-value" id="totalRequests">-</div>
<div class="stat-label">Total Requests</div>
</div>
<div class="stat-card">
<div class="stat-value" id="totalToolCalls">-</div>
<div class="stat-label">Tool Calls</div>
</div>
<div class="stat-card">
<div class="stat-value" id="uniqueClients">-</div>
<div class="stat-label">Unique Clients</div>
</div>
<div class="stat-card">
<div class="stat-value" id="topTool">-</div>
<div class="stat-label">Top Tool</div>
</div>
</div>
<div class="charts-grid">
<div class="chart-card">
<h3>📊 Tool Usage Distribution</h3>
<div class="chart-container">
<canvas id="toolChart"></canvas>
</div>
</div>
<div class="chart-card">
<h3>📈 Hourly Requests (Last 24h)</h3>
<div class="chart-container">
<canvas id="hourlyChart"></canvas>
</div>
</div>
<div class="chart-card">
<h3>🔗 Requests by Endpoint</h3>
<div class="chart-container">
<canvas id="endpointChart"></canvas>
</div>
</div>
<div class="chart-card">
<h3>👥 Top Clients</h3>
<div class="chart-container">
<canvas id="clientChart"></canvas>
</div>
</div>
</div>
<div class="recent-calls">
<h3>🕐 Recent Tool Calls</h3>
<div class="call-list" id="recentCalls">Loading...</div>
</div>
</div>
<button class="refresh-btn" onclick="loadData()">🔄 Refresh</button>
<script>
let toolChart, hourlyChart, endpointChart, clientChart;
const chartColors = [
'#60a5fa', '#a78bfa', '#34d399', '#fbbf24', '#f87171',
'#38bdf8', '#c084fc', '#4ade80', '#facc15', '#fb923c'
];
async function loadData() {
try {
// Get base path from current URL (handles nginx reverse proxy paths like /datagovmy/)
const basePath = window.location.pathname.replace(/\\/analytics\\/dashboard\\/?$/, '');
const res = await fetch(basePath + '/analytics');
const data = await res.json();
document.getElementById('uptime').textContent = 'Uptime: ' + data.uptime;
document.getElementById('totalRequests').textContent = data.summary.totalRequests.toLocaleString();
document.getElementById('totalToolCalls').textContent = data.summary.totalToolCalls.toLocaleString();
document.getElementById('uniqueClients').textContent = data.summary.uniqueClients.toLocaleString();
const tools = Object.entries(data.breakdown.byTool);
document.getElementById('topTool').textContent = tools.length > 0 ? tools[0][0].replace('datagovmy_', '') : '-';
updateToolChart(data.breakdown.byTool);
updateHourlyChart(data.hourlyRequests);
updateEndpointChart(data.breakdown.byEndpoint);
updateClientChart(data.clients.byUserAgent);
updateRecentCalls(data.recentToolCalls);
} catch (err) {
console.error('Failed to load analytics:', err);
}
}
function updateToolChart(toolData) {
const labels = Object.keys(toolData).map(t => t.replace('datagovmy_', ''));
const values = Object.values(toolData);
if (toolChart) toolChart.destroy();
toolChart = new Chart(document.getElementById('toolChart'), {
type: 'doughnut',
data: {
labels,
datasets: [{
data: values,
backgroundColor: chartColors,
borderWidth: 0
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: {
legend: { position: 'right', labels: { color: '#a1a1aa' } }
}
}
});
}
function updateHourlyChart(hourlyData) {
const labels = Object.keys(hourlyData).map(h => h.substring(11) + ':00');
const values = Object.values(hourlyData);
if (hourlyChart) hourlyChart.destroy();
hourlyChart = new Chart(document.getElementById('hourlyChart'), {
type: 'line',
data: {
labels,
datasets: [{
label: 'Requests',
data: values,
borderColor: '#60a5fa',
backgroundColor: 'rgba(96, 165, 250, 0.1)',
fill: true,
tension: 0.4
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: { legend: { display: false } },
scales: {
x: { ticks: { color: '#71717a' }, grid: { color: 'rgba(255,255,255,0.05)' } },
y: { ticks: { color: '#71717a' }, grid: { color: 'rgba(255,255,255,0.05)' } }
}
}
});
}
function updateEndpointChart(endpointData) {
const labels = Object.keys(endpointData);
const values = Object.values(endpointData);
if (endpointChart) endpointChart.destroy();
endpointChart = new Chart(document.getElementById('endpointChart'), {
type: 'bar',
data: {
labels,
datasets: [{
data: values,
backgroundColor: chartColors,
borderRadius: 4
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: { legend: { display: false } },
scales: {
x: { ticks: { color: '#71717a' }, grid: { display: false } },
y: { ticks: { color: '#71717a' }, grid: { color: 'rgba(255,255,255,0.05)' } }
}
}
});
}
function updateClientChart(clientData) {
const entries = Object.entries(clientData).slice(0, 5);
const labels = entries.map(([k]) => k.substring(0, 30));
const values = entries.map(([, v]) => v);
if (clientChart) clientChart.destroy();
clientChart = new Chart(document.getElementById('clientChart'), {
type: 'bar',
data: {
labels,
datasets: [{
data: values,
backgroundColor: chartColors,
borderRadius: 4
}]
},
options: {
indexAxis: 'y',
responsive: true,
maintainAspectRatio: false,
plugins: { legend: { display: false } },
scales: {
x: { ticks: { color: '#71717a' }, grid: { color: 'rgba(255,255,255,0.05)' } },
y: { ticks: { color: '#71717a' }, grid: { display: false } }
}
}
});
}
function updateRecentCalls(calls) {
  // Renders the "Recent Tool Calls" list. Tool names and user agents come from
  // client requests, so every value is written with textContent and is never
  // parsed as HTML.
  const container = document.getElementById('recentCalls');
  if (calls.length === 0) {
    container.innerHTML = '<p style="color: #71717a;">No tool calls yet</p>';
    return;
  }
  // Creates an element with the given class and plain-text content
  const makeEl = (tag, className, text) => {
    const el = document.createElement(tag);
    el.className = className;
    el.textContent = text;
    return el;
  };
  const rows = calls.map(call => {
    const details = document.createElement('div');
    details.append(
      makeEl('span', 'call-tool', String(call.tool).replace('datagovmy_', '')),
      makeEl('div', 'call-client', String(call.userAgent))
    );
    const row = makeEl('div', 'call-item', '');
    row.append(details, makeEl('span', 'call-time', new Date(call.timestamp).toLocaleTimeString()));
    return row;
  });
  // Drop whatever was rendered before (placeholder text or older rows), then attach the new rows
  container.textContent = '';
  container.append(...rows);
}
loadData();
setInterval(loadData, 30000);
</script>
</body>
</html>
`;
res.setHeader('Content-Type', 'text/html');
res.send(html);
});
// Create the single Streamable HTTP transport used by the /mcp route.
// No session ID generator is supplied, so the transport runs stateless.
// NOTE(review): this one instance is shared by every request that reaches
// the /mcp handler; confirm against the MCP SDK documentation that a
// stateless transport may be reused across requests.
const transport = new StreamableHTTPServerTransport({
sessionIdGenerator: undefined, // Stateless transport
});
// MCP endpoint: one route for every HTTP verb (POST requests, GET for SSE,
// DELETE to close a session). It records analytics, picks up caller-supplied
// API keys, logs a summary of the request and then hands it to the transport.
app.all('/mcp', async (req: Request, res: Response) => {
  try {
    // Analytics: count the hit against this endpoint
    trackRequest(req, '/mcp');

    // API keys may arrive as query params or headers (user's keys take priority)
    extractApiKeys(req);

    // Analytics: attribute JSON-RPC tools/call requests to the named tool
    const rpc = req.body;
    const calledTool = rpc && rpc.method === 'tools/call' ? rpc.params?.name : undefined;
    if (calledTool) {
      trackToolCall(calledTool, req);
    }

    // Log what arrived and which credentials are present (presence only, never the values)
    const {
      GOOGLE_MAPS_API_KEY,
      GRABMAPS_API_KEY,
      AWS_ACCESS_KEY_ID,
      AWS_SECRET_ACCESS_KEY,
    } = process.env;
    console.log('Received MCP request:', {
      method: req.method,
      path: req.path,
      mcpMethod: rpc?.method,
      hasGoogleMapsKey: Boolean(GOOGLE_MAPS_API_KEY),
      hasGrabMapsKey: Boolean(GRABMAPS_API_KEY),
      hasAwsCredentials: Boolean(AWS_ACCESS_KEY_ID && AWS_SECRET_ACCESS_KEY),
    });

    await transport.handleRequest(req, res, rpc);
  } catch (error) {
    console.error('MCP request error:', error);

    // Nothing more can be sent once the response has started
    if (res.headersSent) {
      return;
    }

    // -32603 is the JSON-RPC 2.0 "Internal error" code
    const internalError = { code: -32603, message: 'Internal server error' };
    res.status(500).json({ jsonrpc: '2.0', error: internalError, id: null });
  }
});
// Root endpoint: records the hit, then answers with a static JSON description
// of the server (routes, supported API-key inputs, documentation link).
app.get('/', (req: Request, res: Response) => {
  trackRequest(req, '/');

  // Routes exposed by this server
  const endpoints = {
    mcp: '/mcp',
    health: '/health',
    analytics: '/analytics',
    analyticsTools: '/analytics/tools',
    analyticsDashboard: '/analytics/dashboard',
  };

  // Accepted query parameters for caller-supplied API keys
  const queryParams = {
    googleMapsApiKey: 'Google Maps API key for geocoding',
    grabMapsApiKey: 'GrabMaps API key for Southeast Asia geocoding',
    awsAccessKeyId: 'AWS Access Key ID for AWS Location Service',
    awsSecretAccessKey: 'AWS Secret Access Key',
    awsRegion: 'AWS Region (default: ap-southeast-5)',
  };

  // Accepted request headers for caller-supplied API keys
  const headers = {
    'X-Google-Maps-Api-Key': 'Google Maps API key',
    'X-GrabMaps-Api-Key': 'GrabMaps API key',
    'X-AWS-Access-Key-Id': 'AWS Access Key ID',
    'X-AWS-Secret-Access-Key': 'AWS Secret Access Key',
    'X-AWS-Region': 'AWS Region',
  };

  const apiKeySupport = {
    description: 'You can provide your own API keys via URL query params or headers',
    queryParams,
    headers,
    example: '/mcp?googleMapsApiKey=YOUR_KEY',
    important: 'GrabMaps requires ALL FOUR params: grabMapsApiKey + awsAccessKeyId + awsSecretAccessKey + awsRegion. Without any one of these, GrabMaps will not work.',
  };

  res.json({
    name: 'Malaysia Open Data MCP Server',
    version: '1.0.0',
    description: 'MCP server for Malaysia Open Data APIs (data.gov.my, OpenDOSM, weather, transport)',
    transport: 'streamable-http',
    endpoints,
    apiKeySupport,
    documentation: 'https://github.com/hithereiamaliff/mcp-datagovmy',
  });
});
// Connect server to transport and start listening.
// A failure while connecting or while calling listen() is logged and ends the
// process with exit code 1.
void (async () => {
  try {
    await mcpServer.server.connect(transport);

    app.listen(PORT, HOST, () => {
      const rule = '='.repeat(60);
      const baseUrl = `http://${HOST}:${PORT}`;

      // Startup banner, printed one console.log call per line
      const banner = [
        rule,
        '🇲🇾 Malaysia Open Data MCP Server (Streamable HTTP)',
        rule,
        `📍 Server running on ${baseUrl}`,
        `📡 MCP endpoint: ${baseUrl}/mcp`,
        `❤️ Health check: ${baseUrl}/health`,
        rule,
        '',
        'Test with MCP Inspector:',
        ` npx @modelcontextprotocol/inspector`,
        ` Select "Streamable HTTP" and enter: http://localhost:${PORT}/mcp`,
        '',
      ];
      for (const line of banner) {
        console.log(line);
      }
    });
  } catch (error) {
    console.error('Failed to start MCP server:', error);
    process.exit(1);
  }
})();
// Graceful shutdown: on SIGTERM or SIGINT, log which signal arrived and exit with code 0
for (const signal of ['SIGTERM', 'SIGINT'] as const) {
  process.on(signal, () => {
    console.log(`Received ${signal}, shutting down gracefully...`);
    process.exit(0);
  });
}
```
--------------------------------------------------------------------------------
/src/catalogue.tools.ts:
--------------------------------------------------------------------------------
```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import {
CATALOGUE_INDEX as catalogueIndex,
CATALOGUE_FILTERS as catalogueFilters,
DatasetMetadata,
SiteCategory
} from '../scripts/catalogue-index.js';
// Base URL (scheme and host, no trailing slash) of the Malaysia Open Data API
const API_BASE_URL = 'https://api.data.gov.my';
// Helper functions for searching and filtering the catalogue

// Returns the first catalogue entry whose `id` equals the given one exactly,
// or `undefined` when the catalogue has no such entry.
const getDatasetById = (id: string): DatasetMetadata | undefined => {
  for (const entry of catalogueIndex) {
    if (entry.id === id) {
      return entry;
    }
  }
  return undefined;
};
// Case-insensitive substring search over the English and Malay titles and
// descriptions. An entry is returned as soon as any one of those four fields
// contains the query; result order follows the catalogue order.
const searchDatasets = (query: string): DatasetMetadata[] => {
  const needle = query.toLowerCase();
  const mentionsNeedle = (text: string): boolean => text.toLowerCase().includes(needle);

  return catalogueIndex.filter(({ title_en, title_ms, description_en, description_ms }) =>
    [title_en, title_ms, description_en, description_ms].some(text => mentionsNeedle(text))
  );
};
// Returns the catalogue entries that satisfy every supplied filter.
//
// Recognised keys on `filters`:
//   frequency  - exact match against the entry's `frequency`
//   geography  - one value or a list; every value must be found in the entry's `geography`
//   demography - one value or a list; every value must be found in the entry's `demography`
//   dataSource - one value or a list; every value must be found in the entry's `data_source`
// Missing, empty or otherwise falsy filters are ignored.
//
// A single value is treated as a one-element list. Calling `.every()` directly on the
// supplied value throws a TypeError for a plain string, which is what the
// filter_datasets tool passes for geography, demography and dataSource.
//
// NOTE(review): callers in this file also pass `category` and `yearRange`; neither is
// applied here, so those two filters currently have no effect on the result.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- loosely typed tool arguments
const filterDatasets = (filters: any): DatasetMetadata[] => {
  // Normalises "one value or a list" into a list; falsy input means "no constraint"
  const toList = (value: unknown): string[] => {
    if (!value) return [];
    return (Array.isArray(value) ? value : [value]) as string[];
  };

  // Normalise once, outside the per-dataset loop
  const criteria = filters ?? {};
  const frequency = criteria.frequency;
  const geography = toList(criteria.geography);
  const demography = toList(criteria.demography);
  const dataSources = toList(criteria.dataSource);

  return catalogueIndex.filter(d => {
    if (frequency && d.frequency !== frequency) return false;
    if (!geography.every(g => d.geography.includes(g))) return false;
    if (!demography.every(dem => d.demography.includes(dem))) return false;
    if (!dataSources.every(ds => d.data_source.includes(ds))) return false;
    return true;
  });
};
// Path of the OpenDOSM endpoint on the Malaysia Open Data API
const OPENDOSM_ENDPOINT = '/opendosm';
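// Legacy hard-coded list of dataset IDs with short descriptions, kept for backward compatibility.
// The list_known_datasets tool below builds its output from catalogueIndex rather than from this array.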
const KNOWN_DATASETS = [
{ id: 'air_pollution', description: 'Monthly Air Pollution' },
{ id: 'arc_dosm', description: 'DOSM\'s Advance Release Calendar' },
{ id: 'arrivals', description: 'Monthly Arrivals by Nationality & Sex' },
{ id: 'arrivals_soe', description: 'Monthly Arrivals by State of Entry, Nationality & Sex' },
{ id: 'births', description: 'Daily Live Births' },
{ id: 'births_annual', description: 'Annual Live Births' },
{ id: 'births_annual_sex_ethnic', description: 'Annual Live Births by Sex & Ethnicity' },
{ id: 'births_annual_sex_ethnic_state', description: 'Annual Live Births by State, Sex, & Ethnicity' },
{ id: 'births_annual_state', description: 'Annual Live Births by State' },
{ id: 'births_district_sex', description: 'Annual Live Births by District & Sex' },
{ id: 'blood_donations', description: 'Daily Blood Donations by Blood Group' },
{ id: 'blood_donations_state', description: 'Daily Blood Donations by Blood Group & State' },
{ id: 'bop_balance', description: 'Balance of Key BOP Components' },
{ id: 'cellular_subscribers', description: 'Cellular Subscribers by Plan Type' },
{ id: 'completion_school_state', description: 'School Completion Rates by State' },
{ id: 'cosmetic_notifications', description: 'Notified Cosmetic Products' },
{ id: 'cosmetic_notifications_cancelled', description: 'Cancelled Cosmetic Product Notifications' },
{ id: 'cosmetics_manufacturers', description: 'Approved Manufacturers of Cosmetic Products' },
{ id: 'covid_cases', description: 'Daily COVID-19 Cases by State' },
{ id: 'covid_cases_age', description: 'Daily COVID-19 Cases by Age Group & State' },
{ id: 'covid_cases_vaxstatus', description: 'Daily COVID-19 Cases by Vaccination Status & State' },
{ id: 'covid_deaths_linelist', description: 'Transactional Records: Deaths due to COVID-19' },
{ id: 'cpi_3d', description: 'Monthly CPI by Group (3-digit)' },
{ id: 'cpi_4d', description: 'Monthly CPI by Class (4-digit)' },
{ id: 'cpi_5d', description: 'Monthly CPI by Subclass (5-digit)' },
{ id: 'cpi_annual', description: 'Annual CPI by Division (2-digit)' },
{ id: 'cpi_annual_inflation', description: 'Annual CPI Inflation by Division (2-digit)' },
{ id: 'cpi_core', description: 'Monthly Core CPI by Division (2-digit)' },
{ id: 'cpi_core_inflation', description: 'Monthly Core CPI Inflation by Division (2-digit)' },
{ id: 'cpi_headline', description: 'Monthly CPI by Division (2-digit)' },
{ id: 'cpi_headline_inflation', description: 'Monthly CPI Inflation by Division (2-digit)' },
{ id: 'cpi_lowincome', description: 'Monthly CPI for Low-Income Households' },
{ id: 'cpi_state', description: 'Monthly CPI by State & Division (2-digit)' },
{ id: 'cpi_state_inflation', description: 'Monthly CPI Inflation by State & Division (2-digit)' },
{ id: 'cpi_strata', description: 'Monthly CPI by Strata & Division (2-digit)' },
{ id: 'crime_district', description: 'Crimes by District & Crime Type' },
{ id: 'crops_district_area', description: 'Crop Area by District' },
{ id: 'crops_district_production', description: 'Crop Production by District' },
{ id: 'crops_state', description: 'Crop Area and Production by State' },
{ id: 'currency_in_circulation', description: 'Monthly Currency in Circulation' },
{ id: 'currency_in_circulation_annual', description: 'Annual Currency in Circulation' },
{ id: 'deaths', description: 'Annual Deaths' },
{ id: 'deaths_district_sex', description: 'Annual Deaths by District & Sex' },
{ id: 'deaths_early_childhood', description: 'Annual Early Childhood Deaths' },
{ id: 'deaths_early_childhood_sex', description: 'Annual Early Childhood Deaths by Sex' },
{ id: 'deaths_early_childhood_state', description: 'Annual Early Childhood Deaths by State' },
{ id: 'deaths_early_childhood_state_sex', description: 'Annual Early Childhood Deaths by State & Sex' },
{ id: 'deaths_maternal', description: 'Annual Maternal Deaths' },
{ id: 'deaths_maternal_state', description: 'Annual Maternal Deaths by State' },
{ id: 'deaths_sex_ethnic', description: 'Annual Deaths by Sex & Ethnicity' },
{ id: 'deaths_sex_ethnic_state', description: 'Annual Deaths by State, Sex, & Ethnicity' },
{ id: 'deaths_state', description: 'Annual Deaths by State' },
{ id: 'domains', description: 'Number of Registered .MY Domains' },
{ id: 'domains_dnssec', description: 'Number of Registered .MY Domains with DNSSEC' },
{ id: 'domains_idn', description: 'Number of Registered Internationalised .MY Domains' },
{ id: 'domains_ipv6', description: 'Number of Registered .MY Domains with IPv6 DNS' },
{ id: 'economic_indicators', description: 'Malaysian Economic Indicators' },
{ id: 'electricity_access', description: 'Households with Access to Electricity' },
{ id: 'electricity_consumption', description: 'Monthly Electricity Consumption' },
{ id: 'electricity_supply', description: 'Electricity Supply' },
{ id: 'employment_sector', description: 'Employment by MSIC Sector and Sex' },
{ id: 'enrolment_school_district', description: 'Enrolment in Government Schools by District' },
{ id: 'exchangerates', description: 'Exchange Rates' },
{ id: 'fdi_flows', description: 'Foreign Direct Investment (FDI) Flows' },
{ id: 'federal_budget_moe', description: 'Annual Budget Allocation for the Ministry of Education' },
{ id: 'federal_budget_moh', description: 'Annual Budget Allocation for the Ministry of Health' },
{ id: 'federal_finance_qtr', description: 'Quarterly Federal Government Finance' },
{ id: 'federal_finance_qtr_de', description: 'Quarterly Federal Government Development Expenditure by Function' },
{ id: 'federal_finance_qtr_oe', description: 'Quarterly Federal Government Operating Expenditure by Object' },
{ id: 'federal_finance_qtr_revenue', description: 'Quarterly Federal Government Revenue' },
{ id: 'federal_finance_year', description: 'Annual Federal Government Finance' },
{ id: 'federal_finance_year_de', description: 'Annual Federal Government Development Expenditure by Function' },
{ id: 'federal_finance_year_oe', description: 'Annual Federal Government Operating Expenditure by Object' },
{ id: 'federal_finance_year_revenue', description: 'Annual Federal Government Revenue' },
{ id: 'fertility', description: 'TFR and ASFR' },
{ id: 'fertility_state', description: 'TFR and ASFR by State' },
{ id: 'fish_landings', description: 'Monthly Landings of Marine Fish by State' },
{ id: 'forest_reserve', description: 'Area of Permanent Forest Reserves' },
{ id: 'forest_reserve_state', description: 'Area of Permanent Forest Reserves by State' },
{ id: 'fuelprice', description: 'Price of Petroleum & Diesel' },
{ id: 'gdp_annual_nominal_demand', description: 'Annual Nominal GDP by Expenditure Type' },
{ id: 'gdp_annual_nominal_demand_granular', description: 'Annual Nominal GDP by Expenditure Subtype' },
{ id: 'gdp_annual_nominal_income', description: 'Annual Nominal GDP by Income Component' },
{ id: 'gdp_annual_nominal_supply', description: 'Annual Nominal GDP by Economic Sector' },
{ id: 'gdp_annual_nominal_supply_granular', description: 'Annual Nominal GDP by Economic Subsector' },
{ id: 'gdp_annual_real_demand', description: 'Annual Real GDP by Expenditure Type' },
{ id: 'gdp_annual_real_demand_granular', description: 'Annual Real GDP by Expenditure Subtype' },
{ id: 'gdp_annual_real_supply', description: 'Annual Real GDP by Economic Sector' },
{ id: 'gdp_annual_real_supply_granular', description: 'Annual Real GDP by Economic Subsector' },
{ id: 'gdp_district_real_supply', description: 'Annual Real GDP by District & Economic Sector' },
{ id: 'gdp_gni_annual_nominal', description: 'Annual Nominal GDP & GNI: 1947 to Present' },
{ id: 'gdp_gni_annual_real', description: 'Annual Real GDP & GNI: 1970 to Present' },
{ id: 'gdp_lookup', description: 'Lookup Table: GDP' },
{ id: 'gdp_qtr_nominal', description: 'Quarterly Nominal GDP' },
{ id: 'gdp_qtr_nominal_demand', description: 'Quarterly Nominal GDP by Expenditure Type' },
{ id: 'gdp_qtr_nominal_demand_granular', description: 'Quarterly Nominal GDP by Expenditure Subtype' },
{ id: 'gdp_qtr_nominal_supply', description: 'Quarterly Nominal GDP by Economic Sector' },
{ id: 'gdp_qtr_nominal_supply_granular', description: 'Quarterly Nominal GDP by Economic Subsector' },
{ id: 'gdp_qtr_real', description: 'Quarterly Real GDP' },
{ id: 'gdp_qtr_real_demand', description: 'Quarterly Real GDP by Expenditure Type' },
{ id: 'gdp_qtr_real_demand_granular', description: 'Quarterly Real GDP by Expenditure Subtype' },
{ id: 'gdp_qtr_real_sa', description: 'Quarterly Real GDP (Seasonally Adjusted)' },
{ id: 'gdp_qtr_real_sa_demand', description: 'Quarterly Real GDP (Seasonally Adjusted) by Expenditure Type' },
{ id: 'gdp_qtr_real_sa_supply', description: 'Quarterly Real GDP (Seasonally Adjusted) by Economic Sector' },
{ id: 'gdp_qtr_real_supply', description: 'Quarterly Real GDP by Economic Sector' },
{ id: 'gdp_qtr_real_supply_granular', description: 'Quarterly Real GDP by Economic Subsector' },
{ id: 'gdp_state_real_supply', description: 'Annual Real GDP by State & Economic Sector' },
{ id: 'ghg_emissions', description: 'Greenhouse Gas Emissions' },
{ id: 'healthcare_staff', description: 'Healthcare Staff by State and Staff Type' },
{ id: 'hh_access_amenities', description: 'Access to Basic Amenities by State & District' },
{ id: 'hh_income', description: 'Household Income' },
{ id: 'hh_income_district', description: 'Household Income by Administrative District' },
{ id: 'hh_income_state', description: 'Household Income by State' },
{ id: 'hh_inequality', description: 'Income Inequality' },
{ id: 'hh_inequality_district', description: 'Income Inequality by District' },
{ id: 'hh_inequality_state', description: 'Income Inequality by State' },
{ id: 'hh_poverty', description: 'Poverty' },
{ id: 'hh_poverty_district', description: 'Poverty by Administrative District' },
{ id: 'hh_poverty_state', description: 'Poverty by State' },
{ id: 'hh_profile', description: 'Number of Households and Living Quarters' },
{ id: 'hh_profile_state', description: 'Number of Households and Living Quarters by State' },
{ id: 'hies_district', description: 'Household Income and Expenditure: Administrative Districts' },
{ id: 'hies_malaysia_percentile', description: 'Household Income by Percentile' },
{ id: 'hies_state', description: 'Household Income and Expenditure: States' },
{ id: 'hies_state_percentile', description: 'Household Income by State & Percentile' },
{ id: 'hospital_beds', description: 'Hospital Beds by State and Hospital Type' },
{ id: 'infant_immunisation', description: 'Infant Immunisation Coverage' },
{ id: 'interestrates', description: 'Monthly Interest Rates' },
{ id: 'interestrates_annual', description: 'Annual Interest Rates' },
{ id: 'iowrt', description: 'Headline Wholesale & Retail Trade' },
{ id: 'iowrt_2d', description: 'Wholesale & Retail Trade by Division (2 digit)' },
{ id: 'iowrt_3d', description: 'Wholesale & Retail Trade by Group (3 digit)' },
{ id: 'ipi', description: 'Industrial Production Index (IPI)' },
{ id: 'ipi_1d', description: 'IPI by Section (1 digit)' },
{ id: 'ipi_2d', description: 'IPI by Division (2 digit)' },
{ id: 'ipi_3d', description: 'IPI by Group (3 digit)' },
{ id: 'ipi_5d', description: 'IPI by Item (5 digit)' },
{ id: 'ipi_domestic', description: 'IPI for Domestic-Oriented Divisions (2 digit)' },
{ id: 'ipi_export', description: 'IPI for Export-Oriented Divisions (2 digit)' },
{ id: 'lecturers_uni', description: 'Lecturers in Public Universities by Citizenship & Sex' },
{ id: 'lfs_district', description: 'Annual Principal Labour Force Statistics by District' },
{ id: 'lfs_month', description: 'Monthly Principal Labour Force Statistics' },
{ id: 'lfs_month_duration', description: 'Monthly Unemployment by Duration' },
{ id: 'lfs_month_sa', description: 'Monthly Principal Labour Force Statistics, Seasonally Adjusted' },
{ id: 'lfs_month_status', description: 'Monthly Employment by Status in Employment' },
{ id: 'lfs_month_youth', description: 'Monthly Youth Unemployment' },
{ id: 'lfs_qtr', description: 'Quarterly Principal Labour Force Statistics' },
{ id: 'lfs_qtr_sru_age', description: 'Quarterly Skills-Related Underemployment by Age' },
{ id: 'lfs_qtr_sru_sex', description: 'Quarterly Skills-Related Underemployment by Sex' },
{ id: 'lfs_qtr_state', description: 'Quarterly Principal Labour Force Statistics by State' },
{ id: 'lfs_qtr_tru_age', description: 'Quarterly Time-Related Underemployment by Age' },
{ id: 'lfs_qtr_tru_sex', description: 'Quarterly Time-Related Underemployment by Sex' },
{ id: 'lfs_state_sex', description: 'Annual Principal Labour Force Statistics by State & Sex' },
{ id: 'lfs_year', description: 'Annual Principal Labour Force Statistics' },
{ id: 'lfs_year_sex', description: 'Annual Principal Labour Force Statistics by Sex' },
{ id: 'local_authority_sex', description: 'Female Representation in Local Authorities' },
{ id: 'lookup_federal_finance', description: 'Lookup Table: Federal Finance' },
{ id: 'lookup_item', description: 'PriceCatcher: Item Lookup' },
{ id: 'lookup_money_banking', description: 'Lookup Table: Money & Banking' },
{ id: 'lookup_premise', description: 'PriceCatcher: Premise Lookup' },
{ id: 'lookup_state', description: 'PriceCatcher: State Lookup' },
{ id: 'mpr', description: 'Monetary Policy Rate' },
{ id: 'msic_lookup', description: 'Lookup Table: MSIC' },
{ id: 'pe_bop', description: 'Balance of Payments' },
{ id: 'pe_bop_qtr', description: 'Quarterly Balance of Payments' },
{ id: 'pe_iip', description: 'International Investment Position' },
{ id: 'pe_iip_qtr', description: 'Quarterly International Investment Position' },
{ id: 'pe_reserves', description: 'International Reserves' },
{ id: 'pms_state', description: 'Manufacturing Statistics by State' },
{ id: 'pms_subsector', description: 'Manufacturing Statistics by Subsector' },
{ id: 'population_age', description: 'Population by Age Group, Sex and Ethnicity' },
{ id: 'population_district', description: 'Population by District, Sex and Ethnicity' },
{ id: 'population_state', description: 'Population by State, Sex and Ethnicity' },
{ id: 'pricecatcher', description: 'PriceCatcher: Daily Prices' },
{ id: 'producer_price_index', description: 'Producer Price Index' },
{ id: 'producer_price_index_1d', description: 'Producer Price Index by Section (1 digit)' },
{ id: 'producer_price_index_2d', description: 'Producer Price Index by Division (2 digit)' },
{ id: 'producer_price_index_3d', description: 'Producer Price Index by Group (3 digit)' },
{ id: 'property_commercial_all', description: 'Commercial Property Transactions' },
{ id: 'property_commercial_state', description: 'Commercial Property Transactions by State' },
{ id: 'property_residences_all', description: 'Residential Property Transactions' },
{ id: 'property_residences_state', description: 'Residential Property Transactions by State' },
{ id: 'public_expenditure', description: 'Federal Government Expenditure' },
{ id: 'public_finance_snapshot', description: 'Snapshot of Public Finance' },
{ id: 'public_revenue', description: 'Federal Government Revenue' },
{ id: 'school_enrolment_nat', description: 'National School Enrolment by Type of School' },
{ id: 'school_enrolment_state', description: 'School Enrolment by State and Type of School' },
{ id: 'services_producer_price_index', description: 'Services Producer Price Index' },
{ id: 'services_producer_price_index_2d', description: 'Services Producer Price Index by Division (2 digit)' },
{ id: 'services_producer_price_index_3d', description: 'Services Producer Price Index by Group (3 digit)' },
{ id: 'social_security', description: 'Social Security Protection' },
{ id: 'student_enrolment_higher', description: 'Student Enrolment in Higher Education' },
{ id: 'student_enrolment_tvt', description: 'Student Enrolment in TVET' },
{ id: 'student_enrolment_uni', description: 'Student Enrolment in Public Universities' },
{ id: 'tourism_inbound_asean', description: 'Inbound Tourists by Country (ASEAN)' },
{ id: 'tourism_inbound_east_asia', description: 'Inbound Tourists by Country (East Asia)' },
{ id: 'tourism_inbound_europe', description: 'Inbound Tourists by Country (Europe)' },
{ id: 'tourism_inbound_long_haul', description: 'Inbound Tourists by Country (Long Haul)' },
{ id: 'tourism_inbound_monthly', description: 'Monthly Inbound Tourists' },
{ id: 'tourism_inbound_purpose', description: 'Inbound Tourists by Purpose of Visit' },
{ id: 'tourism_inbound_south_asia', description: 'Inbound Tourists by Country (South Asia)' },
{ id: 'tourism_inbound_total', description: 'Total Inbound Tourists' },
{ id: 'trade_balance', description: 'Balance of Trade' },
{ id: 'trade_balance_1d', description: 'Balance of Trade by Section (1 digit)' },
{ id: 'trade_balance_2d', description: 'Balance of Trade by Division (2 digit)' },
{ id: 'trade_country', description: 'Trade by Country' },
{ id: 'trade_export_1d', description: 'Exports by Section (1 digit)' },
{ id: 'trade_export_2d', description: 'Exports by Division (2 digit)' },
{ id: 'trade_export_3d', description: 'Exports by Group (3 digit)' },
{ id: 'trade_export_5d', description: 'Exports by Item (5 digit)' },
{ id: 'trade_import_1d', description: 'Imports by Section (1 digit)' },
{ id: 'trade_import_2d', description: 'Imports by Division (2 digit)' },
{ id: 'trade_import_3d', description: 'Imports by Group (3 digit)' },
{ id: 'trade_import_5d', description: 'Imports by Item (5 digit)' },
{ id: 'unemployment_rate', description: 'Unemployment Rate' },
{ id: 'unemployment_rate_sa', description: 'Unemployment Rate, Seasonally Adjusted' },
{ id: 'water_supply_area', description: 'Water Supply Coverage by Area' },
{ id: 'water_supply_state', description: 'Water Supply Coverage by State' }
];
import { prefixToolName } from './utils/tool-naming.js';
export function registerDataCatalogueTools(server: McpServer) {
// List all datasets with rich metadata
server.tool(
prefixToolName('list_datasets_catalogue'),
'Lists all datasets from the comprehensive catalogue with rich metadata',
{
limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
offset: z.number().min(0).optional().describe('Number of records to skip for pagination'),
},
async ({ limit = 20, offset = 0 }) => {
const paginatedDatasets = catalogueIndex.slice(offset, offset + limit);
const total = catalogueIndex.length;
return {
content: [
{
type: 'text',
text: JSON.stringify({
message: 'Datasets retrieved from comprehensive catalogue',
total_datasets: total,
showing: `${offset + 1}-${Math.min(offset + limit, total)} of ${total}`,
pagination: {
limit,
offset,
next_offset: offset + limit < total ? offset + limit : null,
previous_offset: offset > 0 ? Math.max(0, offset - limit) : null,
},
datasets: paginatedDatasets,
timestamp: new Date().toISOString()
}, null, 2),
},
],
};
}
);
// Search datasets by query
server.tool(
prefixToolName('search_datasets'),
'Search datasets by keywords across titles, descriptions and metadata',
{
query: z.string().describe('Search query to match against dataset metadata'),
limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
},
async ({ query, limit = 20 }) => {
const searchResults = searchDatasets(query);
const limitedResults = searchResults.slice(0, limit);
return {
content: [
{
type: 'text',
text: JSON.stringify({
message: 'Search results for datasets',
query,
total_matches: searchResults.length,
showing: Math.min(limit, searchResults.length),
datasets: limitedResults,
timestamp: new Date().toISOString()
}, null, 2),
},
],
};
}
);
// Filter datasets by various criteria
server.tool(
prefixToolName('filter_datasets'),
'Filter datasets by category, geography, frequency, demography, data source or year range',
{
category: z.string().optional().describe('Category or subcategory to filter by'),
geography: z.string().optional().describe('Geographic coverage to filter by (e.g., NATIONAL, STATE, DISTRICT)'),
frequency: z.string().optional().describe('Data frequency to filter by (e.g., DAILY, MONTHLY, YEARLY)'),
demography: z.string().optional().describe('Demographic dimension to filter by (e.g., SEX, AGE)'),
dataSource: z.string().optional().describe('Data source agency to filter by (e.g., DOSM, BNM)'),
startYear: z.number().optional().describe('Start year for filtering datasets by time coverage'),
endYear: z.number().optional().describe('End year for filtering datasets by time coverage'),
limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
},
async ({ category, geography, frequency, demography, dataSource, startYear, endYear, limit = 20 }) => {
const yearRange = startYear && endYear ? [startYear, endYear] as [number, number] : undefined;
const filteredDatasets = filterDatasets({
category,
geography,
frequency,
demography,
dataSource,
yearRange
});
const limitedResults = filteredDatasets.slice(0, limit);
return {
content: [
{
type: 'text',
text: JSON.stringify({
message: 'Filtered datasets',
filters: {
category,
geography,
frequency,
demography,
dataSource,
year_range: yearRange ? `${yearRange[0]}-${yearRange[1]}` : undefined
},
total_matches: filteredDatasets.length,
showing: Math.min(limit, filteredDatasets.length),
datasets: limitedResults,
timestamp: new Date().toISOString()
}, null, 2),
},
],
};
}
);
// Look up a single dataset in the local catalogue index by its ID
server.tool(
  prefixToolName('get_dataset_metadata'),
  'Get comprehensive metadata for a dataset by ID from the local catalogue',
  {
    id: z.string().describe('ID of the dataset to retrieve metadata for'),
  },
  async ({ id }) => {
    const found = getDatasetById(id);

    if (found) {
      // Hit: return the full record plus its download links (null when a link is absent)
      const successPayload = {
        message: 'Dataset metadata retrieved successfully',
        dataset: found,
        download_links: {
          parquet: found.link_parquet || null,
          csv: found.link_csv || null,
          preview: found.link_preview || null,
        },
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(successPayload, null, 2) }],
      };
    }

    // Miss: suggest up to five catalogue entries whose ID contains, or is
    // contained in, the requested ID
    const nearMatches = catalogueIndex
      .filter((entry: DatasetMetadata) => entry.id.includes(id) || id.includes(entry.id))
      .slice(0, 5)
      .map(({ id: datasetId, title_en }: DatasetMetadata) => ({ id: datasetId, title_en }));

    const missPayload = {
      error: `Dataset with ID "${id}" not found in the catalogue`,
      suggested_datasets: nearMatches.length > 0 ? nearMatches : undefined,
      total_datasets_available: catalogueIndex.length,
      note: 'Use list_datasets_catalogue to see all available datasets',
      timestamp: new Date().toISOString(),
    };
    return {
      content: [{ type: 'text', text: JSON.stringify(missPayload, null, 2) }],
    };
  }
);
// Expose the filter values held in catalogueFilters, along with the catalogue size
server.tool(
  prefixToolName('get_catalogue_filters'),
  'Get all available filter values for searching and filtering datasets',
  {},
  async () => {
    const payload = {
      message: 'Available filter values for the dataset catalogue',
      filters: catalogueFilters,
      total_datasets: catalogueIndex.length,
      timestamp: new Date().toISOString(),
    };
    return {
      content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
    };
  }
);
// Legacy tool (kept for backward compatibility): lists dataset IDs in the
// older, simpler { id, description } shape
server.tool(
  prefixToolName('list_known_datasets'),
  'Lists known dataset IDs that can be used with the OpenDOSM API',
  {},
  async () => {
    // Reduce each catalogue record to its ID and English title
    const legacyEntries = catalogueIndex.map(({ id, title_en }: DatasetMetadata) => ({
      id,
      description: title_en,
    }));

    const payload = {
      message: 'Available dataset IDs for OpenDOSM API',
      datasets: legacyEntries,
      note: 'Use these dataset IDs with the get_dataset_details tool, or try the new get_dataset_metadata tool for richer information',
      timestamp: new Date().toISOString(),
    };
    return {
      content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
    };
  }
);
// Query the OpenDOSM endpoint for one dataset; without an ID the tool answers
// with the list of known dataset IDs instead of calling the API
server.tool(
  prefixToolName('list_datasets'),
  'Lists available datasets in the Malaysia Open Data catalogue',
  {
    id: z.string().optional().describe('Dataset ID to retrieve (e.g., "cpi_core")'),
    limit: z.number().min(1).max(100).optional().describe('Number of results to return (1-100)'),
    meta: z.boolean().optional().describe('Whether to return metadata about available datasets'),
  },
  async ({ id, limit = 10, meta = false }) => {
    try {
      if (!id) {
        // No ID supplied: point the caller at the IDs we know about
        const guidance = {
          message: 'The OpenDOSM API requires a specific dataset ID',
          note: 'Please use one of the following dataset IDs:',
          available_datasets: KNOWN_DATASETS,
          example_usage: 'Use list_datasets with id="cpi_core" or get_dataset_details with id="cpi_core"',
          timestamp: new Date().toISOString(),
        };
        return {
          content: [{ type: 'text', text: JSON.stringify(guidance, null, 2) }],
        };
      }

      const url = `${API_BASE_URL}${OPENDOSM_ENDPOINT}`;
      // Query string: always the ID, plus limit/meta when they are set
      const params: Record<string, any> = {
        id,
        ...(limit ? { limit } : {}),
        ...(meta ? { meta: 1 } : {}),
      };
      const headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
      };

      const { data } = await axios.get(url, { params, headers });

      const success = {
        message: 'Datasets retrieved successfully',
        dataset_id: id,
        params: params,
        endpoint: url,
        datasets: data,
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(success, null, 2) }],
      };
    } catch (error) {
      console.error('Error fetching datasets:', error);

      // The failure may stem from a bad ID: check it against the known list
      // and collect entries whose ID or description mentions it
      const isKnownId = KNOWN_DATASETS.some(dataset => dataset.id === id);
      const needle = id ? id.toLowerCase() : undefined;
      const suggestions = needle === undefined
        ? []
        : KNOWN_DATASETS.filter(dataset =>
            dataset.id.includes(needle) || dataset.description.toLowerCase().includes(needle));

      const failure = {
        error: 'Failed to fetch datasets',
        message: error instanceof Error ? error.message : 'Unknown error',
        status: axios.isAxiosError(error) ? error.response?.status : undefined,
        possible_issue: id && !isKnownId ? `Dataset ID "${id}" may not be valid` : undefined,
        suggested_datasets: suggestions.length > 0 ? suggestions : undefined,
        available_datasets: 'Use list_known_datasets tool to see all available dataset IDs',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(failure, null, 2) }],
      };
    }
  }
);
// Get dataset details
// Fetches records for one dataset from the OpenDOSM endpoint.
//
// An ID that is missing from KNOWN_DATASETS is still requested: the previous
// implementation returned early with a note claiming "we will try to fetch it
// anyway", so the fetch never happened and the invalid-ID diagnostics in the
// catch block could not fire for unknown IDs. Now the request is attempted,
// a successful response carries a `warning` field for unknown IDs, and a
// failed one reports `possible_issue` plus suggestions.
server.tool(
  prefixToolName('get_dataset_details'),
  'Gets detailed information about a specific dataset',
  {
    id: z.string().describe('ID of the dataset to retrieve (e.g., "cpi_core")'),
    limit: z.number().min(1).optional().describe('Maximum number of records to return'),
    offset: z.number().min(0).optional().describe('Number of records to skip for pagination'),
  },
  async ({ id, limit = 10, offset }) => {
    const isKnownId = KNOWN_DATASETS.some(dataset => dataset.id === id);
    if (!isKnownId) {
      console.warn(`Dataset ID "${id}" is not in the known list; attempting to fetch it anyway`);
    }

    try {
      const url = `${API_BASE_URL}${OPENDOSM_ENDPOINT}`;
      const params: Record<string, any> = { id };
      // Add optional parameters if provided
      if (limit) params.limit = limit;
      if (offset !== undefined) params.offset = offset;
      // Setup request headers
      const headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
      };
      const response = await axios.get(url, { params, headers });
      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify({
              message: 'Dataset details retrieved successfully',
              dataset_id: id,
              // Omitted from the JSON when the ID is known (undefined values are dropped)
              warning: isKnownId ? undefined : `Dataset ID "${id}" is not in our known list; it was fetched anyway`,
              endpoint: url,
              details: response.data,
              timestamp: new Date().toISOString()
            }, null, 2),
          },
        ],
      };
    } catch (error) {
      console.error('Error fetching dataset details:', error);
      // The failure may be due to an invalid ID: look for known datasets whose
      // ID or description mentions the requested ID
      const needle = id.toLowerCase();
      const suggestedDatasets = KNOWN_DATASETS.filter(dataset =>
        dataset.id.includes(needle) ||
        dataset.description.toLowerCase().includes(needle)
      );
      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify({
              error: 'Failed to fetch dataset details',
              dataset_id: id,
              message: error instanceof Error ? error.message : 'Unknown error',
              status: axios.isAxiosError(error) ? error.response?.status : undefined,
              possible_issue: !isKnownId ? `Dataset ID "${id}" may not be valid` : undefined,
              suggested_datasets: suggestedDatasets.length > 0 ? suggestedDatasets : undefined,
              available_datasets: 'Use list_known_datasets tool to see all available dataset IDs',
              timestamp: new Date().toISOString()
            }, null, 2),
          },
        ],
      };
    }
  }
);
// Tally how often each English category name appears across the catalogue
// and return the tallies sorted alphabetically by category
server.tool(
  prefixToolName('list_dataset_categories'),
  'Lists all available dataset categories from the catalogue',
  {},
  async () => {
    try {
      const tally: Record<string, number> = {};
      // One increment per site_category entry of every dataset
      for (const dataset of catalogueIndex) {
        for (const { category_en } of dataset.site_category) {
          tally[category_en] = (tally[category_en] || 0) + 1;
        }
      }

      const categories = Object.entries(tally).map(([category, dataset_count]) => ({
        category,
        dataset_count,
      }));
      categories.sort((left, right) => left.category.localeCompare(right.category));

      const payload = {
        message: 'Dataset categories available',
        categories,
        total_categories: categories.length,
        note: 'For specific datasets, use the filter_datasets tool with the category parameter',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
      };
    } catch (error) {
      console.error('Error generating dataset categories:', error);
      const failure = {
        error: 'Failed to generate dataset categories',
        message: error instanceof Error ? error.message : 'Unknown error',
        alternative: 'Please use list_datasets_catalogue to see available datasets',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(failure, null, 2) }],
      };
    }
  }
);
// Tally how often each data source appears across the catalogue and return
// the tallies ordered from most to fewest datasets
server.tool(
  prefixToolName('list_dataset_agencies'),
  'Lists all agencies (data sources) providing datasets',
  {},
  async () => {
    try {
      const tally: Record<string, number> = {};
      // One increment per data_source entry of every dataset
      for (const dataset of catalogueIndex) {
        for (const source of dataset.data_source) {
          tally[source] = (tally[source] || 0) + 1;
        }
      }

      const agencies = Object.entries(tally).map(([agency, dataset_count]) => ({
        agency,
        dataset_count,
      }));
      agencies.sort((left, right) => right.dataset_count - left.dataset_count);

      const payload = {
        message: 'Dataset agencies available',
        agencies,
        total_agencies: agencies.length,
        note: 'For specific datasets, use the filter_datasets tool with the dataSource parameter',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
      };
    } catch (error) {
      console.error('Error generating dataset agencies:', error);
      const failure = {
        error: 'Failed to generate dataset agencies',
        message: error instanceof Error ? error.message : 'Unknown error',
        alternative: 'Please use list_datasets_catalogue to see available datasets',
        timestamp: new Date().toISOString(),
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(failure, null, 2) }],
      };
    }
  }
);
// The following tools are kept for backward compatibility with the OpenDOSM API
// They make direct API calls rather than using our local catalogue
}
```
--------------------------------------------------------------------------------
/src/gtfs.tools.ts:
--------------------------------------------------------------------------------
```typescript
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import axios from 'axios';
import * as GtfsRealtimeBindings from 'gtfs-realtime-bindings';
import JSZip from 'jszip';
import csvParser from 'csv-parser';
import { Readable } from 'stream';
import { prefixToolName } from './utils/tool-naming.js';
import { LocationClient, SearchPlaceIndexForTextCommand } from '@aws-sdk/client-location';
// Base URL of the Malaysia Open Data API (no trailing slash).
const API_BASE_URL = 'https://api.data.gov.my';
// GTFS endpoint paths. Each starts with '/', presumably so it can be appended
// directly to API_BASE_URL - confirm against the request-building code.
// Static feed:
const GTFS_STATIC_ENDPOINT = '/gtfs-static';
// Realtime vehicle positions:
const GTFS_REALTIME_ENDPOINT = '/gtfs-realtime/vehicle-position';
// Realtime trip updates:
const GTFS_TRIP_UPDATES_ENDPOINT = '/gtfs-realtime/trip-update';
// User-facing note: real-time data through this MCP is limited; lists
// third-party apps (and the cities each covers) to use instead.
const REALTIME_DATA_NOTE = "Real-time data access through this MCP is limited. For up-to-date train and bus schedules, bus locations, and arrivals in real-time, please use these apps: Google Maps (Penang, Kuala Lumpur, Selangor, Putrajaya, Kuantan, Johor Bahru), MyRapid PULSE (Penang, Kuala Lumpur, Selangor, Putrajaya, Kuantan), Moovit (Penang, Kuala Lumpur, Selangor, Putrajaya, Kuantan, Johor Bahru), or Lugo (Johor Bahru).";
// User-facing hint for 404 responses: check provider/category; Prasarana needs a category.
const ERROR_404_NOTE = "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required.";
// The 404 hint followed by the real-time note, separated by a single space.
const COMBINED_ERROR_NOTE = `${ERROR_404_NOTE} ${REALTIME_DATA_NOTE}`;
// Geocoding service URLs.
// Google Maps Geocoding API (JSON output); used with an API key:
const GOOGLE_MAPS_GEOCODING_API = 'https://maps.googleapis.com/maps/api/geocode/json';
// OpenStreetMap Nominatim search endpoint:
const NOMINATIM_API = 'https://nominatim.openstreetmap.org/search';
// Google Maps API key, read lazily from the environment.
// The value is re-read at most once per minute so that a key set after server
// start-up is still picked up without touching process.env on every call.
let googleMapsApiKeyLastChecked = 0;
let cachedGoogleMapsApiKey = '';

/**
 * Return the Google Maps API key from GOOGLE_MAPS_API_KEY.
 * The cached value is reused while it is at most 60 seconds old; on each
 * refresh a message is logged saying whether a key was found.
 * @returns The API key, or an empty string when none is configured
 */
function getGoogleMapsApiKey(): string {
  const currentTime = Date.now();
  const cacheIsFresh = currentTime - googleMapsApiKeyLastChecked <= 60000;
  if (cacheIsFresh) {
    return cachedGoogleMapsApiKey;
  }

  cachedGoogleMapsApiKey = process.env.GOOGLE_MAPS_API_KEY || '';
  googleMapsApiKeyLastChecked = currentTime;
  console.log(
    cachedGoogleMapsApiKey
      ? 'Using Google Maps API for geocoding.'
      : 'No Google Maps API key found. Using Nominatim API for geocoding as fallback.'
  );
  return cachedGoogleMapsApiKey;
}
// Valid providers and categories
const VALID_PROVIDERS = ['mybas-johor', 'ktmb', 'prasarana'];
// Valid categories for Prasarana
const PRASARANA_CATEGORIES = [
  'rapid-bus-penang',
  'rapid-bus-kuantan',
  'rapid-bus-mrtfeeder',
  'rapid-rail-kl',
  'rapid-bus-kl'
];
// Common name mappings to help with user queries
const PROVIDER_MAPPINGS: Record<string, { provider: string; category?: string }> = {
  // Direct provider mappings
  'mybas': { provider: 'mybas-johor' },
  'mybas johor': { provider: 'mybas-johor' },
  'mybas johor bahru': { provider: 'mybas-johor' },
  'ktmb': { provider: 'ktmb' },
  'ktm': { provider: 'ktmb' },
  'keretapi tanah melayu': { provider: 'ktmb' },
  'keretapi tanah melayu berhad': { provider: 'ktmb' },
  'prasarana': { provider: 'prasarana', category: 'rapid-rail-kl' },
  // Prasarana services (mapped to provider + category)
  'rapid rail': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'rapid rail kl': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'rapid kl rail': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'rapid-rail': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'rapid-rail-kl': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'mrt': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'lrt': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'monorail': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'monorel': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'kl mrt': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'kl lrt': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'kl monorail': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'kl monorel': { provider: 'prasarana', category: 'rapid-rail-kl' },
  'rapid kl bus': { provider: 'prasarana', category: 'rapid-bus-kl' },
  'rapid bus kl': { provider: 'prasarana', category: 'rapid-bus-kl' },
  'rapid kl': { provider: 'prasarana', category: 'rapid-rail-kl' }, // Default to rail when just 'rapid kl' is specified
  'rapid penang': { provider: 'prasarana', category: 'rapid-bus-penang' },
  'rapid bus penang': { provider: 'prasarana', category: 'rapid-bus-penang' },
  'rapid kuantan': { provider: 'prasarana', category: 'rapid-bus-kuantan' },
  'rapid bus kuantan': { provider: 'prasarana', category: 'rapid-bus-kuantan' },
  'mrt feeder': { provider: 'prasarana', category: 'rapid-bus-mrtfeeder' },
  'rapid feeder': { provider: 'prasarana', category: 'rapid-bus-mrtfeeder' },
  'rapid feeder kl': { provider: 'prasarana', category: 'rapid-bus-mrtfeeder' },
  'rapid bus mrt feeder': { provider: 'prasarana', category: 'rapid-bus-mrtfeeder' }
};
/**
 * Normalize provider and category from user input.
 *
 * Resolution rules:
 * - The provider is trimmed, lower-cased and resolved through PROVIDER_MAPPINGS
 *   (own keys only) or accepted as-is when it is one of VALID_PROVIDERS;
 *   anything else yields an `error`.
 * - For Prasarana, an explicit `category` that is a valid Prasarana category
 *   (case-insensitive) always wins. Previously it was silently replaced by the
 *   alias default, because 'prasarana' itself is an alias key.
 * - Otherwise the alias default category is used; with no alias default, a
 *   missing or invalid category yields an `error`.
 * - For other providers, an alias returns just the provider, while a canonical
 *   provider name returns the caller's category unchanged.
 *
 * The returned object is always a fresh copy, so callers cannot mutate the
 * shared mapping table.
 *
 * @param provider Provider name from user input
 * @param category Optional category from user input
 * @returns Normalized provider and category, plus `error` when the input is invalid
 */
function normalizeProviderAndCategory(provider: string, category?: string): { provider: string; category?: string; error?: string } {
  const providerKey = provider.trim().toLowerCase();

  // Own-property check: a plain index lookup would also "find" inherited keys
  // such as "constructor" or "__proto__" and return a non-mapping value.
  const alias = Object.prototype.hasOwnProperty.call(PROVIDER_MAPPINGS, providerKey)
    ? PROVIDER_MAPPINGS[providerKey]
    : undefined;

  if (!alias && !VALID_PROVIDERS.includes(providerKey)) {
    return {
      provider,
      category,
      error: `Invalid provider: ${provider}. Valid providers are: ${VALID_PROVIDERS.join(', ')}`
    };
  }

  const canonicalProvider = alias ? alias.provider : providerKey;

  if (canonicalProvider !== 'prasarana') {
    return alias ? { ...alias } : { provider: canonicalProvider, category };
  }

  // Prasarana: honour an explicit, valid category before any alias default
  const requestedCategory = category?.trim().toLowerCase();
  if (requestedCategory && PRASARANA_CATEGORIES.includes(requestedCategory)) {
    return { provider: canonicalProvider, category: requestedCategory };
  }

  // Missing or unrecognised category: fall back to the alias default, if any
  if (alias?.category) {
    return { provider: canonicalProvider, category: alias.category };
  }

  if (!category) {
    return {
      provider: canonicalProvider,
      error: 'Category parameter is required for prasarana provider'
    };
  }

  return {
    provider: canonicalProvider,
    category,
    error: `Invalid category for prasarana: ${category}. Valid categories are: ${PRASARANA_CATEGORIES.join(', ')}`
  };
}
// Export geocoding functions for testing
export { geocodeLocation, geocodeWithGrabMaps, geocodeWithNominatim, haversineDistance };
// Cache for GTFS data to avoid repeated downloads and parsing.
// One Map per feed type (static feed, realtime vehicle positions, trip
// updates), keyed by a string. Each entry pairs the cached payload with a
// numeric timestamp - presumably Date.now() milliseconds at store time, to be
// compared against the expiry constants below; confirm against the cache
// readers/writers.
const gtfsCache = {
static: new Map<string, { data: any; timestamp: number }>(),
realtime: new Map<string, { data: any; timestamp: number }>(),
tripUpdates: new Map<string, { data: any; timestamp: number }>(),
};
// Cache expiry times (in milliseconds), one per cache above
const STATIC_CACHE_EXPIRY = 24 * 60 * 60 * 1000; // 24 hours
const REALTIME_CACHE_EXPIRY = 30 * 1000; // 30 seconds
const TRIP_UPDATES_CACHE_EXPIRY = 30 * 1000; // 30 seconds
/**
 * Parse CSV data from a readable stream.
 *
 * The stream is piped through csv-parser and every emitted row is collected.
 * The promise rejects if either the parser or the source stream reports an
 * error.
 *
 * @param stream Readable stream containing CSV data
 * @returns Promise resolving to an array of parsed row objects
 */
async function parseCsv(stream: Readable): Promise<any[]> {
  return new Promise((resolve, reject) => {
    const results: any[] = [];
    stream
      // pipe() does not forward errors from the source to the destination, so
      // without this listener a failing source would leave the promise
      // pending forever.
      .on('error', (error) => reject(error))
      .pipe(csvParser())
      .on('data', (data) => results.push(data))
      .on('end', () => resolve(results))
      .on('error', (error) => reject(error));
  });
}
/**
 * Unpack a GTFS Static ZIP archive and parse its core CSV tables.
 *
 * Only entries whose name exactly matches one of the core GTFS file names are
 * read; directories and any other entries are ignored. A table that fails to
 * parse is logged and left out of the result rather than aborting the run.
 *
 * @param buffer Buffer containing the ZIP file
 * @returns Promise resolving to the parsed rows, keyed by file name without ".txt"
 */
async function parseGtfsStaticZip(buffer: Buffer): Promise<Record<string, any[]>> {
  const archive = await new JSZip().loadAsync(buffer);
  const tables: Record<string, any[]> = {};

  // Core GTFS files worth parsing
  const wantedFiles = [
    'agency.txt',
    'stops.txt',
    'routes.txt',
    'trips.txt',
    'stop_times.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'shapes.txt',
    'frequencies.txt',
  ];

  for (const [entryName, entry] of Object.entries(archive.files)) {
    const shouldParse = !entry.dir && wantedFiles.includes(entryName);
    if (!shouldParse) {
      continue;
    }

    try {
      // Read the entry as a Node buffer and feed it to the CSV parser as a stream
      const bytes = await entry.async('nodebuffer');
      const rows = await parseCsv(Readable.from(bytes));
      tables[entryName.replace('.txt', '')] = rows;
    } catch (error) {
      console.error(`Error parsing ${entryName}:`, error);
    }
  }

  return tables;
}
/**
 * Enhance a location query with Malaysian context if needed.
 *
 * Rules, applied in order:
 * 1. A query that already names a Malaysian state/territory (as a whole
 *    word or phrase) or contains "malaysia" is returned unchanged.
 * 2. A query naming one of the listed Penang hotels gets ", Penang, Malaysia".
 * 3. A query mentioning "hotel", "mall" or "airport" gets ", Penang, Malaysia"
 *    when it also mentions a known Penang area, otherwise ", Malaysia".
 * 4. Anything else is returned unchanged.
 *
 * State and hotel names are matched as whole phrases. Plain substring
 * matching treated "Kluang" or "Klang" as containing the state "kl", and any
 * name ending in "g" followed by "hotel" (e.g. "Long Hotel") as Penang's
 * "G Hotel".
 *
 * @param query Original location query
 * @returns Enhanced query with better context for geocoding
 */
function enhanceLocationQuery(query: string): string {
  const malaysianStates = ['penang', 'pulau pinang', 'selangor', 'kuala lumpur', 'kl', 'johor', 'kedah', 'kelantan',
    'melaka', 'malacca', 'negeri sembilan', 'pahang', 'perak', 'perlis', 'sabah',
    'sarawak', 'terengganu', 'labuan', 'putrajaya'];
  // Specific hotels in Penang
  const penangHotels = [
    'hompton hotel', 'cititel', 'g hotel', 'eastern & oriental', 'e&o hotel', 'shangri-la',
    'shangri la', 'holiday inn', 'tune hotel', 'hotel jen', 'the light', 'lexis suites',
    'hard rock hotel', 'bayview', 'equatorial', 'four points', 'vouk hotel', 'neo+', 'neo plus',
    'royale chulan', 'the wembley', 'sunway hotel', 'hotel royal', 'st giles', 'flamingo'
  ];
  // Penang-specific localities
  const penangAreas = ['bayan lepas', 'georgetown', 'george town', 'butterworth', 'bukit mertajam', 'batu ferringhi'];
  // Landmark keywords that usually need extra context to geocode well
  const landmarkKeywords = ['hotel', 'mall', 'airport'];

  const lowercaseQuery = query.toLowerCase();

  const escapeRegExp = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // True when `phrase` occurs in the query and is not glued to a letter or
  // digit on either side. Lookarounds are used instead of \b because some
  // phrases end in a non-word character (e.g. "neo+").
  const containsPhrase = (phrase: string): boolean =>
    new RegExp(`(?<![a-z0-9])${escapeRegExp(phrase)}(?![a-z0-9])`).test(lowercaseQuery);

  if (malaysianStates.some((state) => containsPhrase(state)) || lowercaseQuery.includes('malaysia')) {
    return query; // Already has sufficient context
  }

  if (penangHotels.some((hotel) => containsPhrase(hotel))) {
    return `${query}, Penang, Malaysia`;
  }

  if (landmarkKeywords.some((keyword) => lowercaseQuery.includes(keyword))) {
    if (penangAreas.some((area) => lowercaseQuery.includes(area))) {
      return `${query}, Penang, Malaysia`;
    }
    // Add Malaysia as context to improve geocoding results
    return `${query}, Malaysia`;
  }

  return query;
}
// GrabMaps API key, read lazily from the environment and cached for a minute
let grabMapsApiKeyLastChecked = 0;
let cachedGrabMapsApiKey = '';

/**
 * Return the GrabMaps API key from GRABMAPS_API_KEY.
 * The cached value is reused while it is at most 60 seconds old; on each
 * refresh a message is logged saying whether a key was found.
 * @returns The API key, or an empty string when none is configured
 */
function getGrabMapsApiKey(): string {
  const currentTime = Date.now();
  const cacheIsFresh = currentTime - grabMapsApiKeyLastChecked <= 60000;
  if (cacheIsFresh) {
    return cachedGrabMapsApiKey;
  }

  cachedGrabMapsApiKey = process.env.GRABMAPS_API_KEY || '';
  grabMapsApiKeyLastChecked = currentTime;
  console.log(cachedGrabMapsApiKey ? 'GrabMaps API key available.' : 'No GrabMaps API key found.');
  return cachedGrabMapsApiKey;
}
/**
 * Geocode a location name to coordinates using available providers with fallback.
 *
 * Provider order:
 * 1. GrabMaps, when a GrabMaps key is configured and `country` is a
 *    Southeast Asian country code.
 * 2. Google Maps, when a Google Maps key is configured.
 * 3. Nominatim, always.
 *
 * A Google Maps request that throws (network failure, non-2xx response) is
 * logged and treated like "no result", so the Nominatim fallback still runs.
 * Previously such an exception aborted the whole chain and returned null.
 * Any other unexpected error is logged and reported as null.
 *
 * @param query Location name to geocode
 * @param country Optional country code to limit results (e.g., 'my' for Malaysia)
 * @returns Promise with coordinates or null if not found
 */
async function geocodeLocation(query: string, country: string = 'my'): Promise<{ lat: number; lon: number } | null> {
  try {
    // Enhance the query with better context
    const enhancedQuery = enhanceLocationQuery(query);
    // Get API keys for different providers
    const googleMapsApiKey = getGoogleMapsApiKey();
    const grabMapsApiKey = getGrabMapsApiKey();

    // Try GrabMaps first for Southeast Asian countries (preferred for the region)
    const seaCountries = ['my', 'sg', 'id', 'th', 'ph', 'vn', 'mm', 'la', 'kh', 'bn', 'tl'];
    if (grabMapsApiKey && seaCountries.includes(country.toLowerCase())) {
      console.log('Attempting to geocode with GrabMaps (preferred for Southeast Asia)');
      const grabMapsResult = await geocodeWithGrabMaps(enhancedQuery, query, country, grabMapsApiKey);
      if (grabMapsResult) {
        return grabMapsResult;
      }
      console.log('GrabMaps geocoding failed, falling back to other providers');
    }

    // Try Google Maps if API key is available
    if (googleMapsApiKey) {
      console.log('Attempting to geocode with Google Maps');
      try {
        const googleResult = await geocodeWithGoogleMaps(enhancedQuery, query, country, googleMapsApiKey);
        if (googleResult) {
          return googleResult;
        }
      } catch (googleError) {
        // Keep going: a failing provider must not prevent the next one from being tried
        console.error('Google Maps geocoding error:', googleError);
      }
      console.log('Google Maps geocoding failed, falling back to Nominatim');
    }

    // Fall back to Nominatim (always available as open source solution)
    console.log('Attempting to geocode with Nominatim');
    return await geocodeWithNominatim(enhancedQuery, query, country);
  } catch (error) {
    console.error('Geocoding error:', error);
    return null;
  }
}
/**
 * Geocode using the Google Maps Geocoding API.
 *
 * The enhanced query is tried first. If it yields no usable result and
 * differs from the original query, the original query is tried as well. Both
 * requests are restricted to `country` through the `components` parameter.
 * Request errors are not caught here and propagate to the caller.
 *
 * @param enhancedQuery Query with added context, tried first
 * @param originalQuery Query as typed by the user, tried second
 * @param country Country code used in the `components` filter
 * @param apiKey Google Maps API key
 * @returns Coordinates of the first result, or null when neither query matches
 */
async function geocodeWithGoogleMaps(enhancedQuery: string, originalQuery: string, country: string, apiKey: string): Promise<{ lat: number; lon: number } | null> {
  // Send one geocoding request for `address` and hand back the response body
  const requestGeocode = async (address: string) => {
    const search = new URLSearchParams({
      address,
      components: `country:${country}`,
      key: apiKey,
    });
    const reply = await axios.get(`${GOOGLE_MAPS_GEOCODING_API}?${search.toString()}`);
    return reply.data;
  };
  // A body is usable when it reports status OK and carries at least one result
  const hasResults = (body: any) =>
    body && body.status === 'OK' && body.results && body.results.length > 0;

  console.log(`Geocoding with Google Maps API: "${enhancedQuery}"`);
  const enhancedBody = await requestGeocode(enhancedQuery);
  if (hasResults(enhancedBody)) {
    const top = enhancedBody.results[0];
    const { location } = top.geometry;
    console.log(`Google Maps found location: ${top.formatted_address}`);
    return { lat: location.lat, lon: location.lng };
  }
  console.log(`Google Maps API returned status: ${enhancedBody.status}`);

  // Nothing else to try when the enhancement did not change the query
  if (enhancedQuery === originalQuery) {
    return null;
  }

  console.log(`Enhanced query failed, trying original query: ${originalQuery}`);
  const originalBody = await requestGeocode(originalQuery);
  if (hasResults(originalBody)) {
    const top = originalBody.results[0];
    const { location } = top.geometry;
    console.log(`Google Maps found location with original query: ${top.formatted_address}`);
    return { lat: location.lat, lon: location.lng };
  }
  console.log(`Google Maps API returned status for original query: ${originalBody.status}`);

  return null;
}
/**
 * Geocode a query through AWS Location Service, searching the place index
 * named "explore.place.Grab".
 *
 * Configuration is read from the environment: AWS_ACCESS_KEY_ID,
 * AWS_SECRET_ACCESS_KEY and AWS_REGION (falling back to "ap-southeast-5").
 * A GrabMaps key must also be present (GRABMAPS_API_KEY or the `apiKey`
 * argument); this function only checks that it exists and does not send it
 * with the request.
 *
 * Every failure - missing configuration, no match, or an error raised while
 * creating the client or sending the request - is logged and reported as null.
 *
 * @param enhancedQuery Query text sent to the place index
 * @param originalQuery Original query without enhancement; not used by this function
 * @param country Two-letter country code; converted to a three-letter code for
 *   the country filter, with unrecognised codes falling back to 'MYS'
 * @param apiKey GrabMaps API key, used when GRABMAPS_API_KEY is not set
 * @returns Coordinates of the first match, or null if geocoding failed
 */
async function geocodeWithGrabMaps(enhancedQuery: string, originalQuery: string, country: string, apiKey: string): Promise<{ lat: number; lon: number } | null> {
  console.log(`Attempting to geocode with GrabMaps via AWS Location Service: "${enhancedQuery}"`);
  try {
    const { AWS_ACCESS_KEY_ID: accessKeyId, AWS_SECRET_ACCESS_KEY: secretAccessKey } = process.env;
    const awsRegion = process.env.AWS_REGION || 'ap-southeast-5';
    const grabMapsApiKey = process.env.GRABMAPS_API_KEY || apiKey;

    // Bail out early, one message per missing setting
    if (!accessKeyId) {
      console.error('AWS Access Key ID not found in environment variables');
      return null;
    }
    if (!secretAccessKey) {
      console.error('AWS Secret Access Key not found in environment variables');
      return null;
    }
    if (!grabMapsApiKey) {
      console.error('GrabMaps API key not found in environment variables');
      return null;
    }

    const client = new LocationClient({
      region: awsRegion,
      credentials: { accessKeyId, secretAccessKey },
    });
    console.log(`Using AWS region: ${awsRegion}`);
    console.log('AWS Location Service client created. Attempting to geocode...');

    // The country filter takes three-letter codes; translate the two-letter
    // input, defaulting to Malaysia for anything not listed
    const countryCodes: Record<string, string> = {
      'my': 'MYS', // Malaysia
      'sg': 'SGP', // Singapore
      'id': 'IDN', // Indonesia
      'th': 'THA', // Thailand
      'ph': 'PHL', // Philippines
      'vn': 'VNM', // Vietnam
      'mm': 'MMR', // Myanmar
      'la': 'LAO', // Laos
      'kh': 'KHM', // Cambodia
      'bn': 'BRN', // Brunei
      'tl': 'TLS' // Timor-Leste
    };
    const countryCode2 = country.toLowerCase();
    const countryCode3 = countryCode2 in countryCodes ? countryCodes[countryCode2] : 'MYS';
    console.log(`Using 3-letter country code: ${countryCode3}`);

    const response = await client.send(
      new SearchPlaceIndexForTextCommand({
        IndexName: 'explore.place.Grab', // Place Index backed by the GrabMaps data provider
        Text: enhancedQuery,
        BiasPosition: [101.6942371, 3.1516964], // [longitude, latitude] near Kuala Lumpur
        FilterCountries: [countryCode3],
        MaxResults: 1
      })
    );

    const place = response.Results?.[0]?.Place;
    const point = place?.Geometry?.Point;
    if (place && point) {
      // Point is [longitude, latitude]
      const result = { lat: point[1], lon: point[0] };
      console.log(`\u2705 GrabMaps geocoding successful: ${JSON.stringify(result)}`);
      console.log(`Location: ${place.Label}`);
      return result;
    }

    console.log('No results found with GrabMaps via AWS Location Service');
    return null;
  } catch (error) {
    console.error('Error geocoding with GrabMaps via AWS Location Service:', error);
    // Add a targeted hint for the AWS error names we recognise
    if (error && typeof error === 'object' && 'name' in error) {
      switch ((error as { name: string }).name) {
        case 'UnrecognizedClientException':
          console.error('AWS authentication failed. Please check your AWS credentials.');
          break;
        case 'ValidationException':
          console.error('AWS Location Service validation error. Please check your request parameters.');
          break;
        case 'ResourceNotFoundException':
          console.error('Place Index not found. Please check if "explore.place.Grab" exists in your AWS account.');
          break;
      }
    }
    return null;
  }
}
/**
 * Geocode using the Nominatim API (OpenStreetMap).
 *
 * The enhanced query is tried first. If it returns nothing and differs from
 * the original query, the original query is tried as well. Each request asks
 * for a single JSON result limited to `country`. Request errors are not
 * caught here and propagate to the caller.
 *
 * @param enhancedQuery Query with added context, tried first
 * @param originalQuery Query as typed by the user, tried second
 * @param country Country code passed as `countrycodes`
 * @returns Coordinates of the first result, or null when neither query matches
 */
async function geocodeWithNominatim(enhancedQuery: string, originalQuery: string, country: string): Promise<{ lat: number; lon: number } | null> {
  // Run one Nominatim search for `text` and hand back the response body
  const searchNominatim = async (text: string) => {
    const search = new URLSearchParams({
      q: text,
      format: 'json',
      limit: '1',
      countrycodes: country,
    });
    const reply = await axios.get(`${NOMINATIM_API}?${search.toString()}`, {
      headers: {
        'User-Agent': 'Malaysia-Open-Data-MCP-Server/1.0',
      },
    });
    return reply.data;
  };
  // The result's lat/lon are run through parseFloat to obtain numbers
  const toCoordinates = (hit: any) => ({
    lat: parseFloat(hit.lat),
    lon: parseFloat(hit.lon),
  });

  console.log(`Geocoding with Nominatim API: "${enhancedQuery}"`);
  const enhancedHits = await searchNominatim(enhancedQuery);
  if (enhancedHits && enhancedHits.length > 0) {
    const best = enhancedHits[0];
    console.log(`Nominatim found location: ${best.display_name}`);
    return toCoordinates(best);
  }

  // Nothing else to try when the enhancement did not change the query
  if (enhancedQuery === originalQuery) {
    return null;
  }

  console.log(`Enhanced query failed, trying original query with Nominatim: ${originalQuery}`);
  const originalHits = await searchNominatim(originalQuery);
  if (originalHits && originalHits.length > 0) {
    const best = originalHits[0];
    console.log(`Nominatim found location with original query: ${best.display_name}`);
    return toCoordinates(best);
  }

  return null;
}
/**
 * Great-circle distance between two coordinates, computed with the Haversine
 * formula on a sphere of radius 6371 km.
 * @param lat1 Latitude of point 1, in degrees
 * @param lon1 Longitude of point 1, in degrees
 * @param lat2 Latitude of point 2, in degrees
 * @param lon2 Longitude of point 2, in degrees
 * @returns Distance in kilometers
 */
function haversineDistance(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const EARTH_RADIUS_KM = 6371;
  const toRadians = (degrees: number) => degrees * Math.PI / 180;

  // Sines of half the latitude / longitude differences
  const sinHalfDLat = Math.sin(toRadians(lat2 - lat1) / 2);
  const sinHalfDLon = Math.sin(toRadians(lon2 - lon1) / 2);
  const phi1 = toRadians(lat1);
  const phi2 = toRadians(lat2);

  // Haversine term and the central angle it corresponds to
  const a = sinHalfDLat * sinHalfDLat +
    sinHalfDLon * sinHalfDLon * Math.cos(phi1) * Math.cos(phi2);
  const centralAngle = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));

  return EARTH_RADIUS_KM * centralAngle;
}
/**
* Register GTFS tools with the MCP server
* @param server MCP server instance
*/
export function registerGtfsTools(server: McpServer) {
// Parse GTFS Static data
// Downloads a provider's GTFS static ZIP, parses it and caches the parsed
// result under the normalized provider/category pair.
server.tool(
  prefixToolName('parse_gtfs_static'),
  'Parse GTFS Static data for a specific transport provider. IMPORTANT: For transit queries like "Show me routes from Rapid Penang", use get_transit_routes directly with the provider name. This is a low-level tool - prefer using get_transit_routes or get_transit_stops for most user queries.',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    force_refresh: z.boolean().optional().describe('Force refresh the cache'),
  },
  async ({ provider, category, force_refresh = false }) => {
    // Every branch answers with a single pretty-printed JSON text item
    const respond = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    try {
      // Resolve the user-supplied provider/category before touching cache or network
      const normalized = normalizeProviderAndCategory(provider, category);
      if (normalized.error) {
        return respond({
          success: false,
          message: normalized.error,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES,
          common_names: Object.keys(PROVIDER_MAPPINGS),
          example: normalized.provider === 'prasarana' ? {
            provider: 'prasarana',
            category: 'rapid-rail-kl'
          } : undefined
        });
      }

      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Serve from cache unless a refresh was requested or the entry has expired
      if (!force_refresh && gtfsCache.static.has(cacheKey)) {
        const cached = gtfsCache.static.get(cacheKey)!;
        if (Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
          return respond({
            success: true,
            message: 'Successfully retrieved GTFS static data from cache',
            data: cached.data,
            cached: true,
            timestamp: cached.timestamp,
          });
        }
      }

      // Request with the normalized identifiers so the download matches the
      // cache key; the raw input may be a common name rather than an API id.
      let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}`;
      if (normalizedCategory) {
        url += `?category=${encodeURIComponent(normalizedCategory)}`;
      }

      // Download the ZIP archive and parse it
      const response = await axios.get(url, { responseType: 'arraybuffer' });
      const gtfsData = await parseGtfsStaticZip(Buffer.from(response.data));

      gtfsCache.static.set(cacheKey, {
        data: gtfsData,
        timestamp: Date.now(),
      });

      return respond({
        success: true,
        message: `Successfully parsed GTFS static data for provider: ${provider}${category ? `, category: ${category}` : ''}`,
        data: gtfsData,
        cached: false,
      });
    } catch (error) {
      // Surface HTTP details when the failure came from the request
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;
      const responseData = axiosError?.response?.data;
      return respond({
        success: false,
        message: 'Failed to parse GTFS static data',
        error: error instanceof Error ? error.message : 'Unknown error',
        status_code: statusCode,
        api_url: `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
        response_data: responseData,
        provider_info: {
          provider: provider,
          category: category,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES
        },
        note: "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required."
      });
    }
  }
);
// Parse GTFS Realtime data
// Downloads a provider's realtime feed, decodes the protobuf payload and
// returns the vehicle entities as plain objects.
server.tool(
  prefixToolName('parse_gtfs_realtime'),
  'Parse GTFS Realtime data for a specific transport provider. IMPORTANT: For transit queries like "Show me bus locations from Rapid Penang", use this tool directly with the provider name. Common names like "rapid penang", "rapid kuantan", or "mybas johor" are automatically mapped to the correct provider-category pairs.',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    force_refresh: z.boolean().optional().describe('Force refresh the cache'),
  },
  async ({ provider, category, force_refresh = false }) => {
    // Every branch answers with a single pretty-printed JSON text item
    const respond = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    // Feed times are scaled by 1000 for Date, i.e. treated as epoch seconds.
    // 64-bit fields may arrive as Long-like objects; prefer the full value
    // via toNumber() and fall back to the low 32 bits.
    const toIsoTime = (seconds: any): string | undefined => {
      if (!seconds) {
        return undefined;
      }
      const value = typeof seconds === 'number'
        ? seconds
        : typeof seconds.toNumber === 'function' ? seconds.toNumber() : seconds.low;
      return new Date(value * 1000).toISOString();
    };

    try {
      // Resolve the user-supplied provider/category before touching cache or network
      const normalized = normalizeProviderAndCategory(provider, category);
      if (normalized.error) {
        return respond({
          success: false,
          message: normalized.error,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES,
          common_names: Object.keys(PROVIDER_MAPPINGS),
          example: normalized.provider === 'prasarana' ? {
            provider: 'prasarana',
            category: 'rapid-rail-kl'
          } : undefined
        });
      }

      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Serve from cache unless a refresh was requested or the entry has expired
      if (!force_refresh && gtfsCache.realtime.has(cacheKey)) {
        const cached = gtfsCache.realtime.get(cacheKey)!;
        if (Date.now() - cached.timestamp < REALTIME_CACHE_EXPIRY) {
          return respond({
            success: true,
            message: 'Successfully retrieved GTFS realtime data from cache',
            data: cached.data,
            cached: true,
            timestamp: cached.timestamp,
          });
        }
      }

      // Path ends with a slash and the query string comes last. Nothing may
      // be appended after the query, otherwise it becomes part of the
      // category value.
      let url = `${API_BASE_URL}${GTFS_REALTIME_ENDPOINT}/${normalizedProvider}/`;
      if (normalizedCategory) {
        url += `?category=${encodeURIComponent(normalizedCategory)}`;
      }

      // Download and decode the Protocol Buffer feed
      const response = await axios.get(url, { responseType: 'arraybuffer' });
      const feed = GtfsRealtimeBindings.transit_realtime.FeedMessage.decode(
        new Uint8Array(response.data)
      );

      // Keep only entities carrying a vehicle and flatten them to plain objects
      const vehiclePositions = feed.entity.map(entity => {
        if (!entity.vehicle) {
          return null;
        }
        const vehicle = entity.vehicle;
        return {
          id: entity.id,
          vehicle: {
            trip: vehicle.trip ? {
              tripId: vehicle.trip.tripId,
              routeId: vehicle.trip.routeId,
              directionId: vehicle.trip.directionId,
              startTime: vehicle.trip.startTime,
              startDate: vehicle.trip.startDate,
              scheduleRelationship: vehicle.trip.scheduleRelationship,
            } : undefined,
            position: vehicle.position ? {
              latitude: vehicle.position.latitude,
              longitude: vehicle.position.longitude,
              bearing: vehicle.position.bearing,
              speed: vehicle.position.speed,
            } : undefined,
            currentStopSequence: vehicle.currentStopSequence,
            stopId: vehicle.stopId,
            currentStatus: vehicle.currentStatus,
            timestamp: toIsoTime(vehicle.timestamp),
            congestionLevel: vehicle.congestionLevel,
            occupancyStatus: vehicle.occupancyStatus,
          },
        };
      }).filter(Boolean);

      gtfsCache.realtime.set(cacheKey, {
        data: vehiclePositions,
        timestamp: Date.now(),
      });

      return respond({
        success: true,
        message: `Successfully parsed GTFS realtime data for provider: ${provider}${category ? `, category: ${category}` : ''}`,
        data: vehiclePositions,
        cached: false,
        count: vehiclePositions.length,
      });
    } catch (error) {
      // Surface HTTP details when the failure came from the request
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;
      const responseData = axiosError?.response?.data;
      return respond({
        success: false,
        message: 'Failed to parse GTFS realtime data',
        error: error instanceof Error ? error.message : 'Unknown error',
        status_code: statusCode,
        api_url: `${API_BASE_URL}${GTFS_REALTIME_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
        response_data: responseData,
        provider_info: {
          provider: provider,
          category: category,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES
        },
        note: "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required."
      });
    }
  }
);
// Get transit routes
// Lists routes from the provider's GTFS static data, each annotated with the
// number of trips that reference it.
server.tool(
  prefixToolName('get_transit_routes'),
  'Get transit routes from GTFS data. IMPORTANT: For transit route queries like "Show me bus routes for Rapid Penang", use this tool directly with the provider name.',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    route_id: z.string().optional().describe('Specific route ID to filter by'),
  },
  async ({ provider, category, route_id }) => {
    // Every branch answers with a single pretty-printed JSON text item
    const respond = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    try {
      // Resolve the user-supplied provider/category before touching cache or network
      const normalized = normalizeProviderAndCategory(provider, category);
      if (normalized.error) {
        return respond({
          success: false,
          message: normalized.error,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES,
          common_names: Object.keys(PROVIDER_MAPPINGS),
          example: normalized.provider === 'prasarana' ? {
            provider: 'prasarana',
            category: 'rapid-rail-kl'
          } : undefined
        });
      }

      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Reuse cached static data while it is still fresh
      let gtfsData;
      if (gtfsCache.static.has(cacheKey)) {
        const cached = gtfsCache.static.get(cacheKey)!;
        if (Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
          gtfsData = cached.data;
        }
      }

      // Otherwise download, parse and cache the static feed
      if (!gtfsData) {
        let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}/`;
        if (normalizedCategory) {
          url += `?category=${encodeURIComponent(normalizedCategory)}`;
        }
        const response = await axios.get(url, { responseType: 'arraybuffer' });
        gtfsData = await parseGtfsStaticZip(Buffer.from(response.data));
        gtfsCache.static.set(cacheKey, {
          data: gtfsData,
          timestamp: Date.now(),
        });
      }

      // Extract routes, optionally narrowed to a single route_id
      const routes = gtfsData.routes || [];
      const filteredRoutes = route_id
        ? routes.filter((route: { route_id: string }) => route.route_id === route_id)
        : routes;

      // Count trips per route in one pass instead of rescanning every trip
      // for each route.
      const tripsPerRoute = new Map<string, number>();
      for (const trip of (gtfsData.trips || []) as { route_id: string }[]) {
        tripsPerRoute.set(trip.route_id, (tripsPerRoute.get(trip.route_id) ?? 0) + 1);
      }

      const routesWithTrips = filteredRoutes.map((route: { route_id: string }) => ({
        ...route,
        trips_count: tripsPerRoute.get(route.route_id) ?? 0,
      }));

      return respond({
        success: true,
        message: `Successfully retrieved routes for provider: ${provider}${category ? `, category: ${category}` : ''}`,
        data: routesWithTrips,
        count: routesWithTrips.length,
      });
    } catch (error) {
      // Surface HTTP details when the failure came from the request
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;
      const responseData = axiosError?.response?.data;
      return respond({
        success: false,
        message: 'Failed to retrieve transit routes',
        error: error instanceof Error ? error.message : 'Unknown error',
        status_code: statusCode,
        api_url: `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
        response_data: responseData,
        provider_info: {
          provider: provider,
          category: category,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES
        },
        note: "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required."
      });
    }
  }
);
// Get transit stops
// Lists stops from the provider's GTFS static data, optionally narrowed to a
// single stop_id and/or to the stops served by a given route.
server.tool(
  prefixToolName('get_transit_stops'),
  'Get transit stops from GTFS data. IMPORTANT: For transit stop queries like "Show me bus stops for Rapid Penang", use this tool directly with the provider name. The tool supports common names like "rapid penang", "rapid kuantan", "ktmb", or "mybas johor" which will be automatically mapped to the correct provider and category. No need to use list_transport_agencies first.',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    route_id: z.string().optional().describe('Filter stops by route ID (optional)'),
    stop_id: z.string().optional().describe('Specific stop ID to retrieve (optional)'),
  },
  async ({ provider, category, route_id, stop_id }) => {
    // Every branch answers with a single pretty-printed JSON text item
    const respond = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    try {
      // Resolve the user-supplied provider/category before touching cache or network
      const normalized = normalizeProviderAndCategory(provider, category);
      if (normalized.error) {
        return respond({
          success: false,
          message: normalized.error,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES,
          common_names: Object.keys(PROVIDER_MAPPINGS),
          example: normalized.provider === 'prasarana' ? {
            provider: 'prasarana',
            category: 'rapid-rail-kl'
          } : undefined
        });
      }

      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Reuse cached static data while it is still fresh
      let gtfsData;
      if (gtfsCache.static.has(cacheKey)) {
        const cached = gtfsCache.static.get(cacheKey)!;
        if (Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
          gtfsData = cached.data;
        }
      }

      // Otherwise download, parse and cache the static feed
      if (!gtfsData) {
        let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}/`;
        if (normalizedCategory) {
          url += `?category=${encodeURIComponent(normalizedCategory)}`;
        }
        const response = await axios.get(url, { responseType: 'arraybuffer' });
        gtfsData = await parseGtfsStaticZip(Buffer.from(response.data));
        gtfsCache.static.set(cacheKey, {
          data: gtfsData,
          timestamp: Date.now(),
        });
      }

      // Extract stops, optionally narrowed to a single stop_id
      const stops = gtfsData.stops || [];
      let filteredStops = stop_id
        ? stops.filter((stop: { stop_id: string }) => stop.stop_id === stop_id)
        : stops;

      // Route filter: route -> its trips -> their stop_times -> stop ids.
      // Sets keep each membership test constant-time; stop_times is scanned
      // once instead of once per trip id.
      if (route_id) {
        const tripIdsOnRoute = new Set<string>();
        for (const trip of (gtfsData.trips || []) as { route_id: string; trip_id: string }[]) {
          if (trip.route_id === route_id) {
            tripIdsOnRoute.add(trip.trip_id);
          }
        }

        const stopIdsOnRoute = new Set<string>();
        for (const stopTime of (gtfsData.stop_times || []) as { trip_id: string; stop_id: string }[]) {
          if (tripIdsOnRoute.has(stopTime.trip_id)) {
            stopIdsOnRoute.add(stopTime.stop_id);
          }
        }

        filteredStops = filteredStops.filter((stop: { stop_id: string }) => stopIdsOnRoute.has(stop.stop_id));
      }

      return respond({
        success: true,
        message: `Successfully retrieved stops for provider: ${provider}${category ? `, category: ${category}` : ''}`,
        data: filteredStops,
        count: filteredStops.length,
      });
    } catch (error) {
      // Surface HTTP details when the failure came from the request
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;
      const responseData = axiosError?.response?.data;
      return respond({
        success: false,
        message: 'Failed to retrieve transit stops',
        error: error instanceof Error ? error.message : 'Unknown error',
        status_code: statusCode,
        api_url: `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
        response_data: responseData,
        provider_info: {
          provider: provider,
          category: category,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES
        },
        note: "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required."
      });
    }
  }
);
// Get transit arrivals
// Combines GTFS static data (stop and route details) with the trip-updates
// feed to list upcoming arrivals at one stop.
server.tool(
  prefixToolName('get_transit_arrivals'),
  'Get real-time transit arrivals at a specific stop. IMPORTANT: Use this tool directly for queries like "When will the next bus arrive at my stop?" or "Show me arrival times for Rapid Penang buses at stop X".',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana", or common names like "rapid penang")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    stop_id: z.string().describe('ID of the stop to get arrivals for'),
    route_id: z.string().optional().describe('Optional: filter arrivals by route'),
    limit: z.number().optional().describe('Maximum number of arrivals to return (default: 10)'),
  },
  async ({ provider, category, stop_id, route_id, limit = 10 }) => {
    // Every branch answers with a single pretty-printed JSON text item
    const respond = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    // Feed times are scaled by 1000 for Date, i.e. treated as epoch seconds.
    // 64-bit fields may arrive as Long-like objects; prefer the full value
    // via toNumber() and fall back to the low 32 bits.
    const toIsoTime = (seconds: any): string | undefined => {
      if (!seconds) {
        return undefined;
      }
      const value = typeof seconds === 'number'
        ? seconds
        : typeof seconds.toNumber === 'function' ? seconds.toNumber() : seconds.low;
      return new Date(value * 1000).toISOString();
    };

    try {
      // Resolve the user-supplied provider/category before touching cache or network
      const normalized = normalizeProviderAndCategory(provider, category);
      if (normalized.error) {
        return respond({
          success: false,
          message: normalized.error,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES,
          common_names: Object.keys(PROVIDER_MAPPINGS),
          example: normalized.provider === 'prasarana' ? {
            provider: 'prasarana',
            category: 'rapid-rail-kl'
          } : undefined
        });
      }

      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Static GTFS data (stop and route information), from cache while fresh
      let gtfsStaticData;
      if (gtfsCache.static.has(cacheKey)) {
        const cached = gtfsCache.static.get(cacheKey)!;
        if (Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
          gtfsStaticData = cached.data;
        }
      }

      // Otherwise download, parse and cache the static feed
      if (!gtfsStaticData) {
        let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}/`;
        if (normalizedCategory) {
          url += `?category=${encodeURIComponent(normalizedCategory)}`;
        }
        const response = await axios.get(url, { responseType: 'arraybuffer' });
        gtfsStaticData = await parseGtfsStaticZip(Buffer.from(response.data));
        gtfsCache.static.set(cacheKey, {
          data: gtfsStaticData,
          timestamp: Date.now(),
        });
      }

      // Trip updates (real-time arrivals). This must start out undefined: an
      // empty array is truthy, so initializing to [] would make the
      // "not cached" check below unreachable and the feed would never load.
      let tripUpdatesData: any[] | undefined;
      if (gtfsCache.tripUpdates.has(cacheKey)) {
        const cached = gtfsCache.tripUpdates.get(cacheKey)!;
        if (Date.now() - cached.timestamp < TRIP_UPDATES_CACHE_EXPIRY) {
          tripUpdatesData = cached.data;
        }
      }

      // No fresh cache entry: fetch and decode the trip-updates feed
      if (!tripUpdatesData) {
        // Use the normalized identifiers so the request matches the cache key
        let url = `${API_BASE_URL}${GTFS_TRIP_UPDATES_ENDPOINT}/${normalizedProvider}/`;
        if (normalizedCategory) {
          url += `?category=${encodeURIComponent(normalizedCategory)}`;
        }
        try {
          // Download and decode the Protocol Buffer feed
          const response = await axios.get(url, { responseType: 'arraybuffer' });
          const feed = GtfsRealtimeBindings.transit_realtime.FeedMessage.decode(
            new Uint8Array(response.data)
          );

          // Keep only entities carrying a trip update and flatten them to plain objects
          tripUpdatesData = feed.entity.map(entity => {
            if (!entity.tripUpdate) {
              return null;
            }
            const tripUpdate = entity.tripUpdate;
            return {
              id: entity.id,
              tripUpdate: {
                trip: tripUpdate.trip ? {
                  tripId: tripUpdate.trip.tripId,
                  routeId: tripUpdate.trip.routeId,
                  directionId: tripUpdate.trip.directionId,
                  startTime: tripUpdate.trip.startTime,
                  startDate: tripUpdate.trip.startDate,
                  scheduleRelationship: tripUpdate.trip.scheduleRelationship,
                } : undefined,
                stopTimeUpdate: tripUpdate.stopTimeUpdate ? tripUpdate.stopTimeUpdate.map(update => ({
                  stopSequence: update.stopSequence,
                  stopId: update.stopId,
                  arrival: update.arrival ? {
                    delay: update.arrival.delay,
                    time: toIsoTime(update.arrival.time),
                    uncertainty: update.arrival.uncertainty,
                  } : undefined,
                  departure: update.departure ? {
                    delay: update.departure.delay,
                    time: toIsoTime(update.departure.time),
                    uncertainty: update.departure.uncertainty,
                  } : undefined,
                  scheduleRelationship: update.scheduleRelationship,
                })) : [],
                timestamp: toIsoTime(tripUpdate.timestamp),
                delay: tripUpdate.delay,
              }
            };
          }).filter(Boolean);

          gtfsCache.tripUpdates.set(cacheKey, {
            data: tripUpdatesData,
            timestamp: Date.now(),
          });
        } catch (error) {
          // Trip updates are best-effort: fall back to an empty list and cache
          // it so a failing feed is not re-requested on every call.
          tripUpdatesData = [];
          gtfsCache.tripUpdates.set(cacheKey, {
            data: tripUpdatesData,
            timestamp: Date.now(),
          });
          console.error(`Error fetching trip updates for ${provider}${category ? `, category: ${category}` : ''}:`, error);
        }
      }

      // Look up the requested stop in the static data
      const stops = gtfsStaticData.stops || [];
      const stop = stops.find((s: any) => s.stop_id === stop_id);
      if (!stop) {
        return respond({
          success: false,
          message: `Stop ID ${stop_id} not found for provider: ${provider}${category ? `, category: ${category}` : ''}`,
          valid_stop_ids: stops.map((s: any) => s.stop_id).slice(0, 10),
          total_stops: stops.length,
        });
      }

      // Collect every stop-time update that targets this stop
      const routes = gtfsStaticData.routes || [];
      const arrivalsForStop: Record<string, unknown>[] = [];
      for (const entity of tripUpdatesData || []) {
        if (!entity?.tripUpdate?.stopTimeUpdate) continue;

        const stopUpdates = entity.tripUpdate.stopTimeUpdate.filter((update: any) =>
          update.stopId === stop_id
        );
        if (stopUpdates.length === 0) continue;

        // Skip if route_id filter is provided and doesn't match
        if (route_id && entity.tripUpdate.trip?.routeId !== route_id) continue;

        // Route names come from the static feed
        const route = routes.find((r: any) => r.route_id === entity.tripUpdate.trip?.routeId);

        for (const update of stopUpdates) {
          arrivalsForStop.push({
            trip_id: entity.tripUpdate.trip?.tripId,
            route_id: entity.tripUpdate.trip?.routeId,
            route_short_name: route?.route_short_name,
            route_long_name: route?.route_long_name,
            direction_id: entity.tripUpdate.trip?.directionId,
            arrival_time: update.arrival?.time,
            arrival_delay: update.arrival?.delay,
            departure_time: update.departure?.time,
            departure_delay: update.departure?.delay,
            stop_sequence: update.stopSequence,
            schedule_relationship: update.scheduleRelationship,
          });
        }
      }

      // Sort by arrival (or departure) time; the values are ISO strings, so
      // string order is chronological order.
      arrivalsForStop.sort((a: any, b: any) => {
        const timeA = a.arrival_time || a.departure_time || '';
        const timeB = b.arrival_time || b.departure_time || '';
        return timeA.localeCompare(timeB);
      });

      // Limit results, then attach a minutes-until countdown
      const limitedArrivals = arrivalsForStop.slice(0, limit);
      const now = Date.now();
      const arrivalsWithCountdown = limitedArrivals.map((arrival: any) => {
        const arrivalTime = arrival.arrival_time ? new Date(arrival.arrival_time).getTime() : null;
        const departureTime = arrival.departure_time ? new Date(arrival.departure_time).getTime() : null;
        const nextTime = arrivalTime || departureTime;
        let minutesUntil = null;
        if (nextTime) {
          minutesUntil = Math.round((nextTime - now) / (60 * 1000));
        }
        return {
          ...arrival,
          minutes_until_arrival: minutesUntil,
        };
      });

      return respond({
        success: true,
        message: `Successfully retrieved arrivals for stop: ${stop_id} (${stop.stop_name})`,
        stop: stop,
        arrivals: arrivalsWithCountdown,
        count: arrivalsWithCountdown.length,
        current_time: new Date().toISOString(),
        note: arrivalsWithCountdown.length === 0 ? "No upcoming arrivals found for this stop. This could be due to no scheduled service or no real-time data available." : undefined,
      });
    } catch (error) {
      // Surface HTTP details when the failure came from the request
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;
      const responseData = axiosError?.response?.data;
      return respond({
        success: false,
        message: 'Failed to get transit arrivals',
        error: error instanceof Error ? error.message : 'Unknown error',
        status_code: statusCode,
        api_url: `${API_BASE_URL}${GTFS_TRIP_UPDATES_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
        response_data: responseData,
        provider_info: {
          provider: provider,
          category: category,
          valid_providers: VALID_PROVIDERS,
          valid_categories: PRASARANA_CATEGORIES
        },
        note: "If you're getting a 404 error, please check that the provider and category are correct. For Prasarana, a valid category is required."
      });
    }
  }
);
// Search transit stops by location name
server.tool(
prefixToolName('search_transit_stops_by_location'),
'Search for transit stops near a named location. IMPORTANT: Use this tool for queries like "Show me bus stops near KLCC" or "What buses stop at KL Sentral?" This tool geocodes the location name to coordinates, then finds nearby stops. CRITICAL: For Rapid KL services, ALWAYS use specific terms in the provider parameter like "rapid kl bus", "rapid rail", "mrt feeder", "lrt", "mrt" instead of using "prasarana" with a separate category parameter. DO NOT use provider="prasarana" with category="rapid-rail-kl" as this causes 404 errors. Instead use provider="rapid rail" or provider="lrt" or provider="mrt" or provider="mrt feeder" or provider="rapid kl bus" without a category parameter.',
{
provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana", or common names like "rapid penang")'),
category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
location: z.string().describe('Location name to search for (e.g., "KLCC", "KL Sentral", "Penang Airport")'),
country: z.string().optional().describe('Country code to limit geocoding results (default: "my" for Malaysia)'),
limit: z.number().optional().describe('Maximum number of stops to return (default: 5)'),
max_distance: z.number().optional().describe('Maximum distance in kilometers (default: 5)'),
include_arrivals: z.boolean().optional().describe('Whether to include upcoming arrivals for each stop (default: true)'),
arrivals_limit: z.number().optional().describe('Maximum number of arrivals to include per stop (default: 3)'),
},
async ({ provider, category, location, country = 'my', limit = 5, max_distance = 5, include_arrivals = true, arrivals_limit = 3 }) => {
// Store normalized values at function scope so they're available in catch block
let normalizedProvider = provider;
let normalizedCategory = category;
try {
// If provider looks like prasarana but no category is provided, set a default category
// This helps users who don't specify a category in their query
if ((provider.toLowerCase() === 'prasarana' || provider.toLowerCase().includes('rapid')) && !category) {
// Analyze the location query to determine if it's likely a bus or rail search
const locationLower = location.toLowerCase();
// Check if the location contains keywords suggesting rail/LRT/MRT
const railKeywords = ['lrt', 'mrt', 'monorail', 'train', 'station', 'rail', 'kelana jaya', 'ampang', 'sri petaling'];
const isBusKeyword = locationLower.includes('bus') || locationLower.includes('stop');
const isRailKeyword = railKeywords.some(keyword => locationLower.includes(keyword));
if (isRailKeyword && !isBusKeyword) {
// If location suggests rail and not bus, use rail category
category = 'rapid-rail-kl';
} else {
// Default to bus if not clearly rail or if both bus and rail are mentioned
category = 'rapid-bus-kl';
}
}
// Step 1: Normalize provider and category first
const normalized = normalizeProviderAndCategory(provider, category);
// Update function scope variables for catch block
normalizedProvider = normalized.provider;
normalizedCategory = normalized.category;
// If there's an error, return it
if (normalized.error) {
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: false,
message: normalized.error,
valid_providers: VALID_PROVIDERS,
valid_categories: PRASARANA_CATEGORIES,
common_names: Object.keys(PROVIDER_MAPPINGS),
example: normalized.provider === 'prasarana' ? {
provider: 'prasarana',
category: 'rapid-rail-kl'
} : undefined
}, null, 2),
},
],
};
}
// Step 2: Geocode the location name to coordinates
console.log(`Attempting to geocode location: ${location}`);
let coordinates = await geocodeLocation(location, country);
// If initial geocoding fails, try with additional context
if (!coordinates) {
console.log(`Geocoding failed for "${location}", trying with additional context...`);
// Try with state/city context for Malaysian locations
const locationVariations = [
// Add full country name
`${location}, Malaysia`,
// Add common Malaysian states if not already in the query
...(!/penang|pulau pinang/i.test(location) ? [`${location}, Penang`, `${location}, Pulau Pinang`] : []),
...(!/selangor/i.test(location) ? [`${location}, Selangor`] : []),
...(!/kuala lumpur|kl/i.test(location) ? [`${location}, Kuala Lumpur`, `${location}, KL`] : []),
...(!/johor/i.test(location) ? [`${location}, Johor`] : []),
// Try with common prefixes for condos/apartments
...(!/condo|condominium|apartment|residence|residency|heights|court|villa|garden|park/i.test(location) ?
[`${location} Condominium`, `${location} Residence`, `${location} Apartment`] : [])
];
// Try each variation until we get coordinates
for (const variation of locationVariations) {
console.log(`Trying variation: "${variation}"`);
coordinates = await geocodeLocation(variation, country);
if (coordinates) {
console.log(`Successfully geocoded with variation: "${variation}"`);
break;
}
}
}
// If all geocoding attempts fail, return error
if (!coordinates) {
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: false,
message: `Could not geocode location: "${location}". Please try a different location name or provide more specific details.`,
location,
country,
provider_info: {
provider: normalizedProvider,
category: normalizedCategory,
valid_providers: VALID_PROVIDERS,
valid_categories: PRASARANA_CATEGORIES
},
suggestion: 'Please try a more specific address with city/state name, or use a nearby landmark.'
}, null, 2),
},
],
};
}
// Use normalized values for provider and category
provider = normalized.provider;
category = normalized.category;
// Build cache key
const cacheKey = `${provider}-${category || 'default'}`;
// Get static GTFS data
let gtfsStaticData;
if (gtfsCache.static.has(cacheKey)) {
const cached = gtfsCache.static.get(cacheKey)!;
// Use cached data if not expired
if (Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
gtfsStaticData = cached.data;
}
}
// If no cached data, fetch and parse GTFS static data
if (!gtfsStaticData) {
// Build URL
let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}/`;
if (normalizedCategory) {
url += `?category=${normalizedCategory}`;
}
// Trailing slash already added
// Download ZIP file
const response = await axios.get(url, { responseType: 'arraybuffer' });
// Parse GTFS data
gtfsStaticData = await parseGtfsStaticZip(Buffer.from(response.data));
// Cache the result
gtfsCache.static.set(cacheKey, {
data: gtfsStaticData,
timestamp: Date.now(),
});
}
// Step 3: Extract stops from GTFS data
const stops = gtfsStaticData.stops || [];
if (stops.length === 0) {
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: false,
message: `No stops found for provider: ${provider}${category ? `, category: ${category}` : ''}`,
provider: provider,
category: category,
}, null, 2),
},
],
};
}
// Step 4: Calculate distances from user location to each stop
const stopsWithDistance = stops.map((stop: any) => {
// Skip stops without coordinates
if (!stop.stop_lat || !stop.stop_lon) {
return null;
}
const distance = haversineDistance(
coordinates.lat,
coordinates.lon,
parseFloat(stop.stop_lat),
parseFloat(stop.stop_lon)
);
return {
...stop,
distance_km: distance,
distance_m: Math.round(distance * 1000),
};
}).filter(Boolean);
// Step 5: Filter stops by max distance and sort by proximity
const nearbyStops = stopsWithDistance
.filter((stop: any) => stop.distance_km <= max_distance)
.sort((a: any, b: any) => a.distance_km - b.distance_km)
.slice(0, limit);
if (nearbyStops.length === 0) {
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: false,
message: `No stops found within ${max_distance} km of "${location}"`,
location,
coordinates,
provider: provider,
category: category,
max_distance,
suggestion: 'Try increasing the max_distance parameter or searching for a different location.',
}, null, 2),
},
],
};
}
// Step 6: If requested, get real-time arrivals for each stop
let stopsWithArrivals = nearbyStops;
if (include_arrivals) {
// Get trip updates data (for real-time arrivals)
let tripUpdatesData: any[] = [];
if (gtfsCache.tripUpdates.has(cacheKey)) {
const cached = gtfsCache.tripUpdates.get(cacheKey)!;
// Use cached data if not expired
if (Date.now() - cached.timestamp < TRIP_UPDATES_CACHE_EXPIRY) {
tripUpdatesData = cached.data;
}
}
// If no cached trip updates data, fetch and parse GTFS trip updates
if (!tripUpdatesData || tripUpdatesData.length === 0) {
// Build URL
let url = `${API_BASE_URL}${GTFS_TRIP_UPDATES_ENDPOINT}/${normalizedProvider}/`;
if (normalizedCategory) {
url += `?category=${normalizedCategory}`;
}
// Trailing slash already added
try {
// Download Protocol Buffer data
const response = await axios.get(url, { responseType: 'arraybuffer' });
// Parse Protocol Buffer data
const feed = GtfsRealtimeBindings.transit_realtime.FeedMessage.decode(
new Uint8Array(response.data)
);
// Convert to plain JavaScript object
tripUpdatesData = feed.entity.map(entity => {
if (!entity.tripUpdate) {
return null;
}
const tripUpdate = entity.tripUpdate;
return {
id: entity.id,
tripUpdate: {
trip: tripUpdate.trip ? {
tripId: tripUpdate.trip.tripId,
routeId: tripUpdate.trip.routeId,
directionId: tripUpdate.trip.directionId,
startTime: tripUpdate.trip.startTime,
startDate: tripUpdate.trip.startDate,
scheduleRelationship: tripUpdate.trip.scheduleRelationship,
} : undefined,
stopTimeUpdate: tripUpdate.stopTimeUpdate ? tripUpdate.stopTimeUpdate.map(update => ({
stopSequence: update.stopSequence,
stopId: update.stopId,
arrival: update.arrival ? {
delay: update.arrival.delay,
time: update.arrival.time ? new Date(typeof update.arrival.time === 'number' ? update.arrival.time * 1000 : (update.arrival.time as any).low * 1000).toISOString() : undefined,
uncertainty: update.arrival.uncertainty,
} : undefined,
departure: update.departure ? {
delay: update.departure.delay,
time: update.departure.time ? new Date(typeof update.departure.time === 'number' ? update.departure.time * 1000 : (update.departure.time as any).low * 1000).toISOString() : undefined,
uncertainty: update.departure.uncertainty,
} : undefined,
scheduleRelationship: update.scheduleRelationship,
})) : [],
timestamp: tripUpdate.timestamp ? new Date(typeof tripUpdate.timestamp === 'number' ? tripUpdate.timestamp * 1000 : (tripUpdate.timestamp as any).low * 1000).toISOString() : undefined,
delay: tripUpdate.delay,
}
};
}).filter(Boolean);
// Cache the result
gtfsCache.tripUpdates.set(cacheKey, {
data: tripUpdatesData,
timestamp: Date.now(),
});
} catch (error) {
// If trip updates are not available, set to empty array
tripUpdatesData = [];
// Still cache the empty result to avoid repeated failed requests
gtfsCache.tripUpdates.set(cacheKey, {
data: tripUpdatesData,
timestamp: Date.now(),
});
console.error(`Error fetching trip updates for ${provider}${category ? `, category: ${category}` : ''}:`, error);
}
}
// Get routes information for better display
const routes = gtfsStaticData.routes || [];
// Add arrivals to each stop
stopsWithArrivals = nearbyStops.map((stop: any) => {
// Find arrivals for this stop
const arrivalsForStop: any[] = [];
for (const entity of tripUpdatesData || []) {
if (!entity?.tripUpdate?.stopTimeUpdate) continue;
// Find updates for this stop
const stopUpdates = entity.tripUpdate.stopTimeUpdate.filter((update: any) =>
update.stopId === stop.stop_id
);
if (stopUpdates.length === 0) continue;
// Get route information
const route = routes.find((r: any) => r.route_id === entity.tripUpdate.trip?.routeId);
// Add to arrivals list
for (const update of stopUpdates) {
arrivalsForStop.push({
trip_id: entity.tripUpdate.trip?.tripId,
route_id: entity.tripUpdate.trip?.routeId,
route_short_name: route?.route_short_name,
route_long_name: route?.route_long_name,
direction_id: entity.tripUpdate.trip?.directionId,
arrival_time: update.arrival?.time,
arrival_delay: update.arrival?.delay,
departure_time: update.departure?.time,
departure_delay: update.departure?.delay,
stop_sequence: update.stopSequence,
schedule_relationship: update.scheduleRelationship,
});
}
}
// Sort by arrival time
arrivalsForStop.sort((a: any, b: any) => {
const timeA = a.arrival_time || a.departure_time || '';
const timeB = b.arrival_time || b.departure_time || '';
return timeA.localeCompare(timeB);
});
// Limit results
const limitedArrivals = arrivalsForStop.slice(0, arrivals_limit);
// Calculate time until arrival
const now = Date.now();
const arrivalsWithCountdown = limitedArrivals.map((arrival: any) => {
const arrivalTime = arrival.arrival_time ? new Date(arrival.arrival_time).getTime() : null;
const departureTime = arrival.departure_time ? new Date(arrival.departure_time).getTime() : null;
const nextTime = arrivalTime || departureTime;
let minutesUntil = null;
if (nextTime) {
minutesUntil = Math.round((nextTime - now) / (60 * 1000));
}
return {
...arrival,
minutes_until_arrival: minutesUntil,
};
});
return {
...stop,
upcoming_arrivals: arrivalsWithCountdown,
has_realtime_data: arrivalsWithCountdown.length > 0,
};
});
}
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: true,
message: `Found ${stopsWithArrivals.length} stops near "${location}"`,
location,
coordinates,
provider,
category,
stops: stopsWithArrivals,
count: stopsWithArrivals.length,
include_arrivals,
current_time: new Date().toISOString(),
search_parameters: {
max_distance,
limit,
arrivals_limit: include_arrivals ? arrivals_limit : undefined,
},
note: stopsWithArrivals.some((s: any) => s.has_realtime_data) ? undefined : `No real-time arrival data available for these stops. ${REALTIME_DATA_NOTE}`,
}, null, 2),
},
],
};
} catch (error) {
// Check if it's an axios error with response data
const axiosError = error as any;
const statusCode = axiosError?.response?.status;
const responseData = axiosError?.response?.data;
// Try to parse the Buffer data if present
let parsedResponseData = responseData;
if (responseData && responseData.type === 'Buffer' && Array.isArray(responseData.data)) {
try {
const buffer = Buffer.from(responseData.data);
parsedResponseData = JSON.parse(buffer.toString());
} catch (parseError) {
console.error('Error parsing buffer data:', parseError);
}
}
return {
content: [
{
type: 'text',
text: JSON.stringify({
success: false,
message: 'Failed to search transit stops by location',
error: error instanceof Error ? error.message : 'Unknown error',
status_code: statusCode,
response_data: parsedResponseData,
location,
provider_info: {
provider: provider,
category: category,
valid_providers: VALID_PROVIDERS,
valid_categories: PRASARANA_CATEGORIES
},
suggestion: 'Make sure you are using a valid category for the provider. For Prasarana, use one of: ' + PRASARANA_CATEGORIES.join(', ') + '. For location-based searches, try adding more context like city or state name.'
}, null, 2),
},
],
};
}
}
);
// Find nearest transit stops
//
// Registers the `find_nearest_transit_stops` tool. Given a provider (plus an
// optional category) and the caller's latitude/longitude, the handler:
//   1. normalizes the provider/category via normalizeProviderAndCategory,
//   2. loads the provider's static GTFS feed (served from gtfsCache.static
//      while younger than STATIC_CACHE_EXPIRY, otherwise downloaded as a ZIP
//      and parsed with parseGtfsStaticZip),
//   3. computes the great-circle (Haversine) distance in km to every stop
//      that has usable coordinates,
//   4. returns the closest `limit` stops within `max_distance` km.
// Every outcome (success or failure) is returned as a single text content
// item holding pretty-printed JSON; the handler does not rethrow.
server.tool(
  prefixToolName('find_nearest_transit_stops'),
  'Find the nearest transit stops to a given location. IMPORTANT: Use this tool directly for queries like "Where is the nearest bus stop to my location?" or "How do I get to the nearest Rapid Penang bus stop?"',
  {
    provider: z.string().describe('Provider name (e.g., "mybas-johor", "ktmb", "prasarana", or common names like "rapid penang")'),
    category: z.string().optional().describe('Category for Prasarana data (required only for prasarana provider)'),
    latitude: z.number().describe('Latitude of the user\'s location'),
    longitude: z.number().describe('Longitude of the user\'s location'),
    limit: z.number().optional().describe('Maximum number of stops to return (default: 5)'),
    max_distance: z.number().optional().describe('Maximum distance in kilometers (default: 5)'),
  },
  async ({ provider, category, latitude, longitude, limit = 5, max_distance = 5 }) => {
    // URL actually requested from the API (set only if a download is attempted),
    // so the error payload can report exactly what was fetched.
    let requestedUrl: string | undefined;

    try {
      // Normalize provider and category (maps common names to API identifiers)
      const normalized = normalizeProviderAndCategory(provider, category);

      // Normalization failed: report why, together with the accepted values
      if (normalized.error) {
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                success: false,
                message: normalized.error,
                valid_providers: VALID_PROVIDERS,
                valid_categories: PRASARANA_CATEGORIES,
                common_names: Object.keys(PROVIDER_MAPPINGS),
                example: normalized.provider === 'prasarana' ? {
                  provider: 'prasarana',
                  category: 'rapid-rail-kl'
                } : undefined
              }, null, 2),
            },
          ],
        };
      }

      // Use normalized values from here on
      const normalizedProvider = normalized.provider;
      const normalizedCategory = normalized.category;

      // One cache entry per provider/category pair
      const cacheKey = `${normalizedProvider}-${normalizedCategory || 'default'}`;

      // Reuse cached static GTFS data while it has not expired
      let gtfsData;
      const cached = gtfsCache.static.get(cacheKey);
      if (cached && Date.now() - cached.timestamp < STATIC_CACHE_EXPIRY) {
        gtfsData = cached.data;
      }

      // Nothing usable in the cache: download and parse the static feed
      if (!gtfsData) {
        let url = `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${normalizedProvider}/`;
        if (normalizedCategory) {
          // Encode the value so it can never break out of the query string
          url += `?category=${encodeURIComponent(normalizedCategory)}`;
        }
        requestedUrl = url;

        // Download ZIP file
        const response = await axios.get(url, { responseType: 'arraybuffer' });

        // Parse GTFS data
        gtfsData = await parseGtfsStaticZip(Buffer.from(response.data));

        // Cache the result
        gtfsCache.static.set(cacheKey, {
          data: gtfsData,
          timestamp: Date.now(),
        });
      }

      // Extract stops data
      const stops = gtfsData.stops || [];

      // Haversine distance (km) from the user's position to each stop.
      // Stops whose coordinates are missing or unparseable are dropped here
      // instead of being carried along with a placeholder distance.
      const R = 6371; // Earth radius in km
      const stopsWithDistance = stops.flatMap((stop: any) => {
        const stopLat = parseFloat(stop.stop_lat);
        const stopLon = parseFloat(stop.stop_lon);
        if (!Number.isFinite(stopLat) || !Number.isFinite(stopLon)) {
          return [];
        }

        const dLat = (stopLat - latitude) * Math.PI / 180;
        const dLon = (stopLon - longitude) * Math.PI / 180;
        const a =
          Math.sin(dLat / 2) * Math.sin(dLat / 2) +
          Math.cos(latitude * Math.PI / 180) * Math.cos(stopLat * Math.PI / 180) *
          Math.sin(dLon / 2) * Math.sin(dLon / 2);
        const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
        const distance = R * c; // Distance in km

        return [{ ...stop, distance }];
      });

      // A negative limit would make slice() drop items from the END of the
      // list rather than return nothing, so clamp it to a non-negative integer.
      const maxResults = Math.max(0, Math.floor(limit));

      // Filter by max distance, sort nearest first, keep the requested number
      const nearestStops = stopsWithDistance
        .filter((stop: any) => stop.distance <= max_distance)
        .sort((a: any, b: any) => a.distance - b.distance)
        .slice(0, maxResults);

      // Add rounded, human-friendly distances (km to 2 decimals, whole metres)
      const formattedStops = nearestStops.map((stop: any) => ({
        ...stop,
        distance_km: parseFloat(stop.distance.toFixed(2)),
        distance_m: parseFloat((stop.distance * 1000).toFixed(0)),
      }));

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify({
              success: true,
              message: `Successfully found nearest stops for provider: ${provider}${category ? `, category: ${category}` : ''}`,
              data: formattedStops,
              count: formattedStops.length,
              user_location: { latitude, longitude },
              provider_info: { provider, category },
              note: REALTIME_DATA_NOTE,
            }, null, 2),
          },
        ],
      };
    } catch (error) {
      // Check if it's an axios error with response data
      const axiosError = error as any;
      const statusCode = axiosError?.response?.status;

      // The download uses responseType 'arraybuffer', so an HTTP error body
      // arrives as raw bytes. Serializing those directly would dump an array
      // of byte values; decode to text and, when possible, to JSON instead.
      let responseData: unknown = axiosError?.response?.data;
      if (responseData instanceof ArrayBuffer) {
        responseData = Buffer.from(responseData);
      }
      if (Buffer.isBuffer(responseData)) {
        const bodyText = responseData.toString('utf8');
        try {
          responseData = JSON.parse(bodyText);
        } catch {
          // Not JSON (e.g. an HTML/plain-text error page): report the text as-is
          responseData = bodyText;
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify({
              success: false,
              message: 'Failed to find nearest transit stops',
              error: error instanceof Error ? error.message : 'Unknown error',
              status_code: statusCode,
              // Prefer the URL that was really requested; fall back to one built
              // from the raw inputs if the failure happened before any request.
              api_url: requestedUrl ?? `${API_BASE_URL}${GTFS_STATIC_ENDPOINT}/${provider}${category ? `?category=${category}` : ''}`,
              response_data: responseData,
              provider_info: {
                provider: provider,
                category: category,
                valid_providers: VALID_PROVIDERS,
                valid_categories: PRASARANA_CATEGORIES
              },
              note: COMBINED_ERROR_NOTE,
            }, null, 2),
          },
        ],
      };
    }
  }
);
}
```