edgardamasceno-dev/ldoce-mcp-server # codebase.md

# Directory Structure

```
├── .gitignore
├── Dockerfile
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
# Dependencies
node_modules/
package-lock.json

# Build outputs
build/
dist/
*.tsbuildinfo

# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Environment variables
.env*
.env.local
.env.development.local
.env.test.local
.env.production.local

# IDE
.idea/
.vscode/
*.swp
*.swo
.DS_Store

# Test coverage
coverage/
.nyc_output/

# Temporary files
*.tmp
*.temp
.cache/
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# Ldoce MCP Server
[![smithery badge](https://smithery.ai/badge/@edgardamasceno-dev/ldoce-mcp-server)](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server)


Este é um MCP server desenvolvido em Node.js e TypeScript que consome a página do Longman Dictionary para uma determinada palavra e retorna os dados extraídos no formato JSON padronizado para uso por agentes de IA.

## Descrição

O servidor se conecta à URL `https://www.ldoceonline.com/dictionary/<word>`, extrai informações como a introdução, tópicos relacionados, entradas (verb e noun), corpus examples e origem, e retorna esses dados estruturados em um objeto JSON. O projeto segue os padrões do Model Context Protocol (MCP) e utiliza os pacotes Axios e Cheerio para requisições HTTP e parsing de HTML.

## Recursos

- **Extrai informações do Longman Dictionary:**
  - Introdução e tópicos relacionados
  - Entradas com detalhes de pronúncias, sentidos, exemplos, etc.
  - Corpus examples
  - Origem da palavra

- **Utiliza MCP SDK para expor uma ferramenta** que pode ser integrada a clientes MCP, como o Claude Desktop.

## Pré-requisitos

- Node.js (versão 16 ou superior)
- npm
- Git

## Instalação

### Installing via Smithery

To install Ldoce Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server):

```bash
npx -y @smithery/cli install @edgardamasceno-dev/ldoce-mcp-server --client claude
```

### Manual Installation
1. Clone o repositório:
   ```bash
   git clone https://github.com/edgardamasceno-dev/ldoce-mcp-server.git
   cd ldoce-mcp-server

```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "Node16",
    "moduleResolution": "Node16",
    "outDir": "./build",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules"]
}
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
FROM node:lts-alpine

# Create app directory
WORKDIR /usr/src/app

# Install app dependencies
COPY package.json package-lock.json ./
RUN npm install --ignore-scripts

# Bundle app source
COPY . .

# Build the project
RUN npm run build

# Expose any ports if needed (MCP typically uses stdio, so not required here)

# Start the MCP server
CMD [ "node", "build/index.js" ]

```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
# Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml

startCommand:
  type: stdio
  configSchema:
    # JSON Schema defining the configuration options for the MCP.
    type: object
    properties: {}
    description: No configuration needed.
  commandFunction:
    # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
    |-
    (config) => ({ command: 'node', args: ['build/index.js'] })
  exampleConfig: {}

```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "ldoce-mcp-server",
  "version": "0.1.0",
  "description": "A Model Context Protocol server",
  "type": "module",
  "bin": {
    "ldoce-mcp-server": "./build/index.js"
  },
  "files": [
    "build"
  ],
  "private": true,
  "scripts": {
    "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
    "prepare": "npm run build",
    "watch": "tsc --watch",
    "inspector": "npx @modelcontextprotocol/inspector build/index.js"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.7.0",
    "axios": "^1.8.3",
    "cheerio": "^1.0.0",
    "phantomjs-prebuilt": "^2.1.16",
    "turndown": "^7.2.0"
  },
  "devDependencies": {
    "@types/cheerio": "^0.22.35",
    "@types/node": "^22.13.10",
    "@types/turndown": "^5.0.5",
    "ts-node": "^10.9.2",
    "typescript": "^5.8.2"
  }
}

```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  McpError,
  ErrorCode,
} from '@modelcontextprotocol/sdk/types.js';
import axios from 'axios';
import * as cheerio from 'cheerio';

/**
 * Data structures that make up the JSON document returned by the tool.
 */

// Example of the final JSON:
//
// {
//   "dictionaryEntries": [ ... ],
//   "simpleForm": { ... },
//   "continuousForm": { ... }
// }

/** A single usage example listed under a dictionary sense. */
interface DictionaryExample {
  /** Trimmed text content of the example element. */
  text: string;
  /** Value of the example's `data-src-mp3` attribute, when the page provides one. */
  audioUrl?: string;
}

/** One sense (meaning) of a dictionary entry. */
interface DictionarySense {
  /** Sense number parsed from the page; omitted when no number can be parsed. */
  number?: number;
  /** Text of the grammar label (`.GRAM`), if any. */
  grammar?: string;
  /** Text of the activation label (`.ACTIV`), if any. */
  activation?: string;
  /**
   * Plain definition text, or a `{ text, url }` pair when the sense is only a
   * cross-reference to another entry.
   */
  definition?: string | { text: string; url: string };
  /** Usage examples listed under this sense. */
  examples?: DictionaryExample[];
}

/** One parsed dictionary entry (one is produced per `span.dictentry` on the page). */
interface DictionaryParsedEntry {
  word: string;           // e.g. "rot"
  pronunciation: string;  // intended shape, e.g. "/rɒt/ (US: rɑːt)"
  partOfSpeech: string;   // e.g. "verb", "noun", etc.
  inflections: string[];  // e.g. ["rotted", "rotting"]
  relatedTopics: string[]; // e.g. ["Biology"]
  senses: DictionarySense[];
}

/**
 * Verb conjugations keyed first by tense and then by subject,
 * e.g. `{ Tense: { "I / you / we / they": "rot" } }`.
 * The key strings are whatever text the verb table on the page contains.
 */
interface ConjugationTable {
  [tense: string]: {
    [subject: string]: string;
  };
}

/** Top-level JSON document produced for one looked-up word. */
interface FinalDictionaryJson {
  /** Every dictionary entry found on the page, in page order. */
  dictionaryEntries: DictionaryParsedEntry[];
  /** Conjugations read from the "simple form" verb table; empty when the page has none. */
  simpleForm: ConjugationTable;
  /** Conjugations read from the "continuous form" verb table; empty when the page has none. */
  continuousForm: ConjugationTable;
}

/** Origin of the Longman dictionary site; relative links on a page are resolved against it. */
const LDOCE_ORIGIN = 'https://www.ldoceonline.com';

/**
 * Resolves a link `href` found on a dictionary page to an absolute URL.
 *
 * @param href Raw attribute value; may be missing.
 * @returns The absolute URL, or `undefined` when `href` is missing, empty or
 *   cannot be parsed. Callers use this to avoid emitting a broken link.
 */
function resolveLdoceUrl(href: string | undefined): string | undefined {
  if (!href) {
    return undefined;
  }
  try {
    // `new URL` keeps absolute hrefs intact and prefixes relative ones with the origin.
    return new URL(href, LDOCE_ORIGIN).href;
  } catch {
    return undefined;
  }
}

/**
 * Rewrites the raw pronunciation text collected from the page.
 *
 * Each `$` separator (with its surrounding whitespace) becomes `(US: `, a
 * trailing `/` becomes `/)`, and the first `))` is collapsed to `)`. When the
 * text contains no `(US:` marker the added `)` is removed again, so a
 * single-pronunciation string is returned unchanged.
 *
 * @param raw Concatenated text of the pronunciation spans.
 * @returns The rewritten pronunciation string (empty when `raw` is blank).
 */
function normalizePronunciation(raw: string): string {
  let pron = raw
    .trim()
    .replace(/\s*\$\s*/g, '(US: ')
    .replace(/\/\s*$/, '/)')
    .replace(/\)\)/, ')');
  if (!pron.includes('(US:') && pron.endsWith('/)')) {
    pron = pron.replace('/)', '/');
  }
  return pron;
}

/**
 * Splits inflection text such as `"(rotted, rotting)"` into its parts.
 *
 * @param text Trimmed text of the inflections element; may be empty.
 * @returns The comma-separated items with parentheses removed and blanks dropped.
 */
function parseInflections(text: string): string[] {
  if (!text) {
    return [];
  }
  return text
    .replace(/[()]/g, '')
    .split(',')
    .map((part) => part.trim())
    .filter(Boolean);
}

/**
 * Downloads the Longman dictionary page for `word` and converts it into the
 * final JSON structure (dictionary entries plus the two verb tables).
 *
 * @param word Word to look up; it is URL-encoded before being placed in the path.
 * @returns The parsed entries and conjugation tables. Parts that are not
 *   present on the page come back as empty arrays/objects.
 * @throws Whatever `axios.get` rejects with when the request fails or times
 *   out (10 s timeout).
 */
async function fetchDictionaryData(word: string): Promise<FinalDictionaryJson> {
  const url = `${LDOCE_ORIGIN}/dictionary/${encodeURIComponent(word)}`;

  const { data: html } = await axios.get(url, {
    timeout: 10000,
    headers: {
      'User-Agent': 'Mozilla/5.0 (compatible; MCP-Server/0.1.0)',
    },
  });

  const $ = cheerio.load(html);

  // ==========================
  // 1) Dictionary entries: one per <span class="dictentry">
  // ==========================
  const dictionaryEntries: DictionaryParsedEntry[] = [];

  $('span.dictentry').each((_, dictentryEl) => {
    const ldoceEntryEl = $(dictentryEl).find('.ldoceEntry.Entry').first();
    if (ldoceEntryEl.length === 0) {
      return; // no LDOCE entry inside this wrapper: skip it
    }

    const relatedTopics: string[] = [];
    ldoceEntryEl.find('.topics_container a.topic').each((_, topicEl) => {
      relatedTopics.push($(topicEl).text().trim());
    });

    // The head holds part of speech, pronunciation and inflections.
    // It is marked either `.frequent.Head` or plain `.Head`.
    const headEl = ldoceEntryEl.find('.frequent.Head, .Head').first();
    const pos = headEl.find('.POS').text().trim();

    // Concatenate the text of every pronunciation span inside the first .PronCodes.
    let rawPron = '';
    headEl
      .find('.PronCodes')
      .first()
      .find('span.PRON, span.AMEVARPRON, span.neutral')
      .each((_, pronEl) => {
        rawPron += $(pronEl).text();
      });
    const pronunciation = normalizePronunciation(rawPron);

    const inflections = parseInflections(headEl.find('.Inflections').text().trim());

    // ==========================
    // 2) Senses of this entry
    // ==========================
    const senses: DictionarySense[] = [];
    ldoceEntryEl.find('.Sense').each((_, senseEl) => {
      const sense = $(senseEl);
      // `|| undefined` turns NaN (no parsable number) into an omitted field.
      const number = Number.parseInt(sense.find('.sensenum').first().text().trim(), 10) || undefined;
      const grammar = sense.find('.GRAM').text().trim() || undefined;
      const activation = sense.find('.ACTIV').text().trim() || undefined;

      // A sense is either a plain definition or a pure cross-reference
      // (no .DEF text, or .DEF text starting with "→"). A cross-reference
      // is emitted as a link object, but only when both the link text and a
      // usable href exist; otherwise the raw definition text is kept.
      const definitionText = sense.find('.DEF').text().trim();
      let definition: string | { text: string; url: string } = definitionText;
      if (!definitionText || definitionText.startsWith('→')) {
        const crossLink = sense.find('.Crossref a').first();
        const crossText = crossLink.text().trim();
        const crossUrl = resolveLdoceUrl(crossLink.attr('href'));
        if (crossText && crossUrl) {
          definition = { text: `🔗 ${crossText}`, url: crossUrl };
        }
      }

      // 3) Examples, with their audio clip when one is attached
      const examples: DictionaryExample[] = [];
      sense.find('.EXAMPLE').each((_, exEl) => {
        const ex = $(exEl);
        // Prefer the example-specific speaker; fall back to any speaker element.
        const audioUrl =
          ex.find('.speaker.exafile').attr('data-src-mp3') ||
          ex.find('.speaker').attr('data-src-mp3') ||
          undefined;
        examples.push({ text: ex.text().trim(), audioUrl });
      });

      senses.push({ number, grammar, activation, definition, examples });
    });

    dictionaryEntries.push({
      word, // echoes the queried word, not the headword printed on the page
      pronunciation,
      partOfSpeech: pos,
      inflections,
      relatedTopics,
      senses,
    });
  });

  // ==========================
  // 4) Verb table -> simpleForm and continuousForm
  // ==========================
  const simpleForm: ConjugationTable = {};
  const continuousForm: ConjugationTable = {};

  // Both tables live inside the first <div class="verbTable">; when it is
  // absent the lookups below match nothing and the objects stay empty.
  const verbTableEl = $('.verbTable').first();

  const simpleFormEl = verbTableEl.find('table.simpleForm').first();
  if (simpleFormEl.length > 0) {
    parseConjugationTable(simpleFormEl, simpleForm);
  }

  const continuousFormEl = verbTableEl.find('table.continuousForm').first();
  if (continuousFormEl.length > 0) {
    parseConjugationTable(continuousFormEl, continuousForm);
  }

  return { dictionaryEntries, simpleForm, continuousForm };
}

/**
 * Extracts the conjugations from a verb `<table>` (e.g. the "simpleForm" table)
 * and fills `tableObj` in the shape `{ Tense: { "I / you / we / they": "rot", ... } }`.
 *
 * Rows are read from the already-parsed table element rather than by
 * re-parsing its inner HTML: an HTML5-compliant parser discards `<tr>`/`<td>`
 * tags that appear outside a `<table>`, which would leave no rows to read.
 *
 * Row handling, in order:
 * - rows containing a non-empty `td.header` are skipped;
 * - rows containing `td.view_more` / `td.view_less` are skipped;
 * - a row with non-empty `td.col1` text starts a new tense;
 * - any other row contributes `subject -> verb form` to the current tense,
 *   taken from its first and last `td.col2` cells.
 *
 * @param tableEl Cheerio selection holding the table to read.
 * @param tableObj Object that receives the conjugations; it is mutated in place.
 */
function parseConjugationTable(
  tableEl: cheerio.Cheerio,
  tableObj: ConjugationTable
): void {
  const rows = tableEl.find('tr');
  // Subject map of the tense currently being filled; undefined until a tense row is seen.
  let currentForms: ConjugationTable[string] | undefined;

  for (let i = 0; i < rows.length; i += 1) {
    const tr = rows.eq(i);

    if (tr.find('td.header').text().trim()) {
      continue;
    }
    if (tr.find('td.view_more, td.view_less').length > 0) {
      continue;
    }

    const tense = tr.find('td.col1').text().trim();
    if (tense) {
      // Reuse the map when the same tense label appears more than once.
      currentForms = tableObj[tense] ?? (tableObj[tense] = {});
      continue;
    }

    const subject = tr.find('td.firsts.col2, td.col2').first().text().trim();
    const verbForm = tr.find('td.col2').last().text().trim();

    // Rows seen before any tense row, or rows without a subject, are ignored.
    if (currentForms && subject) {
      currentForms[subject] = verbForm;
    }
  }
}

/* =======================
   MCP Server
   ======================= */
/**
 * MCP server that exposes a single tool, `get_dictionary_entry`, over stdio.
 *
 * All diagnostics are written with `console.error`, i.e. to stderr.
 */
class LdoceMcpServer {
  private readonly server: Server;

  /**
   * Creates the underlying MCP `Server`, registers the tool handlers and
   * installs the error/close/SIGINT hooks. The process exits with code 0 when
   * the connection closes or SIGINT is received.
   */
  constructor() {
    console.error('[Setup] Initializing MCP server with JSON output...');
    this.server = new Server(
      {
        name: 'ldoce-json-server',
        id: 'ldoce-json-server',
        version: '0.1.0',
      },
      { capabilities: { tools: {} } }
    );

    this.setupToolHandlers();
    this.server.onerror = (error) => console.error('[Error]', error);
    this.server.onclose = (error?: unknown) => {
      console.error('[Server] Connection closed', error);
      process.exit(0);
    };
    process.on('SIGINT', async () => {
      try {
        await this.server.close();
      } finally {
        // Exit even if closing the server fails, so Ctrl+C always terminates.
        process.exit(0);
      }
    });
  }

  /**
   * Registers the `tools/list` and `tools/call` request handlers.
   * Registration is synchronous, so a failure here surfaces from the constructor.
   */
  private setupToolHandlers(): void {
    // Handler that lists the available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'get_dictionary_entry',
          description: 'Busca o HTML do Longman para uma palavra e retorna JSON parseado (dictionaryEntries, simpleForm, continuousForm)',
          inputSchema: {
            type: 'object',
            properties: {
              word: {
                type: 'string',
                description: 'A palavra a ser consultada (ex: rot)',
              },
            },
            required: ['word'],
          },
        },
      ],
    }));

    // Handler for the get_dictionary_entry tool
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      // Request-validation errors are thrown as-is so the client receives the
      // precise error code (MethodNotFound / InvalidParams).
      if (request.params.name !== 'get_dictionary_entry') {
        throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
      }

      // `arguments` is optional in a tool call and its values are untyped,
      // so check the value at runtime instead of asserting its shape.
      const rawWord: unknown = request.params.arguments?.word;
      if (typeof rawWord !== 'string' || rawWord.trim() === '') {
        throw new McpError(ErrorCode.InvalidParams, '"word" parameter is required.');
      }
      const word = rawWord.trim();

      console.error(`[API] Searching dictionary data for word: ${word}`);

      try {
        const finalJson = await fetchDictionaryData(word);

        // MCP content items carry text, so the result object is serialized.
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(finalJson, null, 2),
            },
          ],
        };
      } catch (error: unknown) {
        if (error instanceof McpError) {
          // Already a protocol error: do not re-wrap it and lose its code.
          throw error;
        }
        if (error instanceof Error) {
          console.error('[Error] Failed to fetch entry:', error.message);
          throw new McpError(ErrorCode.InternalError, `Falha ao buscar a entrada: ${error.message}`);
        }
        console.error('[Error] Unknown error occurred');
        throw new McpError(ErrorCode.InternalError, 'Falha ao buscar a entrada: Unknown error');
      }
    });
  }

  /**
   * Connects the server to a stdio transport and logs once it is running.
   * Rejects if the connection cannot be established.
   */
  async run(): Promise<void> {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('Ldoce JSON server running via stdio');
  }
}

// Start the server. A startup failure is logged to stderr and turned into a
// non-zero exit code so that whatever launched the process can detect it.
const serverInstance = new LdoceMcpServer();
serverInstance.run().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});

```