edgardamasceno-dev/ldoce-mcp-server # codebase.md

# Directory Structure

```
├── .gitignore
├── Dockerfile
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
 1 | # Dependencies
 2 | node_modules/
 3 | package-lock.json
 4 | 
 5 | # Build outputs
 6 | build/
 7 | dist/
 8 | *.tsbuildinfo
 9 | 
10 | # Logs
11 | *.log
12 | npm-debug.log*
13 | yarn-debug.log*
14 | yarn-error.log*
15 | 
16 | # Environment variables
17 | .env*
18 | .env.local
19 | .env.development.local
20 | .env.test.local
21 | .env.production.local
22 | 
23 | # IDE
24 | .idea/
25 | .vscode/
26 | *.swp
27 | *.swo
28 | .DS_Store
29 | 
30 | # Test coverage
31 | coverage/
32 | .nyc_output/
33 | 
34 | # Temporary files
35 | *.tmp
36 | *.temp
37 | .cache/
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
 1 | # Ldoce MCP Server
 2 | [![smithery badge](https://smithery.ai/badge/@edgardamasceno-dev/ldoce-mcp-server)](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server)
 3 | 
 4 | 
 5 | Este é um MCP server desenvolvido em Node.js e TypeScript que consome a página do Longman Dictionary para uma determinada palavra e retorna os dados extraídos no formato JSON padronizado para uso por agentes de IA.
 6 | 
 7 | ## Descrição
 8 | 
 9 | O servidor se conecta à URL `https://www.ldoceonline.com/dictionary/<word>`, extrai informações como a introdução, tópicos relacionados, entradas (verb e noun), corpus examples e origem, e retorna esses dados estruturados em um objeto JSON. O projeto segue os padrões do Model Context Protocol (MCP) e utiliza os pacotes Axios e Cheerio para requisições HTTP e parsing de HTML.
10 | 
11 | ## Recursos
12 | 
13 | - **Extrai informações do Longman Dictionary:**
14 |   - Introdução e tópicos relacionados
15 |   - Entradas com detalhes de pronúncias, sentidos, exemplos, etc.
16 |   - Corpus examples
17 |   - Origem da palavra
18 | 
19 | - **Utiliza MCP SDK para expor uma ferramenta** que pode ser integrada a clientes MCP, como o Claude Desktop.
20 | 
21 | ## Pré-requisitos
22 | 
23 | - Node.js (versão 18 ou superior)
24 | - npm
25 | - Git
26 | 
27 | ## Instalação
28 | 
29 | ### Installing via Smithery
30 | 
31 | To install Ldoce Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server):
32 | 
33 | ```bash
34 | npx -y @smithery/cli install @edgardamasceno-dev/ldoce-mcp-server --client claude
35 | ```
36 | 
37 | ### Manual Installation
38 | 1. Clone o repositório:
39 |    ```bash
40 |    git clone https://github.com/edgardamasceno-dev/ldoce-mcp-server.git
41 |    cd ldoce-mcp-server
42 | 
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "Node16",
 5 |     "moduleResolution": "Node16",
 6 |     "outDir": "./build",
 7 |     "rootDir": "./src",
 8 |     "strict": true,
 9 |     "esModuleInterop": true,
10 |     "skipLibCheck": true,
11 |     "forceConsistentCasingInFileNames": true
12 |   },
13 |   "include": ["src/**/*"],
14 |   "exclude": ["node_modules"]
15 | }
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | FROM node:lts-alpine
 3 | 
 4 | # Create app directory
 5 | WORKDIR /usr/src/app
 6 | 
 7 | # Install app dependencies
 8 | COPY package.json package-lock.json ./
 9 | RUN npm install --ignore-scripts
10 | 
11 | # Bundle app source
12 | COPY . .
13 | 
14 | # Build the project
15 | RUN npm run build
16 | 
17 | # Expose any ports if needed (MCP typically uses stdio, so not required here)
18 | 
19 | # Start the MCP server
20 | CMD [ "node", "build/index.js" ]
21 | 
```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     properties: {}
 9 |     description: No configuration needed.
10 |   commandFunction:
11 |     # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
12 |     |-
13 |     (config) => ({ command: 'node', args: ['build/index.js'] })
14 |   exampleConfig: {}
15 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "name": "ldoce-mcp-server",
 3 |   "version": "0.1.0",
 4 |   "description": "A Model Context Protocol server",
 5 |   "type": "module",
 6 |   "bin": {
 7 |     "ldoce-mcp-server": "./build/index.js"
 8 |   },
 9 |   "files": [
10 |     "build"
11 |   ],
12 |   "private": true,
13 |   "scripts": {
14 |     "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
15 |     "prepare": "npm run build",
16 |     "watch": "tsc --watch",
17 |     "inspector": "npx @modelcontextprotocol/inspector build/index.js"
18 |   },
19 |   "keywords": [],
20 |   "author": "",
21 |   "license": "ISC",
22 |   "dependencies": {
23 |     "@modelcontextprotocol/sdk": "^1.7.0",
24 |     "axios": "^1.8.3",
25 |     "cheerio": "^1.0.0",
26 |     "phantomjs-prebuilt": "^2.1.16",
27 |     "turndown": "^7.2.0"
28 |   },
29 |   "devDependencies": {
30 |     "@types/cheerio": "^0.22.35",
31 |     "@types/node": "^22.13.10",
32 |     "@types/turndown": "^5.0.5",
33 |     "ts-node": "^10.9.2",
34 |     "typescript": "^5.8.2"
35 |   }
36 | }
37 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
  1 | #!/usr/bin/env node
  2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
  3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
  4 | import {
  5 |   CallToolRequestSchema,
  6 |   ListToolsRequestSchema,
  7 |   McpError,
  8 |   ErrorCode,
  9 | } from '@modelcontextprotocol/sdk/types.js';
 10 | import axios from 'axios';
 11 | import * as cheerio from 'cheerio';
 12 | 
 13 | /**
 14 |  * Final data structures produced by the scraper and returned by the tool.
 15 |  */
 16 | 
 17 | // Example of the final JSON:
 18 | //
 19 | // {
 20 | //   "dictionaryEntries": [ ... ],
 21 | //   "simpleForm": { ... },
 22 | //   "continuousForm": { ... }
 23 | // }
 24 | 
 25 | interface DictionaryExample { // one example sentence attached to a sense
 26 |   text: string; // trimmed text content of the .EXAMPLE element
 27 |   audioUrl?: string; // data-src-mp3 attribute of the example's speaker element, when present
 28 | }
 29 | 
 30 | interface DictionarySense { // one .Sense element of a dictionary entry
 31 |   number?: number; // parsed from the first .sensenum; undefined when that yields NaN or 0
 32 |   grammar?: string; // text of .GRAM, when non-empty
 33 |   activation?: string; // text of .ACTIV, when non-empty
 34 |   definition?: string | { text: string; url: string }; // .DEF text, or a cross-reference link
 35 |   examples?: DictionaryExample[]; // one item per .EXAMPLE found inside the sense
 36 | }
 37 | 
 38 | interface DictionaryParsedEntry { // one .ldoceEntry.Entry block of the page
 39 |   word: string;           // e.g. "rot"
 40 |   pronunciation: string;  // e.g. "/rɒt/ (US: rɑːt)"
 41 |   partOfSpeech: string;   // e.g. "verb", "noun", etc.
 42 |   inflections: string[];  // e.g. ["rotted", "rotting"]
 43 |   relatedTopics: string[]; // e.g. ["Biology"]
 44 |   senses: DictionarySense[];
 45 | }
 46 | 
 47 | interface ConjugationTable { // tense label -> subject label -> verb form
 48 |   [tense: string]: {
 49 |     [subject: string]: string;
 50 |   };
 51 | }
 52 | 
 53 | interface FinalDictionaryJson { // object serialized as the tool's text result
 54 |   dictionaryEntries: DictionaryParsedEntry[];
 55 |   simpleForm: ConjugationTable; // filled from table.simpleForm of the verb table
 56 |   continuousForm: ConjugationTable; // filled from table.continuousForm of the verb table
 57 | }
 58 | 
 59 | /** Root of the Longman Dictionary site; hrefs from the page are resolved against it. */
 60 | const LDOCE_BASE_URL = 'https://www.ldoceonline.com';
 61 | 
 62 | /**
 63 |  * Resolves an href found on a dictionary page against the site root.
 64 |  *
 65 |  * @returns The absolute URL, or `undefined` when `href` cannot be parsed.
 66 |  */
 67 | function toAbsoluteLdoceUrl(href: string): string | undefined {
 68 |   try {
 69 |     return new URL(href, LDOCE_BASE_URL).href;
 70 |   } catch {
 71 |     return undefined;
 72 |   }
 73 | }
 74 | 
 75 | /**
 76 |  * Rewrites the raw text collected from `.PronCodes` into its display form.
 77 |  *
 78 |  * A "$" (with any surrounding whitespace) becomes "(US: ", a trailing "/"
 79 |  * becomes "/)" to close that parenthesis, and the first "))" is collapsed.
 80 |  * When the result contains no "(US:", the appended ")" is removed again.
 81 |  */
 82 | function formatPronunciation(raw: string): string {
 83 |   const formatted = raw
 84 |     .replace(/\s*\$\s*/g, '(US: ')
 85 |     .replace(/\/\s*$/, '/)')
 86 |     .replace(/\)\)/, ')');
 87 |   if (!formatted.includes('(US:') && formatted.endsWith('/)')) {
 88 |     return formatted.replace('/)', '/');
 89 |   }
 90 |   return formatted;
 91 | }
 92 | 
 93 | /**
 94 |  * Fetches the Longman Dictionary (LDOCE) page for `word` and scrapes it into
 95 |  * the final JSON structure returned by the tool.
 96 |  *
 97 |  * Every `span.dictentry` that contains an `.ldoceEntry.Entry` becomes one item
 98 |  * of `dictionaryEntries`. The first `.verbTable` on the page, when present,
 99 |  * fills `simpleForm` and `continuousForm`; both stay `{}` otherwise.
100 |  *
101 |  * @param word - Word to look up; it is URI-encoded before being put in the URL.
102 |  * @returns The parsed entries plus the two conjugation tables.
103 |  * @throws Whatever `axios.get` rejects with, e.g. a network failure or the
104 |  *   10-second timeout configured below.
105 |  */
106 | async function fetchDictionaryData(word: string): Promise<FinalDictionaryJson> {
107 |   const url = `${LDOCE_BASE_URL}/dictionary/${encodeURIComponent(word)}`;
108 | 
109 |   const { data: html } = await axios.get(url, {
110 |     timeout: 10000,
111 |     headers: {
112 |       'User-Agent': 'Mozilla/5.0 (compatible; MCP-Server/0.1.0)',
113 |     },
114 |   });
115 | 
116 |   const $ = cheerio.load(html);
117 | 
118 |   // ==========================
119 |   // 1) Dictionary entries (one per span.dictentry)
120 |   // ==========================
121 |   const dictionaryEntries: DictionaryParsedEntry[] = [];
122 | 
123 |   $('span.dictentry').each((_, dictentryEl) => {
124 |     // The entry itself lives in .ldoceEntry.Entry; skip wrappers without one.
125 |     const ldoceEntryEl = $(dictentryEl).find('.ldoceEntry.Entry').first();
126 |     if (ldoceEntryEl.length === 0) {
127 |       return;
128 |     }
129 | 
130 |     // Related topics, e.g. "Biology".
131 |     const relatedTopics: string[] = [];
132 |     ldoceEntryEl.find('.topics_container a.topic').each((_, topicEl) => {
133 |       relatedTopics.push($(topicEl).text().trim());
134 |     });
135 | 
136 |     // Head block (headword, part of speech, pronunciation, inflections).
137 |     // It may be marked up as .frequent.Head or as a plain .Head.
138 |     const headEl = ldoceEntryEl.find('.frequent.Head, .Head').first();
139 |     // Prefer the headword printed on the page; fall back to the queried word.
140 |     const extractedWord = headEl.find('.HWD').text().trim() || word;
141 |     const pos = headEl.find('.POS').text().trim();
142 | 
143 |     // Pronunciation: concatenate the text of the spans inside .PronCodes,
144 |     // then normalize it (e.g. "/rɒt/ $ rɑːt/" gets its "$" turned into "(US: ").
145 |     let rawPron = '';
146 |     headEl
147 |       .find('.PronCodes')
148 |       .first()
149 |       .find('span.PRON, span.AMEVARPRON, span.neutral')
150 |       .each((_, elSpan) => {
151 |         rawPron += $(elSpan).text();
152 |       });
153 |     const pronunciation = formatPronunciation(rawPron.trim());
154 | 
155 |     // Inflections are rendered like "(rotted, rotting)": drop the parentheses,
156 |     // split on commas and discard empty pieces.
157 |     const inflections = headEl
158 |       .find('.Inflections')
159 |       .text()
160 |       .replace(/[()]/g, '')
161 |       .split(',')
162 |       .map((s) => s.trim())
163 |       .filter(Boolean);
164 | 
165 |     // ==========================
166 |     // 2) Senses of this entry
167 |     // ==========================
168 |     const senses: DictionarySense[] = [];
169 |     ldoceEntryEl.find('.Sense').each((_, senseEl) => {
170 |       const sense = $(senseEl);
171 |       // parseInt yields NaN when there is no sense number; `|| undefined`
172 |       // then drops the field from the serialized JSON.
173 |       const number = Number.parseInt(sense.find('.sensenum').first().text().trim(), 10) || undefined;
174 |       const grammar = sense.find('.GRAM').text().trim() || undefined;
175 |       const activation = sense.find('.ACTIV').text().trim() || undefined;
176 | 
177 |       // The definition is normally plain text. When the sense has no .DEF
178 |       // text, or the text starts with an arrow ("→ rot in hell/jail"), the
179 |       // sense is a cross-reference and is returned as { text, url } instead.
180 |       const definitionText = sense.find('.DEF').text().trim();
181 |       let definition: string | { text: string; url: string } = definitionText;
182 |       if (!definitionText || definitionText.startsWith('→')) {
183 |         const crossLink = sense.find('.Crossref a').first();
184 |         const crossText = crossLink.text().trim();
185 |         const crossHref = crossLink.attr('href');
186 |         const crossUrl = crossHref ? toAbsoluteLdoceUrl(crossHref) : undefined;
187 |         // Build the link only when both parts exist; otherwise keep the plain
188 |         // text rather than emitting a URL that ends in "undefined".
189 |         if (crossText && crossUrl) {
190 |           definition = {
191 |             text: `🔗 ${crossText}`,
192 |             url: crossUrl,
193 |           };
194 |         }
195 |       }
196 | 
197 |       // ==========================
198 |       // 3) Examples of this sense
199 |       // ==========================
200 |       const examples: DictionaryExample[] = [];
201 |       sense.find('.EXAMPLE').each((_, exEl) => {
202 |         const ex = $(exEl);
203 |         // Prefer the example's own speaker; fall back to any speaker element.
204 |         const audioUrl =
205 |           ex.find('.speaker.exafile').attr('data-src-mp3') ||
206 |           ex.find('.speaker').attr('data-src-mp3') ||
207 |           undefined;
208 |         examples.push({
209 |           text: ex.text().trim(),
210 |           audioUrl,
211 |         });
212 |       });
213 | 
214 |       senses.push({
215 |         number,
216 |         grammar,
217 |         activation,
218 |         definition,
219 |         examples,
220 |       });
221 |     });
222 | 
223 |     dictionaryEntries.push({
224 |       word: extractedWord,
225 |       pronunciation,
226 |       partOfSpeech: pos,
227 |       inflections,
228 |       relatedTopics,
229 |       senses,
230 |     });
231 |   });
232 | 
233 |   // ==========================
234 |   // 4) Verb table -> simpleForm and continuousForm
235 |   // ==========================
236 |   const simpleForm: ConjugationTable = {};
237 |   const continuousForm: ConjugationTable = {};
238 | 
239 |   // Both tables are looked up inside the first .verbTable element; when the
240 |   // page has none, the lookups below match nothing and the objects stay empty.
241 |   const verbTableEl = $('.verbTable').first();
242 | 
243 |   const simpleFormEl = verbTableEl.find('table.simpleForm').first();
244 |   if (simpleFormEl.length > 0) {
245 |     parseConjugationTable(simpleFormEl, simpleForm);
246 |   }
247 | 
248 |   const continuousFormEl = verbTableEl.find('table.continuousForm').first();
249 |   if (continuousFormEl.length > 0) {
250 |     parseConjugationTable(continuousFormEl, continuousForm);
251 |   }
252 | 
253 |   // Assemble the final object returned to the caller.
254 |   return { dictionaryEntries, simpleForm, continuousForm };
255 | }
256 | 
257 | /**
258 |  * Reads the conjugations out of one verb-table `<table>` (e.g. the
259 |  * "simpleForm" table) and stores them in `tableObj`, shaped as
260 |  * `{ Tense: { "I / you / we / they": "rot", ... } }`.
261 |  *
262 |  * Rows are queried on the already-parsed selection. Re-parsing the table's
263 |  * inner HTML with `cheerio.load` would discard every `<tr>`/`<td>`, because
264 |  * an HTML parser ignores table-row tags that are not inside a `<table>`.
265 |  *
266 |  * @param tableEl - Selection holding the `<table>` element to read.
267 |  * @param tableObj - Object that receives the result; it is mutated in place.
268 |  */
269 | function parseConjugationTable(
270 |     tableEl: cheerio.Cheerio,
271 |     tableObj: ConjugationTable
272 |   ) {
273 |     const rows = tableEl.find('tr');
274 |     let currentTense = ''; // e.g. "Present", "Past"; set by the latest col1 row
275 |   
276 |     rows.each((index) => {
277 |       const tr = rows.eq(index);
278 |       // Header rows carry no conjugation data.
279 |       if (tr.find('td.header').text().trim()) {
280 |         return;
281 |       }
282 |   
283 |       // Neither do the "view more" / "view less" toggle rows.
284 |       if (tr.find('td.view_more, td.view_less').length > 0) {
285 |         return;
286 |       }
287 |   
288 |       // A row with a non-empty td.col1 starts a new tense section.
289 |       const col1Value = tr.find('td.col1').text().trim();
290 |       if (col1Value) {
291 |         currentTense = col1Value;
292 |         tableObj[currentTense] ??= {};
293 |         return;
294 |       }
295 |   
296 |       // Any other row is a conjugation line: the first td.col2 holds the
297 |       // subject and the last td.col2 holds the verb form.
298 |       const cells = tr.find('td.col2');
299 |       const subject = cells.first().text().trim();
300 |       const verbForm = cells.last().text().trim();
301 |   
302 |       // Rows seen before any tense row, or without a subject, are skipped.
303 |       if (currentTense && subject) {
304 |         tableObj[currentTense][subject] = verbForm;
305 |       }
306 |     });
307 |   }
308 | 
309 | /* =======================
310 |    MCP Server
311 |    ======================= */
312 | /** MCP server that exposes the `get_dictionary_entry` tool over stdio. */
313 | class LdoceMcpServer {
314 |   private readonly server: Server;
315 | 
316 |   /** Creates the server, registers the tool handlers and the shutdown hooks. */
317 |   constructor() {
318 |     console.error('[Setup] Initializing MCP server with JSON output...');
319 |     this.server = new Server(
320 |       { name: 'ldoce-json-server', id: 'ldoce-json-server', version: '0.1.0' },
321 |       { capabilities: { tools: {} } }
322 |     );
323 | 
324 |     this.setupToolHandlers();
325 |     this.server.onerror = (error) => console.error('[Error]', error);
326 |     this.server.onclose = () => {
327 |       console.error('[Server] Connection closed');
328 |       process.exit(0);
329 |     };
330 |     process.on('SIGINT', async () => {
331 |       await this.server.close();
332 |       process.exit(0);
333 |     });
334 |   }
335 | 
336 |   /** Registers the tool-listing and tool-execution request handlers. */
337 |   private setupToolHandlers(): void {
338 |     // Handler that lists the available tools.
339 |     this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
340 |       tools: [
341 |         {
342 |           name: 'get_dictionary_entry',
343 |           description: 'Busca o HTML do Longman para uma palavra e retorna JSON parseado (dictionaryEntries, simpleForm, continuousForm)',
344 |           inputSchema: {
345 |             type: 'object',
346 |             properties: {
347 |               word: {
348 |                 type: 'string',
349 |                 description: 'A palavra a ser consultada (ex: rot)',
350 |               },
351 |             },
352 |             required: ['word'],
353 |           },
354 |         },
355 |       ],
356 |     }));
357 | 
358 |     // Handler that executes the get_dictionary_entry tool.
359 |     this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
360 |       try {
361 |         if (request.params.name !== 'get_dictionary_entry') {
362 |           throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
363 |         }
364 |         // `arguments` may be absent from the request, so read `word` defensively.
365 |         const rawWord: unknown = request.params.arguments?.word;
366 |         const word = typeof rawWord === 'string' ? rawWord.trim() : '';
367 |         if (!word) {
368 |           throw new McpError(ErrorCode.InvalidParams, '"word" parameter is required.');
369 |         }
370 | 
371 |         console.error(`[API] Searching dictionary data for word: ${word}`);
372 | 
373 |         const finalJson = await fetchDictionaryData(word);
374 | 
375 |         // MCP text content must be a string, so the object is serialized.
376 |         return {
377 |           content: [{ type: 'text', text: JSON.stringify(finalJson, null, 2) }],
378 |         };
379 |       } catch (error: unknown) {
380 |         // Errors raised above already carry the right protocol code
381 |         // (MethodNotFound / InvalidParams); rethrow them untouched instead of
382 |         // downgrading them to InternalError.
383 |         if (error instanceof McpError) {
384 |           throw error;
385 |         }
386 |         if (error instanceof Error) {
387 |           console.error('[Error] Failed to fetch entry:', error.message);
388 |           throw new McpError(ErrorCode.InternalError, `Falha ao buscar a entrada: ${error.message}`);
389 |         }
390 |         console.error('[Error] Unknown error occurred');
391 |         throw new McpError(ErrorCode.InternalError, 'Falha ao buscar a entrada: Unknown error');
392 |       }
393 |     });
394 |   }
395 | 
396 |   /** Connects the server to a stdio transport and starts serving requests. */
397 |   async run() {
398 |     const transport = new StdioServerTransport();
399 |     await this.server.connect(transport);
400 |     console.error('Ldoce JSON server running via stdio');
401 |   }
402 | }
403 | 
404 | // Start the server; a rejected run() is reported on stderr.
405 | const serverInstance = new LdoceMcpServer();
406 | serverInstance.run().catch((error) => console.error(error));
407 | 
```