# Directory Structure
```
├── .gitignore
├── Dockerfile
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── src
│ └── index.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Dependencies
2 | node_modules/
3 | package-lock.json
4 |
5 | # Build outputs
6 | build/
7 | dist/
8 | *.tsbuildinfo
9 |
10 | # Logs
11 | *.log
12 | npm-debug.log*
13 | yarn-debug.log*
14 | yarn-error.log*
15 |
16 | # Environment variables
17 | .env*
18 | .env.local
19 | .env.development.local
20 | .env.test.local
21 | .env.production.local
22 |
23 | # IDE
24 | .idea/
25 | .vscode/
26 | *.swp
27 | *.swo
28 | .DS_Store
29 |
30 | # Test coverage
31 | coverage/
32 | .nyc_output/
33 |
34 | # Temporary files
35 | *.tmp
36 | *.temp
37 | .cache/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # Ldoce MCP Server
2 | [![smithery badge](https://smithery.ai/badge/@edgardamasceno-dev/ldoce-mcp-server)](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server)
3 |
4 |
5 | Este é um MCP server desenvolvido em Node.js e TypeScript que consome a página do Longman Dictionary para uma determinada palavra e retorna os dados extraídos no formato JSON padronizado para uso por agentes de IA.
6 |
7 | ## Descrição
8 |
9 | O servidor se conecta à URL `https://www.ldoceonline.com/dictionary/<word>`, extrai informações como a introdução, tópicos relacionados, entradas (verb e noun), corpus examples e origem, e retorna esses dados estruturados em um objeto JSON. O projeto segue os padrões do Model Context Protocol (MCP) e utiliza os pacotes Axios e Cheerio para requisições HTTP e parsing de HTML.
10 |
11 | ## Recursos
12 |
13 | - **Extrai informações do Longman Dictionary:**
14 | - Introdução e tópicos relacionados
15 | - Entradas com detalhes de pronúncias, sentidos, exemplos, etc.
16 | - Corpus examples
17 | - Origem da palavra
18 |
19 | - **Utiliza MCP SDK para expor uma ferramenta** que pode ser integrada a clientes MCP, como o Claude Desktop.
20 |
21 | ## Pré-requisitos
22 |
23 | - Node.js (versão 16 ou superior)
24 | - npm
25 | - Git
26 |
27 | ## Instalação
28 |
29 | ### Installing via Smithery
30 |
31 | To install Ldoce Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@edgardamasceno-dev/ldoce-mcp-server):
32 |
33 | ```bash
34 | npx -y @smithery/cli install @edgardamasceno-dev/ldoce-mcp-server --client claude
35 | ```
36 |
37 | ### Manual Installation
38 | 1. Clone o repositório:
39 | ```bash
40 | git clone https://github.com/<seu-usuario>/ldoce-mcp-server.git
41 | cd ldoce-mcp-server
42 |
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "Node16",
5 | "moduleResolution": "Node16",
6 | "outDir": "./build",
7 | "rootDir": "./src",
8 | "strict": true,
9 | "esModuleInterop": true,
10 | "skipLibCheck": true,
11 | "forceConsistentCasingInFileNames": true
12 | },
13 | "include": ["src/**/*"],
14 | "exclude": ["node_modules"]
15 | }
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
2 | FROM node:lts-alpine
3 |
4 | # Create app directory
5 | WORKDIR /usr/src/app
6 |
7 | # Install app dependencies
8 | COPY package.json package-lock.json ./
9 | RUN npm install --ignore-scripts
10 |
11 | # Bundle app source
12 | COPY . .
13 |
14 | # Build the project
15 | RUN npm run build
16 |
17 | # Expose any ports if needed (MCP typically uses stdio, so not required here)
18 |
19 | # Start the MCP server
20 | CMD [ "node", "build/index.js" ]
21 |
```
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
```yaml
1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
2 |
3 | startCommand:
4 | type: stdio
5 | configSchema:
6 | # JSON Schema defining the configuration options for the MCP.
7 | type: object
8 | properties: {}
9 | description: No configuration needed.
10 | commandFunction:
11 | # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
12 | |-
13 | (config) => ({ command: 'node', args: ['build/index.js'] })
14 | exampleConfig: {}
15 |
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "ldoce-mcp-server",
3 | "version": "0.1.0",
4 | "description": "A Model Context Protocol server",
5 | "type": "module",
6 | "bin": {
7 | "ldoce-mcp-server": "./build/index.js"
8 | },
9 | "files": [
10 | "build"
11 | ],
12 | "private": true,
13 | "scripts": {
14 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
15 | "prepare": "npm run build",
16 | "watch": "tsc --watch",
17 | "inspector": "npx @modelcontextprotocol/inspector build/index.js"
18 | },
19 | "keywords": [],
20 | "author": "",
21 | "license": "ISC",
22 | "dependencies": {
23 | "@modelcontextprotocol/sdk": "^1.7.0",
24 | "axios": "^1.8.3",
25 | "cheerio": "^1.0.0",
26 | "phantomjs-prebuilt": "^2.1.16",
27 | "turndown": "^7.2.0"
28 | },
29 | "devDependencies": {
30 | "@types/cheerio": "^0.22.35",
31 | "@types/node": "^22.13.10",
32 | "@types/turndown": "^5.0.5",
33 | "ts-node": "^10.9.2",
34 | "typescript": "^5.8.2"
35 | }
36 | }
37 |
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | #!/usr/bin/env node
2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4 | import {
5 | CallToolRequestSchema,
6 | ListToolsRequestSchema,
7 | McpError,
8 | ErrorCode,
9 | } from '@modelcontextprotocol/sdk/types.js';
10 | import axios from 'axios';
11 | import * as cheerio from 'cheerio';
12 |
/**
 * Data structures that make up the JSON document returned by the tool.
 */

// Shape of the final JSON:
//
// {
//   "dictionaryEntries": [ ... ],
//   "simpleForm": { ... },
//   "continuousForm": { ... }
// }

/** One usage example attached to a sense. */
interface DictionaryExample {
  /** Text of the example sentence. */
  text: string;
  /** URL of the example's audio recording, when one is available. */
  audioUrl?: string;
}

/** One sense (meaning) of a dictionary entry. */
interface DictionarySense {
  /** Sense number, when the sense is numbered. */
  number?: number;
  /** Grammar label of the sense, when present. */
  grammar?: string;
  /** Activation label of the sense, when present. */
  activation?: string;
  /**
   * Either the plain definition text, or a cross-reference made of a
   * display text and the URL it points to.
   */
  definition?: string | { text: string; url: string };
  /** Usage examples listed under the sense. */
  examples?: DictionaryExample[];
}

/** One parsed dictionary entry. */
interface DictionaryParsedEntry {
  /** The word, e.g. "rot". */
  word: string;
  /** Pronunciation text, e.g. "/rɒt/ (US: rɑːt)". */
  pronunciation: string;
  /** Part of speech, e.g. "verb", "noun", etc. */
  partOfSpeech: string;
  /** Inflected forms, e.g. ["rotted", "rotting"]. */
  inflections: string[];
  /** Related topics, e.g. ["Biology"]. */
  relatedTopics: string[];
  /** Senses listed for the entry. */
  senses: DictionarySense[];
}

/**
 * Conjugation table: maps a tense name to a map from subject to verb form.
 */
interface ConjugationTable {
  [tense: string]: {
    [subject: string]: string;
  };
}

/** Final JSON document produced for one looked-up word. */
interface FinalDictionaryJson {
  /** All entries parsed from the page. */
  dictionaryEntries: DictionaryParsedEntry[];
  /** Conjugations taken from the "simple form" verb table. */
  simpleForm: ConjugationTable;
  /** Conjugations taken from the "continuous form" verb table. */
  continuousForm: ConjugationTable;
}
58 |
/** Origin of the Longman dictionary site; relative links are resolved against it. */
const LDOCE_BASE_URL = 'https://www.ldoceonline.com';

/** Spans inside `.PronCodes` whose text is concatenated into the pronunciation. */
const PRONUNCIATION_SPAN_SELECTOR = 'span.PRON, span.AMEVARPRON, span.neutral';

/**
 * Turns the raw pronunciation text into the "/rɒt/ (US: rɑːt)" form.
 *
 * The raw text is expected to look like "/rɒt $ rɑːt/", where "$" separates
 * the first pronunciation from the American variant. Text without a "$" is
 * returned trimmed and otherwise untouched.
 */
function formatPronunciation(raw: string): string {
  const text = raw.trim();
  const separatorIndex = text.indexOf('$');
  if (separatorIndex === -1) {
    return text;
  }

  // Drop the surrounding slashes so they can be re-applied consistently.
  const stripSlashes = (part: string): string =>
    part.trim().replace(/^\/+|\/+$/g, '').trim();

  const british = stripSlashes(text.slice(0, separatorIndex));
  const american = stripSlashes(text.slice(separatorIndex + 1));

  if (!american) {
    return british ? `/${british}/` : '';
  }
  if (!british) {
    return `(US: ${american})`;
  }
  return `/${british}/ (US: ${american})`;
}

/**
 * Splits an inflection label such as "(rotted, rotting)" into
 * ["rotted", "rotting"]. Returns an empty array for empty input.
 */
function parseInflections(raw: string): string[] {
  return raw
    .replace(/[()]/g, '')
    .split(',')
    .map((part) => part.trim())
    .filter(Boolean);
}

/** Resolves a link target against the dictionary site unless it is already absolute. */
function resolveLdoceUrl(href: string): string {
  if (/^https?:\/\//i.test(href)) {
    return href;
  }
  return `${LDOCE_BASE_URL}${href.startsWith('/') ? '' : '/'}${href}`;
}

/**
 * Downloads the Longman dictionary page for `word` and converts it into the
 * final JSON structure.
 *
 * @param word Word to look up; it is URL-encoded before being placed in the path.
 * @returns The parsed entries plus the "simple" and "continuous" conjugation
 *   tables (both empty objects when the page has no verb table).
 * @throws Whatever `axios.get` rejects with (the request uses a 10 s timeout).
 */
async function fetchDictionaryData(word: string): Promise<FinalDictionaryJson> {
  const url = `${LDOCE_BASE_URL}/dictionary/${encodeURIComponent(word)}`;

  const { data: html } = await axios.get(url, {
    timeout: 10000,
    headers: {
      'User-Agent': 'Mozilla/5.0 (compatible; MCP-Server/0.1.0)',
    },
  });

  const $ = cheerio.load(html);

  // ==========================
  // 1) Dictionary entries: one per <span class="dictentry">
  // ==========================
  const dictionaryEntries: DictionaryParsedEntry[] = [];

  $('span.dictentry').each((_, dictentryEl) => {
    const ldoceEntryEl = $(dictentryEl).find('.ldoceEntry.Entry').first();
    if (ldoceEntryEl.length === 0) {
      return; // not a regular entry: skip it
    }

    const relatedTopics: string[] = [];
    ldoceEntryEl.find('.topics_container a.topic').each((_, topicEl) => {
      relatedTopics.push($(topicEl).text().trim());
    });

    // Head of the entry (headword, part of speech, pronunciation, inflections).
    const headEl = ldoceEntryEl.find('.frequent.Head, .Head').first();

    // Prefer the headword printed on the page; fall back to the requested word.
    const extractedWord = headEl.find('.HWD').first().text().trim() || word;
    const partOfSpeech = headEl.find('.POS').text().trim();

    // Concatenate the pronunciation spans in document order. A span nested
    // inside another matching span is skipped, because its text is already
    // part of the outer span's text and would otherwise be counted twice.
    let rawPronunciation = '';
    headEl
      .find('.PronCodes')
      .first()
      .find(PRONUNCIATION_SPAN_SELECTOR)
      .each((_, spanEl) => {
        const span = $(spanEl);
        if (span.parents(PRONUNCIATION_SPAN_SELECTOR).length > 0) {
          return;
        }
        rawPronunciation += span.text();
      });
    const pronunciation = formatPronunciation(rawPronunciation);

    const inflections = parseInflections(headEl.find('.Inflections').text().trim());

    // ==========================
    // 2) Senses of the entry
    // ==========================
    const senses: DictionarySense[] = [];
    ldoceEntryEl.find('.Sense').each((_, senseEl) => {
      const sense = $(senseEl);
      const number =
        Number.parseInt(sense.find('.sensenum').first().text().trim(), 10) || undefined;
      const grammar = sense.find('.GRAM').text().trim() || undefined;
      const activation = sense.find('.ACTIV').text().trim() || undefined;

      // The definition is normally plain text. When it is missing, or is only
      // an arrow ("→ rot in hell/jail"), the sense points at another entry and
      // the cross-reference link is returned instead.
      const definitionText = sense.find('.DEF').text().trim();
      let definition: DictionarySense['definition'] = definitionText;
      if (definitionText === '' || definitionText.startsWith('→')) {
        const crossLink = sense.find('.Crossref a').first();
        const crossText = crossLink.text().trim();
        const crossHref = crossLink.attr('href');
        // Only build the link when both parts exist; otherwise keep the text
        // rather than emitting a URL that ends in "undefined".
        if (crossText && crossHref) {
          definition = {
            text: `🔗 ${crossText}`,
            url: resolveLdoceUrl(crossHref),
          };
        }
      }

      const examples: DictionaryExample[] = [];
      sense.find('.EXAMPLE').each((_, exampleEl) => {
        const example = $(exampleEl);
        const audioUrl =
          example.find('.speaker.exafile').attr('data-src-mp3') ||
          example.find('.speaker').attr('data-src-mp3') ||
          undefined;
        examples.push({ text: example.text().trim(), audioUrl });
      });

      senses.push({ number, grammar, activation, definition, examples });
    });

    dictionaryEntries.push({
      word: extractedWord,
      pronunciation,
      partOfSpeech,
      inflections,
      relatedTopics,
      senses,
    });
  });

  // ==========================
  // 3) Verb table -> simpleForm and continuousForm
  // ==========================
  const simpleForm: ConjugationTable = {};
  const continuousForm: ConjugationTable = {};

  // Searching an empty selection yields an empty selection, so a page without
  // a verb table simply leaves both objects empty.
  const verbTableEl = $('.verbTable').first();

  const simpleFormEl = verbTableEl.find('table.simpleForm').first();
  if (simpleFormEl.length > 0) {
    parseConjugationTable(simpleFormEl, simpleForm);
  }

  const continuousFormEl = verbTableEl.find('table.continuousForm').first();
  if (continuousFormEl.length > 0) {
    parseConjugationTable(continuousFormEl, continuousForm);
  }

  return { dictionaryEntries, simpleForm, continuousForm };
}
256 |
/**
 * Reads the conjugations out of a verb <table> (e.g. the "simpleForm" table)
 * and stores them in `tableObj` as
 * { Tense: { "I / you / we / they": "rot", ... } }.
 *
 * Rows are read from the selection that is passed in. The table's inner HTML
 * is deliberately not re-parsed with `cheerio.load`: an HTML5 parser drops
 * <tr>/<td> tags that are not inside a <table>, which would leave no rows to
 * read.
 *
 * @param tableEl Selection holding the <table> element to read.
 * @param tableObj Object that receives the conjugations; it is mutated in place.
 */
function parseConjugationTable(
  tableEl: cheerio.Cheerio,
  tableObj: ConjugationTable
) {
  const rows = tableEl.find('tr');
  let currentTense = ''; // e.g. "Present", "Past", etc.

  for (let index = 0; index < rows.length; index += 1) {
    const row = rows.eq(index);

    // Header rows carry no conjugation data.
    if (row.find('td.header').text().trim()) {
      continue;
    }

    // Neither do the "view more" / "view less" toggle rows.
    if (row.find('td.view_more, td.view_less').length > 0) {
      continue;
    }

    // A row with a non-empty <td class="col1"> starts a new tense.
    const tenseLabel = row.find('td.col1').text().trim();
    if (tenseLabel) {
      currentTense = tenseLabel;
      if (!tableObj[currentTense]) {
        tableObj[currentTense] = {};
      }
      continue;
    }

    // Otherwise the first .col2 cell is the subject and the last one the verb form.
    const cells = row.find('td.col2');
    const subject = cells.first().text().trim();
    const verbForm = cells.last().text().trim();

    if (currentTense && subject) {
      tableObj[currentTense][subject] = verbForm;
    }
  }
}
308 |
309 | /* =======================
310 | MCP Server
311 | ======================= */
/**
 * MCP server exposing a single tool, `get_dictionary_entry`, which looks a
 * word up through `fetchDictionaryData` and returns the result as JSON text.
 *
 * All diagnostics are written with `console.error`.
 */
class LdoceMcpServer {
  private readonly server: Server;

  /**
   * Creates the underlying MCP server, registers the tool handlers and
   * installs the error, close and SIGINT hooks.
   */
  constructor() {
    console.error('[Setup] Initializing MCP server with JSON output...');
    this.server = new Server(
      {
        name: 'ldoce-json-server',
        id: 'ldoce-json-server',
        version: '0.1.0',
      },
      { capabilities: { tools: {} } }
    );

    this.setupToolHandlers();
    this.server.onerror = (error) => console.error('[Error]', error);
    // The process has nothing left to do once the connection is closed.
    this.server.onclose = () => {
      console.error('[Server] Connection closed');
      process.exit(0);
    };
    // Close the server on Ctrl+C. A failing close is logged instead of being
    // left as an unhandled rejection, and the process exits either way.
    process.on('SIGINT', () => {
      this.server
        .close()
        .catch((error: unknown) => console.error('[Error]', error))
        .finally(() => process.exit(0));
    });
  }

  /** Registers the "list tools" and "call tool" request handlers. */
  private setupToolHandlers(): void {
    // Handler that lists the available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'get_dictionary_entry',
          description: 'Busca o HTML do Longman para uma palavra e retorna JSON parseado (dictionaryEntries, simpleForm, continuousForm)',
          inputSchema: {
            type: 'object',
            properties: {
              word: {
                type: 'string',
                description: 'A palavra a ser consultada (ex: rot)',
              },
            },
            required: ['word'],
          },
        },
      ],
    }));

    // Handler for the get_dictionary_entry tool
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      // Request validation happens outside the try block so that these
      // errors keep their own codes instead of becoming InternalError.
      if (request.params.name !== 'get_dictionary_entry') {
        throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
      }

      // `arguments` may be absent and `word` may be of any type: check both.
      const rawWord: unknown = request.params.arguments?.word;
      if (typeof rawWord !== 'string' || rawWord.trim() === '') {
        throw new McpError(ErrorCode.InvalidParams, '"word" parameter is required.');
      }
      const word = rawWord.trim();

      console.error(`[API] Searching dictionary data for word: ${word}`);

      try {
        const finalJson = await fetchDictionaryData(word);

        // MCP content is text, so the object is serialized to a string.
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify(finalJson, null, 2),
            },
          ],
        };
      } catch (error: unknown) {
        // An McpError already carries the right code: pass it through.
        if (error instanceof McpError) {
          throw error;
        }
        if (error instanceof Error) {
          console.error('[Error] Failed to fetch entry:', error.message);
          throw new McpError(ErrorCode.InternalError, `Falha ao buscar a entrada: ${error.message}`);
        }
        console.error('[Error] Unknown error occurred');
        throw new McpError(ErrorCode.InternalError, 'Falha ao buscar a entrada: Unknown error');
      }
    });
  }

  /** Connects the server to a stdio transport and starts serving requests. */
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('Ldoce JSON server running via stdio');
  }
}
403 |
// Start the server; a failed start is reported on stderr.
const serverInstance = new LdoceMcpServer();
serverInstance.run().catch((startupError) => {
  console.error(startupError);
});
407 |
```