# Directory Structure ``` ├── .gitignore ├── LICENSE ├── pom.xml ├── README.md └── src ├── main │ ├── java │ │ └── org │ │ └── rainyheart │ │ └── cawl4ai │ │ └── mcp │ │ ├── conf │ │ │ └── ConfigProperties.java │ │ ├── controller │ │ │ ├── Crawl4aiApi.java │ │ │ └── impl │ │ │ └── Crawl4aiApiImpl.java │ │ ├── http │ │ │ ├── request │ │ │ │ ├── CrawlRequest.java │ │ │ │ └── CrawlTaskRequest.java │ │ │ └── response │ │ │ ├── CrawlResponse.java │ │ │ └── CrawlTaskResponse.java │ │ └── McpServerApplication.java │ └── resources │ └── application.properties └── test └── java └── org └── rainyheart └── cawl4ai └── client └── ClientStdio.java ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` /target/ .settings/ .project .classpath ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/controller/Crawl4aiApi.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.controller; public interface Crawl4aiApi { public String task(String taskId); public String crawl(String[] urls, String strategy, Integer max_depth, String output_format); } ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/http/request/CrawlTaskRequest.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.http.request; public class CrawlTaskRequest { private String taskId; public String getTaskId() { return taskId; } public void setTaskId(String taskId) { this.taskId = taskId; } } ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/http/response/CrawlResponse.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.http.response; import java.util.HashMap; import java.util.Map; public class CrawlResponse { private String taskId; public String getTaskId() { return taskId; } public void setTaskId(String taskId) { this.taskId = taskId; } public Map<String, Object> toMap() { Map<String, Object> map = new HashMap<String, Object>(); map.put("taskId", taskId); return map; } } ``` -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- ``` spring.main.web-application-type=none # NOTE: You must disable the banner and the console logging # to allow the STDIO transport to work !!! spring.main.banner-mode=off # logging.pattern.console=%d{yyyy-MMM-dd HH:mm:ss.SSS} %-5level [%thread] %logger{15} - %msg%n logging.pattern.console= spring.ai.mcp.server.name=jcrawl4ai-mcp-server ## crawl4ai Settings - update your URL and token below crawl4ai.base-url=http://your-cral4ai-server-url:11235 crawl4ai.api-token=your-api-token # MCP spring.ai.mcp.server.enabled=true spring.ai.mcp.server.version=1.0.0 spring.ai.mcp.server.type=SYNC spring.ai.mcp.server.stdio=true logging.level.org.springframework.web=DEBUG logging.file.name=./target/mcp-stdio-server.log ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/conf/ConfigProperties.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.conf; import org.springframework.boot.context.properties.ConfigurationProperties; @ConfigurationProperties(prefix = "crawl4ai") public class ConfigProperties { private String baseUrl; private String apiToken; public String getBaseUrl() { return baseUrl; } public void setBaseUrl(String baseUrl) { this.baseUrl = baseUrl; } public String getApiToken() { return apiToken; } public void setApiToken(String apiToken) { this.apiToken = apiToken; } } ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/McpServerApplication.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp; import org.rainyheart.cawl4ai.mcp.conf.ConfigProperties; import org.rainyheart.cawl4ai.mcp.controller.Crawl4aiApi; import org.springframework.ai.tool.ToolCallbackProvider; import org.springframework.ai.tool.method.MethodToolCallbackProvider; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; @SpringBootApplication @EnableConfigurationProperties({ConfigProperties.class}) @ComponentScan(basePackages = "org.rainyheart.cawl4ai.mcp") public class McpServerApplication { public static void main(String[] args) { SpringApplication.run(McpServerApplication.class, args); } @Bean public ToolCallbackProvider tools(Crawl4aiApi crawl4aiApi) { return MethodToolCallbackProvider.builder().toolObjects(crawl4aiApi).build(); } } ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/http/request/CrawlRequest.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.http.request; import java.util.HashMap; import java.util.Map; import cn.hutool.json.JSONUtil; public class CrawlRequest { private String[] urls; private String strategy; private Integer max_depth; private String output_format; public String[] getUrls() { return urls; } public void setUrls(String[] urls) { this.urls = urls; } public String getStrategy() { return strategy; } public void setStrategy(String strategy) { this.strategy = strategy; } public Integer getMax_depth() { return max_depth; } public void setMax_depth(Integer max_depth) { this.max_depth = max_depth; } public String getOutput_format() { return output_format; } public void setOutput_format(String output_format) { this.output_format = output_format; } public String toJson() { return JSONUtil.toJsonStr(this); } public Map<String, Object> toMap() { Map<String, Object> map = new HashMap<String, Object>(); map.put("urls", urls); map.put("urstrategyls", strategy); map.put("max_depth", max_depth); map.put("output_format", output_format); return map; } } ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/controller/impl/Crawl4aiApiImpl.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.controller.impl; import org.rainyheart.cawl4ai.mcp.conf.ConfigProperties; import org.rainyheart.cawl4ai.mcp.controller.Crawl4aiApi; import org.rainyheart.cawl4ai.mcp.http.request.CrawlRequest; import org.rainyheart.cawl4ai.mcp.http.response.CrawlResponse; import org.rainyheart.cawl4ai.mcp.http.response.CrawlTaskResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.tool.annotation.Tool; import org.springframework.ai.tool.annotation.ToolParam; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import cn.hutool.http.HttpRequest; import cn.hutool.http.HttpResponse; import cn.hutool.json.JSONUtil; @Service public class Crawl4aiApiImpl implements Crawl4aiApi { @Autowired private ConfigProperties configProperties; private Logger logger = LoggerFactory.getLogger(this.getClass()); @Override @Tool(description = "Call crawl4ai API to crawl a URL") public String crawl(@ToolParam(description = "the target websites urls") String[] urls, @ToolParam(description = "cawl strategy") String strategy, @ToolParam(description = "max_depth for cawl") Integer max_depth, @ToolParam(description = "response output format for cawl") String output_format) { logger.debug("Crawl4aiApiImpl.crawl() urls: {}, strategy: {}, max_depth: {}, output_format: {}", urls, strategy, max_depth, output_format); CrawlRequest request = new CrawlRequest(); request.setUrls(urls); request.setStrategy(strategy); request.setMax_depth(max_depth); request.setOutput_format(output_format); HttpRequest httpRequest = HttpRequest.post(configProperties.getBaseUrl() + "/crawl") .bearerAuth(configProperties.getApiToken()).body(request.toJson()); HttpResponse response = httpRequest.execute(); logger.debug(response.body()); CrawlResponse rsp = JSONUtil.toBean(response.body(), CrawlResponse.class); return JSONUtil.toJsonStr(rsp); } @Override @Tool(description = "Get the crawl result by the given taskId") public String task(String taskId) { logger.debug("taskId: {}", taskId); HttpRequest httpRequest = HttpRequest.get(configProperties.getBaseUrl() + "/task/" + taskId) .bearerAuth(configProperties.getApiToken()); HttpResponse response = httpRequest.execute(); logger.debug(response.body()); CrawlTaskResponse rsp = JSONUtil.toBean(response.body(), CrawlTaskResponse.class); return JSONUtil.toJsonStr(rsp); } } ``` -------------------------------------------------------------------------------- /src/test/java/org/rainyheart/cawl4ai/client/ClientStdio.java: -------------------------------------------------------------------------------- ```java /* * Copyright 2024 - 2024 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.rainyheart.cawl4ai.client; import org.rainyheart.cawl4ai.mcp.http.request.CrawlRequest; import org.rainyheart.cawl4ai.mcp.http.response.CrawlResponse; import cn.hutool.json.JSONObject; import cn.hutool.json.JSONUtil; import io.modelcontextprotocol.client.McpClient; import io.modelcontextprotocol.client.transport.ServerParameters; import io.modelcontextprotocol.client.transport.StdioClientTransport; import io.modelcontextprotocol.spec.McpSchema; import io.modelcontextprotocol.spec.McpSchema.CallToolRequest; import io.modelcontextprotocol.spec.McpSchema.CallToolResult; import io.modelcontextprotocol.spec.McpSchema.ListToolsResult; import io.modelcontextprotocol.spec.McpSchema.TextContent; /** * With stdio transport, the MCP server is automatically started by the client. But you * have to build the server jar first: * * <pre> * ./mvnw clean install * </pre> */ public class ClientStdio { public static void main(String[] args) throws InterruptedException { var stdioParams = ServerParameters.builder("java") .args("-jar", "./target/jcrawl4ai-mcp-server-1.0.0.jar") .build(); var transport = new StdioClientTransport(stdioParams); var client = McpClient.sync(transport).build(); client.initialize(); // List and demonstrate tools ListToolsResult toolsList = client.listTools(); System.out.println("Available Tools = " + toolsList); CrawlRequest request = new CrawlRequest(); request.setMax_depth(10); request.setStrategy("best_first"); request.setOutput_format("markdown"); request.setUrls(new String[] { "https://www.weather.gov" }); CallToolResult crawlResult = client.callTool(new CallToolRequest("crawl", request.toMap())); System.out.println("Crawl response: " + crawlResult); String taskId = null; for(McpSchema.Content content : crawlResult.content()) { if(content instanceof TextContent) { taskId = ((TextContent)content).text(); } } JSONObject json = JSONUtil.parseObj(JSONUtil.toJsonPrettyStr(taskId.replace("\\", "").replace("\"", ""))); taskId = (String) json.get("taskId"); CrawlResponse response = new CrawlResponse(); response.setTaskId(taskId); CallToolResult finalResult = client.callTool(new CallToolRequest("task", response.toMap())); String status = finalResult.content().get(0).toString(); while (status.indexOf("completed") < 0) { finalResult = client.callTool(new CallToolRequest("task", response.toMap())); status = finalResult.content().get(0).toString(); } System.out.println("Task Response = " + finalResult); client.closeGracefully(); } } ``` -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- ``` <?xml version='1.0' encoding='UTF-8'?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>org.rainyheart</groupId> <artifactId>jcrawl4ai-mcp-server</artifactId> <version>1.0.0</version> <packaging>jar</packaging> <name>jcrawl4ai-mcp-server</name> <description>Crawl4ai MCP Server</description> <parent> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-parent</artifactId> <version>3.4.4</version> </parent> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <java.version>17</java.version> <hutool.version>5.8.20</hutool.version> </properties> <dependencyManagement> <dependencies> <dependency> <groupId>org.springframework.ai</groupId> <artifactId>spring-ai-bom</artifactId> <version>1.0.0-M7</version> <type>pom</type> <scope>import</scope> </dependency> </dependencies> </dependencyManagement> <dependencies> <dependency> <groupId>org.springframework.ai</groupId> <artifactId>spring-ai-starter-mcp-server</artifactId> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-web</artifactId> </dependency> <dependency> <groupId>cn.hutool</groupId> <artifactId>hutool-all</artifactId> <version>${hutool.version}</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-maven-plugin</artifactId> </plugin> </plugins> </build> <repositories> <repository> <id>spring-milestones</id> <name>Spring Milestones</name> <url>https://repo.spring.io/milestone</url> <snapshots> <enabled>false</enabled> </snapshots> </repository> <repository> <id>spring-snapshots</id> <name>Spring Snapshots</name> <url>https://repo.spring.io/snapshot</url> <releases> <enabled>false</enabled> </releases> </repository> <repository> <id>central-portal-snapshots</id> <name>Central Portal Snapshots</name> <url>https://central.sonatype.com/repository/maven-snapshots/</url> <releases> <enabled>false</enabled> </releases> <snapshots> <enabled>true</enabled> </snapshots> </repository> </repositories> <distributionManagement> <repository> <id>github</id> <name>GitHub Ken Ye Apache Maven Packages</name> <url>https://maven.pkg.github.com/rainyheart/jcrawl4ai-mcp-server</url> </repository> </distributionManagement> </project> ``` -------------------------------------------------------------------------------- /src/main/java/org/rainyheart/cawl4ai/mcp/http/response/CrawlTaskResponse.java: -------------------------------------------------------------------------------- ```java /** * Create on Apr 21, 2025 * * Copyright (c) 2025 by Ken Ye. * * All Rights Reserved, Designed By Ken Ye * * Copyright: Copyright(C) 2025-2035 * * Company: Ken Ye */ package org.rainyheart.cawl4ai.mcp.http.response; public class CrawlTaskResponse { private String status; private String created_at; private Result[] results; public String getStatus() { return status; } public void setStatus(String status) { this.status = status; } public String getCreated_at() { return created_at; } public void setCreated_at(String created_at) { this.created_at = created_at; } public Result[] getResults() { return results; } public void setResults(Result[] results) { this.results = results; } public class Result { private String url; private String html; private Boolean success; private String cleaned_html; private String[] media; private String[] links; private String downloaded_files; private String screenshot; private String markdown; private String markdown_v2; private String fit_markdown; private String fit_html; private String extracted_content; private String[] metadata; private String error_message; private String session_id; private String[] response_headers; private String status_code; public Result() { super(); } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public String getHtml() { return html; } public void setHtml(String html) { this.html = html; } public Boolean getSuccess() { return success; } public void setSuccess(Boolean success) { this.success = success; } public String getCleaned_html() { return cleaned_html; } public void setCleaned_html(String cleaned_html) { this.cleaned_html = cleaned_html; } public String[] getMedia() { return media; } public void setMedia(String[] media) { this.media = media; } public String[] getLinks() { return links; } public void setLinks(String[] links) { this.links = links; } public String getDownloaded_files() { return downloaded_files; } public void setDownloaded_files(String downloaded_files) { this.downloaded_files = downloaded_files; } public String getScreenshot() { return screenshot; } public void setScreenshot(String screenshot) { this.screenshot = screenshot; } public String getMarkdown() { return markdown; } public void setMarkdown(String markdown) { this.markdown = markdown; } public String getMarkdown_v2() { return markdown_v2; } public void setMarkdown_v2(String markdown_v2) { this.markdown_v2 = markdown_v2; } public String getFit_markdown() { return fit_markdown; } public void setFit_markdown(String fit_markdown) { this.fit_markdown = fit_markdown; } public String getFit_html() { return fit_html; } public void setFit_html(String fit_html) { this.fit_html = fit_html; } public String getExtracted_content() { return extracted_content; } public void setExtracted_content(String extracted_content) { this.extracted_content = extracted_content; } public String[] getMetadata() { return metadata; } public void setMetadata(String[] metadata) { this.metadata = metadata; } public String getError_message() { return error_message; } public void setError_message(String error_message) { this.error_message = error_message; } public String getSession_id() { return session_id; } public void setSession_id(String session_id) { this.session_id = session_id; } public String[] getResponse_headers() { return response_headers; } public void setResponse_headers(String[] response_headers) { this.response_headers = response_headers; } public String getStatus_code() { return status_code; } public void setStatus_code(String status_code) { this.status_code = status_code; } } } ```