# Directory Structure
```
├── .gitignore
├── LICENSE
├── package.json
├── pnpm-lock.yaml
├── README.md
├── src
│ ├── index.ts
│ └── xiaohongshu.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | # Dependency directories
11 | node_modules/
12 | dist/
13 | build/
14 |
15 | # TypeScript cache
16 | *.tsbuildinfo
17 |
18 | # Optional npm cache directory
19 | .npm
20 |
21 | # Optional REPL history
22 | .node_repl_history
23 |
24 | # Environment variables
25 | .env
26 | .env.local
27 | .env.development.local
28 | .env.test.local
29 | .env.production.local
30 |
31 | # Debug files
32 | debug-*.png
33 |
34 | # OS specific files
35 | .DS_Store
36 | Thumbs.db
37 | cookies/
38 | results/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # RedNote MCP - Xiaohongshu Content Search Tool
2 |
3 | ## Overview
4 |
5 | RedNote MCP is a Model Context Protocol (MCP) server for searching and retrieving content from the Xiaohongshu (Red Book) platform. It provides intelligent content extraction with automatic login management and parallel processing capabilities.
6 |
7 | ## Features
8 |
9 | - **Smart Search**: Keyword-based content search on Xiaohongshu
10 | - **Auto Login**: Automatic cookie management and login handling
11 | - **Parallel Processing**: Efficient concurrent content retrieval
12 | - **Rich Data Extraction**:
13 | - Note titles and content
14 | - Author information and descriptions
15 | - Interaction metrics (likes, favorites, comments)
16 | - Images and hashtags
17 | - Direct note links
18 |
19 | ## Technical Stack
20 |
21 | - **Runtime**: Node.js with TypeScript
22 | - **Browser Automation**: Playwright
23 | - **Protocol**: Model Context Protocol (MCP) SDK
24 | - **Validation**: Zod schema validation
25 | - **Package Manager**: pnpm
26 |
27 | ## Data Structure
28 |
29 | ```typescript
30 | interface RedBookNote {
31 | title: string; // Note title
32 | content: string; // Note content
33 | author: string; // Author name
34 | authorDesc?: string; // Author description
35 | link: string; // Note URL
36 | likes?: number; // Like count
37 | collects?: number; // Favorite count
38 | comments?: number; // Comment count
39 | tags?: string[]; // Hashtag list
40 | images?: string[]; // Image URLs (WebP format)
41 | }
42 | ```
43 |
44 | ## Installation
45 |
46 | ### Prerequisites
47 | - Node.js 18+
48 | - pnpm package manager
49 |
50 | ### Setup
51 |
52 | 1. Clone the repository:
53 | ```bash
54 | git clone <repository-url>
55 | cd rednote-mcp
56 | ```
57 |
58 | 2. Install dependencies:
59 | ```bash
60 | pnpm install
61 | ```
62 |
63 | 3. Install Playwright browsers:
64 | ```bash
65 | pnpm exec playwright install
66 | ```
67 |
68 | 4. Build the project:
69 | ```bash
70 | pnpm build
71 | ```
72 |
73 | ## Usage
74 |
75 | ### Running the MCP Server
76 |
77 | ```bash
78 | pnpm start
79 | ```
80 |
81 | ### Development Mode
82 |
83 | ```bash
84 | pnpm dev
85 | ```
86 |
87 | ### Testing
88 |
89 | ```bash
90 | pnpm test
91 | ```
92 |
93 | ## MCP Client Configuration
94 |
95 | ### Claude Desktop
96 |
97 | Add the following configuration to your Claude Desktop config file:
98 |
99 | **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
100 | **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
101 |
102 | ```json
103 | {
104 | "mcpServers": {
105 | "rednote-mcp": {
106 | "command": "node",
107 | "args": [
108 | "C:\\ABSOLUTE\\PATH\\TO\\rednote-mcp\\dist\\index.js"
109 | ]
110 | }
111 | }
112 | }
113 | ```
114 |
115 | **For macOS/Linux users:**
116 | ```json
117 | {
118 | "mcpServers": {
119 | "rednote-mcp": {
120 | "command": "node",
121 | "args": [
122 | "/absolute/path/to/rednote-mcp/dist/index.js"
123 | ]
124 | }
125 | }
126 | }
127 | ```
128 |
129 | Replace the path with your actual project directory.
130 |
131 | ### Other MCP Clients
132 |
133 | For other MCP-compatible clients, use the built server file:
134 | ```bash
135 | node dist/index.js
136 | ```
137 |
138 | ## Tool Usage
139 |
140 | Once configured, you can use the search tool in your MCP client:
141 |
142 | ```
143 | Search for "food recommendation" on Xiaohongshu
144 | ```
145 |
146 | The tool will return structured data including titles, content, author information, and images.
147 |
148 | ## Important Notes
149 |
150 | - **First Run**: Manual login to Xiaohongshu is required on first use
151 | - **Performance**: Initial searches may take 30-60 seconds due to browser startup and content loading
152 | - **Rate Limiting**: Concurrent requests are limited to 3 to avoid platform restrictions
153 | - **Image Format**: Images are provided in WebP format
154 | - **Cookie Management**: Login state is automatically saved and reused
155 |
156 | ## Development
157 |
158 | ### Project Structure
159 | ```
160 | rednote-mcp/
161 | ├── src/
162 | │ ├── index.ts # MCP server entry point
163 | │ └── xiaohongshu.ts # Core scraping logic
164 | ├── cookies/ # Auto-generated cookie storage
165 | ├── results/ # Optional: saved search results
166 | ├── dist/ # Compiled JavaScript output
167 | ├── package.json
168 | ├── tsconfig.json
169 | └── README.md
170 | ```
171 |
172 | ### Available Scripts
173 |
174 | - `pnpm build` - Build TypeScript to JavaScript
175 | - `pnpm start` - Run the built MCP server
176 | - `pnpm dev` - Watch mode: recompiles TypeScript on change (`tsc -w`); it does not restart the server
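177 | - `pnpm test` - Run tests (not yet configured: package.json defines no `test` script)
178 | - `pnpm clean` - Clean build directory (not yet configured: package.json defines no `clean` script)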
179 |
180 | ## Troubleshooting
181 |
182 | ### Common Issues
183 |
184 | 1. **Login Required**: If you see login prompts, delete the `cookies/` directory and restart
185 | 2. **Timeout Errors**: Increase the MCP client timeout settings
186 | 3. **Browser Not Found**: Run `pnpm exec playwright install` to install browsers
187 | 4. **Permission Errors**: Ensure the project directory has proper read/write permissions
188 |
189 | ## License
190 |
191 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
192 |
193 | ## Disclaimer
194 |
195 | This tool is for educational and research purposes. Please respect Xiaohongshu's terms of service and rate limits when using this tool.
196 |
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "NodeNext",
5 | "moduleResolution": "NodeNext",
6 | "outDir": "./dist",
7 | "rootDir": "./src",
8 | "strict": true,
9 | "esModuleInterop": true,
10 | "skipLibCheck": true,
11 | "forceConsistentCasingInFileNames": true
12 | },
13 | "include": ["src/**/*"],
14 | "exclude": ["node_modules"]
15 | }
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "rednote-mcp",
3 | "version": "1.0.0",
4 | "description": "MCP server for searching and retrieving content from Xiaohongshu (Red Note) platform.",
5 | "main": "dist/index.js",
6 | "type": "module",
7 | "scripts": {
8 | "build": "tsc",
9 | "start": "node dist/index.js",
10 | "dev": "tsc -w"
11 | },
12 | "keywords": ["mcp", "xiaohongshu", "redbook"],
13 | "author": "",
14 | "license": "ISC",
15 | "packageManager": "[email protected]",
16 | "devDependencies": {
17 | "@modelcontextprotocol/sdk": "^1.10.1",
18 | "@types/node": "^22.14.1",
19 | "playwright": "^1.52.0",
20 | "typescript": "^5.8.3",
21 | "zod": "^3.24.3"
22 | }
23 | }
24 |
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | #!/usr/bin/env node
2 |
3 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
4 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
5 | import { searchXiaohongshu } from "./xiaohongshu.js";
6 | import { z } from "zod";
7 | import { text } from "stream/consumers";
8 |
// Create the MCP server instance on which the search tool below is registered.
// NOTE(review): the version declared here ("1.1.0") differs from the one in
// package.json ("1.0.0") — confirm which is intended.
const server = new McpServer({
  name: "rednote-mcp",
  version: "1.1.0",
  description: "MCP server for searching and retrieving content from Xiaohongshu (Red Note) platform.",
});

/**
 * Shape of the entries placed in the tool result's `content` array: either a
 * plain text block, or a resource block carrying a URI plus descriptive text.
 */
type ContentBlock =
  | {
      type: "text";
      text: string;
    }
  | {
      type: "resource";
      resource: {
        uri: string;
        text: string;
        mimeType?: string; // mimeType is optional
      };
    };
29 |
/** A single note as returned by `searchXiaohongshu`. */
type SearchResultNote = Awaited<ReturnType<typeof searchXiaohongshu>>[number];

/**
 * Renders one note as a Markdown section.
 *
 * @param note - The note to render.
 * @param position - 1-based position of the note in the result list, shown before the title.
 * @returns Markdown with title, author, interaction counts, content, tags and the original link.
 */
function formatNoteText(note: SearchResultNote, position: number): string {
  let text = `## ${position}. ${note.title}\n\n`;

  // Author line, with the optional description in parentheses
  text += `**Author:** ${note.author}`;
  if (note.authorDesc) {
    text += ` (${note.authorDesc})`;
  }
  text += '\n\n';

  // Only counters that are actually present are listed
  const interactionInfo: string[] = [];
  if (typeof note.likes !== 'undefined') interactionInfo.push(`👍 ${note.likes}`);
  if (typeof note.collects !== 'undefined') interactionInfo.push(`⭐ ${note.collects}`);
  if (typeof note.comments !== 'undefined') interactionInfo.push(`💬 ${note.comments}`);
  if (interactionInfo.length > 0) {
    text += `**Interactions:** ${interactionInfo.join(' · ')}\n\n`;
  }

  text += `### Content\n${note.content.trim()}\n\n`;

  if (note.tags && note.tags.length > 0) {
    text += `**Tags:** ${note.tags.map(tag => `#${tag}`).join(' ')}\n\n`;
  }

  text += `**Original Link:** ${note.link}`;
  return text;
}

// Registers the `search_xiaohongshu` tool. The handler runs the search and
// returns a header block followed, for each note, by a text block, one
// resource block per image, and a separator block.
server.tool(
  "search_xiaohongshu",
  "Searches for content on Xiaohongshu (Red Note) based on a query",
  {
    query: z.string().describe("Search query for Xiaohongshu content"),
    // The count ends up in `Array.prototype.slice(0, count)`; a negative or
    // fractional value would silently yield a wrong number of notes, so it is
    // rejected at the schema level.
    count: z.number().int().positive().optional().default(10).describe("Number of results to return")
  },
  async (params: { query: string; count: number }) => {
    const { query, count } = params;
    try {
      // stdout carries the stdio transport, so diagnostics go to stderr
      console.error(`Searching Xiaohongshu: ${query}, Count: ${count}`);

      const results = await searchXiaohongshu(query, count);

      const contentBlocks: ContentBlock[] = [
        {
          type: "text",
          text: `# Xiaohongshu Search Results for "${query}"\n\nFound ${results.length} related notes.`
        }
      ];

      results.forEach((note, noteIndex) => {
        contentBlocks.push({
          type: "text",
          text: formatNoteText(note, noteIndex + 1)
        });

        // Each image becomes its own numbered resource block
        (note.images ?? []).forEach((imageUrl, imageIndex) => {
          contentBlocks.push({
            type: "resource",
            resource: {
              uri: imageUrl,
              // The 'text' property is required by the ContentBlock type.
              text: `Image ${imageIndex + 1} for note: "${note.title}"`
            }
          });
        });

        // Separator so consecutive notes are visually distinct
        contentBlocks.push({
          type: "text",
          text: "\n\n---\n\n"
        });
      });

      return {
        content: contentBlocks
      };
    } catch (error) {
      console.error("Xiaohongshu search error:", error);
      return {
        content: [{
          type: "text",
          text: `Error searching Xiaohongshu content: ${error instanceof Error ? error.message : String(error)}`
        }],
        isError: true
      };
    }
  }
);
137 |
/**
 * Connects the MCP server to a stdio transport.
 * A startup failure is logged to stderr and terminates the process with exit code 1.
 */
async function main(): Promise<void> {
  try {
    await server.connect(new StdioServerTransport());
    console.error("Xiaohongshu MCP server started, listening for messages via stdio");
  } catch (startupError) {
    console.error("Failed to start server:", startupError);
    process.exit(1);
  }
}

main();
```
--------------------------------------------------------------------------------
/src/xiaohongshu.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { chromium, Browser, BrowserContext, Page } from 'playwright';
2 | import * as fs from 'fs/promises';
3 | import * as path from 'path';
4 |
/** A Xiaohongshu note as returned by `searchXiaohongshu`. */
export interface RedBookNote {
  /** Note title. */
  title: string;
  /** Note body text. */
  content: string;
  /** Author name. */
  author: string;
  /** Author description, when the detail page exposes one. */
  authorDesc?: string;
  /** URL of the note. */
  link: string;
  /** Like count. */
  likes?: number;
  /** Favorite (collect) count. */
  collects?: number;
  /** Comment count. */
  comments?: number;
  /** Tag texts found on the note. */
  tags?: string[];
  /** Image URLs found on the note. */
  images?: string[];
}
18 |
// Where the saved login cookies live, resolved against the working directory at module load
const COOKIES_PATH = path.join(process.cwd(), 'cookies', 'xiaohongshu-cookies.json');

/**
 * Reports whether the cookie file is accessible.
 *
 * @returns `true` when `fs.access` succeeds on the cookie file, `false` on any failure.
 */
async function cookiesExist(): Promise<boolean> {
  return fs.access(COOKIES_PATH).then(
    () => true,
    () => false,
  );
}
31 |
/**
 * Persists the context's cookies to the cookie file as pretty-printed JSON.
 *
 * Best-effort: any failure is logged to stderr and swallowed so that a search
 * that already succeeded is not failed by a cookie write error.
 *
 * @param context - Browser context whose cookies are saved.
 */
async function saveCookies(context: BrowserContext): Promise<void> {
  try {
    const cookies = await context.cookies();

    // Derive the directory from COOKIES_PATH itself so the directory that is
    // created is always the one the file is written into, even if the working
    // directory changed after module load.
    await fs.mkdir(path.dirname(COOKIES_PATH), { recursive: true });

    // The file holds login session cookies, so a newly created file is made
    // readable by the owner only (the mode applies only on file creation).
    await fs.writeFile(COOKIES_PATH, JSON.stringify(cookies, null, 2), { mode: 0o600 });
    console.error('Successfully saved cookies for next use');
  } catch (error) {
    console.error('Failed to save cookies:', error);
  }
}
45 |
/**
 * Reads the cookie file and adds its cookies to the given context.
 *
 * @param context - Browser context that receives the cookies.
 * @returns `true` when the cookies were added; `false` (after logging) when
 *          reading, parsing or adding them failed.
 */
async function loadCookies(context: BrowserContext): Promise<boolean> {
  let restored = false;
  try {
    const rawJson = await fs.readFile(COOKIES_PATH, 'utf-8');
    await context.addCookies(JSON.parse(rawJson));
    console.error('Successfully loaded cookies');
    restored = true;
  } catch (loadError) {
    console.error('Failed to load cookies:', loadError);
  }
  return restored;
}
59 |
/**
 * Decides whether the page looks logged in.
 *
 * The page counts as logged out when its URL contains "login" or "sign", or
 * when a `.login-container` element is present. Any error while checking is
 * logged and reported as logged out.
 *
 * @param page - Page to inspect.
 * @returns `true` when neither logged-out signal is found.
 */
async function checkLoginStatus(page: Page): Promise<boolean> {
  try {
    const url = page.url();
    const onAuthPage = ['login', 'sign'].some((marker) => url.includes(marker));
    if (onAuthPage) {
      return false;
    }

    const loginPrompt = await page.$('.login-container');
    return !loginPrompt;
  } catch (error) {
    console.error('Error checking login status:', error);
    return false;
  }
}
77 |
/**
 * Scrolls the page down in steps so more content gets loaded.
 *
 * Inside the page, scrolls 300px every 200ms until the scrolled distance
 * reaches the document height or 10 steps have been made, then waits 2 seconds.
 *
 * @param page - Page to scroll.
 */
async function autoScroll(page: Page): Promise<void> {
  await page.evaluate(
    () =>
      new Promise<void>((done) => {
        const stepPx = 300;
        const stepLimit = 10; // Upper bound on scroll steps
        let scrolledPx = 0;
        let steps = 0;

        const ticker = setInterval(() => {
          // Re-read on every tick: the height can grow as content loads
          const pageHeight = document.body.scrollHeight;
          window.scrollBy(0, stepPx);
          scrolledPx += stepPx;
          steps += 1;

          if (scrolledPx >= pageHeight || steps >= stepLimit) {
            clearInterval(ticker);
            done();
          }
        }, 200);
      }),
  );

  // Give freshly requested content time to load
  await page.waitForTimeout(2000);
}
104 |
/**
 * Converts the display text of an engagement counter into a number.
 *
 * Placeholder labels with no leading digits (e.g. "赞") yield 0. Plain values
 * such as "35" or "10+" yield their integer part. Abbreviated values are
 * expanded: "万"/"w" multiplies by 10,000 and "千"/"k" by 1,000, so "1.2万"
 * becomes 12000 instead of the 1 that a bare `parseInt` would return.
 *
 * @param rawText - Counter text as read from the page.
 * @returns The parsed count, or 0 when the text does not start with a number.
 */
function parseEngagementCount(rawText: string): number {
  const match = rawText
    .trim()
    .replace(/,/g, '')
    .match(/^(\d+(?:\.\d+)?)\s*([万wW千kK])?/);
  if (!match) return 0;

  const value = Number(match[1]);
  switch (match[2]) {
    case '万':
    case 'w':
    case 'W':
      return Math.round(value * 10000);
    case '千':
    case 'k':
    case 'K':
      return Math.round(value * 1000);
    default:
      return Math.trunc(value);
  }
}

/**
 * Extracts title, body text, author, images, interaction counts and tags
 * from a note detail page.
 *
 * @param page - Page already navigated to a note detail URL.
 * @returns The extracted fields. On any extraction error the failure is
 *          logged and an all-empty record (empty strings/arrays, zero counts)
 *          is returned instead of throwing.
 */
async function getNoteDetailData(page: Page): Promise<{
  title: string;
  content: string;
  author: string;
  authorDesc?: string;
  images: string[];
  likes: number;
  collects: number;
  comments: number;
  tags: string[];
}> {
  try {
    // A missing title element is logged but does not abort extraction
    await page.waitForSelector('#detail-title', { timeout: 3000 })
      .catch(() => console.error('Title element not found, page structure may have changed'));

    // The callback runs inside the page, so it returns only raw strings;
    // counter texts are converted to numbers afterwards on the Node side.
    const raw = await page.evaluate(() => {
      const titleElement = document.querySelector('#detail-title');
      const title = titleElement ? titleElement.textContent?.trim() || '' : '';

      // Collect the body text node by node, skipping <a> children (tag links)
      const contentElement = document.querySelector('#detail-desc .note-text');
      let content = '';
      if (contentElement) {
        Array.from(contentElement.childNodes).forEach(node => {
          if (node.nodeType === Node.TEXT_NODE) {
            content += node.textContent;
          } else if (node.nodeType === Node.ELEMENT_NODE) {
            if ((node as Element).tagName !== 'A') {
              content += node.textContent;
            }
          }
        });
      }
      content = content.trim();

      const authorElement = document.querySelector('.author-wrapper .username');
      const author = authorElement ? authorElement.textContent?.trim() || '' : '';

      const authorDescElement = document.querySelector('.user-desc');
      const authorDesc = authorDescElement ? authorDescElement.textContent?.trim() : undefined;

      const imageElements = document.querySelectorAll('.note-slider-img');
      const images = Array.from(imageElements).map(img => (img as HTMLImageElement).src);

      // The counters are read positionally: likes, collects, comments.
      // With fewer than three counters all of them are treated as absent.
      const counters = document.querySelectorAll('.engage-bar-style .count');
      let likesText = '', collectsText = '', commentsText = '';
      if (counters.length >= 3) {
        likesText = counters[0].textContent?.trim() || '';
        collectsText = counters[1].textContent?.trim() || '';
        commentsText = counters[2].textContent?.trim() || '';
      }

      const tagElements = document.querySelectorAll('#detail-desc .tag');
      const tags = Array.from(tagElements).map(tag => tag.textContent?.trim() || '');

      return { title, content, author, authorDesc, images, tags, likesText, collectsText, commentsText };
    });

    return {
      title: raw.title,
      content: raw.content,
      author: raw.author,
      authorDesc: raw.authorDesc,
      images: raw.images,
      likes: parseEngagementCount(raw.likesText),
      collects: parseEngagementCount(raw.collectsText),
      comments: parseEngagementCount(raw.commentsText),
      tags: raw.tags
    };
  } catch (error) {
    console.error('Failed to extract note detail data:', error);
    return {
      title: '',
      content: '',
      author: '',
      images: [],
      likes: 0,
      collects: 0,
      comments: 0,
      tags: []
    };
  }
}
204 |
/**
 * Extracts up to `maxComments` top-level comments from a note detail page.
 *
 * Comments lacking an author or content element are skipped. Like counters
 * shown in abbreviated form (e.g. "1.2万") are expanded rather than truncated
 * to their leading integer.
 *
 * @param page - Page already showing a note with its comment list.
 * @param maxComments - Maximum number of comment items to inspect (default 5).
 * @returns The extracted comments; an empty array (after logging) on any error.
 */
async function getComments(page: Page, maxComments: number = 5): Promise<Array<{
  author: string;
  content: string;
  likes: number;
  images?: string[];
}>> {
  try {
    return await page.evaluate((max) => {
      // Defined inside the callback because the callback is executed in the
      // page and cannot reference functions from the surrounding module.
      const toCount = (text: string): number => {
        const match = text.trim().replace(/,/g, '').match(/^(\d+(?:\.\d+)?)\s*([万wW千kK])?/);
        if (!match) return 0; // placeholder labels such as "赞"
        const value = Number(match[1]);
        const unit = match[2];
        if (unit === '万' || unit === 'w' || unit === 'W') return Math.round(value * 10000);
        if (unit === '千' || unit === 'k' || unit === 'K') return Math.round(value * 1000);
        return Math.trunc(value);
      };

      const commentItems = document.querySelectorAll('.parent-comment .comment-item');
      const comments: Array<{ author: string; content: string; likes: number; images?: string[] }> = [];

      for (let i = 0; i < Math.min(commentItems.length, max); i++) {
        const item = commentItems[i];
        const authorElement = item.querySelector('.author .name');
        const contentElement = item.querySelector('.content .note-text');
        const likesElement = item.querySelector('.like .count');

        // Images attached to the comment, if any
        const imageElements = item.querySelectorAll('.comment-picture img');
        const images = Array.from(imageElements).map(img => (img as HTMLImageElement).src);

        if (authorElement && contentElement) {
          comments.push({
            author: authorElement.textContent?.trim() || '',
            content: contentElement.textContent?.trim() || '',
            likes: toCount(likesElement?.textContent || ''),
            // The key is omitted entirely when the comment has no images
            ...(images.length > 0 ? { images } : {})
          });
        }
      }

      return comments;
    }, maxComments);
  } catch (error) {
    console.error('Failed to extract comment data:', error);
    return [];
  }
}
249 |
/**
 * Collects title, link and author for note cards on a search result page.
 *
 * Cards without a usable link are discarded first and only then is the list
 * cut to `count`, so a link-less card near the top no longer reduces the
 * number of notes returned.
 *
 * @param page - Page showing the search results.
 * @param count - Maximum number of notes to return.
 * @returns Up to `count` entries with a non-empty link; an empty array
 *          (after logging) on any error.
 */
async function extractNoteLinks(page: Page, count: number): Promise<Array<{title: string, link: string, author: string}>> {
  try {
    const cards = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('.note-item')).map(element => {
        try {
          const titleElement = element.querySelector('.title span') as HTMLElement;

          // Prefer the visible cover link, fall back to the hidden anchor
          const visibleLinkElement = element.querySelector('a.cover.mask') as HTMLAnchorElement;
          const hiddenLinkElement = element.querySelector('a[style="display: none;"]') as HTMLAnchorElement;

          const authorElement = element.querySelector('.card-bottom-wrapper .name span.name') as HTMLElement;

          return {
            title: titleElement ? titleElement.innerText.trim() : 'No Title',
            // Turn a root-relative path into a complete URL
            link: (visibleLinkElement?.href || hiddenLinkElement?.href || '')
              .replace(/^\//, 'https://www.xiaohongshu.com/'),
            author: authorElement ? authorElement.innerText.trim() : 'Unknown Author'
          };
        } catch (error) {
          console.error('Error extracting note link', error);
          return null;
        }
      });
    });

    // Drop failed/link-less cards before applying the limit
    return cards
      .filter((item): item is {title: string, link: string, author: string} =>
        item !== null && typeof item === 'object' && 'link' in item && !!item.link)
      .slice(0, Math.max(0, count));
  } catch (error) {
    console.error('Failed to extract note links:', error);
    return [];
  }
}
289 |
/**
 * Opens one note in a new tab and assembles its full record.
 *
 * Fields missing on the detail page fall back to the search-card values
 * (title, author) or to 'No content'. If anything throws, a minimal record
 * built from the search card is returned. The tab is always closed.
 *
 * @param context - Browser context in which the tab is opened.
 * @param noteInfo - Title, link and author taken from the search result card.
 * @param index - 0-based position of the note, used only in log messages.
 */
async function getNoteDetail(context: BrowserContext, noteInfo: {title: string, link: string, author: string}, index: number): Promise<RedBookNote> {
  const ordinal = index + 1;
  let tab: Page | null = null;

  try {
    console.error(`Starting to get note ${ordinal} details: ${noteInfo.title}`);
    tab = await context.newPage();

    await tab.goto(noteInfo.link, { waitUntil: 'domcontentloaded', timeout: 30000 });

    // A missing container is logged; extraction is still attempted
    await tab.waitForSelector('#noteContainer', { timeout: 15000 })
      .catch(() => console.error('Note container not found, trying to continue getting content'));

    const detail = await getNoteDetailData(tab);
    // For debugging, a screenshot can be taken here:
    // await tab.screenshot({ path: `note-${index + 1}.png` });

    const note: RedBookNote = {
      title: detail.title || noteInfo.title,
      content: detail.content || 'No content',
      author: detail.author || noteInfo.author,
      authorDesc: detail.authorDesc,
      link: noteInfo.link,
      likes: detail.likes,
      collects: detail.collects,
      comments: detail.comments,
      tags: detail.tags,
      images: detail.images
    };
    return note;
  } catch (error) {
    console.error(`Failed to get note ${ordinal} details:`, error);
    // Fall back to what the search card already provided
    return {
      title: noteInfo.title,
      content: 'Failed to get content',
      author: noteInfo.author,
      link: noteInfo.link
    };
  } finally {
    if (tab) {
      await tab.close().catch(err => console.error('Error closing tab:', err));
    }
  }
}
343 |
/**
 * Fetches full details for the notes listed on a search result page.
 *
 * Links are collected first, then up to three workers open the notes
 * concurrently. Worker start times are staggered once (1 second per worker
 * index) and each worker pauses 2 seconds between consecutive notes; there is
 * no pause after the final note. Results keep the order of the search page.
 *
 * @param page - Page showing the search results.
 * @param count - Maximum number of notes to fetch.
 * @param context - Browser context used to open note tabs.
 * @returns The fetched notes; an empty array when no links were found or on error.
 */
async function extractNotes(page: Page, count: number, context: BrowserContext): Promise<RedBookNote[]> {
  const pause = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));

  try {
    const noteLinks = await extractNoteLinks(page, count);
    console.error(`Found ${noteLinks.length} notes, starting parallel content retrieval`);

    if (noteLinks.length === 0) {
      console.error('No note links found, returning empty result');
      return [];
    }

    // Limit concurrency to avoid too many simultaneous requests causing blocks
    const concurrency = Math.min(3, noteLinks.length);
    console.error(`Setting concurrency to ${concurrency}`);

    const results: Array<RedBookNote | undefined> = new Array(noteLinks.length);

    // Shared cursor: each worker claims the next unprocessed index. Claiming
    // is synchronous, so two workers can never take the same note.
    let nextIndex = 0;

    const workers = Array.from({ length: concurrency }, async (_, workerIndex) => {
      // Stagger worker start-up once, instead of delaying before every note
      await pause(workerIndex * 1000);

      while (nextIndex < noteLinks.length) {
        const index = nextIndex++;
        const noteInfo = noteLinks[index];
        console.error(`Worker ${workerIndex+1} processing note ${index+1}`);

        try {
          results[index] = await getNoteDetail(context, noteInfo, index); // keeps original order

          // Space out requests, but do not sleep when nothing is left to fetch
          if (nextIndex < noteLinks.length) {
            await pause(2000);
          }
        } catch (error) {
          console.error(`Failed to process note ${index+1}:`, error);
        }
      }
    });

    await Promise.all(workers);

    // Drop slots left empty by failed notes
    return results.filter((note): note is RedBookNote => note !== undefined);
  } catch (error) {
    console.error('Failed to extract note content:', error);
    return [];
  }
}
400 |
/**
 * Navigates the page to the search results for `keyword`.
 *
 * Waits up to 15 seconds for the results container; when it does not appear,
 * logs that and waits a further 5 seconds. When more than 6 results are
 * requested, the page is scrolled to load additional cards.
 *
 * @param page - Page to navigate.
 * @param keyword - Search keyword; URL-encoded into the query string.
 * @param count - Number of results the caller wants.
 * @param context - Not used by this function.
 */
async function performSearch(page: Page, keyword: string, count: number, context: BrowserContext): Promise<void> {
  const searchUrl = `https://www.xiaohongshu.com/search_result?keyword=${encodeURIComponent(keyword)}`;
  await page.goto(searchUrl, { timeout: 30000, waitUntil: 'domcontentloaded' });

  try {
    await page.waitForSelector('.feeds-container', { timeout: 15000 });
  } catch {
    console.error('Note list container not found, trying to wait longer');
    await page.waitForTimeout(5000);
  }

  const needsMoreCards = count > 6;
  if (needsMoreCards) {
    await autoScroll(page);
  }
}
420 |
// User agent presented by the headless search context
const SEARCH_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36';

/** Creates a search context on `browser`, loads the saved cookies into it and opens a page. */
async function openSearchSession(browser: Browser): Promise<{ context: BrowserContext; page: Page }> {
  const context = await browser.newContext({ userAgent: SEARCH_USER_AGENT });
  await loadCookies(context);
  const page = await context.newPage();
  return { context, page };
}

/** Runs the manual login flow in a visible browser and always closes that browser; throws when login fails. */
async function loginInteractively(): Promise<void> {
  const headedBrowser = await chromium.launch({ headless: false });
  try {
    const loginSuccess = await handleLogin(headedBrowser);
    if (!loginSuccess) {
      throw new Error('User login failed, unable to continue search');
    }
  } finally {
    await headedBrowser.close();
  }
}

/**
 * Searches Xiaohongshu for `query` and returns the detailed notes found.
 *
 * Flow: when no cookie file exists, a visible browser is opened for a manual
 * login first. The search itself runs in a headless browser with the saved
 * cookies. If the saved cookies turn out to be rejected, the manual login is
 * repeated once and the headless session is recreated. Cookies are saved
 * again after a successful search.
 *
 * @param query - Search keyword.
 * @param count - Maximum number of notes to return (default 5).
 * @returns The notes found, in search-result order.
 * @throws Error when the manual login fails or times out, and rethrows any
 *         other error raised while launching the browser or searching.
 */
export async function searchXiaohongshu(query: string, count: number = 5): Promise<RedBookNote[]> {
  let browser: Browser | null = null;

  try {
    console.error(`Search keyword: ${query}`);

    // A visible browser is only needed when there are no cookies to reuse
    if (!(await cookiesExist())) {
      await loginInteractively();
    }

    browser = await chromium.launch({ headless: true });
    let { context, page } = await openSearchSession(browser);

    // Verify that the loaded cookies still represent a logged-in session
    await page.goto('https://www.xiaohongshu.com/explore');
    await page.waitForTimeout(5000);

    if (!(await checkLoginStatus(page))) {
      console.error('Cookies expired or invalid, need to login again');

      // Headless mode cannot be switched on a running browser, so tear the
      // headless one down, log in with a visible one, then start over.
      await context.close();
      await browser.close();
      browser = null;

      await loginInteractively();

      browser = await chromium.launch({ headless: true });
      ({ context, page } = await openSearchSession(browser));
    }

    await performSearch(page, query, count, context);
    const notes = await extractNotes(page, count, context);

    // Persist whatever cookies the session ended up with for the next run
    await saveCookies(context);

    return notes;
  } catch (error) {
    console.error('Error searching Xiaohongshu:', error);
    throw error;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
522 |
/**
 * Lets the user log in manually and saves the resulting cookies.
 *
 * Opens the explore page in a new context, then polls the login status every
 * 2 seconds for at most 150 polls (about 5 minutes). The page and context are
 * closed before returning.
 *
 * @param browser - Browser in which the login context is created.
 * @returns `true` when a login was detected and cookies were saved; `false`
 *          on timeout or on any error (which is logged).
 */
async function handleLogin(browser: Browser): Promise<boolean> {
  const context = await browser.newContext({
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 800 },
  });

  let page;

  try {
    page = await context.newPage();

    await page.goto('https://www.xiaohongshu.com/explore');
    await page.waitForTimeout(5000);

    console.error('Please login to Xiaohongshu in the browser, system will automatically detect login status');

    const maxPolls = 150;
    const pollIntervalMs = 2000;
    let loggedIn = false;

    for (let attempt = 0; attempt < maxPolls && !loggedIn; attempt++) {
      await new Promise(resolve => setTimeout(resolve, pollIntervalMs));

      loggedIn = await checkLoginStatus(page);
      if (loggedIn) {
        console.error('Successfully logged in detected');
      }
    }

    if (!loggedIn) {
      console.error('Login timeout, please try again');
      return false;
    }

    await saveCookies(context);
    return true;
  } catch (error) {
    console.error('Error during user login process:', error);
    return false;
  } finally {
    if (page) await page.close();
    await context.close();
  }
}
571 |
/**
 * Writes notes to a timestamped JSON file in the `results` directory under
 * the current working directory.
 *
 * The file is named `xiaohongshu_<encoded keyword>_<timestamp>.json`; the
 * keyword is URI-encoded and the ISO timestamp has ":" and "." replaced by
 * "-". Failures are logged and swallowed.
 *
 * @param notes - Notes to serialize.
 * @param keyword - Search keyword, used in the file name.
 */
async function saveNotesToFile(notes: RedBookNote[], keyword: string): Promise<void> {
  try {
    const resultsDir = path.join(process.cwd(), 'results');
    await fs.mkdir(resultsDir, { recursive: true });

    // The timestamp keeps successive runs from overwriting each other
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');

    // path.join instead of a hard-coded "/" so the platform separator is used
    const fileName = path.join(resultsDir, `xiaohongshu_${encodeURIComponent(keyword)}_${timestamp}.json`);

    await fs.writeFile(fileName, JSON.stringify(notes, null, 2), 'utf-8');
    console.error(`Note data saved to file: ${fileName}`);
  } catch (error) {
    console.error('Error saving note data to file:', error);
  }
}
```