This is page 3 of 3. Use http://codebase.md/deepspringai/search_mcp_server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .chainlit
│   ├── config.toml
│   └── translations
│       ├── bn.json
│       ├── en-US.json
│       ├── gu.json
│       ├── he-IL.json
│       ├── hi.json
│       ├── ja.json
│       ├── kn.json
│       ├── ml.json
│       ├── mr.json
│       ├── nl.json
│       ├── ta.json
│       ├── te.json
│       └── zh-CN.json
├── .gitignore
├── .python-version
├── chainlit.md
├── Dockerfile
├── embedding_server.log
├── final_response_output.txt
├── pyproject.toml
├── README.md
├── smithery.yaml
├── src
│   ├── parquet_mcp_server
│   │   ├── __init__.py
│   │   ├── chainlit.md
│   │   ├── chatAgent.py
│   │   ├── client.py
│   │   ├── main.py
│   │   └── src
│   │       ├── search_helper.py
│   │       └── supabase_db.py
│   └── tests
│       ├── test_search.py
│       └── test_similarity.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.chainlit/translations/ml.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "common": {
3 | "actions": {
4 | "cancel": "\u0d31\u0d26\u0d4d\u0d26\u0d3e\u0d15\u0d4d\u0d15\u0d41\u0d15",
5 | "confirm": "\u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
6 | "continue": "\u0d24\u0d41\u0d1f\u0d30\u0d41\u0d15",
7 | "goBack": "\u0d24\u0d3f\u0d30\u0d3f\u0d15\u0d46 \u0d2a\u0d4b\u0d15\u0d41\u0d15",
8 | "reset": "\u0d2a\u0d41\u0d28\u0d03\u0d38\u0d1c\u0d4d\u0d1c\u0d2e\u0d3e\u0d15\u0d4d\u0d15\u0d41\u0d15",
9 | "submit": "\u0d38\u0d2e\u0d7c\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
10 | },
11 | "status": {
12 | "loading": "\u0d32\u0d4b\u0d21\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41...",
13 | "error": {
14 | "default": "\u0d12\u0d30\u0d41 \u0d2a\u0d3f\u0d36\u0d15\u0d4d \u0d38\u0d02\u0d2d\u0d35\u0d3f\u0d1a\u0d4d\u0d1a\u0d41",
15 | "serverConnection": "\u0d38\u0d46\u0d7c\u0d35\u0d31\u0d41\u0d2e\u0d3e\u0d2f\u0d3f \u0d2c\u0d28\u0d4d\u0d27\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e\u0d3f\u0d32\u0d4d\u0d32"
16 | }
17 | }
18 | },
19 | "auth": {
20 | "login": {
21 | "title": "\u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d32\u0d4b\u0d17\u0d3f\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
22 | "form": {
23 | "email": {
24 | "label": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d35\u0d3f\u0d32\u0d3e\u0d38\u0d02",
25 | "required": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d12\u0d30\u0d41 \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f \u0d2b\u0d40\u0d7d\u0d21\u0d4d \u0d06\u0d23\u0d4d"
26 | },
27 | "password": {
28 | "label": "\u0d2a\u0d3e\u0d38\u0d4d\u200c\u0d35\u0d47\u0d21\u0d4d",
29 | "required": "\u0d2a\u0d3e\u0d38\u0d4d\u200c\u0d35\u0d47\u0d21\u0d4d \u0d12\u0d30\u0d41 \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f \u0d2b\u0d40\u0d7d\u0d21\u0d4d \u0d06\u0d23\u0d4d"
30 | },
31 | "actions": {
32 | "signin": "\u0d38\u0d48\u0d7b \u0d07\u0d7b"
33 | },
34 | "alternativeText": {
35 | "or": "\u0d05\u0d32\u0d4d\u0d32\u0d46\u0d19\u0d4d\u0d15\u0d3f\u0d7d"
36 | }
37 | },
38 | "errors": {
39 | "default": "\u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d3f\u0d32\u0d4d\u0d32",
40 | "signin": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
41 | "oauthSignin": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
42 | "redirectUriMismatch": "\u0d31\u0d40\u0d21\u0d2f\u0d31\u0d15\u0d4d\u0d1f\u0d4d URI oauth \u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d15\u0d4b\u0d7a\u0d2b\u0d3f\u0d17\u0d31\u0d47\u0d37\u0d28\u0d41\u0d2e\u0d3e\u0d2f\u0d3f \u0d2a\u0d4a\u0d30\u0d41\u0d24\u0d4d\u0d24\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d41\u0d28\u0d4d\u0d28\u0d3f\u0d32\u0d4d\u0d32",
43 | "oauthCallback": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
44 | "oauthCreateAccount": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
45 | "emailCreateAccount": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
46 | "callback": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
47 | "oauthAccountNotLinked": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d35\u0d4d\u0d2f\u0d15\u0d4d\u0d24\u0d3f\u0d24\u0d4d\u0d35\u0d02 \u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b, \u0d06\u0d26\u0d4d\u0d2f\u0d02 \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a \u0d05\u0d24\u0d47 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
48 | "emailSignin": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d05\u0d2f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e\u0d3f\u0d32\u0d4d\u0d32",
49 | "emailVerify": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d2a\u0d30\u0d3f\u0d36\u0d4b\u0d27\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15, \u0d12\u0d30\u0d41 \u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d05\u0d2f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d4d\u0d1f\u0d41\u0d23\u0d4d\u0d1f\u0d4d",
50 | "credentialsSignin": "\u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d2a\u0d30\u0d3e\u0d1c\u0d2f\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d4d\u0d1f\u0d41. \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d7e \u0d28\u0d7d\u0d15\u0d3f\u0d2f \u0d35\u0d3f\u0d35\u0d30\u0d19\u0d4d\u0d19\u0d7e \u0d36\u0d30\u0d3f\u0d2f\u0d3e\u0d23\u0d46\u0d28\u0d4d\u0d28\u0d4d \u0d2a\u0d30\u0d3f\u0d36\u0d4b\u0d27\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
51 | "sessionRequired": "\u0d08 \u0d2a\u0d47\u0d1c\u0d4d \u0d06\u0d15\u0d4d\u0d38\u0d38\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d26\u0d2f\u0d35\u0d3e\u0d2f\u0d3f \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15"
52 | }
53 | },
54 | "provider": {
55 | "continue": "{{provider}} \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d24\u0d41\u0d1f\u0d30\u0d41\u0d15"
56 | }
57 | },
58 | "chat": {
59 | "input": {
60 | "placeholder": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d02 \u0d07\u0d35\u0d3f\u0d1f\u0d46 \u0d1f\u0d48\u0d2a\u0d4d\u0d2a\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15...",
61 | "actions": {
62 | "send": "\u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d02 \u0d05\u0d2f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d15",
63 | "stop": "\u0d1f\u0d3e\u0d38\u0d4d\u0d15\u0d4d \u0d28\u0d3f\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
64 | "attachFiles": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d05\u0d31\u0d4d\u0d31\u0d3e\u0d1a\u0d4d\u0d1a\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15"
65 | }
66 | },
67 | "speech": {
68 | "start": "\u0d31\u0d46\u0d15\u0d4d\u0d15\u0d4b\u0d7c\u0d21\u0d3f\u0d02\u0d17\u0d4d \u0d06\u0d30\u0d02\u0d2d\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
69 | "stop": "\u0d31\u0d46\u0d15\u0d4d\u0d15\u0d4b\u0d7c\u0d21\u0d3f\u0d02\u0d17\u0d4d \u0d28\u0d3f\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
70 | "connecting": "\u0d2c\u0d28\u0d4d\u0d27\u0d3f\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d41"
71 | },
72 | "fileUpload": {
73 | "dragDrop": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d07\u0d35\u0d3f\u0d1f\u0d46 \u0d35\u0d32\u0d3f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d41\u0d15",
74 | "browse": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d24\u0d3f\u0d30\u0d2f\u0d41\u0d15",
75 | "sizeLimit": "\u0d2a\u0d30\u0d3f\u0d27\u0d3f:",
76 | "errors": {
77 | "failed": "\u0d05\u0d2a\u0d4d\u200c\u0d32\u0d4b\u0d21\u0d4d \u0d2a\u0d30\u0d3e\u0d1c\u0d2f\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d4d\u0d1f\u0d41",
78 | "cancelled": "\u0d05\u0d2a\u0d4d\u200c\u0d32\u0d4b\u0d21\u0d4d \u0d31\u0d26\u0d4d\u0d26\u0d3e\u0d15\u0d4d\u0d15\u0d3f"
79 | }
80 | },
81 | "messages": {
82 | "status": {
83 | "using": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d41",
84 | "used": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d41"
85 | },
86 | "actions": {
87 | "copy": {
88 | "button": "\u0d15\u0d4d\u0d32\u0d3f\u0d2a\u0d4d\u0d2a\u0d4d\u0d2c\u0d4b\u0d7c\u0d21\u0d3f\u0d32\u0d47\u0d15\u0d4d\u0d15\u0d4d \u0d2a\u0d15\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
89 | "success": "\u0d2a\u0d15\u0d7c\u0d24\u0d4d\u0d24\u0d3f!"
90 | }
91 | },
92 | "feedback": {
93 | "positive": "\u0d38\u0d39\u0d3e\u0d2f\u0d15\u0d30\u0d02",
94 | "negative": "\u0d38\u0d39\u0d3e\u0d2f\u0d15\u0d30\u0d2e\u0d32\u0d4d\u0d32",
95 | "edit": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d0e\u0d21\u0d3f\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
96 | "dialog": {
97 | "title": "\u0d12\u0d30\u0d41 \u0d15\u0d2e\u0d28\u0d4d\u0d31\u0d4d \u0d1a\u0d47\u0d7c\u0d15\u0d4d\u0d15\u0d41\u0d15",
98 | "submit": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d38\u0d2e\u0d7c\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
99 | },
100 | "status": {
101 | "updating": "\u0d05\u0d2a\u0d4d\u0d21\u0d47\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41",
102 | "updated": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d05\u0d2a\u0d4d\u0d21\u0d47\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41"
103 | }
104 | }
105 | },
106 | "history": {
107 | "title": "\u0d05\u0d35\u0d38\u0d3e\u0d28 \u0d07\u0d7b\u0d2a\u0d41\u0d1f\u0d4d\u0d1f\u0d41\u0d15\u0d7e",
108 | "empty": "\u0d12\u0d28\u0d4d\u0d28\u0d41\u0d2e\u0d3f\u0d32\u0d4d\u0d32...",
109 | "show": "\u0d39\u0d3f\u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d31\u0d3f \u0d15\u0d3e\u0d23\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
110 | },
111 | "settings": {
112 | "title": "\u0d15\u0d4d\u0d30\u0d2e\u0d40\u0d15\u0d30\u0d23\u0d19\u0d4d\u0d19\u0d7e \u0d2a\u0d3e\u0d28\u0d7d"
113 | },
114 | "watermark": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d28\u0d3f\u0d7c\u0d2e\u0d4d\u0d2e\u0d3f\u0d1a\u0d4d\u0d1a\u0d24\u0d4d"
115 | },
116 | "threadHistory": {
117 | "sidebar": {
118 | "title": "\u0d2e\u0d41\u0d7b \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d41\u0d15\u0d7e",
119 | "filters": {
120 | "search": "\u0d24\u0d3f\u0d30\u0d2f\u0d41\u0d15",
121 | "placeholder": "Search conversations..."
122 | },
123 | "timeframes": {
124 | "today": "\u0d07\u0d28\u0d4d\u0d28\u0d4d",
125 | "yesterday": "\u0d07\u0d28\u0d4d\u0d28\u0d32\u0d46",
126 | "previous7days": "\u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e 7 \u0d26\u0d3f\u0d35\u0d38\u0d02",
127 | "previous30days": "\u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e 30 \u0d26\u0d3f\u0d35\u0d38\u0d02"
128 | },
129 | "empty": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d41\u0d15\u0d7e \u0d15\u0d23\u0d4d\u0d1f\u0d46\u0d24\u0d4d\u0d24\u0d3f\u0d2f\u0d3f\u0d32\u0d4d\u0d32",
130 | "actions": {
131 | "close": "\u0d38\u0d48\u0d21\u0d4d\u0d2c\u0d3e\u0d7c \u0d05\u0d1f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d15",
132 | "open": "\u0d38\u0d48\u0d21\u0d4d\u0d2c\u0d3e\u0d7c \u0d24\u0d41\u0d31\u0d15\u0d4d\u0d15\u0d41\u0d15"
133 | }
134 | },
135 | "thread": {
136 | "untitled": "\u0d2a\u0d47\u0d30\u0d3f\u0d32\u0d4d\u0d32\u0d3e\u0d24\u0d4d\u0d24 \u0d38\u0d02\u0d2d\u0d3e\u0d37\u0d23\u0d02",
137 | "menu": {
138 | "rename": "Rename",
139 | "delete": "Delete"
140 | },
141 | "actions": {
142 | "delete": {
143 | "title": "\u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
144 | "description": "\u0d07\u0d24\u0d4d \u0d24\u0d4d\u0d30\u0d46\u0d21\u0d41\u0d02 \u0d05\u0d24\u0d3f\u0d28\u0d4d\u0d31\u0d46 \u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d02 \u0d18\u0d1f\u0d15\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d02 \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d02. \u0d08 \u0d2a\u0d4d\u0d30\u0d35\u0d7c\u0d24\u0d4d\u0d24\u0d3f \u0d2a\u0d34\u0d2f\u0d2a\u0d1f\u0d3f\u0d2f\u0d3e\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d2f\u0d3f\u0d32\u0d4d\u0d32",
145 | "success": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41",
146 | "inProgress": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41"
147 | },
148 | "rename": {
149 | "title": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
150 | "description": "\u0d08 \u0d24\u0d4d\u0d30\u0d46\u0d21\u0d3f\u0d28\u0d4d \u0d12\u0d30\u0d41 \u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d2a\u0d47\u0d30\u0d4d \u0d28\u0d7d\u0d15\u0d41\u0d15",
151 | "form": {
152 | "name": {
153 | "label": "\u0d2a\u0d47\u0d30\u0d4d",
154 | "placeholder": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d2a\u0d47\u0d30\u0d4d \u0d28\u0d7d\u0d15\u0d41\u0d15"
155 | }
156 | },
157 | "success": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41!",
158 | "inProgress": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41"
159 | }
160 | }
161 | }
162 | },
163 | "navigation": {
164 | "header": {
165 | "chat": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d",
166 | "readme": "\u0d35\u0d3e\u0d2f\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
167 | "theme": {
168 | "light": "Light Theme",
169 | "dark": "Dark Theme",
170 | "system": "Follow System"
171 | }
172 | },
173 | "newChat": {
174 | "button": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d",
175 | "dialog": {
176 | "title": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d38\u0d43\u0d37\u0d4d\u0d1f\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
177 | "description": "\u0d07\u0d24\u0d4d \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d28\u0d3f\u0d32\u0d35\u0d3f\u0d32\u0d46 \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d39\u0d3f\u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d31\u0d3f \u0d2e\u0d3e\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d02. \u0d24\u0d41\u0d1f\u0d30\u0d3e\u0d7b \u0d24\u0d3e\u0d7d\u0d2a\u0d4d\u0d2a\u0d30\u0d4d\u0d2f\u0d2e\u0d41\u0d23\u0d4d\u0d1f\u0d4b?",
178 | "tooltip": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d"
179 | }
180 | },
181 | "user": {
182 | "menu": {
183 | "settings": "\u0d15\u0d4d\u0d30\u0d2e\u0d40\u0d15\u0d30\u0d23\u0d19\u0d4d\u0d19\u0d7e",
184 | "settingsKey": "S",
185 | "apiKeys": "API \u0d15\u0d40\u0d15\u0d7e",
186 | "logout": "\u0d32\u0d4b\u0d17\u0d4d\u0d14\u0d1f\u0d4d\u0d1f\u0d4d"
187 | }
188 | }
189 | },
190 | "apiKeys": {
191 | "title": "\u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f API \u0d15\u0d40\u0d15\u0d7e",
192 | "description": "\u0d08 \u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b, \u0d24\u0d3e\u0d34\u0d46\u0d2a\u0d4d\u0d2a\u0d31\u0d2f\u0d41\u0d28\u0d4d\u0d28 API \u0d15\u0d40\u0d15\u0d7e \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d23\u0d4d. \u0d15\u0d40\u0d15\u0d7e \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d09\u0d2a\u0d15\u0d30\u0d23\u0d24\u0d4d\u0d24\u0d3f\u0d28\u0d4d\u0d31\u0d46 \u0d32\u0d4b\u0d15\u0d4d\u0d15\u0d7d \u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d4b\u0d31\u0d47\u0d1c\u0d3f\u0d7d \u0d38\u0d02\u0d2d\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d41\u0d28\u0d4d\u0d28\u0d41.",
193 | "success": {
194 | "saved": "\u0d35\u0d3f\u0d1c\u0d2f\u0d15\u0d30\u0d2e\u0d3e\u0d2f\u0d3f \u0d38\u0d02\u0d30\u0d15\u0d4d\u0d37\u0d3f\u0d1a\u0d4d\u0d1a\u0d41"
195 | }
196 | },
197 | "alerts": {
198 | "info": "Info",
199 | "note": "Note",
200 | "tip": "Tip",
201 | "important": "Important",
202 | "warning": "Warning",
203 | "caution": "Caution",
204 | "debug": "Debug",
205 | "example": "Example",
206 | "success": "Success",
207 | "help": "Help",
208 | "idea": "Idea",
209 | "pending": "Pending",
210 | "security": "Security",
211 | "beta": "Beta",
212 | "best-practice": "Best Practice"
213 | }
214 | }
```
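The translation strings above are plain nested JSON, so they can be spot-checked with ordinary JSON tooling. Below is a minimal, hypothetical sketch (the `get_translation` helper is illustrative only and not part of this repository) that loads the file and resolves a dotted key such as `common.actions.cancel`:

```python
import json
from functools import reduce


def get_translation(path: str, dotted_key: str) -> str:
    """Hypothetical helper: resolve a dotted key from a Chainlit translation file."""
    with open(path, encoding="utf-8") as f:
        translations = json.load(f)
    # Walk the nested dicts one key segment at a time
    return reduce(lambda node, part: node[part], dotted_key.split("."), translations)


if __name__ == "__main__":
    # Prints the Malayalam label stored under common.actions.cancel
    print(get_translation(".chainlit/translations/ml.json", "common.actions.cancel"))
```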
--------------------------------------------------------------------------------
/src/parquet_mcp_server/src/search_helper.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 | import logging
3 | import tempfile
4 | from pathlib import Path
5 | import re
6 | import requests
7 | import os
8 | from dotenv import load_dotenv
9 | import os
10 | import time
11 | from firecrawl import FirecrawlApp
12 | from datetime import datetime # Import datetime module
13 | import numpy as np
14 | from langchain_ollama import ChatOllama
15 | from langchain_core.messages import HumanMessage
16 | from parquet_mcp_server.client import perform_search_and_scrape_async
17 | import asyncio
18 | from parquet_mcp_server.src.supabase_db import SupabaseDB
19 | import time
20 | import random
21 | import hashlib
22 |
23 | def generate_unique_id():
24 | # Get the current time in milliseconds
25 | current_time = int(time.time() * 1000)
26 |
27 | # Generate a random number
28 | random_number = random.randint(1000, 9999)
29 |
30 | # Combine the time and random number to form a unique string
31 | unique_string = f"{current_time}-{random_number}"
32 |
33 | # Optionally, hash the string to shorten or obscure the ID
34 | unique_id = hashlib.sha256(unique_string.encode()).hexdigest()
35 |
36 | return unique_id
37 |
38 |
39 | # Set up logging
40 | logging.basicConfig(
41 | level=logging.INFO,
42 | format='%(asctime)s - %(levelname)s - %(message)s'
43 | )
44 |
45 | # Load environment variables from .env file
46 | load_dotenv()
47 |
48 | # Configuration
49 | USE_SUPABASE = os.getenv("USE_SUPABASE", "true").lower() == "true"
50 | JSON_FILE_PATH = os.getenv("JSON_FILE_PATH", "output.json")
51 |
52 | # Initialize storage based on configuration
53 | if USE_SUPABASE:
54 | db = SupabaseDB()
55 | else:
56 | db = None
57 |
58 | # Initialize Ollama LangChain model
59 | ollama_model = ChatOllama(
60 | base_url=os.getenv("OLLAMA_URL"),
61 | model="llama3.1:8b",
62 | )
63 |
64 |
65 | def chunk_text(text: str, chunk_size: int = 500) -> list:
66 | """
67 | Split text into chunks of specified size.
68 |
69 | Args:
70 | text (str): Text to split
71 | chunk_size (int): Number of characters per chunk
72 |
73 | Returns:
74 | list: List of text chunks
75 | """
76 | return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
77 |
78 | def get_embedding(texts: list) -> list:
79 | """
80 | Fetch embeddings for a batch of texts from the embedding server.
81 |
82 | Args:
83 | texts (list): A list of texts to generate embeddings for.
84 |         Note: the embedding server URL is read from the EMBEDDING_URL environment variable.
85 |
86 | Returns:
87 | list: A list of embeddings corresponding to the input texts.
88 | """
89 | model = "bge-m3" # Changed from nomic-embed-text to bge-m3
90 | payload = {
91 | "model": model,
92 | "input": texts # Pass all texts in a batch
93 | }
94 |     logging.info('Requesting embeddings from the embedding server')
95 | try:
96 | response = requests.post(os.getenv('EMBEDDING_URL'), json=payload, verify=False)
97 | if response.status_code == 200:
98 | result = response.json()
99 | if 'embeddings' in result:
100 | return result['embeddings']
101 | else:
102 | logging.error(f"No embeddings found in response: {result}")
103 | return []
104 | else:
105 | logging.error(f"Error: {response.status_code}, {response.text}")
106 | return []
107 | except Exception as e:
108 | logging.error(f"Exception during request: {str(e)}")
109 | return []
110 |
111 | def search_web(query, page=1):
112 | """
113 | Perform a web search using the SearchAPI.io API.
114 |
115 | Args:
116 | query (str): The search query
117 | page (int): The page number for pagination (default: 1)
118 |
119 | Returns:
120 | tuple: A tuple containing (organic_results, related_searches)
121 | - organic_results: List of search results
122 | - related_searches: List of related search queries
123 |
124 | Raises:
125 | ValueError: If API key is not found in environment variables
126 | requests.exceptions.RequestException: If there's an error making the HTTP request
127 | json.JSONDecodeError: If there's an error parsing the JSON response
128 | """
129 | url = "https://www.searchapi.io/api/v1/search"
130 |
131 | # Get API key from environment variables
132 | api_key = os.getenv("SEARCHAPI_API_KEY")
133 | if not api_key:
134 | logging.error("API key not found in environment variables")
135 | raise ValueError("API key not found. Please set SEARCHAPI_API_KEY in your .env file.")
136 |
137 | params = {
138 | "engine": "google",
139 | "q": query,
140 | "api_key": api_key,
141 | "page": page,
142 | "num": 3
143 | }
144 |
145 | try:
146 | logging.info(f"Making search request for query: {query}")
147 | response = requests.get(url, params=params)
148 | response.raise_for_status()
149 |
150 | data = response.json()
151 | organic_results = data.get("organic_results", [])
152 | related_searches = data.get("related_searches", [])
153 |
154 | logging.info(f"Search successful for query: {query}")
155 | return organic_results, related_searches
156 |
157 | except requests.exceptions.RequestException as e:
158 | logging.error(f"Error making search request: {str(e)}")
159 | raise RuntimeError(f"Request error: {str(e)}") from e
160 | except json.JSONDecodeError as e:
161 | logging.error(f"Error parsing JSON response: {str(e)}")
162 | raise ValueError(f"JSON parsing error: {str(e)}") from e
163 |
164 | def get_links(markdown_content):
165 | """
166 | Filter markdown content by extracting all links and returning both the filtered content
167 | and a list of all links found.
168 |
169 | Args:
170 | markdown_content (str): The markdown content to filter
171 |
172 | Returns:
173 | tuple: A tuple containing (filtered_content, links)
174 | - filtered_content: The markdown content with all links removed
175 | - links: A list of all links found in the content
176 | """
177 | # Regular expression to match markdown links
178 | link_pattern = r'\[([^\]]+)\]\(([^)]+)\)|<([^>]+)>'
179 |
180 | # Find all links in the content
181 | link_matches = re.findall(link_pattern, markdown_content)
182 |
183 | # Extract the actual URLs from the matches
184 | links = []
185 |     for match in link_matches:
186 |         # re.findall with multiple groups returns 3-tuples: (link text, url, angle-bracket url)
187 |         if match[1]:
188 |             # For [text](url) format, the URL is the second group
189 |             links.append(match[1])
190 |         elif match[2]:
191 |             links.append(match[2])  # For <url> format, the URL is the third group
192 |
193 |
194 |     # Return the list of extracted links
195 | return links
196 |
197 | def remove_markdown_links(text):
198 | # Remove markdown links while preserving the text
199 | text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
200 |
201 | # Remove any remaining URLs with percent encoding
202 | text = re.sub(r'https?://[^\s\]]+%[^\s\]]+', '', text)
203 |
204 | # Remove any remaining standalone URLs
205 | text = re.sub(r'https?://\S+', '', text)
206 |
207 | # Clean up any double newlines that might have been created
208 | text = re.sub(r'\n\s*\n', '\n\n', text)
209 |
210 | return text
211 |
212 | def scrape_urls(organic_results):
213 | """
214 | Scrape each URL from the organic search results using Firecrawl API.
215 |
216 | Args:
217 | organic_results (list): List of organic search results
218 |
219 | Returns:
220 | dict: Dictionary mapping URLs to their scrape status and content
221 | """
222 | # Get Firecrawl API key from environment variables
223 | firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
224 | if not firecrawl_api_key:
225 | raise ValueError("Firecrawl API key not found. Please set FIRECRAWL_API_KEY in your .env file.")
226 |
227 | # Initialize FirecrawlApp
228 | app = FirecrawlApp(api_key=firecrawl_api_key)
229 |
230 | # Dictionary to store scrape results for each URL
231 | scrape_results = {}
232 |
233 | # Scrape each URL
234 | for i, result in enumerate(organic_results):
235 | url = result.get("link")
236 | if not url:
237 | continue
238 |
239 | logging.info(f"Scraping URL {i+1}/{len(organic_results)}: {url}")
240 |
241 | try:
242 | # Scrape the URL
243 | scrape_status = app.scrape_url(
244 | url,
245 | params={'formats': ['markdown']}
246 | )
247 |
248 | # Store the scrape status and content if successful
249 | if scrape_status['metadata']['statusCode'] == 200:
250 | scrape_results[url] = {
251 | 'status': scrape_status['metadata']['statusCode'],
252 | 'content': scrape_status['markdown']
253 | }
254 | logging.info(f"Successfully scraped {url}")
255 | else:
256 | scrape_results[url] = {
257 | 'status': scrape_status['metadata']['statusCode'],
258 |                     'error': f"Scraping failed with status: {scrape_status['metadata']['statusCode']}"
259 |                 }
260 |                 logging.warning(f"Scraping failed with status: {scrape_status['metadata']['statusCode']}")
261 |
262 | # Add a delay between requests to avoid rate limiting
263 | time.sleep(2)
264 |
265 | except Exception as e:
266 | logging.error(f"Error scraping {url}: {e}")
267 | raise Exception(f"Error scraping {url}: {e}")
268 |
269 | return scrape_results
270 |
271 |
272 | async def perform_search_and_scrape(search_queries: list[str], page_number: int = 1) -> tuple[bool, str]:
273 | """
274 | Perform searches and scrape URLs from the organic results for multiple queries.
275 |
276 | Args:
277 | search_queries (list[str]): The list of search queries to use.
278 | page_number (int): The page number for the search results.
279 |
280 | Returns:
281 | tuple[bool, str]: (success status, message)
282 | """
283 |     # Generate a unique ID to tag this batch of results (stored as search_id in the metadata)
284 | unique_id = generate_unique_id()
285 |
286 | all_results = [] # List to store all results with text and embeddings
287 |
288 | for search_query in search_queries:
289 | try:
290 | organic_results, related_searches = search_web(search_query, page_number)
291 | except Exception as e:
292 | return False, f"Error in SearchAPI: {str(e)}"
293 |
294 | # Log the search query results
295 | logging.info(f"Results for query '{search_query}' retrieved.")
296 |
297 |         # Scrape URLs from the organic results and collect their markdown content
298 | if organic_results:
299 | logging.info(f"Scraping URLs from organic search results for query '{search_query}'...")
300 | try:
301 | scrape_results = scrape_urls(organic_results)
302 | except Exception as e:
303 | return False, f"Error in Scraping {str(e)}"
304 |
305 | # Process and save markdown content for successful scrapes
306 | for i, (url, result) in enumerate(scrape_results.items()):
307 | if result['status'] == 200:
308 | # Filter the markdown content
309 | links = get_links(result['content'])
310 | logging.info(f"Found {len(links)} links in {url}")
311 |
312 | # Remove markdown links from the content
313 | filtered_content = remove_markdown_links(result['content'])
314 |
315 | # Chunk the text
316 | chunks = chunk_text(filtered_content, chunk_size=500)
317 |
318 | # Generate embeddings for all chunks
319 | embeddings = get_embedding(chunks) # Get embeddings for all chunks
320 |
321 | # Combine text and embeddings into the result structure
322 | current_date = datetime.now().strftime("%Y-%m-%d") # Get current date as string
323 | for chunk, embed in zip(chunks, embeddings):
324 | result_data = {
325 | 'text': chunk,
326 | 'metadata': {
327 | 'url': url,
328 | 'date': current_date,
329 | 'query': search_query,
330 | 'search_id': unique_id
331 | },
332 | 'embedding': embed # Changed from 'embed' to 'embedding' to match Supabase schema
333 | }
334 | all_results.append(result_data)
335 |
336 | # Save to storage based on configuration
337 | if USE_SUPABASE:
338 | db_result = db.add_new_data(result_data)
339 | logging.info(f"Saved to Supabase")
340 | # Save all results to a JSON file
341 | with open(JSON_FILE_PATH, 'w', encoding='utf-8') as output_file:
342 | json.dump(all_results, output_file, ensure_ascii=False, indent=4)
343 |
344 | logging.info(f"All results saved to {JSON_FILE_PATH}")
345 | return await find_similar_chunks(search_queries)
346 |
347 |
348 | async def summary_with_ollama(text: str, user_query: str) -> str:
349 | """
350 | Process text with the Ollama model in chunks and ensure the final result is under 4000 characters.
351 |
352 | Args:
353 | text (str): The complete text to process
354 | user_query (str): The user's query
355 |
356 | Returns:
357 | str: Final response from the model under 4000 characters
358 | """
359 | logging.info("Starting summary_with_ollama processing")
360 |
361 | async def process_chunk(chunk: str) -> str:
362 | """Process a single chunk with the Ollama model."""
363 | try:
364 |             prompt_content = f"This is the user input query: {user_query}\nand this is the information extracted from the internet. Please summarize the results, mentioning all the information related to the user query. Don't forget to include the source links: \n{chunk}"
365 | chunk_response = await ollama_model.ainvoke([HumanMessage(content=prompt_content)])
366 | return chunk_response.content
367 | except Exception as e:
368 | logging.error(f"Error processing chunk: {str(e)}")
369 | return ""
370 |
371 | async def process_text_in_chunks(input_text: str) -> str:
372 | """Process text in chunks and combine results."""
373 | chunk_size = 3000
374 | chunks = [input_text[i:i + chunk_size] for i in range(0, len(input_text), chunk_size)]
375 | logging.info(f"Split text into {len(chunks)} chunks of size {chunk_size}")
376 |
377 | # Process all chunks concurrently
378 | chunk_tasks = [process_chunk(chunk) for chunk in chunks]
379 | chunk_responses = await asyncio.gather(*chunk_tasks)
380 |
381 | # Combine all responses
382 | combined_response = "\n\n\n------------------------------------------------ \n\n\n".join(chunk_responses)
383 | logging.info(f"Combined response length: {len(combined_response)}")
384 |
385 | return combined_response
386 |
387 | # First pass: process the original text
388 | first_pass_result = await process_text_in_chunks(text)
389 |
390 | # If the result is still too long, process it again
391 | if len(first_pass_result) > 4000:
392 | logging.info("First pass result too long, processing again")
393 | final_result = await process_text_in_chunks(first_pass_result)
394 | else:
395 | final_result = first_pass_result
396 |
397 | logging.info(f"Final result length: {len(final_result)}")
398 | return final_result
399 |
400 | async def find_similar_chunks(queries: list[str]) -> tuple[bool, str]:
401 | """
402 | Get information from the results of a previous search using either Supabase or JSON file.
403 |
404 | Args:
405 | queries (list[str]): List of search queries to merge.
406 |
407 | Returns:
408 | tuple[bool, str]: (success status, message with similar text chunks)
409 | """
410 | logging.info(f"Starting find_similar_chunks with queries: {queries}")
411 |
412 | # Merge queries with 'and'
413 | merged_query = ' and '.join(queries)
414 | logging.info(f"Merged query: {merged_query}")
415 |
416 | try:
417 | # Get query embedding
418 | logging.info("Generating query embedding")
419 | query_embeddings = get_embedding([merged_query])
420 | if not query_embeddings:
421 | logging.error("Failed to generate query embedding")
422 | return False, "Failed to generate query embedding"
423 | logging.info("Successfully generated query embedding")
424 |
425 | if USE_SUPABASE:
426 | # Use Supabase similarity search
427 | logging.info("Performing similarity search in Supabase Database")
428 | similar_results = db.search_results_by_similarity(
429 | query_embedding=query_embeddings[0],
430 | threshold=0.55,
431 | match_count=100
432 | )
433 |
434 | if not similar_results["success"]:
435 | logging.error(f"Error in similarity search: {similar_results['error']}")
436 | return False, f"Error in similarity search: {similar_results['error']}"
437 |
438 | data = similar_results["data"]
439 | else:
440 | # Use JSON file for similarity search
441 | logging.info(f"Loading JSON file from {JSON_FILE_PATH}")
442 | try:
443 | with open(JSON_FILE_PATH, 'r', encoding='utf-8') as f:
444 | all_data = json.load(f)
445 |
446 | # Calculate similarities locally
447 | texts = [item['text'] for item in all_data]
448 | links = [item.get('metadata', {}).get('url', '') for item in all_data]
449 | embeddings = np.array([item['embedding'] for item in all_data])
450 |
451 | query_embedding = np.array(query_embeddings[0])
452 | similarities = np.dot(embeddings, query_embedding) / (
453 | np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
454 | )
455 |
456 | # Get indices of chunks with similarity above threshold
457 | high_similarity_indices = np.where(similarities > 0.55)[0]
458 |
459 | # Prepare data in the same format as Supabase results
460 | data = []
461 | for idx in high_similarity_indices:
462 | data.append({
463 | 'text': texts[idx],
464 | 'metadata': {'url': links[idx]},
465 | 'similarity': float(similarities[idx])
466 | })
467 |
468 | except Exception as e:
469 | logging.error(f"Error loading JSON file: {str(e)}")
470 | return False, f"Error loading JSON file: {str(e)}"
471 |
472 | if not data:
473 | logging.info("No similar results found")
474 | return True, "No similar results found"
475 |
476 | # Prepare the output
477 | output_texts = []
478 | links = set()
479 | for item in data:
480 | text = item.get('text', '')
481 | url = item.get('metadata', {}).get('url', '')
482 | similarity = item.get('similarity', 0)
483 |
484 | if text and url:
485 | output_texts.append(f"{text}\nSource: {url} (Similarity: {similarity:.2f})")
486 | links.add(url)
487 |
488 | output_message = "\n\n--------------------\n\n".join(output_texts)
489 | logging.info(f"Prepared output message with {len(output_texts)} chunks")
490 |
491 | # Process with Ollama model
492 | logging.info("Starting Ollama model processing")
493 | final_response = await summary_with_ollama(output_message, merged_query)
494 | logging.info("Successfully completed Ollama model processing")
495 |
496 | # Add all links to the final response
497 |         final_response = final_response + "\n\n--------------------\n\nAll of the searched websites are listed here:\n - " + "\n - ".join(links)
498 |
499 | # Create tmp directory if it doesn't exist
500 | os.makedirs('./tmp', exist_ok=True)
501 | output_file = f'./tmp/output_{int(time.time())}.txt'
502 | with open(output_file, 'w', encoding='utf-8') as f:
503 | f.write(final_response)
504 | logging.info(f"Successfully wrote output to {output_file}")
505 |
506 | return True, final_response
507 |
508 | except Exception as e:
509 | logging.error(f"Error in find_similar_chunks: {str(e)}")
510 | return False, f"Error in find_similar_chunks: {str(e)}"
511 |
512 |
513 | if __name__ == "__main__":
514 | logging.info("Starting main execution")
515 |     queries = ["آیفون ۱۶ قیمت"]  # Persian: "iPhone 16 price"
516 | logging.info(f"Running with queries: {queries}")
517 | # success, message = asyncio.run(find_similar_chunks(queries))
518 | success, message = asyncio.run(perform_search_and_scrape(queries))
519 | logging.info(message)
```
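As a usage sketch for the module above (assumptions: the environment variables it reads — SEARCHAPI_API_KEY, FIRECRAWL_API_KEY, EMBEDDING_URL, OLLAMA_URL, and optionally USE_SUPABASE / JSON_FILE_PATH — are configured in `.env`, and the package is installed so the import path resolves), both async entry points can be driven like this:

```python
import asyncio

from parquet_mcp_server.src.search_helper import (
    find_similar_chunks,
    perform_search_and_scrape,
)


async def main() -> None:
    # Search, scrape, embed, and store results, then summarize the similar chunks
    ok, message = await perform_search_and_scrape(["iphone 16 price"], page_number=1)
    print("search + scrape succeeded:", ok)
    print(message[:500])

    # Re-query previously stored chunks without searching or scraping again
    ok, summary = await find_similar_chunks(["iphone 16 price"])
    print("similarity search succeeded:", ok)
    print(summary[:500])


if __name__ == "__main__":
    asyncio.run(main())
```

Both calls return the `(success, message)` tuple documented in their docstrings, so a caller can branch on the boolean before using the summary text.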