
# Directory Structure

```
├── .chainlit
│   ├── config.toml
│   └── translations
│       ├── bn.json
│       ├── en-US.json
│       ├── gu.json
│       ├── he-IL.json
│       ├── hi.json
│       ├── ja.json
│       ├── kn.json
│       ├── ml.json
│       ├── mr.json
│       ├── nl.json
│       ├── ta.json
│       ├── te.json
│       └── zh-CN.json
├── .gitignore
├── .python-version
├── chainlit.md
├── Dockerfile
├── embedding_server.log
├── final_response_output.txt
├── pyproject.toml
├── README.md
├── smithery.yaml
├── src
│   ├── parquet_mcp_server
│   │   ├── __init__.py
│   │   ├── chainlit.md
│   │   ├── chatAgent.py
│   │   ├── client.py
│   │   ├── main.py
│   │   └── src
│   │       ├── search_helper.py
│   │       └── supabase_db.py
│   └── tests
│       ├── test_search.py
│       └── test_similarity.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/.chainlit/translations/ml.json:
--------------------------------------------------------------------------------

```json
{
    "common": {
        "actions": {
            "cancel": "\u0d31\u0d26\u0d4d\u0d26\u0d3e\u0d15\u0d4d\u0d15\u0d41\u0d15",
            "confirm": "\u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
            "continue": "\u0d24\u0d41\u0d1f\u0d30\u0d41\u0d15",
            "goBack": "\u0d24\u0d3f\u0d30\u0d3f\u0d15\u0d46 \u0d2a\u0d4b\u0d15\u0d41\u0d15",
            "reset": "\u0d2a\u0d41\u0d28\u0d03\u0d38\u0d1c\u0d4d\u0d1c\u0d2e\u0d3e\u0d15\u0d4d\u0d15\u0d41\u0d15",
            "submit": "\u0d38\u0d2e\u0d7c\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
        },
        "status": {
            "loading": "\u0d32\u0d4b\u0d21\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41...",
            "error": {
                "default": "\u0d12\u0d30\u0d41 \u0d2a\u0d3f\u0d36\u0d15\u0d4d \u0d38\u0d02\u0d2d\u0d35\u0d3f\u0d1a\u0d4d\u0d1a\u0d41",
                "serverConnection": "\u0d38\u0d46\u0d7c\u0d35\u0d31\u0d41\u0d2e\u0d3e\u0d2f\u0d3f \u0d2c\u0d28\u0d4d\u0d27\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e\u0d3f\u0d32\u0d4d\u0d32"
            }
        }
    },
    "auth": {
        "login": {
            "title": "\u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d32\u0d4b\u0d17\u0d3f\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
            "form": {
                "email": {
                    "label": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d35\u0d3f\u0d32\u0d3e\u0d38\u0d02",
                    "required": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d12\u0d30\u0d41 \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f \u0d2b\u0d40\u0d7d\u0d21\u0d4d \u0d06\u0d23\u0d4d"
                },
                "password": {
                    "label": "\u0d2a\u0d3e\u0d38\u0d4d\u200c\u0d35\u0d47\u0d21\u0d4d",
                    "required": "\u0d2a\u0d3e\u0d38\u0d4d\u200c\u0d35\u0d47\u0d21\u0d4d \u0d12\u0d30\u0d41 \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f \u0d2b\u0d40\u0d7d\u0d21\u0d4d \u0d06\u0d23\u0d4d"
                },
                "actions": {
                    "signin": "\u0d38\u0d48\u0d7b \u0d07\u0d7b"
                },
                "alternativeText": {
                    "or": "\u0d05\u0d32\u0d4d\u0d32\u0d46\u0d19\u0d4d\u0d15\u0d3f\u0d7d"
                }
            },
            "errors": {
                "default": "\u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d3f\u0d32\u0d4d\u0d32",
                "signin": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "oauthSignin": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "redirectUriMismatch": "\u0d31\u0d40\u0d21\u0d2f\u0d31\u0d15\u0d4d\u0d1f\u0d4d URI oauth \u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d15\u0d4b\u0d7a\u0d2b\u0d3f\u0d17\u0d31\u0d47\u0d37\u0d28\u0d41\u0d2e\u0d3e\u0d2f\u0d3f \u0d2a\u0d4a\u0d30\u0d41\u0d24\u0d4d\u0d24\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d41\u0d28\u0d4d\u0d28\u0d3f\u0d32\u0d4d\u0d32",
                "oauthCallback": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "oauthCreateAccount": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "emailCreateAccount": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "callback": "\u0d2e\u0d31\u0d4d\u0d31\u0d4a\u0d30\u0d41 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d36\u0d4d\u0d30\u0d2e\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "oauthAccountNotLinked": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d35\u0d4d\u0d2f\u0d15\u0d4d\u0d24\u0d3f\u0d24\u0d4d\u0d35\u0d02 \u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b, \u0d06\u0d26\u0d4d\u0d2f\u0d02 \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a \u0d05\u0d24\u0d47 \u0d05\u0d15\u0d4d\u0d15\u0d57\u0d23\u0d4d\u0d1f\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
                "emailSignin": "\u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d05\u0d2f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e\u0d3f\u0d32\u0d4d\u0d32",
                "emailVerify": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d2a\u0d30\u0d3f\u0d36\u0d4b\u0d27\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15, \u0d12\u0d30\u0d41 \u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d07\u0d2e\u0d46\u0d2f\u0d3f\u0d7d \u0d05\u0d2f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d4d\u0d1f\u0d41\u0d23\u0d4d\u0d1f\u0d4d",
                "credentialsSignin": "\u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d2a\u0d30\u0d3e\u0d1c\u0d2f\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d4d\u0d1f\u0d41. \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d7e \u0d28\u0d7d\u0d15\u0d3f\u0d2f \u0d35\u0d3f\u0d35\u0d30\u0d19\u0d4d\u0d19\u0d7e \u0d36\u0d30\u0d3f\u0d2f\u0d3e\u0d23\u0d46\u0d28\u0d4d\u0d28\u0d4d \u0d2a\u0d30\u0d3f\u0d36\u0d4b\u0d27\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "sessionRequired": "\u0d08 \u0d2a\u0d47\u0d1c\u0d4d \u0d06\u0d15\u0d4d\u0d38\u0d38\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d3e\u0d7b \u0d26\u0d2f\u0d35\u0d3e\u0d2f\u0d3f \u0d38\u0d48\u0d7b \u0d07\u0d7b \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15"
            }
        },
        "provider": {
            "continue": "{{provider}} \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d24\u0d41\u0d1f\u0d30\u0d41\u0d15"
        }
    },
    "chat": {
        "input": {
            "placeholder": "\u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d02 \u0d07\u0d35\u0d3f\u0d1f\u0d46 \u0d1f\u0d48\u0d2a\u0d4d\u0d2a\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15...",
            "actions": {
                "send": "\u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d02 \u0d05\u0d2f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "stop": "\u0d1f\u0d3e\u0d38\u0d4d\u0d15\u0d4d \u0d28\u0d3f\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
                "attachFiles": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d05\u0d31\u0d4d\u0d31\u0d3e\u0d1a\u0d4d\u0d1a\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15"
            }
        },
        "speech": {
            "start": "\u0d31\u0d46\u0d15\u0d4d\u0d15\u0d4b\u0d7c\u0d21\u0d3f\u0d02\u0d17\u0d4d \u0d06\u0d30\u0d02\u0d2d\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
            "stop": "\u0d31\u0d46\u0d15\u0d4d\u0d15\u0d4b\u0d7c\u0d21\u0d3f\u0d02\u0d17\u0d4d \u0d28\u0d3f\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
            "connecting": "\u0d2c\u0d28\u0d4d\u0d27\u0d3f\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d41"
        },
        "fileUpload": {
            "dragDrop": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d07\u0d35\u0d3f\u0d1f\u0d46 \u0d35\u0d32\u0d3f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d41\u0d15",
            "browse": "\u0d2b\u0d2f\u0d32\u0d41\u0d15\u0d7e \u0d24\u0d3f\u0d30\u0d2f\u0d41\u0d15",
            "sizeLimit": "\u0d2a\u0d30\u0d3f\u0d27\u0d3f:",
            "errors": {
                "failed": "\u0d05\u0d2a\u0d4d\u200c\u0d32\u0d4b\u0d21\u0d4d \u0d2a\u0d30\u0d3e\u0d1c\u0d2f\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d4d\u0d1f\u0d41",
                "cancelled": "\u0d05\u0d2a\u0d4d\u200c\u0d32\u0d4b\u0d21\u0d4d \u0d31\u0d26\u0d4d\u0d26\u0d3e\u0d15\u0d4d\u0d15\u0d3f"
            }
        },
        "messages": {
            "status": {
                "using": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d41",
                "used": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d41"
            },
            "actions": {
                "copy": {
                    "button": "\u0d15\u0d4d\u0d32\u0d3f\u0d2a\u0d4d\u0d2a\u0d4d\u0d2c\u0d4b\u0d7c\u0d21\u0d3f\u0d32\u0d47\u0d15\u0d4d\u0d15\u0d4d \u0d2a\u0d15\u0d7c\u0d24\u0d4d\u0d24\u0d41\u0d15",
                    "success": "\u0d2a\u0d15\u0d7c\u0d24\u0d4d\u0d24\u0d3f!"
                }
            },
            "feedback": {
                "positive": "\u0d38\u0d39\u0d3e\u0d2f\u0d15\u0d30\u0d02",
                "negative": "\u0d38\u0d39\u0d3e\u0d2f\u0d15\u0d30\u0d2e\u0d32\u0d4d\u0d32",
                "edit": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d0e\u0d21\u0d3f\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
                "dialog": {
                    "title": "\u0d12\u0d30\u0d41 \u0d15\u0d2e\u0d28\u0d4d\u0d31\u0d4d \u0d1a\u0d47\u0d7c\u0d15\u0d4d\u0d15\u0d41\u0d15",
                    "submit": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d38\u0d2e\u0d7c\u0d2a\u0d4d\u0d2a\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
                },
                "status": {
                    "updating": "\u0d05\u0d2a\u0d4d\u0d21\u0d47\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41",
                    "updated": "\u0d2b\u0d40\u0d21\u0d4d\u0d2c\u0d3e\u0d15\u0d4d\u0d15\u0d4d \u0d05\u0d2a\u0d4d\u0d21\u0d47\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41"
                }
            }
        },
        "history": {
            "title": "\u0d05\u0d35\u0d38\u0d3e\u0d28 \u0d07\u0d7b\u0d2a\u0d41\u0d1f\u0d4d\u0d1f\u0d41\u0d15\u0d7e",
            "empty": "\u0d12\u0d28\u0d4d\u0d28\u0d41\u0d2e\u0d3f\u0d32\u0d4d\u0d32...",
            "show": "\u0d39\u0d3f\u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d31\u0d3f \u0d15\u0d3e\u0d23\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15"
        },
        "settings": {
            "title": "\u0d15\u0d4d\u0d30\u0d2e\u0d40\u0d15\u0d30\u0d23\u0d19\u0d4d\u0d19\u0d7e \u0d2a\u0d3e\u0d28\u0d7d"
        },
        "watermark": "\u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d1a\u0d4d\u0d1a\u0d4d \u0d28\u0d3f\u0d7c\u0d2e\u0d4d\u0d2e\u0d3f\u0d1a\u0d4d\u0d1a\u0d24\u0d4d"
    },
    "threadHistory": {
        "sidebar": {
            "title": "\u0d2e\u0d41\u0d7b \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d41\u0d15\u0d7e",
            "filters": {
                "search": "\u0d24\u0d3f\u0d30\u0d2f\u0d41\u0d15",
                "placeholder": "Search conversations..."
            },
            "timeframes": {
                "today": "\u0d07\u0d28\u0d4d\u0d28\u0d4d",
                "yesterday": "\u0d07\u0d28\u0d4d\u0d28\u0d32\u0d46",
                "previous7days": "\u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e 7 \u0d26\u0d3f\u0d35\u0d38\u0d02",
                "previous30days": "\u0d15\u0d34\u0d3f\u0d1e\u0d4d\u0d1e 30 \u0d26\u0d3f\u0d35\u0d38\u0d02"
            },
            "empty": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d41\u0d15\u0d7e \u0d15\u0d23\u0d4d\u0d1f\u0d46\u0d24\u0d4d\u0d24\u0d3f\u0d2f\u0d3f\u0d32\u0d4d\u0d32",
            "actions": {
                "close": "\u0d38\u0d48\u0d21\u0d4d\u0d2c\u0d3e\u0d7c \u0d05\u0d1f\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "open": "\u0d38\u0d48\u0d21\u0d4d\u0d2c\u0d3e\u0d7c \u0d24\u0d41\u0d31\u0d15\u0d4d\u0d15\u0d41\u0d15"
            }
        },
        "thread": {
            "untitled": "\u0d2a\u0d47\u0d30\u0d3f\u0d32\u0d4d\u0d32\u0d3e\u0d24\u0d4d\u0d24 \u0d38\u0d02\u0d2d\u0d3e\u0d37\u0d23\u0d02",
            "menu": {
                "rename": "Rename",
                "delete": "Delete"
            },
            "actions": {
                "delete": {
                    "title": "\u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d38\u0d4d\u0d25\u0d3f\u0d30\u0d40\u0d15\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                    "description": "\u0d07\u0d24\u0d4d \u0d24\u0d4d\u0d30\u0d46\u0d21\u0d41\u0d02 \u0d05\u0d24\u0d3f\u0d28\u0d4d\u0d31\u0d46 \u0d38\u0d28\u0d4d\u0d26\u0d47\u0d36\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d02 \u0d18\u0d1f\u0d15\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d02 \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d02. \u0d08 \u0d2a\u0d4d\u0d30\u0d35\u0d7c\u0d24\u0d4d\u0d24\u0d3f \u0d2a\u0d34\u0d2f\u0d2a\u0d1f\u0d3f\u0d2f\u0d3e\u0d15\u0d4d\u0d15\u0d3e\u0d7b \u0d15\u0d34\u0d3f\u0d2f\u0d3f\u0d32\u0d4d\u0d32",
                    "success": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41",
                    "inProgress": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d21\u0d3f\u0d32\u0d40\u0d31\u0d4d\u0d31\u0d4d \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41"
                },
                "rename": {
                    "title": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d15",
                    "description": "\u0d08 \u0d24\u0d4d\u0d30\u0d46\u0d21\u0d3f\u0d28\u0d4d \u0d12\u0d30\u0d41 \u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d2a\u0d47\u0d30\u0d4d \u0d28\u0d7d\u0d15\u0d41\u0d15",
                    "form": {
                        "name": {
                            "label": "\u0d2a\u0d47\u0d30\u0d4d",
                            "placeholder": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d2a\u0d47\u0d30\u0d4d \u0d28\u0d7d\u0d15\u0d41\u0d15"
                        }
                    },
                    "success": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d24\u0d41!",
                    "inProgress": "\u0d24\u0d4d\u0d30\u0d46\u0d21\u0d4d \u0d2a\u0d41\u0d28\u0d7c\u0d28\u0d3e\u0d2e\u0d15\u0d30\u0d23\u0d02 \u0d1a\u0d46\u0d2f\u0d4d\u0d2f\u0d41\u0d28\u0d4d\u0d28\u0d41"
                }
            }
        }
    },
    "navigation": {
        "header": {
            "chat": "\u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d",
            "readme": "\u0d35\u0d3e\u0d2f\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
            "theme": {
                "light": "Light Theme",
                "dark": "Dark Theme",
                "system": "Follow System"
            }
        },
        "newChat": {
            "button": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d",
            "dialog": {
                "title": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d38\u0d43\u0d37\u0d4d\u0d1f\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d15",
                "description": "\u0d07\u0d24\u0d4d \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d28\u0d3f\u0d32\u0d35\u0d3f\u0d32\u0d46 \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d \u0d39\u0d3f\u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d31\u0d3f \u0d2e\u0d3e\u0d2f\u0d4d\u0d15\u0d4d\u0d15\u0d41\u0d02. \u0d24\u0d41\u0d1f\u0d30\u0d3e\u0d7b \u0d24\u0d3e\u0d7d\u0d2a\u0d4d\u0d2a\u0d30\u0d4d\u0d2f\u0d2e\u0d41\u0d23\u0d4d\u0d1f\u0d4b?",
                "tooltip": "\u0d2a\u0d41\u0d24\u0d3f\u0d2f \u0d1a\u0d3e\u0d31\u0d4d\u0d31\u0d4d"
            }
        },
        "user": {
            "menu": {
                "settings": "\u0d15\u0d4d\u0d30\u0d2e\u0d40\u0d15\u0d30\u0d23\u0d19\u0d4d\u0d19\u0d7e",
                "settingsKey": "S",
                "apiKeys": "API \u0d15\u0d40\u0d15\u0d7e",
                "logout": "\u0d32\u0d4b\u0d17\u0d4d\u0d14\u0d1f\u0d4d\u0d1f\u0d4d"
            }
        }
    },
    "apiKeys": {
        "title": "\u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d2f API \u0d15\u0d40\u0d15\u0d7e",
        "description": "\u0d08 \u0d06\u0d2a\u0d4d\u0d2a\u0d4d \u0d09\u0d2a\u0d2f\u0d4b\u0d17\u0d3f\u0d15\u0d4d\u0d15\u0d3e\u0d7b, \u0d24\u0d3e\u0d34\u0d46\u0d2a\u0d4d\u0d2a\u0d31\u0d2f\u0d41\u0d28\u0d4d\u0d28 API \u0d15\u0d40\u0d15\u0d7e \u0d06\u0d35\u0d36\u0d4d\u0d2f\u0d2e\u0d3e\u0d23\u0d4d. \u0d15\u0d40\u0d15\u0d7e \u0d28\u0d3f\u0d19\u0d4d\u0d19\u0d33\u0d41\u0d1f\u0d46 \u0d09\u0d2a\u0d15\u0d30\u0d23\u0d24\u0d4d\u0d24\u0d3f\u0d28\u0d4d\u0d31\u0d46 \u0d32\u0d4b\u0d15\u0d4d\u0d15\u0d7d \u0d38\u0d4d\u0d31\u0d4d\u0d31\u0d4b\u0d31\u0d47\u0d1c\u0d3f\u0d7d \u0d38\u0d02\u0d2d\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d2a\u0d4d\u0d2a\u0d46\u0d1f\u0d41\u0d28\u0d4d\u0d28\u0d41.",
        "success": {
            "saved": "\u0d35\u0d3f\u0d1c\u0d2f\u0d15\u0d30\u0d2e\u0d3e\u0d2f\u0d3f \u0d38\u0d02\u0d30\u0d15\u0d4d\u0d37\u0d3f\u0d1a\u0d4d\u0d1a\u0d41"
        }
    },
    "alerts": {
        "info": "Info",
        "note": "Note",
        "tip": "Tip",
        "important": "Important",
        "warning": "Warning",
        "caution": "Caution",
        "debug": "Debug",
        "example": "Example",
        "success": "Success",
        "help": "Help",
        "idea": "Idea",
        "pending": "Pending",
        "security": "Security",
        "beta": "Beta",
        "best-practice": "Best Practice"
    }
}
```

--------------------------------------------------------------------------------
/src/parquet_mcp_server/src/search_helper.py:
--------------------------------------------------------------------------------

```python
import json
import logging
import tempfile
from pathlib import Path
import re
import requests
import os
from dotenv import load_dotenv
import time
from firecrawl import FirecrawlApp
from datetime import datetime
import numpy as np
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage
from parquet_mcp_server.client import perform_search_and_scrape_async
import asyncio
from parquet_mcp_server.src.supabase_db import SupabaseDB
import random
import hashlib

def generate_unique_id():
    # Get the current time in milliseconds
    current_time = int(time.time() * 1000)

    # Generate a random number
    random_number = random.randint(1000, 9999)

    # Combine the time and random number to form a unique string
    unique_string = f"{current_time}-{random_number}"

    # Optionally, hash the string to shorten or obscure the ID
    unique_id = hashlib.sha256(unique_string.encode()).hexdigest()

    return unique_id
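# Example: generate_unique_id() returns a 64-character hex digest (the sha256 of
# "<millis>-<rand>"), so IDs are effectively unique across calls.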


# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Load environment variables from .env file
load_dotenv()

# Configuration
USE_SUPABASE = os.getenv("USE_SUPABASE", "true").lower() == "true"
JSON_FILE_PATH = os.getenv("JSON_FILE_PATH", "output.json")

# Initialize storage based on configuration
if USE_SUPABASE:
    db = SupabaseDB()
else:
    db = None

# Initialize Ollama LangChain model
ollama_model = ChatOllama(
    base_url=os.getenv("OLLAMA_URL"),
    model="llama3.1:8b",
)


def chunk_text(text: str, chunk_size: int = 500) -> list:
    """
    Split text into chunks of specified size.

    Args:
        text (str): Text to split
        chunk_size (int): Number of characters per chunk

    Returns:
        list: List of text chunks
    """
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
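# Example: chunk_text("abcdefgh", chunk_size=3) -> ["abc", "def", "gh"]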

def get_embedding(texts: list) -> list:
    """
    Fetch embeddings for a batch of texts from the embedding server.
    The server URL is read from the EMBEDDING_URL environment variable.

    Args:
        texts (list): A list of texts to generate embeddings for.

    Returns:
        list: A list of embeddings corresponding to the input texts.
    """
    model = "bge-m3"  # Changed from nomic-embed-text to bge-m3
    payload = {
        "model": model,
        "input": texts  # Pass all texts in a single batch request
    }
    logging.info('Requesting embeddings')
    try:
        response = requests.post(os.getenv('EMBEDDING_URL'), json=payload, verify=False)
        if response.status_code == 200:
            result = response.json()
            if 'embeddings' in result:
                return result['embeddings']
            else:
                logging.error(f"No embeddings found in response: {result}")
                return []
        else:
            logging.error(f"Error: {response.status_code}, {response.text}")
            return []
    except Exception as e:
        logging.error(f"Exception during request: {str(e)}")
        return []

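# Assumed embedding-server contract (an Ollama-style /api/embed endpoint; not
# verified here): POST {"model": "bge-m3", "input": ["a", "b"]} returns
# {"embeddings": [[...], [...]]}, one vector per input text. verify=False in
# get_embedding disables TLS certificate checks, presumably for a self-signed
# internal endpoint.
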
def search_web(query, page=1):
    """
    Perform a web search using the SearchAPI.io API.

    Args:
        query (str): The search query
        page (int): The page number for pagination (default: 1)

    Returns:
        tuple: A tuple containing (organic_results, related_searches)
            - organic_results: List of search results
            - related_searches: List of related search queries

    Raises:
        ValueError: If the API key is missing or the JSON response cannot be parsed
        RuntimeError: If there is an error making the HTTP request
    """
    url = "https://www.searchapi.io/api/v1/search"

    # Get API key from environment variables
    api_key = os.getenv("SEARCHAPI_API_KEY")
    if not api_key:
        logging.error("API key not found in environment variables")
        raise ValueError("API key not found. Please set SEARCHAPI_API_KEY in your .env file.")

    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key,
        "page": page,
        "num": 3
    }

    try:
        logging.info(f"Making search request for query: {query}")
        response = requests.get(url, params=params)
        response.raise_for_status()

        data = response.json()
        organic_results = data.get("organic_results", [])
        related_searches = data.get("related_searches", [])

        logging.info(f"Search successful for query: {query}")
        return organic_results, related_searches

    except requests.exceptions.RequestException as e:
        logging.error(f"Error making search request: {str(e)}")
        raise RuntimeError(f"Request error: {str(e)}") from e
    except json.JSONDecodeError as e:
        logging.error(f"Error parsing JSON response: {str(e)}")
        raise ValueError(f"JSON parsing error: {str(e)}") from e

def get_links(markdown_content):
    """
    Extract all links from markdown content.

    Args:
        markdown_content (str): The markdown content to scan

    Returns:
        list: A list of all links found in the content
    """
    # Regular expression to match markdown links: [text](url) or <url>
    link_pattern = r'\[([^\]]+)\]\(([^)]+)\)|<([^>]+)>'

    # Because the pattern uses alternation with three groups, re.findall
    # returns 3-tuples of the form (text, url, angle_url)
    link_matches = re.findall(link_pattern, markdown_content)

    # Extract the actual URLs from the matches
    links = []
    for _text, url, angle_url in link_matches:
        # For [text](url) the URL is the second group; for <url> it is the third
        links.append(url or angle_url)

    return links

def remove_markdown_links(text):
    # Remove markdown links while preserving the link text
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)

    # Remove any remaining URLs with percent encoding
    text = re.sub(r'https?://[^\s\]]+%[^\s\]]+', '', text)

    # Remove any remaining standalone URLs
    text = re.sub(r'https?://\S+', '', text)

    # Clean up any double newlines that might have been created
    text = re.sub(r'\n\s*\n', '\n\n', text)

    return text

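# Example: remove_markdown_links("see [docs](https://example.com/a%20b) now")
# returns "see docs now" (link text kept, URLs dropped).
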
def scrape_urls(organic_results):
    """
    Scrape each URL from the organic search results using the Firecrawl API.

    Args:
        organic_results (list): List of organic search results

    Returns:
        dict: Dictionary mapping URLs to their scrape status and content
    """
    # Get Firecrawl API key from environment variables
    firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
    if not firecrawl_api_key:
        raise ValueError("Firecrawl API key not found. Please set FIRECRAWL_API_KEY in your .env file.")

    # Initialize FirecrawlApp
    app = FirecrawlApp(api_key=firecrawl_api_key)

    # Dictionary to store scrape results for each URL
    scrape_results = {}

    # Scrape each URL
    for i, result in enumerate(organic_results):
        url = result.get("link")
        if not url:
            continue

        logging.info(f"Scraping URL {i+1}/{len(organic_results)}: {url}")

        try:
            # Scrape the URL
            scrape_status = app.scrape_url(
                url,
                params={'formats': ['markdown']}
            )

            # Store the scrape status and content if successful
            status_code = scrape_status['metadata']['statusCode']
            if status_code == 200:
                scrape_results[url] = {
                    'status': status_code,
                    'content': scrape_status['markdown']
                }
                logging.info(f"Successfully scraped {url}")
            else:
                scrape_results[url] = {
                    'status': status_code,
                    'error': f"Scraping failed with status: {status_code}"
                }
                logging.warning(f"Scraping failed with status: {status_code}")

            # Add a delay between requests to avoid rate limiting
            time.sleep(2)

        except Exception as e:
            logging.error(f"Error scraping {url}: {e}")
            raise RuntimeError(f"Error scraping {url}: {e}") from e

    return scrape_results

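# NOTE: the dict access in scrape_urls (scrape_status['metadata']['statusCode'],
# scrape_status['markdown']) assumes the response shape of the Firecrawl Python
# SDK version pinned in this repo; newer SDK releases return a response object
# with attributes instead, so this may need adapting.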

async def perform_search_and_scrape(search_queries: list[str], page_number: int = 1) -> tuple[bool, str]:
    """
    Perform searches and scrape URLs from the organic results for multiple queries.

    Args:
        search_queries (list[str]): The list of search queries to use.
        page_number (int): The page number for the search results.

    Returns:
        tuple[bool, str]: (success status, message)
    """
    # Unique ID shared by all chunks stored for this search session
    unique_id = generate_unique_id()

    all_results = []  # List to store all results with text and embeddings

    for search_query in search_queries:
        try:
            organic_results, related_searches = search_web(search_query, page_number)
        except Exception as e:
            return False, f"Error in SearchAPI: {str(e)}"

        # Log the search query results
        logging.info(f"Results for query '{search_query}' retrieved.")

        # Scrape URLs and save the content
        if organic_results:
            logging.info(f"Scraping URLs from organic search results for query '{search_query}'...")
            try:
                scrape_results = scrape_urls(organic_results)
            except Exception as e:
                return False, f"Error in scraping: {str(e)}"

            # Process and save markdown content for successful scrapes
            for i, (url, result) in enumerate(scrape_results.items()):
                if result['status'] == 200:
                    # Extract the links from the markdown content
                    links = get_links(result['content'])
                    logging.info(f"Found {len(links)} links in {url}")

                    # Remove markdown links from the content
                    filtered_content = remove_markdown_links(result['content'])

                    # Chunk the text
                    chunks = chunk_text(filtered_content, chunk_size=500)

                    # Generate embeddings for all chunks
                    embeddings = get_embedding(chunks)

                    # Combine text and embeddings into the result structure
                    current_date = datetime.now().strftime("%Y-%m-%d")  # Current date as string
                    for chunk, embed in zip(chunks, embeddings):
                        result_data = {
                            'text': chunk,
                            'metadata': {
                                'url': url,
                                'date': current_date,
                                'query': search_query,
                                'search_id': unique_id
                            },
                            'embedding': embed  # Key name matches the Supabase schema
                        }
                        all_results.append(result_data)

                        # Save to storage based on configuration
                        if USE_SUPABASE:
                            db.add_new_data(result_data)
                            logging.info("Saved chunk to Supabase")

    # Save all results to a JSON file
    with open(JSON_FILE_PATH, 'w', encoding='utf-8') as output_file:
        json.dump(all_results, output_file, ensure_ascii=False, indent=4)

    logging.info(f"All results saved to {JSON_FILE_PATH}")
    return await find_similar_chunks(search_queries)


async def summary_with_ollama(text: str, user_query: str) -> str:
    """
    Process text with the Ollama model in chunks and ensure the final result is under 4000 characters.

    Args:
        text (str): The complete text to process
        user_query (str): The user's query

    Returns:
        str: Final response from the model, under 4000 characters
    """
    logging.info("Starting summary_with_ollama processing")

    async def process_chunk(chunk: str) -> str:
        """Process a single chunk with the Ollama model."""
        try:
            prompt_content = f"This is the user input query: {user_query}\nand this is the extracted information from the internet. Please summarize the results but mention all the information related to user query. Don't forget to add the sources links: \n{chunk}"
            chunk_response = await ollama_model.ainvoke([HumanMessage(content=prompt_content)])
            return chunk_response.content
        except Exception as e:
            logging.error(f"Error processing chunk: {str(e)}")
            return ""

    async def process_text_in_chunks(input_text: str) -> str:
        """Process text in chunks and combine results."""
        chunk_size = 3000
        chunks = [input_text[i:i + chunk_size] for i in range(0, len(input_text), chunk_size)]
        logging.info(f"Split text into {len(chunks)} chunks of size {chunk_size}")

        # Process all chunks concurrently
        chunk_tasks = [process_chunk(chunk) for chunk in chunks]
        chunk_responses = await asyncio.gather(*chunk_tasks)

        # Combine all responses
        combined_response = "\n\n\n------------------------------------------------ \n\n\n".join(chunk_responses)
        logging.info(f"Combined response length: {len(combined_response)}")

        return combined_response

    # First pass: process the original text
    first_pass_result = await process_text_in_chunks(text)

    # If the result is still too long, process it again
    if len(first_pass_result) > 4000:
        logging.info("First pass result too long, processing again")
        final_result = await process_text_in_chunks(first_pass_result)
    else:
        final_result = first_pass_result

    logging.info(f"Final result length: {len(final_result)}")
    return final_result

async def find_similar_chunks(queries: list[str]) -> tuple[bool, str]:
    """
    Get information from the results of a previous search using either Supabase or a JSON file.

    Args:
        queries (list[str]): List of search queries to merge.

    Returns:
        tuple[bool, str]: (success status, message with similar text chunks)
    """
    logging.info(f"Starting find_similar_chunks with queries: {queries}")

    # Merge queries with 'and'
    merged_query = ' and '.join(queries)
    logging.info(f"Merged query: {merged_query}")

    try:
        # Get query embedding
        logging.info("Generating query embedding")
        query_embeddings = get_embedding([merged_query])
        if not query_embeddings:
            logging.error("Failed to generate query embedding")
            return False, "Failed to generate query embedding"
        logging.info("Successfully generated query embedding")

        if USE_SUPABASE:
            # Use Supabase similarity search
            logging.info("Performing similarity search in Supabase Database")
            similar_results = db.search_results_by_similarity(
                query_embedding=query_embeddings[0],
                threshold=0.55,
                match_count=100
            )

            if not similar_results["success"]:
                logging.error(f"Error in similarity search: {similar_results['error']}")
                return False, f"Error in similarity search: {similar_results['error']}"

            data = similar_results["data"]
        else:
            # Use JSON file for similarity search
            logging.info(f"Loading JSON file from {JSON_FILE_PATH}")
            try:
                with open(JSON_FILE_PATH, 'r', encoding='utf-8') as f:
                    all_data = json.load(f)

                # Calculate similarities locally
                texts = [item['text'] for item in all_data]
                links = [item.get('metadata', {}).get('url', '') for item in all_data]
                embeddings = np.array([item['embedding'] for item in all_data])

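                # Cosine similarity against every stored chunk:
                # sim(a, b) = (a . b) / (||a|| * ||b||), vectorized with numpy.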
                query_embedding = np.array(query_embeddings[0])
                similarities = np.dot(embeddings, query_embedding) / (
                    np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
                )

                # Get indices of chunks with similarity above threshold
                high_similarity_indices = np.where(similarities > 0.55)[0]

                # Prepare data in the same format as Supabase results
                data = []
                for idx in high_similarity_indices:
                    data.append({
                        'text': texts[idx],
                        'metadata': {'url': links[idx]},
                        'similarity': float(similarities[idx])
                    })

            except Exception as e:
                logging.error(f"Error loading JSON file: {str(e)}")
                return False, f"Error loading JSON file: {str(e)}"

        if not data:
            logging.info("No similar results found")
            return True, "No similar results found"

        # Prepare the output
        output_texts = []
        links = set()
        for item in data:
            text = item.get('text', '')
            url = item.get('metadata', {}).get('url', '')
            similarity = item.get('similarity', 0)

            if text and url:
                output_texts.append(f"{text}\nSource: {url} (Similarity: {similarity:.2f})")
                links.add(url)

        output_message = "\n\n--------------------\n\n".join(output_texts)
        logging.info(f"Prepared output message with {len(output_texts)} chunks")

        # Process with Ollama model
        logging.info("Starting Ollama model processing")
        final_response = await summary_with_ollama(output_message, merged_query)
        logging.info("Successfully completed Ollama model processing")

        # Append all source links to the final response. The join is computed
        # outside the f-string because backslashes are not allowed inside
        # f-string expressions before Python 3.12.
        links_list = "\n - ".join(links)
        final_response = f"{final_response}\n\n--------------------\n\nAll of the searched websites are listed here:\n - {links_list}"

        # Create tmp directory if it doesn't exist
        os.makedirs('./tmp', exist_ok=True)
        output_file = f'./tmp/output_{int(time.time())}.txt'
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(final_response)
        logging.info(f"Successfully wrote output to {output_file}")

        return True, final_response

    except Exception as e:
        logging.error(f"Error in find_similar_chunks: {str(e)}")
        return False, f"Error in find_similar_chunks: {str(e)}"


if __name__ == "__main__":
    logging.info("Starting main execution")
    queries = ["آیفون ۱۶ قیمت"]  # Persian: "iPhone 16 price"
    logging.info(f"Running with queries: {queries}")
    # success, message = asyncio.run(find_similar_chunks(queries))
    success, message = asyncio.run(perform_search_and_scrape(queries))
    logging.info(message)
```